diff --git a/.gitignore b/.gitignore index 8def1549..10fcc112 100644 --- a/.gitignore +++ b/.gitignore @@ -278,3 +278,6 @@ BUCKAROO_DEPS # Vim *.swp *.swo + +# clangd cache +/.cache/clangd diff --git a/README.md b/README.md index a191c506..7f689223 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ C++ client for [ClickHouse](https://clickhouse.com/). ## Supported data types * Array(T) +* Bool * Date * DateTime, DateTime64 * DateTime([timezone]), DateTime64(N, [timezone]) @@ -256,5 +257,3 @@ client.Insert("default.test", block); ```sql ALTER USER insert_account SETTINGS async_insert=1,wait_for_async_insert=1,async_insert_use_adaptive_busy_timeout=0,async_insert_busy_timeout_ms=5000,async_insert_max_data_size=104857600 ``` - - diff --git a/clickhouse/columns/factory.cpp b/clickhouse/columns/factory.cpp index 460d66fa..ea2ec2f3 100644 --- a/clickhouse/columns/factory.cpp +++ b/clickhouse/columns/factory.cpp @@ -49,6 +49,8 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) { case Type::Void: return std::make_shared(); + case Type::Bool: + return std::make_shared(); case Type::UInt8: return std::make_shared(); case Type::UInt16: @@ -162,16 +164,26 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti case TypeAst::Tuple: { std::vector columns; + std::vector names; columns.reserve(ast.elements.size()); + names.reserve(ast.elements.size()); + bool any_named = false; for (const auto& elem : ast.elements) { if (auto col = CreateColumnFromAst(elem, settings)) { columns.push_back(col); + names.push_back(elem.element_name); + if (!elem.element_name.empty()) { + any_named = true; + } } else { return nullptr; } } + if (any_named) { + return std::make_shared(columns, std::move(names)); + } return std::make_shared(columns); } diff --git a/clickhouse/columns/itemview.cpp b/clickhouse/columns/itemview.cpp index 0116070a..e3c82bcc 100644 --- a/clickhouse/columns/itemview.cpp +++ b/clickhouse/columns/itemview.cpp @@ -44,6 +44,7 @@ void 
ItemView::ValidateData(Type::Code type, DataType data) { case Type::Code::Int8: case Type::Code::UInt8: case Type::Code::Enum8: + case Type::Code::Bool: return AssertSize({1}); case Type::Code::Int16: diff --git a/clickhouse/columns/itemview.h b/clickhouse/columns/itemview.h index 199994b6..1018cafe 100644 --- a/clickhouse/columns/itemview.h +++ b/clickhouse/columns/itemview.h @@ -28,7 +28,10 @@ struct ItemView { inline auto ConvertToStorageValue(const T& t) { if constexpr (std::is_same_v || std::is_same_v) { return std::string_view{t}; - } else if constexpr (std::is_fundamental_v || std::is_same_v> || std::is_same_v>) { + } else if constexpr (std::is_fundamental_v + || std::is_same_v> + || std::is_same_v> + || std::is_same_v>) { return std::string_view{reinterpret_cast(&t), sizeof(T)}; } else { static_assert(!std::is_same_v, "Unknown type, which can't be stored in ItemView"); @@ -65,7 +68,10 @@ struct ItemView { using ValueType = std::remove_cv_t>; if constexpr (std::is_same_v || std::is_same_v) { return data; - } else if constexpr (std::is_fundamental_v || std::is_same_v || std::is_same_v) { + } else if constexpr (std::is_fundamental_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) { if (sizeof(ValueType) == data.size()) { return *reinterpret_cast(data.data()); } else { diff --git a/clickhouse/columns/numeric.cpp b/clickhouse/columns/numeric.cpp index 4819f37a..04a08b1e 100644 --- a/clickhouse/columns/numeric.cpp +++ b/clickhouse/columns/numeric.cpp @@ -113,6 +113,7 @@ template class ColumnVector; template class ColumnVector; template class ColumnVector; +template class ColumnVector; template class ColumnVector; template class ColumnVector; template class ColumnVector; diff --git a/clickhouse/columns/numeric.h b/clickhouse/columns/numeric.h index 5187b727..4140ef0c 100644 --- a/clickhouse/columns/numeric.h +++ b/clickhouse/columns/numeric.h @@ -70,6 +70,7 @@ using Int128 = absl::int128; using UInt128 = absl::uint128; using Int64 = int64_t; +using 
ColumnBool = ColumnVector; using ColumnUInt8 = ColumnVector; using ColumnUInt16 = ColumnVector; using ColumnUInt32 = ColumnVector; diff --git a/clickhouse/columns/tuple.cpp b/clickhouse/columns/tuple.cpp index 56858590..8e3bfa7f 100644 --- a/clickhouse/columns/tuple.cpp +++ b/clickhouse/columns/tuple.cpp @@ -16,6 +16,13 @@ ColumnTuple::ColumnTuple(const std::vector& columns) { } +ColumnTuple::ColumnTuple(const std::vector& columns, + std::vector names) + : Column(Type::CreateTuple(CollectTypes(columns), std::move(names))) + , columns_(columns) +{ +} + size_t ColumnTuple::TupleSize() const { return columns_.size(); } @@ -48,7 +55,11 @@ ColumnRef ColumnTuple::Slice(size_t begin, size_t len) const { sliced_columns.push_back(column->Slice(begin, len)); } - return std::make_shared(sliced_columns); + const auto& names = this->Type()->As()->GetItemNames(); + if (names.empty()) { + return std::make_shared(sliced_columns); + } + return std::make_shared(sliced_columns, names); } ColumnRef ColumnTuple::CloneEmpty() const { @@ -59,7 +70,11 @@ ColumnRef ColumnTuple::CloneEmpty() const { result_columns.push_back(column->CloneEmpty()); } - return std::make_shared(result_columns); + const auto& names = this->Type()->As()->GetItemNames(); + if (names.empty()) { + return std::make_shared(result_columns); + } + return std::make_shared(result_columns, names); } bool ColumnTuple::LoadPrefix(InputStream* input, size_t rows) { diff --git a/clickhouse/columns/tuple.h b/clickhouse/columns/tuple.h index ebc1b895..5bf3b0d6 100644 --- a/clickhouse/columns/tuple.h +++ b/clickhouse/columns/tuple.h @@ -13,6 +13,8 @@ namespace clickhouse { class ColumnTuple : public Column { public: ColumnTuple(const std::vector& columns); + ColumnTuple(const std::vector& columns, + std::vector names); /// Returns count of columns in the tuple. 
size_t TupleSize() const; diff --git a/clickhouse/types/type_parser.cpp b/clickhouse/types/type_parser.cpp index d488a079..e26a69d5 100644 --- a/clickhouse/types/type_parser.cpp +++ b/clickhouse/types/type_parser.cpp @@ -22,7 +22,9 @@ bool TypeAst::operator==(const TypeAst & other) const { return meta == other.meta && code == other.code && name == other.name + && element_name == other.element_name && value == other.value + && value_string == other.value_string && std::equal(elements.begin(), elements.end(), other.elements.begin(), other.elements.end()); } @@ -32,7 +34,7 @@ static const std::unordered_map kTypeCode = { { "Int16", Type::Int16 }, { "Int32", Type::Int32 }, { "Int64", Type::Int64 }, - { "Bool", Type::UInt8 }, + { "Bool", Type::Bool }, { "UInt8", Type::UInt8 }, { "UInt16", Type::UInt16 }, { "UInt32", Type::UInt32 }, @@ -167,6 +169,12 @@ bool TypeParser::Parse(TypeAst* type) { break; } case Token::Name: + if (!type_->name.empty()) { + // A second Name token on the same element means the + // previous one was a field name in a named-tuple element + // (e.g. "a" in "Tuple(a Int32, …)"). + type_->element_name = std::move(type_->name); + } type_->meta = GetTypeMeta(token.value); type_->name = token.value.to_string(); type_->code = GetTypeCode(type_->name); diff --git a/clickhouse/types/type_parser.h b/clickhouse/types/type_parser.h index 2f8f2f6f..9cc29512 100644 --- a/clickhouse/types/type_parser.h +++ b/clickhouse/types/type_parser.h @@ -31,6 +31,9 @@ struct TypeAst { /// Type's name. /// Need to cache TypeAst, so can't use StringView for name. std::string name; + /// Name of this element inside its parent (e.g. field name inside a named + /// Tuple). Empty for unnamed elements. + std::string element_name; /// Value associated with the node, /// used for fixed-width types and enum values. 
int64_t value = 0; diff --git a/clickhouse/types/types.cpp b/clickhouse/types/types.cpp index a5588c68..ba2ad58a 100644 --- a/clickhouse/types/types.cpp +++ b/clickhouse/types/types.cpp @@ -54,6 +54,7 @@ const char* Type::TypeName(Type::Code code) { case Type::Code::MultiPolygon: return "MultiPolygon"; case Type::Code::Time: return "Time"; case Type::Code::Time64: return "Time64"; + case Type::Code::Bool: return "Bool"; } return "Unknown type"; @@ -85,6 +86,7 @@ std::string Type::GetName() const { case Ring: case Polygon: case MultiPolygon: + case Bool: return TypeName(code_); case Time64: return As()->GetName(); @@ -146,6 +148,7 @@ uint64_t Type::GetTypeUniqueId() const { case Ring: case Polygon: case MultiPolygon: + case Bool: // For simple types, unique ID is the same as Type::Code return code_; @@ -243,6 +246,11 @@ TypeRef Type::CreateTuple(const std::vector& item_types) { return TypeRef(new TupleType(item_types)); } +TypeRef Type::CreateTuple(const std::vector& item_types, + std::vector item_names) { + return TypeRef(new TupleType(item_types, std::move(item_names))); +} + TypeRef Type::CreateEnum8(const std::vector& enum_items) { return TypeRef(new EnumType(Type::Enum8, enum_items)); } @@ -447,6 +455,11 @@ NullableType::NullableType(TypeRef nested_type) : Type(Nullable), nested_type_(n TupleType::TupleType(const std::vector& item_types) : Type(Tuple), item_types_(item_types) { } +TupleType::TupleType(const std::vector& item_types, + std::vector item_names) + : Type(Tuple), item_types_(item_types), item_names_(std::move(item_names)) { +} + /// class LowCardinalityType LowCardinalityType::LowCardinalityType(TypeRef nested_type) : Type(LowCardinality), nested_type_(nested_type) { } @@ -456,13 +469,30 @@ LowCardinalityType::~LowCardinalityType() { std::string TupleType::GetName() const { std::string result("Tuple("); + bool has_complete_names = item_names_.size() == item_types_.size(); + if (has_complete_names) { + for (const auto& item_name : item_names_) { + if 
(item_name.empty()) { + has_complete_names = false; + break; + } + } + } if (!item_types_.empty()) { - result += item_types_[0]->GetName(); + if (has_complete_names) { + result += item_names_[0] + " " + item_types_[0]->GetName(); + } else { + result += item_types_[0]->GetName(); + } } for (size_t i = 1; i < item_types_.size(); ++i) { - result += ", " + item_types_[i]->GetName(); + if (has_complete_names) { + result += ", " + item_names_[i] + " " + item_types_[i]->GetName(); + } else { + result += ", " + item_types_[i]->GetName(); + } } result += ")"; diff --git a/clickhouse/types/types.h b/clickhouse/types/types.h index 2275cfba..cb627e50 100644 --- a/clickhouse/types/types.h +++ b/clickhouse/types/types.h @@ -15,6 +15,14 @@ using Int128 = absl::int128; using UInt128 = absl::uint128; using Int64 = int64_t; +/// Distinct type for the ClickHouse Bool type. Backed by `bool` so it has the +/// same single-byte layout as `uint8_t` without std::vector<bool>'s +/// bit-packing, while remaining a type distinct from all integer types. +enum Bool : bool { + false_ = false, + true_ = true, +}; + using TypeRef = std::shared_ptr; class Type { @@ -59,6 +67,7 @@ class Type { MultiPolygon, Time, Time64, + Bool, }; using EnumItem = std::pair; @@ -126,6 +135,9 @@ class Type { static TypeRef CreateTuple(const std::vector& item_types); + static TypeRef CreateTuple(const std::vector& item_types, + std::vector item_names); + static TypeRef CreateEnum8(const std::vector& enum_items); static TypeRef CreateEnum16(const std::vector& enum_items); @@ -293,14 +305,21 @@ class NullableType : public Type { class TupleType : public Type { public: explicit TupleType(const std::vector& item_types); + TupleType(const std::vector& item_types, + std::vector item_names); std::string GetName() const; /// Type of nested Tuple element type. std::vector GetTupleType() const { return item_types_; } + /// Field names for named tuples.
Same length as GetTupleType() when + /// populated, or empty when the tuple has no field names. + const std::vector& GetItemNames() const { return item_names_; } + private: std::vector item_types_; + std::vector item_names_; }; class LowCardinalityType : public Type { @@ -384,6 +403,11 @@ inline TypeRef Type::CreateSimple() { return TypeRef(new Type(UInt64)); } +template <> +inline TypeRef Type::CreateSimple() { + return TypeRef(new Type(Bool)); +} + template <> inline TypeRef Type::CreateSimple() { return TypeRef(new Type(Float32)); diff --git a/ut/CreateColumnByType_ut.cpp b/ut/CreateColumnByType_ut.cpp index 556dfc36..ba18beac 100644 --- a/ut/CreateColumnByType_ut.cpp +++ b/ut/CreateColumnByType_ut.cpp @@ -62,7 +62,8 @@ class CreateColumnByTypeWithName : public ::testing::TestWithParamGetType().GetName(), "UInt8"); + EXPECT_EQ(col->GetType().GetName(), "Bool"); + EXPECT_NE(nullptr, col->As()); } TEST_P(CreateColumnByTypeWithName, CreateColumnByType) @@ -75,6 +76,7 @@ TEST_P(CreateColumnByTypeWithName, CreateColumnByType) INSTANTIATE_TEST_SUITE_P(Basic, CreateColumnByTypeWithName, ::testing::Values( "Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64", + "Bool", "String", "Date", "DateTime", "UUID", "Int128", "UInt128" )); diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index 29b0d47b..9550cad0 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -400,11 +400,11 @@ TEST_P(ClientCase, Generic) { auto id = std::make_shared(); auto name = std::make_shared(); - auto f = std::make_shared (); + auto f = std::make_shared(); for (auto const& td : TEST_DATA) { id->Append(td.id); name->Append(td.name); - f->Append(td.f); + f->Append(static_cast(td.f)); } block.AppendColumn("id" , id); @@ -426,7 +426,7 @@ TEST_P(ClientCase, Generic) { for (size_t c = 0; c < block.GetRowCount(); ++c, ++row) { EXPECT_EQ(TEST_DATA[row].id, (*block[0]->As())[c]); EXPECT_EQ(TEST_DATA[row].name, (*block[1]->As())[c]); - EXPECT_EQ(TEST_DATA[row].f, (*block[2]->As())[c]); + 
EXPECT_EQ(static_cast(TEST_DATA[row].f), (*block[2]->As())[c]); } } ); @@ -468,13 +468,13 @@ TEST_P(ClientCase, InsertData) { // Fetch the derived columns. auto id = block[0]->As(); auto name = block[1]->As(); - auto f = block[2]->As(); + auto f = block[2]->As(); // Insert some values. for (auto const& td : TEST_DATA) { id->Append(td.id); name->Append(td.name); - f->Append(td.f); + f->Append(static_cast(td.f)); } block.RefreshRowCount(); client_->SendInsertBlock(block); @@ -484,7 +484,7 @@ TEST_P(ClientCase, InsertData) { for (auto const& td : TEST_DATA2) { id->Append(td.id); name->Append(td.name); - f->Append(td.f); + f->Append(static_cast(td.f)); } block.RefreshRowCount(); client_->SendInsertBlock(block); @@ -509,13 +509,13 @@ TEST_P(ClientCase, InsertData) { for (size_t c = 0; c < block.GetRowCount(); ++c, ++row) { EXPECT_EQ(TEST_DATA[row].id, (*block[0]->As())[c]); EXPECT_EQ(TEST_DATA[row].name, (*block[1]->As())[c]); - EXPECT_EQ(TEST_DATA[row].f, (*block[2]->As())[c]); + EXPECT_EQ(static_cast(TEST_DATA[row].f), (*block[2]->As())[c]); } } else { for (size_t c = 0; c < block.GetRowCount(); ++c, ++row) { EXPECT_EQ(TEST_DATA2[row-block_two_row_num].id, (*block[0]->As())[c]); EXPECT_EQ(TEST_DATA2[row-block_two_row_num].name, (*block[1]->As())[c]); - EXPECT_EQ(TEST_DATA2[row-block_two_row_num].f, (*block[2]->As())[c]); + EXPECT_EQ(static_cast(TEST_DATA2[row-block_two_row_num].f), (*block[2]->As())[c]); } } } diff --git a/ut/type_parser_ut.cpp b/ut/type_parser_ut.cpp index 4cff5237..d4012f82 100644 --- a/ut/type_parser_ut.cpp +++ b/ut/type_parser_ut.cpp @@ -89,10 +89,28 @@ TEST(TypeParserCase, ParseTuple) { auto element = ast.elements.begin(); for (size_t i = 0; i < 2; ++i) { ASSERT_EQ(element->name, names[i]); + ASSERT_TRUE(element->element_name.empty()); ++element; } } +TEST(TypeParserCase, ParseNamedTuple) { + TypeAst ast; + TypeParser("Tuple(a UInt8, b String)").Parse(&ast); + ASSERT_EQ(ast.meta, TypeAst::Tuple); + ASSERT_EQ(ast.name, "Tuple"); + 
ASSERT_EQ(ast.code, Type::Tuple); + ASSERT_EQ(ast.elements.size(), 2u); + + ASSERT_EQ(ast.elements[0].element_name, "a"); + ASSERT_EQ(ast.elements[0].name, "UInt8"); + ASSERT_EQ(ast.elements[0].code, Type::UInt8); + + ASSERT_EQ(ast.elements[1].element_name, "b"); + ASSERT_EQ(ast.elements[1].name, "String"); + ASSERT_EQ(ast.elements[1].code, Type::String); +} + TEST(TypeParserCase, ParseDecimal) { TypeAst ast; TypeParser("Decimal(12, 5)").Parse(&ast); @@ -167,6 +185,20 @@ TEST(TypeParserCase, ParseDateTime_MINSK_TIMEZONE) { ASSERT_EQ(ast.elements[0].meta, TypeAst::Terminal); } +TEST(TypeParserCase, EqualityIncludesValueString) { + TypeAst utc; + TypeAst minsk; + ASSERT_TRUE(TypeParser("DateTime('UTC')").Parse(&utc)); + ASSERT_TRUE(TypeParser("DateTime('Europe/Minsk')").Parse(&minsk)); + ASSERT_NE(utc, minsk); + + TypeAst enum_one; + TypeAst enum_two; + ASSERT_TRUE(TypeParser("Enum8('ONE' = 1)").Parse(&enum_one)); + ASSERT_TRUE(TypeParser("Enum8('TWO' = 1)").Parse(&enum_two)); + ASSERT_NE(enum_one, enum_two); +} + TEST(TypeParserCase, LowCardinality_String) { TypeAst ast; ASSERT_TRUE(TypeParser("LowCardinality(String)").Parse(&ast)); @@ -194,7 +226,7 @@ TEST(TypeParserCase, LowCardinality_FixedString) { ASSERT_EQ(ast.elements[0].name, "FixedString"); ASSERT_EQ(ast.elements[0].value, 0); ASSERT_EQ(ast.elements[0].elements.size(), 1u); - auto param = TypeAst{TypeAst::Number, Type::Void, "", 10, {}, {}}; + auto param = TypeAst{TypeAst::Number, Type::Void, "", "", 10, {}, {}}; ASSERT_EQ(ast.elements[0].elements[0], param); } diff --git a/ut/types_ut.cpp b/ut/types_ut.cpp index 7af343b5..2655e54a 100644 --- a/ut/types_ut.cpp +++ b/ut/types_ut.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -34,6 +35,22 @@ TEST(TypesCase, TypeName) { ); ASSERT_EQ(Type::CreateMap(Type::CreateSimple(), Type::CreateString())->GetName(), "Map(Int32, String)"); + + ASSERT_EQ(Type::CreateSimple()->GetName(), "Bool"); +} + +TEST(TypesCase, ColumnBool) { + auto col = 
std::make_shared(); + col->Append(true_); + col->Append(false_); + col->Append(true_); + + ASSERT_EQ(col->Size(), 3u); + ASSERT_EQ(col->At(0), true_); + ASSERT_EQ(col->At(1), false_); + ASSERT_EQ(col->At(2), true_); + ASSERT_EQ(col->GetType().GetName(), "Bool"); + ASSERT_EQ(col->GetType().GetCode(), Type::Bool); } TEST(TypesCase, NullableType) { @@ -41,6 +58,66 @@ TEST(TypesCase, NullableType) { ASSERT_EQ(Type::CreateNullable(nested)->As()->GetNestedType(), nested); } +TEST(TypesCase, TupleTypeItemNames) { + auto unnamed = Type::CreateTuple({ + Type::CreateSimple(), + Type::CreateString()}); + ASSERT_TRUE(unnamed->As()->GetItemNames().empty()); + + auto named = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a", "b"}); + const auto& names = named->As()->GetItemNames(); + ASSERT_EQ(names.size(), 2u); + ASSERT_EQ(names[0], "a"); + ASSERT_EQ(names[1], "b"); +} + +TEST(TypesCase, TupleTypeNameIncludesFieldNames) { + auto named = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a", "b"}); + ASSERT_EQ(named->GetName(), "Tuple(a UInt8, b String)"); + + auto partially_named = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a", ""}); + ASSERT_EQ(partially_named->GetName(), "Tuple(UInt8, String)"); + + auto mismatched_names = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a"}); + ASSERT_EQ(mismatched_names->GetName(), "Tuple(UInt8, String)"); +} + +TEST(TypesCase, TupleTypeNamesFromFactory) { + auto col = CreateColumnByType("Tuple(a UInt8, b String)"); + ASSERT_NE(col, nullptr); + const auto& names = col->Type()->As()->GetItemNames(); + ASSERT_EQ(names.size(), 2u); + ASSERT_EQ(names[0], "a"); + ASSERT_EQ(names[1], "b"); + + auto col_unnamed = CreateColumnByType("Tuple(UInt8, String)"); + ASSERT_NE(col_unnamed, nullptr); + ASSERT_TRUE(col_unnamed->Type()->As()->GetItemNames().empty()); +} + +TEST(TypesCase, TupleTypeEqualityIncludesFieldNames) { + auto unnamed = Type::CreateTuple( + 
{Type::CreateSimple(), Type::CreateString()}); + auto named_ab = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a", "b"}); + auto named_xy = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"x", "y"}); + + ASSERT_TRUE(named_ab->IsEqual(named_ab)); + ASSERT_FALSE(named_ab->IsEqual(unnamed)); + ASSERT_FALSE(named_ab->IsEqual(named_xy)); +} + TEST(TypesCase, EnumTypes) { auto enum8 = Type::CreateEnum8({{"One", 1}, {"Two", 2}}); ASSERT_EQ(enum8->GetName(), "Enum8('One' = 1, 'Two' = 2)"); diff --git a/ut/utils.cpp b/ut/utils.cpp index 5c0dec92..516e25cc 100644 --- a/ut/utils.cpp +++ b/ut/utils.cpp @@ -361,6 +361,9 @@ std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) { case Type::UInt8: ostr << static_cast(item_view.get()); break; + case Type::Bool: + ostr << (item_view.get() ? "true" : "false"); + break; case Type::UInt16: ostr << static_cast(item_view.get()); break;