Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Add JSON support
Implemented the same as String, which seems to work fine except for
round-tripping. I assume it's not actually a string in the protocol so
it will need a different format. But hopefully the rest of the
infrastructure for it is helpful.
  • Loading branch information
theory committed Nov 19, 2025
commit a591622f4ddd91edc886a86d80657db0ab0a01ae
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ C++ client for [ClickHouse](https://clickhouse.com/).
* UUID
* Map
* Point, Ring, Polygon, MultiPolygon
* JSON

## Dependencies
In the most basic case one needs only:
Expand Down
6 changes: 6 additions & 0 deletions clickhouse/columns/factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) {

case Type::String:
return std::make_shared<ColumnString>();
case Type::JSON:
return std::make_shared<ColumnJSON>();
case Type::FixedString:
return std::make_shared<ColumnFixedString>(GetASTChildElement(ast, 0).value);

Expand Down Expand Up @@ -201,6 +203,8 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti
// TODO (nemkov): update this to maximize code reuse.
case Type::String:
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnString>>();
case Type::JSON:
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnJSON>>();
case Type::FixedString:
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnFixedString>>(GetASTChildElement(nested, 0).value);
case Type::Nullable:
Expand All @@ -214,6 +218,8 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti
// TODO (nemkov): update this to maximize code reuse.
case Type::String:
return std::make_shared<ColumnLowCardinalityT<ColumnString>>();
case Type::JSON:
return std::make_shared<ColumnLowCardinalityT<ColumnJSON>>();
case Type::FixedString:
return std::make_shared<ColumnLowCardinalityT<ColumnFixedString>>(GetASTChildElement(nested, 0).value);
case Type::Nullable:
Expand Down
1 change: 1 addition & 0 deletions clickhouse/columns/itemview.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ void ItemView::ValidateData(Type::Code type, DataType data) {

case Type::Code::String:
case Type::Code::FixedString:
case Type::Code::JSON:
// value can be of any size
return;

Expand Down
2 changes: 1 addition & 1 deletion clickhouse/columns/itemview.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ struct ItemView {
if (sizeof(ValueType) == data.size()) {
return *reinterpret_cast<const T*>(data.data());
} else {
throw AssertionError("Incompatitable value type and size. Requested size: "
throw AssertionError("Incompatible value type and size. Requested size: "
+ std::to_string(sizeof(ValueType)) + " stored size: " + std::to_string(data.size()));
}
}
Expand Down
211 changes: 211 additions & 0 deletions clickhouse/columns/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,4 +330,215 @@ ItemView ColumnString::GetItem(size_t index) const {
return ItemView{Type::String, this->At(index)};
}

/// Fixed-capacity character buffer that owns the bytes referenced by the column's string views.
/// `size` counts the bytes already handed out; `capacity` is the total size of `data_`.
struct ColumnJSON::Block
{
    using CharT = typename std::string::value_type;

    /// Allocates an empty buffer able to hold `starting_capacity` characters.
    explicit Block(size_t starting_capacity)
        : size(0),
          capacity(starting_capacity),
          data_(new CharT[capacity])
    {}

    /// Number of characters that can still be written into this block.
    inline auto GetAvailable() const {
        return capacity - size;
    }

    /// Copies `str` to the end of the used region and returns a view of the copy.
    /// "Unsafe": no capacity check is made, the caller must ensure GetAvailable() >= str.size().
    std::string_view AppendUnsafe(std::string_view str) {
        const auto pos = &data_[size];

        // An empty view may carry a null data() pointer (e.g. a default-constructed
        // std::string_view); passing that to memcpy is undefined behavior even for a
        // zero-length copy, so skip the call when there is nothing to copy.
        if (!str.empty()) {
            memcpy(pos, str.data(), str.size());
            size += str.size();
        }

        return std::string_view(pos, str.size());
    }

    /// Address of the first unused character; used to read bytes directly into the block.
    auto GetCurrentWritePos() {
        return &data_[size];
    }

    /// Marks the next `len` characters (already written at GetCurrentWritePos()) as used and
    /// returns a view of them. "Unsafe": no capacity check is made.
    std::string_view ConsumeTailAsJSONViewUnsafe(size_t len) {
        const auto start = &data_[size];
        size += len;
        return std::string_view(start, len);
    }

    size_t size;
    const size_t capacity;
    std::unique_ptr<CharT[]> data_;
};

/// Creates an empty column whose type is Type::CreateJSON().
ColumnJSON::ColumnJSON() : Column(Type::CreateJSON()) {}

/// Creates an empty column and pre-reserves bookkeeping for `element_count` rows.
ColumnJSON::ColumnJSON(size_t element_count) : Column(Type::CreateJSON()) {
    // One block slot per 16 rows, never fewer than one. The divisor 16 is an arbitrary
    // number chosen on the assumption that values are about ~256 bytes long.
    const size_t block_slots = std::max<size_t>(1, element_count / 16);

    items_.reserve(element_count);
    blocks_.reserve(block_slots);
}

/// Creates a column holding a copy of every string in `data`.
ColumnJSON::ColumnJSON(const std::vector<std::string>& data) : ColumnJSON() {
    items_.reserve(data.size());

    // A single block sized for the whole input, so the unchecked appends below always fit.
    blocks_.emplace_back(ComputeTotalSize(data));

    for (const std::string& value : data) {
        AppendUnsafe(value);
    }
}

/// Creates a column that takes ownership of every string in `data` without copying the bytes.
ColumnJSON::ColumnJSON(std::vector<std::string>&& data) : ColumnJSON() {
    items_.reserve(data.size());

    // Append(std::string&&) moves the string into append_data_ and records a view of it.
    for (std::string& value : data) {
        Append(std::move(value));
    }
}

/// Defined out of line, in the file where Block is a complete type.
ColumnJSON::~ColumnJSON() = default;

/// Pre-reserves bookkeeping for `new_cap` rows; no character storage is allocated here.
void ColumnJSON::Reserve(size_t new_cap) {
    // One block slot per 16 rows, never fewer than one. The divisor 16 is an arbitrary
    // number chosen on the assumption that values are about ~256 bytes long.
    const size_t block_slots = std::max<size_t>(1, new_cap / 16);

    items_.reserve(new_cap);
    blocks_.reserve(block_slots);
}

void ColumnJSON::Append(std::string_view str) {
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) {
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size()));
}

items_.emplace_back(blocks_.back().AppendUnsafe(str));
}

void ColumnJSON::Append(const char* str) {
Append(std::string_view(str, strlen(str)));
}

/// Appends a new row by moving `steal_value` into the column instead of copying its bytes.
void ColumnJSON::Append(std::string&& steal_value) {
    // The string is stored in append_data_ and the row is a view of that stored string.
    const std::string& stored = append_data_.emplace_back(std::move(steal_value));
    items_.emplace_back(std::string_view(stored.data(), stored.length()));
}

/// Appends a new row that merely references `str`; the bytes are not copied, so the caller
/// must keep them alive for as long as the row is used.
void ColumnJSON::AppendNoManagedLifetime(std::string_view str) {
    items_.push_back(str);
}

void ColumnJSON::AppendUnsafe(std::string_view str) {
items_.emplace_back(blocks_.back().AppendUnsafe(str));
}

/// Removes all rows and releases the storage that backed them.
void ColumnJSON::Clear() {
    // Drop the views first, then both kinds of backing storage.
    items_.clear();
    append_data_.clear();
    blocks_.clear();
}

/// Returns the value of row `n`; throws std::out_of_range when `n` is not a valid row index.
std::string_view ColumnJSON::At(size_t n) const {
    const std::string_view& row = items_.at(n);
    return row;
}

/// Appends a copy of every row of `column` to the end of this column.
/// Does nothing when `column` is not a ColumnJSON.
void ColumnJSON::Append(ColumnRef column) {
    const auto col = column->As<ColumnJSON>();
    if (!col) {
        return;
    }

    // Snapshot the row count before mutating anything. When a column is appended to itself,
    // Size() grows with every copied row, so re-reading it in the loop condition would never
    // terminate and would write past the end of the block sized below.
    const size_t rows_to_copy = col->Size();
    const auto total_size = ComputeTotalSize(col->items_);

    // TODO: fill up existing block with some items and then add a new one for the rest of items
    if (blocks_.size() == 0 || blocks_.back().GetAvailable() < total_size)
        blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, total_size));

    // Intentionally not doing items_.reserve() since that cripples performance.
    for (size_t i = 0; i < rows_to_copy; ++i) {
        // The last block has room for all rows, so the unchecked append is sufficient.
        this->AppendUnsafe((*col)[i]);
    }
}

/// Replaces the column content with `rows` values read from `input`.
///
/// Each row is read as a length (WireFormat::ReadUInt64) followed by that many bytes.
/// NOTE(review): this mirrors the string column layout; the commit notes say JSON does not
/// round-trip with it, so the actual wire format for JSON still needs to be confirmed.
///
/// @return true on success; false when a read fails or a length does not fit in size_t,
///         in which case the current content of the column is left untouched.
bool ColumnJSON::LoadBody(InputStream* input, size_t rows) {
    if (rows == 0) {
        items_.clear();
        blocks_.clear();
        // Nothing references the moved-in strings any more; release them as Clear() does.
        append_data_.clear();

        return true;
    }

    // Load into temporaries so that a failure half-way through does not corrupt the column.
    decltype(items_) new_items;
    decltype(blocks_) new_blocks;

    new_items.reserve(rows);

    // Suboptimal if the first row string is >DEFAULT_BLOCK_SIZE, but that must be a very rare case.
    Block * block = &new_blocks.emplace_back(DEFAULT_BLOCK_SIZE);

    for (size_t i = 0; i < rows; ++i) {
        uint64_t len;
        if (!WireFormat::ReadUInt64(*input, &len))
            return false;

        // Reject lengths that cannot be represented as size_t (possible where size_t is
        // narrower than 64 bits); otherwise the block below would be sized from a truncated
        // value while the read still used the full length.
        const auto size = static_cast<size_t>(len);
        if (size != len)
            return false;

        // `block` is re-pointed whenever a block is added, so it never refers to an element
        // invalidated by the vector growing.
        if (size > block->GetAvailable())
            block = &new_blocks.emplace_back(std::max<size_t>(DEFAULT_BLOCK_SIZE, size));

        if (!WireFormat::ReadBytes(*input, block->GetCurrentWritePos(), size))
            return false;

        new_items.emplace_back(block->ConsumeTailAsJSONViewUnsafe(size));
    }

    items_.swap(new_items);
    blocks_.swap(new_blocks);
    // The old views are gone, so strings previously moved into the column are unreferenced.
    append_data_.clear();

    return true;
}

/// Writes every row, in order, to `output` via WireFormat::WriteString.
void ColumnJSON::SaveBody(OutputStream* output) {
    const size_t row_count = items_.size();
    for (size_t row = 0; row < row_count; ++row) {
        WireFormat::WriteString(*output, items_[row]);
    }
}

/// Returns the number of rows currently held by the column.
size_t ColumnJSON::Size() const {
    const size_t row_count = items_.size();
    return row_count;
}

/// Returns a new column holding copies of up to `len` rows starting at row `begin`.
/// The result is empty when `begin` is past the last row; `len` is clamped to the rows available.
ColumnRef ColumnJSON::Slice(size_t begin, size_t len) const {
    auto slice = std::make_shared<ColumnJSON>();

    if (begin >= items_.size()) {
        return slice;
    }

    const size_t count = std::min(len, items_.size() - begin);
    slice->items_.reserve(count);

    // One block sized for exactly the copied rows, so every Append below fits into it.
    slice->blocks_.emplace_back(ComputeTotalSize(items_, begin, count));

    const size_t end = begin + count;
    for (size_t row = begin; row < end; ++row) {
        slice->Append(items_[row]);
    }

    return slice;
}

/// Returns a new, empty column of the same type.
ColumnRef ColumnJSON::CloneEmpty() const {
    auto empty = std::make_shared<ColumnJSON>();
    return empty;
}

/// Exchanges the content of this column with `other`.
/// Throws std::bad_cast when `other` is not a ColumnJSON.
void ColumnJSON::Swap(Column& other) {
    ColumnJSON& rhs = dynamic_cast<ColumnJSON&>(other);

    // Views and both kinds of backing storage travel together, so the views stay valid.
    rhs.items_.swap(items_);
    rhs.blocks_.swap(blocks_);
    rhs.append_data_.swap(append_data_);
}

/// Returns row `index` wrapped in an ItemView tagged Type::JSON.
/// Throws std::out_of_range (from At) when `index` is not a valid row index.
ItemView ColumnJSON::GetItem(size_t index) const {
    const std::string_view value = this->At(index);
    return ItemView{Type::JSON, value};
}

}
72 changes: 72 additions & 0 deletions clickhouse/columns/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,76 @@ class ColumnString : public Column {
std::deque<std::string> append_data_;
};

/**
 * Represents a column of JSON values.
 *
 * Every row is held and exposed as a std::string_view. The bytes behind a view live either
 * in one of the column's blocks (copied values), in append_data_ (values moved in as
 * std::string), or outside the column (AppendNoManagedLifetime).
 *
 * NOTE(review): rows are written with WireFormat::WriteString, like a string column;
 * confirm that this matches the server's wire format for JSON.
 */
class ColumnJSON : public Column {
public:
// Type this column takes as argument of Append and returns with At() and operator[]
using ValueType = std::string_view;

/// Creates an empty column.
ColumnJSON();
~ColumnJSON();

/// Creates an empty column with bookkeeping pre-reserved for element_count rows.
explicit ColumnJSON(size_t element_count);
/// Creates a column holding a copy of every string in data.
explicit ColumnJSON(const std::vector<std::string> & data);
/// Creates a column that takes ownership of the strings in data.
explicit ColumnJSON(std::vector<std::string>&& data);
// Not copyable.
ColumnJSON& operator=(const ColumnJSON&) = delete;
ColumnJSON(const ColumnJSON&) = delete;

/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends one element to the column; the bytes of str are copied into the column.
void Append(std::string_view str);

/// Appends one element to the column from a NUL-terminated string; the bytes are copied.
void Append(const char* str);

/// Appends one element to the column by moving steal_value into it (no byte copy).
void Append(std::string&& steal_value);

/// Appends one element to the column without copying it: only the view is stored.
/// Use only if str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server.
void AppendNoManagedLifetime(std::string_view str);

/// Returns element at given row number; throws std::out_of_range for an invalid row.
std::string_view At(size_t n) const;

/// Returns element at given row number; same as At(n), including the bounds check.
inline std::string_view operator [] (size_t n) const { return At(n); }

public:
/// Appends content of given column to the end of current one.
/// Does nothing when the given column is not a ColumnJSON.
void Append(ColumnRef column) override;

/// Loads column data from input stream, replacing the current rows on success.
bool LoadBody(InputStream* input, size_t rows) override;

/// Saves column data to output stream.
void SaveBody(OutputStream* output) override;

/// Clear column data.
void Clear() override;

/// Returns count of rows in the column.
size_t Size() const override;

/// Makes slice of the current column; the slice holds its own copy of the rows.
ColumnRef Slice(size_t begin, size_t len) const override;
/// Returns a new, empty ColumnJSON.
ColumnRef CloneEmpty() const override;
/// Exchanges content with other, which must be a ColumnJSON.
void Swap(Column& other) override;
/// Returns the row at the given index as an ItemView of type JSON.
ItemView GetItem(size_t) const override;

private:
/// Appends str to the last block without any capacity check.
void AppendUnsafe(std::string_view);

private:
// Fixed-capacity character buffer; defined in the .cpp file.
struct Block;

// One view per row, in row order.
std::vector<std::string_view> items_;
// Storage for values that were copied into the column.
std::vector<Block> blocks_;
// Storage for values that were moved into the column as std::string.
std::deque<std::string> append_data_;
};

}
5 changes: 3 additions & 2 deletions clickhouse/types/type_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ static const std::unordered_map<std::string, Type::Code> kTypeCode = {
{ "Float32", Type::Float32 },
{ "Float64", Type::Float64 },
{ "String", Type::String },
{ "JSON", Type::JSON },
{ "FixedString", Type::FixedString },
{ "DateTime", Type::DateTime },
{ "DateTime64", Type::DateTime64 },
Expand Down Expand Up @@ -68,7 +69,7 @@ static const std::unordered_map<std::string, Type::Code> kTypeCode = {
};

template <typename L, typename R>
inline int CompateStringsCaseInsensitive(const L& left, const R& right) {
inline int CompareStringsCaseInsensitive(const L& left, const R& right) {
int64_t size_diff = left.size() - right.size();
if (size_diff != 0)
return size_diff > 0 ? 1 : -1;
Expand Down Expand Up @@ -129,7 +130,7 @@ bool ValidateAST(const TypeAst& ast) {
// Void terminal that is not actually "void" produced when unknown type is encountered.
if (ast.meta == TypeAst::Terminal
&& ast.code == Type::Void
&& CompateStringsCaseInsensitive(ast.name, std::string_view("void")) != 0)
&& CompareStringsCaseInsensitive(ast.name, std::string_view("void")) != 0)
//throw UnimplementedError("Unsupported type: " + ast.name);
return false;

Expand Down
Loading
Loading