Commits (23)
2007cec  Update vendored DuckDB sources to 5657cbdc0b  (duckdblabs-bot, Oct 4, 2025)
6a14da4  Update vendored DuckDB sources to b3c8acdc0e  (duckdblabs-bot, Oct 7, 2025)
0b478d3  Update vendored DuckDB sources to f793ea27c6  (duckdblabs-bot, Oct 8, 2025)
45e0299  Update vendored DuckDB sources to 353406bd7f  (duckdblabs-bot, Oct 9, 2025)
0eda34b  Update vendored DuckDB sources to 9d77bcf518  (duckdblabs-bot, Oct 11, 2025)
f10aea5  Update vendored DuckDB sources to 2762f1aa72  (duckdblabs-bot, Oct 12, 2025)
2088ae4  Update vendored DuckDB sources to 7621d1bad9  (duckdblabs-bot, Oct 14, 2025)
855724b  Update vendored DuckDB sources to 746404c6aa  (duckdblabs-bot, Oct 15, 2025)
81d6e16  Update vendored DuckDB sources to 5d422e423e  (duckdblabs-bot, Oct 16, 2025)
b3f1929  Update vendored DuckDB sources to f256271bbc  (duckdblabs-bot, Oct 17, 2025)
498da86  Update vendored DuckDB sources to 50acc16a8d  (duckdblabs-bot, Oct 18, 2025)
457acae  Update vendored DuckDB sources to d921f4d6ba  (duckdblabs-bot, Oct 19, 2025)
f24aa46  Update vendored DuckDB sources to c8906e701e  (duckdblabs-bot, Oct 22, 2025)
7565c88  Update vendored DuckDB sources to bfc1fc8800  (duckdblabs-bot, Oct 25, 2025)
80341dc  Update vendored DuckDB sources to ee49c5b308  (duckdblabs-bot, Oct 28, 2025)
a0eaba7  Update vendored DuckDB sources to 4ff549de0d  (duckdblabs-bot, Oct 29, 2025)
d1c6fb9  Update vendored DuckDB sources to c8906e701e  (duckdblabs-bot, Oct 29, 2025)
a476351  Update vendored DuckDB sources to 620e685c6d  (duckdblabs-bot, Nov 5, 2025)
f320879  Update vendored DuckDB sources to 963000678a  (duckdblabs-bot, Nov 6, 2025)
bdbfad9  Update vendored DuckDB sources to d573b275ce  (duckdblabs-bot, Nov 7, 2025)
9989217  Update vendored DuckDB sources to 44b706b2b7  (duckdblabs-bot, Nov 8, 2025)
b762a52  Update vendored DuckDB sources to 5511fb39ac  (duckdblabs-bot, Nov 11, 2025)
4e7f155  Update vendored DuckDB sources to 39f5583f99  (duckdblabs-bot, Nov 12, 2025)
Changes from 1 commit
commit bdbfad9d5f073840627fa277bd85f37c4a31eb40
Update vendored DuckDB sources to d573b275ce
duckdblabs-bot committed Nov 11, 2025
src/duckdb/extension/parquet/include/parquet_reader.hpp (1 change: 0 additions & 1 deletion)

@@ -105,7 +105,6 @@ struct ParquetOptions {
 	explicit ParquetOptions(ClientContext &context);
 
 	bool binary_as_string = false;
-	bool variant_legacy_encoding = false;
 	bool file_row_number = false;
 	shared_ptr<ParquetEncryptionConfig> encryption_config;
 	bool debug_use_openssl = true;
(file path not captured in this view) (1 addition)

@@ -42,6 +42,7 @@ struct VariantValue {
 
 public:
 	yyjson_mut_val *ToJSON(ClientContext &context, yyjson_mut_doc *doc) const;
+	static void ToVARIANT(vector<VariantValue> &input, Vector &result);
 
 public:
 	VariantValueType value_type;
(file path not captured in this view) (1 addition & 1 deletion)

@@ -15,7 +15,7 @@ namespace duckdb {
 
 class VariantColumnReader : public ColumnReader {
 public:
-	static constexpr const PhysicalType TYPE = PhysicalType::VARCHAR;
+	static constexpr const PhysicalType TYPE = PhysicalType::STRUCT;
 
 public:
 	VariantColumnReader(ClientContext &context, ParquetReader &reader, const ParquetColumnSchema &schema,
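For orientation: the VARCHAR-to-STRUCT switch above matches the physical representation of the VARIANT logical type, which is struct-backed rather than string-backed. A minimal check of that assumption (that LogicalType::VARIANT() is available in this build and reports a STRUCT internal type), against DuckDB's C++ API:

#include "duckdb.hpp"
#include <iostream>

int main() {
	// VARIANT is the logical type the reader now produces; its internal
	// (physical) representation is assumed to be STRUCT, matching the
	// VariantColumnReader::TYPE constant changed in this hunk.
	auto variant = duckdb::LogicalType::VARIANT();
	std::cout << variant.ToString() << '\n';
	std::cout << std::boolalpha
	          << (variant.InternalType() == duckdb::PhysicalType::STRUCT) << '\n';
	return 0;
}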
src/duckdb/extension/parquet/parquet_extension.cpp (3 changes: 0 additions & 3 deletions)

@@ -998,9 +998,6 @@ static void LoadInternal(ExtensionLoader &loader) {
 	    "enable_geoparquet_conversion",
 	    "Attempt to decode/encode geometry data in/as GeoParquet files if the spatial extension is present.",
 	    LogicalType::BOOLEAN, Value::BOOLEAN(true));
-	config.AddExtensionOption("variant_legacy_encoding",
-	                          "Enables the Parquet reader to identify a Variant structurally.", LogicalType::BOOLEAN,
-	                          Value::BOOLEAN(false));
 }
 
 void ParquetExtension::Load(ExtensionLoader &loader) {
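Since the AddExtensionOption registration is deleted here, setting the option should now be rejected as an unknown configuration parameter. A quick sketch of that expectation using DuckDB's C++ API (the exact error text is an assumption):

#include "duckdb.hpp"
#include <iostream>

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	// Before this commit, this enabled structural (legacy) detection of
	// Variant columns; after it, the setting no longer exists.
	auto result = con.Query("SET variant_legacy_encoding = true;");
	if (result->HasError()) {
		std::cout << "rejected as expected: " << result->GetError() << '\n';
	}
	return 0;
}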
src/duckdb/extension/parquet/parquet_multi_file_info.cpp (4 changes: 0 additions & 4 deletions)

@@ -397,10 +397,6 @@ bool ParquetMultiFileInfo::ParseOption(ClientContext &context, const string &ori
 		options.binary_as_string = BooleanValue::Get(val);
 		return true;
 	}
-	if (key == "variant_legacy_encoding") {
-		options.variant_legacy_encoding = BooleanValue::Get(val);
-		return true;
-	}
 	if (key == "file_row_number") {
 		options.file_row_number = BooleanValue::Get(val);
 		return true;
src/duckdb/extension/parquet/parquet_reader.cpp (54 changes: 1 addition & 53 deletions)

@@ -517,52 +517,6 @@ unique_ptr<BaseStatistics> ParquetColumnSchema::Stats(const FileMetaData &file_m
 	return ParquetStatisticsUtils::TransformColumnStatistics(*this, columns, parquet_options.can_have_nan);
 }
 
-static bool IsVariantType(const SchemaElement &root, const vector<ParquetColumnSchema> &children) {
-	if (children.size() < 2) {
-		return false;
-	}
-	auto &child0 = children[0];
-	auto &child1 = children[1];
-
-	ParquetColumnSchema const *metadata;
-	ParquetColumnSchema const *value;
-
-	if (child0.name == "metadata" && child1.name == "value") {
-		metadata = &child0;
-		value = &child1;
-	} else if (child1.name == "metadata" && child0.name == "value") {
-		metadata = &child1;
-		value = &child0;
-	} else {
-		return false;
-	}
-
-	//! Verify names
-	if (metadata->name != "metadata") {
-		return false;
-	}
-	if (value->name != "value") {
-		return false;
-	}
-
-	//! Verify types
-	if (metadata->parquet_type != duckdb_parquet::Type::BYTE_ARRAY) {
-		return false;
-	}
-	if (value->parquet_type != duckdb_parquet::Type::BYTE_ARRAY) {
-		return false;
-	}
-	if (children.size() == 3) {
-		auto &typed_value = children[2];
-		if (typed_value.name != "typed_value") {
-			return false;
-		}
-	} else if (children.size() != 2) {
-		return false;
-	}
-	return true;
-}
-
 ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_define, idx_t max_repeat,
                                                         idx_t &next_schema_idx, idx_t &next_file_idx,
                                                         ClientContext &context) {
@@ -629,9 +583,6 @@ ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_d
 	const bool is_map = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::MAP;
 	bool is_map_kv = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::MAP_KEY_VALUE;
 	bool is_variant = s_ele.__isset.logicalType && s_ele.logicalType.__isset.VARIANT == true;
-	if (!is_variant) {
-		is_variant = parquet_options.variant_legacy_encoding && IsVariantType(s_ele, child_schemas);
-	}
 
 	if (!is_map_kv && this_idx > 0) {
 		// check if the parent node of this is a map
@@ -667,7 +618,7 @@ ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_d
 
 	LogicalType result_type;
 	if (is_variant) {
-		result_type = LogicalType::JSON();
+		result_type = LogicalType::VARIANT();
 	} else {
 		result_type = LogicalType::STRUCT(std::move(struct_types));
 	}
@@ -808,9 +759,6 @@ ParquetOptions::ParquetOptions(ClientContext &context) {
 	if (context.TryGetCurrentSetting("binary_as_string", lookup_value)) {
 		binary_as_string = lookup_value.GetValue<bool>();
 	}
-	if (context.TryGetCurrentSetting("variant_legacy_encoding", lookup_value)) {
-		variant_legacy_encoding = lookup_value.GetValue<bool>();
-	}
 }
 
 ParquetColumnDefinition ParquetColumnDefinition::FromSchemaValue(ClientContext &context, const Value &column_value) {
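The net effect of the reader changes: a Parquet column annotated with the VARIANT logical type now comes back as VARIANT instead of JSON, and the structural fallback for unannotated metadata/value groups is gone. A sketch via DuckDB's C++ API, where variant.parquet is a hypothetical file whose schema carries the VARIANT annotation:

#include "duckdb.hpp"
#include <iostream>

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	auto result = con.Query("SELECT * FROM read_parquet('variant.parquet');");
	if (result->HasError()) {
		std::cerr << result->GetError() << '\n';
		return 1;
	}
	// With this commit the annotated column's type prints as VARIANT
	// rather than JSON, independent of any reader option.
	for (duckdb::idx_t i = 0; i < result->ColumnCount(); i++) {
		std::cout << result->ColumnName(i) << ": " << result->types[i].ToString() << '\n';
	}
	return 0;
}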
src/duckdb/extension/parquet/parquet_writer.cpp (10 changes: 9 additions & 1 deletion)

@@ -541,7 +541,7 @@ void ParquetWriter::FlushRowGroup(PreparedRowGroup &prepared) {
 	row_group.__isset.total_compressed_size = true;
 
 	if (encryption_config) {
-		auto row_group_ordinal = num_row_groups.load();
+		const auto row_group_ordinal = file_meta_data.row_groups.size();
 		if (row_group_ordinal > std::numeric_limits<int16_t>::max()) {
 			throw InvalidInputException("RowGroup ordinal exceeds 32767 when encryption enabled");
 		}
@@ -562,6 +562,14 @@ void ParquetWriter::Flush(ColumnDataCollection &buffer) {
 		return;
 	}
 
+	// "total_written" is only used for the FILE_SIZE_BYTES flag, and only when threads are writing in parallel.
+	// We pre-emptively increase it here to try to reduce overshooting when many threads are writing in parallel.
+	// However, waiting for the exact value (PrepareRowGroup) takes too long, and would cause overshoots to happen.
+	// So, we guess the compression ratio. We guess 3x, but this will be off depending on the data.
+	// "total_written" is restored to the exact number of written bytes at the end of FlushRowGroup.
+	// PhysicalCopyToFile should be reworked to use prepare/flush batch separately for better accuracy.
+	total_written += buffer.SizeInBytes() / 2;
+
 	PreparedRowGroup prepared_row_group;
 	PrepareRowGroup(buffer, prepared_row_group);
 	buffer.Reset();
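The added Flush logic is an estimate-then-correct pattern on a shared byte counter. Below is a self-contained sketch of the idea; SharedFileState, ReserveEstimate, and CorrectEstimate are illustrative names, not DuckDB's API:

#include <atomic>
#include <cstdint>

// Mirrors the role of "total_written" above: a counter shared by all
// threads writing row groups to the same file.
struct SharedFileState {
	std::atomic<uint64_t> total_written{0};
};

// Called before the slow prepare/compress step: publish a guessed
// compressed size so parallel writers checking a size limit already
// account for this buffer's in-flight data.
uint64_t ReserveEstimate(SharedFileState &state, uint64_t uncompressed_bytes) {
	const uint64_t estimate = uncompressed_bytes / 2; // guessed compression ratio, as in the diff
	state.total_written += estimate;
	return estimate;
}

// Called once the row group is flushed and the exact size is known:
// replace the guess with the real number of bytes written.
void CorrectEstimate(SharedFileState &state, uint64_t estimate, uint64_t actual_bytes) {
	state.total_written += actual_bytes;
	state.total_written -= estimate;
}

Because each writer publishes its guess before compressing, threads enforcing a FILE_SIZE_BYTES-style limit see in-flight data and overshoot less, and the counter becomes exact again once the real value replaces the estimate.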