Skip to content

Commit fa87659

Browse files
authored
Merge pull request ClickHouse#89332 from ClickHouse/allow_statistics_optimize-by-default
Enable `allow_statistics_optimize` by default
2 parents 1a06f58 + aa1aa70 commit fa87659

12 files changed

+27
-19
lines changed

src/Core/Settings.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7188,9 +7188,9 @@ The maximum number of rows in the right table to determine whether to rerange th
71887188
If it is set to true, and the conditions of `join_to_sort_minimum_perkey_rows` and `join_to_sort_maximum_table_rows` are met, rerange the right table by key to improve the performance in left or inner hash join.
71897189
)", EXPERIMENTAL) \
71907190
\
7191-
DECLARE_WITH_ALIAS(Bool, allow_statistics_optimize, false, R"(
7191+
DECLARE_WITH_ALIAS(Bool, allow_statistics_optimize, true, R"(
71927192
Allows using statistics to optimize queries
7193-
)", EXPERIMENTAL, allow_statistic_optimize) \
7193+
)", BETA, allow_statistic_optimize) \
71947194
DECLARE_WITH_ALIAS(Bool, allow_experimental_statistics, false, R"(
71957195
Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-creating-a-table) and [manipulate statistics](../../engines/table-engines/mergetree-family/mergetree.md/#column-statistics).
71967196
)", EXPERIMENTAL, allow_experimental_statistic) \

src/Core/SettingsChangesHistory.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
4646
{"query_plan_optimize_join_order_limit", 1, 10, "Allow JOIN reordering with more tables by default"},
4747
{"iceberg_insert_max_partitions", 100, 100, "New setting."},
4848
{"use_paimon_partition_pruning", false, false, "New setting."},
49+
{"allow_statistics_optimize", false, true, "Enable this optimization by default."},
50+
{"allow_statistic_optimize", false, true, "Enable this optimization by default."},
4951
{"query_plan_text_index_add_hint", true, true, "New setting"},
5052
{"text_index_hint_max_selectivity", 0.2, 0.2, "New setting"},
5153
{"allow_experimental_time_time64_type", false, true, "Enable Time and Time64 type by default"},

src/Storages/Statistics/ConditionSelectivityEstimator.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,9 @@ RelationProfile ConditionSelectivityEstimator::estimateRelationProfileImpl(std::
127127
auto* final_element = rpn_stack.top();
128128
final_element->finalize(column_estimators);
129129
RelationProfile result;
130-
result.rows = static_cast<UInt64>(final_element->selectivity * total_rows);
130+
Float64 final_rows = final_element->selectivity * total_rows;
131+
final_rows = std::max<Float64>(final_rows, 0);
132+
result.rows = static_cast<UInt64>(final_rows);
131133
for (const auto & [column_name, estimator] : column_estimators)
132134
{
133135
UInt64 cardinality = std::min(result.rows, estimator.estimateCardinality());
@@ -259,7 +261,7 @@ bool ConditionSelectivityEstimator::extractAtomFromTree(const StorageMetadataPtr
259261
column_type = removeLowCardinalityAndNullable(column_desc->type);
260262
}
261263
/// In some cases the type of the constant needs to be cast.
262-
bool cast_not_needed = !column_type ||
264+
bool cast_not_needed = !column_type || !const_type ||
263265
((isNativeInteger(column_type) || isDateTime(column_type))
264266
&& (isNativeInteger(const_type) || isDateTime(const_type)));
265267

@@ -347,7 +349,9 @@ Float64 ConditionSelectivityEstimator::ColumnEstimator::estimateRanges(const Pla
347349
/// Handle the case where the statistics are empty.
348350
if (stats->rowCount() == 0)
349351
return 0;
350-
return result / stats->rowCount();
352+
Float64 selectivity = result / stats->rowCount();
353+
selectivity = std::max<Float64>(selectivity, 0);
354+
return selectivity;
351355
}
352356

353357
UInt64 ConditionSelectivityEstimator::ColumnEstimator::estimateCardinality() const

src/Storages/Statistics/Statistics.cpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <Common/FieldVisitorConvertToNumber.h>
55
#include <Common/logger_useful.h>
66
#include <DataTypes/DataTypesNumber.h>
7+
#include <Functions/FunctionFactory.h>
78
#include <IO/ReadHelpers.h>
89
#include <IO/WriteHelpers.h>
910
#include <Interpreters/convertFieldToType.h>
@@ -44,19 +45,14 @@ std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & value,
4445
if (!data_type->isValueRepresentedByNumber())
4546
return {};
4647

47-
Field value_converted;
48-
if (isInteger(data_type) && !isBool(data_type))
49-
/// For case val_int32 < 10.5 or val_int32 < '10.5' we should convert 10.5 to Float64.
50-
value_converted = convertFieldToType(value, DataTypeFloat64());
51-
else
52-
/// We should convert value to the real column data type and then translate it to Float64.
53-
/// For example for expression col_date > '2024-08-07', if we directly convert '2024-08-07' to Float64, we will get null.
54-
value_converted = convertFieldToType(value, *data_type);
48+
auto column = data_type->createColumn();
49+
column->insert(value);
50+
ColumnsWithTypeAndName arguments({ColumnWithTypeAndName(std::move(column), data_type, "stats_const")});
5551

56-
if (value_converted.isNull())
57-
return {};
58-
59-
return applyVisitor(FieldVisitorConvertToNumber<Float64>(), value_converted);
52+
auto cast_resolver = FunctionFactory::instance().get("toFloat64", nullptr);
53+
auto cast_function = cast_resolver->build(arguments);
54+
ColumnPtr result = cast_function->execute(arguments, std::make_shared<DataTypeFloat64>(), 1, false);
55+
return result->getFloat64(0);
6056
}
6157

6258
IStatistics::IStatistics(const SingleStatisticsDescription & stat_)

src/Storages/Statistics/StatisticsCountMinSketch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const
3838
/// For example: if data_type is Int32:
3939
/// 1. For 1.0, 1, '1', return Field(1)
4040
/// 2. For 1.1, max_value_int64, return null
41-
Field val_converted = convertFieldToType(val, *data_type);
41+
Field val_converted = convertFieldToType(val, *data_type, data_type.get());
4242
if (val_converted.isNull())
4343
return 0;
4444

tests/queries/0_stateless/01763_filter_push_down_bugs.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
SET allow_statistics_optimize = 0;
12
SELECT * FROM (SELECT col1, col2 FROM (select '1' as col1, '2' as col2) GROUP by col1, col2) AS expr_qry WHERE col2 != '';
23
SELECT * FROM (SELECT materialize('1') AS s1, materialize('2') AS s2 GROUP BY s1, s2) WHERE s2 = '2';
34
SELECT * FROM (SELECT materialize([1]) AS s1, materialize('2') AS s2 GROUP BY s1, s2) WHERE s2 = '2';

tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
SET optimize_move_to_prewhere = 1;
22
SET enable_multiple_prewhere_read_steps = 1;
33
SET optimize_functions_to_subcolumns = 0;
4+
SET allow_statistics_optimize = 0;
45

56
DROP TABLE IF EXISTS t_02848_mt1;
67
DROP TABLE IF EXISTS t_02848_mt2;

tests/queries/0_stateless/03279_join_choose_build_table.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
1+
SET allow_statistics_optimize = 0;
22
SET use_skip_indexes_on_data_read = 0; -- for correct row count estimation in join order planning
33
DROP TABLE IF EXISTS products;
44
DROP TABLE IF EXISTS sales;

tests/queries/0_stateless/03357_join_pk_sharding.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
-- Tags: long
22

3+
SET allow_statistics_optimize = 0;
34
drop table if exists tab_l;
45
drop table if exists tab_m;
56
drop table if exists tab_r;

tests/queries/0_stateless/03591_optimize_prewhere_row_policy.reference

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
SET use_query_condition_cache = 0;
44
SET enable_parallel_replicas = 0;
5+
SET allow_statistics_optimize = 0;
56
DROP TABLE IF EXISTS 03591_test;
67
DROP ROW POLICY IF EXISTS 03591_rp ON 03591_test;
78
CREATE TABLE 03591_test (a Int32, b Int32) ENGINE=MergeTree ORDER BY tuple();

0 commit comments

Comments
 (0)