From 1efeb75fd83572f55baeb815231e4473e4e11028 Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Mon, 17 Apr 2023 22:33:47 +0200 Subject: [PATCH 01/11] Remove explicit import --- src/mango/src/mango_idx_text.erl | 133 ++++++++++++-------------- src/mango/src/mango_selector_text.erl | 130 +++++++++++-------------- 2 files changed, 120 insertions(+), 143 deletions(-) diff --git a/src/mango/src/mango_idx_text.erl b/src/mango/src/mango_idx_text.erl index db8af795b9f..21b818288a9 100644 --- a/src/mango/src/mango_idx_text.erl +++ b/src/mango/src/mango_idx_text.erl @@ -29,10 +29,6 @@ -include("mango.hrl"). -include("mango_idx.hrl"). --ifdef(TEST). --import(test_util, [as_selector/1]). --endif. - validate_new(#idx{} = Idx, Db) -> {ok, Def} = do_validate(Idx#idx.def), maybe_reject_index_all_req(Def, Db), @@ -476,76 +472,73 @@ warn_index_all({Idx, Db}) -> ?assertThrow({test_error, logged_warning}, validate_new(Idx, Db)) end). +indexable(Selector) -> + indexable_fields(test_util:as_selector(Selector)). + indexable_fields_test() -> ?assertEqual( [<<"$default">>, <<"field1:boolean">>, <<"field2:number">>, <<"field3:string">>], - indexable_fields( - as_selector( - #{ - <<"$default">> => #{<<"$text">> => <<"text">>}, - <<"field1">> => true, - <<"field2">> => 42, - <<"field3">> => #{<<"$regex">> => <<".*">>} - } - ) + indexable( + #{ + <<"$default">> => #{<<"$text">> => <<"text">>}, + <<"field1">> => true, + <<"field2">> => 42, + <<"field3">> => #{<<"$regex">> => <<".*">>} + } ) ), ?assertEqual( [<<"f1:string">>, <<"f2:string">>, <<"f3:string">>, <<"f4:string">>, <<"f5:string">>], lists:sort( - indexable_fields( - as_selector( - #{ - <<"$and">> => - [ - #{<<"f1">> => <<"v1">>}, - #{<<"f2">> => <<"v2">>} - ], - <<"$or">> => - [ - #{<<"f3">> => <<"v3">>}, - #{<<"f4">> => <<"v4">>} - ], - <<"$not">> => #{<<"f5">> => <<"v5">>} - } - ) - ) - ) - ), - - ?assertEqual( - [], - indexable_fields( - as_selector( - #{ - <<"field1">> => null, - <<"field2">> => #{<<"$size">> => 3}, - <<"field3">> => #{<<"$type">> => <<"type">>} - } - ) - ) - ), - ?assertEqual( - [], - indexable_fields( - as_selector( + indexable( #{ <<"$and">> => [ - #{<<"f1">> => null}, - #{<<"f2">> => null} + #{<<"f1">> => <<"v1">>}, + #{<<"f2">> => <<"v2">>} ], <<"$or">> => [ - #{<<"f3">> => null}, - #{<<"f4">> => null} + #{<<"f3">> => <<"v3">>}, + #{<<"f4">> => <<"v4">>} ], - <<"$not">> => #{<<"f5">> => null} + <<"$not">> => #{<<"f5">> => <<"v5">>} } ) ) + ), + ?assertEqual( + [], + indexable( + #{ + <<"field1">> => null, + <<"field2">> => #{<<"$size">> => 3}, + <<"field3">> => #{<<"$type">> => <<"type">>} + } + ) + ), + ?assertEqual( + [], + indexable( + #{ + <<"$and">> => + [ + #{<<"f1">> => null}, + #{<<"f2">> => null} + ], + <<"$or">> => + [ + #{<<"f3">> => null}, + #{<<"f4">> => null} + ], + <<"$not">> => #{<<"f5">> => null} + } + ) ). +usable(Index, Selector, Fields) -> + is_usable(Index, test_util:as_selector(Selector), Fields). + is_usable_test() -> ?assertNot(is_usable(undefined, {[]}, undefined)), @@ -555,38 +548,38 @@ is_usable_test() -> Field1 = {[{<<"name">>, <<"field1">>}, {<<"type">>, <<"string">>}]}, Field2 = {[{<<"name">>, <<"field2">>}, {<<"type">>, <<"number">>}]}, Index = #idx{def = {[{<<"fields">>, [Field1, Field2]}]}}, - ?assert(is_usable(Index, as_selector(#{<<"field1">> => <<"value">>}), undefined)), - ?assertNot(is_usable(Index, as_selector(#{<<"field1">> => 42}), undefined)), - ?assertNot(is_usable(Index, as_selector(#{<<"field3">> => true}), undefined)), + ?assert(usable(Index, #{<<"field1">> => <<"value">>}, undefined)), + ?assertNot(usable(Index, #{<<"field1">> => 42}, undefined)), + ?assertNot(usable(Index, #{<<"field3">> => true}, undefined)), ?assert( - is_usable(Index, as_selector(#{<<"field1">> => #{<<"$type">> => <<"string">>}}), undefined) + usable(Index, #{<<"field1">> => #{<<"$type">> => <<"string">>}}, undefined) ), ?assert( - is_usable(Index, as_selector(#{<<"field1">> => #{<<"$type">> => <<"boolean">>}}), undefined) + usable(Index, #{<<"field1">> => #{<<"$type">> => <<"boolean">>}}, undefined) ), ?assert( - is_usable(Index, as_selector(#{<<"field3">> => #{<<"$type">> => <<"boolean">>}}), undefined) + usable(Index, #{<<"field3">> => #{<<"$type">> => <<"boolean">>}}, undefined) ), - ?assert(is_usable(Index, as_selector(#{<<"field1">> => #{<<"$exists">> => true}}), undefined)), - ?assert(is_usable(Index, as_selector(#{<<"field1">> => #{<<"$exists">> => false}}), undefined)), - ?assert(is_usable(Index, as_selector(#{<<"field3">> => #{<<"$exists">> => true}}), undefined)), - ?assert(is_usable(Index, as_selector(#{<<"field3">> => #{<<"$exists">> => false}}), undefined)), + ?assert(usable(Index, #{<<"field1">> => #{<<"$exists">> => true}}, undefined)), + ?assert(usable(Index, #{<<"field1">> => #{<<"$exists">> => false}}, undefined)), + ?assert(usable(Index, #{<<"field3">> => #{<<"$exists">> => true}}, undefined)), + ?assert(usable(Index, #{<<"field3">> => #{<<"$exists">> => false}}, undefined)), ?assert( - is_usable(Index, as_selector(#{<<"field1">> => #{<<"$regex">> => <<".*">>}}), undefined) + usable(Index, #{<<"field1">> => #{<<"$regex">> => <<".*">>}}, undefined) ), ?assertNot( - is_usable(Index, as_selector(#{<<"field2">> => #{<<"$regex">> => <<".*">>}}), undefined) + usable(Index, #{<<"field2">> => #{<<"$regex">> => <<".*">>}}, undefined) ), ?assertNot( - is_usable(Index, as_selector(#{<<"field3">> => #{<<"$regex">> => <<".*">>}}), undefined) + usable(Index, #{<<"field3">> => #{<<"$regex">> => <<".*">>}}, undefined) ), ?assertNot( - is_usable(Index, as_selector(#{<<"field1">> => #{<<"$nin">> => [1, 2, 3]}}), undefined) + usable(Index, #{<<"field1">> => #{<<"$nin">> => [1, 2, 3]}}, undefined) ), ?assert( - is_usable(Index, as_selector(#{<<"field2">> => #{<<"$nin">> => [1, 2, 3]}}), undefined) + usable(Index, #{<<"field2">> => #{<<"$nin">> => [1, 2, 3]}}, undefined) ), ?assertNot( - is_usable(Index, as_selector(#{<<"field3">> => #{<<"$nin">> => [1, 2, 3]}}), undefined) + usable(Index, #{<<"field3">> => #{<<"$nin">> => [1, 2, 3]}}, undefined) ). -endif. diff --git a/src/mango/src/mango_selector_text.erl b/src/mango/src/mango_selector_text.erl index 0d18516c9a3..1f8609ac27b 100644 --- a/src/mango/src/mango_selector_text.erl +++ b/src/mango/src/mango_selector_text.erl @@ -21,10 +21,6 @@ -include("mango.hrl"). --ifdef(TEST). --import(test_util, [as_selector/1]). --endif. - %% Regex for <<"\\.">> -define(PERIOD, "\\."). @@ -441,22 +437,25 @@ replace_array_indexes([Part | Rest], NewPartsAcc, HasIntAcc) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +convert_selector(Selector) -> + convert([], test_util:as_selector(Selector)). + convert_fields_test() -> ?assertEqual( {op_null, {[[<<"field">>], <<":">>, <<"null">>], <<"true">>}}, - convert([], as_selector(#{<<"field">> => null})) + convert_selector(#{<<"field">> => null}) ), ?assertEqual( {op_field, {[[<<"field">>], <<":">>, <<"boolean">>], <<"true">>}}, - convert([], as_selector(#{<<"field">> => true})) + convert_selector(#{<<"field">> => true}) ), ?assertEqual( {op_field, {[[<<"field">>], <<":">>, <<"number">>], <<"42">>}}, - convert([], as_selector(#{<<"field">> => 42})) + convert_selector(#{<<"field">> => 42}) ), ?assertEqual( {op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value\"">>}}, - convert([], as_selector(#{<<"field">> => <<"value">>})) + convert_selector(#{<<"field">> => <<"value">>}) ), ?assertEqual( {op_and, [ @@ -465,74 +464,74 @@ convert_fields_test() -> {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"2">>}}, {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"3">>}} ]}, - convert([], as_selector(#{<<"field">> => [1, 2, 3]})) + convert_selector(#{<<"field">> => [1, 2, 3]}) ), ?assertEqual( {op_field, { [[<<"field1">>, <<".">>, <<"field2">>], <<":">>, <<"string">>], <<"\"value\"">> }}, - convert([], as_selector(#{<<"field1">> => #{<<"field2">> => <<"value">>}})) + convert_selector(#{<<"field1">> => #{<<"field2">> => <<"value">>}}) ), ?assertEqual( {op_and, [ {op_field, {[[<<"field2">>], <<":">>, <<"string">>], <<"\"value2\"">>}}, {op_field, {[[<<"field1">>], <<":">>, <<"string">>], <<"\"value1\"">>}} ]}, - convert([], as_selector(#{<<"field1">> => <<"value1">>, <<"field2">> => <<"value2">>})) + convert_selector(#{<<"field1">> => <<"value1">>, <<"field2">> => <<"value2">>}) ). convert_default_test() -> ?assertEqual( {op_default, <<"\"text\"">>}, - convert([], as_selector(#{<<"$default">> => #{<<"$text">> => <<"text">>}})) + convert_selector(#{<<"$default">> => #{<<"$text">> => <<"text">>}}) ). convert_lt_test() -> ?assertEqual( {op_field, {[[<<"field">>], <<":">>, <<"number">>], [<<"[-Infinity TO ">>, <<"42">>, <<"}">>]}}, - convert([], as_selector(#{<<"field">> => #{<<"$lt">> => 42}})) + convert_selector(#{<<"field">> => #{<<"$lt">> => 42}}) ), ?assertEqual( {op_or, [ {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$lt">> => [1, 2, 3]}})) + convert_selector(#{<<"field">> => #{<<"$lt">> => [1, 2, 3]}}) ), ?assertEqual( {op_or, [ {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$lt">> => null}})) + convert_selector(#{<<"field">> => #{<<"$lt">> => null}}) ). convert_lte_test() -> ?assertEqual( {op_field, {[[<<"field">>], <<":">>, <<"number">>], [<<"[-Infinity TO ">>, <<"42">>, <<"]">>]}}, - convert([], as_selector(#{<<"field">> => #{<<"$lte">> => 42}})) + convert_selector(#{<<"field">> => #{<<"$lte">> => 42}}) ), ?assertEqual( {op_or, [ {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$lte">> => [1, 2, 3]}})) + convert_selector(#{<<"field">> => #{<<"$lte">> => [1, 2, 3]}}) ), ?assertEqual( {op_or, [ {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$lte">> => null}})) + convert_selector(#{<<"field">> => #{<<"$lte">> => null}}) ). convert_eq_test() -> ?assertEqual( {op_field, {[[<<"field">>], <<":">>, <<"number">>], <<"42">>}}, - convert([], as_selector(#{<<"field">> => #{<<"$eq">> => 42}})) + convert_selector(#{<<"field">> => #{<<"$eq">> => 42}}) ), ?assertEqual( {op_and, [ @@ -541,11 +540,11 @@ convert_eq_test() -> {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"2">>}}, {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"3">>}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$eq">> => [1, 2, 3]}})) + convert_selector(#{<<"field">> => #{<<"$eq">> => [1, 2, 3]}}) ), ?assertEqual( {op_null, {[[<<"field">>], <<":">>, <<"null">>], <<"true">>}}, - convert([], as_selector(#{<<"field">> => #{<<"$eq">> => null}})) + convert_selector(#{<<"field">> => #{<<"$eq">> => null}}) ). convert_ne_test() -> @@ -557,49 +556,49 @@ convert_ne_test() -> ]}, {op_field, {[[<<"field">>], <<":">>, <<"number">>], <<"42">>}} }}, - convert([], as_selector(#{<<"field">> => #{<<"$ne">> => 42}})) + convert_selector(#{<<"field">> => #{<<"$ne">> => 42}}) ). convert_gte_test() -> ?assertEqual( {op_field, {[[<<"field">>], <<":">>, <<"number">>], [<<"[">>, <<"42">>, <<" TO Infinity]">>]}}, - convert([], as_selector(#{<<"field">> => #{<<"$gte">> => 42}})) + convert_selector(#{<<"field">> => #{<<"$gte">> => 42}}) ), ?assertEqual( {op_or, [ {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$gte">> => [1, 2, 3]}})) + convert_selector(#{<<"field">> => #{<<"$gte">> => [1, 2, 3]}}) ), ?assertEqual( {op_or, [ {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$gte">> => null}})) + convert_selector(#{<<"field">> => #{<<"$gte">> => null}}) ). convert_gt_test() -> ?assertEqual( {op_field, {[[<<"field">>], <<":">>, <<"number">>], [<<"{">>, <<"42">>, <<" TO Infinity]">>]}}, - convert([], as_selector(#{<<"field">> => #{<<"$gt">> => 42}})) + convert_selector(#{<<"field">> => #{<<"$gt">> => 42}}) ), ?assertEqual( {op_or, [ {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$gt">> => [1, 2, 3]}})) + convert_selector(#{<<"field">> => #{<<"$gt">> => [1, 2, 3]}}) ), ?assertEqual( {op_or, [ {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$gt">> => null}})) + convert_selector(#{<<"field">> => #{<<"$gt">> => null}}) ). convert_all_test() -> @@ -612,37 +611,31 @@ convert_all_test() -> [[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"string">>], <<"\"value2\"">> }} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$all">> => [<<"value1">>, <<"value2">>]}})) + convert_selector(#{<<"field">> => #{<<"$all">> => [<<"value1">>, <<"value2">>]}}) ). convert_elemMatch_test() -> ?assertEqual( {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"string">>], <<"\"value\"">>}}, - convert( - [], as_selector(#{<<"field">> => #{<<"$elemMatch">> => #{<<"$eq">> => <<"value">>}}}) - ) + convert_selector(#{<<"field">> => #{<<"$elemMatch">> => #{<<"$eq">> => <<"value">>}}}) ). convert_allMatch_test() -> ?assertEqual( {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"string">>], <<"\"value\"">>}}, - convert( - [], as_selector(#{<<"field">> => #{<<"$allMatch">> => #{<<"$eq">> => <<"value">>}}}) - ) + convert_selector(#{<<"field">> => #{<<"$allMatch">> => #{<<"$eq">> => <<"value">>}}}) ). convert_keyMapMatch_test() -> ?assertThrow( {mango_error, mango_selector_text, {invalid_operator, <<"$keyMapMatch">>}}, - convert( - [], as_selector(#{<<"field">> => #{<<"$keyMapMatch">> => #{<<"key">> => <<"value">>}}}) - ) + convert_selector(#{<<"field">> => #{<<"$keyMapMatch">> => #{<<"key">> => <<"value">>}}}) ). convert_in_test() -> ?assertEqual( {op_or, []}, - convert([], as_selector(#{<<"field">> => #{<<"$in">> => []}})) + convert_selector(#{<<"field">> => #{<<"$in">> => []}}) ), ?assertEqual( {op_or, [ @@ -659,7 +652,7 @@ convert_in_test() -> }} ]} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$in">> => [<<"value1">>, <<"value2">>]}})) + convert_selector(#{<<"field">> => #{<<"$in">> => [<<"value1">>, <<"value2">>]}}) ). convert_nin_test() -> @@ -671,7 +664,7 @@ convert_nin_test() -> ]}, {op_or, []} }}, - convert([], as_selector(#{<<"field">> => #{<<"$nin">> => []}})) + convert_selector(#{<<"field">> => #{<<"$nin">> => []}}) ), ?assertEqual( {op_not, { @@ -690,7 +683,7 @@ convert_nin_test() -> ]} ]} }}, - convert([], as_selector(#{<<"field">> => #{<<"$nin">> => [1, 2]}})) + convert_selector(#{<<"field">> => #{<<"$nin">> => [1, 2]}}) ). convert_exists_test() -> @@ -699,7 +692,7 @@ convert_exists_test() -> {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$exists">> => true}})) + convert_selector(#{<<"field">> => #{<<"$exists">> => true}}) ), ?assertEqual( {op_not, { @@ -709,7 +702,7 @@ convert_exists_test() -> ]}, false }}, - convert([], as_selector(#{<<"field">> => #{<<"$exists">> => false}})) + convert_selector(#{<<"field">> => #{<<"$exists">> => false}}) ). convert_type_test() -> @@ -718,25 +711,25 @@ convert_type_test() -> {op_fieldname, {[[<<"field">>], ":"], "*"}}, {op_fieldname, {[[<<"field">>]], ".*"}} ]}, - convert([], as_selector(#{<<"field">> => #{<<"$type">> => <<"string">>}})) + convert_selector(#{<<"field">> => #{<<"$type">> => <<"string">>}}) ). convert_mod_test() -> ?assertEqual( {op_fieldname, {[[<<"field">>], ":"], "number"}}, - convert([], as_selector(#{<<"field">> => #{<<"$mod">> => [2, 0]}})) + convert_selector(#{<<"field">> => #{<<"$mod">> => [2, 0]}}) ). convert_regex_test() -> ?assertEqual( {op_regex, [<<"field">>]}, - convert([], as_selector(#{<<"field">> => #{<<"$regex">> => <<".*">>}})) + convert_selector(#{<<"field">> => #{<<"$regex">> => <<".*">>}}) ). convert_size_test() -> ?assertEqual( {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":length">>], <<"6">>}}, - convert([], as_selector(#{<<"field">> => #{<<"$size">> => 6}})) + convert_selector(#{<<"field">> => #{<<"$size">> => 6}}) ). convert_not_test() -> @@ -748,57 +741,51 @@ convert_not_test() -> ]}, {op_fieldname, {[[<<"field">>], ":"], "number"}} }}, - convert([], as_selector(#{<<"field">> => #{<<"$not">> => #{<<"$mod">> => [2, 0]}}})) + convert_selector(#{<<"field">> => #{<<"$not">> => #{<<"$mod">> => [2, 0]}}}) ). convert_and_test() -> ?assertEqual( {op_and, []}, - convert([], as_selector(#{<<"$and">> => []})) + convert_selector(#{<<"$and">> => []}) ), ?assertEqual( {op_and, [{op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value\"">>}}]}, - convert([], as_selector(#{<<"$and">> => [#{<<"field">> => <<"value">>}]})) + convert_selector(#{<<"$and">> => [#{<<"field">> => <<"value">>}]}) ), ?assertEqual( {op_and, [ {op_field, {[[<<"field1">>], <<":">>, <<"string">>], <<"\"value1\"">>}}, {op_field, {[[<<"field2">>], <<":">>, <<"string">>], <<"\"value2\"">>}} ]}, - convert( - [], - as_selector(#{ - <<"$and">> => [#{<<"field1">> => <<"value1">>}, #{<<"field2">> => <<"value2">>}] - }) - ) + convert_selector(#{ + <<"$and">> => [#{<<"field1">> => <<"value1">>}, #{<<"field2">> => <<"value2">>}] + }) ). convert_or_test() -> ?assertEqual( {op_or, []}, - convert([], as_selector(#{<<"$or">> => []})) + convert_selector(#{<<"$or">> => []}) ), ?assertEqual( {op_or, [{op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value\"">>}}]}, - convert([], as_selector(#{<<"$or">> => [#{<<"field">> => <<"value">>}]})) + convert_selector(#{<<"$or">> => [#{<<"field">> => <<"value">>}]}) ), ?assertEqual( {op_or, [ {op_field, {[[<<"field1">>], <<":">>, <<"string">>], <<"\"value1\"">>}}, {op_field, {[[<<"field2">>], <<":">>, <<"string">>], <<"\"value2\"">>}} ]}, - convert( - [], - as_selector(#{ - <<"$or">> => [#{<<"field1">> => <<"value1">>}, #{<<"field2">> => <<"value2">>}] - }) - ) + convert_selector(#{ + <<"$or">> => [#{<<"field1">> => <<"value1">>}, #{<<"field2">> => <<"value2">>}] + }) ). convert_nor_test() -> ?assertEqual( {op_and, []}, - convert([], as_selector(#{<<"$nor">> => []})) + convert_selector(#{<<"$nor">> => []}) ), ?assertEqual( {op_and, [ @@ -810,7 +797,7 @@ convert_nor_test() -> {op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value\"">>}} }} ]}, - convert([], as_selector(#{<<"$nor">> => [#{<<"field">> => <<"value">>}]})) + convert_selector(#{<<"$nor">> => [#{<<"field">> => <<"value">>}]}) ), ?assertEqual( {op_and, [ @@ -829,12 +816,9 @@ convert_nor_test() -> {op_field, {[[<<"field2">>], <<":">>, <<"string">>], <<"\"value2\"">>}} }} ]}, - convert( - [], - as_selector(#{ - <<"$nor">> => [#{<<"field1">> => <<"value1">>}, #{<<"field2">> => <<"value2">>}] - }) - ) + convert_selector(#{ + <<"$nor">> => [#{<<"field1">> => <<"value1">>}, #{<<"field2">> => <<"value2">>}] + }) ). to_query_test() -> From 21b79279b61c3b5c26dfbe46461ab632f105bd4e Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Thu, 16 Mar 2023 20:23:44 +0100 Subject: [PATCH 02/11] mango: introduce support for covering indexes As a performance improvement, shorten the gap between Mango queries and the underlying map-reduce views: try to serve requests without pulling documents from the primary data set, i.e. run the query with `include_docs` set to `false` when there is a chance that it can be "covered" by the chosen index. The rows in the results are then built from the information stored there. Extend the response on the `_explain` endpoint to show information in the `covered` Boolean attribute about the query would be covered by the index or not. Remarks: - This should be a transparent optimization, without any semantical effect on the queries. - Because the main purpose of indexes is to store keys and the document identifiers, the change will only work in cases when the selected fields overlap with those. The chance of being covered could be increased by adding more non-key fields to the index, but that is not in scope here. --- src/mango/src/mango_cursor_view.erl | 86 ++++++++++++++++++++--------- src/mango/src/mango_idx_view.erl | 18 +++++- 2 files changed, 77 insertions(+), 27 deletions(-) diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index a8a255f72db..8e79f608e70 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -39,15 +39,19 @@ % viewcbargs wraps up the arguments that view_cb uses into a single % entry in the mrargs.extra list. We use a Map to allow us to later % add fields without having old messages causing errors/crashes. -viewcbargs_new(Selector, Fields) -> +viewcbargs_new(Selector, Fields, CoveringIndex) -> #{ selector => Selector, - fields => Fields + fields => Fields, + covering_index => CoveringIndex }. + viewcbargs_get(selector, Args) when is_map(Args) -> maps:get(selector, Args, undefined); viewcbargs_get(fields, Args) when is_map(Args) -> - maps:get(fields, Args, undefined). + maps:get(fields, Args, undefined); +viewcbargs_get(covering_index, Args) when is_map(Args) -> + maps:get(covering_index, Args, undefined). create(Db, Indexes, Selector, Opts) -> FieldRanges = mango_idx_view:field_ranges(Selector), @@ -73,13 +77,11 @@ create(Db, Indexes, Selector, Opts) -> bookmark = Bookmark }}. -explain(Cursor) -> - #cursor{ - opts = Opts - } = Cursor, - +explain(#cursor{opts = Opts} = Cursor) -> BaseArgs = base_args(Cursor), - Args = apply_opts(Opts, BaseArgs), + Args0 = apply_opts(Opts, BaseArgs), + #cursor{index = Index, fields = Fields} = Cursor, + Args = consider_index_coverage(Index, Fields, Args0), [ {mrargs, @@ -94,7 +96,8 @@ explain(Cursor) -> {stable, Args#mrargs.stable}, {update, Args#mrargs.update}, {conflicts, Args#mrargs.conflicts} - ]}} + ]}}, + {covered, mango_idx_view:covers(Index, Fields)} ]. % replace internal values that cannot @@ -125,6 +128,13 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) - mango_idx:end_key(Idx, Cursor#cursor.ranges) } end, + CoveringIndex = + case mango_idx_view:covers(Idx, Fields) of + true -> + Idx; + false -> + undefined + end, #mrargs{ view_type = map, reduce = false, @@ -137,7 +147,7 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) - {callback, {?MODULE, view_cb}}, % TODO remove selector. It supports older nodes during version upgrades. {selector, Selector}, - {callback_args, viewcbargs_new(Selector, Fields)}, + {callback_args, viewcbargs_new(Selector, Fields, CoveringIndex)}, {ignore_partition_query_limit, true} ] @@ -157,7 +167,8 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu BaseArgs = base_args(Cursor), #cursor{opts = Opts, bookmark = Bookmark} = Cursor, Args0 = apply_opts(Opts, BaseArgs), - Args = mango_json_bookmark:update_args(Bookmark, Args0), + Args1 = consider_index_coverage(Idx, Cursor#cursor.fields, Args0), + Args = mango_json_bookmark:update_args(Bookmark, Args1), UserCtx = couch_util:get_value(user_ctx, Opts, #user_ctx{}), DbOpts = [{user_ctx, UserCtx}], Result = @@ -280,29 +291,25 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) -> % or in the new record in `callback_args`. This is to support mid-upgrade % clusters where the non-upgraded coordinator nodes will send the older style. % TODO remove this in a couple of couchdb versions. - {Selector, Fields} = + {Selector, Fields, CoveringIndex} = case couch_util:get_value(callback_args, Options) of % old style undefined -> - {couch_util:get_value(selector, Options), undefined}; + {couch_util:get_value(selector, Options), undefined, undefined}; % new style - assume a viewcbargs Args = #{} -> - {viewcbargs_get(selector, Args), viewcbargs_get(fields, Args)} + { + viewcbargs_get(selector, Args), + viewcbargs_get(fields, Args), + viewcbargs_get(covering_index, Args) + } end, - case ViewRow#view_row.doc of - null -> - maybe_send_mango_ping(); - undefined -> - % include_docs=false. Use quorum fetch at coordinator - ok = rexi:stream2(ViewRow), - set_mango_msg_timestamp(); - Doc -> - % We slightly abuse the doc field in the view response here, + Process = + fun(Doc) -> + % slightly abuse the doc field in the view response here, % because we may return something other than the full document: % we may have projected the requested `fields` from the query. % However, this oddness is confined to being visible in this module. - put(mango_docs_examined, get(mango_docs_examined) + 1), - couch_stats:increment_counter([mango, docs_examined]), case match_and_extract_doc(Doc, Selector, Fields) of {match, FinalDoc} -> FinalViewRow = ViewRow#view_row{doc = FinalDoc}, @@ -311,6 +318,21 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) -> {no_match, undefined} -> maybe_send_mango_ping() end + end, + case {ViewRow#view_row.doc, CoveringIndex} of + {null, _} -> + maybe_send_mango_ping(); + {undefined, Index = #idx{}} -> + Doc = derive_doc_from_index(Index, ViewRow), + Process(Doc); + {undefined, _} -> + % include_docs=false. Use quorum fetch at coordinator + ok = rexi:stream2(ViewRow), + set_mango_msg_timestamp(); + {Doc, _} -> + put(mango_docs_examined, get(mango_docs_examined) + 1), + couch_stats:increment_counter([mango, docs_examined]), + Process(Doc) end, {ok, Acc}; view_cb(complete, Acc) -> @@ -338,6 +360,14 @@ match_and_extract_doc(Doc, Selector, Fields) -> {no_match, undefined} end. +derive_doc_from_index(Index, #view_row{id = DocId, key = Keys}) -> + Columns = mango_idx:columns(Index), + lists:foldr( + fun({Column, Key}, Doc) -> mango_doc:set_field(Doc, Column, Key) end, + mango_doc:set_field({[]}, <<"_id">>, DocId), + lists:zip(Columns, Keys) + ). + maybe_send_mango_ping() -> Current = os:timestamp(), LastPing = get(mango_last_msg_timestamp), @@ -482,6 +512,10 @@ apply_opts([{_, _} | Rest], Args) -> % Ignore unknown options apply_opts(Rest, Args). +consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs0} = Args) -> + IncludeDocs = IncludeDocs0 andalso (not mango_idx_view:covers(Index, Fields)), + Args#mrargs{include_docs = IncludeDocs}. + doc_member_and_extract(Cursor, RowProps) -> Db = Cursor#cursor.db, Opts = Cursor#cursor.opts, diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index ff8f6c6bb17..3ef410e12e3 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -26,7 +26,9 @@ indexable_fields/1, field_ranges/1, - field_ranges/2 + field_ranges/2, + + covers/2 ]). -include_lib("couch/include/couch_db.hrl"). @@ -521,3 +523,17 @@ can_use_sort([Col | RestCols], SortFields, Selector) -> true -> can_use_sort(RestCols, SortFields, Selector); false -> false end. + +% There is no information available about the full set of fields which +% comes the following consequences: an index cannot (reliably) cover +% an "all fields" type of query and nested fields are out of scope. +covers(_, all_fields) -> + false; +covers(Idx, Fields) -> + case mango_idx:def(Idx) of + all_docs -> + false; + _ -> + Available = [<<"_id">> | columns(Idx)], + sets:is_subset(sets:from_list(Fields), sets:from_list(Available)) + end. From 4ed65d588813ba15e4d96b92f9635722cafaf333 Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Mon, 20 Mar 2023 18:13:14 +0100 Subject: [PATCH 03/11] mango: add type information for better self-documentation --- src/mango/src/mango.hrl | 12 +++++ src/mango/src/mango_cursor_view.erl | 83 ++++++++++++++++++++++++++--- src/mango/src/mango_idx_view.erl | 1 + 3 files changed, 88 insertions(+), 8 deletions(-) diff --git a/src/mango/src/mango.hrl b/src/mango/src/mango.hrl index d50d17b6fb3..2ff07aa4b7e 100644 --- a/src/mango/src/mango.hrl +++ b/src/mango/src/mango.hrl @@ -12,6 +12,8 @@ -define(MANGO_ERROR(R), throw({mango_error, ?MODULE, R})). +-type maybe(A) :: A | undefined. + -type abstract_text_selector() :: {'op_and', [abstract_text_selector()]} | {'op_or', [abstract_text_selector()]} | {'op_not', {abstract_text_selector(), abstract_text_selector()}} @@ -21,3 +23,13 @@ | {'op_null', {_, _}} | {'op_default', _} | {'op_regex', binary()}. + +-type database() :: binary(). +-type field() :: binary(). +-type fields() :: all_fields | [field()]. +-type selector() :: any(). +-type ejson() :: {[{atom(), any()}]}. + +-type shard_stats() :: {docs_examined, non_neg_integer()}. +-type row_property_key() :: id | key | value | doc. +-type row_properties() :: [{row_property_key(), any()}]. diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 8e79f608e70..80b7fe20550 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -30,15 +30,38 @@ -include_lib("couch_mrview/include/couch_mrview.hrl"). -include_lib("fabric/include/fabric.hrl"). +-include("mango.hrl"). -include("mango_cursor.hrl"). -include("mango_idx.hrl"). -include("mango_idx_view.hrl"). -define(HEARTBEAT_INTERVAL_IN_USEC, 4000000). +-type cursor_options() :: [{term(), term()}]. +-type message() :: + {meta, _} + | {row, row_properties()} + | {execution_stats, shard_stats()} + | {stop, #cursor{}} + | {complete, #cursor{}} + | {error, any()}. + % viewcbargs wraps up the arguments that view_cb uses into a single % entry in the mrargs.extra list. We use a Map to allow us to later % add fields without having old messages causing errors/crashes. + +-type viewcbargs() :: + #{ + selector => selector(), + fields => fields(), + covering_index => maybe(#idx{}) + }. + +-spec viewcbargs_new(Selector, Fields, CoveringIndex) -> ViewCBArgs when + Selector :: selector(), + Fields :: fields(), + CoveringIndex :: maybe(#idx{}), + ViewCBArgs :: viewcbargs(). viewcbargs_new(Selector, Fields, CoveringIndex) -> #{ selector => Selector, @@ -46,6 +69,9 @@ viewcbargs_new(Selector, Fields, CoveringIndex) -> covering_index => CoveringIndex }. +-spec viewcbargs_get(Key, Args) -> maybe(term()) when + Key :: selector | fields | covering_index, + Args :: viewcbargs(). viewcbargs_get(selector, Args) when is_map(Args) -> maps:get(selector, Args, undefined); viewcbargs_get(fields, Args) when is_map(Args) -> @@ -53,6 +79,11 @@ viewcbargs_get(fields, Args) when is_map(Args) -> viewcbargs_get(covering_index, Args) when is_map(Args) -> maps:get(covering_index, Args, undefined). +-spec create(Db, Indexes, Selector, Options) -> {ok, #cursor{}} when + Db :: database(), + Indexes :: [#idx{}], + Selector :: selector(), + Options :: cursor_options(). create(Db, Indexes, Selector, Opts) -> FieldRanges = mango_idx_view:field_ranges(Selector), Composited = composite_indexes(Indexes, FieldRanges), @@ -77,6 +108,7 @@ create(Db, Indexes, Selector, Opts) -> bookmark = Bookmark }}. +-spec explain(#cursor{}) -> nonempty_list(term()). explain(#cursor{opts = Opts} = Cursor) -> BaseArgs = base_args(Cursor), Args0 = apply_opts(Opts, BaseArgs), @@ -117,6 +149,7 @@ maybe_replace_max_json([H | T] = EndKey) when is_list(EndKey) -> maybe_replace_max_json(EndKey) -> EndKey. +-spec base_args(#cursor{}) -> #mrargs{}. base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) -> {StartKey, EndKey} = case Cursor#cursor.ranges of @@ -153,6 +186,10 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) - ] }. +-spec execute(#cursor{}, UserFunction, UserAccumulator) -> Result when + UserFunction :: fun(), + UserAccumulator :: any(), + Result :: {ok, UserAccumulator} | {error, any()}. execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFun, UserAcc) -> Cursor = Cursor0#cursor{ user_fun = UserFun, @@ -201,11 +238,15 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu end end. +-type comparator() :: '$lt' | '$lte' | '$eq' | '$gte' | '$gt'. +-type range() :: {comparator(), any(), comparator(), any()} | empty. + % Any of these indexes may be a composite index. For each % index find the most specific set of fields for each % index. Ie, if an index has columns a, b, c, d, then % check FieldRanges for a, b, c, and d and return % the longest prefix of columns found. +-spec composite_indexes([#idx{}], [{field(), range()}]) -> [{#idx{}, [range()], integer()}]. composite_indexes(Indexes, FieldRanges) -> lists:foldl( fun(Idx, Acc) -> @@ -221,6 +262,7 @@ composite_indexes(Indexes, FieldRanges) -> Indexes ). +-spec composite_prefix([field()], [{field(), range()}]) -> [range()]. composite_prefix([], _) -> []; composite_prefix([Col | Rest], Ranges) -> @@ -242,9 +284,6 @@ composite_prefix([Col | Rest], Ranges) -> % In the future we can look into doing a cached parallel % reduce view read on each index with the ranges to find % the one that has the fewest number of rows or something. --type comparator() :: '$lt' | '$lte' | '$eq' | '$gte' | '$gt'. --type range() :: {comparator(), any(), comparator(), any()} | empty. - -spec choose_best_index(IndexRanges) -> Selection when IndexRanges :: nonempty_list({#idx{}, [range()], integer()}), Selection :: {#idx{}, [range()]}. @@ -275,6 +314,11 @@ choose_best_index(IndexRanges) -> {SelectedIndex, SelectedIndexRanges, _} = hd(lists:sort(Cmp, IndexRanges)), {SelectedIndex, SelectedIndexRanges}. +-spec view_cb + (Message, #mrargs{}) -> Response when + Message :: {meta, any()} | {row, row_properties()} | complete, + Response :: {ok, #mrargs{}}; + (ok, ddoc_updated) -> any(). view_cb({meta, Meta}, Acc) -> % Map function starting put(mango_docs_examined, 0), @@ -346,11 +390,11 @@ view_cb(ok, ddoc_updated) -> %% match_and_extract_doc checks whether Doc matches Selector. If it does, %% extract Fields and return {match, FinalDoc}; otherwise return {no_match, undefined}. --spec match_and_extract_doc( - Doc :: term(), - Selector :: term(), - Fields :: [string()] | undefined | all_fields -) -> {match | no_match, term() | undefined}. +-spec match_and_extract_doc(Doc, Selector, Fields) -> Result when + Doc :: ejson(), + Selector :: selector(), + Fields :: maybe(fields()), + Result :: {match, term()} | {no_match, undefined}. match_and_extract_doc(Doc, Selector, Fields) -> case mango_selector:match(Selector, Doc) of true -> @@ -360,6 +404,7 @@ match_and_extract_doc(Doc, Selector, Fields) -> {no_match, undefined} end. +-spec derive_doc_from_index(#idx{}, #view_row{}) -> term(). derive_doc_from_index(Index, #view_row{id = DocId, key = Keys}) -> Columns = mango_idx:columns(Index), lists:foldr( @@ -368,6 +413,7 @@ derive_doc_from_index(Index, #view_row{id = DocId, key = Keys}) -> lists:zip(Columns, Keys) ). +-spec maybe_send_mango_ping() -> ok | term(). maybe_send_mango_ping() -> Current = os:timestamp(), LastPing = get(mango_last_msg_timestamp), @@ -381,9 +427,14 @@ maybe_send_mango_ping() -> set_mango_msg_timestamp() end. +-spec set_mango_msg_timestamp() -> term(). set_mango_msg_timestamp() -> put(mango_last_msg_timestamp, os:timestamp()). +-spec handle_message(message(), #cursor{}) -> Response when + Response :: + {ok, #cursor{}} + | {error, any()}. handle_message({meta, _}, Cursor) -> {ok, Cursor}; handle_message({row, Props}, Cursor) -> @@ -414,6 +465,10 @@ handle_message(complete, Cursor) -> handle_message({error, Reason}, _Cursor) -> {error, Reason}. +-spec handle_all_docs_message(message(), #cursor{}) -> Response when + Response :: + {ok, #cursor{}} + | {error, any()}. handle_all_docs_message({row, Props}, Cursor) -> case is_design_doc(Props) of true -> {ok, Cursor}; @@ -422,6 +477,8 @@ handle_all_docs_message({row, Props}, Cursor) -> handle_all_docs_message(Message, Cursor) -> handle_message(Message, Cursor). +-spec handle_doc(#cursor{}, doc()) -> Response when + Response :: {ok, #cursor{}} | {stop, #cursor{}}. handle_doc(#cursor{skip = S} = C, _) when S > 0 -> {ok, C#cursor{skip = S - 1}}; handle_doc(#cursor{limit = L, execution_stats = Stats} = C, Doc) when L > 0 -> @@ -436,6 +493,7 @@ handle_doc(#cursor{limit = L, execution_stats = Stats} = C, Doc) when L > 0 -> handle_doc(C, _Doc) -> {stop, C}. +-spec ddocid(#idx{}) -> binary(). ddocid(Idx) -> case mango_idx:ddoc(Idx) of <<"_design/", Rest/binary>> -> @@ -444,6 +502,7 @@ ddocid(Idx) -> Else end. +-spec apply_opts(cursor_options(), #mrargs{}) -> #mrargs{}. apply_opts([], Args) -> Args; apply_opts([{r, RStr} | Rest], Args) -> @@ -512,10 +571,16 @@ apply_opts([{_, _} | Rest], Args) -> % Ignore unknown options apply_opts(Rest, Args). +-spec consider_index_coverage(#idx{}, fields(), #mrargs{}) -> #mrargs{}. consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs0} = Args) -> IncludeDocs = IncludeDocs0 andalso (not mango_idx_view:covers(Index, Fields)), Args#mrargs{include_docs = IncludeDocs}. +-spec doc_member_and_extract(#cursor{}, row_properties()) -> Result when + Result :: + {ok | no_match, term(), {execution_stats, shard_stats()}} + | {no_match, null, {execution_stats, shard_stats()}} + | any(). doc_member_and_extract(Cursor, RowProps) -> Db = Cursor#cursor.db, Opts = Cursor#cursor.opts, @@ -554,12 +619,14 @@ doc_member_and_extract(Cursor, RowProps) -> {no_match, null, {execution_stats, ExecutionStats}} end. +-spec is_design_doc(row_properties()) -> boolean(). is_design_doc(RowProps) -> case couch_util:get_value(id, RowProps) of <<"_design/", _/binary>> -> true; _ -> false end. +-spec update_bookmark_keys(#cursor{}, row_properties()) -> #cursor{}. update_bookmark_keys(#cursor{limit = Limit} = Cursor, Props) when Limit > 0 -> Id = couch_util:get_value(id, Props), Key = couch_util:get_value(key, Props), diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index 3ef410e12e3..d3444a0e2e8 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -527,6 +527,7 @@ can_use_sort([Col | RestCols], SortFields, Selector) -> % There is no information available about the full set of fields which % comes the following consequences: an index cannot (reliably) cover % an "all fields" type of query and nested fields are out of scope. +-spec covers(#idx{}, fields()) -> boolean(). covers(_, all_fields) -> false; covers(Idx, Fields) -> From 72785746dd5be9f1c3f517e0b0c9d6c547a72b8e Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Wed, 22 Mar 2023 20:23:02 +0100 Subject: [PATCH 04/11] mango: increase coverage of the `choose_best_index/1` test --- src/mango/src/mango_cursor_view.erl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 80b7fe20550..9abb5cb66ef 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -742,13 +742,20 @@ choose_best_index_with_singleton_test() -> %% - choose the index with the lowest difference between its prefix and field ranges choose_best_index_lowest_difference_test() -> - IndexRanges = + IndexRanges1 = [ {index1, ranges1, 3}, {index2, ranges2, 2}, {index3, ranges3, 1} ], - ?assertEqual({index3, ranges3}, choose_best_index(IndexRanges)). + ?assertEqual({index3, ranges3}, choose_best_index(IndexRanges1)), + IndexRanges2 = + [ + {index1, ranges1, 3}, + {index2, ranges2, 1}, + {index3, ranges3, 2} + ], + ?assertEqual({index2, ranges2}, choose_best_index(IndexRanges2)). %% - if that is equal, choose the index with the least number of fields in the index choose_best_index_least_number_of_fields_test() -> From 0394b9e3369c020b18fefd400704935f2810d6da Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Wed, 22 Mar 2023 20:24:23 +0100 Subject: [PATCH 05/11] mango: add eunit tests --- src/mango/src/mango_cursor_view.erl | 797 +++++++++++++++++++++++++++- src/mango/src/mango_idx_view.erl | 24 + 2 files changed, 820 insertions(+), 1 deletion(-) diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 9abb5cb66ef..325727180c6 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -640,7 +640,794 @@ update_bookmark_keys(Cursor, _Props) -> %%%%%%%% module tests below %%%%%%%% -ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). + +viewcbargs_test() -> + ViewCBArgs = viewcbargs_new(selector, fields, index), + ?assertEqual(selector, viewcbargs_get(selector, ViewCBArgs)), + ?assertEqual(fields, viewcbargs_get(fields, ViewCBArgs)), + ?assertEqual(index, viewcbargs_get(covering_index, ViewCBArgs)), + ?assertError(function_clause, viewcbargs_get(something_else, ViewCBArgs)). + +maybe_replace_max_json_test() -> + ?assertEqual([], maybe_replace_max_json([])), + ?assertEqual(<<"">>, maybe_replace_max_json(?MAX_STR)), + ?assertEqual( + [val1, val2, <<"">>, val3], maybe_replace_max_json([val1, val2, ?MAX_JSON_OBJ, val3]) + ), + ?assertEqual(something, maybe_replace_max_json(something)). + +base_opts_test() -> + Index = + #idx{ + type = <<"json">>, + def = {[{<<"fields">>, {[{field1, undefined}, {field2, undefined}]}}]} + }, + Fields = [field1, field2], + Cursor = + #cursor{ + index = Index, + selector = selector, + fields = Fields, + ranges = [{'$gte', start_key, '$lte', end_key}] + }, + Extra = + [ + {callback, {mango_cursor_view, view_cb}}, + {selector, selector}, + {callback_args, #{ + selector => selector, + fields => Fields, + covering_index => Index + }}, + {ignore_partition_query_limit, true} + ], + MRArgs = + #mrargs{ + view_type = map, + reduce = false, + start_key = [start_key], + end_key = [end_key, ?MAX_JSON_OBJ], + include_docs = true, + extra = Extra + }, + ?assertEqual(MRArgs, base_args(Cursor)), + + % non-covering index + Cursor1 = Cursor#cursor{fields = all_fields}, + Extra1 = + [ + {callback, {mango_cursor_view, view_cb}}, + {selector, selector}, + {callback_args, #{ + selector => selector, + fields => all_fields, + covering_index => undefined + }}, + {ignore_partition_query_limit, true} + ], + MRArgs1 = MRArgs#mrargs{extra = Extra1}, + ?assertEqual(MRArgs1, base_args(Cursor1)). + +apply_opts_empty_test() -> + ?assertEqual(args, apply_opts([], args)). + +apply_opts_r_test() -> + Args = #mrargs{}, + ArgsWithDocs = Args#mrargs{include_docs = true}, + ?assertEqual(ArgsWithDocs, apply_opts([{r, "1"}], Args)), + ArgsWithoutDocs = Args#mrargs{include_docs = false}, + ?assertEqual(ArgsWithoutDocs, apply_opts([{r, "3"}], Args)). + +apply_opts_conflicts_test() -> + Args = #mrargs{}, + ArgsWithConflicts = Args#mrargs{conflicts = true}, + ?assertEqual(ArgsWithConflicts, apply_opts([{conflicts, true}], Args)), + ArgsWithoutConflicts = Args#mrargs{conflicts = undefined}, + ?assertEqual(ArgsWithoutConflicts, apply_opts([{conflicts, false}], Args)). + +apply_opts_sort_test() -> + Args = + #mrargs{ + start_key = start_key, + start_key_docid = start_key_docid, + end_key = end_key, + end_key_docid = end_key_docid + }, + ?assertEqual(Args, apply_opts([{sort, {[]}}], Args)), + ?assertEqual(Args, apply_opts([{sort, {[{field1, <<"asc">>}]}}], Args)), + ?assertEqual(Args, apply_opts([{sort, {[{field1, <<"asc">>}, {field2, <<"desc">>}]}}], Args)), + ArgsWithSort = + Args#mrargs{ + direction = rev, + start_key = end_key, + start_key_docid = end_key_docid, + end_key = start_key, + end_key_docid = start_key_docid + }, + ?assertEqual(ArgsWithSort, apply_opts([{sort, {[{field1, <<"desc">>}]}}], Args)). + +apply_opts_stale_test() -> + Args = #mrargs{}, + ArgsWithStale = Args#mrargs{stable = true, update = false}, + ?assertEqual(ArgsWithStale, apply_opts([{stale, ok}], Args)). + +apply_opts_stable_test() -> + Args = #mrargs{}, + ArgsWithStable = Args#mrargs{stable = true}, + ?assertEqual(ArgsWithStable, apply_opts([{stable, true}], Args)), + ArgsWithoutStable = Args#mrargs{stable = false}, + ?assertEqual(ArgsWithoutStable, apply_opts([{stable, false}], Args)). + +apply_opts_update_test() -> + Args = #mrargs{}, + ArgsWithUpdate = Args#mrargs{update = true}, + ?assertEqual(ArgsWithUpdate, apply_opts([{update, true}], Args)), + ArgsWithoutUpdate = Args#mrargs{update = false}, + ?assertEqual(ArgsWithoutUpdate, apply_opts([{update, false}], Args)). + +apply_opts_partition_test() -> + Args = #mrargs{}, + ArgsWithPartition = Args#mrargs{extra = [{partition, <<"partition">>}]}, + ?assertEqual(ArgsWithPartition, apply_opts([{partition, <<"partition">>}], Args)), + ArgsWithoutPartition = Args#mrargs{extra = []}, + ?assertEqual(ArgsWithoutPartition, apply_opts([{partition, <<>>}], Args)). + +consider_index_coverage_positive_test() -> + Index = + #idx{ + type = <<"json">>, + def = {[{<<"fields">>, {[]}}]} + }, + Fields = [<<"_id">>], + MRArgs = #mrargs{include_docs = true}, + MRArgsRef = MRArgs#mrargs{include_docs = false}, + ?assertEqual(MRArgsRef, consider_index_coverage(Index, Fields, MRArgs)), + MRArgs1 = #mrargs{include_docs = false}, + ?assertEqual(MRArgsRef, consider_index_coverage(Index, Fields, MRArgs1)). + +consider_index_coverage_negative_test() -> + Index = undefined, + Fields = all_fields, + MRArgs = #mrargs{include_docs = true}, + ?assertEqual(MRArgs, consider_index_coverage(Index, Fields, MRArgs)), + MRArgs1 = #mrargs{include_docs = false}, + ?assertEqual(MRArgs1, consider_index_coverage(Index, Fields, MRArgs1)). + +derive_doc_from_index_test() -> + Index = + #idx{ + type = <<"json">>, + def = {[{<<"fields">>, {[{<<"field1">>, undefined}, {<<"field2">>, undefined}]}}]} + }, + DocId = doc_id, + Keys = [key1, key2], + ViewRow = #view_row{id = DocId, key = Keys}, + Doc = {[{<<"_id">>, DocId}, {<<"field2">>, key2}, {<<"field1">>, key1}]}, + ?assertEqual(Doc, derive_doc_from_index(Index, ViewRow)). + +composite_indexes_test() -> + ?assertEqual([], composite_indexes([], [])), + Index1 = + #idx{ + type = <<"json">>, + def = {[{<<"fields">>, {[{field1, undefined}, {field2, undefined}]}}]} + }, + Index2 = + #idx{ + type = <<"json">>, + def = {[{<<"fields">>, {[{field1, undefined}, {field3, undefined}, {field4, range4}]}}]} + }, + Index3 = + #idx{ + type = <<"json">>, + def = {[{<<"fields">>, {[{field3, undefined}, {field4, undefined}]}}]} + }, + Indexes = [Index1, Index2, Index3], + Ranges = [{field1, range1}, {field3, range3}, {field4, range4}], + Result = [ + {Index3, [range3, range4], 1}, {Index2, [range1, range3, range4], 0}, {Index1, [range1], 2} + ], + ?assertEqual(Result, composite_indexes(Indexes, Ranges)). + +create_test() -> + Index = #idx{type = <<"json">>, def = {[{<<"fields">>, {[]}}]}}, + Indexes = [Index], + Ranges = [], + Selector = {[]}, + Options = [{limit, limit}, {skip, skip}, {fields, fields}, {bookmark, bookmark}], + Cursor = + #cursor{ + db = db, + index = Index, + ranges = Ranges, + selector = Selector, + opts = Options, + limit = limit, + skip = skip, + fields = fields, + bookmark = bookmark + }, + ?assertEqual({ok, Cursor}, create(db, Indexes, Selector, Options)). + +explain_test() -> + Cursor = + #cursor{ + ranges = [empty], + fields = all_fields, + opts = [] + }, + Response = + [ + {mrargs, + {[ + {include_docs, true}, + {view_type, map}, + {reduce, false}, + {partition, null}, + {start_key, null}, + {end_key, null}, + {direction, fwd}, + {stable, false}, + {update, true}, + {conflicts, undefined} + ]}}, + {covered, false} + ], + ?assertEqual(Response, explain(Cursor)). + +execute_test_() -> + { + foreach, + fun() -> + meck:new(foo, [non_strict]), + meck:new(fabric) + end, + fun(_) -> + meck:unload(fabric), + meck:unload(foo) + end, + [ + ?TDEF_FE(t_execute_empty), + ?TDEF_FE(t_execute_ok_all_docs), + ?TDEF_FE(t_execute_ok_query_view), + ?TDEF_FE(t_execute_error) + ] + }. + +t_execute_empty(_) -> + Cursor = #cursor{ranges = [empty]}, + meck:expect(fabric, all_docs, ['_', '_', '_', '_', '_'], meck:val(error)), + meck:expect(fabric, query_view, ['_', '_', '_', '_', '_', '_'], meck:val(error)), + ?assertEqual({ok, accumulator}, execute(Cursor, undefined, accumulator)), + ?assertNot(meck:called(fabric, all_docs, '_')), + ?assertNot(meck:called(fabric, query_view, '_')). + +t_execute_ok_all_docs(_) -> + Bookmark = bookmark, + UserFnParameters = [{add_key, bookmark, Bookmark}, accumulator], + meck:expect(foo, bar, UserFnParameters, meck:val({undefined, updated_accumulator})), + Index = #idx{type = <<"json">>, def = all_docs}, + Selector = {[]}, + Fields = all_fields, + Cursor = + #cursor{ + index = Index, + db = db, + selector = Selector, + fields = Fields, + ranges = [{'$gte', start_key, '$lte', end_key}], + opts = [{user_ctx, user_ctx}], + bookmark = nil + }, + Cursor1 = + Cursor#cursor{ + user_acc = accumulator, + user_fun = fun foo:bar/2, + execution_stats = '_' + }, + Cursor2 = + Cursor1#cursor{ + bookmark = Bookmark, + bookmark_docid = undefined, + bookmark_key = undefined, + execution_stats = #execution_stats{executionStartTime = {0, 0, 0}} + }, + Extra = + [ + {callback, {mango_cursor_view, view_cb}}, + {selector, Selector}, + {callback_args, #{ + selector => Selector, + fields => Fields, + covering_index => undefined + }}, + {ignore_partition_query_limit, true} + ], + Args = + #mrargs{ + view_type = map, + reduce = false, + start_key = [start_key], + end_key = [end_key, ?MAX_JSON_OBJ], + include_docs = true, + extra = Extra + }, + Parameters = [ + db, [{user_ctx, user_ctx}], fun mango_cursor_view:handle_all_docs_message/2, Cursor1, Args + ], + meck:expect(fabric, all_docs, Parameters, meck:val({ok, Cursor2})), + ?assertEqual({ok, updated_accumulator}, execute(Cursor, fun foo:bar/2, accumulator)), + ?assert(meck:called(fabric, all_docs, '_')). + +t_execute_ok_query_view(_) -> + Bookmark = bookmark, + UserFnParameters = [{add_key, bookmark, Bookmark}, accumulator], + meck:expect(foo, bar, UserFnParameters, meck:val({undefined, updated_accumulator})), + Index = + #idx{ + type = <<"json">>, + ddoc = <<"_design/ghibli">>, + name = index_name, + def = {[{<<"fields">>, {[{field1, undefined}]}}]} + }, + Selector = {[]}, + Fields = all_fields, + Cursor = + #cursor{ + index = Index, + db = db, + selector = Selector, + fields = Fields, + ranges = [{'$gte', start_key, '$lte', end_key}], + opts = [{user_ctx, user_ctx}], + bookmark = nil + }, + Cursor1 = + Cursor#cursor{ + user_acc = accumulator, + user_fun = fun foo:bar/2, + execution_stats = '_' + }, + Cursor2 = + Cursor1#cursor{ + bookmark = Bookmark, + bookmark_docid = undefined, + bookmark_key = undefined, + execution_stats = #execution_stats{executionStartTime = {0, 0, 0}} + }, + Extra = + [ + {callback, {mango_cursor_view, view_cb}}, + {selector, Selector}, + {callback_args, #{ + selector => Selector, + fields => Fields, + covering_index => undefined + }}, + {ignore_partition_query_limit, true} + ], + Args = + #mrargs{ + view_type = map, + reduce = false, + start_key = [start_key], + end_key = [end_key, ?MAX_JSON_OBJ], + include_docs = true, + extra = Extra + }, + Parameters = [ + db, + [{user_ctx, user_ctx}], + <<"ghibli">>, + index_name, + fun mango_cursor_view:handle_message/2, + Cursor1, + Args + ], + meck:expect(fabric, query_view, Parameters, meck:val({ok, Cursor2})), + ?assertEqual({ok, updated_accumulator}, execute(Cursor, fun foo:bar/2, accumulator)), + ?assert(meck:called(fabric, query_view, '_')). + +t_execute_error(_) -> + Cursor = + #cursor{ + index = #idx{type = <<"json">>, ddoc = <<"_design/ghibli">>, name = index_name}, + db = db, + selector = {[]}, + fields = all_fields, + ranges = [{'$gte', start_key, '$lte', end_key}], + opts = [{user_ctx, user_ctx}], + bookmark = nil + }, + Parameters = [ + db, '_', <<"ghibli">>, index_name, fun mango_cursor_view:handle_message/2, '_', '_' + ], + meck:expect(fabric, query_view, Parameters, meck:val({error, reason})), + ?assertEqual({error, reason}, execute(Cursor, undefined, accumulator)). + +view_cb_test_() -> + { + foreach, + fun() -> + meck:new(rexi) + end, + fun(_) -> + meck:unload(rexi) + end, + [ + ?TDEF_FE(t_view_cb_meta), + ?TDEF_FE(t_view_cb_row_matching_regular_doc), + ?TDEF_FE(t_view_cb_row_non_matching_regular_doc), + ?TDEF_FE(t_view_cb_row_null_doc), + ?TDEF_FE(t_view_cb_row_missing_doc_triggers_quorum_fetch), + ?TDEF_FE(t_view_cb_row_matching_covered_doc), + ?TDEF_FE(t_view_cb_row_non_matching_covered_doc), + ?TDEF_FE(t_view_cb_row_backwards_compatible), + ?TDEF_FE(t_view_cb_complete), + ?TDEF_FE(t_view_cb_ok) + ] + }. + +t_view_cb_meta(_) -> + meck:expect(rexi, stream2, [{meta, meta}], meck:val(ok)), + ?assertEqual({ok, accumulator}, view_cb({meta, meta}, accumulator)), + ?assert(meck:called(rexi, stream2, '_')). + +t_view_cb_row_matching_regular_doc(_) -> + Row = [{id, id}, {key, key}, {doc, doc}], + Result = #view_row{id = id, key = key, doc = doc}, + meck:expect(rexi, stream2, [Result], meck:val(ok)), + Accumulator = + #mrargs{ + extra = [ + {callback_args, #{ + selector => {[]}, + fields => all_fields, + covering_index => undefined + }} + ] + }, + put(mango_docs_examined, 0), + ?assertEqual({ok, Accumulator}, view_cb({row, Row}, Accumulator)), + ?assert(meck:called(rexi, stream2, '_')). + +t_view_cb_row_non_matching_regular_doc(_) -> + Doc = {[]}, + Row = [{id, id}, {key, key}, {doc, Doc}], + meck:expect(rexi, stream2, ['_'], undefined), + Accumulator = + #mrargs{ + extra = [ + {callback_args, #{ + selector => {[{<<"field">>, {[{<<"$exists">>, true}]}}]}, + fields => all_fields, + covering_index => undefined + }} + ] + }, + put(mango_docs_examined, 0), + put(mango_last_msg_timestamp, os:timestamp()), + ?assertEqual({ok, Accumulator}, view_cb({row, Row}, Accumulator)), + ?assertNot(meck:called(rexi, stream2, '_')). + +t_view_cb_row_null_doc(_) -> + Row = [{id, id}, {key, key}, {doc, null}], + meck:expect(rexi, stream2, ['_'], undefined), + Accumulator = + #mrargs{ + extra = [ + {callback_args, #{ + selector => {[]}, + fields => all_fields, + covering_index => undefined + }} + ] + }, + put(mango_last_msg_timestamp, os:timestamp()), + ?assertEqual({ok, Accumulator}, view_cb({row, Row}, Accumulator)), + ?assertNot(meck:called(rexi, stream2, '_')). + +t_view_cb_row_missing_doc_triggers_quorum_fetch(_) -> + Row = [{id, id}, {key, key}, {doc, undefined}], + ViewRow = #view_row{id = id, key = key, doc = undefined}, + meck:expect(rexi, stream2, [ViewRow], meck:val(ok)), + Accumulator = + #mrargs{ + extra = [ + {callback_args, #{ + selector => {[]}, + fields => all_fields, + covering_index => undefined + }} + ] + }, + ?assertEqual({ok, Accumulator}, view_cb({row, Row}, Accumulator)), + ?assert(meck:called(rexi, stream2, '_')). + +t_view_cb_row_matching_covered_doc(_) -> + Keys = [key1, key2], + Row = [{id, id}, {key, Keys}, {doc, undefined}], + Doc = {[{<<"field1">>, key1}, {<<"field2">>, key2}]}, + Result = #view_row{id = id, key = Keys, doc = Doc}, + Fields = [<<"field1">>, <<"field2">>], + Index = + #idx{ + type = <<"json">>, + def = {[{<<"fields">>, {[{<<"field1">>, undefined}, {<<"field2">>, undefined}]}}]} + }, + meck:expect(rexi, stream2, [Result], meck:val(ok)), + Accumulator = + #mrargs{ + extra = [ + {callback_args, #{ + selector => {[]}, + fields => Fields, + covering_index => Index + }} + ] + }, + ?assertEqual({ok, Accumulator}, view_cb({row, Row}, Accumulator)), + ?assert(meck:called(rexi, stream2, '_')). + +t_view_cb_row_non_matching_covered_doc(_) -> + Row = [{id, id}, {key, [key1, key2]}, {doc, undefined}], + Fields = [<<"field1">>, <<"field2">>], + Index = + #idx{ + type = <<"json">>, + def = {[{<<"fields">>, {[{<<"field1">>, undefined}, {<<"field2">>, undefined}]}}]} + }, + meck:expect(rexi, stream2, ['_'], undefined), + Accumulator = + #mrargs{ + extra = [ + {callback_args, #{ + selector => {[{<<"field">>, {[{<<"$exists">>, true}]}}]}, + fields => Fields, + covering_index => Index + }} + ] + }, + put(mango_last_msg_timestamp, os:timestamp()), + ?assertEqual({ok, Accumulator}, view_cb({row, Row}, Accumulator)), + ?assertNot(meck:called(rexi, stream2, '_')). + +t_view_cb_row_backwards_compatible(_) -> + Row = [{id, id}, {key, key}, {doc, null}], + meck:expect(rexi, stream2, ['_'], undefined), + Accumulator = #mrargs{extra = [{selector, {[]}}]}, + put(mango_last_msg_timestamp, os:timestamp()), + ?assertEqual({ok, Accumulator}, view_cb({row, Row}, Accumulator)), + ?assertNot(meck:called(rexi, stream2, '_')). + +t_view_cb_complete(_) -> + meck:expect(rexi, stream2, [{execution_stats, {docs_examined, '_'}}], meck:val(ok)), + meck:expect(rexi, stream_last, [complete], meck:val(ok)), + ?assertEqual({ok, accumulator}, view_cb(complete, accumulator)), + ?assert(meck:called(rexi, stream2, '_')), + ?assert(meck:called(rexi, stream_last, '_')). + +t_view_cb_ok(_) -> + meck:expect(rexi, reply, [{ok, ddoc_updated}], meck:val(ok)), + view_cb(ok, ddoc_updated), + ?assert(meck:called(rexi, reply, '_')). + +maybe_send_mango_ping_test_() -> + { + foreach, + fun() -> + meck:new(rexi) + end, + fun(_) -> + meck:unload(rexi) + end, + [ + ?TDEF_FE(t_maybe_send_mango_ping_nop), + ?TDEF_FE(t_maybe_send_mango_ping_happens) + ] + }. + +t_maybe_send_mango_ping_nop(_) -> + put(mango_last_msg_timestamp, os:timestamp()), + meck:expect(rexi, ping, [], meck:val(error)), + ?assertEqual(ok, maybe_send_mango_ping()), + ?assertNot(meck:called(rexi, ping, '_')). + +t_maybe_send_mango_ping_happens(_) -> + put(mango_last_msg_timestamp, {0, 0, 0}), + meck:expect(rexi, ping, [], meck:val(ok)), + maybe_send_mango_ping(), + ?assert(meck:called(rexi, ping, '_')), + Timestamp = get(mango_last_msg_timestamp), + ?assertNotEqual(Timestamp, {0, 0, 0}). + +ddocid_test() -> + ?assertEqual(<<"name">>, ddocid(#idx{ddoc = <<"_design/name">>})), + ?assertEqual(something_else, ddocid(#idx{ddoc = something_else})). + +is_design_doc_test() -> + ?assert(is_design_doc([{id, <<"_design/name">>}])), + ?assertNot(is_design_doc([{id, something_else}])). + +handle_message_test_() -> + { + foreach, + fun() -> + meck:new(foo, [non_strict]) + end, + fun(_) -> + meck:unload(foo) + end, + [ + ?TDEF_FE(t_handle_message_meta), + ?TDEF_FE(t_handle_message_row_ok_above_limit), + ?TDEF_FE(t_handle_message_row_ok_at_limit), + ?TDEF_FE(t_handle_message_row_ok_skip), + ?TDEF_FE(t_handle_message_row_ok_triggers_quorum_fetch_match), + ?TDEF_FE(t_handle_message_row_ok_triggers_quorum_fetch_no_match), + ?TDEF_FE(t_handle_message_row_no_match), + ?TDEF_FE(t_handle_message_row_error), + ?TDEF_FE(t_handle_message_execution_stats), + ?TDEF_FE(t_handle_message_complete), + ?TDEF_FE(t_handle_message_error) + ] + }. + +t_handle_message_meta(_) -> + ?assertEqual({ok, cursor}, handle_message({meta, undefined}, cursor)). + +t_handle_message_row_ok_above_limit(_) -> + Doc = {[{<<"field1">>, value1}, {<<"field2">>, value2}]}, + meck:expect(foo, bar, [{row, Doc}, accumulator], meck:val({go, updated_accumulator})), + Cursor = + #cursor{ + execution_stats = #execution_stats{resultsReturned = 0}, + fields = all_fields, + limit = 9, + user_acc = accumulator, + user_fun = fun foo:bar/2 + }, + Row = [{id, id}, {key, key}, {doc, Doc}], + Cursor1 = + Cursor#cursor{ + execution_stats = #execution_stats{resultsReturned = 1}, + limit = 8, + user_acc = updated_accumulator, + bookmark_docid = id, + bookmark_key = key + }, + ?assertEqual({go, Cursor1}, handle_message({row, Row}, Cursor)). + +t_handle_message_row_ok_at_limit(_) -> + Cursor = + #cursor{ + execution_stats = #execution_stats{resultsReturned = n}, + fields = all_fields, + limit = 0 + }, + Row = [{doc, {[]}}], + ?assertEqual({stop, Cursor}, handle_message({row, Row}, Cursor)). + +t_handle_message_row_ok_skip(_) -> + Cursor = + #cursor{ + execution_stats = #execution_stats{resultsReturned = n}, + fields = all_fields, + skip = 8 + }, + Row = [{doc, {[]}}], + Cursor1 = Cursor#cursor{skip = 7}, + ?assertEqual({ok, Cursor1}, handle_message({row, Row}, Cursor)). + +t_handle_message_row_ok_triggers_quorum_fetch_match(_) -> + Doc = #doc{id = id, body = {[{<<"field">>, something}]}}, + Object = {[{<<"_id">>, id}, {<<"field">>, something}]}, + meck:expect(foo, bar, [{row, Object}, accumulator], meck:val({go, updated_accumulator})), + Cursor = + #cursor{ + db = db, + opts = opts, + execution_stats = + #execution_stats{ + totalQuorumDocsExamined = 0, + resultsReturned = 0 + }, + fields = all_fields, + selector = {[{<<"field">>, {[{<<"$exists">>, true}]}}]}, + user_fun = fun foo:bar/2, + user_acc = accumulator, + limit = 1 + }, + Row = [{id, id}, {doc, undefined}], + Cursor1 = + Cursor#cursor{ + execution_stats = + #execution_stats{ + totalQuorumDocsExamined = 1, + resultsReturned = 1 + }, + user_acc = updated_accumulator, + limit = 0, + bookmark_docid = id + }, + meck:expect(mango_util, defer, [fabric, open_doc, [db, id, opts]], meck:val({ok, Doc})), + ?assertEqual({go, Cursor1}, handle_message({row, Row}, Cursor)), + ?assert(meck:called(mango_util, defer, '_')), + meck:delete(mango_util, defer, 3). + +t_handle_message_row_ok_triggers_quorum_fetch_no_match(_) -> + Cursor = + #cursor{ + db = db, + opts = opts, + execution_stats = #execution_stats{totalQuorumDocsExamined = 0}, + fields = all_fields, + selector = {[{<<"field">>, {[{<<"$exists">>, true}]}}]} + }, + Row = [{id, id}, {doc, undefined}], + Cursor1 = + Cursor#cursor{ + execution_stats = #execution_stats{totalQuorumDocsExamined = 1} + }, + Doc = #doc{id = id, body = {[]}}, + meck:expect(mango_util, defer, [fabric, open_doc, [db, id, opts]], meck:val({ok, Doc})), + ?assertEqual({ok, Cursor1}, handle_message({row, Row}, Cursor)), + ?assert(meck:called(mango_util, defer, '_')), + meck:delete(mango_util, defer, 3). + +t_handle_message_row_no_match(_) -> + Cursor = + #cursor{ + execution_stats = #execution_stats{resultsReturned = n} + }, + Row = [{doc, null}], + ?assertEqual({ok, Cursor}, handle_message({row, Row}, Cursor)). + +t_handle_message_row_error(_) -> + Cursor = + #cursor{ + db = db, + opts = opts, + execution_stats = #execution_stats{totalQuorumDocsExamined = 0} + }, + Row = [{id, id}, {doc, undefined}], + meck:expect(mango_util, defer, [fabric, open_doc, [db, id, opts]], meck:val(error)), + meck:expect(couch_log, error, ['_', [mango_cursor_view, error]], meck:val(ok)), + ?assertEqual({ok, Cursor}, handle_message({row, Row}, Cursor)), + ?assert(meck:called(mango_util, defer, '_')), + ?assert(meck:called(couch_log, error, '_')), + meck:delete(mango_util, defer, 3), + meck:delete(couch_log, error, 2). + +t_handle_message_execution_stats(_) -> + ShardStats = {docs_examined, 42}, + ExecutionStats = #execution_stats{totalDocsExamined = 11}, + ExecutionStats1 = #execution_stats{totalDocsExamined = 53}, + Cursor = #cursor{execution_stats = ExecutionStats}, + Cursor1 = #cursor{execution_stats = ExecutionStats1}, + ?assertEqual({ok, Cursor1}, handle_message({execution_stats, ShardStats}, Cursor)). + +t_handle_message_complete(_) -> + ?assertEqual({ok, cursor}, handle_message(complete, cursor)). + +t_handle_message_error(_) -> + ?assertEqual({error, reason}, handle_message({error, reason}, undefined)). + +handle_all_docs_message_ddoc_test() -> + Row = [{id, <<"_design/foobar">>}], + ?assertEqual({ok, cursor}, handle_all_docs_message({row, Row}, cursor)). + +handle_all_docs_message_row_test() -> + Cursor = + #cursor{ + execution_stats = #execution_stats{resultsReturned = n} + }, + Row = [{doc, null}], + ?assertEqual({ok, Cursor}, handle_all_docs_message({row, Row}, Cursor)). + +handle_all_docs_message_regular_test() -> + ?assertEqual(handle_message(complete, cursor), handle_all_docs_message(complete, cursor)). %% Test the doc_member_and_extract bypasses the selector check if it receives %% a document in RowProps.doc. @@ -821,4 +1608,12 @@ with_dummy_columns(Index, Count) -> Columns = {[{<<"field", (integer_to_binary(I))/binary>>, undefined} || I <- lists:seq(1, Count)]}, Index#idx{def = {[{<<"fields">>, Columns}]}}. + +update_bookmark_keys_test() -> + Cursor0 = #cursor{limit = 0}, + ?assertEqual(Cursor0, update_bookmark_keys(Cursor0, undefined)), + Cursor1 = #cursor{limit = 1}, + Row = [{id, id}, {key, key}], + UpdatedCursor1 = Cursor1#cursor{bookmark_docid = id, bookmark_key = key}, + ?assertEqual(UpdatedCursor1, update_bookmark_keys(Cursor1, Row)). -endif. diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index d3444a0e2e8..641b8140a34 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -538,3 +538,27 @@ covers(Idx, Fields) -> Available = [<<"_id">> | columns(Idx)], sets:is_subset(sets:from_list(Fields), sets:from_list(Available)) end. + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +covers_all_fields_test() -> + ?assertNot(covers(undefined, all_fields)). + +covers_all_docs_test() -> + ?assertNot(covers(#idx{def = all_docs}, undefined)). + +covers_empty_index_test() -> + Index = #idx{def = {[{<<"fields">>, {[]}}]}}, + ?assert(covers(Index, [])), + ?assert(covers(Index, [<<"_id">>])). + +covers_regular_index_test() -> + Index = #idx{def = {[{<<"fields">>, {[{field1, undefined}, {field2, undefined}]}}]}}, + ?assert(covers(Index, [])), + ?assert(covers(Index, [<<"_id">>])), + ?assert(covers(Index, [field1])), + ?assert(covers(Index, [field2, field1])), + ?assert(covers(Index, [<<"_id">>, field1, field2])), + ?assertNot(covers(Index, [field3, field1, field2])). +-endif. From 96d04802ff67b378f4de1c3cd52dae25d1ce2855 Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Thu, 23 Mar 2023 00:42:14 +0100 Subject: [PATCH 06/11] _find: mention the `covered` attribute in the `_explain` response --- src/docs/src/api/database/find.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/docs/src/api/database/find.rst b/src/docs/src/api/database/find.rst index 3f4b9ca17a9..027ddf8ee7e 100644 --- a/src/docs/src/api/database/find.rst +++ b/src/docs/src/api/database/find.rst @@ -1317,6 +1317,9 @@ it easier to take advantage of future improvements to query planning :>header Content-Type: - :mimetype:`application/json` :>header Transfer-Encoding: ``chunked`` + :>json boolean covered: Tell if the query could be answered only + by relying on the data stored in the index. When ``true``, no + documents are fetched, which results in a faster response. :>json string dbname: Name of database. :>json object index: Index used to fulfill the query. :>json object selector: Query selector used. @@ -1364,6 +1367,7 @@ it easier to take advantage of future improvements to query planning Transfer-Encoding: chunked { + "covered": false, "dbname": "movies", "index": { "ddoc": "_design/0d61d9177426b1e2aa8d0fe732ec6e506f5d443c", From 7cbcce470c6834b9023802fb655f21bfb4aa8381 Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Fri, 24 Mar 2023 16:40:48 +0100 Subject: [PATCH 07/11] mango: add integration tests for keys-only covering indexes --- src/mango/test/22-covering-index-test.py | 115 +++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 src/mango/test/22-covering-index-test.py diff --git a/src/mango/test/22-covering-index-test.py b/src/mango/test/22-covering-index-test.py new file mode 100644 index 00000000000..b2f0202ed55 --- /dev/null +++ b/src/mango/test/22-covering-index-test.py @@ -0,0 +1,115 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import mango + + +class CoveringIndexTests(mango.UserDocsTests): + def is_covered(self, selector, fields, index, use_index=None): + resp = self.db.find(selector, fields=fields, use_index=use_index, explain=True) + self.assertEqual(resp["index"]["type"], "json") + self.assertEqual(resp["index"]["name"], index) + self.assertEqual(resp["mrargs"]["include_docs"], False) + self.assertEqual(resp["covered"], True) + + def is_not_covered(self, selector, fields): + resp = self.db.find(selector, fields=fields, explain=True) + self.assertEqual(resp["mrargs"]["include_docs"], True) + self.assertEqual(resp["covered"], False) + + def test_index_covers_query_1field_index_id(self): + self.is_covered({"age": {"$gte": 32}}, ["_id"], "age") + + def test_index_covers_query_2field_index_id(self): + self.is_covered( + {"company": "Lyria", "manager": True}, ["_id"], "company_and_manager" + ) + + def test_index_covers_query_2field_index_extract_field(self): + self.is_covered( + {"company": {"$exists": True}, "manager": True}, + ["company"], + "company_and_manager", + ) + + def test_index_covers_query_2field_index_extract_field_force_index(self): + self.is_covered( + {"company": {"$exists": True}, "manager": True}, + ["company"], + "company_and_manager", + use_index="company_and_manager", + ) + + def test_index_covers_query_elemMatch(self): + self.is_covered( + {"favorites": {"$elemMatch": {"$eq": "Erlang"}}}, ["favorites"], "favorites" + ) + + def test_index_covers_query_composite_field_id(self): + self.is_covered( + {"name": {"first": "Stephanie", "last": "Kirkland"}}, ["_id"], "name" + ) + + def test_index_does_not_cover_query_empty_selector(self): + self.is_not_covered({}, ["_id"]) + + def test_index_does_not_cover_query_field_not_in_index(self): + self.is_not_covered({"age": {"$gte": 32}}, ["name"]) + + def test_index_does_not_cover_query_all_fields(self): + self.is_not_covered({"age": {"$gte": 32}}, None) + + def test_index_does_not_cover_query_partial_selector_id(self): + self.is_not_covered({"location.state": "Nevada"}, ["_id"]) + + def test_index_does_not_cover_query_partial_selector(self): + self.is_not_covered({"name.last": "Hernandez"}, ["name.first"]) + + def test_covering_index_provides_correct_answer_id(self): + docs = self.db.find({"age": {"$gte": 32}}, fields=["_id"]) + expected = [ + {"_id": "659d0430-b1f4-413a-a6b7-9ea1ef071325"}, + {"_id": "48ca0455-8bd0-473f-9ae2-459e42e3edd1"}, + {"_id": "e900001d-bc48-48a6-9b1a-ac9a1f5d1a03"}, + {"_id": "b31dad3f-ae8b-4f86-8327-dfe8770beb27"}, + {"_id": "71562648-6acb-42bc-a182-df6b1f005b09"}, + {"_id": "c78c529f-0b07-4947-90a6-d6b7ca81da62"}, + {"_id": "8e1c90c0-ac18-4832-8081-40d14325bde0"}, + {"_id": "6c0afcf1-e57e-421d-a03d-0c0717ebf843"}, + {"_id": "5b61abc1-a3d3-4092-b9d7-ced90e675536"}, + {"_id": "a33d5457-741a-4dce-a217-3eab28b24e3e"}, + {"_id": "b06aadcf-cd0f-4ca6-9f7e-2c993e48d4c4"}, + {"_id": "b1e70402-8add-4068-af8f-b4f3d0feb049"}, + {"_id": "0461444c-e60a-457d-a4bb-b8d811853f21"}, + ] + self.assertEqual(docs, expected) + + def test_covering_index_provides_correct_answer_2field_index(self): + docs = self.db.find( + {"company": {"$exists": True}, "manager": True}, + sort=[{"company": "asc"}], + fields=["company"], + use_index="company_and_manager", + ) + expected = [ + {"company": "Affluex"}, + {"company": "Globoil"}, + {"company": "Lyria"}, + {"company": "Manglo"}, + {"company": "Myopium"}, + {"company": "Niquent"}, + {"company": "Oulu"}, + {"company": "Prosely"}, + {"company": "Tasmania"}, + {"company": "Zialactic"}, + ] + self.assertEqual(docs, expected) From cfd4c73532eab276e5d327c2f821dd2dd5949f94 Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Mon, 27 Mar 2023 15:20:08 +0200 Subject: [PATCH 08/11] mango: mark fields with the `$exists` operator indexable This is required to make index selection work better with covering indexes. The `$exists` operator prescribes the presence of the given field so that if an index has the field, it shall be considered because it implies true. Without this change, it will not happen, but covering indexes can work if the index is manually picked. --- src/mango/src/mango_idx_view.erl | 94 ++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index 641b8140a34..e3db24fbb7a 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -300,6 +300,10 @@ indexable({[{<<"$gt">>, _}]}) -> true; indexable({[{<<"$gte">>, _}]}) -> true; +% This is required to improve index selection for covering indexes. +% Making `$exists` indexable should not cause problems in other cases. +indexable({[{<<"$exists">>, _}]}) -> + true; % All other operators are currently not indexable. % This is also a subtle assertion that we don't % call indexable/1 on a field name. @@ -542,6 +546,96 @@ covers(Idx, Fields) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +indexable_fields_of(Selector) -> + indexable_fields(test_util:as_selector(Selector)). + +indexable_fields_empty_test() -> + ?assertEqual([], indexable_fields_of(#{})). + +indexable_fields_and_test() -> + Selector = #{<<"$and">> => [#{<<"field1">> => undefined, <<"field2">> => undefined}]}, + ?assertEqual([<<"field1">>, <<"field2">>], indexable_fields_of(Selector)). + +indexable_fields_or_test() -> + Selector = #{<<"$or">> => [#{<<"field1">> => undefined, <<"field2">> => undefined}]}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_nor_test() -> + Selector = #{<<"$nor">> => [#{<<"field1">> => undefined, <<"field2">> => undefined}]}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_all_test() -> + Selector = #{<<"field">> => #{<<"$all">> => []}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_elemMatch_test() -> + Selector = #{<<"field">> => #{<<"$elemMatch">> => #{}}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_allMatch_test() -> + Selector = #{<<"field">> => #{<<"$allMatch">> => #{}}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_keyMapMatch_test() -> + Selector = #{<<"field">> => #{<<"$keyMapMatch">> => #{}}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_in_test() -> + Selector = #{<<"field">> => #{<<"$in">> => []}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_nin_test() -> + Selector = #{<<"field">> => #{<<"$nin">> => []}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_not_test() -> + Selector = #{<<"field">> => #{<<"$not">> => #{}}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_lt_test() -> + Selector = #{<<"field">> => #{<<"$lt">> => undefined}}, + ?assertEqual([<<"field">>], indexable_fields_of(Selector)). + +indexable_fields_lte_test() -> + Selector = #{<<"field">> => #{<<"$lte">> => undefined}}, + ?assertEqual([<<"field">>], indexable_fields_of(Selector)). + +indexable_fields_eq_test() -> + Selector = #{<<"field">> => #{<<"$eq">> => undefined}}, + ?assertEqual([<<"field">>], indexable_fields_of(Selector)). + +indexable_fields_ne_test() -> + Selector = #{<<"field">> => #{<<"$ne">> => undefined}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_gte_test() -> + Selector = #{<<"field">> => #{<<"$gte">> => undefined}}, + ?assertEqual([<<"field">>], indexable_fields_of(Selector)). + +indexable_fields_gt_test() -> + Selector = #{<<"field">> => #{<<"$gt">> => undefined}}, + ?assertEqual([<<"field">>], indexable_fields_of(Selector)). + +indexable_fields_mod_test() -> + Selector = #{<<"field">> => #{<<"$mod">> => [0, 0]}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_regex_test() -> + Selector = #{<<"field">> => #{<<"$regex">> => undefined}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_exists_test() -> + Selector = #{<<"field">> => #{<<"$exists">> => true}}, + ?assertEqual([<<"field">>], indexable_fields_of(Selector)). + +indexable_fields_type_test() -> + Selector = #{<<"field">> => #{<<"$type">> => undefined}}, + ?assertEqual([], indexable_fields_of(Selector)). + +indexable_fields_size_test() -> + Selector = #{<<"field">> => #{<<"$size">> => 0}}, + ?assertEqual([], indexable_fields_of(Selector)). + covers_all_fields_test() -> ?assertNot(covers(undefined, all_fields)). From 138fdfe8bffd9ebefc064cbf9eed943f17161ada Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Mon, 27 Mar 2023 19:37:13 +0200 Subject: [PATCH 09/11] mango: enhance compositionality of `consider_index_coverage/3` Ideally, the effect of this function should be applied at a single spot of the code. When building the base options, covering index information should be left blank to make it consistent with the rest of the parameters. --- src/couch/src/couch_util.erl | 5 +- src/mango/src/mango_cursor_view.erl | 75 ++++++++++++++++------------- 2 files changed, 46 insertions(+), 34 deletions(-) diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl index dc58e2bf60e..739df28e59d 100644 --- a/src/couch/src/couch_util.erl +++ b/src/couch/src/couch_util.erl @@ -23,7 +23,7 @@ -export([to_binary/1, to_integer/1, to_list/1, url_encode/1]). -export([json_encode/1, json_decode/1, json_decode/2]). -export([verify/2, simple_call/2, shutdown_sync/1]). --export([get_value/2, get_value/3]). +-export([get_value/2, get_value/3, set_value/3]). -export([reorder_results/2, reorder_results/3]). -export([url_strip_password/1]). -export([encode_doc_id/1]). @@ -263,6 +263,9 @@ get_value(Key, List, Default) -> Default end. +set_value(Key, List, Value) -> + lists:keyreplace(Key, 1, List, {Key, Value}). + get_nested_json_value({Props}, [Key | Keys]) -> case couch_util:get_value(Key, Props, nil) of nil -> throw({not_found, <<"missing json key: ", Key/binary>>}); diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 325727180c6..474d3bfe6f9 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -161,13 +161,6 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) - mango_idx:end_key(Idx, Cursor#cursor.ranges) } end, - CoveringIndex = - case mango_idx_view:covers(Idx, Fields) of - true -> - Idx; - false -> - undefined - end, #mrargs{ view_type = map, reduce = false, @@ -180,7 +173,7 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) - {callback, {?MODULE, view_cb}}, % TODO remove selector. It supports older nodes during version upgrades. {selector, Selector}, - {callback_args, viewcbargs_new(Selector, Fields, CoveringIndex)}, + {callback_args, viewcbargs_new(Selector, Fields, undefined)}, {ignore_partition_query_limit, true} ] @@ -572,9 +565,25 @@ apply_opts([{_, _} | Rest], Args) -> apply_opts(Rest, Args). -spec consider_index_coverage(#idx{}, fields(), #mrargs{}) -> #mrargs{}. -consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs0} = Args) -> - IncludeDocs = IncludeDocs0 andalso (not mango_idx_view:covers(Index, Fields)), - Args#mrargs{include_docs = IncludeDocs}. +consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs} = Args) -> + Covering = mango_idx_view:covers(Index, Fields), + Args0 = Args#mrargs{include_docs = IncludeDocs andalso (not Covering)}, + case + { + Args0#mrargs.include_docs, + Covering, + couch_util:get_value(callback_args, Args#mrargs.extra) + } + of + {false, true, ViewCBArgs} when ViewCBArgs =/= undefined -> + VCBSelector = viewcbargs_get(selector, ViewCBArgs), + VCBFields = viewcbargs_get(fields, ViewCBArgs), + ViewCBArgs0 = viewcbargs_new(VCBSelector, VCBFields, Index), + Extra = couch_util:set_value(callback_args, Args#mrargs.extra, ViewCBArgs0), + Args0#mrargs{extra = Extra}; + _ -> + Args0 + end. -spec doc_member_and_extract(#cursor{}, row_properties()) -> Result when Result :: @@ -678,7 +687,7 @@ base_opts_test() -> {callback_args, #{ selector => selector, fields => Fields, - covering_index => Index + covering_index => undefined }}, {ignore_partition_query_limit, true} ], @@ -691,23 +700,7 @@ base_opts_test() -> include_docs = true, extra = Extra }, - ?assertEqual(MRArgs, base_args(Cursor)), - - % non-covering index - Cursor1 = Cursor#cursor{fields = all_fields}, - Extra1 = - [ - {callback, {mango_cursor_view, view_cb}}, - {selector, selector}, - {callback_args, #{ - selector => selector, - fields => all_fields, - covering_index => undefined - }}, - {ignore_partition_query_limit, true} - ], - MRArgs1 = MRArgs#mrargs{extra = Extra1}, - ?assertEqual(MRArgs1, base_args(Cursor1)). + ?assertEqual(MRArgs, base_args(Cursor)). apply_opts_empty_test() -> ?assertEqual(args, apply_opts([], args)). @@ -780,10 +773,18 @@ consider_index_coverage_positive_test() -> def = {[{<<"fields">>, {[]}}]} }, Fields = [<<"_id">>], - MRArgs = #mrargs{include_docs = true}, - MRArgsRef = MRArgs#mrargs{include_docs = false}, + MRArgs = + #mrargs{ + include_docs = true, + extra = [{callback_args, viewcbargs_new(selector, fields, undefined)}] + }, + MRArgsRef = + MRArgs#mrargs{ + include_docs = false, + extra = [{callback_args, viewcbargs_new(selector, fields, Index)}] + }, ?assertEqual(MRArgsRef, consider_index_coverage(Index, Fields, MRArgs)), - MRArgs1 = #mrargs{include_docs = false}, + MRArgs1 = MRArgs#mrargs{include_docs = false}, ?assertEqual(MRArgsRef, consider_index_coverage(Index, Fields, MRArgs1)). consider_index_coverage_negative_test() -> @@ -792,7 +793,15 @@ consider_index_coverage_negative_test() -> MRArgs = #mrargs{include_docs = true}, ?assertEqual(MRArgs, consider_index_coverage(Index, Fields, MRArgs)), MRArgs1 = #mrargs{include_docs = false}, - ?assertEqual(MRArgs1, consider_index_coverage(Index, Fields, MRArgs1)). + ?assertEqual(MRArgs1, consider_index_coverage(Index, Fields, MRArgs1)), + % no extra attributes hence no effect + Index1 = + #idx{ + type = <<"json">>, + def = {[{<<"fields">>, {[]}}]} + }, + MRArgs2 = #mrargs{include_docs = false}, + ?assertEqual(MRArgs1, consider_index_coverage(Index1, [<<"_id">>], MRArgs2)). derive_doc_from_index_test() -> Index = From 842663dd2d34e75921de8eb0b8ba3b777c82470c Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Mon, 17 Apr 2023 19:49:34 +0200 Subject: [PATCH 10/11] mango: fix definition of index coverage Covering indexes shall provide all the fields that the selector may contain, otherwise the derived documents would get dropped on the "match and extract" phase even if they were matching. Extend the integration tests to check this case as well. --- src/mango/src/mango_cursor_view.erl | 45 +++++++++++++++++++++- src/mango/src/mango_selector.erl | 49 +++++++++++++++++++++++- src/mango/test/22-covering-index-test.py | 18 ++++++++- 3 files changed, 107 insertions(+), 5 deletions(-) diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 474d3bfe6f9..4dbea77c8c0 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -108,11 +108,18 @@ create(Db, Indexes, Selector, Opts) -> bookmark = Bookmark }}. +-spec required_fields(#cursor{}) -> fields(). +required_fields(#cursor{fields = all_fields}) -> + all_fields; +required_fields(#cursor{fields = Fields, selector = Selector}) -> + lists:usort(Fields ++ mango_selector:fields(Selector)). + -spec explain(#cursor{}) -> nonempty_list(term()). explain(#cursor{opts = Opts} = Cursor) -> BaseArgs = base_args(Cursor), Args0 = apply_opts(Opts, BaseArgs), - #cursor{index = Index, fields = Fields} = Cursor, + #cursor{index = Index} = Cursor, + Fields = required_fields(Cursor), Args = consider_index_coverage(Index, Fields, Args0), [ @@ -197,7 +204,8 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu BaseArgs = base_args(Cursor), #cursor{opts = Opts, bookmark = Bookmark} = Cursor, Args0 = apply_opts(Opts, BaseArgs), - Args1 = consider_index_coverage(Idx, Cursor#cursor.fields, Args0), + Fields = required_fields(Cursor), + Args1 = consider_index_coverage(Idx, Fields, Args0), Args = mango_json_bookmark:update_args(Bookmark, Args1), UserCtx = couch_util:get_value(user_ctx, Opts, #user_ctx{}), DbOpts = [{user_ctx, UserCtx}], @@ -859,6 +867,39 @@ create_test() -> }, ?assertEqual({ok, Cursor}, create(db, Indexes, Selector, Options)). +to_selector(Map) -> + test_util:as_selector(Map). + +required_fields_all_fields_test() -> + Cursor = #cursor{fields = all_fields}, + ?assertEqual(all_fields, required_fields(Cursor)). + +required_fields_disjoint_fields_test() -> + Fields1 = [<<"field1">>, <<"field2">>, <<"field3">>], + Selector1 = to_selector(#{}), + Cursor1 = #cursor{fields = Fields1, selector = Selector1}, + ?assertEqual([<<"field1">>, <<"field2">>, <<"field3">>], required_fields(Cursor1)), + Fields2 = [<<"field1">>, <<"field2">>], + Selector2 = to_selector(#{<<"field3">> => undefined, <<"field4">> => undefined}), + Cursor2 = #cursor{fields = Fields2, selector = to_selector(Selector2)}, + ?assertEqual( + [<<"field1">>, <<"field2">>, <<"field3">>, <<"field4">>], required_fields(Cursor2) + ). + +required_fields_overlapping_fields_test() -> + Fields1 = [<<"field1">>, <<"field2">>, <<"field3">>], + Selector1 = to_selector(#{<<"field3">> => undefined, <<"field4">> => undefined}), + Cursor1 = #cursor{fields = Fields1, selector = Selector1}, + ?assertEqual( + [<<"field1">>, <<"field2">>, <<"field3">>, <<"field4">>], required_fields(Cursor1) + ), + Fields2 = [<<"field3">>, <<"field1">>, <<"field2">>], + Selector2 = to_selector(#{<<"field4">> => undefined, <<"field1">> => undefined}), + Cursor2 = #cursor{fields = Fields2, selector = Selector2}, + ?assertEqual( + [<<"field1">>, <<"field2">>, <<"field3">>, <<"field4">>], required_fields(Cursor2) + ). + explain_test() -> Cursor = #cursor{ diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 7de16bd5162..59be7a6ebaf 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -16,7 +16,8 @@ normalize/1, match/2, has_required_fields/2, - is_constant_field/2 + is_constant_field/2, + fields/1 ]). -include_lib("couch/include/couch_db.hrl"). @@ -638,6 +639,14 @@ is_constant_field([{[{Field, {[{Cond, _Val}]}}]} | _Rest], Field) -> is_constant_field([{[{_UnMatched, _}]} | Rest], Field) -> is_constant_field(Rest, Field). +-spec fields(selector()) -> fields(). +fields({[{<<"$", _/binary>>, Args}]}) when is_list(Args) -> + lists:flatmap(fun fields/1, Args); +fields({[{Field, _Cond}]}) -> + [Field]; +fields({[]}) -> + []. + %%%%%%%% module tests below %%%%%%%% -ifdef(TEST). @@ -1007,4 +1016,42 @@ match_demo_test_() -> ?_assertEqual(false, Check({[{<<"_id">>, <<"foo">>}, {<<"_rev">>, <<"quux">>}]})) ]. +fields_of(Selector) -> + fields(test_util:as_selector(Selector)). + +fields_empty_test() -> + ?assertEqual([], fields_of(#{})). + +fields_primitive_test() -> + Selector = #{<<"field">> => undefined}, + ?assertEqual([<<"field">>], fields_of(Selector)). + +fields_nested_test() -> + Selector = #{<<"field1">> => #{<<"field2">> => undefined}}, + ?assertEqual([<<"field1.field2">>], fields_of(Selector)). + +fields_and_test() -> + Selector1 = #{<<"$and">> => []}, + ?assertEqual([], fields_of(Selector1)), + Selector2 = #{ + <<"$and">> => [#{<<"field1">> => undefined}, #{<<"field2">> => undefined}] + }, + ?assertEqual([<<"field1">>, <<"field2">>], fields_of(Selector2)). + +fields_or_test() -> + Selector1 = #{<<"$or">> => []}, + ?assertEqual([], fields_of(Selector1)), + Selector2 = #{ + <<"$or">> => [#{<<"field1">> => undefined}, #{<<"field2">> => undefined}] + }, + ?assertEqual([<<"field1">>, <<"field2">>], fields_of(Selector2)). + +fields_nor_test() -> + Selector1 = #{<<"$nor">> => []}, + ?assertEqual([], fields_of(Selector1)), + Selector2 = #{ + <<"$nor">> => [#{<<"field1">> => undefined}, #{<<"field2">> => undefined}] + }, + ?assertEqual([<<"field1">>, <<"field2">>], fields_of(Selector2)). + -endif. diff --git a/src/mango/test/22-covering-index-test.py b/src/mango/test/22-covering-index-test.py index b2f0202ed55..52a7f361287 100644 --- a/src/mango/test/22-covering-index-test.py +++ b/src/mango/test/22-covering-index-test.py @@ -21,8 +21,8 @@ def is_covered(self, selector, fields, index, use_index=None): self.assertEqual(resp["mrargs"]["include_docs"], False) self.assertEqual(resp["covered"], True) - def is_not_covered(self, selector, fields): - resp = self.db.find(selector, fields=fields, explain=True) + def is_not_covered(self, selector, fields, use_index=None): + resp = self.db.find(selector, fields=fields, use_index=use_index, explain=True) self.assertEqual(resp["mrargs"]["include_docs"], True) self.assertEqual(resp["covered"], False) @@ -74,6 +74,20 @@ def test_index_does_not_cover_query_partial_selector_id(self): def test_index_does_not_cover_query_partial_selector(self): self.is_not_covered({"name.last": "Hernandez"}, ["name.first"]) + def test_index_does_not_cover_selector_with_more_fields(self): + self.is_not_covered( + { + "$and": [ + {"age": {"$ne": 23}}, + {"twitter": {"$not": {"$regex": "^@.*[0-9]+$"}}}, + {"location.address.number": {"$gt": 4288}}, + {"location.city": {"$ne": "Pico Rivera"}}, + ] + }, + ["twitter"], + use_index="twitter", + ) + def test_covering_index_provides_correct_answer_id(self): docs = self.db.find({"age": {"$gte": 32}}, fields=["_id"]) expected = [ From cb9a3bf3c922ce1e5b91cbd84f7fb4af1a5d6fb5 Mon Sep 17 00:00:00 2001 From: Gabor Pali Date: Tue, 18 Apr 2023 09:37:42 +0200 Subject: [PATCH 11/11] mango: refactor --- src/mango/src/mango_cursor_view.erl | 43 +++++++++++++++-------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 4dbea77c8c0..eec8dc4fe0d 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -114,14 +114,19 @@ required_fields(#cursor{fields = all_fields}) -> required_fields(#cursor{fields = Fields, selector = Selector}) -> lists:usort(Fields ++ mango_selector:fields(Selector)). --spec explain(#cursor{}) -> nonempty_list(term()). -explain(#cursor{opts = Opts} = Cursor) -> +-spec apply_cursor_opts(#cursor{}) -> {#mrargs{}, boolean()}. +apply_cursor_opts(#cursor{} = Cursor) -> + #cursor{index = Index, opts = Opts} = Cursor, BaseArgs = base_args(Cursor), Args0 = apply_opts(Opts, BaseArgs), - #cursor{index = Index} = Cursor, Fields = required_fields(Cursor), Args = consider_index_coverage(Index, Fields, Args0), + Covered = mango_idx_view:covers(Index, Fields), + {Args, Covered}. +-spec explain(#cursor{}) -> nonempty_list(term()). +explain(Cursor) -> + {Args, Covered} = apply_cursor_opts(Cursor), [ {mrargs, {[ @@ -136,7 +141,7 @@ explain(#cursor{opts = Opts} = Cursor) -> {update, Args#mrargs.update}, {conflicts, Args#mrargs.conflicts} ]}}, - {covered, mango_idx_view:covers(Index, Fields)} + {covered, Covered} ]. % replace internal values that cannot @@ -201,12 +206,9 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu % empty indicates unsatisfiable ranges, so don't perform search {ok, UserAcc}; _ -> - BaseArgs = base_args(Cursor), + {Args0, _Covered} = apply_cursor_opts(Cursor), #cursor{opts = Opts, bookmark = Bookmark} = Cursor, - Args0 = apply_opts(Opts, BaseArgs), - Fields = required_fields(Cursor), - Args1 = consider_index_coverage(Idx, Fields, Args0), - Args = mango_json_bookmark:update_args(Bookmark, Args1), + Args = mango_json_bookmark:update_args(Bookmark, Args0), UserCtx = couch_util:get_value(user_ctx, Opts, #user_ctx{}), DbOpts = [{user_ctx, UserCtx}], Result = @@ -573,24 +575,25 @@ apply_opts([{_, _} | Rest], Args) -> apply_opts(Rest, Args). -spec consider_index_coverage(#idx{}, fields(), #mrargs{}) -> #mrargs{}. -consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs} = Args) -> +consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs0} = Args0) -> Covering = mango_idx_view:covers(Index, Fields), - Args0 = Args#mrargs{include_docs = IncludeDocs andalso (not Covering)}, + Args = Args0#mrargs{include_docs = IncludeDocs0 andalso (not Covering)}, + #mrargs{include_docs = IncludeDocs, extra = Extra0} = Args, case { - Args0#mrargs.include_docs, + IncludeDocs, Covering, - couch_util:get_value(callback_args, Args#mrargs.extra) + couch_util:get_value(callback_args, Extra0) } of - {false, true, ViewCBArgs} when ViewCBArgs =/= undefined -> - VCBSelector = viewcbargs_get(selector, ViewCBArgs), - VCBFields = viewcbargs_get(fields, ViewCBArgs), - ViewCBArgs0 = viewcbargs_new(VCBSelector, VCBFields, Index), - Extra = couch_util:set_value(callback_args, Args#mrargs.extra, ViewCBArgs0), - Args0#mrargs{extra = Extra}; + {false, true, ViewCBArgs0} when ViewCBArgs0 =/= undefined -> + VCBSelector = viewcbargs_get(selector, ViewCBArgs0), + VCBFields = viewcbargs_get(fields, ViewCBArgs0), + ViewCBArgs = viewcbargs_new(VCBSelector, VCBFields, Index), + Extra = couch_util:set_value(callback_args, Extra0, ViewCBArgs), + Args#mrargs{extra = Extra}; _ -> - Args0 + Args end. -spec doc_member_and_extract(#cursor{}, row_properties()) -> Result when