Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_COMPACT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_CONCAT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_CONCAT_AGG;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_CONTAINS;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_DISTINCT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_EXCEPT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_INTERSECT;
Expand Down Expand Up @@ -694,6 +695,7 @@ Builder populate2() {
defineMethod(STRUCT_ACCESS, BuiltInMethod.STRUCT_ACCESS.method, NullPolicy.ANY);
defineMethod(MEMBER_OF, BuiltInMethod.MEMBER_OF.method, NullPolicy.NONE);
defineMethod(ARRAY_COMPACT, BuiltInMethod.ARRAY_COMPACT.method, NullPolicy.STRICT);
defineMethod(ARRAY_CONTAINS, BuiltInMethod.ARRAY_CONTAINS.method, NullPolicy.ANY);
defineMethod(ARRAY_DISTINCT, BuiltInMethod.ARRAY_DISTINCT.method, NullPolicy.STRICT);
defineMethod(ARRAY_EXCEPT, BuiltInMethod.ARRAY_EXCEPT.method, NullPolicy.ANY);
defineMethod(ARRAY_INTERSECT, BuiltInMethod.ARRAY_INTERSECT.method, NullPolicy.ANY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3880,6 +3880,11 @@ public static List compact(List list) {
return result;
}

/** Supports the {@code ARRAY_CONTAINS} function: returns whether
 * {@code element} occurs in {@code list}.
 *
 * <p>Null arguments are handled before this method is invoked (the
 * operator is registered with {@code NullPolicy.ANY}). */
public static boolean arrayContains(List list, Object element) {
  // Equivalent to list.contains(element): the List contract defines
  // contains(o) as indexOf(o) >= 0.
  return list.indexOf(element) >= 0;
}

/** Support the ARRAY_DISTINCT function. */
public static List distinct(List list) {
Set result = new LinkedHashSet<>(list);
Expand Down
3 changes: 3 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/SqlKind.java
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,9 @@ public enum SqlKind {
/** {@code ARRAY_CONCAT} function (BigQuery semantics). */
ARRAY_CONCAT,

/** {@code ARRAY_CONTAINS} function (Spark semantics). */
ARRAY_CONTAINS,

/** {@code ARRAY_DISTINCT} function (Spark semantics). */
ARRAY_DISTINCT,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,14 @@ private static RelDataType arrayReturnType(SqlOperatorBinding opBinding) {
ReturnTypes.LEAST_RESTRICTIVE,
OperandTypes.AT_LEAST_ONE_SAME_VARIADIC);

/** The "ARRAY_DISTINCT(array)" function (Spark). */
/** The "ARRAY_CONTAINS(array, element)" function (Spark semantics);
 * returns whether the {@code array} contains the {@code element}. */
@LibraryOperator(libraries = {SPARK})
public static final SqlFunction ARRAY_CONTAINS =
SqlBasicFunction.create(SqlKind.ARRAY_CONTAINS,
ReturnTypes.BOOLEAN_NULLABLE,
OperandTypes.ARRAY_ELEMENT);

/** The "ARRAY_DISTINCT(array)" function. */
@LibraryOperator(libraries = {SPARK})
public static final SqlFunction ARRAY_DISTINCT =
SqlBasicFunction.create(SqlKind.ARRAY_DISTINCT,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.sql.type;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.SqlCallBinding;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperandCountRange;
import org.apache.calcite.sql.SqlOperator;

import com.google.common.collect.ImmutableList;

import static org.apache.calcite.sql.type.NonNullableAccessors.getComponentTypeOrThrow;
import static org.apache.calcite.util.Static.RESOURCE;

/**
 * Operand type-checking strategy where the first operand must be an array and
 * the second operand must be comparable with the array's element type.
*/
public class ArrayElementOperandTypeChecker implements SqlOperandTypeChecker {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Many codes are same as MultisetOperandTypeChecker, Can we extract them?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I referred to it, but did not find a good way to abstract the common code:
MultisetOperandTypeChecker checks two multiset operands, whereas
ArrayElementOperandTypeChecker checks an array operand against an element type.

//~ Methods ----------------------------------------------------------------

@Override public boolean checkOperandTypes(
    SqlCallBinding callBinding,
    boolean throwOnFailure) {
  // Operand #0 must be an array; delegate that check to the ARRAY checker.
  final SqlNode arrayOperand = callBinding.operand(0);
  final boolean operandIsArray =
      OperandTypes.ARRAY.checkSingleOperandType(callBinding, arrayOperand, 0,
          throwOnFailure);
  if (!operandIsArray) {
    return false;
  }

  // Operand #1 must be comparable with the array's element type;
  // leastRestrictive returns null when the two types have no common type.
  final RelDataType componentType =
      getComponentTypeOrThrow(
          SqlTypeUtil.deriveType(callBinding, arrayOperand));
  final SqlNode elementOperand = callBinding.operand(1);
  final RelDataType elementType =
      SqlTypeUtil.deriveType(callBinding, elementOperand);
  final RelDataType commonType =
      callBinding.getTypeFactory()
          .leastRestrictive(ImmutableList.of(componentType, elementType));
  if (commonType != null) {
    return true;
  }
  if (throwOnFailure) {
    throw callBinding.newError(
        RESOURCE.typeNotComparable(componentType.toString(),
            elementType.toString()));
  }
  return false;
}

@Override public SqlOperandCountRange getOperandCountRange() {
  // Calls validated by this checker always take exactly two operands:
  // the array and the candidate element.
  final int operandCount = 2;
  return SqlOperandCountRanges.of(operandCount);
}

/** Returns the allowed call signature for error messages.
 *
 * <p>The checker validates a function call taking an array and an element
 * (for example {@code ARRAY_CONTAINS(array, element)}), so the signature is
 * rendered in function style. The previous string,
 * {@code "<ARRAY> " + opName + " <ARRAY>"}, was copied from
 * {@code MultisetOperandTypeChecker} and wrongly described an infix
 * operator on two arrays. */
@Override public String getAllowedSignatures(SqlOperator op, String opName) {
  return opName + "(<ARRAY>, <ELEMENT>)";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,9 @@ public static SqlOperandTypeChecker variadic(
public static final SqlSingleOperandTypeChecker MAP =
family(SqlTypeFamily.MAP);

/**
 * Operand type-checking strategy where the first operand must be an array
 * and the second operand must be comparable with the array's element type.
 *
 * @see ArrayElementOperandTypeChecker
 */
public static final SqlOperandTypeChecker ARRAY_ELEMENT =
new ArrayElementOperandTypeChecker();

/**
* Operand type-checking strategy where type must be a literal or NULL.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,13 +246,21 @@ public static SqlCall stripSeparator(SqlCall call) {
*/
public static final SqlReturnTypeInference BOOLEAN =
explicit(SqlTypeName.BOOLEAN);

/**
* Type-inference strategy whereby the result type of a call is Boolean,
* with nulls allowed if any of the operands allow nulls.
*/
public static final SqlReturnTypeInference BOOLEAN_NULLABLE =
BOOLEAN.andThen(SqlTypeTransforms.TO_NULLABLE);

/**
 * Type-inference strategy whereby the result type of a call is Boolean,
 * with nulls allowed if the type of the operand #0 (0-based) is nullable.
 *
 * <p>Unlike {@link #BOOLEAN_NULLABLE}, the nullability of operands other
 * than the first does not affect the result type.
 */
public static final SqlReturnTypeInference BOOLEAN_NULLABLE_IF_ARG0_NULLABLE =
BOOLEAN.andThen(SqlTypeTransforms.ARG0_NULLABLE);

/**
* Type-inference strategy with similar effect to {@link #BOOLEAN_NULLABLE},
* which is more efficient, but can only be used if all arguments are
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,21 @@ public abstract class SqlTypeTransforms {
return typeToTransform;
};

/**
 * Type-inference transform strategy that keeps the derived type unchanged,
 * except that it is made nullable if and only if the type of the call's
 * operand #0 (0-based) is nullable.
 */
public static final SqlTypeTransform ARG0_NULLABLE =
    (opBinding, typeToTransform) -> {
      final RelDataType firstOperandType = opBinding.getOperandType(0);
      return firstOperandType.isNullable()
          ? opBinding.getTypeFactory()
              .createTypeWithNullability(typeToTransform, true)
          : typeToTransform;
    };

/**
* Type-inference strategy whereby the result type of a call is VARYING the
* type given. The length returned is the same as length of the first
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ public enum BuiltInMethod {
SUBMULTISET_OF(SqlFunctions.class, "submultisetOf", Collection.class,
Collection.class),
ARRAY_COMPACT(SqlFunctions.class, "compact", List.class),
ARRAY_CONTAINS(SqlFunctions.class, "arrayContains", List.class, Object.class),
ARRAY_DISTINCT(SqlFunctions.class, "distinct", List.class),
ARRAY_MAX(SqlFunctions.class, "arrayMax", List.class),
ARRAY_MIN(SqlFunctions.class, "arrayMin", List.class),
Expand Down
1 change: 1 addition & 0 deletions site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2654,6 +2654,7 @@ BigQuery's type system uses confusingly different names for types and functions:
| s | ARRAY(expr [, expr ]*) | Construct an array in Apache Spark
| s | ARRAY_COMPACT(array) | Removes null values from the *array*
| b | ARRAY_CONCAT(array [, array ]*) | Concatenates one or more arrays. If any input argument is `NULL` the function returns `NULL`
| s | ARRAY_CONTAINS(array, element) | Returns true if the *array* contains the *element*
| s | ARRAY_DISTINCT(array) | Removes duplicate values from the *array* that keeps ordering of elements
| s | ARRAY_EXCEPT(array1, array2) | Returns an array of the elements in *array1* but not in *array2*, without duplicates
| s | ARRAY_INTERSECT(array1, array2) | Returns an array of the elements in the intersection of *array1* and *array2*, without duplicates
Expand Down
27 changes: 27 additions & 0 deletions testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5364,6 +5364,33 @@ private static void checkIf(SqlOperatorFixture f) {
f.checkNull("array_concat(cast(null as integer array), array[1])");
}

/** Tests {@code ARRAY_CONTAINS} function from Spark. */
@Test void testArrayContainsFunc() {
final SqlOperatorFixture f0 = fixture();
f0.setFor(SqlLibraryOperators.ARRAY_CONTAINS);
f0.checkFails("^array_contains(array[1, 2], 1)^",
"No match found for function signature "
+ "ARRAY_CONTAINS\\(<INTEGER ARRAY>, <NUMERIC>\\)", false);

final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.SPARK);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again curious what the behavior is/should be if you search an array of type X for a value of type Y, obviously it would return false but should it be allowed in the first place?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, @tanclary. The validator should give an error if you - say - search for a BOOLEAN in a DATE ARRAY. We should add a test case to this test method.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hi @julianhyde @tanclary , there is the unit test in the end
f.checkFails("^array_contains(array[1, 2], true)^",
"INTEGER is not comparable to BOOLEAN", false);

f.checkScalar("array_contains(array[1, 2], 1)", true,
"BOOLEAN NOT NULL");
f.checkScalar("array_contains(array[1], 1)", true,
"BOOLEAN NOT NULL");
f.checkScalar("array_contains(array(), 1)", false,
"BOOLEAN NOT NULL");
f.checkScalar("array_contains(array[array[1, 2], array[3, 4]], array[1, 2])", true,
"BOOLEAN NOT NULL");
f.checkScalar("array_contains(array[map[1, 'a'], map[2, 'b']], map[1, 'a'])", true,
"BOOLEAN NOT NULL");
Comment on lines +5384 to +5385
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

spark.sql("""select array_contains(array(map(1, "1"), map(2, "2")), map(2, "2"))""").show()
org.apache.spark.sql.AnalysisException: cannot resolve 'array_contains(array(map(1, '1'), map(2, '2')), map(2, '2'))' due to data type mismatch: function array_contains does not support ordering on type map<int,string>; line 1 pos 7;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Due to implementation limitation, currently Spark can't compare or do equality check between map types. As a result, map values can't appear in EQUAL or comparison expressions, can't be grouping key, etc.()
while calcite Map runtime implementation using java collection Map, which supports equality check. while spark not

/**
 * This is an internal data representation for map type in Spark SQL. This should not implement
 * `equals` and `hashCode` because the type cannot be used as join keys, grouping keys, or
 * in equality tests. See SPARK-9415 and PR#13847 for the discussions.
 */
abstract class MapData extends Serializable {

apache/spark#23045

f.checkNull("array_contains(cast(null as integer array), 1)");
f.checkType("array_contains(cast(null as integer array), 1)", "BOOLEAN");
f.checkNull("array_contains(array[1, null], cast(null as integer))");
f.checkType("array_contains(array[1, null], cast(null as integer))", "BOOLEAN");
f.checkFails("^array_contains(array[1, 2], true)^",
"INTEGER is not comparable to BOOLEAN", false);
}

/** Tests {@code ARRAY_DISTINCT} function from Spark. */
@Test void testArrayDistinctFunc() {
final SqlOperatorFixture f0 = fixture();
Expand Down