2 changes: 2 additions & 0 deletions R/pkg/R/functions.R
@@ -2894,6 +2894,8 @@ setMethod("from_json", signature(x = "Column", schema = "characterOrstructTypeOr
# treated as struct or element type of array in order to make it more
# R-friendly.
if (class(schema) == "Column") {
+    df <- createDataFrame(list(list(0)))
+    jschema <- collect(select(df, schema))[[1]][[1]]
jschema <- callJStatic("org.apache.spark.sql.api.r.SQLUtils",
"createArrayType",
jschema)
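The two added lines evaluate the schema Column eagerly: it is selected against a throwaway one-row DataFrame and the single resulting cell is collected, yielding the JVM-side schema value that createArrayType expects. A Scala sketch of the same trick, assuming an active SparkSession named spark (the JSON sample is illustrative, not from the patch):

    import org.apache.spark.sql.functions.{lit, schema_of_json}

    // Evaluate a schema-producing Column by running it against a one-row
    // DataFrame and collecting the single cell, as the R helper above does.
    val ddl = spark.range(1)
      .select(schema_of_json(lit("""{"a": 1}""")))
      .head()
      .getString(0)
    // ddl == "STRUCT<a: BIGINT>"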
@@ -7102,7 +7102,8 @@ object functions {
* @group collection_funcs
* @since 3.4.0
*/
-  def sequence(start: Column, stop: Column): Column = sequence(start, stop, lit(1L))
+  def sequence(start: Column, stop: Column): Column =
+    Column.fn("sequence", start, stop)
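Building the unresolved call directly means the client no longer injects lit(1L) as a default step: the analyzer supplies the default instead, so the plan and the generated column name both reflect a genuine two-argument call. A usage sketch, with the resulting column name taken from this PR's golden files:

    import org.apache.spark.sql.functions.{lit, sequence}

    // The two-arg form no longer expands to sequence(start, stop, lit(1L)),
    // so the result column is named sequence(1, 10), not sequence(1, 10, 1).
    val names = spark.range(1).select(sequence(lit(1), lit(10))).columns
    // names == Array("sequence(1, 10)")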

/**
* Creates an array containing the left argument repeated the number of times given by the right
@@ -218,7 +218,6 @@ class FunctionTestSuite extends ConnectFunSuite {
to_json(a, Collections.emptyMap[String, String]),
to_json(a, Map.empty[String, String]))
testEquals("sort_array", sort_array(a), sort_array(a, asc = true))
testEquals("sequence", sequence(lit(1), lit(10)), sequence(lit(1), lit(10), lit(1L)))
testEquals(
"from_csv",
from_csv(a, lit(schema.toDDL), Collections.emptyMap[String, String]),
@@ -1,2 +1,2 @@
-Project [arrays_zip(e#0, sequence(cast(1 as bigint), cast(20 as bigint), Some(cast(1 as bigint)), Some(America/Los_Angeles)), e, 1) AS arrays_zip(e, sequence(1, 20, 1))#0]
+Project [arrays_zip(e#0, sequence(1, 20, None, Some(America/Los_Angeles)), e, 1) AS arrays_zip(e, sequence(1, 20))#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
@@ -1,2 +1,2 @@
-Project [concat(cast(e#0 as array<bigint>), cast(array(1, 2) as array<bigint>), sequence(cast(33 as bigint), cast(40 as bigint), Some(cast(1 as bigint)), Some(America/Los_Angeles))) AS concat(e, array(1, 2), sequence(33, 40, 1))#0]
+Project [concat(e#0, array(1, 2), sequence(33, 40, None, Some(America/Los_Angeles))) AS concat(e, array(1, 2), sequence(33, 40))#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
@@ -1,2 +1,2 @@
-Project [flatten(array(cast(e#0 as array<bigint>), sequence(cast(1 as bigint), cast(10 as bigint), Some(cast(1 as bigint)), Some(America/Los_Angeles)))) AS flatten(array(e, sequence(1, 10, 1)))#0]
+Project [flatten(array(e#0, sequence(1, 10, None, Some(America/Los_Angeles)))) AS flatten(array(e, sequence(1, 10)))#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
@@ -1,2 +1,2 @@
-Project [sequence(cast(1 as bigint), cast(10 as bigint), Some(cast(1 as bigint)), Some(America/Los_Angeles)) AS sequence(1, 10, 1)#0]
+Project [sequence(1, 10, None, Some(America/Los_Angeles)) AS sequence(1, 10)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
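All four golden plans change the same way: the optional step becomes None and the casts around start and stop disappear, since no long-typed default step forces the integer arguments to be widened to bigint anymore. One way to observe this, assuming a Spark shell (a sketch, not from the patch):

    import org.apache.spark.sql.functions.{lit, sequence}

    // Before this change the analyzed plan showed
    //   sequence(cast(1 as bigint), cast(10 as bigint), Some(cast(1 as bigint)), ...)
    // afterwards it shows sequence(1, 10, None, ...).
    spark.range(1).select(sequence(lit(1), lit(10))).explain(true)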
@@ -29,10 +29,6 @@
"literal": {
"integer": 20
}
-      }, {
-        "literal": {
-          "long": "1"
-        }
}]
}
}]
Binary file not shown.
@@ -42,10 +42,6 @@
"literal": {
"integer": 40
}
-      }, {
-        "literal": {
-          "long": "1"
-        }
}]
}
}]
Binary file not shown.
@@ -32,10 +32,6 @@
"literal": {
"integer": 10
}
-      }, {
-        "literal": {
-          "long": "1"
-        }
}]
}
}]
Binary file not shown.
@@ -22,10 +22,6 @@
"literal": {
"integer": 10
}
-      }, {
-        "literal": {
-          "long": "1"
-        }
}]
}
}]
Binary file not shown.
10 changes: 5 additions & 5 deletions python/pyspark/sql/column.py
@@ -712,11 +712,11 @@ def __getitem__(self, k: Any) -> "Column":
--------
>>> df = spark.createDataFrame([('abcedfg', {"key": "value"})], ["l", "d"])
>>> df.select(df.l[slice(1, 3)], df.d['key']).show()
-    +------------------+------+
-    |substring(l, 1, 3)|d[key]|
-    +------------------+------+
-    |               abc| value|
-    +------------------+------+
+    +---------------+------+
+    |substr(l, 1, 3)|d[key]|
+    +---------------+------+
+    |            abc| value|
+    +---------------+------+

[Review comment — Contributor] To avoid this change, I think we can call self.substring in this method. df.l[slice(1, 3)] does not require a certain function name, so keeping it as it was is better.

[Reply — peter-toth (author), Sep 20, 2023] I tried self.substring (see d611fd5), but there is no self.substring, so I reverted it in 6bd03d1. If the column name doesn't matter, then let's use self.substr.
"""
if isinstance(k, slice):
if k.step is not None:
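The slice branch above forwards to Column.substr, passing slice.stop through as the length argument (which is why slice(1, 3) selects three characters), so the rendered column name simply tracks whatever substr resolves to. A hypothetical Scala mirror of that dispatch; the helper and its names are illustrative, not part of the patch:

    import org.apache.spark.sql.Column
    import org.apache.spark.sql.functions.lit

    // Mirrors pyspark's Column.__getitem__ slice branch: an explicit step is
    // rejected, and slice.stop is forwarded as the substring length.
    def sliceItem(c: Column, start: Int, stop: Int, step: Option[Int] = None): Column = {
      require(step.isEmpty, "slice with step is not supported")
      c.substr(lit(start), lit(stop))
    }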
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/column.py
@@ -258,7 +258,7 @@ def substr(self, startPos: Union[int, "Column"], length: Union[int, "Column"]) -
error_class="NOT_COLUMN_OR_INT",
message_parameters={"arg_name": "length", "arg_type": type(length).__name__},
)
return Column(UnresolvedFunction("substring", [self._expr, start_expr, length_expr]))
return Column(UnresolvedFunction("substr", [self._expr, start_expr, length_expr]))

substr.__doc__ = PySparkColumn.substr.__doc__
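With both entry points rendering substr, the slice shortcut and an explicit .substr(...) call now print identically on Connect and classic. A spot-check in the style of the Scala Connect client, using the internal Column.fn helper that this PR's functions.scala change also relies on (an illustrative fragment, not public API):

    import org.apache.spark.sql.functions.{col, lit}

    // Build the same unresolved function the Python Connect client now emits.
    val c = Column.fn("substr", col("l"), lit(1), lit(3))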

4 changes: 2 additions & 2 deletions python/pyspark/sql/connect/functions.py
@@ -1358,7 +1358,7 @@ def var_samp(col: "ColumnOrName") -> Column:


def variance(col: "ColumnOrName") -> Column:
-    return var_samp(col)
+    return _invoke_function_over_columns("variance", col)


variance.__doc__ = pysparkfuncs.variance.__doc__
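Resolving variance by its own name, instead of delegating to var_samp on the client, keeps the printed column name aligned with the function the user actually called (see the doctest update in python/pyspark/sql/functions.py below). A sketch of the same pattern in the Scala Connect client style, again via the internal Column.fn helper:

    // Bind the built-in "variance" directly, so plans and column names read
    // variance(...) rather than the var_samp(...) it previously aliased to.
    def variance(col: Column): Column = Column.fn("variance", col)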
@@ -1944,7 +1944,7 @@ def map_concat(


def map_contains_key(col: "ColumnOrName", value: Any) -> Column:
-    return array_contains(map_keys(col), lit(value))
+    return _invoke_function("map_contains_key", _to_col(col), lit(value))


map_contains_key.__doc__ = pysparkfuncs.map_contains_key.__doc__
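Likewise, map_contains_key now binds the built-in function of that name instead of being rewritten client-side into array_contains(map_keys(...)), which both simplifies the plan and fixes the printed column name (see the doctest updates below). A matching sketch in the Scala Connect client style:

    // One unresolved call instead of a two-function client-side rewrite.
    def map_contains_key(column: Column, value: Any): Column =
      Column.fn("map_contains_key", column, lit(value))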
22 changes: 11 additions & 11 deletions python/pyspark/sql/functions.py
@@ -2960,7 +2960,7 @@ def variance(col: "ColumnOrName") -> Column:
>>> df = spark.range(6)
>>> df.select(variance(df.id)).show()
+------------+
-    |var_samp(id)|
+    |variance(id)|
+------------+
| 3.5|
+------------+
@@ -13779,17 +13779,17 @@ def map_contains_key(col: "ColumnOrName", value: Any) -> Column:
>>> from pyspark.sql.functions import map_contains_key
>>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data")
>>> df.select(map_contains_key("data", 1)).show()
-    +---------------------------------+
-    |array_contains(map_keys(data), 1)|
-    +---------------------------------+
-    |                             true|
-    +---------------------------------+
+    +-------------------------+
+    |map_contains_key(data, 1)|
+    +-------------------------+
+    |                     true|
+    +-------------------------+
>>> df.select(map_contains_key("data", -1)).show()
-    +----------------------------------+
-    |array_contains(map_keys(data), -1)|
-    +----------------------------------+
-    |                             false|
-    +----------------------------------+
+    +--------------------------+
+    |map_contains_key(data, -1)|
+    +--------------------------+
+    |                     false|
+    +--------------------------+
"""
return _invoke_function("map_contains_key", _to_java_column(col), value)

@@ -374,6 +374,7 @@ object CreateStruct {
case (u @ UnresolvedExtractValue(_, e: Literal), _) if e.dataType == StringType => Seq(e, u)
case (e: NamedExpression, _) if e.resolved => Seq(Literal(e.name), e)
case (e: NamedExpression, _) => Seq(NamePlaceholder, e)
+      case (g @ GetStructField(_, _, Some(name)), _) => Seq(Literal(name), g)
case (e, index) => Seq(Literal(s"col${index + 1}"), e)
})
}
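The added case means an unaliased but resolved GetStructField child now contributes its extracted field name to the struct, instead of falling through to the positional colN fallback on the last line. A simplified, self-contained sketch of just the naming rule (the helper is illustrative, not the actual CreateStruct code):

    import org.apache.spark.sql.catalyst.expressions.{Expression, GetStructField}

    // Simplified excerpt of the field-naming logic: a resolved struct-field
    // extraction keeps its own name; other anonymous children become colN.
    def fieldName(e: Expression, index: Int): String = e match {
      case GetStructField(_, _, Some(name)) => name               // new case in this PR
      case _                                => s"col${index + 1}" // existing fallback
    }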
@@ -319,12 +319,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
"type" -> v.getClass.toString))
}

-  def pivotColumnUnsupportedError(v: Any, dataType: DataType): RuntimeException = {
+  def pivotColumnUnsupportedError(v: Any, expr: Expression): RuntimeException = {
new SparkRuntimeException(
errorClass = "UNSUPPORTED_FEATURE.PIVOT_TYPE",
messageParameters = Map(
"value" -> v.toString,
"type" -> toSQLType(dataType)))
"type" -> (if (expr.resolved) toSQLType(expr.dataType) else "unknown")))
}
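Taking the whole Expression rather than just a DataType lets this error fire even when the pivot column never resolved: asking an unresolved expression for its dataType would itself throw, so the message now degrades to "unknown" instead. A minimal sketch of the guard, paraphrasing the change (toSQLType is approximated by DataType.sql here):

    import org.apache.spark.sql.catalyst.expressions.Expression

    // Only ask for a type when it is safe to do so; unresolved expressions
    // throw from dataType, hence the fallback string.
    def renderType(expr: Expression): String =
      if (expr.resolved) expr.dataType.sql else "unknown"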

def noDefaultForDataTypeError(dataType: DataType): SparkException = {