From 84035954d9cff82d4e1b272d95f0bd4669328cfa Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 26 Jun 2024 18:15:06 +0800 Subject: [PATCH 1/3] init --- python/pyspark/sql/functions/builtin.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index ed66ca8684ef..c800ffa460cc 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -13618,10 +13618,7 @@ def array_contains(col: "ColumnOrName", value: Any) -> Column: | true| +----------+ """ - from pyspark.sql.classic.column import _to_java_column - - value = value._jc if isinstance(value, Column) else value - return _invoke_function("array_contains", _to_java_column(col), value) + return _invoke_function_over_columns("array_contains", col, lit(value)) @_try_remote_functions @@ -14064,10 +14061,7 @@ def array_position(col: "ColumnOrName", value: Any) -> Column: +-------------------------+ """ - from pyspark.sql.classic.column import _to_java_column - - value = _to_java_column(value) if isinstance(value, Column) else value - return _invoke_function("array_position", _to_java_column(col), value) + return _invoke_function_over_columns("array_position", col, lit(value)) @_try_remote_functions @@ -14515,10 +14509,7 @@ def array_remove(col: "ColumnOrName", element: Any) -> Column: | [2, 3]| +-----------------------+ """ - from pyspark.sql.classic.column import _to_java_column - - element = _to_java_column(element) if isinstance(element, Column) else element - return _invoke_function("array_remove", _to_java_column(col), element) + return _invoke_function_over_columns("array_remove", col, lit(element)) @_try_remote_functions @@ -17327,10 +17318,7 @@ def map_contains_key(col: "ColumnOrName", value: Any) -> Column: | true| +---------------------------+ """ - from pyspark.sql.classic.column import _to_java_column - - value = _to_java_column(value) if 
isinstance(value, Column) else value - return _invoke_function("map_contains_key", _to_java_column(col), value) + return _invoke_function_over_columns("map_contains_key", col, lit(value)) @_try_remote_functions From 14f1e0bbd5b63f7aee311f23edab5734e82fac4e Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 26 Jun 2024 18:24:27 +0800 Subject: [PATCH 2/3] init --- python/pyspark/sql/functions/builtin.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index c800ffa460cc..c8936cd1050c 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -10938,11 +10938,15 @@ def substring( target column to work on. pos : :class:`~pyspark.sql.Column` or str or int starting position in str. + + .. versionchanged:: 4.0.0 + `pos` now accepts Columns or names of Columns. + len : :class:`~pyspark.sql.Column` or str or int length of chars. .. versionchanged:: 4.0.0 - `pos` and `len` now also accept Columns or names of Columns. + `len` now accepts Columns or names of Columns. 
Returns ------- @@ -10962,11 +10966,9 @@ def substring( >>> df.select(substring(df.s, df.p, df.l).alias('s')).collect() [Row(s='par')] """ - from pyspark.sql.classic.column import _to_java_column - - pos = _to_java_column(lit(pos) if isinstance(pos, int) else pos) - len = _to_java_column(lit(len) if isinstance(len, int) else len) - return _invoke_function("substring", _to_java_column(str), pos, len) + pos = lit(pos) if isinstance(pos, int) else pos + len = lit(len) if isinstance(len, int) else len + return _invoke_function_over_columns("substring", str, pos, len) @_try_remote_functions From ed58bc5d3d88f72c0d054d3fb803f7dd9fce0d74 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 26 Jun 2024 18:29:25 +0800 Subject: [PATCH 3/3] init --- python/pyspark/sql/functions/builtin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index c8936cd1050c..b496cdaf0955 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -10940,13 +10940,13 @@ def substring( starting position in str. .. versionchanged:: 4.0.0 - `pos` now accepts Columns or names of Columns. + `pos` now accepts a column or a column name. len : :class:`~pyspark.sql.Column` or str or int length of chars. .. versionchanged:: 4.0.0 - `len` now accepts Columns or names of Columns. + `len` now accepts a column or a column name. Returns -------