diff --git a/.github/labeler.yml b/.github/labeler.yml index 84dfa35f2627e..afaeeecda51a2 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -84,12 +84,12 @@ SPARK SHELL: - "repl/**/*" - "bin/spark-shell*" SQL: -#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/test_streaming.py"] +#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming.py"] - "**/sql/**/*" - "common/unsafe/**/*" #- "!python/pyspark/sql/avro/**/*" #- "!python/pyspark/sql/streaming/**/*" - #- "!python/pyspark/sql/tests/test_streaming.py" + #- "!python/pyspark/sql/tests/streaming/test_streaming.py" - "bin/spark-sql*" - "bin/beeline*" - "sbin/*thriftserver*.sh" @@ -125,7 +125,7 @@ STRUCTURED STREAMING: - "**/sql/**/streaming/**/*" - "connector/kafka-0-10-sql/**/*" - "python/pyspark/sql/streaming/**/*" - - "python/pyspark/sql/tests/test_streaming.py" + - "python/pyspark/sql/tests/streaming/test_streaming.py" - "**/*streaming.R" PYTHON: - "bin/pyspark*" @@ -156,5 +156,5 @@ CONNECT: - "**/sql/sparkconnect/**/*" - "python/pyspark/sql/**/connect/**/*" PROTOBUF: - - "connector/protobuf/**/*" - - "python/pyspark/sql/protobuf/**/*" \ No newline at end of file + - "connector/protobuf/**/*" + - "python/pyspark/sql/protobuf/**/*" diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 98eb7c0fb1e89..e4a515d203ccb 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -461,22 +461,22 @@ def __hash__(self): "pyspark.sql.tests.test_datasources", "pyspark.sql.tests.test_functions", "pyspark.sql.tests.test_group", - "pyspark.sql.tests.test_pandas_cogrouped_map", - "pyspark.sql.tests.test_pandas_grouped_map", - "pyspark.sql.tests.test_pandas_grouped_map_with_state", - "pyspark.sql.tests.test_pandas_map", + "pyspark.sql.tests.pandas.test_pandas_cogrouped_map", + 
"pyspark.sql.tests.pandas.test_pandas_grouped_map", + "pyspark.sql.tests.pandas.test_pandas_grouped_map_with_state", + "pyspark.sql.tests.pandas.test_pandas_map", "pyspark.sql.tests.test_arrow_map", - "pyspark.sql.tests.test_pandas_udf", - "pyspark.sql.tests.test_pandas_udf_grouped_agg", - "pyspark.sql.tests.test_pandas_udf_scalar", - "pyspark.sql.tests.test_pandas_udf_typehints", - "pyspark.sql.tests.test_pandas_udf_typehints_with_future_annotations", - "pyspark.sql.tests.test_pandas_udf_window", + "pyspark.sql.tests.pandas.test_pandas_udf", + "pyspark.sql.tests.pandas.test_pandas_udf_grouped_agg", + "pyspark.sql.tests.pandas.test_pandas_udf_scalar", + "pyspark.sql.tests.pandas.test_pandas_udf_typehints", + "pyspark.sql.tests.pandas.test_pandas_udf_typehints_with_future_annotations", + "pyspark.sql.tests.pandas.test_pandas_udf_window", "pyspark.sql.tests.test_readwriter", "pyspark.sql.tests.test_serde", "pyspark.sql.tests.test_session", - "pyspark.sql.tests.test_streaming", - "pyspark.sql.tests.test_streaming_listener", + "pyspark.sql.tests.streaming.test_streaming", + "pyspark.sql.tests.streaming.test_streaming_listener", "pyspark.sql.tests.test_types", "pyspark.sql.tests.test_udf", "pyspark.sql.tests.test_udf_profiler", @@ -492,10 +492,10 @@ def __hash__(self): # doctests # No doctests yet. 
# unittests - "pyspark.sql.tests.test_connect_column_expressions", - "pyspark.sql.tests.test_connect_plan_only", - "pyspark.sql.tests.test_connect_select_ops", - "pyspark.sql.tests.test_connect_basic", + "pyspark.sql.tests.connect.test_connect_column_expressions", + "pyspark.sql.tests.connect.test_connect_plan_only", + "pyspark.sql.tests.connect.test_connect_select_ops", + "pyspark.sql.tests.connect.test_connect_basic", ], excluded_python_implementations=[ "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and diff --git a/python/pyspark/sql/connect/README.md b/python/pyspark/sql/connect/README.md index b9cfb31d13cdb..fa712677c1203 100644 --- a/python/pyspark/sql/connect/README.md +++ b/python/pyspark/sql/connect/README.md @@ -46,6 +46,6 @@ To use the release version of Spark Connect: ## Run Tests ```bash -./python/run-tests --testnames 'pyspark.sql.tests.test_connect_basic' +./python/run-tests --testnames 'pyspark.sql.tests.connect.test_connect_basic' ``` diff --git a/python/pyspark/sql/tests/connect/__init__.py b/python/pyspark/sql/tests/connect/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/python/pyspark/sql/tests/connect/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pyspark/sql/tests/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py similarity index 97% rename from python/pyspark/sql/tests/test_connect_basic.py rename to python/pyspark/sql/tests/connect/test_connect_basic.py index 937a36f94e10a..adc6f38f997c3 100644 --- a/python/pyspark/sql/tests/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -81,7 +81,7 @@ def test_simple_explain_string(self): if __name__ == "__main__": - from pyspark.sql.tests.test_connect_basic import * # noqa: F401 + from pyspark.sql.tests.connect.test_connect_basic import * # noqa: F401 try: import xmlrunner # type: ignore diff --git a/python/pyspark/sql/tests/test_connect_column_expressions.py b/python/pyspark/sql/tests/connect/test_connect_column_expressions.py similarity index 96% rename from python/pyspark/sql/tests/test_connect_column_expressions.py rename to python/pyspark/sql/tests/connect/test_connect_column_expressions.py index 826c32cdf908e..2aa686bbc3823 100644 --- a/python/pyspark/sql/tests/test_connect_column_expressions.py +++ b/python/pyspark/sql/tests/connect/test_connect_column_expressions.py @@ -54,7 +54,7 @@ def test_column_literals(self): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_connect_column_expressions import * # noqa: F401 + from pyspark.sql.tests.connect.test_connect_column_expressions import * # noqa: F401 try: import xmlrunner # type: ignore diff --git a/python/pyspark/sql/tests/test_connect_plan_only.py b/python/pyspark/sql/tests/connect/test_connect_plan_only.py similarity index 97% rename from python/pyspark/sql/tests/test_connect_plan_only.py rename to python/pyspark/sql/tests/connect/test_connect_plan_only.py index f3d4841a8e0f5..5c945e19f0529 100644 --- a/python/pyspark/sql/tests/test_connect_plan_only.py +++ 
b/python/pyspark/sql/tests/connect/test_connect_plan_only.py @@ -64,7 +64,7 @@ def read_table(x): if __name__ == "__main__": - from pyspark.sql.tests.test_connect_plan_only import * # noqa: F401 + from pyspark.sql.tests.connect.test_connect_plan_only import * # noqa: F401 try: import xmlrunner # type: ignore diff --git a/python/pyspark/sql/tests/test_connect_select_ops.py b/python/pyspark/sql/tests/connect/test_connect_select_ops.py similarity index 95% rename from python/pyspark/sql/tests/test_connect_select_ops.py rename to python/pyspark/sql/tests/connect/test_connect_select_ops.py index fc624b0d5cc29..f7f164c11db49 100644 --- a/python/pyspark/sql/tests/test_connect_select_ops.py +++ b/python/pyspark/sql/tests/connect/test_connect_select_ops.py @@ -29,7 +29,7 @@ def test_select_with_literal(self): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_connect_select_ops import * # noqa: F401 + from pyspark.sql.tests.connect.test_connect_select_ops import * # noqa: F401 try: import xmlrunner # type: ignore diff --git a/python/pyspark/sql/tests/pandas/__init__.py b/python/pyspark/sql/tests/pandas/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/python/pyspark/sql/tests/pandas/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py similarity index 99% rename from python/pyspark/sql/tests/test_pandas_cogrouped_map.py rename to python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py index 88ba396e3f5e0..b3f4c7331d162 100644 --- a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py @@ -404,7 +404,7 @@ def merge_pandas(lft, rgt): if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_cogrouped_map import * # noqa: F401 + from pyspark.sql.tests.pandas.test_pandas_cogrouped_map import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py similarity index 99% rename from python/pyspark/sql/tests/test_pandas_grouped_map.py rename to python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py index b05c8fd86a93b..7f27671cfe0ce 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py @@ -741,7 +741,7 @@ def my_pandas_udf(pdf): if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_grouped_map import * # noqa: F401 + from pyspark.sql.tests.pandas.test_pandas_grouped_map import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map_with_state.py b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py similarity index 98% rename from python/pyspark/sql/tests/test_pandas_grouped_map_with_state.py rename to python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py index 8671cc8519c4d..e75148e524ce5 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map_with_state.py +++ 
b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py @@ -237,7 +237,7 @@ def assert_test(): if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_grouped_map_with_state import * # noqa: F401 + from pyspark.sql.tests.pandas.test_pandas_grouped_map_with_state import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py similarity index 99% rename from python/pyspark/sql/tests/test_pandas_map.py rename to python/pyspark/sql/tests/pandas/test_pandas_map.py index 11da879da3828..7f996ca55a85d 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py @@ -204,7 +204,7 @@ def func(iterator): if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_map import * # noqa: F401 + from pyspark.sql.tests.pandas.test_pandas_map import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_pandas_udf.py b/python/pyspark/sql/tests/pandas/test_pandas_udf.py similarity index 99% rename from python/pyspark/sql/tests/test_pandas_udf.py rename to python/pyspark/sql/tests/pandas/test_pandas_udf.py index be80d7a56260e..077db2971ea8b 100644 --- a/python/pyspark/sql/tests/test_pandas_udf.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf.py @@ -293,7 +293,7 @@ def noop(s: pd.Series) -> pd.Series: if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_udf import * # noqa: F401 + from pyspark.sql.tests.pandas.test_pandas_udf import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py similarity index 99% rename from python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py rename to python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py index 56fad6fc1acde..6f475624b74c5 100644 --- 
a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py @@ -548,7 +548,7 @@ def mean(x): if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_udf_grouped_agg import * # noqa: F401 + from pyspark.sql.tests.pandas.test_pandas_udf_grouped_agg import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py similarity index 99% rename from python/pyspark/sql/tests/test_pandas_udf_scalar.py rename to python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py index 8a38b9cc87ad4..6580f839a8612 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py @@ -1330,7 +1330,7 @@ def udf(x): if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_udf_scalar import * # noqa: F401 + from pyspark.sql.tests.pandas.test_pandas_udf_scalar import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py similarity index 99% rename from python/pyspark/sql/tests/test_pandas_udf_typehints.py rename to python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py index 44315c95614b8..8c77ed4b775a9 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_typehints.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py @@ -357,7 +357,7 @@ def func(col: "Union[pd.Series, pd.DataFrame]", *, col2: "pd.DataFrame") -> "pd. 
if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_udf_typehints import * # noqa: #401 + from pyspark.sql.tests.pandas.test_pandas_udf_typehints import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py similarity index 99% rename from python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py rename to python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py index 832086cb9ec8f..a6d3bd608de27 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py @@ -364,7 +364,7 @@ def func(col: "Union[pd.Series, pd.DataFrame]", *, col2: "pd.DataFrame") -> "pd. if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_udf_typehints_with_future_annotations import * # noqa: #401 + from pyspark.sql.tests.pandas.test_pandas_udf_typehints_with_future_annotations import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_pandas_udf_window.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py similarity index 99% rename from python/pyspark/sql/tests/test_pandas_udf_window.py rename to python/pyspark/sql/tests/pandas/test_pandas_udf_window.py index 92314c3724db8..07e10a58d2d60 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_window.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py @@ -395,7 +395,7 @@ def test_bounded_mixed(self): if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_udf_window import * # noqa: F401 + from pyspark.sql.tests.pandas.test_pandas_udf_window import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/streaming/__init__.py
b/python/pyspark/sql/tests/streaming/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/python/pyspark/sql/tests/streaming/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/streaming/test_streaming.py similarity index 99% rename from python/pyspark/sql/tests/test_streaming.py rename to python/pyspark/sql/tests/streaming/test_streaming.py index 809294d34c32f..1c99723577d5c 100644 --- a/python/pyspark/sql/tests/test_streaming.py +++ b/python/pyspark/sql/tests/streaming/test_streaming.py @@ -629,7 +629,7 @@ def test_streaming_write_to_table(self): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_streaming import * # noqa: F401 + from pyspark.sql.tests.streaming.test_streaming import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_streaming_listener.py b/python/pyspark/sql/tests/streaming/test_streaming_listener.py similarity index 99% rename from python/pyspark/sql/tests/test_streaming_listener.py rename to python/pyspark/sql/tests/streaming/test_streaming_listener.py index b37bc1340ad1a..de3456525401f 100644 --- 
a/python/pyspark/sql/tests/test_streaming_listener.py +++ b/python/pyspark/sql/tests/streaming/test_streaming_listener.py @@ -296,7 +296,7 @@ def onQueryTerminated(self, event): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_streaming_listener import * # noqa: F401 + from pyspark.sql.tests.streaming.test_streaming_listener import * # noqa: F401 try: import xmlrunner # type: ignore[import]