diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml new file mode 100644 index 000000000000..581ad8a66c2b --- /dev/null +++ b/.github/workflows/build_and_test.yml @@ -0,0 +1,283 @@ +name: Build and test + +on: + push: + branches: + - branch-3.0 + pull_request: + branches: + - branch-3.0 + +jobs: + # Build: build Spark and run the tests for specified modules. + build: + name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + java: + - 1.8 + hadoop: + - hadoop2.7 + hive: + - hive2.3 + # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now. + # Kinesis tests depend on the external Amazon Kinesis service. + # Note that the modules below are from sparktestsupport/modules.py. + modules: + - >- + core, unsafe, kvstore, avro, + network-common, network-shuffle, repl, launcher, + examples, sketch, graphx + - >- + catalyst, hive-thriftserver + - >- + streaming, sql-kafka-0-10, streaming-kafka-0-10, + mllib-local, mllib, + yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl + - >- + pyspark-sql, pyspark-mllib + - >- + pyspark-core, pyspark-streaming, pyspark-ml + - >- + sparkr + # Here, we split the Hive and SQL tests into the slow ones and the rest. + included-tags: [""] + # Some tests are disabled in GitHub Actions. Ideally, we should remove this tag + # and run all tests. + excluded-tags: ["org.apache.spark.tags.GitHubActionsUnstableTest"] + comment: [""] + include: + # Hive tests + - modules: hive + java: 1.8 + hadoop: hadoop2.7 + hive: hive2.3 + included-tags: org.apache.spark.tags.SlowHiveTest + comment: "- slow tests" + - modules: hive + java: 1.8 + hadoop: hadoop2.7 + hive: hive2.3 + excluded-tags: org.apache.spark.tags.SlowHiveTest,org.apache.spark.tags.GitHubActionsUnstableTest + comment: "- other tests" + # SQL tests + - modules: sql + java: 1.8 + hadoop: hadoop2.7 + hive: hive2.3 + included-tags: org.apache.spark.tags.ExtendedSQLTest + comment: "- slow tests" + - modules: sql + java: 1.8 + hadoop: hadoop2.7 + hive: hive2.3 + excluded-tags: org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.GitHubActionsUnstableTest + comment: "- other tests" + env: + MODULES_TO_TEST: ${{ matrix.modules }} + EXCLUDED_TAGS: ${{ matrix.excluded-tags }} + INCLUDED_TAGS: ${{ matrix.included-tags }} + HADOOP_PROFILE: ${{ matrix.hadoop }} + HIVE_PROFILE: ${{ matrix.hive }} + # GitHub Actions' default miniconda to use in pip packaging test. + CONDA_PREFIX: /usr/share/miniconda + GITHUB_PREV_SHA: ${{ github.event.before }} + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + # In order to fetch changed files + with: + fetch-depth: 0 + # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
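+      # An illustrative note on how the caches below behave (assumed actions/cache
+      # semantics, not part of this change): 'key' must match exactly, so editing any
+      # pom.xml produces a fresh key such as build-<hash of all pom.xml files>, while
+      # 'restore-keys' matches by prefix, so the 'build-' fallback warm-starts the job
+      # from the most recent cache saved under an older pom.xml state.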
+ - name: Cache Scala, SBT, Maven and Zinc + uses: actions/cache@v1 + with: + path: build + key: build-${{ hashFiles('**/pom.xml') }} + restore-keys: | + build- + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-maven- + - name: Cache Ivy local repository + uses: actions/cache@v2 + with: + path: ~/.ivy2/cache + key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }} + restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-ivy- + - name: Install JDK ${{ matrix.java }} + uses: actions/setup-java@v1 + with: + java-version: ${{ matrix.java }} + # PySpark + - name: Install PyPy3 + # Note that the order of the Python installations here matters because the default + # python3 is overridden by pypy3. + uses: actions/setup-python@v2 + if: contains(matrix.modules, 'pyspark') + with: + python-version: pypy3 + architecture: x64 + - name: Install Python 2.7 + uses: actions/setup-python@v2 + if: contains(matrix.modules, 'pyspark') + with: + python-version: 2.7 + architecture: x64 + - name: Install Python 3.8 + uses: actions/setup-python@v2 + # We should install one Python that is higher than 3 for SQL and Yarn because: + # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. + # - Yarn has a Python specific test too, for example, YarnClusterSuite. + if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + with: + python-version: 3.8 + architecture: x64 + - name: Install Python packages (Python 2.7 and PyPy3) + if: contains(matrix.modules, 'pyspark') + # PyArrow is not supported in PyPy yet, see ARROW-2651. + # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. + run: | + python2.7 -m pip install numpy pyarrow pandas scipy xmlrunner + python2.7 -m pip list + # PyPy does not have xmlrunner + pypy3 -m pip install numpy pandas + pypy3 -m pip list + - name: Install Python packages (Python 3.8) + if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + run: | + python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner + python3.8 -m pip list + # SparkR + - name: Install R 4.0 + if: contains(matrix.modules, 'sparkr') + run: | + sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list" + curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add + sudo apt-get update + sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev + - name: Install R packages + if: contains(matrix.modules, 'sparkr') + run: | + # qpdf is required to reduce the size of PDFs to make the CRAN check pass. See SPARK-32497. + sudo apt-get install -y libcurl4-openssl-dev qpdf + sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')" + # Show installed packages in R. + sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]' + # Run the tests. + - name: Run tests + run: | + # Hive tests become flaky when run in parallel because they are too resource-intensive.
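+          # As an illustration (values taken from the matrix above, not part of this
+          # change), the "sql - slow tests" entry effectively runs:
+          #   ./dev/run-tests --parallelism 2 --modules "sql" \
+          #     --included-tags "org.apache.spark.tags.ExtendedSQLTest" --excluded-tags ""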
+ if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi + mkdir -p ~/.m2 + ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" + rm -rf ~/.m2/repository/org/apache/spark + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} + path: "**/target/test-reports/*.xml" + - name: Upload unit tests log files + if: failure() + uses: actions/upload-artifact@v2 + with: + name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} + path: "**/target/unit-tests.log" + + # Static analysis, and documentation build + lint: + name: Linters, licenses, dependencies and documentation generation + runs-on: ubuntu-latest + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: docs-maven-repo-${{ hashFiles('**/pom.xml') }} + restore-keys: | + docs-maven- + - name: Install JDK 1.8 + uses: actions/setup-java@v1 + with: + java-version: 1.8 + - name: Install Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + architecture: x64 + - name: Install Python linter dependencies + run: | + pip3 install flake8 sphinx numpy + - name: Install R 4.0 + run: | + sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list" + curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add + sudo apt-get update + sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev + - name: Install R linter dependencies and SparkR + run: | + sudo apt-get install -y libcurl4-openssl-dev + sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" + sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" + ./R/install-dev.sh + - name: Install Ruby 2.7 for documentation generation + uses: actions/setup-ruby@v1 + with: + ruby-version: 2.7 + - name: Install dependencies for documentation generation + run: | + sudo apt-get install -y libcurl4-openssl-dev pandoc + pip install sphinx mkdocs numpy + gem install jekyll jekyll-redirect-from rouge + sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" + - name: Scala linter + run: ./dev/lint-scala + - name: Java linter + run: ./dev/lint-java + - name: Python linter + run: ./dev/lint-python + - name: R linter + run: ./dev/lint-r + - name: License test + run: ./dev/check-license + - name: Dependencies test + run: ./dev/test-dependencies.sh + - name: Run documentation build + run: | + cd docs + jekyll build + + java11: + name: Java 11 build + runs-on: ubuntu-latest + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: java11-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + java11-maven- + - name: Install Java 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Build with Maven + run: | + export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_CLI_OPTS="--no-transfer-progress" + mkdir -p ~/.m2 + ./build/mvn 
$MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install + rm -rf ~/.m2/repository/org/apache/spark diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml deleted file mode 100644 index 36f9e33f9bbb..000000000000 --- a/.github/workflows/master.yml +++ /dev/null @@ -1,156 +0,0 @@ -name: master - -on: - push: - branches: - - branch-3.0 - pull_request: - branches: - - branch-3.0 - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - java: [ '1.8', '11' ] - hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ] - hive: [ 'hive-1.2', 'hive-2.3' ] - exclude: - - java: '11' - hive: 'hive-1.2' - - hadoop: 'hadoop-3.2' - hive: 'hive-1.2' - name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }} - - steps: - - uses: actions/checkout@master - # We split caches because GitHub Action Cache has a 400MB-size limit. - - uses: actions/cache@v1 - with: - path: build - key: build-${{ hashFiles('**/pom.xml') }} - restore-keys: | - build- - - uses: actions/cache@v1 - with: - path: ~/.m2/repository/com - key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com- - - uses: actions/cache@v1 - with: - path: ~/.m2/repository/org - key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org- - - uses: actions/cache@v1 - with: - path: ~/.m2/repository/net - key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net- - - uses: actions/cache@v1 - with: - path: ~/.m2/repository/io - key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io- - - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v1 - with: - java-version: ${{ matrix.java }} - - name: Build with Maven - run: | - export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - mkdir -p ~/.m2 - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install - rm -rf ~/.m2/repository/org/apache/spark - - - lint: - runs-on: ubuntu-latest - name: Linters (Java/Scala/Python), licenses, dependencies - steps: - - uses: actions/checkout@master - - uses: actions/setup-java@v1 - with: - java-version: '11' - - uses: actions/setup-python@v1 - with: - python-version: '3.x' - architecture: 'x64' - - name: Scala - run: ./dev/lint-scala - - name: Java - run: ./dev/lint-java - - name: Python - run: | - pip install flake8 sphinx numpy - ./dev/lint-python - - name: License - run: ./dev/check-license - - name: Dependencies - run: ./dev/test-dependencies.sh - - lintr: - runs-on: ubuntu-latest - name: Linter (R) - steps: - - uses: actions/checkout@master - - uses: actions/setup-java@v1 - with: - java-version: '11' - - uses: r-lib/actions/setup-r@v1 - with: - r-version: '3.6.2' - - name: Install lib - run: | - sudo apt-get install -y libcurl4-openssl-dev - - name: install R packages - run: | - sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), 
repos='https://cloud.r-project.org/')" - sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" - - name: package and install SparkR - run: ./R/install-dev.sh - - name: lint-r - run: ./dev/lint-r - - docs: - runs-on: ubuntu-latest - name: Generate documents - steps: - - uses: actions/checkout@master - - uses: actions/cache@v1 - with: - path: ~/.m2/repository - key: docs-maven-repo-${{ hashFiles('**/pom.xml') }} - restore-keys: | - docs-maven-repo- - - uses: actions/setup-java@v1 - with: - java-version: '1.8' - - uses: actions/setup-python@v1 - with: - python-version: '3.x' - architecture: 'x64' - - uses: actions/setup-ruby@v1 - with: - ruby-version: '2.7' - - uses: r-lib/actions/setup-r@v1 - with: - r-version: '3.6.2' - - name: Install lib and pandoc - run: | - sudo apt-get install -y libcurl4-openssl-dev pandoc - - name: Install packages - run: | - pip install sphinx mkdocs numpy - gem install jekyll jekyll-redirect-from rouge - sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" - - name: Run jekyll build - run: | - cd docs - jekyll build diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml new file mode 100644 index 000000000000..93cdb8668726 --- /dev/null +++ b/.github/workflows/test_report.yml @@ -0,0 +1,24 @@ +name: Report test results +on: + workflow_run: + workflows: ["Build and test"] + types: + - completed + +jobs: + test_report: + runs-on: ubuntu-latest + steps: + - name: Download test results to report + uses: dawidd6/action-download-artifact@v2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: ${{ github.event.workflow_run.workflow_id }} + commit: ${{ github.event.workflow_run.head_commit.id }} + - name: Publish test report + uses: scacap/action-surefire-report@v1 + with: + check_name: Report test results + github_token: ${{ secrets.GITHUB_TOKEN }} + report_paths: "**/target/test-reports/*.xml" + commit: ${{ github.event.workflow_run.head_commit.id }} diff --git a/common/tags/src/test/java/org/apache/spark/tags/GitHubActionsUnstableTest.java b/common/tags/src/test/java/org/apache/spark/tags/GitHubActionsUnstableTest.java new file mode 100644 index 000000000000..a602656bb22f --- /dev/null +++ b/common/tags/src/test/java/org/apache/spark/tags/GitHubActionsUnstableTest.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.tags; + +import org.scalatest.TagAnnotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@TagAnnotation +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.METHOD, ElementType.TYPE}) +public @interface GitHubActionsUnstableTest { } diff --git a/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java b/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java new file mode 100644 index 000000000000..a7e6f352667d --- /dev/null +++ b/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.tags; + +import org.scalatest.TagAnnotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@TagAnnotation +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.METHOD, ElementType.TYPE}) +public @interface SlowHiveTest { } diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 81e33a6f961b..b322d3f61b44 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -63,7 +63,7 @@ fi PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print(__version__)") PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz" # The pip install options we use for all the pip commands -PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall " +PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall" # Test both regular user and edit/dev install modes. 
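# For example, with the options above the first of the two commands below expands to
# (illustrative; the tarball path comes from $PYSPARK_DIST defined earlier):
#   pip install --upgrade --no-cache-dir --force-reinstall "$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"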
PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST" "pip install $PIP_OPTIONS -e python/") @@ -80,8 +80,12 @@ for python in "${PYTHON_EXECS[@]}"; do VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python rm -rf "$VIRTUALENV_PATH" if [ -n "$USE_CONDA" ]; then + if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then + # See also https://github.com/conda/conda/issues/7980 + source "$CONDA_PREFIX/etc/profile.d/conda.sh" + fi conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools - source activate "$VIRTUALENV_PATH" + source activate "$VIRTUALENV_PATH" || conda activate "$VIRTUALENV_PATH" else mkdir -p "$VIRTUALENV_PATH" virtualenv --python=$python "$VIRTUALENV_PATH" @@ -124,7 +128,7 @@ for python in "${PYTHON_EXECS[@]}"; do # conda / virtualenv environments need to be deactivated differently if [ -n "$USE_CONDA" ]; then - source deactivate + source deactivate || conda deactivate else deactivate fi diff --git a/dev/run-tests.py b/dev/run-tests.py index 5255a77ec208..976854dad821 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -49,13 +49,12 @@ def determine_modules_for_files(filenames): ['pyspark-core', 'sql'] >>> [x.name for x in determine_modules_for_files(["file_not_matched_by_any_subproject"])] ['root'] - >>> [x.name for x in determine_modules_for_files( \ - [".github/workflows/master.yml", "appveyor.yml"])] + >>> [x.name for x in determine_modules_for_files(["appveyor.yml"])] [] """ changed_modules = set() for filename in filenames: - if filename in (".github/workflows/master.yml", "appveyor.yml"): + if filename in ("appveyor.yml",): continue matched_at_least_one_module = False for module in modules.all_modules: @@ -101,28 +100,52 @@ def setup_test_environ(environ): os.environ[k] = v -def determine_modules_to_test(changed_modules): +def determine_modules_to_test(changed_modules, deduplicated=True): """ Given a set of modules that have changed, compute the transitive closure of those modules' dependent modules in order to determine the set of modules that should be tested. Returns a topologically-sorted list of modules (ties are broken by sorting on module names). + If ``deduplicated`` is disabled, the modules are returned without taking the deduplication + by dependencies into account. >>> [x.name for x in determine_modules_to_test([modules.root])] ['root'] >>> [x.name for x in determine_modules_to_test([modules.build])] ['root'] + >>> [x.name for x in determine_modules_to_test([modules.core])] + ['root'] + >>> [x.name for x in determine_modules_to_test([modules.launcher])] + ['root'] >>> [x.name for x in determine_modules_to_test([modules.graphx])] ['graphx', 'examples'] - >>> x = [x.name for x in determine_modules_to_test([modules.sql])] - >>> x # doctest: +NORMALIZE_WHITESPACE + >>> [x.name for x in determine_modules_to_test([modules.sql])] + ... # doctest: +NORMALIZE_WHITESPACE ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver', 'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml'] + >>> sorted([x.name for x in determine_modules_to_test( + ... [modules.sparkr, modules.sql], deduplicated=False)]) + ... # doctest: +NORMALIZE_WHITESPACE + ['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml', + 'pyspark-mllib', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] + >>> sorted([x.name for x in determine_modules_to_test( + ... [modules.sql, modules.core], deduplicated=False)]) + ...
# doctest: +NORMALIZE_WHITESPACE + ['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive', 'hive-thriftserver', + 'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', + 'pyspark-sql', 'pyspark-streaming', 'repl', 'root', + 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10', + 'streaming-kinesis-asl'] """ modules_to_test = set() for module in changed_modules: - modules_to_test = modules_to_test.union(determine_modules_to_test(module.dependent_modules)) + modules_to_test = modules_to_test.union( + determine_modules_to_test(module.dependent_modules, deduplicated)) modules_to_test = modules_to_test.union(set(changed_modules)) + + if not deduplicated: + return modules_to_test + # If we need to run all of the tests, then we should short-circuit and return 'root' if modules.root in modules_to_test: return [modules.root] @@ -415,7 +438,7 @@ def run_scala_tests_sbt(test_modules, test_profiles): exec_sbt(profiles_and_goals) -def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags): +def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags): """Function to properly execute all tests passed in as a set from the `determine_test_suites` function""" set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS") @@ -425,6 +448,8 @@ def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags): test_profiles = extra_profiles + \ list(set(itertools.chain.from_iterable(m.build_profile_flags for m in test_modules))) + if included_tags: + test_profiles += ['-Dtest.include.tags=' + ",".join(included_tags)] if excluded_tags: test_profiles += ['-Dtest.exclude.tags=' + ",".join(excluded_tags)] @@ -532,6 +557,24 @@ def parse_opts(): "-p", "--parallelism", type=int, default=8, help="The number of suites to test in parallel (default %(default)d)" ) + parser.add_argument( + "-m", "--modules", type=str, + default=None, + help="A comma-separated list of modules to test " + "(default: %s)" % ",".join(sorted([m.name for m in modules.all_modules])) + ) + parser.add_argument( + "-e", "--excluded-tags", type=str, + default=None, + help="A comma-separated list of tags to exclude in the tests, " + "e.g., org.apache.spark.tags.ExtendedHiveTest " + ) + parser.add_argument( + "-i", "--included-tags", type=str, + default=None, + help="A comma-separated list of tags to include in the tests, " + "e.g., org.apache.spark.tags.ExtendedHiveTest " + ) args, unknown = parser.parse_known_args() if unknown: @@ -564,11 +607,20 @@ def main(): " install one and retry.") sys.exit(2) - # install SparkR - if which("R"): - run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")]) - else: - print("Cannot install SparkR as R was not found in PATH") + # Install SparkR + should_only_test_modules = opts.modules is not None + test_modules = [] + if should_only_test_modules: + str_test_modules = [m.strip() for m in opts.modules.split(",")] + test_modules = [m for m in modules.all_modules if m.name in str_test_modules] + + if not should_only_test_modules or modules.sparkr in test_modules: + # If test modules are specified, we will not run the R linter. + # SparkR needs a manual SparkR installation.
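+    # An illustrative walk-through (not part of the change): "--modules sql,hive"
+    # is split and stripped to ["sql", "hive"], then matched by name against
+    # modules.all_modules; unknown names are silently dropped rather than erroring.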
+ if which("R"): + run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")]) + else: + print("Cannot install SparkR as R was not found in PATH") if os.environ.get("AMPLAB_JENKINS"): # if we're on the Amplab Jenkins build servers setup variables @@ -582,27 +634,69 @@ def main(): # /home/jenkins/anaconda2/envs/py36/bin os.environ["PATH"] = "/home/anaconda/envs/py36/bin:" + os.environ.get("PATH") else: - # else we're running locally and can use local settings + # else we're running locally or on GitHub Actions. build_tool = "sbt" hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7") hive_version = os.environ.get("HIVE_PROFILE", "hive2.3") - test_env = "local" + if "GITHUB_ACTIONS" in os.environ: + test_env = "github_actions" + else: + test_env = "local" print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version, "and Hive profile", hive_version, "under environment", test_env) extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version) - changed_modules = None - changed_files = None - if test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"): + changed_modules = [] + changed_files = [] + included_tags = [] + excluded_tags = [] + if should_only_test_modules: + # If we're running the tests on GitHub Actions, attempt to detect and test + # only the affected modules. + if test_env == "github_actions": + if os.environ["GITHUB_BASE_REF"] != "": + # Pull requests + changed_files = identify_changed_files_from_git_commits( + os.environ["GITHUB_SHA"], target_branch=os.environ["GITHUB_BASE_REF"]) + else: + # Build for each commit. + changed_files = identify_changed_files_from_git_commits( + os.environ["GITHUB_SHA"], target_ref=os.environ["GITHUB_PREV_SHA"]) + + modules_to_test = determine_modules_to_test( + determine_modules_for_files(changed_files), deduplicated=False) + + if modules.root not in modules_to_test: + # If the root module is not found, only test the intersected modules. + # If the root module is found, just run the modules as specified initially. + test_modules = list(set(modules_to_test).intersection(test_modules)) + + changed_modules = test_modules + if len(changed_modules) == 0: + print("[info] There are no modules to test, exiting without testing.") + return + + # If we're running the tests in AMPLab Jenkins, calculate the diff from the targeted branch, and + # detect modules to test. + elif test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"): target_branch = os.environ["ghprbTargetBranch"] changed_files = identify_changed_files_from_git_commits("HEAD", target_branch=target_branch) changed_modules = determine_modules_for_files(changed_files) + test_modules = determine_modules_to_test(changed_modules) excluded_tags = determine_tags_to_exclude(changed_modules) + # If no changed module is found, test all.
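+    # Illustration: a commit touching only "appveyor.yml" yields no modules at all
+    # (see the determine_modules_for_files doctest above), so the fallback below
+    # selects modules.root and everything is tested.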
if not changed_modules: changed_modules = [modules.root] - excluded_tags = [] + if not test_modules: + test_modules = determine_modules_to_test(changed_modules) + + if opts.excluded_tags: + excluded_tags.extend([t.strip() for t in opts.excluded_tags.split(",")]) + if opts.included_tags: + included_tags.extend([t.strip() for t in opts.included_tags.split(",")]) + print("[info] Found the following changed modules:", ", ".join(x.name for x in changed_modules)) @@ -615,33 +709,32 @@ def main(): test_environ.update(m.environ) setup_test_environ(test_environ) - test_modules = determine_modules_to_test(changed_modules) - - # license checks - run_apache_rat_checks() - - # style checks - if not changed_files or any(f.endswith(".scala") - or f.endswith("scalastyle-config.xml") - for f in changed_files): - run_scala_style_checks(extra_profiles) should_run_java_style_checks = False - if not changed_files or any(f.endswith(".java") - or f.endswith("checkstyle.xml") - or f.endswith("checkstyle-suppressions.xml") - for f in changed_files): - # Run SBT Checkstyle after the build to prevent a side-effect to the build. - should_run_java_style_checks = True - if not changed_files or any(f.endswith("lint-python") - or f.endswith("tox.ini") - or f.endswith(".py") - for f in changed_files): - run_python_style_checks() - if not changed_files or any(f.endswith(".R") - or f.endswith("lint-r") - or f.endswith(".lintr") - for f in changed_files): - run_sparkr_style_checks() + if not should_only_test_modules: + # license checks + run_apache_rat_checks() + + # style checks + if not changed_files or any(f.endswith(".scala") + or f.endswith("scalastyle-config.xml") + for f in changed_files): + run_scala_style_checks(extra_profiles) + if not changed_files or any(f.endswith(".java") + or f.endswith("checkstyle.xml") + or f.endswith("checkstyle-suppressions.xml") + for f in changed_files): + # Run SBT Checkstyle after the build to prevent a side-effect to the build. 
+ should_run_java_style_checks = True + if not changed_files or any(f.endswith("lint-python") + or f.endswith("tox.ini") + or f.endswith(".py") + for f in changed_files): + run_python_style_checks() + if not changed_files or any(f.endswith(".R") + or f.endswith("lint-r") + or f.endswith(".lintr") + for f in changed_files): + run_sparkr_style_checks() # determine if docs were changed and if we're inside the amplab environment # note - the below commented out until *all* Jenkins workers can get `jekyll` installed @@ -663,7 +756,7 @@ def main(): build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks) # run the test suites - run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags) + run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags) modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 391e4bbe1b1f..75bdec0f40f0 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -100,9 +100,75 @@ def __hash__(self): ] ) +kvstore = Module( + name="kvstore", + dependencies=[tags], + source_file_regexes=[ + "common/kvstore/", + ], + sbt_test_goals=[ + "kvstore/test", + ], +) + +network_common = Module( + name="network-common", + dependencies=[tags], + source_file_regexes=[ + "common/network-common/", + ], + sbt_test_goals=[ + "network-common/test", + ], +) + +network_shuffle = Module( + name="network-shuffle", + dependencies=[tags], + source_file_regexes=[ + "common/network-shuffle/", + ], + sbt_test_goals=[ + "network-shuffle/test", + ], +) + +unsafe = Module( + name="unsafe", + dependencies=[tags], + source_file_regexes=[ + "common/unsafe", + ], + sbt_test_goals=[ + "unsafe/test", + ], +) + +launcher = Module( + name="launcher", + dependencies=[tags], + source_file_regexes=[ + "launcher/", + ], + sbt_test_goals=[ + "launcher/test", + ], +) + +core = Module( + name="core", + dependencies=[kvstore, network_common, network_shuffle, unsafe, launcher], + source_file_regexes=[ + "core/", + ], + sbt_test_goals=[ + "core/test", + ], +) + catalyst = Module( name="catalyst", - dependencies=[tags], + dependencies=[tags, core], source_file_regexes=[ "sql/catalyst/", ], @@ -111,7 +177,6 @@ def __hash__(self): ], ) - sql = Module( name="sql", dependencies=[catalyst], @@ -123,7 +188,6 @@ def __hash__(self): ], ) - hive = Module( name="hive", dependencies=[sql], @@ -142,7 +206,6 @@ def __hash__(self): ] ) - repl = Module( name="repl", dependencies=[hive], @@ -154,7 +217,6 @@ def __hash__(self): ], ) - hive_thriftserver = Module( name="hive-thriftserver", dependencies=[hive], @@ -192,7 +254,6 @@ def __hash__(self): ] ) - sketch = Module( name="sketch", dependencies=[tags], @@ -204,10 +265,9 @@ def __hash__(self): ] ) - graphx = Module( name="graphx", - dependencies=[tags], + dependencies=[tags, core], source_file_regexes=[ "graphx/", ], @@ -216,10 +276,9 @@ def __hash__(self): ] ) - streaming = Module( name="streaming", - dependencies=[tags], + dependencies=[tags, core], source_file_regexes=[ "streaming", ], @@ -235,7 +294,7 @@ def __hash__(self): # fail other PRs. 
streaming_kinesis_asl = Module( name="streaming-kinesis-asl", - dependencies=[tags], + dependencies=[tags, core], source_file_regexes=[ "external/kinesis-asl/", "external/kinesis-asl-assembly/", @@ -254,21 +313,23 @@ def __hash__(self): streaming_kafka_0_10 = Module( name="streaming-kafka-0-10", - dependencies=[streaming], + dependencies=[streaming, core], source_file_regexes=[ # The ending "/" is necessary otherwise it will include "sql-kafka" codes "external/kafka-0-10/", "external/kafka-0-10-assembly", + "external/kafka-0-10-token-provider", ], sbt_test_goals=[ "streaming-kafka-0-10/test", + "token-provider-kafka-0-10/test" ] ) mllib_local = Module( name="mllib-local", - dependencies=[tags], + dependencies=[tags, core], source_file_regexes=[ "mllib-local", ], @@ -302,10 +363,9 @@ def __hash__(self): ] ) - pyspark_core = Module( name="pyspark-core", - dependencies=[], + dependencies=[core], source_file_regexes=[ "python/(?!pyspark/(ml|mllib|sql|streaming))" ], @@ -339,7 +399,6 @@ def __hash__(self): ] ) - pyspark_sql = Module( name="pyspark-sql", dependencies=[pyspark_core, hive, avro], @@ -578,7 +637,7 @@ def __hash__(self): # No other modules should directly depend on this module. root = Module( name="root", - dependencies=[build], # Changes to build should trigger all tests. + dependencies=[build, core], # Changes to build should trigger all tests. source_file_regexes=[], # In order to run all of the tests, enable every test profile: build_profile_flags=list(set( diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index a0e138c5bcb6..7a9634ac0fcb 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1017,6 +1017,15 @@ object TestSettings { sys.props.get("test.exclude.tags").map { tags => Seq("--exclude-categories=" + tags) }.getOrElse(Nil): _*), + // Include tags defined in a system property + testOptions in Test += Tests.Argument(TestFrameworks.ScalaTest, + sys.props.get("test.include.tags").map { tags => + tags.split(",").flatMap { tag => Seq("-n", tag) }.toSeq + }.getOrElse(Nil): _*), + testOptions in Test += Tests.Argument(TestFrameworks.JUnit, + sys.props.get("test.include.tags").map { tags => + Seq("--include-categories=" + tags) + }.getOrElse(Nil): _*), // Show full stack trace and duration in test cases. testOptions in Test += Tests.Argument("-oDF"), testOptions in Test += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index 42f064a05ed9..15c5cf1ad9b0 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -21,6 +21,9 @@ import time import unittest import warnings +import sys +if sys.version >= '3': + basestring = unicode = str from pyspark import SparkContext, SparkConf from pyspark.sql import Row, SparkSession diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 81402f52af3b..016cafd66901 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -725,7 +725,8 @@ def assertCollectSuccess(typecode, value): if sys.version_info[0] < 3: all_types = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd']) else: - all_types = set(array.typecodes) + # PyPy does not seem to have array.typecodes.
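+            # Note (illustrative): on CPython 3, array.typecodes is the string
+            # 'bBuhHiIlLqQfd', which is the same set hard-coded below, so the
+            # assertions keep their coverage while also running under PyPy.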
+ all_types = set(['b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'f', 'd']) unsupported_types = all_types - set(supported_types) # test unsupported types for t in unsupported_types: diff --git a/python/pyspark/streaming/tests/test_dstream.py b/python/pyspark/streaming/tests/test_dstream.py index 7ecdf6b0b12d..89edb23070c6 100644 --- a/python/pyspark/streaming/tests/test_dstream.py +++ b/python/pyspark/streaming/tests/test_dstream.py @@ -30,8 +30,9 @@ @unittest.skipIf( - "pypy" in platform.python_implementation().lower() and "COVERAGE_PROCESS_START" in os.environ, - "PyPy implementation causes to hang DStream tests forever when Coverage report is used.") + "pypy" in platform.python_implementation().lower(), + "The tests fail in the PyPy3 implementation for an unknown reason. " + "With PyPy, DStream tests hang forever when the coverage report is used.") class BasicOperationTests(PySparkStreamingTestCase): def test_map(self): @@ -394,8 +395,9 @@ def failed_func(i): @unittest.skipIf( - "pypy" in platform.python_implementation().lower() and "COVERAGE_PROCESS_START" in os.environ, - "PyPy implementation causes to hang DStream tests forever when Coverage report is used.") + "pypy" in platform.python_implementation().lower(), + "The tests fail in the PyPy3 implementation for an unknown reason. " + "With PyPy, DStream tests hang forever when the coverage report is used.") class WindowFunctionTests(PySparkStreamingTestCase): timeout = 15 @@ -474,8 +476,9 @@ def func(dstream): @unittest.skipIf( - "pypy" in platform.python_implementation().lower() and "COVERAGE_PROCESS_START" in os.environ, - "PyPy implementation causes to hang DStream tests forever when Coverage report is used.") + "pypy" in platform.python_implementation().lower(), + "The tests fail in the PyPy3 implementation for an unknown reason. " + "With PyPy, DStream tests hang forever when the coverage report is used.") class CheckpointTests(unittest.TestCase): setupCalled = False diff --git a/python/run-tests.py b/python/run-tests.py index b677a5134ec9..a404c5364e9b 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -161,7 +161,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): def get_default_python_executables(): - python_execs = [x for x in ["python3.6", "python2.7", "pypy"] if which(x)] + python_execs = [x for x in ["python3.8", "python2.7", "pypy3", "pypy"] if which(x)] if "python3.6" not in python_execs: p = which("python3") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala index 6391d5664d5c..80346b350c14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala @@ -197,7 +197,7 @@ object IntegratedUDFTestUtils extends SQLHelper { lazy val pythonExec: String = { val pythonExec = sys.env.getOrElse( - "PYSPARK_DRIVER_PYTHON", sys.env.getOrElse("PYSPARK_PYTHON", "python3.6")) + "PYSPARK_DRIVER_PYTHON", sys.env.getOrElse("PYSPARK_PYTHON", "python3")) if (TestUtils.testCommandAvailable(pythonExec)) { pythonExec } else { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 15cc3109da3f..8b16674f05a8 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.tags.GitHubActionsUnstableTest /** * Re-run all the tests in SQLQueryTestSuite via Thrift Server. * 2. Support DESC command. * 3. Support SHOW command.
*/ +@GitHubActionsUnstableTest class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServer { override protected def testFile(fileName: String): String = { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index 3e1fce78ae71..73547d752024 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql.hive.thriftserver +import org.apache.spark.tags.GitHubActionsUnstableTest + +@GitHubActionsUnstableTest class ThriftServerWithSparkContextSuite extends SharedThriftServer { test("SPARK-29911: Uncache cached tables when session closed") { diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index db1f6fbd97d9..4112512e56fe 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -26,10 +26,12 @@ import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy +import org.apache.spark.tags.SlowHiveTest /** * Runs the test cases that are included in the hive distribution. */ +@SlowHiveTest class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // TODO: bundle in jar files... get from classpath private lazy val hiveQueryDir = TestHive.getHiveFile( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 8be3d26bfc93..aa96fa035c4f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTableType import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH import org.apache.spark.sql.test.SQLTestUtils -import org.apache.spark.tags.ExtendedHiveTest +import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest} import org.apache.spark.util.Utils /** @@ -46,6 +46,7 @@ import org.apache.spark.util.Utils * expected version under this local directory, e.g. `/tmp/spark-test/spark-2.0.3`, we will skip the * downloading for this spark version. 
*/ +@SlowHiveTest @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 8b97489e2d81..3a7e92ee1c00 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -38,12 +38,13 @@ import org.apache.spark.sql.hive.test.{HiveTestJars, TestHiveContext} import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH import org.apache.spark.sql.types.{DecimalType, StructType} -import org.apache.spark.tags.ExtendedHiveTest +import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest} import org.apache.spark.util.{ResetSystemProperties, Utils} /** * This suite tests spark-submit with applications using HiveContext. */ +@SlowHiveTest @ExtendedHiveTest class HiveSparkSubmitSuite extends SparkSubmitTestUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 8642a5ff1681..5ddaaf3c9a77 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} import org.apache.spark.sql.hive.test.TestHiveVersion import org.apache.spark.sql.types.IntegerType import org.apache.spark.sql.types.StructType -import org.apache.spark.tags.ExtendedHiveTest +import org.apache.spark.tags.{ExtendedHiveTest, GitHubActionsUnstableTest} import org.apache.spark.util.{MutableURLClassLoader, Utils} /** @@ -52,6 +52,7 @@ import org.apache.spark.util.{MutableURLClassLoader, Utils} */ // TODO: Refactor this to `HiveClientSuite` and make it a subclass of `HiveVersionSuite` @ExtendedHiveTest +@GitHubActionsUnstableTest class VersionsSuite extends SparkFunSuite with Logging { override protected val enableAutoThreadAudit = false diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala index fac981267f4d..1dd2ad3837cc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ +import org.apache.spark.tags.SlowHiveTest import org.apache.spark.unsafe.UnsafeAlignedOffset @@ -1054,6 +1055,7 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te class HashAggregationQuerySuite extends AggregationQuerySuite +@SlowHiveTest class HashAggregationQueryWithControlledFallbackSuite extends AggregationQuerySuite { override protected def checkAnswer(actual: => DataFrame, expectedAnswer: Seq[Row]): Unit = { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 
e8cf4ad5d9f2..d0aa618fc273 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -44,9 +44,11 @@ import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ +import org.apache.spark.tags.SlowHiveTest import org.apache.spark.util.Utils // TODO(gatorsmile): combine HiveCatalogedDDLSuite and HiveDDLSuite +@SlowHiveTest class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeAndAfterEach { override def afterEach(): Unit = { try { @@ -404,6 +406,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA } } +@SlowHiveTest class HiveDDLSuite extends QueryTest with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach { import testImplicits._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index b10a8cb8bf2b..e798a352f72b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.tags.SlowHiveTest case class TestData(a: Int, b: String) @@ -46,6 +47,7 @@ case class TestData(a: Int, b: String) * A set of test cases expressed in Hive QL that are not covered by the tests * included in the hive distribution. */ +@SlowHiveTest class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAndAfter { import org.apache.spark.sql.hive.test.TestHive.implicits._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala index b20ef035594d..6f37e39a532d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala @@ -27,13 +27,14 @@ import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.StaticSQLConf._ import org.apache.spark.sql.types._ -import org.apache.spark.tags.ExtendedHiveTest +import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest} import org.apache.spark.util.Utils /** * A separate set of DDL tests that uses Hive 2.1 libraries, which behave a little differently * from the built-in ones. 
*/ +@SlowHiveTest @ExtendedHiveTest class Hive_2_1_DDLSuite extends SparkFunSuite with TestHiveSingleton with BeforeAndAfterEach with BeforeAndAfterAll { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index d12eae0e410b..a46db32b9ade 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -43,6 +43,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ +import org.apache.spark.tags.SlowHiveTest import org.apache.spark.util.Utils case class Nested1(f1: Nested2) @@ -2559,6 +2560,8 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } } +@SlowHiveTest class SQLQuerySuite extends SQLQuerySuiteBase with DisableAdaptiveExecutionSuite +@SlowHiveTest class SQLQuerySuiteAE extends SQLQuerySuiteBase with EnableAdaptiveExecutionSuite
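As a usage sketch of the new tag plumbing (the suite name below is hypothetical; the @SlowHiveTest annotation, the -Dtest.include.tags property, and SparkBuild's translation into ScalaTest's "-n" / JUnit's "--include-categories" options are the ones added in this diff), tagging and selecting a slow Hive suite would look like:

    import org.apache.spark.SparkFunSuite
    import org.apache.spark.tags.SlowHiveTest

    // Tagged suites are picked up by the "hive - slow tests" matrix entry, which runs
    // ./dev/run-tests --included-tags org.apache.spark.tags.SlowHiveTest and thereby
    // passes -Dtest.include.tags=org.apache.spark.tags.SlowHiveTest down to SBT.
    @SlowHiveTest
    class MySlowHiveSuite extends SparkFunSuite {  // hypothetical example suite
      test("an expensive Hive round trip") {
        assert(1 + 1 === 2)
      }
    }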