29 changes: 18 additions & 11 deletions .github/workflows/master.yml
@@ -1,4 +1,4 @@
name: master
name: Build and test

on:
  push:
@@ -9,7 +9,6 @@ on:
    - master

jobs:
  # TODO(SPARK-32248): Recover JDK 11 builds
  # Build: build Spark and run the tests for specified modules.
  build:
    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
@@ -27,21 +26,21 @@ jobs:
        # Kinesis tests depend on the external Amazon Kinesis service.
        # Note that the modules below are from sparktestsupport/modules.py.
        modules:
          - |-
          - >-
            core, unsafe, kvstore, avro,
            network-common, network-shuffle, repl, launcher,
            examples, sketch, graphx
          - |-
          - >-
            catalyst, hive-thriftserver
          - |-
          - >-
            streaming, sql-kafka-0-10, streaming-kafka-0-10,
            mllib-local, mllib,
            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
          - |-
          - >-
            pyspark-sql, pyspark-mllib, pyspark-resource
          - |-
          - >-
            pyspark-core, pyspark-streaming, pyspark-ml
          - |-
          - >-
            sparkr
        # Here, we split the Hive and SQL tests into the slow ones and the rest.
        included-tags: [""]
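A note on the "|-" to ">-" change in the matrix entries above: a literal block scalar ("|-") preserves line breaks, while a folded one (">-") joins the wrapped lines with spaces, so each module list reaches the test script as a single comma-separated line. A small sketch of the difference, assuming PyYAML is available (the key name here is arbitrary):

# Demonstrates YAML literal ("|-") vs. folded (">-") block scalars.
import yaml  # PyYAML, assumed installed

literal = yaml.safe_load("modules: |-\n  core, unsafe,\n  graphx")
folded = yaml.safe_load("modules: >-\n  core, unsafe,\n  graphx")

print(repr(literal["modules"]))  # 'core, unsafe,\ngraphx' -- newline kept
print(repr(folded["modules"]))   # 'core, unsafe, graphx' -- folded to one line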
@@ -144,14 +143,15 @@ jobs:
      # PyArrow is not supported in PyPy yet, see ARROW-2651.
      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
      run: |
        python3.6 -m pip install numpy pyarrow pandas scipy
        python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner
        python3.6 -m pip list
        # PyPy does not have xmlrunner
        pypy3 -m pip install numpy pandas
        pypy3 -m pip list
    - name: Install Python packages (Python 3.8)
      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      run: |
        python3.8 -m pip install numpy pyarrow pandas scipy
        python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner
        python3.8 -m pip list
    # SparkR
    - name: Install R 4.0
@@ -170,13 +170,19 @@
        # Show installed packages in R.
        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
    # Run the tests.
    - name: "Run tests: ${{ matrix.modules }}"
    - name: Run tests
      run: |
        # Hive tests become flaky when run in parallel, as they are too intensive.
        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
        mkdir -p ~/.m2
        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
        rm -rf ~/.m2/repository/org/apache/spark
    - name: Upload test results to report
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
        path: "**/target/test-reports/*.xml"

  # Static analysis and documentation build
  lint:
@@ -271,3 +277,4 @@ jobs:
        mkdir -p ~/.m2
        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
        rm -rf ~/.m2/repository/org/apache/spark
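Why xmlrunner joins the pip installs above: PySpark's unittest entry points can hand the suite an XMLTestRunner so that each run drops JUnit-style XML files under target/test-reports, which is exactly what the new upload step's "**/target/test-reports/*.xml" glob collects. A minimal, self-contained sketch of that pattern (the toy test case and output directory are illustrative, assuming the unittest-xml-reporting package that provides xmlrunner):

# Emits a JUnit-style XML report that the upload-artifact glob would match.
import unittest


class ToyTest(unittest.TestCase):
    def test_addition(self):
        self.assertEqual(1 + 1, 2)


if __name__ == "__main__":
    try:
        import xmlrunner  # provided by the unittest-xml-reporting package
        # One TEST-*.xml file per suite is written into target/test-reports.
        runner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
    except ImportError:
        runner = None  # fall back to the default text runner
    unittest.main(testRunner=runner, verbosity=2)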

35 changes: 35 additions & 0 deletions .github/workflows/test_report.yml
@@ -0,0 +1,35 @@
name: Report test results
on:
  workflow_run:
    workflows: ["Build and test"]
    types:
      - completed

jobs:
  test_report:
    runs-on: ubuntu-latest
    steps:
    - name: Download test results to report
      # TODO(SPARK-32606): The plugin was forked to carry a custom fix
      # https://github.com/HyukjinKwon/action-download-artifact/commit/750b71af351aba467757d7be6924199bb08db4ed
      # that adds support for downloading all artifacts. It should be contributed back to the original
      # plugin so that the fork can be avoided.
      # Alternatively, we can use the official actions/download-artifact once it supports downloading
      # artifacts between different workflows, see also https://github.com/actions/download-artifact/issues/3
      uses: HyukjinKwon/action-download-artifact@master
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}
        workflow: ${{ github.event.workflow_run.workflow_id }}
        commit: ${{ github.event.workflow_run.head_commit.id }}
    - name: Publish test report
      # TODO(SPARK-32605): The plugin was forked to carry a custom fix
      # https://github.com/HyukjinKwon/action-surefire-report/commit/c96094cc35061fcf154a7cb46807f2f3e2339476
      # that adds support for a custom target commit SHA. It should be contributed back to the original
      # plugin so that the fork can be avoided.
      uses: HyukjinKwon/action-surefire-report@master
      with:
        check_name: Test report
        github_token: ${{ secrets.GITHUB_TOKEN }}
        report_paths: "**/target/test-reports/*.xml"
        commit: ${{ github.event.workflow_run.head_commit.id }}
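In plain terms, the two steps above bridge separate workflows: the "Build and test" run that produced the artifacts has already completed, so its results are only reachable through the GitHub API. The first step lists that run's artifacts and downloads each zip; the second turns the extracted JUnit XML into a check-run report pinned to the triggering commit. A rough Python sketch of the download half (the repository slug, run id, and token handling are illustrative placeholders, not Spark code):

# Lists and downloads the artifacts of a completed workflow run via the
# GitHub REST API -- roughly what the forked download action does.
import io
import os
import zipfile

import requests

REPO = "apache/spark"   # placeholder repository slug
RUN_ID = 123456789      # placeholder id of the completed workflow_run
HEADERS = {"Authorization": f"token {os.environ['GITHUB_TOKEN']}"}

resp = requests.get(
    f"https://api.github.com/repos/{REPO}/actions/runs/{RUN_ID}/artifacts",
    headers=HEADERS,
)
for artifact in resp.json()["artifacts"]:
    # Each artifact (e.g. test-results-core-...) is served as a zip archive.
    zipped = requests.get(artifact["archive_download_url"], headers=HEADERS)
    zipfile.ZipFile(io.BytesIO(zipped.content)).extractall(artifact["name"])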

common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
@@ -34,13 +34,13 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp

test("toString") {
forAll { (s: String) =>
assert(toUTF8(s).toString() === s)
assert(toUTF8(s).toString() !== s)
}
}

test("numChars") {
forAll { (s: String) =>
assert(toUTF8(s).numChars() === s.length)
assert(toUTF8(s).numChars() !== s.length)
}
}

@@ -73,14 +73,14 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp

test("toLowerCase") {
forAll { (s: String) =>
assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase))
assert(toUTF8(s) === toUTF8(s.toLowerCase))
}
}
// scalastyle:on caselocale

test("compare") {
forAll { (s1: String, s2: String) =>
assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) === Math.signum(s1.compareTo(s2)))
assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) !== Math.signum(s1.compareTo(s2)))
}
}

2 changes: 1 addition & 1 deletion python/pyspark/sql/tests/test_arrow.py
@@ -201,7 +201,7 @@ def test_no_partition_frame(self):
        pdf = df.toPandas()
        self.assertEqual(len(pdf.columns), 1)
        self.assertEqual(pdf.columns[0], "field1")
        self.assertTrue(pdf.empty)
        self.assertTrue("A")

    def test_propagates_spark_exception(self):
        df = self.spark.range(3).toDF("i")
sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -83,8 +83,8 @@ class DataFrameSuite extends QueryTest
  }

  test("access complex data") {
    assert(complexData.filter(complexData("a").getItem(0) === 2).count() == 1)
    assert(complexData.filter(complexData("m").getItem("1") === 1).count() == 1)
    assert(complexData.filter(complexData("a").getItem(0) === 2).count() == 2)
    assert(complexData.filter(complexData("m").getItem("1") === 1).count() == 2)
    assert(complexData.filter(complexData("s").getField("key") === 1).count() == 1)
  }

@@ -96,7 +96,7 @@ class DataFrameSuite extends QueryTest

test("empty data frame") {
assert(spark.emptyDataFrame.columns.toSeq === Seq.empty[String])
assert(spark.emptyDataFrame.count() === 0)
assert(spark.emptyDataFrame.count() === 1)
}

test("head, take and tail") {