
Commit be7c445

Merge remote-tracking branch 'origin/master' into HEAD

2 parents be08f53 + a3c04ec, commit be7c445

1,036 files changed: +34,802 / -14,017 lines

.github/labeler.yml

Lines changed: 2 additions & 1 deletion
@@ -21,7 +21,6 @@ INFRA:
   - changed-files:
     - any-glob-to-any-file: [
         '.github/**/*',
-        'appveyor.yml',
         'tools/**/*',
         'dev/create-release/**/*',
         '.asf.yaml',
@@ -102,6 +101,8 @@ SQL:
       ]
   - any-glob-to-any-file: [
       'common/unsafe/**/*',
+      'common/sketch/**/*',
+      'common/variant/**/*',
       'bin/spark-sql*',
       'bin/beeline*',
       'sbin/*thriftserver*.sh',
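
For context, these globs drive the actions/labeler action. A minimal sketch of how such a config is typically consumed — the workflow name, trigger, and permissions below are assumptions, not part of this commit:

```yaml
# Hypothetical minimal workflow consuming .github/labeler.yml (not in this commit).
name: Label pull requests
on: pull_request_target

jobs:
  label:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      # actions/labeler reads .github/labeler.yml and applies labels such as
      # INFRA or SQL when changed files match the any-glob-to-any-file patterns.
      - uses: actions/labeler@v5
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
```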

.github/workflows/benchmark.yml

Lines changed: 2 additions & 3 deletions
@@ -46,7 +46,7 @@ on:
 jobs:
   matrix-gen:
     name: Generate matrix for job splits
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     env:
@@ -117,8 +117,7 @@ jobs:
     name: "Run benchmarks: ${{ github.event.inputs.class }} (JDK ${{ github.event.inputs.jdk }}, Scala ${{ github.event.inputs.scala }}, ${{ matrix.split }} out of ${{ github.event.inputs.num-splits }} splits)"
     if: always()
     needs: [matrix-gen, tpcds-1g-gen]
-    # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
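
The second hunk depends on the matrix-gen output shown in the first. A condensed sketch of that pattern, with the job bodies reduced to placeholders (step ids and the example split list are illustrative; only the output wiring mirrors this workflow):

```yaml
# Sketch of the matrix-gen / fan-out pattern used by benchmark.yml.
jobs:
  matrix-gen:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - id: set-matrix
        # Emit a JSON array of split indices, e.g. [1, 2, 3].
        run: echo "matrix=[1, 2, 3]" >> "$GITHUB_OUTPUT"

  benchmark:
    needs: matrix-gen
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # fromJson turns the string output back into a list of matrix values.
        split: ${{ fromJson(needs.matrix-gen.outputs.matrix) }}
    steps:
      - run: echo "Running split ${{ matrix.split }}"
```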

.github/workflows/build_and_test.yml

Lines changed: 35 additions & 33 deletions
@@ -52,7 +52,7 @@ on:
 jobs:
   precondition:
     name: Check changes
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     env:
       GITHUB_PREV_SHA: ${{ github.event.before }}
     outputs:
@@ -82,7 +82,7 @@ jobs:
           sparkr=`./dev/is-changed.py -m sparkr`
           tpcds=`./dev/is-changed.py -m sql`
           docker=`./dev/is-changed.py -m docker-integration-tests`
-          # 'build' and 'java-other-versions' are always true for now.
+          # 'build' and 'maven-build' are always true for now.
           # It does not save significant time and most of PRs trigger the build.
           precondition="
             {
@@ -91,7 +91,7 @@ jobs:
               \"sparkr\": \"$sparkr\",
               \"tpcds-1g\": \"$tpcds\",
               \"docker-integration-tests\": \"$docker\",
-              \"java-other-versions\": \"true\",
+              \"maven-build\": \"true\",
               \"lint\" : \"true\",
               \"k8s-integration-tests\" : \"true\",
               \"buf\" : \"true\",
@@ -122,7 +122,7 @@ jobs:
     name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }}"
     needs: precondition
     if: fromJson(needs.precondition.outputs.required).build == 'true'
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     timeout-minutes: 300
     strategy:
       fail-fast: false
@@ -133,22 +133,21 @@ jobs:
           - ${{ inputs.hadoop }}
         hive:
           - hive2.3
-        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
-        # Kinesis tests depends on external Amazon kinesis service.
         # Note that the modules below are from sparktestsupport/modules.py.
         modules:
           - >-
             core, unsafe, kvstore, avro, utils,
             network-common, network-shuffle, repl, launcher,
-            examples, sketch
+            examples, sketch, variant
           - >-
             api, catalyst, hive-thriftserver
           - >-
             mllib-local, mllib, graphx
           - >-
             streaming, sql-kafka-0-10, streaming-kafka-0-10, streaming-kinesis-asl,
-            yarn, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
-            connect, protobuf
+            kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf
+          - >-
+            yarn, connect
         # Here, we split Hive and SQL tests into some of slow ones and the rest of them.
         included-tags: [""]
         excluded-tags: [""]
@@ -212,7 +211,7 @@ jobs:
           git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
           git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
           git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit" --allow-empty
-      # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+      # Cache local repositories. Note that GitHub Actions cache has a 10G limit.
       - name: Cache Scala, SBT and Maven
         uses: actions/cache@v4
         with:
@@ -253,7 +252,7 @@ jobs:
       - name: Install Python packages (Python 3.9)
         if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
         run: |
-          python3.9 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.59.3' 'grpcio-status==1.59.3' 'protobuf==4.25.1'
+          python3.9 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
           python3.9 -m pip list
       # Run the tests.
       - name: Run tests
@@ -286,7 +285,6 @@ jobs:
   infra-image:
     name: "Base image build"
     needs: precondition
-    # Currently, enable docker build from cache for `master` and branch (since 3.4) jobs
     if: >-
       fromJson(needs.precondition.outputs.required).pyspark == 'true' ||
       fromJson(needs.precondition.outputs.required).lint == 'true' ||
@@ -335,7 +333,7 @@ jobs:
     # always run if pyspark == 'true', even infra-image is skip (such as non-master job)
     if: (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark == 'true'
     name: "Build modules: ${{ matrix.modules }}"
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     timeout-minutes: 300
     container:
       image: ${{ needs.precondition.outputs.image_url }}
@@ -395,7 +393,7 @@ jobs:
           git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
           git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
           git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit" --allow-empty
-      # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+      # Cache local repositories. Note that GitHub Actions cache has a 10G limit.
       - name: Cache Scala, SBT and Maven
         uses: actions/cache@v4
         with:
@@ -458,7 +456,7 @@ jobs:
           fi
       - name: Upload coverage to Codecov
         if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'
-        uses: codecov/codecov-action@v2
+        uses: codecov/codecov-action@v4
        with:
           files: ./python/coverage.xml
           flags: unittests
@@ -483,7 +481,7 @@ jobs:
     # always run if sparkr == 'true', even infra-image is skip (such as non-master job)
     if: (!cancelled()) && fromJson(needs.precondition.outputs.required).sparkr == 'true'
     name: "Build modules: sparkr"
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     timeout-minutes: 300
     container:
       image: ${{ needs.precondition.outputs.image_url }}
@@ -513,7 +511,7 @@ jobs:
           git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
           git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
           git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit" --allow-empty
-      # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+      # Cache local repositories. Note that GitHub Actions cache has a 10G limit.
       - name: Cache Scala, SBT and Maven
         uses: actions/cache@v4
         with:
@@ -561,7 +559,7 @@ jobs:
     needs: [precondition]
     if: (!cancelled()) && fromJson(needs.precondition.outputs.required).buf == 'true'
     name: Protobuf breaking change detection and Python CodeGen check
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout Spark repository
         uses: actions/checkout@v4
@@ -595,7 +593,7 @@ jobs:
           python-version: '3.9'
       - name: Install dependencies for Python CodeGen check
         run: |
-          python3.9 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==0.982' 'mypy-protobuf==3.3.0'
+          python3.9 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
           python3.9 -m pip list
       - name: Python CodeGen check
         run: ./dev/connect-check-protos.py
@@ -606,7 +604,7 @@ jobs:
     # always run if lint == 'true', even infra-image is skip (such as non-master job)
     if: (!cancelled()) && fromJson(needs.precondition.outputs.required).lint == 'true'
     name: Linters, licenses, dependencies and documentation generation
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     timeout-minutes: 300
     env:
       LC_ALL: C.UTF-8
@@ -633,7 +631,7 @@ jobs:
           git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
           git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
           git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit" --allow-empty
-      # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+      # Cache local repositories. Note that GitHub Actions cache has a 10G limit.
       - name: Cache Scala, SBT and Maven
         uses: actions/cache@v4
         with:
@@ -703,8 +701,8 @@ jobs:
           # See 'docutils<0.18.0' in SPARK-39421
           python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
             ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
-            'flake8==3.9.0' 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
-            'pandas-stubs==1.2.0.53' 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+            'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
+            'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
             'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
           python3.9 -m pip list
       - name: Python linter
@@ -790,17 +788,21 @@ jobs:
           path: site.tar.bz2
           retention-days: 1

-  java-other-versions:
+  maven-build:
     needs: precondition
-    if: fromJson(needs.precondition.outputs.required).java-other-versions == 'true'
-    name: Java ${{ matrix.java }} build with Maven
+    if: fromJson(needs.precondition.outputs.required).maven-build == 'true'
+    name: Java ${{ matrix.java }} build with Maven (${{ matrix.os }})
     strategy:
       fail-fast: false
       matrix:
-        java:
-          - 17
-          - 21
-    runs-on: ubuntu-22.04
+        include:
+          - java: 17
+            os: ubuntu-latest
+          - java: 21
+            os: ubuntu-latest
+          - java: 21
+            os: macos-14
+    runs-on: ${{ matrix.os }}
     timeout-minutes: 300
     steps:
       - name: Checkout Spark repository
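
The switch from a plain java list to matrix.include lets each matrix entry carry its own runner. A minimal, self-contained sketch of the same pattern, with the job body reduced to a placeholder step:

```yaml
# Minimal sketch of the include-based matrix used by the maven-build job above.
jobs:
  maven-build:
    strategy:
      fail-fast: false
      matrix:
        include:
          - java: 17
            os: ubuntu-latest
          - java: 21
            os: ubuntu-latest
          - java: 21
            os: macos-14   # new macOS leg added by this commit
    # Each matrix entry selects its own runner instead of a hard-coded label.
    runs-on: ${{ matrix.os }}
    steps:
      - run: echo "Building with JDK ${{ matrix.java }} on ${{ matrix.os }}"
```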
@@ -952,7 +954,7 @@ jobs:
     needs: precondition
     if: fromJson(needs.precondition.outputs.required).docker-integration-tests == 'true'
     name: Run Docker integration tests
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     timeout-minutes: 300
     env:
       HADOOP_PROFILE: ${{ inputs.hadoop }}
@@ -1021,7 +1023,7 @@ jobs:
     needs: precondition
     if: fromJson(needs.precondition.outputs.required).k8s-integration-tests == 'true'
     name: Run Spark on Kubernetes Integration test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     timeout-minutes: 300
     steps:
       - name: Checkout Spark repository
@@ -1094,7 +1096,7 @@ jobs:
     needs: [precondition]
     if: fromJson(needs.precondition.outputs.required).ui == 'true'
     name: Run Spark UI tests
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     timeout-minutes: 300
     steps:
       - uses: actions/checkout@v4

.github/workflows/build_coverage.yml

Lines changed: 1 addition & 0 deletions
@@ -36,6 +36,7 @@ jobs:
       hadoop: hadoop3
       envs: >-
         {
+          "PYTHON_TO_TEST": "python3.11",
           "PYSPARK_CODECOV": "true"
         }
       jobs: >-
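
The envs block is passed to a reusable workflow as a JSON string. A sketch of how the called workflow can read individual keys from it — the input declaration and job name here are assumptions; only the fromJSON(inputs.envs) lookup mirrors the pattern seen in build_and_test.yml above:

```yaml
# Sketch of a reusable workflow reading keys from the envs JSON string.
on:
  workflow_call:
    inputs:
      envs:
        type: string
        default: '{}'

jobs:
  pyspark:
    runs-on: ubuntu-latest
    steps:
      - if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'
        # PYTHON_TO_TEST selects the interpreter under test, e.g. python3.11.
        run: echo "Testing with ${{ fromJSON(inputs.envs).PYTHON_TO_TEST }}"
```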

.github/workflows/build_maven.yml

Lines changed: 2 additions & 2 deletions
@@ -17,7 +17,7 @@
 # under the License.
 #

-name: "Build using Maven (master, Scala 2.13, Hadoop 3, JDK 17)"
+name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 17)"

 on:
   schedule:
@@ -33,5 +33,5 @@ jobs:
     with:
       envs: >-
         {
-          "SKIP_SPARK_RELEASE_VERSIONS": "3.3.4,3.4.2,3.5.0"
+          "SKIP_SPARK_RELEASE_VERSIONS": "3.4.2"
         }

.github/workflows/build_maven_java21.yml

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@
 # under the License.
 #

-name: "Build using Maven (master, Scala 2.13, Hadoop 3, JDK 21)"
+name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21)"

 on:
   schedule:
New workflow file (path not shown in this view)

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, macos-14)"
+
+on:
+  schedule:
+    - cron: '0 20 * * *'
+
+jobs:
+  run-build:
+    permissions:
+      packages: write
+    name: Run
+    uses: ./.github/workflows/maven_test.yml
+    if: github.repository == 'apache/spark'
+    with:
+      java: 21
+      os: macos-14
+      envs: >-
+        {
+          "OBJC_DISABLE_INITIALIZE_FORK_SAFETY": "YES"
+        }
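
This new nightly workflow just calls the reusable maven_test.yml with java, os, and envs inputs. The called workflow is not part of this view, so the excerpt below is a hypothetical sketch of the input declarations it would need for the call above to work; names, types, and defaults are assumptions:

```yaml
# Hypothetical excerpt of the called workflow (maven_test.yml) input surface.
on:
  workflow_call:
    inputs:
      java:
        type: number
        default: 17
      os:
        type: string
        default: 'ubuntu-latest'
      envs:
        type: string
        default: '{}'

jobs:
  build:
    # The caller above passes os: macos-14, so the nightly job runs on macOS.
    runs-on: ${{ inputs.os }}
    steps:
      - run: echo "Maven build on ${{ inputs.os }} with JDK ${{ inputs.java }}"
```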
