Skip to content
Closed

init #24

Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
9f038aa
[SPARK-50112] Moving Avro files to sql/core so they can be used by Tr…
ericm-db Oct 25, 2024
28c3dbd
moving scala to scala dir
ericm-db Oct 25, 2024
2e33fd1
adding deprecated one
ericm-db Oct 25, 2024
b037859
init
ericm-db Oct 25, 2024
c1db91d
adding enum
ericm-db Oct 25, 2024
a30a29d
feedback and test
ericm-db Oct 25, 2024
2ebf6a8
creating utils class
ericm-db Oct 25, 2024
0559480
micheal feedback
ericm-db Oct 31, 2024
d3845a5
ValueState post-refactor
ericm-db Nov 1, 2024
35b3b0d
multivalue state encoder
ericm-db Nov 1, 2024
dcf0df7
encodeToUnsafeRow avro method
ericm-db Nov 2, 2024
dfc6b1e
using correct val
ericm-db Nov 4, 2024
5b98aa6
comments
ericm-db Nov 4, 2024
0d37ffd
calling encodeUnsafeRow
ericm-db Nov 4, 2024
9a1f825
merge into upstream/master
ericm-db Nov 5, 2024
5c8dd33
Merge remote-tracking branch 'upstream/master' into avro
ericm-db Nov 5, 2024
9b8dd5d
[SPARK-50127] Implement Avro encoding for MapState and PrefixKeyScanS…
ericm-db Nov 7, 2024
448ea76
making schema conversion lazy
ericm-db Nov 7, 2024
386fbf1
batch succeeds
ericm-db Nov 7, 2024
896e24f
actually enabling ttl
ericm-db Nov 7, 2024
15c5f71
including hidden files
ericm-db Nov 7, 2024
1f5e5f7
testWithEncodingTypes
ericm-db Nov 7, 2024
1826d5a
no longer relying on unsaferow
ericm-db Nov 8, 2024
c5ef895
everything but batch works
ericm-db Nov 8, 2024
e22e1a2
splitting it up
ericm-db Nov 8, 2024
730cae0
easy feedback to address
ericm-db Nov 9, 2024
754ce6c
batch works
ericm-db Nov 9, 2024
b6dbfdb
added test suite for non-contiguous ordinals
ericm-db Nov 11, 2024
e6f0b7a
using negative/null val marker
ericm-db Nov 11, 2024
ca660c0
removing log line
ericm-db Nov 11, 2024
41de8ae
getAvroEnc
ericm-db Nov 11, 2024
c49acd2
init
ericm-db Nov 5, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Prev Previous commit
Next Next commit
merge into upstream/master
  • Loading branch information
ericm-db committed Nov 5, 2024
commit 9a1f8256318bed8c7576ac303f0914c6e75a1a2b
6 changes: 2 additions & 4 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,14 @@ INFRA:
'.asf.yaml',
'.gitattributes',
'.gitignore',
'dev/merge_spark_pr.py',
'dev/run-tests-jenkins*'
'dev/merge_spark_pr.py'
]

BUILD:
- changed-files:
- all-globs-to-any-file: [
'dev/**/*',
'!dev/merge_spark_pr.py',
'!dev/run-tests-jenkins*'
'!dev/merge_spark_pr.py'
]
- any-glob-to-any-file: [
'build/**/*',
Expand Down
31 changes: 12 additions & 19 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ jobs:
- name: Install Python packages (Python 3.11)
if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
run: |
python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3'
python3.11 -m pip list
# Run the tests.
- name: Run tests
Expand Down Expand Up @@ -614,7 +614,7 @@ jobs:
python-version: '3.11'
- name: Install dependencies for Python CodeGen check
run: |
python3.11 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
python3.11 -m pip install 'black==23.9.1' 'protobuf==5.28.3' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
python3.11 -m pip list
- name: Python CodeGen check
run: ./dev/connect-check-protos.py
Expand Down Expand Up @@ -702,13 +702,6 @@ jobs:
run: ./dev/lint-java
- name: Spark connect jvm client mima check
run: ./dev/connect-jvm-client-mima-check
- name: Install Python linter dependencies for branch-3.4
if: inputs.branch == 'branch-3.4'
run: |
# SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
# Should delete this section after SPARK 3.4 EOL.
python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
- name: Install Python linter dependencies for branch-3.5
if: inputs.branch == 'branch-3.5'
run: |
Expand All @@ -717,15 +710,15 @@ jobs:
python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
- name: Install Python dependencies for python linter and documentation generation
if: inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5'
if: inputs.branch != 'branch-3.5'
run: |
# Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
# See 'ipython_genutils' in SPARK-38517
# See 'docutils<0.18.0' in SPARK-39421
python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
ipython ipython_genutils sphinx_plotly_directive numpy pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
python3.9 -m pip list
- name: Python linter
Expand All @@ -745,16 +738,16 @@ jobs:
if: inputs.branch == 'branch-3.5'
run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi
# Should delete this section after SPARK 3.5 EOL.
- name: Install JavaScript linter dependencies for branch-3.4, branch-3.5
if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5'
- name: Install JavaScript linter dependencies for branch-3.5
if: inputs.branch == 'branch-3.5'
run: |
apt update
apt-get install -y nodejs npm
- name: JS linter
run: ./dev/lint-js
# Should delete this section after SPARK 3.5 EOL.
- name: Install R linter dependencies for branch-3.4, branch-3.5
if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5'
- name: Install R linter dependencies for branch-3.5
if: inputs.branch == 'branch-3.5'
run: |
apt update
apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \
Expand Down Expand Up @@ -834,7 +827,7 @@ jobs:
distribution: zulu
java-version: ${{ inputs.java }}
- name: Install Python dependencies for python linter and documentation generation
if: inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5'
if: inputs.branch != 'branch-3.5'
run: |
# Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
# See 'ipython_genutils' in SPARK-38517
Expand All @@ -845,8 +838,8 @@ jobs:
'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
python3.9 -m pip list
- name: Install dependencies for documentation generation for branch-3.4, branch-3.5
if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5'
- name: Install dependencies for documentation generation for branch-3.5
if: inputs.branch == 'branch-3.5'
run: |
# pandoc is required to generate PySpark APIs as well in nbsphinx.
apt-get update -y
Expand Down Expand Up @@ -1134,7 +1127,7 @@ jobs:
export PVC_TESTS_VM_PATH=$PVC_TMP_DIR
minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 &
kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true
if [[ "${{ inputs.branch }}" == 'branch-3.5' || "${{ inputs.branch }}" == 'branch-3.4' ]]; then
if [[ "${{ inputs.branch }}" == 'branch-3.5' ]]; then
kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.7.0/installer/volcano-development.yaml || true
else
kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.9.0/installer/volcano-development.yaml || true
Expand Down
51 changes: 0 additions & 51 deletions .github/workflows/build_branch34.yml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
# under the License.
#

name: "Build / Python-only (branch-3.4)"
name: "Build / Python-only (master, Python 3.9)"

on:
schedule:
- cron: '0 9 * * *'
- cron: '0 21 * * *'

jobs:
run-build:
Expand All @@ -31,12 +31,12 @@ jobs:
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
java: 8
branch: branch-3.4
java: 17
branch: master
hadoop: hadoop3
envs: >-
{
"PYTHON_TO_TEST": ""
"PYTHON_TO_TEST": "python3.9"
}
jobs: >-
{
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_python_connect35.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'

# Add Python deps for Spark Connect.
pip install 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3' 'googleapis-common-protos==1.56.4' 'graphviz==0.20.3'
pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3'

# Add torch as a testing dependency for TorchDistributor
pip install 'torch==2.0.1' 'torchvision==0.15.2' torcheval
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# under the License.
#

name: "Build / Python-only (master, PyPy 3.9)"
name: "Build / Python-only (master, PyPy 3.10)"

on:
schedule:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/build_sparkr_window.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
#
name: "Build / SparkR-only (master, 4.4.1, windows-2022)"
name: "Build / SparkR-only (master, 4.4.2, windows-2022)"

on:
schedule:
Expand Down Expand Up @@ -50,10 +50,10 @@ jobs:
with:
distribution: zulu
java-version: 17
- name: Install R 4.4.1
- name: Install R 4.4.2
uses: r-lib/actions/setup-r@v2
with:
r-version: 4.4.1
r-version: 4.4.2
- name: Install R dependencies
run: |
Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/maven_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ jobs:
- name: Install Python packages (Python 3.11)
if: (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
run: |
python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3'
python3.11 -m pip list
# Run the tests.
- name: Run tests
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/publish_snapshot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ on:
description: 'list of branches to publish (JSON)'
required: true
# keep in sync with default value of strategy matrix 'branch'
default: '["master", "branch-3.5", "branch-3.4"]'
default: '["master", "branch-3.5"]'

jobs:
publish-snapshot:
Expand All @@ -38,7 +38,7 @@ jobs:
fail-fast: false
matrix:
# keep in sync with default value of workflow_dispatch input 'branch'
branch: ${{ fromJSON( inputs.branch || '["master", "branch-3.5", "branch-3.4"]' ) }}
branch: ${{ fromJSON( inputs.branch || '["master", "branch-3.5"]' ) }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v4
Expand All @@ -52,13 +52,13 @@ jobs:
restore-keys: |
snapshot-maven-
- name: Install Java 8 for branch-3.x
if: matrix.branch == 'branch-3.5' || matrix.branch == 'branch-3.4'
if: matrix.branch == 'branch-3.5'
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: 8
- name: Install Java 17
if: matrix.branch != 'branch-3.5' && matrix.branch != 'branch-3.4'
if: matrix.branch != 'branch-3.5'
uses: actions/setup-java@v4
with:
distribution: temurin
Expand Down
Loading
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.