diff --git a/.github/workflows/build_python_minimum.yml b/.github/workflows/build_python_minimum.yml
index 4e65503006489..3514a82f6217c 100644
--- a/.github/workflows/build_python_minimum.yml
+++ b/.github/workflows/build_python_minimum.yml
@@ -38,7 +38,7 @@ jobs:
       envs: >-
         {
           "PYSPARK_IMAGE_TO_TEST": "python-minimum",
-          "PYTHON_TO_TEST": "python3.9"
+          "PYTHON_TO_TEST": "python3.10"
         }
       jobs: >-
         {
diff --git a/dev/spark-test-image/python-minimum/Dockerfile b/dev/spark-test-image/python-minimum/Dockerfile
index 59d9ebed4e40f..0b688b9de0e2f 100644
--- a/dev/spark-test-image/python-minimum/Dockerfile
+++ b/dev/spark-test-image/python-minimum/Dockerfile
@@ -24,11 +24,12 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark wi
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""
 
-ENV FULL_REFRESH_DATE=20250327
+ENV FULL_REFRESH_DATE=20250703
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN=true
 
+# Should keep the installation consistent with https://apache.github.io/spark/api/python/getting_started/install.html
 RUN apt-get update && apt-get install -y \
     build-essential \
     ca-certificates \
@@ -52,30 +53,19 @@ RUN apt-get update && apt-get install -y \
     libxml2-dev \
     openjdk-17-jdk-headless \
     pkg-config \
+    python3.10 \
+    python3-psutil \
     qpdf \
     tzdata \
     software-properties-common \
     wget \
     zlib1g-dev
-
-# Should keep the installation consistent with https://apache.github.io/spark/api/python/getting_started/install.html
-
-# Install Python 3.9
-RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt-get update && apt-get install -y \
-    python3.9 \
-    python3.9-distutils \
-    && apt-get autoremove --purge -y \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-
-ARG BASIC_PIP_PKGS="numpy==1.21 pyarrow==11.0.0 pandas==2.0.0 six==1.16.0 scipy scikit-learn coverage unittest-xml-reporting"
+ARG BASIC_PIP_PKGS="numpy==1.22.4 pyarrow==11.0.0 pandas==2.2.0 six==1.16.0 scipy scikit-learn coverage unittest-xml-reporting"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 googleapis-common-protos==1.65.0 graphviz==0.20 protobuf"
 
 # Install Python 3.9 packages
-RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
-RUN python3.9 -m pip install --force $BASIC_PIP_PKGS $CONNECT_PIP_PKGS && \
-    python3.9 -m pip cache purge
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
+RUN python3.10 -m pip install --force $BASIC_PIP_PKGS $CONNECT_PIP_PKGS && \
+    python3.10 -m pip cache purge
diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst
index 73a80ce014a83..7e1a87eafffd4 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -30,7 +30,7 @@ and building from the source.
 Python Versions Supported
 -------------------------
 
-Python 3.9 and above.
+Python 3.10 and above.
 
 
 Using PyPI
@@ -143,7 +143,7 @@ the same session as pyspark (you can install in several steps too).
 
 .. code-block:: bash
 
-    conda install -c conda-forge pyspark  # can also add "python=3.9 some_package [etc.]" here
+    conda install -c conda-forge pyspark  # can also add "python=3.10 some_package [etc.]" here
 
 Note that `PySpark for conda `_ is maintained separately by the community; while new versions generally get packaged quickly, the
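
A quick way to confirm that a rebuilt python-minimum image matches the new floors is a small check script run with the image's python3.10. The sketch below is illustrative only and not part of this change (the file name check_python_minimum.py and the idea of running it inside the container are assumptions); it simply restates the interpreter floor and the pins from BASIC_PIP_PKGS above.

```python
# check_python_minimum.py -- illustrative sketch, not part of this diff.
# Asserts the Python 3.10 floor and prints the versions of the packages
# pinned in BASIC_PIP_PKGS (dev/spark-test-image/python-minimum/Dockerfile).
import sys

import numpy
import pandas
import pyarrow

# New interpreter floor introduced by this change.
assert sys.version_info >= (3, 10), f"need Python 3.10+, got {sys.version}"

# Exact pins used when building the python-minimum image.
pins = {"numpy": "1.22.4", "pandas": "2.2.0", "pyarrow": "11.0.0"}

for module in (numpy, pandas, pyarrow):
    print(f"{module.__name__} {module.__version__} (image pins {pins[module.__name__]})")
```

One possible invocation, assuming the script is mounted into a locally built image: docker run --rm -v "$PWD":/work <image-tag> python3.10 /work/check_python_minimum.py.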