20 changes: 20 additions & 0 deletions .github/workflows/main.yml
@@ -91,10 +91,12 @@ jobs:
scala) SUFFIX=ubuntu
;;
esac
BASE_IMAGE_TAG=${{ inputs.spark }}-scala${{ inputs.scala }}-java${{ inputs.java }}-ubuntu
TAG=scala${{ inputs.scala }}-java${{ inputs.java }}-$SUFFIX

IMAGE_NAME=spark
IMAGE_PATH=${{ inputs.spark }}/$TAG
BASE_IMAGE_PATH=${{ inputs.spark }}/scala${{ inputs.scala }}-java${{ inputs.java }}-ubuntu
if [ "${{ inputs.build }}" == "true" ]; then
# Use the local registry to build and test
REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
@@ -105,6 +107,7 @@
TEST_REPO=${{ inputs.repository }}
UNIQUE_IMAGE_TAG=${{ inputs.image-tag }}
fi
BASE_IMAGE_URL=$TEST_REPO/$IMAGE_NAME:$BASE_IMAGE_TAG
IMAGE_URL=$TEST_REPO/$IMAGE_NAME:$UNIQUE_IMAGE_TAG

PUBLISH_REPO=${{ inputs.repository }}
@@ -116,8 +119,12 @@
echo "TEST_REPO=${TEST_REPO}" >> $GITHUB_ENV
# Image name: spark
echo "IMAGE_NAME=${IMAGE_NAME}" >> $GITHUB_ENV
# Base image Dockerfile path: 3.3.0/scala2.12-java11-ubuntu
echo "BASE_IMAGE_PATH=${BASE_IMAGE_PATH}" >> $GITHUB_ENV
# Image dockerfile path: 3.3.0/scala2.12-java11-python3-ubuntu
echo "IMAGE_PATH=${IMAGE_PATH}" >> $GITHUB_ENV
# Base image URL: ghcr.io/apache/spark-docker/spark:3.3.0-scala2.12-java11-ubuntu
echo "BASE_IMAGE_URL=${BASE_IMAGE_URL}" >> $GITHUB_ENV
# Image URL: ghcr.io/apache/spark-docker/spark:3.3.0-scala2.12-java11-python3-ubuntu
echo "IMAGE_URL=${IMAGE_URL}" >> $GITHUB_ENV

@@ -132,6 +139,9 @@
echo "IMAGE_PATH: "${IMAGE_PATH}
echo "IMAGE_URL: "${IMAGE_URL}

echo "BASE_IMAGE_PATH: "${BASE_IMAGE_PATH}
echo "BASE_IMAGE_URL: "${BASE_IMAGE_URL}

echo "PUBLISH_REPO:"${PUBLISH_REPO}
echo "PUBLISH_IMAGE_URL:"${PUBLISH_IMAGE_URL}

@@ -146,10 +156,20 @@
# This is required by the local registry
driver-opts: network=host

- name: Build - Build the base image
if: ${{ inputs.build }}
uses: docker/build-push-action@v3
with:
context: ${{ env.BASE_IMAGE_PATH }}
tags: ${{ env.BASE_IMAGE_URL }}
platforms: linux/amd64,linux/arm64
push: true

- name: Build - Build and push test image
if: ${{ inputs.build }}
uses: docker/build-push-action@v3
with:
build-args: BASE_IMAGE=${{ env.BASE_IMAGE_URL }}
context: ${{ env.IMAGE_PATH }}
tags: ${{ env.IMAGE_URL }}
platforms: linux/amd64,linux/arm64
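Taken together, the workflow changes above add a two-stage build: the scala/java-only base image is built and pushed first, and the language-binding image is then built on top of it via the `BASE_IMAGE` build argument. A rough local equivalent of the two `docker/build-push-action` steps, as a sketch — the registry host, version directory, and tags here are illustrative assumptions, not values from the diff:

```bash
# Rough local equivalent of the two build steps in the workflow.
# Registry host and directory names are illustrative assumptions.
REGISTRY=localhost:5000
BASE_IMAGE_URL=$REGISTRY/spark:3.4.0-scala2.12-java11-ubuntu
IMAGE_URL=$REGISTRY/spark:3.4.0-scala2.12-java11-python3-ubuntu

# Step 1: build the base (scala/java-only) image and push it so the
# second build can pull it from the local registry.
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --tag "$BASE_IMAGE_URL" \
  --push \
  3.4.0/scala2.12-java11-ubuntu

# Step 2: build the language-binding image on top of it by overriding
# the BASE_IMAGE build argument declared in its Dockerfile.
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --build-arg BASE_IMAGE="$BASE_IMAGE_URL" \
  --tag "$IMAGE_URL" \
  --push \
  3.4.0/scala2.12-java11-python3-ubuntu
```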
63 changes: 2 additions & 61 deletions 3.4.0/scala2.12-java11-python3-r-ubuntu/Dockerfile
@@ -14,73 +14,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM eclipse-temurin:11-jre-focal

ARG spark_uid=185

RUN groupadd --system --gid=${spark_uid} spark && \
useradd --system --uid=${spark_uid} --gid=spark spark
ARG BASE_IMAGE=spark:3.4.0-scala2.12-java11-ubuntu
FROM $BASE_IMAGE

RUN set -ex && \
apt-get update && \
ln -s /lib /lib64 && \
apt install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu && \
apt install -y python3 python3-pip && \
apt install -y r-base r-base-dev && \
mkdir -p /opt/spark && \
mkdir /opt/spark/python && \
mkdir -p /opt/spark/examples && \
mkdir -p /opt/spark/work-dir && \
touch /opt/spark/RELEASE && \
chown -R spark:spark /opt/spark && \
rm /bin/sh && \
ln -sv /bin/bash /bin/sh && \
echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
rm -rf /var/cache/apt/* && \
rm -rf /var/lib/apt/lists/*

# Install Apache Spark
# https://downloads.apache.org/spark/KEYS
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3.tgz \
SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3.tgz.asc \
GPG_KEY=CC68B3D16FE33A766705160BA7E57908C7A4E1B1

RUN set -ex; \
export SPARK_TMP="$(mktemp -d)"; \
cd $SPARK_TMP; \
wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
export GNUPGHOME="$(mktemp -d)"; \
gpg --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
gpg --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
gpg --batch --verify spark.tgz.asc spark.tgz; \
gpgconf --kill all; \
rm -rf "$GNUPGHOME" spark.tgz.asc; \
\
tar -xf spark.tgz --strip-components=1; \
chown -R spark:spark .; \
mv jars /opt/spark/; \
mv bin /opt/spark/; \
mv sbin /opt/spark/; \
mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
mv examples /opt/spark/; \
mv kubernetes/tests /opt/spark/; \
mv data /opt/spark/; \
mv python/pyspark /opt/spark/python/pyspark/; \
mv python/lib /opt/spark/python/lib/; \
mv R /opt/spark/; \
cd ..; \
rm -rf "$SPARK_TMP";

COPY entrypoint.sh /opt/

ENV SPARK_HOME /opt/spark
ENV R_HOME /usr/lib/R

WORKDIR /opt/spark/work-dir
RUN chmod g+w /opt/spark/work-dir
RUN chmod a+x /opt/decom.sh
RUN chmod a+x /opt/entrypoint.sh

ENTRYPOINT [ "/opt/entrypoint.sh" ]
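The two added lines at the top of this Dockerfile swap the pinned `eclipse-temurin` base for a parameterized one. Below is a minimal sketch of the ARG-before-FROM pattern they rely on, with an illustrative image reference; the default keeps a plain `docker build` working, while CI overrides it with `--build-arg BASE_IMAGE=...`:

```dockerfile
# Minimal sketch of the ARG-before-FROM pattern; the image reference is
# illustrative. The default keeps a standalone `docker build` working,
# while CI passes --build-arg BASE_IMAGE=<local registry tag>.
ARG BASE_IMAGE=spark:3.4.0-scala2.12-java11-ubuntu
FROM $BASE_IMAGE

# An ARG declared before FROM goes out of scope after FROM; re-declare
# it inside the stage if a later instruction needs to read it.
ARG BASE_IMAGE
RUN echo "built on top of ${BASE_IMAGE}"
```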
114 changes: 0 additions & 114 deletions 3.4.0/scala2.12-java11-python3-r-ubuntu/entrypoint.sh

This file was deleted.
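With this Dockerfile now building `FROM` the shared base image, the per-flavor entrypoint script is presumably redundant: the base image already ships `/opt/entrypoint.sh` and declares it as the `ENTRYPOINT`, so derived images inherit both.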

63 changes: 2 additions & 61 deletions 3.4.0/scala2.12-java11-python3-ubuntu/Dockerfile
@@ -14,70 +14,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM eclipse-temurin:11-jre-focal

ARG spark_uid=185

RUN groupadd --system --gid=${spark_uid} spark && \
useradd --system --uid=${spark_uid} --gid=spark spark
ARG BASE_IMAGE=spark:3.4.0-scala2.12-java11-ubuntu
FROM $BASE_IMAGE

RUN set -ex && \
apt-get update && \
ln -s /lib /lib64 && \
apt install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu && \
apt install -y python3 python3-pip && \
mkdir -p /opt/spark && \
mkdir /opt/spark/python && \
mkdir -p /opt/spark/examples && \
mkdir -p /opt/spark/work-dir && \
touch /opt/spark/RELEASE && \
chown -R spark:spark /opt/spark && \
rm /bin/sh && \
ln -sv /bin/bash /bin/sh && \
echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
rm -rf /var/cache/apt/* && \
rm -rf /var/lib/apt/lists/*

# Install Apache Spark
# https://downloads.apache.org/spark/KEYS
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3.tgz \
SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3.tgz.asc \
GPG_KEY=CC68B3D16FE33A766705160BA7E57908C7A4E1B1

RUN set -ex; \
export SPARK_TMP="$(mktemp -d)"; \
cd $SPARK_TMP; \
wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
export GNUPGHOME="$(mktemp -d)"; \
gpg --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
gpg --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
gpg --batch --verify spark.tgz.asc spark.tgz; \
gpgconf --kill all; \
rm -rf "$GNUPGHOME" spark.tgz.asc; \
\
tar -xf spark.tgz --strip-components=1; \
chown -R spark:spark .; \
mv jars /opt/spark/; \
mv bin /opt/spark/; \
mv sbin /opt/spark/; \
mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
mv examples /opt/spark/; \
mv kubernetes/tests /opt/spark/; \
mv data /opt/spark/; \
mv python/pyspark /opt/spark/python/pyspark/; \
mv python/lib /opt/spark/python/lib/; \
cd ..; \
rm -rf "$SPARK_TMP";

COPY entrypoint.sh /opt/

ENV SPARK_HOME /opt/spark

WORKDIR /opt/spark/work-dir
RUN chmod g+w /opt/spark/work-dir
RUN chmod a+x /opt/decom.sh
RUN chmod a+x /opt/entrypoint.sh

ENTRYPOINT [ "/opt/entrypoint.sh" ]
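Because `BASE_IMAGE` carries a default, this flavor still builds standalone; CI simply pins the base to the image it just pushed. A hedged sketch — the tags and registry below are assumptions:

```bash
# Standalone build falls back to the default BASE_IMAGE (assumes the
# spark:3.4.0-scala2.12-java11-ubuntu tag is resolvable locally):
docker build -t spark-python3-test 3.4.0/scala2.12-java11-python3-ubuntu

# CI instead pins the base image it just built and pushed:
docker build \
  --build-arg BASE_IMAGE=localhost:5000/spark:3.4.0-scala2.12-java11-ubuntu \
  -t spark-python3-test \
  3.4.0/scala2.12-java11-python3-ubuntu
```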