[SPARK-26015][K8S] Set a default UID for Spark on K8S Images
Adds USER directives to the Dockerfiles; the UID is configurable via a build
argument for easy customisation. A -u flag is added to
bin/docker-image-tool.sh to make setting it easy.
rvesse committed Nov 29, 2018
commit 26697fc545ba816dedc789b186a26c9d8636f4e6
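With this change a custom UID can be supplied at image build time. A hypothetical invocation (repository and tag are placeholders):

    # Build the Spark images with the main process running as UID 1000
    ./bin/docker-image-tool.sh -r myrepo -t mytag -u 1000 build

If -u is omitted, the images fall back to the default UID of 185 baked into the Dockerfiles via the spark_uid build argument.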
15 changes: 12 additions & 3 deletions bin/docker-image-tool.sh
@@ -132,6 +132,11 @@ function build {
SPARK_ROOT="$CTX_DIR/base"
fi

# If a custom SPARK_UID was set add it to build arguments
if [ -n "$SPARK_UID" ]; then
BUILD_ARGS+=(--build-arg spark_uid=$SPARK_UID)
fi

# Verify that the Docker image content directory is present
if [ ! -d "$SPARK_ROOT/kubernetes/dockerfiles" ]; then
error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
@@ -207,8 +212,10 @@ Options:
-t tag Tag to apply to the built image, or to identify the image to be pushed.
-m Use minikube's Docker daemon.
-n Build docker image with --no-cache
-u uid UID to use in the USER directive to set the user the main Spark process runs as inside the
resulting container
-b arg Build arg to build or push the image. For multiple build args, this option needs to
be used separately for each build arg.
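Based on the help text above, -u is a convenience wrapper; the same effect should be achievable with the generic -b option, e.g.:

    ./bin/docker-image-tool.sh -r myrepo -t mytag -b spark_uid=1000 build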

Using minikube when building images will do so directly into minikube's Docker daemon.
There is no need to push the images into minikube in that case, they'll be automatically
@@ -243,7 +250,8 @@ PYDOCKERFILE=
RDOCKERFILE=
NOCACHEARG=
BUILD_PARAMS=
SPARK_UID=
while getopts f:p:R:mr:t:nb:u: option
do
case "${option}"
in
@@ -263,6 +271,7 @@ do
fi
eval $(minikube docker-env)
;;
u) SPARK_UID=${OPTARG};;
esac
done

resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
@@ -17,6 +17,8 @@

FROM openjdk:8-alpine

ARG spark_uid=185

# Before building the docker image, first build and make a Spark distribution following
# the instructions in http://spark.apache.org/docs/latest/building-spark.html.
# If this docker file is being used in the context of building your images from a Spark
@@ -47,5 +49,9 @@ COPY data /opt/spark/data
ENV SPARK_HOME /opt/spark

WORKDIR /opt/spark/work-dir
RUN chmod g+w /opt/spark/work-dir

ENTRYPOINT [ "/opt/entrypoint.sh" ]

# Specify the User that the actual main process will run as
USER ${spark_uid}
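Since spark_uid is an ARG consumed by the USER directive, the default of 185 can also be overridden when calling docker build directly from an unpacked distribution (image name is a placeholder):

    docker build --build-arg spark_uid=1000 \
      -t my-spark:dev \
      -f kubernetes/dockerfiles/spark/Dockerfile .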
resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile
@@ -16,8 +16,14 @@
#

ARG base_img
ARG spark_uid=185

FROM $base_img
WORKDIR /

# Reset to root to run installation tasks
USER 0

RUN mkdir ${SPARK_HOME}/R

RUN apk add --no-cache R R-dev
@@ -27,3 +33,6 @@ ENV R_HOME /usr/lib/R

WORKDIR /opt/spark/work-dir
ENTRYPOINT [ "/opt/entrypoint.sh" ]

# Specify the User that the actual main process will run as
USER ${spark_uid}
resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile
@@ -16,8 +16,14 @@
#

ARG base_img
ARG spark_uid=185

FROM $base_img
WORKDIR /

# Reset to root to run installation tasks
USER 0

RUN mkdir ${SPARK_HOME}/python
# TODO: Investigate running both pip and pip3 via virtualenvs
RUN apk add --no-cache python && \
@@ -37,3 +43,6 @@ ENV PYTHONPATH ${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4

WORKDIR /opt/spark/work-dir
ENTRYPOINT [ "/opt/entrypoint.sh" ]

# Specify the User that the actual main process will run as
USER ${spark_uid}
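A quick way to verify that the USER directive took effect is to run id in place of the normal entrypoint (image name is a placeholder):

    # Expect uid=185 unless spark_uid was overridden at build time
    docker run --rm --entrypoint id my-spark-py:dev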
resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
@@ -30,6 +30,10 @@ set -e
# If there is no passwd entry for the container UID, attempt to create one
if [ -z "$uidentry" ] ; then
if [ -w /etc/passwd ] ; then
# TODO Should we allow providing an environment variable to set the desired username?
# SPARK_USER_NAME is the obvious candidate here, but we only propagate this to the
# pods when using Hadoop, so we'd need to move that to a feature step that
# always runs e.g. Basic(Driver|Executor)FeatureStep
echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd
else
echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID"
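For context, the check above depends on variables set near the top of entrypoint.sh; a sketch of that preamble (not part of this diff):

    # Determine the runtime UID/GID and look up any existing passwd entry
    myuid=$(id -u)
    mygid=$(id -g)
    uidentry=$(getent passwd $myuid)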