From 6a25a323f16f1e403b471b3a4ac88e29483fc604 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 16 May 2020 15:14:22 -0700 Subject: [PATCH 1/4] Make the docker image tool support cross building with the new docker buildx command --- bin/docker-image-tool.sh | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 57b86254ab42..f67beca75613 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -19,6 +19,8 @@ # This script builds and pushes docker images when run from a release of Spark # with Kubernetes support. +set -x + function error { echo "$@" 1>&2 exit 1 @@ -62,6 +64,9 @@ function docker_push { if [ $? -ne 0 ]; then error "Failed to push $image_name Docker image." fi + if [ "${CROSS_BUILD}" != "false" ]; then + docker buildx push "$(image_ref ${image_name})" + fi else echo "$(image_ref ${image_name}) image not found. Skipping push for this image." fi @@ -172,6 +177,7 @@ function build { local BASEDOCKERFILE=${BASEDOCKERFILE:-"kubernetes/dockerfiles/spark/Dockerfile"} local PYDOCKERFILE=${PYDOCKERFILE:-false} local RDOCKERFILE=${RDOCKERFILE:-false} + local ARCHS=${ARCHS:-"--platform linux/amd64,linux/arm64"} (cd $(img_ctx_dir base) && docker build $NOCACHEARG "${BUILD_ARGS[@]}" \ -t $(image_ref spark) \ @@ -179,6 +185,11 @@ function build { if [ $? -ne 0 ]; then error "Failed to build Spark JVM Docker image, please refer to Docker build output for details." fi + if [ "${CROSS_BUILD}" != "false" ]; then + (cd $(img_ctx_dir base) && docker buildx build $ARCHS $NOCACHEARG "${BUILD_ARGS[@]}" \ + -t $(image_ref spark) \ + -f "$BASEDOCKERFILE" .) + fi if [ "${PYDOCKERFILE}" != "false" ]; then (cd $(img_ctx_dir pyspark) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ @@ -187,6 +198,11 @@ function build { if [ $? -ne 0 ]; then error "Failed to build PySpark Docker image, please refer to Docker build output for details." 
fi + if [ "${CROSS_BUILD}" != "false" ]; then + (cd $(img_ctx_dir pyspark) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ + -t $(image_ref spark-py) \ + -f "$PYDOCKERFILE" .) + fi fi if [ "${RDOCKERFILE}" != "false" ]; then @@ -196,6 +212,11 @@ function build { if [ $? -ne 0 ]; then error "Failed to build SparkR Docker image, please refer to Docker build output for details." fi + if [ "${CROSS_BUILD}" != "false" ]; then + (cd $(img_ctx_dir sparkr) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ + -t $(image_ref spark-r) \ + -f "$RDOCKERFILE" .) + fi fi } @@ -227,6 +248,7 @@ Options: -n Build docker image with --no-cache -u uid UID to use in the USER directive to set the user the main Spark process runs as inside the resulting container + -x Use docker buildx to cross build -b arg Build arg to build or push the image. For multiple build args, this option needs to be used separately for each build arg. @@ -252,6 +274,11 @@ Examples: - Build and push JDK11-based image with tag "v3.0.0" to docker.io/myrepo $0 -r docker.io/myrepo -t v3.0.0 -b java_image_tag=11-jre-slim build $0 -r docker.io/myrepo -t v3.0.0 push + + - Build and push JDK11-based image for multiple archs to docker.io/myrepo + $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build + $0 -r docker.io/myrepo -t v3.0.0 -X push + EOF } @@ -268,7 +295,8 @@ RDOCKERFILE= NOCACHEARG= BUILD_PARAMS= SPARK_UID= -while getopts f:p:R:mr:t:nb:u: option +CROSS_BUILD="false" +while getopts f:p:R:mr:t:Xnb:u: option do case "${option}" in @@ -279,6 +307,7 @@ do t) TAG=${OPTARG};; n) NOCACHEARG="--no-cache";; b) BUILD_PARAMS=${BUILD_PARAMS}" --build-arg "${OPTARG};; + X) CROSS_BUILD=1;; m) if ! which minikube 1>/dev/null; then error "Cannot find minikube." 
From f7fdddcfeb2276491b89a64cd9b89a2692f29de3 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 22 May 2020 10:14:19 -0700 Subject: [PATCH 2/4] Clarify the docs for cross build and untabify because mix of tabs/spaces is meeps --- bin/docker-image-tool.sh | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index f67beca75613..128b6a565854 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -64,9 +64,6 @@ function docker_push { if [ $? -ne 0 ]; then error "Failed to push $image_name Docker image." fi - if [ "${CROSS_BUILD}" != "false" ]; then - docker buildx push "$(image_ref ${image_name})" - fi else echo "$(image_ref ${image_name}) image not found. Skipping push for this image." fi @@ -196,12 +193,12 @@ function build { -t $(image_ref spark-py) \ -f "$PYDOCKERFILE" .) if [ $? -ne 0 ]; then - error "Failed to build PySpark Docker image, please refer to Docker build output for details." + error "Failed to build PySpark Docker image, please refer to Docker build output for details." fi if [ "${CROSS_BUILD}" != "false" ]; then (cd $(img_ctx_dir pyspark) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ - -t $(image_ref spark-py) \ - -f "$PYDOCKERFILE" .) + -t $(image_ref spark-py) \ + -f "$PYDOCKERFILE" .) fi fi @@ -214,8 +211,8 @@ function build { fi if [ "${CROSS_BUILD}" != "false" ]; then (cd $(img_ctx_dir sparkr) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ - -t $(image_ref spark-r) \ - -f "$RDOCKERFILE" .) + -t $(image_ref spark-r) \ + -f "$RDOCKERFILE" .) fi fi } @@ -233,24 +230,24 @@ Builds or pushes the built-in Spark Docker image. Commands: build Build image. Requires a repository address to be provided if the image will be - pushed to a different registry. + pushed to a different registry. push Push a pre-built image to a registry. Requires a repository address to be provided. 
Options: -f file Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark. -p file (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark. - Skips building PySpark docker image if not specified. + Skips building PySpark docker image if not specified. -R file (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark. - Skips building SparkR docker image if not specified. + Skips building SparkR docker image if not specified. -r repo Repository address. -t tag Tag to apply to the built image, or to identify the image to be pushed. -m Use minikube's Docker daemon. -n Build docker image with --no-cache -u uid UID to use in the USER directive to set the user the main Spark process runs as inside the - resulting container - -x Use docker buildx to cross build + resulting container + -X Use docker buildx to cross build. Automatically pushes. -b arg Build arg to build or push the image. For multiple build args, this option needs to - be used separately for each build arg. + be used separately for each build arg. Using minikube when building images will do so directly into minikube's Docker daemon. 
There is no need to push the images into minikube in that case, they'll be automatically @@ -277,7 +274,8 @@ Examples: - Build and push JDK11-based image for multiple archs to docker.io/myrepo $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build - $0 -r docker.io/myrepo -t v3.0.0 -X push + # Note: buildx, which does cross building, needs to do the push during build + # So there is no separate push step with -X EOF } From ea0769dc04803402c252feff238fc5edab517dda Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 22 May 2020 10:54:42 -0700 Subject: [PATCH 3/4] Fix untabify --- bin/docker-image-tool.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 128b6a565854..a1f43a49e771 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -193,12 +193,12 @@ function build { -t $(image_ref spark-py) \ -f "$PYDOCKERFILE" .) if [ $? -ne 0 ]; then - error "Failed to build PySpark Docker image, please refer to Docker build output for details." + error "Failed to build PySpark Docker image, please refer to Docker build output for details." fi if [ "${CROSS_BUILD}" != "false" ]; then - (cd $(img_ctx_dir pyspark) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ - -t $(image_ref spark-py) \ - -f "$PYDOCKERFILE" .) + (cd $(img_ctx_dir pyspark) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ + -t $(image_ref spark-py) \ + -f "$PYDOCKERFILE" .) fi fi @@ -211,8 +211,8 @@ function build { fi if [ "${CROSS_BUILD}" != "false" ]; then (cd $(img_ctx_dir sparkr) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \ - -t $(image_ref spark-r) \ - -f "$RDOCKERFILE" .) + -t $(image_ref spark-r) \ + -f "$RDOCKERFILE" .) fi fi } @@ -230,24 +230,24 @@ Builds or pushes the built-in Spark Docker image. Commands: build Build image.
Requires a repository address to be provided if the image will be - pushed to a different registry. + pushed to a different registry. push Push a pre-built image to a registry. Requires a repository address to be provided. Options: -f file Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark. -p file (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark. - Skips building PySpark docker image if not specified. + Skips building PySpark docker image if not specified. -R file (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark. - Skips building SparkR docker image if not specified. + Skips building SparkR docker image if not specified. -r repo Repository address. -t tag Tag to apply to the built image, or to identify the image to be pushed. -m Use minikube's Docker daemon. -n Build docker image with --no-cache -u uid UID to use in the USER directive to set the user the main Spark process runs as inside the - resulting container + resulting container -X Use docker buildx to cross build. Automatically pushes. -b arg Build arg to build or push the image. For multiple build args, this option needs to - be used separately for each build arg. + be used separately for each build arg. Using minikube when building images will do so directly into minikube's Docker daemon. 
There is no need to push the images into minikube in that case, they'll be automatically From e5176c718ce44a85544e74bdd440dd4cf2280628 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sun, 24 May 2020 16:19:18 -0700 Subject: [PATCH 4/4] Add a link to how to setup buildx since it's new and most folks probably don't have it enabled already --- bin/docker-image-tool.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index a1f43a49e771..8a01b80c4164 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -246,6 +246,7 @@ Options: -u uid UID to use in the USER directive to set the user the main Spark process runs as inside the resulting container -X Use docker buildx to cross build. Automatically pushes. + See https://docs.docker.com/buildx/working-with-buildx/ for steps to setup buildx. -b arg Build arg to build or push the image. For multiple build args, this option needs to be used separately for each build arg.