From 7677364f4557ccdc8b185102b9c56c489bc5cde9 Mon Sep 17 00:00:00 2001 From: Mike Bonnet Date: Mon, 15 Dec 2025 10:18:17 -0800 Subject: [PATCH] rocm: reduce image size by using a multi-stage build Only copy required binaries and libraries from the installation directory into the final image, and install only necessary runtime dependencies. The final image size is reduced by over 2 GB. Signed-off-by: Mike Bonnet --- container-images/rocm/Containerfile | 18 +++++++++++++++++- .../scripts/build_llama_and_whisper.sh | 7 ++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/container-images/rocm/Containerfile b/container-images/rocm/Containerfile index 32fa6891e..d9f8f8ec4 100644 --- a/container-images/rocm/Containerfile +++ b/container-images/rocm/Containerfile @@ -1,8 +1,24 @@ -FROM quay.io/fedora/fedora:43 +FROM quay.io/fedora/fedora:43 AS builder COPY container-images/scripts/build_llama_and_whisper.sh \ container-images/scripts/lib.sh \ /src/ WORKDIR /src/ RUN ./build_llama_and_whisper.sh rocm + +FROM quay.io/fedora/fedora:43 + +COPY --from=builder \ + /tmp/install/bin/llama-bench \ + /tmp/install/bin/llama-perplexity \ + /tmp/install/bin/llama-quantize \ + /tmp/install/bin/llama-server \ + /tmp/install/bin/rpc-server \ + /tmp/install/bin/whisper-server \ + /usr/bin/ +COPY --from=builder /tmp/install/lib64/*.so /usr/lib64/ + +RUN dnf -y --setopt=install_weak_deps=false install hipblas rocblas rocm-hip rocm-runtime rocsolver && \ + dnf -y clean all + WORKDIR / diff --git a/container-images/scripts/build_llama_and_whisper.sh b/container-images/scripts/build_llama_and_whisper.sh index 0f425a13e..0b599b0dc 100755 --- a/container-images/scripts/build_llama_and_whisper.sh +++ b/container-images/scripts/build_llama_and_whisper.sh @@ -182,7 +182,7 @@ cmake_steps() { } set_install_prefix() { - if [ "$containerfile" = "cuda" ] || [ "$containerfile" = "intel-gpu" ] || [ "$containerfile" = "cann" ] || [ "$containerfile" = "musa" ]; then + if [ "$containerfile" 
= "cuda" ] || [ "$containerfile" = "intel-gpu" ] || [ "$containerfile" = "cann" ] || [ "$containerfile" = "musa" ] || [ "$containerfile" = "rocm" ]; then echo "/tmp/install" else echo "/usr" @@ -203,7 +203,7 @@ configure_common_flags() { common_flags+=("-DCMAKE_HIP_COMPILER_ROCM_ROOT=/usr") fi - common_flags+=("-DGGML_HIP=ON" "-DAMDGPU_TARGETS=${AMDGPU_TARGETS:-gfx1010,gfx1012,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1151,gfx1200,gfx1201}") + common_flags+=("-DGGML_HIP=ON" "-DGPU_TARGETS=${GPU_TARGETS:-gfx1010,gfx1012,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1151,gfx1200,gfx1201}") ;; cuda) common_flags+=("-DGGML_CUDA=ON" "-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined" "-DCMAKE_CUDA_FLAGS=\"-U__ARM_NEON -U__ARM_NEON__\"") @@ -234,7 +234,6 @@ clone_and_build_whisper_cpp() { git_clone_specific_commit "${WHISPER_CPP_REPO:-https://github.com/ggerganov/whisper.cpp}" "$whisper_cpp_commit" cmake_steps "${whisper_flags[@]}" - mkdir -p "$install_prefix/bin" cd .. if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" != y ]]; then rm -rf whisper.cpp @@ -243,8 +242,6 @@ clone_and_build_whisper_cpp() { clone_and_build_llama_cpp() { local llama_cpp_commit="${LLAMA_CPP_PULL_REF:-$DEFAULT_LLAMA_CPP_COMMIT}" - local install_prefix - install_prefix=$(set_install_prefix) git_clone_specific_commit "${LLAMA_CPP_REPO:-https://github.com/ggml-org/llama.cpp}" "$llama_cpp_commit" cmake_steps "${common_flags[@]}" install -m 755 build/bin/rpc-server "$install_prefix"/bin/rpc-server