From 7677364f4557ccdc8b185102b9c56c489bc5cde9 Mon Sep 17 00:00:00 2001
From: Mike Bonnet <mikeb@redhat.com>
Date: Mon, 15 Dec 2025 10:18:17 -0800
Subject: [PATCH] rocm: reduce image size by using a multi-stage build

Only copy required binaries and libraries from the installation directory into
the final image, and install only necessary runtime dependencies. The final image
size is reduced by over 2 GB.

Signed-off-by: Mike Bonnet <mikeb@redhat.com>
---
 container-images/rocm/Containerfile            | 18 +++++++++++++++++-
 .../scripts/build_llama_and_whisper.sh         |  7 ++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/container-images/rocm/Containerfile b/container-images/rocm/Containerfile
index 32fa6891e..d9f8f8ec4 100644
--- a/container-images/rocm/Containerfile
+++ b/container-images/rocm/Containerfile
@@ -1,8 +1,24 @@
-FROM quay.io/fedora/fedora:43
+FROM quay.io/fedora/fedora:43 AS builder
 
 COPY container-images/scripts/build_llama_and_whisper.sh \
      container-images/scripts/lib.sh \
      /src/
 WORKDIR /src/
 RUN ./build_llama_and_whisper.sh rocm
+
+FROM quay.io/fedora/fedora:43
+
+COPY --from=builder \
+  /tmp/install/bin/llama-bench \
+  /tmp/install/bin/llama-perplexity \
+  /tmp/install/bin/llama-quantize \
+  /tmp/install/bin/llama-server \
+  /tmp/install/bin/rpc-server \
+  /tmp/install/bin/whisper-server \
+  /usr/bin/
+COPY --from=builder /tmp/install/lib64/*.so /usr/lib64/
+
+RUN dnf -y --setopt=install_weak_deps=false install hipblas rocblas rocm-hip rocm-runtime rocsolver && \
+    dnf -y clean all
+
 WORKDIR /
diff --git a/container-images/scripts/build_llama_and_whisper.sh b/container-images/scripts/build_llama_and_whisper.sh
index 0f425a13e..0b599b0dc 100755
--- a/container-images/scripts/build_llama_and_whisper.sh
+++ b/container-images/scripts/build_llama_and_whisper.sh
@@ -182,7 +182,7 @@ cmake_steps() {
 }
 
 set_install_prefix() {
-  if [ "$containerfile" = "cuda" ] || [ "$containerfile" = "intel-gpu" ] || [ "$containerfile" = "cann" ] || [ "$containerfile" = "musa" ]; then
+  if [ "$containerfile" = "cuda" ] || [ "$containerfile" = "intel-gpu" ] || [ "$containerfile" = "cann" ] || [ "$containerfile" = "musa" ] || [ "$containerfile" = "rocm" ]; then
     echo "/tmp/install"
   else
     echo "/usr"
@@ -203,7 +203,7 @@ configure_common_flags() {
       common_flags+=("-DCMAKE_HIP_COMPILER_ROCM_ROOT=/usr")
     fi
 
-    common_flags+=("-DGGML_HIP=ON" "-DAMDGPU_TARGETS=${AMDGPU_TARGETS:-gfx1010,gfx1012,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1151,gfx1200,gfx1201}")
+    common_flags+=("-DGGML_HIP=ON" "-DGPU_TARGETS=${GPU_TARGETS:-gfx1010,gfx1012,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1151,gfx1200,gfx1201}")
     ;;
   cuda)
     common_flags+=("-DGGML_CUDA=ON" "-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined" "-DCMAKE_CUDA_FLAGS=\"-U__ARM_NEON -U__ARM_NEON__\"")
@@ -234,7 +234,6 @@ clone_and_build_whisper_cpp() {
 
   git_clone_specific_commit "${WHISPER_CPP_REPO:-https://github.com/ggerganov/whisper.cpp}" "$whisper_cpp_commit"
   cmake_steps "${whisper_flags[@]}"
-  mkdir -p "$install_prefix/bin"
   cd ..
   if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" != y ]]; then
       rm -rf whisper.cpp
@@ -243,8 +242,6 @@ clone_and_build_whisper_cpp() {
 
 clone_and_build_llama_cpp() {
   local llama_cpp_commit="${LLAMA_CPP_PULL_REF:-$DEFAULT_LLAMA_CPP_COMMIT}"
-  local install_prefix
-  install_prefix=$(set_install_prefix)
   git_clone_specific_commit "${LLAMA_CPP_REPO:-https://github.com/ggml-org/llama.cpp}" "$llama_cpp_commit"
   cmake_steps "${common_flags[@]}"
   install -m 755 build/bin/rpc-server "$install_prefix"/bin/rpc-server