Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
test script: support custom args
  • Loading branch information
ngxson committed Dec 10, 2025
commit 32e6a3318302f0ecd42be3508c7c82dffcf42c84
1 change: 0 additions & 1 deletion tools/mtmd/mtmd-cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,6 @@ int main(int argc, char ** argv) {
if (params.prompt.find(mtmd_default_marker()) == std::string::npos) {
for (size_t i = 0; i < params.image.size(); i++) {
// most models require the marker before each image
// TODO: check if it's actually true for all models
// ref: https://github.com/ggml-org/llama.cpp/pull/17616
params.prompt = mtmd_default_marker() + params.prompt;
}
Expand Down
51 changes: 32 additions & 19 deletions tools/mtmd/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,33 +32,42 @@ fi

arr_prefix=()
arr_hf=()
arr_tmpl=() # chat template
arr_extra_args=()
arr_file=()

add_test_vision() {
local hf=$1
local tmpl=${2:-""} # default to empty string if not provided
shift
local extra_args=""
if [ $# -gt 0 ]; then
extra_args=$(printf " %q" "$@")
fi
arr_prefix+=("[vision]")
arr_hf+=("$hf")
arr_tmpl+=("$tmpl")
arr_extra_args+=("$extra_args")
arr_file+=("test-1.jpeg")
}

add_test_audio() {
local hf=$1
shift
local extra_args=""
if [ $# -gt 0 ]; then
extra_args=$(printf " %q" "$@")
fi
arr_prefix+=("[audio] ")
arr_hf+=("$hf")
arr_tmpl+=("") # no need for chat tmpl
arr_extra_args+=("$extra_args")
arr_file+=("test-2.mp3")
}

add_test_vision "ggml-org/SmolVLM-500M-Instruct-GGUF:Q8_0"
add_test_vision "ggml-org/SmolVLM2-2.2B-Instruct-GGUF:Q4_K_M"
add_test_vision "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF:Q8_0"
add_test_vision "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M"
add_test_vision "THUDM/glm-edge-v-5b-gguf:Q4_K_M"
add_test_vision "second-state/Llava-v1.5-7B-GGUF:Q2_K" "vicuna"
add_test_vision "cjpais/llava-1.6-mistral-7b-gguf:Q3_K_M" "vicuna"
add_test_vision "THUDM/glm-edge-v-5b-gguf:Q4_K_M" -p "name of the newspaper?<__media__>"
add_test_vision "second-state/Llava-v1.5-7B-GGUF:Q2_K" --chat-template vicuna
add_test_vision "cjpais/llava-1.6-mistral-7b-gguf:Q3_K_M" --chat-template vicuna
add_test_vision "ibm-research/granite-vision-3.2-2b-GGUF:Q4_K_M"
add_test_vision "second-state/MiniCPM-Llama3-V-2_5-GGUF:Q2_K" # model from openbmb is corrupted
add_test_vision "openbmb/MiniCPM-V-2_6-gguf:Q2_K"
Expand All @@ -79,7 +88,7 @@ add_test_audio "ggml-org/Voxtral-Mini-3B-2507-GGUF:Q4_K_M"
# to test the big models, run: ./tests.sh big
if [ "$RUN_BIG_TESTS" = true ]; then
add_test_vision "ggml-org/pixtral-12b-GGUF:Q4_K_M"
add_test_vision "ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF" "mistral-v7"
add_test_vision "ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF" --chat-template mistral-v7
add_test_vision "ggml-org/Qwen2-VL-2B-Instruct-GGUF:Q4_K_M"
add_test_vision "ggml-org/Qwen2-VL-7B-Instruct-GGUF:Q4_K_M"
add_test_vision "ggml-org/Qwen2.5-VL-3B-Instruct-GGUF:Q4_K_M"
Expand All @@ -89,7 +98,7 @@ if [ "$RUN_BIG_TESTS" = true ]; then
add_test_vision "ggml-org/InternVL3-14B-Instruct-GGUF:Q4_K_M"
add_test_vision "ggml-org/Qwen2.5-Omni-7B-GGUF:Q4_K_M"
# add_test_vision "ggml-org/Qwen2.5-VL-32B-Instruct-GGUF:Q4_K_M" # does not work on my mac M3 Ultra
add_test_vision "ggml-org/Kimi-VL-A3B-Thinking-2506-GGUF:Q4_K_M"
# add_test_vision "ggml-org/Kimi-VL-A3B-Thinking-2506-GGUF:Q4_K_M" # not always working

add_test_audio "ggml-org/ultravox-v0_5-llama-3_1-8b-GGUF:Q4_K_M"
add_test_audio "ggml-org/Qwen2.5-Omni-7B-GGUF:Q4_K_M"
Expand Down Expand Up @@ -122,31 +131,35 @@ for i in "${!arr_hf[@]}"; do
bin="llama-mtmd-cli"
prefix="${arr_prefix[$i]}"
hf="${arr_hf[$i]}"
tmpl="${arr_tmpl[$i]}"
extra_args="${arr_extra_args[$i]}"
inp_file="${arr_file[$i]}"

echo "Running test with binary: $bin and HF model: $hf"
echo ""
echo ""

output=$(\
"$PROJ_ROOT/build/bin/$bin" \
-hf "$hf" \
--image $SCRIPT_DIR/$inp_file \
-p "what is the publisher name of the newspaper?" \
cmd="$(printf %q "$PROJ_ROOT/build/bin/$bin") \
-hf $(printf %q "$hf") \
--image $(printf %q "$SCRIPT_DIR/$inp_file") \
--temp 0 -n 128 \
${tmpl:+--chat-template "$tmpl"} \
2>&1 | tee /dev/tty)
${extra_args}"

# if extra_args does not contain -p, we add a default prompt
if ! [[ "$extra_args" =~ "-p" ]]; then
cmd+=" -p \"what is the publisher name of the newspaper?\""
fi

output=$(eval "$cmd" 2>&1 | tee /dev/tty)

echo "$output" > $SCRIPT_DIR/output/$bin-$(echo "$hf" | tr '/' '-').log

# either contains "new york" or both "men" and "walk"
if echo "$output" | grep -iq "new york" \
|| (echo "$output" | grep -iq "men" && echo "$output" | grep -iq "walk")
then
result="$prefix \033[32mOK\033[0m: $bin $hf"
result="$prefix \033[32mOK\033[0m: $hf"
else
result="$prefix \033[31mFAIL\033[0m: $bin $hf"
result="$prefix \033[31mFAIL\033[0m: $hf"
fi
echo -e "$result"
arr_res+=("$result")
Expand Down
Loading