feat: add mtmd support to the xcframework build

tc-mb · tc-mb · Jul 21, 2025 · Jul 18, 2025 · Jul 21, 2025 · Jul 18, 2025
commit 4d32fd29c54417f7f2037efbcc38f77b3772d223
diff --git a/build-xcframework.sh b/build-xcframework.sh
@@ -8,7 +8,7 @@ TVOS_MIN_OS_VERSION=16.4
 
 BUILD_SHARED_LIBS=OFF
 LLAMA_BUILD_EXAMPLES=OFF
-LLAMA_BUILD_TOOLS=OFF
+LLAMA_BUILD_TOOLS=ON
 LLAMA_BUILD_TESTS=OFF
 LLAMA_BUILD_SERVER=OFF
 GGML_METAL=ON
@@ -124,6 +124,10 @@ setup_framework_structure() {
     cp ggml/include/ggml-cpu.h     ${header_path}
     cp ggml/include/ggml-blas.h    ${header_path}
     cp ggml/include/gguf.h         ${header_path}
+    # Copy mtmd-ios headers and dependencies
+    cp tools/mtmd/mtmd-ios.h        ${header_path}
+    cp tools/mtmd/mtmd.h            ${header_path}
+    cp tools/mtmd/mtmd-helper.h     ${header_path}
 
     # Create module map (common for all platforms)
     cat > ${module_path}module.modulemap << EOF
@@ -136,6 +140,9 @@ framework module llama {
     header "ggml-cpu.h"
     header "ggml-blas.h"
     header "gguf.h"
+    header "mtmd-ios.h"
+    header "mtmd.h"
+    header "mtmd-helper.h"
 
     link "c++"
     link framework "Accelerate"
@@ -252,6 +259,8 @@ combine_static_libraries() {
         "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a"
         "${base_dir}/${build_dir}/ggml/src/ggml-metal/${release_dir}/libggml-metal.a"
         "${base_dir}/${build_dir}/ggml/src/ggml-blas/${release_dir}/libggml-blas.a"
+        "${base_dir}/${build_dir}/common/${release_dir}/libcommon.a"
+        "${base_dir}/${build_dir}/tools/mtmd/${release_dir}/libmtmd.a"
     )
 
     # Create temporary directory for processing
@@ -327,7 +336,7 @@ combine_static_libraries() {
         $arch_flags \
         $min_version_flag \
         -Wl,-force_load,"${temp_dir}/combined.a" \
-        -framework Foundation -framework Metal -framework Accelerate \
+        -framework Foundation -framework Metal -framework Accelerate -framework CoreML \
         -install_name "$install_name" \
         -o "${base_dir}/${output_lib}"
 

diff --git a/tools/batched-bench/CMakeLists.txt b/tools/batched-bench/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-batched-bench)
 add_executable(${TARGET} batched-bench.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/cvector-generator/CMakeLists.txt b/tools/cvector-generator/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-cvector-generator)
 add_executable(${TARGET} cvector-generator.cpp pca.hpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/export-lora/CMakeLists.txt b/tools/export-lora/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-export-lora)
 add_executable(${TARGET} export-lora.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/gguf-split/CMakeLists.txt b/tools/gguf-split/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-gguf-split)
 add_executable(${TARGET} gguf-split.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/imatrix/CMakeLists.txt b/tools/imatrix/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-imatrix)
 add_executable(${TARGET} imatrix.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/llama-bench/CMakeLists.txt b/tools/llama-bench/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-bench)
 add_executable(${TARGET} llama-bench.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/main/CMakeLists.txt b/tools/main/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-cli)
 add_executable(${TARGET} main.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/mtmd/CMakeLists.txt b/tools/mtmd/CMakeLists.txt
@@ -93,7 +93,7 @@ add_executable(llama-qwen2vl-cli  deprecation-warning.cpp)
 set(TARGET llama-mtmd-cli)
 add_executable         (${TARGET} mtmd-cli.cpp)
 set_target_properties  (${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli)
-install                (TARGETS ${TARGET} RUNTIME)
+install                (TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries  (${TARGET} PRIVATE common mtmd Threads::Threads)
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
 

diff --git a/tools/mtmd/mtmd-ios.cpp b/tools/mtmd/mtmd-ios.cpp
@@ -59,8 +59,8 @@ static void set_error(mtmd_ios_context* ctx, const std::string& error) {
 
 mtmd_ios_params mtmd_ios_params_default(void) {
     mtmd_ios_params params = {};
-    params.model_path = nullptr;
-    params.mmproj_path = nullptr;
+    params.model_path = "";
+    params.mmproj_path = "";
     params.n_predict = -1;
     params.n_ctx = 4096;
     params.n_threads = 4;
@@ -70,7 +70,7 @@ mtmd_ios_params mtmd_ios_params_default(void) {
 }
 
 mtmd_ios_context* mtmd_ios_init(const mtmd_ios_params* params) {
-    if (!params || !params->model_path || !params->mmproj_path) {
+    if (!params || params->model_path.empty() || params->mmproj_path.empty()) {
         return nullptr;
     }
 
@@ -133,7 +133,7 @@ mtmd_ios_context* mtmd_ios_init(const mtmd_ios_params* params) {
     mparams.n_threads = params->n_threads;
     mparams.verbosity = GGML_LOG_LEVEL_INFO;
 
-    ctx->ctx_vision.reset(mtmd_init_from_file(params->mmproj_path, ctx->model, mparams));
+    ctx->ctx_vision.reset(mtmd_init_from_file(params->mmproj_path.c_str(), ctx->model, mparams));
     if (!ctx->ctx_vision.get()) {
         set_error(ctx.get(), "Failed to load vision model from " + std::string(params->mmproj_path));
         return nullptr;
@@ -148,14 +148,14 @@ void mtmd_ios_free(mtmd_ios_context* ctx) {
     }
 }
 
-int mtmd_ios_prefill_image(mtmd_ios_context* ctx, const char* image_path) {
-    if (!ctx || !image_path) {
+int mtmd_ios_prefill_image(mtmd_ios_context* ctx, const std::string& image_path) {
+    if (!ctx || image_path.empty()) {
         return -1;
     }
 
-    mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(ctx->ctx_vision.get(), image_path));
+    mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(ctx->ctx_vision.get(), image_path.c_str()));
     if (!bmp.ptr) {
-        set_error(ctx, "Failed to load image from file: " + std::string(image_path));
+        set_error(ctx, "Failed to load image from file: " + image_path);
         return -1;
     }
     ctx->bitmaps.entries.push_back(std::move(bmp));
@@ -199,14 +199,14 @@ int mtmd_ios_prefill_image(mtmd_ios_context* ctx, const char* image_path) {
 
 
 
-int mtmd_ios_prefill_text(mtmd_ios_context* ctx, const char* text, const char* role) {
-    if (!ctx || !text || !role) {
+int mtmd_ios_prefill_text(mtmd_ios_context* ctx, const std::string& text, const std::string& role) {
+    if (!ctx || text.empty() || role.empty()) {
         return -1;
     }
 
     common_chat_msg msg;
-    msg.role = role;
-    msg.content = text;
+    msg.role = role.c_str();
+    msg.content = text.c_str();
 
     common_chat_templates_inputs tmpl_inputs;
     tmpl_inputs.messages = {msg};

diff --git a/tools/mtmd/mtmd-ios.h b/tools/mtmd/mtmd-ios.h
@@ -1,63 +1,82 @@
 #ifndef MTMD_IOS_H
 #define MTMD_IOS_H
 
-#include "mtmd.h"
-#include "mtmd-helper.h"
-#include "common.h"
-#include "sampling.h"
-#include "llama.h"
-#include "ggml.h"
-#include "chat.h"
-
 #include <string>
-#include <vector>
-#include <functional>
-#include <memory>
+
+#include "ggml.h"
+#include "llama.h"
+#include "mtmd-helper.h"
+#include "mtmd.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-struct mtmd_ios_context;
-struct mtmd_ios_params;
-
-struct mtmd_ios_params {
-    const char* model_path;
-    const char* mmproj_path;
-
-    int n_predict;
-    int n_ctx;
-    int n_threads;
-    float temperature;
-
-    bool use_gpu;
-    bool mmproj_use_gpu;
-    bool warmup;
-};
-
-
-
-mtmd_ios_context* mtmd_ios_init(const mtmd_ios_params* params);
-void mtmd_ios_free(mtmd_ios_context* ctx);
-
+// Context structure
+typedef struct mtmd_ios_context mtmd_ios_context;
+
+// Parameters structure
+typedef struct mtmd_ios_params {
+    std::string model_path;
+    std::string mmproj_path;
+    int         n_predict;
+    int         n_ctx;
+    int         n_threads;
+    float       temperature;
+    bool        use_gpu;
+    bool        mmproj_use_gpu;
+    bool        warmup;
+} mtmd_ios_params;
+
+// Initialize, returns a context pointer on success, NULL on failure
+// Parameters:
+// params: must be non-NULL, with non-empty model_path and mmproj_path
+mtmd_ios_context * mtmd_ios_init(const mtmd_ios_params * params);
+
+// Free resources
+// Parameters:
+// ctx: context
+void mtmd_ios_free(mtmd_ios_context * ctx);
+
+// Get default parameters
 mtmd_ios_params mtmd_ios_params_default(void);
 
-int mtmd_ios_prefill_image(mtmd_ios_context* ctx, const char* image_path);
-int mtmd_ios_prefill_text(mtmd_ios_context* ctx, const char* text, const char* role);
+// Prefill image, returns 0 on success, -1 on failure
+// Parameters:
+// ctx: context
+// image_path: path of the image file to load; must be non-empty
+int mtmd_ios_prefill_image(mtmd_ios_context * ctx, const std::string & image_path);
 
+// Prefill text, returns 0 on success, -1 on failure
+// Parameters:
+// ctx: context
+// text: message content; must be non-empty
+// role: chat message role; must be non-empty
+int mtmd_ios_prefill_text(mtmd_ios_context * ctx, const std::string & text, const std::string & role);
+
+// Loop return value structure
 typedef struct {
-    char* token;
-    bool is_end;
+    char * token;
+    bool   is_end;
 } mtmd_ios_token;
 
-mtmd_ios_token mtmd_ios_loop(mtmd_ios_context* ctx);
+// Loop, returns an mtmd_ios_token holding the token string and an is_end flag
+// Parameters:
+// ctx: context
+mtmd_ios_token mtmd_ios_loop(mtmd_ios_context * ctx);
 
-const char* mtmd_ios_get_last_error(mtmd_ios_context* ctx);
+// Get last error message
+// Parameters:
+// ctx: context
+const char * mtmd_ios_get_last_error(mtmd_ios_context * ctx);
 
-void mtmd_ios_string_free(char* str);
+// Free string
+// Parameters:
+// str: string
+void mtmd_ios_string_free(char * str);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif 
+#endif
diff --git a/tools/perplexity/CMakeLists.txt b/tools/perplexity/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-perplexity)
 add_executable(${TARGET} perplexity.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/quantize/CMakeLists.txt b/tools/quantize/CMakeLists.txt
@@ -1,6 +1,6 @@
 set(TARGET llama-quantize)
 add_executable(${TARGET} quantize.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_include_directories(${TARGET} PRIVATE ../../common)
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/run/CMakeLists.txt b/tools/run/CMakeLists.txt
@@ -11,6 +11,6 @@ if (LLAMA_CURL)
     set(LLAMA_RUN_EXTRA_LIBS ${LLAMA_RUN_EXTRA_LIBS} ${CURL_LIBRARY})
 endif ()
 
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_RUN_EXTRA_LIBS})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/tokenize/CMakeLists.txt b/tools/tokenize/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-tokenize)
 add_executable(${TARGET} tokenize.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/tts/CMakeLists.txt b/tools/tts/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-tts)
 add_executable(${TARGET} tts.cpp)
-install(TARGETS ${TARGET} RUNTIME)
+install(TARGETS ${TARGET} RUNTIME BUNDLE DESTINATION .)
 target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)