diff --git a/llama.cpp/main/main.cpp b/llama.cpp/main/main.cpp index 5a4daabc79..9721647852 100644 --- a/llama.cpp/main/main.cpp +++ b/llama.cpp/main/main.cpp @@ -199,16 +199,16 @@ int main(int argc, char ** argv) { __builtin_unreachable(); } - enum Program prog = determine_program(argv); - if (prog == LLAMAFILER) - return lf::server::main(argc, argv); - mallopt(M_GRANULARITY, 2 * 1024 * 1024); mallopt(M_MMAP_THRESHOLD, 16 * 1024 * 1024); mallopt(M_TRIM_THRESHOLD, 128 * 1024 * 1024); ShowCrashReports(); argc = cosmo_args("/zip/.args", &argv); + enum Program prog = determine_program(argv); + if (prog == LLAMAFILER) + return lf::server::run(argc, argv, true); + if (prog == SERVER) return server_cli(argc, argv); diff --git a/llamafile/server/main.cpp b/llamafile/server/main.cpp index 312b7fcbbf..b0cb63c73a 100644 --- a/llamafile/server/main.cpp +++ b/llamafile/server/main.cpp @@ -20,5 +20,5 @@ int main(int argc, char* argv[]) { - return lf::server::main(argc, argv); + return lf::server::run(argc, argv, false); } diff --git a/llamafile/server/prog.cpp b/llamafile/server/prog.cpp index bd6e6b6a24..6ecfe76270 100644 --- a/llamafile/server/prog.cpp +++ b/llamafile/server/prog.cpp @@ -35,7 +35,7 @@ namespace server { Server* g_server; int -main(int argc, char* argv[]) +run(int argc, char* argv[], bool args_already_loaded) { llamafile_check_cpu(); signal(SIGPIPE, SIG_IGN); @@ -56,7 +56,8 @@ main(int argc, char* argv[]) } // get config - argc = cosmo_args("/zip/.args", &argv); + if (!args_already_loaded) + argc = cosmo_args("/zip/.args", &argv); llamafile_get_flags(argc, argv); // initialize subsystems diff --git a/llamafile/server/prog.h b/llamafile/server/prog.h index af6c555794..95e5111e0e 100644 --- a/llamafile/server/prog.h +++ b/llamafile/server/prog.h @@ -4,7 +4,7 @@ namespace lf { namespace server { int -main(int, char**); +run(int, char**, bool); } // namespace server } // namespace lf