From 81cc0ab15d158f6562c683e49b5e1b3b17571808 Mon Sep 17 00:00:00 2001 From: ndonkoHenri Date: Wed, 1 Jul 2026 02:49:26 +0200 Subject: [PATCH 1/2] recipe: llama-cpp-python 0.3.32 (llama.cpp, iOS + Android) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runs local GGUF LLMs on-device (flet-dev/flet#6627). scikit-build-core / CMake package that vendors the full llama.cpp engine; the Python layer is a pure-ctypes binding that loads the bundled libllama/libggml* shared libs, so this is the duckdb archetype crossed with a pyzbar-style loader. CPU-only baseline: all GPU backends (Metal/CUDA/Vulkan/OpenCL/HIP/RPC), BLAS, Accelerate, OpenMP and LLAMAFILE are disabled, GGML_NATIVE=OFF, LLAVA_BUILD=OFF (the multimodal mtmd surface is imported lazily so text inference never needs it). Android links libc++_shared (flet-libcpp-shared) and gets the 16 KB page-size flags; iOS uses the Unix Makefiles generator. mobile.patch (4 parts): 1. Gate the CMakeLists Apple block to skip iOS — it FORCE-enables GGML_METAL (via CACHE...FORCE, which -DGGML_METAL=OFF can't override) and guesses the arch from `uname -m`; iOS is CPU-only with an explicit arch. 2. Skip llama.cpp's unused `common` helper lib (~5 MB). 3. Strip SONAME versioning from the shipped libs, so the wheel carries single unversioned files instead of a lib*.dylib -> .0 -> .0.15.3 symlink triplet (forge's packer dereferences those into 3 copies / colliding iOS frameworks). Cuts the wheel from ~14 MB to ~1.7 MB. 4. Rewrite the ctypes loader to (a) find the lib under its iOS framework name (lib{name}.fwork) and on sys.platform == "ios", and (b) preload the ggml dependency chain with RTLD_GLOBAL — the bundled libs carry no RUNPATH, so the platform linker can't resolve siblings on its own. Also -DCMAKE_INSTALL_LIBDIR=llama_cpp/lib to merge llama.cpp's standard install into the package dir (drops the duplicate top-level lib/).
Full 6-slice matrix builds green (iOS device/arm64-sim/x86_64-sim, Android arm64-v8a/x86_64/armeabi-v7a). On-device validated end to end on Android arm64 and the iOS arm64 simulator: import + native calls + real GGUF inference (SmolLM2-135M Q4) via the recipe-tester app. --- recipes/llama-cpp-python/meta.yaml | 85 +++++++ recipes/llama-cpp-python/patches/mobile.patch | 218 ++++++++++++++++++ .../llama-cpp-python/tests/test_llama_cpp.py | 26 +++ 3 files changed, 329 insertions(+) create mode 100644 recipes/llama-cpp-python/meta.yaml create mode 100644 recipes/llama-cpp-python/patches/mobile.patch create mode 100644 recipes/llama-cpp-python/tests/test_llama_cpp.py diff --git a/recipes/llama-cpp-python/meta.yaml b/recipes/llama-cpp-python/meta.yaml new file mode 100644 index 0000000..d5f5dc5 --- /dev/null +++ b/recipes/llama-cpp-python/meta.yaml @@ -0,0 +1,85 @@ +package: + name: llama-cpp-python + version: "0.3.32" + +requirements: + build: + # llama-cpp-python builds via scikit-build-core + CMake; with forge's + # `--no-isolation` these must be in the build venv. scikit-build-core comes + # from the package's own build-system.requires. Android keeps Ninja; iOS + # uses the Makefiles generator (see CMAKE_GENERATOR below). + - cmake + - ninja +# {% if sdk == 'android' %} + host: + # llama.cpp/ggml is C++; the bundled libllama.so / libggml*.so link + # libc++_shared.so, which the device runtime doesn't provide unless bundled. 
+ - flet-libcpp-shared >=27.2.12479018 +# {% endif %} + +build: + number: 0 + script_env: +# {% if sdk == 'android' %} + CMAKE_ARGS: >- + -DCMAKE_TOOLCHAIN_FILE={NDK_ROOT}/build/cmake/android.toolchain.cmake + -DANDROID_ABI={ANDROID_ABI} + -DANDROID_NATIVE_API_LEVEL={ANDROID_API_LEVEL} + -DANDROID_STL=c++_shared + -DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/env + -DCMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=16384 + -DCMAKE_MODULE_LINKER_FLAGS=-Wl,-z,max-page-size=16384 + -DLLAVA_BUILD=OFF + -DCMAKE_INSTALL_LIBDIR=llama_cpp/lib + -DGGML_NATIVE=OFF + -DGGML_OPENMP=OFF + -DGGML_LLAMAFILE=OFF + -DGGML_BLAS=OFF + -DGGML_ACCELERATE=OFF + -DGGML_METAL=OFF + -DGGML_CUDA=OFF + -DGGML_HIP=OFF + -DGGML_VULKAN=OFF + -DGGML_OPENCL=OFF + -DGGML_RPC=OFF +# {% else %} + # The `ninja` pip package crashes importing on the iOS crossenv python + # (sysconfig.get_preferred_scheme('user') -> 'posix_user' invalid on iOS), + # so use the Makefiles generator on iOS; scikit-build-core then never imports + # ninja. (Android keeps Ninja, which works there.) + CMAKE_GENERATOR: Unix Makefiles + CMAKE_ARGS: >- + -DCMAKE_SYSTEM_NAME=iOS + -DCMAKE_OSX_SYSROOT={{ sdk }} + -DCMAKE_OSX_DEPLOYMENT_TARGET={{ sdk_version }} + -DCMAKE_OSX_ARCHITECTURES={{ arch }} + -DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/env + -DLLAVA_BUILD=OFF + -DCMAKE_INSTALL_LIBDIR=llama_cpp/lib + -DGGML_NATIVE=OFF + -DGGML_OPENMP=OFF + -DGGML_LLAMAFILE=OFF + -DGGML_BLAS=OFF + -DGGML_ACCELERATE=OFF + -DGGML_METAL=OFF + -DGGML_METAL_EMBED_LIBRARY=OFF + -DGGML_CUDA=OFF + -DGGML_HIP=OFF + -DGGML_VULKAN=OFF + -DGGML_OPENCL=OFF + -DGGML_RPC=OFF +# {% endif %} + +patches: + # mobile.patch does four things: + # 1. Stops llama-cpp-python's CMakeLists from FORCE-enabling GGML_METAL on all + # Apple platforms (it would override our -DGGML_METAL=OFF for the iOS cross + # build, which has no Metal toolchain). + # 2. Skips building llama.cpp's unused `common` helper lib (~5 MB). + # 3. 
Drops SONAME versioning from the shipped libs (ggml-base/ggml/ggml-cpu/ + # llama) so the wheel carries single unversioned files instead of a + # lib*.dylib -> .0 -> .0.15.3 symlink triplet that forge dereferences into + # three copies (and three colliding frameworks on iOS). + # 4. Teaches the ctypes loader to find the bundled lib under its iOS framework + # name (lib{name}.fwork) and on the "ios" platform. + - mobile.patch diff --git a/recipes/llama-cpp-python/patches/mobile.patch b/recipes/llama-cpp-python/patches/mobile.patch new file mode 100644 index 0000000..9409ce4 --- /dev/null +++ b/recipes/llama-cpp-python/patches/mobile.patch @@ -0,0 +1,218 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 5feaaca..db16837 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -67,8 +67,11 @@ if (LLAMA_BUILD) + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + set(CMAKE_SKIP_RPATH FALSE) + +- # Enable building of the common library +- set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build llama.cpp common library" FORCE) ++ # mobile-forge: do NOT build llama.cpp's `common` helper library. It is ++ # only used by the CLI tools/examples (all disabled here); the ctypes ++ # bindings call libllama directly. Dropping it removes a ~5 MB unused lib ++ # (and its duplicate install) from the wheel. ++ set(LLAMA_BUILD_COMMON OFF CACHE BOOL "Build llama.cpp common library" FORCE) + + # Disable building curl support + set(LLAMA_CURL OFF CACHE BOOL "llama.cpp: enable curl" FORCE) +@@ -100,7 +103,11 @@ if (LLAMA_BUILD) + endif() + + # Architecture detection and settings for Apple platforms +- if (APPLE) ++ # mobile-forge: skip this macOS-host block when cross-compiling to iOS. ++ # It FORCEs GGML_METAL=ON (overriding our -DGGML_METAL=OFF) and guesses ++ # the arch via `uname -m`. For iOS we build CPU-only and pass the arch ++ # explicitly via CMAKE_OSX_ARCHITECTURES.
++ if (APPLE AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS") + # Get the target architecture + execute_process( + COMMAND uname -m +diff --git a/llama_cpp/_ctypes_extensions.py b/llama_cpp/_ctypes_extensions.py +index 02cee8a..9274b5d 100644 +--- a/llama_cpp/_ctypes_extensions.py ++++ b/llama_cpp/_ctypes_extensions.py +@@ -25,30 +25,33 @@ _EMSCRIPTEN_SIDE_MODULE_SUFFIX = ".cpython-00-wasm32-emscripten.so" + # Load the library + def load_shared_library(lib_base_name: str, base_path: pathlib.Path): + """Platform independent shared library loader""" +- # Searching for the library in the current directory under the name "libllama" (default name +- # for llamacpp) and "llama" (default name for this repo) +- lib_paths: List[pathlib.Path] = [] +- # Determine the file extension based on the platform +- if sys.platform == "emscripten": +- # Use a CPython-style tag that Pyodide skips during package auto-load. +- lib_paths += [ +- base_path / f"lib{lib_base_name}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}", +- ] +- elif sys.platform.startswith("linux") or sys.platform.startswith("freebsd"): +- lib_paths += [ +- base_path / f"lib{lib_base_name}.so", +- ] +- elif sys.platform == "darwin": +- lib_paths += [ +- base_path / f"lib{lib_base_name}.so", +- base_path / f"lib{lib_base_name}.dylib", +- ] +- elif sys.platform == "win32": +- lib_paths += [ +- base_path / f"{lib_base_name}.dll", +- base_path / f"lib{lib_base_name}.dll", +- ] +- else: ++ ++ def _candidate_paths(name: str) -> List[pathlib.Path]: ++ # Ordered filename candidates for lib `name` inside base_path. ++ if sys.platform == "emscripten": ++ # A CPython-style tag that Pyodide skips during package auto-load. 
++ return [base_path / f"lib{name}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}"] ++ if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"): ++ return [base_path / f"lib{name}.so"] ++ if sys.platform == "darwin": ++ # mobile-forge: on iOS, flet/serious-python repackages each bundled ++ # .dylib into a code-signed framework and renames the in-wheel file ++ # to lib{name}.fwork (ctypes.CDLL can dlopen it directly). Try that ++ # first, then the plain desktop-macOS names. ++ return [ ++ base_path / f"lib{name}.fwork", ++ base_path / f"lib{name}.so", ++ base_path / f"lib{name}.dylib", ++ ] ++ if sys.platform == "ios": ++ # flet's iOS Python (3.13+) reports sys.platform == "ios". ++ return [ ++ base_path / f"lib{name}.fwork", ++ base_path / f"lib{name}.dylib", ++ base_path / f"lib{name}.so", ++ ] ++ if sys.platform == "win32": ++ return [base_path / f"{name}.dll", base_path / f"lib{name}.dll"] + raise RuntimeError("Unsupported platform") + + cdll_args = dict() # type: ignore +@@ -67,9 +70,14 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path): + os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin")) + os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib")) + cdll_args["winmode"] = ctypes.RTLD_GLOBAL ++ else: ++ # mobile-forge: load globally so each library's dependencies (preloaded ++ # just below) satisfy its DT_NEEDED / @rpath entries by soname. The ++ # bundled libs carry no RUNPATH, so on iOS/Android the platform linker ++ # will not find sibling libs on its own.
++ cdll_args["mode"] = ctypes.RTLD_GLOBAL + + if sys.platform == "emscripten": +- cdll_args["mode"] = ctypes.RTLD_GLOBAL + lib_dir = str(base_path) + ld_library_path = os.environ.get("LD_LIBRARY_PATH", "") + if lib_dir not in ld_library_path.split(os.pathsep): +@@ -79,24 +87,26 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path): + else f"{lib_dir}{os.pathsep}{ld_library_path}" + ) + +- emscripten_dependencies = { +- "llama": ("ggml-base", "ggml-cpu", "ggml"), +- "mtmd": ("ggml-base", "ggml-cpu", "ggml", "llama"), +- } +- for dependency in emscripten_dependencies.get(lib_base_name, ()): +- dependency_path = ( +- base_path / f"lib{dependency}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}" +- ) ++ # Preload dependencies (in dependency order) so the main library resolves ++ # them from the already-loaded set: ggml-base before ggml-cpu / ggml, and ++ # all of them before llama / mtmd. Best-effort — the main load reports the ++ # real error if something is genuinely missing. ++ dependencies = { ++ "llama": ("ggml-base", "ggml-cpu", "ggml"), ++ "mtmd": ("ggml-base", "ggml-cpu", "ggml", "llama"), ++ "llava": ("ggml-base", "ggml-cpu", "ggml", "llama"), ++ } ++ for dependency in dependencies.get(lib_base_name, ()): ++ for dependency_path in _candidate_paths(dependency): + if dependency_path.exists(): + try: + ctypes.CDLL(str(dependency_path), **cdll_args) # type: ignore +- except Exception as e: +- raise RuntimeError( +- f"Failed to load shared library '{dependency_path}': {e}" +- ) ++ except Exception: ++ pass ++ break + + # Try to load the shared library, handling potential errors +- for lib_path in lib_paths: ++ for lib_path in _candidate_paths(lib_base_name): + if lib_path.exists(): + try: + return ctypes.CDLL(str(lib_path), **cdll_args) # type: ignore +diff --git a/vendor/llama.cpp/ggml/src/CMakeLists.txt b/vendor/llama.cpp/ggml/src/CMakeLists.txt +index 89e5180..d4a6657 100644 +--- a/vendor/llama.cpp/ggml/src/CMakeLists.txt ++++ 
b/vendor/llama.cpp/ggml/src/CMakeLists.txt +@@ -208,10 +208,13 @@ add_library(ggml-base + ggml-quants.h + gguf.cpp) + +-set_target_properties(ggml-base PROPERTIES +- VERSION ${GGML_VERSION} +- SOVERSION ${GGML_VERSION_MAJOR} +-) ++# mobile-forge: drop SONAME versioning so the wheel ships a single ++# unversioned lib instead of a .dylib -> .0 -> .0.15.3 symlink triplet ++# (forge dereferences those into 3 copies / colliding iOS frameworks). ++# set_target_properties(ggml-base PROPERTIES ++# VERSION ${GGML_VERSION} ++# SOVERSION ${GGML_VERSION_MAJOR} ++# ) + + target_include_directories(ggml-base PRIVATE .) + if (GGML_BACKEND_DL) +@@ -244,10 +247,10 @@ add_library(ggml + ggml-backend-reg.cpp) + add_library(ggml::ggml ALIAS ggml) + +-set_target_properties(ggml PROPERTIES +- VERSION ${GGML_VERSION} +- SOVERSION ${GGML_VERSION_MAJOR} +-) ++# set_target_properties(ggml PROPERTIES ++# VERSION ${GGML_VERSION} ++# SOVERSION ${GGML_VERSION_MAJOR} ++# ) + + if (GGML_BACKEND_DIR) + if (NOT GGML_BACKEND_DL) +@@ -291,10 +294,11 @@ function(ggml_add_backend_library backend) + # Set versioning properties for all backend libraries + # Building a MODULE library with a version is not supported on macOS (https://gitlab.kitware.com/cmake/cmake/-/issues/20782) + if (NOT (APPLE AND GGML_BACKEND_DL)) +- set_target_properties(${backend} PROPERTIES +- VERSION ${GGML_VERSION} +- SOVERSION ${GGML_VERSION_MAJOR} +- ) ++ # mobile-forge: drop SONAME versioning (single unversioned backend lib) ++ # set_target_properties(${backend} PROPERTIES ++ # VERSION ${GGML_VERSION} ++ # SOVERSION ${GGML_VERSION_MAJOR} ++ # ) + endif() + + if(NOT GGML_AVAILABLE_BACKENDS) +diff --git a/vendor/llama.cpp/src/CMakeLists.txt b/vendor/llama.cpp/src/CMakeLists.txt +index d15ccfd..84769d1 100644 +--- a/vendor/llama.cpp/src/CMakeLists.txt ++++ b/vendor/llama.cpp/src/CMakeLists.txt +@@ -43,8 +43,9 @@ add_library(llama + ) + + set_target_properties(llama PROPERTIES +- VERSION ${LLAMA_INSTALL_VERSION} +- SOVERSION 0 ++ # 
mobile-forge: drop SONAME versioning (single unversioned libllama) ++ # VERSION ${LLAMA_INSTALL_VERSION} ++ # SOVERSION 0 + MACHO_CURRENT_VERSION 0 # keep macOS linker from seeing oversized version number + ) + diff --git a/recipes/llama-cpp-python/tests/test_llama_cpp.py b/recipes/llama-cpp-python/tests/test_llama_cpp.py new file mode 100644 index 0000000..b7efb40 --- /dev/null +++ b/recipes/llama-cpp-python/tests/test_llama_cpp.py @@ -0,0 +1,26 @@ +def test_import_version(): + """Importing llama_cpp ctypes-loads the bundled libllama (+ libggml*) shared + libraries — the canary that the C++ engine cross-compiled, links its C++ + runtime, and the loader finds the lib under its mobile name.""" + import llama_cpp + + assert llama_cpp.__version__ + + +def test_native_lib_callable(): + """Call into the ctypes-loaded libllama without needing a GGUF model file. + Proves the native library actually loaded and its symbols are callable.""" + import llama_cpp + + # const char * llama_print_system_info(void) — reports the compiled CPU + # backend/feature set (e.g. NEON on arm64). Non-empty => the lib answered. + info = llama_cpp.llama_print_system_info() + assert isinstance(info, bytes) and len(info) > 0 + + # A couple of trivial capability queries through the C ABI. + assert isinstance(llama_cpp.llama_max_devices(), int) + assert isinstance(llama_cpp.llama_supports_mmap(), bool) + + # Backend init/free round-trip exercises ggml setup teardown. 
+ llama_cpp.llama_backend_init() + llama_cpp.llama_backend_free() From 92f4c79b5589411ed654439e4656e4fb42dd0f63 Mon Sep 17 00:00:00 2001 From: TheEthicalBoy <98978078+ndonkoHenri@users.noreply.github.com> Date: Wed, 1 Jul 2026 18:52:32 +0200 Subject: [PATCH 2/2] build number 1 --- recipes/llama-cpp-python/meta.yaml | 32 +++++++++---------- ..._llama_cpp.py => test_llama_cpp_python.py} | 9 ------ 2 files changed, 16 insertions(+), 25 deletions(-) rename recipes/llama-cpp-python/tests/{test_llama_cpp.py => test_llama_cpp_python.py} (71%) diff --git a/recipes/llama-cpp-python/meta.yaml b/recipes/llama-cpp-python/meta.yaml index d5f5dc5..e9553a7 100644 --- a/recipes/llama-cpp-python/meta.yaml +++ b/recipes/llama-cpp-python/meta.yaml @@ -2,23 +2,8 @@ package: name: llama-cpp-python version: "0.3.32" -requirements: - build: - # llama-cpp-python builds via scikit-build-core + CMake; with forge's - # `--no-isolation` these must be in the build venv. scikit-build-core comes - # from the package's own build-system.requires. Android keeps Ninja; iOS - # uses the Makefiles generator (see CMAKE_GENERATOR below). - - cmake - - ninja -# {% if sdk == 'android' %} - host: - # llama.cpp/ggml is C++; the bundled libllama.so / libggml*.so link - # libc++_shared.so, which the device runtime doesn't provide unless bundled. - - flet-libcpp-shared >=27.2.12479018 -# {% endif %} - build: - number: 0 + number: 1 script_env: # {% if sdk == 'android' %} CMAKE_ARGS: >- @@ -70,6 +55,21 @@ build: -DGGML_RPC=OFF # {% endif %} +requirements: + build: + # llama-cpp-python builds via scikit-build-core + CMake; with forge's + # `--no-isolation` these must be in the build venv. scikit-build-core comes + # from the package's own build-system.requires. Android keeps Ninja; iOS + # uses the Makefiles generator (see CMAKE_GENERATOR below). 
+ - cmake + - ninja +# {% if sdk == 'android' %} + host: + # llama.cpp/ggml is C++; the bundled libllama.so / libggml*.so link + # libc++_shared.so, which the device runtime doesn't provide unless bundled. + - flet-libcpp-shared >=27.2.12479018 +# {% endif %} + patches: # mobile.patch does four things: # 1. Stops llama-cpp-python's CMakeLists from FORCE-enabling GGML_METAL on all diff --git a/recipes/llama-cpp-python/tests/test_llama_cpp.py b/recipes/llama-cpp-python/tests/test_llama_cpp_python.py similarity index 71% rename from recipes/llama-cpp-python/tests/test_llama_cpp.py rename to recipes/llama-cpp-python/tests/test_llama_cpp_python.py index b7efb40..b678fa9 100644 --- a/recipes/llama-cpp-python/tests/test_llama_cpp.py +++ b/recipes/llama-cpp-python/tests/test_llama_cpp_python.py @@ -1,12 +1,3 @@ -def test_import_version(): - """Importing llama_cpp ctypes-loads the bundled libllama (+ libggml*) shared - libraries — the canary that the C++ engine cross-compiled, links its C++ - runtime, and the loader finds the lib under its mobile name.""" - import llama_cpp - - assert llama_cpp.__version__ - - def test_native_lib_callable(): """Call into the ctypes-loaded libllama without needing a GGUF model file. Proves the native library actually loaded and its symbols are callable."""