From 81cc0ab15d158f6562c683e49b5e1b3b17571808 Mon Sep 17 00:00:00 2001
From: ndonkoHenri <robotcoder4@protonmail.com>
Date: Wed, 1 Jul 2026 02:49:26 +0200
Subject: [PATCH 1/2] recipe: llama-cpp-python 0.3.32 (llama.cpp, iOS +
 Android)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Runs local GGUF LLMs on-device (flet-dev/flet#6627). scikit-build-core /
CMake package that vendors the full llama.cpp engine; the Python layer is a
pure-ctypes binding that loads the bundled libllama/libggml* shared libs, so
this is the duckdb archetype crossed with a pyzbar-style loader.

CPU-only baseline: all GPU backends (Metal/CUDA/Vulkan/OpenCL/HIP/RPC), BLAS,
Accelerate, OpenMP and LLAMAFILE are disabled, GGML_NATIVE=OFF, LLAVA_BUILD=OFF
(the multimodal mtmd surface is imported lazily so text inference never needs
it). Android links libc++_shared (flet-libcpp-shared) and gets the 16 KB
page-size flags; iOS uses the Unix Makefiles generator.

mobile.patch (4 parts):
  1. Gate the CMakeLists Apple block to skip iOS — it FORCE-enables GGML_METAL
     (via CACHE...FORCE, which -DGGML_METAL=OFF can't override) and guesses the
     arch from `uname -m`; iOS is CPU-only with an explicit arch.
  2. Skip llama.cpp's unused `common` helper lib (~5 MB).
  3. Strip SONAME versioning from the shipped libs, so the wheel carries single
     unversioned files instead of a lib*.dylib -> .0 -> .0.15.3 symlink triplet
     (forge's packer dereferences those into 3 copies / colliding iOS
     frameworks). Cuts the wheel from ~14 MB to ~1.7 MB.
  4. Rewrite the ctypes loader to (a) find the lib under its iOS framework name
     (lib<name>.fwork), including when sys.platform == "ios", and (b) preload
     the ggml dependency chain with RTLD_GLOBAL — the bundled libs carry no
     RUNPATH, so the platform linker can't resolve siblings on its own.

Also -DCMAKE_INSTALL_LIBDIR=llama_cpp/lib to merge llama.cpp's standard install
into the package dir (drops the duplicate top-level lib/).

Full 6-slice matrix builds green (iOS device/arm64-sim/x86_64-sim, Android
arm64-v8a/x86_64/armeabi-v7a). On-device validated end to end on Android arm64
and the iOS arm64 simulator: import + native calls + real GGUF inference
(SmolLM2-135M Q4) via the recipe-tester app.
---
 recipes/llama-cpp-python/meta.yaml            |  85 +++++++
 recipes/llama-cpp-python/patches/mobile.patch | 218 ++++++++++++++++++
 .../llama-cpp-python/tests/test_llama_cpp.py  |  26 +++
 3 files changed, 329 insertions(+)
 create mode 100644 recipes/llama-cpp-python/meta.yaml
 create mode 100644 recipes/llama-cpp-python/patches/mobile.patch
 create mode 100644 recipes/llama-cpp-python/tests/test_llama_cpp.py
diff --git a/recipes/llama-cpp-python/meta.yaml b/recipes/llama-cpp-python/meta.yaml
new file mode 100644
index 0000000..d5f5dc5
--- /dev/null
+++ b/recipes/llama-cpp-python/meta.yaml
@@ -0,0 +1,85 @@
+package:
+  name: llama-cpp-python
+  version: "0.3.32"
+
+requirements:
+  build:
+    # llama-cpp-python builds via scikit-build-core + CMake; with forge's
+    # `--no-isolation` these must be in the build venv. scikit-build-core comes
+    # from the package's own build-system.requires. Android keeps Ninja; iOS
+    # uses the Makefiles generator (see CMAKE_GENERATOR below).
+    - cmake
+    - ninja
+# {% if sdk == 'android' %}
+  host:
+    # llama.cpp/ggml is C++; the bundled libllama.so / libggml*.so link
+    # libc++_shared.so, which the device runtime doesn't provide unless bundled.
+    - flet-libcpp-shared >=27.2.12479018
+# {% endif %}
+
+build:
+  number: 0
+  script_env:
+# {% if sdk == 'android' %}
+    CMAKE_ARGS: >-
+      -DCMAKE_TOOLCHAIN_FILE={NDK_ROOT}/build/cmake/android.toolchain.cmake
+      -DANDROID_ABI={ANDROID_ABI}
+      -DANDROID_NATIVE_API_LEVEL={ANDROID_API_LEVEL}
+      -DANDROID_STL=c++_shared
+      -DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/env
+      -DCMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=16384
+      -DCMAKE_MODULE_LINKER_FLAGS=-Wl,-z,max-page-size=16384
+      -DLLAVA_BUILD=OFF
+      -DCMAKE_INSTALL_LIBDIR=llama_cpp/lib
+      -DGGML_NATIVE=OFF
+      -DGGML_OPENMP=OFF
+      -DGGML_LLAMAFILE=OFF
+      -DGGML_BLAS=OFF
+      -DGGML_ACCELERATE=OFF
+      -DGGML_METAL=OFF
+      -DGGML_CUDA=OFF
+      -DGGML_HIP=OFF
+      -DGGML_VULKAN=OFF
+      -DGGML_OPENCL=OFF
+      -DGGML_RPC=OFF
+# {% else %}
+    # The `ninja` pip package crashes importing on the iOS crossenv python
+    # (sysconfig.get_preferred_scheme('user') -> 'posix_user' invalid on iOS),
+    # so use the Makefiles generator on iOS; scikit-build-core then never imports
+    # ninja. (Android keeps Ninja, which works there.)
+    CMAKE_GENERATOR: Unix Makefiles
+    CMAKE_ARGS: >-
+      -DCMAKE_SYSTEM_NAME=iOS
+      -DCMAKE_OSX_SYSROOT={{ sdk }}
+      -DCMAKE_OSX_DEPLOYMENT_TARGET={{ sdk_version }}
+      -DCMAKE_OSX_ARCHITECTURES={{ arch }}
+      -DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/env
+      -DLLAVA_BUILD=OFF
+      -DCMAKE_INSTALL_LIBDIR=llama_cpp/lib
+      -DGGML_NATIVE=OFF
+      -DGGML_OPENMP=OFF
+      -DGGML_LLAMAFILE=OFF
+      -DGGML_BLAS=OFF
+      -DGGML_ACCELERATE=OFF
+      -DGGML_METAL=OFF
+      -DGGML_METAL_EMBED_LIBRARY=OFF
+      -DGGML_CUDA=OFF
+      -DGGML_HIP=OFF
+      -DGGML_VULKAN=OFF
+      -DGGML_OPENCL=OFF
+      -DGGML_RPC=OFF
+# {% endif %}
+
+patches:
+  # mobile.patch does four things:
+  #  1. Stops llama-cpp-python's CMakeLists from FORCE-enabling GGML_METAL on all
+  #     Apple platforms (it would override our -DGGML_METAL=OFF for the iOS cross
+  #     build, which has no Metal toolchain).
+  #  2. Skips building llama.cpp's unused `common` helper lib (~5 MB).
+  #  3. Drops SONAME versioning from the shipped libs (ggml-base/ggml/ggml-cpu/
+  #     llama) so the wheel carries single unversioned files instead of a
+  #     lib*.dylib -> .0 -> .0.15.3 symlink triplet that forge dereferences into
+  #     three copies (and three colliding frameworks on iOS).
+  #  4. Teaches the ctypes loader the iOS names (lib<name>.fwork, "ios" platform)
+  #     and preloads the ggml dependency chain with RTLD_GLOBAL.
+  - mobile.patch
diff --git a/recipes/llama-cpp-python/patches/mobile.patch b/recipes/llama-cpp-python/patches/mobile.patch
new file mode 100644
index 0000000..9409ce4
--- /dev/null
+++ b/recipes/llama-cpp-python/patches/mobile.patch
@@ -0,0 +1,218 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 5feaaca..db16837 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -67,8 +67,11 @@ if (LLAMA_BUILD)
+     set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+     set(CMAKE_SKIP_RPATH FALSE)
+ 
+-    # Enable building of the common library
+-    set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build llama.cpp common library" FORCE)
++    # mobile-forge: do NOT build llama.cpp's `common` helper library. It is
++    # only used by the CLI tools/examples (all disabled here); the ctypes
++    # bindings call libllama directly. Dropping it removes a ~5 MB unused lib
++    # (and its duplicate install) from the wheel.
++    set(LLAMA_BUILD_COMMON OFF CACHE BOOL "Build llama.cpp common library" FORCE)
+ 
+     # Disable building curl support
+     set(LLAMA_CURL OFF CACHE BOOL "llama.cpp: enable curl" FORCE)
+@@ -100,7 +103,11 @@ if (LLAMA_BUILD)
+     endif()
+ 
+     # Architecture detection and settings for Apple platforms
+-    if (APPLE)
++    # mobile-forge: skip this macOS-host block when cross-compiling to iOS.
++    # It FORCEs GGML_METAL=ON (overriding our -DGGML_METAL=OFF) and guesses
++    # the arch via `uname -m`. For iOS we build CPU-only and pass the arch
++    # explicitly via CMAKE_OSX_ARCHITECTURES.
++    if (APPLE AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
+         # Get the target architecture
+         execute_process(
+             COMMAND uname -m
+diff --git a/llama_cpp/_ctypes_extensions.py b/llama_cpp/_ctypes_extensions.py
+index 02cee8a..9274b5d 100644
+--- a/llama_cpp/_ctypes_extensions.py
++++ b/llama_cpp/_ctypes_extensions.py
+@@ -25,30 +25,33 @@ _EMSCRIPTEN_SIDE_MODULE_SUFFIX = ".cpython-00-wasm32-emscripten.so"
+ # Load the library
+ def load_shared_library(lib_base_name: str, base_path: pathlib.Path):
+     """Platform independent shared library loader"""
+-    # Searching for the library in the current directory under the name "libllama" (default name
+-    # for llamacpp) and "llama" (default name for this repo)
+-    lib_paths: List[pathlib.Path] = []
+-    # Determine the file extension based on the platform
+-    if sys.platform == "emscripten":
+-        # Use a CPython-style tag that Pyodide skips during package auto-load.
+-        lib_paths += [
+-            base_path / f"lib{lib_base_name}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}",
+-        ]
+-    elif sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
+-        lib_paths += [
+-            base_path / f"lib{lib_base_name}.so",
+-        ]
+-    elif sys.platform == "darwin":
+-        lib_paths += [
+-            base_path / f"lib{lib_base_name}.so",
+-            base_path / f"lib{lib_base_name}.dylib",
+-        ]
+-    elif sys.platform == "win32":
+-        lib_paths += [
+-            base_path / f"{lib_base_name}.dll",
+-            base_path / f"lib{lib_base_name}.dll",
+-        ]
+-    else:
++
++    def _candidate_paths(name: str) -> List[pathlib.Path]:
++        # Ordered filename candidates for lib `name` inside base_path.
++        if sys.platform == "emscripten":
++            # A CPython-style tag that Pyodide skips during package auto-load.
++            return [base_path / f"lib{name}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}"]
++        if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
++            return [base_path / f"lib{name}.so"]
++        if sys.platform == "darwin":
++            # mobile-forge: on iOS, flet/serious-python repackages each bundled
++            # .dylib into a code-signed framework and renames the in-wheel file
++            # to lib<name>.fwork (ctypes.CDLL can dlopen it directly). Try that
++            # first, then the plain desktop-macOS names.
++            return [
++                base_path / f"lib{name}.fwork",
++                base_path / f"lib{name}.so",
++                base_path / f"lib{name}.dylib",
++            ]
++        if sys.platform == "ios":
++            # flet's iOS Python (3.13+) reports sys.platform == "ios".
++            return [
++                base_path / f"lib{name}.fwork",
++                base_path / f"lib{name}.dylib",
++                base_path / f"lib{name}.so",
++            ]
++        if sys.platform == "win32":
++            return [base_path / f"{name}.dll", base_path / f"lib{name}.dll"]
+         raise RuntimeError("Unsupported platform")
+ 
+     cdll_args = dict()  # type: ignore
+@@ -67,9 +70,14 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path):
+             os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin"))
+             os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib"))
+         cdll_args["winmode"] = ctypes.RTLD_GLOBAL
++    else:
++        # mobile-forge: load globally so each library's dependencies (preloaded
++        # just below) satisfy its DT_NEEDED / @rpath entries by soname. The
++        # bundled libs carry no RUNPATH, so on iOS/Android the platform linker
++        # will not find sibling libs on its own.
++        cdll_args["mode"] = ctypes.RTLD_GLOBAL
+ 
+     if sys.platform == "emscripten":
+-        cdll_args["mode"] = ctypes.RTLD_GLOBAL
+         lib_dir = str(base_path)
+         ld_library_path = os.environ.get("LD_LIBRARY_PATH", "")
+         if lib_dir not in ld_library_path.split(os.pathsep):
+@@ -79,24 +87,26 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path):
+                 else f"{lib_dir}{os.pathsep}{ld_library_path}"
+             )
+ 
+-        emscripten_dependencies = {
+-            "llama": ("ggml-base", "ggml-cpu", "ggml"),
+-            "mtmd": ("ggml-base", "ggml-cpu", "ggml", "llama"),
+-        }
+-        for dependency in emscripten_dependencies.get(lib_base_name, ()):
+-            dependency_path = (
+-                base_path / f"lib{dependency}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}"
+-            )
++    # Preload dependencies (in dependency order) so the main library resolves
++    # them from the already-loaded set: ggml-base before ggml-cpu / ggml, and
++    # all of them before llama / mtmd. Best-effort — the main load reports the
++    # real error if something is genuinely missing.
++    dependencies = {
++        "llama": ("ggml-base", "ggml-cpu", "ggml"),
++        "mtmd": ("ggml-base", "ggml-cpu", "ggml", "llama"),
++        "llava": ("ggml-base", "ggml-cpu", "ggml", "llama"),
++    }
++    for dependency in dependencies.get(lib_base_name, ()):
++        for dependency_path in _candidate_paths(dependency):
+             if dependency_path.exists():
+                 try:
+                     ctypes.CDLL(str(dependency_path), **cdll_args)  # type: ignore
+-                except Exception as e:
+-                    raise RuntimeError(
+-                        f"Failed to load shared library '{dependency_path}': {e}"
+-                    )
++                except Exception:
++                    pass
++                break
+ 
+     # Try to load the shared library, handling potential errors
+-    for lib_path in lib_paths:
++    for lib_path in _candidate_paths(lib_base_name):
+         if lib_path.exists():
+             try:
+                 return ctypes.CDLL(str(lib_path), **cdll_args)  # type: ignore
+diff --git a/vendor/llama.cpp/ggml/src/CMakeLists.txt b/vendor/llama.cpp/ggml/src/CMakeLists.txt
+index 89e5180..d4a6657 100644
+--- a/vendor/llama.cpp/ggml/src/CMakeLists.txt
++++ b/vendor/llama.cpp/ggml/src/CMakeLists.txt
+@@ -208,10 +208,13 @@ add_library(ggml-base
+             ggml-quants.h
+             gguf.cpp)
+ 
+-set_target_properties(ggml-base PROPERTIES
+-    VERSION ${GGML_VERSION}
+-    SOVERSION ${GGML_VERSION_MAJOR}
+-)
++# mobile-forge: drop SONAME versioning so the wheel ships a single
++# unversioned lib instead of a .dylib -> .0 -> .0.15.3 symlink triplet
++# (forge dereferences those into 3 copies / colliding iOS frameworks).
++# set_target_properties(ggml-base PROPERTIES
++#     VERSION ${GGML_VERSION}
++#     SOVERSION ${GGML_VERSION_MAJOR}
++# )
+ 
+ target_include_directories(ggml-base PRIVATE .)
+ if (GGML_BACKEND_DL)
+@@ -244,10 +247,10 @@ add_library(ggml
+             ggml-backend-reg.cpp)
+ add_library(ggml::ggml ALIAS ggml)
+ 
+-set_target_properties(ggml PROPERTIES
+-    VERSION ${GGML_VERSION}
+-    SOVERSION ${GGML_VERSION_MAJOR}
+-)
++# set_target_properties(ggml PROPERTIES
++#     VERSION ${GGML_VERSION}
++#     SOVERSION ${GGML_VERSION_MAJOR}
++# )
+ 
+ if (GGML_BACKEND_DIR)
+     if (NOT GGML_BACKEND_DL)
+@@ -291,10 +294,11 @@ function(ggml_add_backend_library backend)
+     # Set versioning properties for all backend libraries
+     # Building a MODULE library with a version is not supported on macOS (https://gitlab.kitware.com/cmake/cmake/-/issues/20782)
+     if (NOT (APPLE AND GGML_BACKEND_DL))
+-        set_target_properties(${backend} PROPERTIES
+-            VERSION ${GGML_VERSION}
+-            SOVERSION ${GGML_VERSION_MAJOR}
+-        )
++        # mobile-forge: drop SONAME versioning (single unversioned backend lib)
++        # set_target_properties(${backend} PROPERTIES
++        #     VERSION ${GGML_VERSION}
++        #     SOVERSION ${GGML_VERSION_MAJOR}
++        # )
+     endif()
+ 
+     if(NOT GGML_AVAILABLE_BACKENDS)
+diff --git a/vendor/llama.cpp/src/CMakeLists.txt b/vendor/llama.cpp/src/CMakeLists.txt
+index d15ccfd..84769d1 100644
+--- a/vendor/llama.cpp/src/CMakeLists.txt
++++ b/vendor/llama.cpp/src/CMakeLists.txt
+@@ -43,8 +43,9 @@ add_library(llama
+             )
+ 
+ set_target_properties(llama PROPERTIES
+-    VERSION ${LLAMA_INSTALL_VERSION}
+-    SOVERSION 0
++    # mobile-forge: drop SONAME versioning (single unversioned libllama)
++    # VERSION ${LLAMA_INSTALL_VERSION}
++    # SOVERSION 0
+     MACHO_CURRENT_VERSION 0 # keep macOS linker from seeing oversized version number
+ )
+ 
diff --git a/recipes/llama-cpp-python/tests/test_llama_cpp.py b/recipes/llama-cpp-python/tests/test_llama_cpp.py
new file mode 100644
index 0000000..b7efb40
--- /dev/null
+++ b/recipes/llama-cpp-python/tests/test_llama_cpp.py
@@ -0,0 +1,26 @@
+def test_import_version():
+    """Importing llama_cpp ctypes-loads the bundled libllama (+ libggml*) shared
+    libraries — the canary that the C++ engine cross-compiled, links its C++
+    runtime, and the loader finds the lib under its mobile name."""
+    import llama_cpp
+
+    assert llama_cpp.__version__
+
+
+def test_native_lib_callable():
+    """Call into the ctypes-loaded libllama without needing a GGUF model file.
+    Proves the native library actually loaded and its symbols are callable."""
+    import llama_cpp
+
+    # const char * llama_print_system_info(void) — reports the compiled CPU
+    # backend/feature set (e.g. NEON on arm64). Non-empty => the lib answered.
+    info = llama_cpp.llama_print_system_info()
+    assert isinstance(info, bytes) and len(info) > 0
+
+    # A couple of trivial capability queries through the C ABI.
+    assert isinstance(llama_cpp.llama_max_devices(), int)
+    assert isinstance(llama_cpp.llama_supports_mmap(), bool)
+
+    # Backend init/free round-trip exercises ggml setup and teardown.
+    llama_cpp.llama_backend_init()
+    llama_cpp.llama_backend_free()

From 92f4c79b5589411ed654439e4656e4fb42dd0f63 Mon Sep 17 00:00:00 2001
From: TheEthicalBoy <98978078+ndonkoHenri@users.noreply.github.com>
Date: Wed, 1 Jul 2026 18:52:32 +0200
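Subject: [PATCH 2/2] llama-cpp-python: bump build number to 1

Also move `requirements` below `build` in meta.yaml, rename the test file to
test_llama_cpp_python.py, and drop test_import_version (the import is already
exercised by test_native_lib_callable).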

---
 recipes/llama-cpp-python/meta.yaml            | 32 +++++++++----------
 ..._llama_cpp.py => test_llama_cpp_python.py} |  9 ------
 2 files changed, 16 insertions(+), 25 deletions(-)
 rename recipes/llama-cpp-python/tests/{test_llama_cpp.py => test_llama_cpp_python.py} (71%)

diff --git a/recipes/llama-cpp-python/meta.yaml b/recipes/llama-cpp-python/meta.yaml
index d5f5dc5..e9553a7 100644
--- a/recipes/llama-cpp-python/meta.yaml
+++ b/recipes/llama-cpp-python/meta.yaml
@@ -2,23 +2,8 @@ package:
   name: llama-cpp-python
   version: "0.3.32"
 
-requirements:
-  build:
-    # llama-cpp-python builds via scikit-build-core + CMake; with forge's
-    # `--no-isolation` these must be in the build venv. scikit-build-core comes
-    # from the package's own build-system.requires. Android keeps Ninja; iOS
-    # uses the Makefiles generator (see CMAKE_GENERATOR below).
-    - cmake
-    - ninja
-# {% if sdk == 'android' %}
-  host:
-    # llama.cpp/ggml is C++; the bundled libllama.so / libggml*.so link
-    # libc++_shared.so, which the device runtime doesn't provide unless bundled.
-    - flet-libcpp-shared >=27.2.12479018
-# {% endif %}
-
 build:
-  number: 0
+  number: 1
   script_env:
 # {% if sdk == 'android' %}
     CMAKE_ARGS: >-
@@ -70,6 +55,21 @@ build:
       -DGGML_RPC=OFF
 # {% endif %}
 
+requirements:
+  build:
+    # llama-cpp-python builds via scikit-build-core + CMake; with forge's
+    # `--no-isolation` these must be in the build venv. scikit-build-core comes
+    # from the package's own build-system.requires. Android keeps Ninja; iOS
+    # uses the Makefiles generator (see CMAKE_GENERATOR above).
+    - cmake
+    - ninja
+# {% if sdk == 'android' %}
+  host:
+    # llama.cpp/ggml is C++; the bundled libllama.so / libggml*.so link
+    # libc++_shared.so, which the device runtime doesn't provide unless bundled.
+    - flet-libcpp-shared >=27.2.12479018
+# {% endif %}
+
 patches:
   # mobile.patch does four things:
   #  1. Stops llama-cpp-python's CMakeLists from FORCE-enabling GGML_METAL on all
diff --git a/recipes/llama-cpp-python/tests/test_llama_cpp.py b/recipes/llama-cpp-python/tests/test_llama_cpp_python.py
similarity index 71%
rename from recipes/llama-cpp-python/tests/test_llama_cpp.py
rename to recipes/llama-cpp-python/tests/test_llama_cpp_python.py
index b7efb40..b678fa9 100644
--- a/recipes/llama-cpp-python/tests/test_llama_cpp.py
+++ b/recipes/llama-cpp-python/tests/test_llama_cpp_python.py
@@ -1,12 +1,3 @@
-def test_import_version():
-    """Importing llama_cpp ctypes-loads the bundled libllama (+ libggml*) shared
-    libraries — the canary that the C++ engine cross-compiled, links its C++
-    runtime, and the loader finds the lib under its mobile name."""
-    import llama_cpp
-
-    assert llama_cpp.__version__
-
-
 def test_native_lib_callable():
     """Call into the ctypes-loaded libllama without needing a GGUF model file.
     Proves the native library actually loaded and its symbols are callable."""