Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions recipes/llama-cpp-python/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package:
name: llama-cpp-python
version: "0.3.32"

build:
number: 1
script_env:
# {% if sdk == 'android' %}
CMAKE_ARGS: >-
-DCMAKE_TOOLCHAIN_FILE={NDK_ROOT}/build/cmake/android.toolchain.cmake
-DANDROID_ABI={ANDROID_ABI}
-DANDROID_NATIVE_API_LEVEL={ANDROID_API_LEVEL}
-DANDROID_STL=c++_shared
-DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/env
-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=16384
-DCMAKE_MODULE_LINKER_FLAGS=-Wl,-z,max-page-size=16384
-DLLAVA_BUILD=OFF
-DCMAKE_INSTALL_LIBDIR=llama_cpp/lib
-DGGML_NATIVE=OFF
-DGGML_OPENMP=OFF
-DGGML_LLAMAFILE=OFF
-DGGML_BLAS=OFF
-DGGML_ACCELERATE=OFF
-DGGML_METAL=OFF
-DGGML_CUDA=OFF
-DGGML_HIP=OFF
-DGGML_VULKAN=OFF
-DGGML_OPENCL=OFF
-DGGML_RPC=OFF
# {% else %}
# The `ninja` pip package crashes importing on the iOS crossenv python
# (sysconfig.get_preferred_scheme('user') -> 'posix_user' invalid on iOS),
# so use the Makefiles generator on iOS; scikit-build-core then never imports
# ninja. (Android keeps Ninja, which works there.)
CMAKE_GENERATOR: Unix Makefiles
CMAKE_ARGS: >-
-DCMAKE_SYSTEM_NAME=iOS
-DCMAKE_OSX_SYSROOT={{ sdk }}
-DCMAKE_OSX_DEPLOYMENT_TARGET={{ sdk_version }}
-DCMAKE_OSX_ARCHITECTURES={{ arch }}
-DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/env
-DLLAVA_BUILD=OFF
-DCMAKE_INSTALL_LIBDIR=llama_cpp/lib
-DGGML_NATIVE=OFF
-DGGML_OPENMP=OFF
-DGGML_LLAMAFILE=OFF
-DGGML_BLAS=OFF
-DGGML_ACCELERATE=OFF
-DGGML_METAL=OFF
-DGGML_METAL_EMBED_LIBRARY=OFF
-DGGML_CUDA=OFF
-DGGML_HIP=OFF
-DGGML_VULKAN=OFF
-DGGML_OPENCL=OFF
-DGGML_RPC=OFF
# {% endif %}

requirements:
build:
# llama-cpp-python builds via scikit-build-core + CMake; with forge's
# `--no-isolation` these must be in the build venv. scikit-build-core comes
# from the package's own build-system.requires. Android keeps Ninja; iOS
# uses the Makefiles generator (see CMAKE_GENERATOR in build.script_env above).
- cmake
- ninja
# {% if sdk == 'android' %}
host:
# llama.cpp/ggml is C++; the bundled libllama.so / libggml*.so link
# libc++_shared.so, which the device runtime doesn't provide unless bundled.
- flet-libcpp-shared >=27.2.12479018
# {% endif %}

patches:
# mobile.patch does five things:
# 1. Stops llama-cpp-python's CMakeLists from FORCE-enabling GGML_METAL on all
# Apple platforms (it would override our -DGGML_METAL=OFF for the iOS cross
# build, which has no Metal toolchain).
# 2. Skips building llama.cpp's unused `common` helper lib (~5 MB).
# 3. Drops SONAME versioning from the shipped libs (ggml-base/ggml/ggml-cpu/
# llama) so the wheel carries single unversioned files instead of a
# lib*.dylib -> .0 -> .0.15.3 symlink triplet that forge dereferences into
# three copies (and three colliding frameworks on iOS).
# 4. Teaches the ctypes loader to find the bundled lib under its iOS framework
# name (lib<name>.fwork) and on the "ios" platform.
# 5. Loads the libs with RTLD_GLOBAL (previously emscripten-only) and preloads
# each library's ggml/llama dependencies in dependency order, best-effort
# (preload errors are ignored; the main load reports the real failure),
# because the bundled libs carry no RUNPATH.
- mobile.patch
218 changes: 218 additions & 0 deletions recipes/llama-cpp-python/patches/mobile.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5feaaca..db16837 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -67,8 +67,11 @@ if (LLAMA_BUILD)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
set(CMAKE_SKIP_RPATH FALSE)

- # Enable building of the common library
- set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build llama.cpp common library" FORCE)
+ # mobile-forge: do NOT build llama.cpp's `common` helper library. It is
+ # only used by the CLI tools/examples (all disabled here); the ctypes
+ # bindings call libllama directly. Dropping it removes a ~5 MB unused lib
+ # (and its duplicate install) from the wheel.
+ set(LLAMA_BUILD_COMMON OFF CACHE BOOL "Build llama.cpp common library" FORCE)

# Disable building curl support
set(LLAMA_CURL OFF CACHE BOOL "llama.cpp: enable curl" FORCE)
@@ -100,7 +103,11 @@ if (LLAMA_BUILD)
endif()

# Architecture detection and settings for Apple platforms
- if (APPLE)
+ # mobile-forge: skip this macOS-host block when cross-compiling to iOS.
+ # It FORCEs GGML_METAL=ON (overriding our -DGGML_METAL=OFF) and guesses
+ # the arch via `uname -m`. For iOS we build CPU-only and pass the arch
+ # explicitly via CMAKE_OSX_ARCHITECTURES.
+ if (APPLE AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
# Get the target architecture
execute_process(
COMMAND uname -m
diff --git a/llama_cpp/_ctypes_extensions.py b/llama_cpp/_ctypes_extensions.py
index 02cee8a..9274b5d 100644
--- a/llama_cpp/_ctypes_extensions.py
+++ b/llama_cpp/_ctypes_extensions.py
@@ -25,30 +25,33 @@ _EMSCRIPTEN_SIDE_MODULE_SUFFIX = ".cpython-00-wasm32-emscripten.so"
# Load the library
def load_shared_library(lib_base_name: str, base_path: pathlib.Path):
"""Platform independent shared library loader"""
- # Searching for the library in the current directory under the name "libllama" (default name
- # for llamacpp) and "llama" (default name for this repo)
- lib_paths: List[pathlib.Path] = []
- # Determine the file extension based on the platform
- if sys.platform == "emscripten":
- # Use a CPython-style tag that Pyodide skips during package auto-load.
- lib_paths += [
- base_path / f"lib{lib_base_name}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}",
- ]
- elif sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
- lib_paths += [
- base_path / f"lib{lib_base_name}.so",
- ]
- elif sys.platform == "darwin":
- lib_paths += [
- base_path / f"lib{lib_base_name}.so",
- base_path / f"lib{lib_base_name}.dylib",
- ]
- elif sys.platform == "win32":
- lib_paths += [
- base_path / f"{lib_base_name}.dll",
- base_path / f"lib{lib_base_name}.dll",
- ]
- else:
+
+ def _candidate_paths(name: str) -> List[pathlib.Path]:
+ # Ordered filename candidates for lib `name` inside base_path.
+ if sys.platform == "emscripten":
+ # A CPython-style tag that Pyodide skips during package auto-load.
+ return [base_path / f"lib{name}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}"]
+ if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
+ return [base_path / f"lib{name}.so"]
+ if sys.platform == "darwin":
+ # mobile-forge: on iOS, flet/serious-python repackages each bundled
+ # .dylib into a code-signed framework and renames the in-wheel file
+ # to lib<name>.fwork (ctypes.CDLL can dlopen it directly). Try that
+ # first, then the plain desktop-macOS names.
+ return [
+ base_path / f"lib{name}.fwork",
+ base_path / f"lib{name}.so",
+ base_path / f"lib{name}.dylib",
+ ]
+ if sys.platform == "ios":
+ # flet's iOS Python (3.13+) reports sys.platform == "ios".
+ return [
+ base_path / f"lib{name}.fwork",
+ base_path / f"lib{name}.dylib",
+ base_path / f"lib{name}.so",
+ ]
+ if sys.platform == "win32":
+ return [base_path / f"{name}.dll", base_path / f"lib{name}.dll"]
raise RuntimeError("Unsupported platform")

cdll_args = dict() # type: ignore
@@ -67,9 +70,14 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path):
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin"))
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib"))
cdll_args["winmode"] = ctypes.RTLD_GLOBAL
+ else:
+ # mobile-forge: load globally so each library's dependencies (preloaded
+ # just below) satisfy its DT_NEEDED / @rpath entries by soname. The
+ # bundled libs carry no RUNPATH, so on iOS/Android the platform linker
+ # will not find sibling libs on its own.
+ cdll_args["mode"] = ctypes.RTLD_GLOBAL

if sys.platform == "emscripten":
- cdll_args["mode"] = ctypes.RTLD_GLOBAL
lib_dir = str(base_path)
ld_library_path = os.environ.get("LD_LIBRARY_PATH", "")
if lib_dir not in ld_library_path.split(os.pathsep):
@@ -79,24 +87,26 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path):
else f"{lib_dir}{os.pathsep}{ld_library_path}"
)

- emscripten_dependencies = {
- "llama": ("ggml-base", "ggml-cpu", "ggml"),
- "mtmd": ("ggml-base", "ggml-cpu", "ggml", "llama"),
- }
- for dependency in emscripten_dependencies.get(lib_base_name, ()):
- dependency_path = (
- base_path / f"lib{dependency}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}"
- )
+ # Preload dependencies (in dependency order) so the main library resolves
+ # them from the already-loaded set: ggml-base before ggml-cpu / ggml, and
+ # all of them before llama / mtmd. Best-effort — the main load reports the
+ # real error if something is genuinely missing.
+ dependencies = {
+ "llama": ("ggml-base", "ggml-cpu", "ggml"),
+ "mtmd": ("ggml-base", "ggml-cpu", "ggml", "llama"),
+ "llava": ("ggml-base", "ggml-cpu", "ggml", "llama"),
+ }
+ for dependency in dependencies.get(lib_base_name, ()):
+ for dependency_path in _candidate_paths(dependency):
if dependency_path.exists():
try:
ctypes.CDLL(str(dependency_path), **cdll_args) # type: ignore
- except Exception as e:
- raise RuntimeError(
- f"Failed to load shared library '{dependency_path}': {e}"
- )
+ except Exception:
+ pass
+ break

# Try to load the shared library, handling potential errors
- for lib_path in lib_paths:
+ for lib_path in _candidate_paths(lib_base_name):
if lib_path.exists():
try:
return ctypes.CDLL(str(lib_path), **cdll_args) # type: ignore
diff --git a/vendor/llama.cpp/ggml/src/CMakeLists.txt b/vendor/llama.cpp/ggml/src/CMakeLists.txt
index 89e5180..d4a6657 100644
--- a/vendor/llama.cpp/ggml/src/CMakeLists.txt
+++ b/vendor/llama.cpp/ggml/src/CMakeLists.txt
@@ -208,10 +208,13 @@ add_library(ggml-base
ggml-quants.h
gguf.cpp)

-set_target_properties(ggml-base PROPERTIES
- VERSION ${GGML_VERSION}
- SOVERSION ${GGML_VERSION_MAJOR}
-)
+# mobile-forge: drop SONAME versioning so the wheel ships a single
+# unversioned lib instead of a .dylib -> .0 -> .0.15.3 symlink triplet
+# (forge dereferences those into 3 copies / colliding iOS frameworks).
+# set_target_properties(ggml-base PROPERTIES
+# VERSION ${GGML_VERSION}
+# SOVERSION ${GGML_VERSION_MAJOR}
+# )

target_include_directories(ggml-base PRIVATE .)
if (GGML_BACKEND_DL)
@@ -244,10 +247,10 @@ add_library(ggml
ggml-backend-reg.cpp)
add_library(ggml::ggml ALIAS ggml)

-set_target_properties(ggml PROPERTIES
- VERSION ${GGML_VERSION}
- SOVERSION ${GGML_VERSION_MAJOR}
-)
+# set_target_properties(ggml PROPERTIES
+# VERSION ${GGML_VERSION}
+# SOVERSION ${GGML_VERSION_MAJOR}
+# )

if (GGML_BACKEND_DIR)
if (NOT GGML_BACKEND_DL)
@@ -291,10 +294,11 @@ function(ggml_add_backend_library backend)
# Set versioning properties for all backend libraries
# Building a MODULE library with a version is not supported on macOS (https://gitlab.kitware.com/cmake/cmake/-/issues/20782)
if (NOT (APPLE AND GGML_BACKEND_DL))
- set_target_properties(${backend} PROPERTIES
- VERSION ${GGML_VERSION}
- SOVERSION ${GGML_VERSION_MAJOR}
- )
+ # mobile-forge: drop SONAME versioning (single unversioned backend lib)
+ # set_target_properties(${backend} PROPERTIES
+ # VERSION ${GGML_VERSION}
+ # SOVERSION ${GGML_VERSION_MAJOR}
+ # )
endif()

if(NOT GGML_AVAILABLE_BACKENDS)
diff --git a/vendor/llama.cpp/src/CMakeLists.txt b/vendor/llama.cpp/src/CMakeLists.txt
index d15ccfd..84769d1 100644
--- a/vendor/llama.cpp/src/CMakeLists.txt
+++ b/vendor/llama.cpp/src/CMakeLists.txt
@@ -43,8 +43,9 @@ add_library(llama
)

set_target_properties(llama PROPERTIES
- VERSION ${LLAMA_INSTALL_VERSION}
- SOVERSION 0
+ # mobile-forge: drop SONAME versioning (single unversioned libllama)
+ # VERSION ${LLAMA_INSTALL_VERSION}
+ # SOVERSION 0
MACHO_CURRENT_VERSION 0 # keep macOS linker from seeing oversized version number
)

17 changes: 17 additions & 0 deletions recipes/llama-cpp-python/tests/test_llama_cpp_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
def test_native_lib_callable():
"""Call into the ctypes-loaded libllama without needing a GGUF model file.
Proves the native library actually loaded and its symbols are callable."""
import llama_cpp

# const char * llama_print_system_info(void) — reports the compiled CPU
# backend/feature set (e.g. NEON on arm64). Non-empty => the lib answered.
info = llama_cpp.llama_print_system_info()
assert isinstance(info, bytes) and len(info) > 0

# A couple of trivial capability queries through the C ABI.
assert isinstance(llama_cpp.llama_max_devices(), int)
assert isinstance(llama_cpp.llama_supports_mmap(), bool)

# Backend init/free round-trip exercises ggml setup teardown.
llama_cpp.llama_backend_init()
llama_cpp.llama_backend_free()
Loading