Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions bindings/python/quantcpp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,36 @@ def _download_with_progress(url: str, dest: Path, desc: str) -> None:
tmp.rename(dest)


# Short, user-facing spellings mapped onto canonical model names.
#
# Every alias is lowercase because _resolve_model_name() lowercases the
# requested name before looking it up here. The table is written as
# (canonical name, aliases) groups so that all spellings of one model sit
# on a single line; the comprehension flattens it into alias -> canonical.
_MODEL_ALIASES = {
    alias: canonical
    for canonical, aliases in (
        ("SmolLM2-1.7B", ("smollm2", "smollm2:1.7b")),
        ("SmolLM2-135M", ("smollm2:135m",)),
        ("Qwen3.5-0.8B", ("qwen3.5", "qwen3.5:0.8b")),
        ("Llama-3.2-1B", ("llama3.2", "llama3.2:1b")),
        ("Phi-3.5-mini", ("phi3.5", "phi3.5:mini", "phi-3.5", "phi-3.5-mini")),
    )
    for alias in aliases
}


def _resolve_model_name(name: str) -> str:
    """Resolve an alias or case-insensitive name to a canonical registry key.

    Lookup order:

    1. ``name`` exactly as given, if it is already a key of
       ``_MODEL_REGISTRY``.
    2. The lowercased ``name`` in ``_MODEL_ALIASES``.
    3. A case-insensitive match against the keys of ``_MODEL_REGISTRY``,
       so that e.g. a lowercased canonical name still resolves even when
       it has no explicit alias entry.

    Parameters
    ----------
    name : str
        Model name or alias as supplied by the caller.

    Returns
    -------
    str
        The canonical registry key when one of the lookups succeeds;
        otherwise ``name`` unchanged, leaving it to the caller to report
        the unknown model.
    """
    # Exact hit first: a canonical name must never be shadowed by an alias.
    if name in _MODEL_REGISTRY:
        return name

    lowered = name.lower()

    aliased = _MODEL_ALIASES.get(lowered)
    if aliased is not None:
        return aliased

    # Fallback that makes canonical names themselves case-insensitive,
    # which the alias table alone only does for names it happens to list.
    for canonical in _MODEL_REGISTRY:
        if canonical.lower() == lowered:
            return canonical

    return name


def download(name: str) -> str:
"""Download a model from HuggingFace Hub and return its local path.

Parameters
----------
name : str
Model name from the registry. Currently available:
``"SmolLM2-135M"`` (~135 MB, good for testing).
Model name or alias. Examples: ``"Phi-3.5-mini"``, ``"phi3.5:mini"``,
``"smollm2"``, ``"llama3.2:1b"``.

Returns
-------
Expand All @@ -171,9 +193,10 @@ def download(name: str) -> str:

Examples
--------
>>> path = quantcpp.download("SmolLM2-135M")
>>> path = quantcpp.download("phi3.5:mini")
>>> m = quantcpp.Model(path)
"""
name = _resolve_model_name(name)
if name not in _MODEL_REGISTRY:
avail = ", ".join(sorted(_MODEL_REGISTRY))
raise ValueError(
Expand Down
9 changes: 9 additions & 0 deletions bindings/python/quantcpp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,15 @@ def cmd_serve(args):
print(" Or install via your package manager.", file=sys.stderr)
return 2

# Check if port is available before launching server
import socket
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
if sock.connect_ex(("127.0.0.1", args.port)) == 0:
print(f"error: port {args.port} is already in use.", file=sys.stderr)
print(f" Try a different port: quantcpp serve {args.model} --port {args.port + 1}",
file=sys.stderr)
return 1

cmd = [binary, model_path, "-p", str(args.port), "-j", str(args.threads)]
print(f"quantcpp serve {os.path.basename(model_path)} on :{args.port}", file=sys.stderr)
print("", file=sys.stderr)
Expand Down
Loading