From 21a4bc2074f483ad0f12e584a01e7ac52ceaddd3 Mon Sep 17 00:00:00 2001 From: Jonathan Buchanan Date: Mon, 16 Mar 2026 17:54:46 -0700 Subject: [PATCH 1/5] add: notebook smoke tests --- .github/workflows/check-notebooks.yaml | 43 + .gitignore | 5 + notebooks/LFM2_Inference_with_Ollama.ipynb | 12 +- .../LFM2_Inference_with_Transformers.ipynb | 75 +- notebooks/LFM2_Inference_with_llama_cpp.ipynb | 56 +- notebooks/LFM2_Inference_with_vLLM.ipynb | 71 +- notebooks/grpo_for_verifiable_tasks.ipynb | 149 +- notebooks/quickstart_snippets.ipynb | 113 +- ...360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" | 223 +- ...\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" | 2121 +++++++++-------- .../\360\237\222\247_LFM2_DPO_with_TRL.ipynb" | 403 ++-- util/modal_runner.py | 62 + util/run_notebook_test.py | 271 +++ 13 files changed, 2216 insertions(+), 1388 deletions(-) create mode 100644 util/modal_runner.py create mode 100644 util/run_notebook_test.py diff --git a/.github/workflows/check-notebooks.yaml b/.github/workflows/check-notebooks.yaml index 6641df6..bff3725 100644 --- a/.github/workflows/check-notebooks.yaml +++ b/.github/workflows/check-notebooks.yaml @@ -68,3 +68,46 @@ jobs: echo "" echo "βœ… All required checks passed!" echo "πŸ“Š Validated $(find . -name '*.ipynb' | wc -l) notebooks" + + discover-notebooks: + runs-on: ubuntu-latest + outputs: + notebooks: ${{ steps.list.outputs.notebooks }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: List notebooks + id: list + run: | + notebooks=$(ls notebooks/*.ipynb | xargs -I{} basename {} | jq -R -s -c 'split("\n") | map(select(. != ""))') + echo "notebooks=$notebooks" >> "$GITHUB_OUTPUT" + echo "Found notebooks: $notebooks" + + run-notebooks: + needs: discover-notebooks + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + notebook: ${{ fromJSON(needs.discover-notebooks.outputs.notebooks) }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Install modal + run: uv pip install --system modal + + - name: Set up Modal token + run: modal token set --token-id ${{ secrets.MODAL_TOKEN_ID }} --token-secret ${{ secrets.MODAL_TOKEN_SECRET }} + + - name: Run notebook on Modal + run: python util/run_notebook_test.py --notebook "notebooks/${{ matrix.notebook }}" --skip-packages flash-attn \ No newline at end of file diff --git a/.gitignore b/.gitignore index 95a32ba..42c6d12 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,8 @@ Thumbs.db *~ .vscode/ .onnx-tests/ + +env +env/* + +__pycache__/ \ No newline at end of file diff --git a/notebooks/LFM2_Inference_with_Ollama.ipynb b/notebooks/LFM2_Inference_with_Ollama.ipynb index d626b15..eef7913 100644 --- a/notebooks/LFM2_Inference_with_Ollama.ipynb +++ b/notebooks/LFM2_Inference_with_Ollama.ipynb @@ -3,7 +3,13 @@ { "cell_type": "markdown", "metadata": {}, - "source": "# πŸ’§ LFM2 Inference with Ollama\n\nThis notebook demonstrates how to use the [Ollama](https://ollama.com) API to run [LFM2](https://huggingface.co/collections/LiquidAI/lfm2-67d775f3b4b6fe79fbb21bda) and [LFM2.5](https://huggingface.co/collections/LiquidAI/lfm25-6839e3e26b2a9fdbde95b341) models.\n\n> ⚠️ **Note:** Ollama is intended to run locally on your machine. This notebook shows the Python and curl API usage to get Ollama running in Colab. Install Ollama from [ollama.com/download](https://ollama.com/download) and follow the [Liquid Docs](https://docs.liquid.ai/docs/inference/ollama) to get started. Also, right now LFM VL models are currently not working with ollama, we have an [open PR](https://github.com/ollama/ollama/pull/14069) to resolve this quickly." + "source": [ + "# πŸ’§ LFM2 Inference with Ollama\n", + "\n", + "This notebook demonstrates how to use the [Ollama](https://ollama.com) API to run [LFM2](https://huggingface.co/collections/LiquidAI/lfm2-67d775f3b4b6fe79fbb21bda) and [LFM2.5](https://huggingface.co/collections/LiquidAI/lfm25-6839e3e26b2a9fdbde95b341) models.\n", + "\n", + "> ⚠️ **Note:** Ollama is intended to run locally on your machine. This notebook shows the Python and curl API usage to get Ollama running in Colab. Install Ollama from [ollama.com/download](https://ollama.com/download) and follow the [Liquid Docs](https://docs.liquid.ai/docs/inference/ollama) to get started. Also, right now LFM VL models are currently not working with ollama, we have an [open PR](https://github.com/ollama/ollama/pull/14069) to resolve this quickly." + ] }, { "cell_type": "markdown", @@ -19,6 +25,7 @@ "outputs": [], "source": [ "# Colab specific settings\n", + "# !modal_skip_rest\n", "!sudo apt install zstd\n", "!sudo apt update\n", "!sudo apt install -y pciutils" @@ -170,6 +177,7 @@ "outputs": [], "source": [ "# Chat API\n", + "# !modal_skip_rest\n", "%%bash\n", "curl -s http://localhost:11434/api/chat -d '{\n", " \"model\": \"hf.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF\",\n", @@ -219,4 +227,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/notebooks/LFM2_Inference_with_Transformers.ipynb b/notebooks/LFM2_Inference_with_Transformers.ipynb index 636ba48..ab42bc0 100644 --- a/notebooks/LFM2_Inference_with_Transformers.ipynb +++ b/notebooks/LFM2_Inference_with_Transformers.ipynb @@ -26,7 +26,7 @@ "metadata": {}, "outputs": [], "source": [ - "!uv pip install \"transformers>=5.0.0\" \"torch==2.9.0\" accelerate" + "!uv pip install \"transformers>=5.0.0\" \"torch==2.9.0\" accelerate, torchvision" ] }, { @@ -86,7 +86,30 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from transformers import GenerationConfig\n\ngeneration_config = GenerationConfig(\n do_sample=True,\n temperature=0.1,\n top_k=50,\n repetition_penalty=1.05,\n max_new_tokens=512,\n)\n\nprompt = \"Explain quantum computing in simple terms.\"\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": prompt}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, generation_config=generation_config)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)" + "source": [ + "from transformers import GenerationConfig\n", + "\n", + "generation_config = GenerationConfig(\n", + " do_sample=True,\n", + " temperature=0.1,\n", + " top_k=50,\n", + " repetition_penalty=1.05,\n", + " max_new_tokens=512,\n", + ")\n", + "\n", + "prompt = \"Explain quantum computing in simple terms.\"\n", + "inputs = tokenizer.apply_chat_template(\n", + " [{\"role\": \"user\", \"content\": prompt}],\n", + " add_generation_prompt=True,\n", + " return_tensors=\"pt\",\n", + " return_dict=True,\n", + ").to(model.device)\n", + "\n", + "output = model.generate(**inputs, generation_config=generation_config)\n", + "input_length = inputs[\"input_ids\"].shape[1]\n", + "response = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\n", + "print(response)" + ] }, { "cell_type": "markdown", @@ -131,7 +154,51 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from transformers import AutoProcessor, AutoModelForImageTextToText\nfrom transformers.image_utils import load_image\n\n# Load vision model and processor\nmodel_id = \"LiquidAI/LFM2.5-VL-1.6B\"\nvision_model = AutoModelForImageTextToText.from_pretrained(\n model_id,\n device_map=\"auto\",\n dtype=\"bfloat16\"\n)\n\n# IMPORTANT: tie lm_head to input embeddings (transformers v5 bug)\nvision_model.lm_head.weight = vision_model.get_input_embeddings().weight\n\nprocessor = AutoProcessor.from_pretrained(model_id)\n\n# Load image\nurl = \"https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg\"\nimage = load_image(url)\n\n# Create conversation\nconversation = [\n {\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image\", \"image\": image},\n {\"type\": \"text\", \"text\": \"What is in this image?\"},\n ],\n },\n]\n\n# Generate response\ninputs = processor.apply_chat_template(\n conversation,\n add_generation_prompt=True,\n return_tensors=\"pt\",\n return_dict=True,\n tokenize=True,\n).to(vision_model.device)\n\noutputs = vision_model.generate(**inputs, do_sample=True, temperature=0.1, min_p=0.15, repetition_penalty=1.05, max_new_tokens=64)\nresponse = processor.batch_decode(outputs, skip_special_tokens=True)[0]\nprint(response)" + "source": [ + "from transformers import AutoProcessor, AutoModelForImageTextToText\n", + "from transformers.image_utils import load_image\n", + "\n", + "# Load vision model and processor\n", + "model_id = \"LiquidAI/LFM2.5-VL-1.6B\"\n", + "vision_model = AutoModelForImageTextToText.from_pretrained(\n", + " model_id,\n", + " device_map=\"auto\",\n", + " dtype=\"bfloat16\"\n", + ")\n", + "\n", + "# IMPORTANT: tie lm_head to input embeddings (transformers v5 bug)\n", + "vision_model.lm_head.weight = vision_model.get_input_embeddings().weight\n", + "\n", + "processor = AutoProcessor.from_pretrained(model_id)\n", + "\n", + "# Load image\n", + "url = \"https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg\"\n", + "image = load_image(url)\n", + "\n", + "# Create conversation\n", + "conversation = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"type\": \"image\", \"image\": image},\n", + " {\"type\": \"text\", \"text\": \"What is in this image?\"},\n", + " ],\n", + " },\n", + "]\n", + "\n", + "# Generate response\n", + "inputs = processor.apply_chat_template(\n", + " conversation,\n", + " add_generation_prompt=True,\n", + " return_tensors=\"pt\",\n", + " return_dict=True,\n", + " tokenize=True,\n", + ").to(vision_model.device)\n", + "\n", + "outputs = vision_model.generate(**inputs, do_sample=True, temperature=0.1, min_p=0.15, repetition_penalty=1.05, max_new_tokens=64)\n", + "response = processor.batch_decode(outputs, skip_special_tokens=True)[0]\n", + "print(response)" + ] }, { "cell_type": "markdown", @@ -161,4 +228,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/notebooks/LFM2_Inference_with_llama_cpp.ipynb b/notebooks/LFM2_Inference_with_llama_cpp.ipynb index 1571a3d..4e4ce47 100644 --- a/notebooks/LFM2_Inference_with_llama_cpp.ipynb +++ b/notebooks/LFM2_Inference_with_llama_cpp.ipynb @@ -44,7 +44,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "!llama-b7633/llama-cli \\\n -hf LiquidAI/LFM2.5-1.2B-Instruct-GGUF:Q4_K_M \\\n -p \"What is C. elegans?\" \\\n -n 256 \\\n --temp 0.1 --top-k 50 --top-p 0.1 --repeat-penalty 1.05" + "source": [ + "# !modal_skip\n", + "!llama-b7633/llama-cli \\\n", + " -hf LiquidAI/LFM2.5-1.2B-Instruct-GGUF:Q4_K_M \\\n", + " -p \"What is C. elegans?\" \\\n", + " -n 256 \\\n", + " --temp 0.1 --top-k 50 --top-p 0.1 --repeat-penalty 1.05" + ] }, { "cell_type": "markdown", @@ -99,7 +106,7 @@ "metadata": {}, "outputs": [], "source": [ - "!uv pip install -qqq openai" + "!uv pip install -qqq openai requests" ] }, { @@ -107,7 +114,26 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from openai import OpenAI\n\nclient = OpenAI(\n base_url=\"http://localhost:8000/v1\",\n api_key=\"not-needed\"\n)\n\nresponse = client.chat.completions.create(\n model=\"lfm2.5-1.2b-instruct\",\n messages=[\n {\"role\": \"user\", \"content\": \"What is machine learning?\"}\n ],\n temperature=0.1,\n top_p=0.1,\n max_tokens=512,\n extra_body={\"top_k\": 50, \"repetition_penalty\": 1.05},\n)\nprint(response.choices[0].message.content)" + "source": [ + "from openai import OpenAI\n", + "\n", + "client = OpenAI(\n", + " base_url=\"http://localhost:8000/v1\",\n", + " api_key=\"not-needed\"\n", + ")\n", + "\n", + "response = client.chat.completions.create(\n", + " model=\"lfm2.5-1.2b-instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"What is machine learning?\"}\n", + " ],\n", + " temperature=0.1,\n", + " top_p=0.1,\n", + " max_tokens=512,\n", + " extra_body={\"top_k\": 50, \"repetition_penalty\": 1.05},\n", + ")\n", + "print(response.choices[0].message.content)" + ] }, { "cell_type": "code", @@ -148,7 +174,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "!llama-b7633/llama-cli \\\n -hf LiquidAI/LFM2.5-VL-1.6B-GGUF:Q4_0 \\\n --image test_image.jpg \\\n --image-max-tokens 64 \\\n -p \"What's in this image?\" \\\n -n 128 \\\n --temp 0.1 --min-p 0.15 --repeat-penalty 1.05" + "source": "# !modal_skip\n!llama-b7633/llama-cli \\\n -hf LiquidAI/LFM2.5-VL-1.6B-GGUF:Q4_0 \\\n --image test_image.jpg \\\n --image-max-tokens 64 \\\n -p \"What's in this image?\" \\\n -n 128 \\\n --temp 0.1 --min-p 0.15 --repeat-penalty 1.05" }, { "cell_type": "markdown", @@ -202,7 +228,27 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "client = OpenAI(\n base_url=\"http://localhost:8000/v1\",\n api_key=\"not-needed\"\n)\n\nresponse = client.chat.completions.create(\n model=\"lfm2.5-vl-1.6b\",\n messages=[{\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}},\n {\"type\": \"text\", \"text\": \"What's in this image?\"}\n ]\n }],\n temperature=0.1,\n max_tokens=512,\n extra_body={\"min_p\": 0.15, \"repetition_penalty\": 1.05},\n)\nprint(response.choices[0].message.content)" + "source": [ + "client = OpenAI(\n", + " base_url=\"http://localhost:8000/v1\",\n", + " api_key=\"not-needed\"\n", + ")\n", + "\n", + "response = client.chat.completions.create(\n", + " model=\"lfm2.5-vl-1.6b\",\n", + " messages=[{\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}},\n", + " {\"type\": \"text\", \"text\": \"What's in this image?\"}\n", + " ]\n", + " }],\n", + " temperature=0.1,\n", + " max_tokens=512,\n", + " extra_body={\"min_p\": 0.15, \"repetition_penalty\": 1.05},\n", + ")\n", + "print(response.choices[0].message.content)" + ] }, { "cell_type": "code", diff --git a/notebooks/LFM2_Inference_with_vLLM.ipynb b/notebooks/LFM2_Inference_with_vLLM.ipynb index ac72203..d5656b6 100644 --- a/notebooks/LFM2_Inference_with_vLLM.ipynb +++ b/notebooks/LFM2_Inference_with_vLLM.ipynb @@ -26,7 +26,7 @@ "metadata": {}, "outputs": [], "source": [ - "!uv pip install -qqq vllm==0.14" + "!uv pip install -qqq vllm==0.14 \"typing_extensions>=4.14.0\" \"pydantic>=2.12.0\"" ] }, { @@ -35,6 +35,7 @@ "metadata": {}, "outputs": [], "source": [ + "# !modal_skip\n", "# These are only needed for Colab\n", "import contextlib\n", "import vllm.utils.system_utils as system_utils\n", @@ -58,7 +59,25 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from vllm import LLM, SamplingParams\n\n# Initialize the model\nllm = LLM(model=\"LiquidAI/LFM2.5-1.2B-Instruct\")\n\n# Define sampling parameters\nsampling_params = SamplingParams(\n temperature=0.1,\n top_k=50,\n repetition_penalty=1.05,\n max_tokens=512\n)\n\n# Generate answer\nmessages = [{\"role\": \"user\", \"content\": \"What is C. elegans?\"}]\noutput = llm.chat(messages, sampling_params)\nprint(output[0].outputs[0].text)" + "source": [ + "from vllm import LLM, SamplingParams\n", + "\n", + "# Initialize the model\n", + "llm = LLM(model=\"LiquidAI/LFM2.5-1.2B-Instruct\")\n", + "\n", + "# Define sampling parameters\n", + "sampling_params = SamplingParams(\n", + " temperature=0.1,\n", + " top_k=50,\n", + " repetition_penalty=1.05,\n", + " max_tokens=512\n", + ")\n", + "\n", + "# Generate answer\n", + "messages = [{\"role\": \"user\", \"content\": \"What is C. elegans?\"}]\n", + "output = llm.chat(messages, sampling_params)\n", + "print(output[0].outputs[0].text)" + ] }, { "cell_type": "markdown", @@ -106,6 +125,7 @@ "outputs": [], "source": [ "# Install vLLM with vision model support (Note: you may need to restart your runtime before installing a new version of vllm)\n", + "# !modal_skip_rest\n", "!VLLM_PRECOMPILED_WHEEL_COMMIT=72506c98349d6bcd32b4e33eec7b5513453c1502 VLLM_USE_PRECOMPILED=1 \n", "!uv pip install -qqq git+https://github.com/vllm-project/vllm.git\n", "!uv pip install -qqq \"transformers>=5.0.0\" pillow" @@ -131,7 +151,50 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from vllm import LLM, SamplingParams\nfrom typing import List, Dict, Any\n\ndef build_messages(parts):\n content = []\n for item in parts:\n if item[\"type\"] == \"text\":\n content.append({\"type\": \"text\", \"text\": item[\"value\"]})\n elif item[\"type\"] == \"image\":\n content.append({\"type\": \"image_url\", \"image_url\": {\"url\": item[\"value\"]}})\n return [{\"role\": \"user\", \"content\": content}]\n\nIMAGE_URL = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n\nllm = LLM(\n model=\"LiquidAI/LFM2.5-VL-1.6B\",\n max_model_len=1024,\n)\n\nsampling_params = SamplingParams(\n temperature=0.1,\n min_p=0.15,\n repetition_penalty=1.05,\n max_tokens=1024,\n)\n\n# Batch multiple prompts - text-only and multimodal\nprompts: List[List[Dict[str, Any]]] = [ # type: ignore[no-redef]\n [{\"type\": \"text\", \"value\": \"What is C. elegans?\"}],\n [{\"type\": \"text\", \"value\": \"Say hi in JSON format\"}],\n [\n {\"type\": \"image\", \"value\": IMAGE_URL},\n {\"type\": \"text\", \"value\": \"Describe what you see in this image.\"},\n ],\n]\n\nconversations = [build_messages(p) for p in prompts]\noutputs = llm.chat(conversations, sampling_params)\n\nfor output in outputs:\n print(output.outputs[0].text)\n print(\"---\")" + "source": [ + "from vllm import LLM, SamplingParams\n", + "from typing import List, Dict, Any\n", + "\n", + "def build_messages(parts):\n", + " content = []\n", + " for item in parts:\n", + " if item[\"type\"] == \"text\":\n", + " content.append({\"type\": \"text\", \"text\": item[\"value\"]})\n", + " elif item[\"type\"] == \"image\":\n", + " content.append({\"type\": \"image_url\", \"image_url\": {\"url\": item[\"value\"]}})\n", + " return [{\"role\": \"user\", \"content\": content}]\n", + "\n", + "IMAGE_URL = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "\n", + "llm = LLM(\n", + " model=\"LiquidAI/LFM2.5-VL-1.6B\",\n", + " max_model_len=1024,\n", + ")\n", + "\n", + "sampling_params = SamplingParams(\n", + " temperature=0.1,\n", + " min_p=0.15,\n", + " repetition_penalty=1.05,\n", + " max_tokens=1024,\n", + ")\n", + "\n", + "# Batch multiple prompts - text-only and multimodal\n", + "prompts: List[List[Dict[str, Any]]] = [ # type: ignore[no-redef]\n", + " [{\"type\": \"text\", \"value\": \"What is C. elegans?\"}],\n", + " [{\"type\": \"text\", \"value\": \"Say hi in JSON format\"}],\n", + " [\n", + " {\"type\": \"image\", \"value\": IMAGE_URL},\n", + " {\"type\": \"text\", \"value\": \"Describe what you see in this image.\"},\n", + " ],\n", + "]\n", + "\n", + "conversations = [build_messages(p) for p in prompts]\n", + "outputs = llm.chat(conversations, sampling_params)\n", + "\n", + "for output in outputs:\n", + " print(output.outputs[0].text)\n", + " print(\"---\")" + ] }, { "cell_type": "markdown", @@ -161,4 +224,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/notebooks/grpo_for_verifiable_tasks.ipynb b/notebooks/grpo_for_verifiable_tasks.ipynb index 155021c..3d68b58 100644 --- a/notebooks/grpo_for_verifiable_tasks.ipynb +++ b/notebooks/grpo_for_verifiable_tasks.ipynb @@ -5,14 +5,90 @@ "metadata": { "id": "w2TnJ6ta-2zj" }, - "source": "# πŸ’§ LFM2.5 - GRPO for Verifiable Tasks\n\nThis tutorial demonstrates how to fine-tune [`LiquidAI/LFM2.5-1.2B-Instruct`](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) using Group Relative Policy Optimization (GRPO).\n\nFine-tuning requires a GPU. If you don't have one locally, you can run this notebook for free on [Google Colab](https://colab.research.google.com/github/Liquid4All/cookbook/blob/main/finetuning/notebooks/grpo_for_verifiable_tasks.ipynb) using a free NVIDIA T4 GPU instance.\n\n[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Liquid4All/cookbook/blob/main/finetuning/notebooks/grpo_for_verifiable_tasks.ipynb)\n\n## 🎯 What You'll Find:\n- **GRPO** (Group Relative Policy Optimization) - RL-based fine-tuning for verifiable tasks\n- **LoRA + GRPO** - Parameter-efficient GRPO training\n\nGRPO is widely used for easily verifiable tasks like:\n- Mathematical problem solving with numeric verification\n- Code generation with unit test validation\n- Structured output tasks (JSON, SQL) with schema validation\n- Question answering with ground truth answers\n\n## πŸ“‹ Prerequisites:\n- **GPU Runtime**: Select GPU in `Runtime` β†’ `Change runtime type`\n- **Hugging Face Account**: For accessing models and datasets\n\n## πŸš€ Deployment Options\n\nLFM2.5 models are small and efficient, enabling deployment across a wide range of platforms:\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Deployment TargetUse Case
πŸ“± AndroidMobile apps on Android devices
πŸ“± iOSMobile apps on iPhone/iPad
🍎 Apple Silicon MacLocal inference on Mac with MLX
πŸ¦™ llama.cppLocal deployments on any hardware
πŸ¦™ OllamaLocal inference with easy setup
πŸ–₯️ LM StudioDesktop app for local inference
⚑ vLLMCloud deployments with high throughput
☁️ ModalServerless cloud deployment
πŸ—οΈ BasetenProduction ML infrastructure
πŸš€ FalFast inference API
" + "source": [ + "# πŸ’§ LFM2.5 - GRPO for Verifiable Tasks\n", + "\n", + "This tutorial demonstrates how to fine-tune [`LiquidAI/LFM2.5-1.2B-Instruct`](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) using Group Relative Policy Optimization (GRPO).\n", + "\n", + "Fine-tuning requires a GPU. If you don't have one locally, you can run this notebook for free on [Google Colab](https://colab.research.google.com/github/Liquid4All/cookbook/blob/main/finetuning/notebooks/grpo_for_verifiable_tasks.ipynb) using a free NVIDIA T4 GPU instance.\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Liquid4All/cookbook/blob/main/finetuning/notebooks/grpo_for_verifiable_tasks.ipynb)\n", + "\n", + "## 🎯 What You'll Find:\n", + "- **GRPO** (Group Relative Policy Optimization) - RL-based fine-tuning for verifiable tasks\n", + "- **LoRA + GRPO** - Parameter-efficient GRPO training\n", + "\n", + "GRPO is widely used for easily verifiable tasks like:\n", + "- Mathematical problem solving with numeric verification\n", + "- Code generation with unit test validation\n", + "- Structured output tasks (JSON, SQL) with schema validation\n", + "- Question answering with ground truth answers\n", + "\n", + "## πŸ“‹ Prerequisites:\n", + "- **GPU Runtime**: Select GPU in `Runtime` β†’ `Change runtime type`\n", + "- **Hugging Face Account**: For accessing models and datasets\n", + "\n", + "## πŸš€ Deployment Options\n", + "\n", + "LFM2.5 models are small and efficient, enabling deployment across a wide range of platforms:\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Deployment TargetUse Case
πŸ“± AndroidMobile apps on Android devices
πŸ“± iOSMobile apps on iPhone/iPad
🍎 Apple Silicon MacLocal inference on Mac with MLX
πŸ¦™ llama.cppLocal deployments on any hardware
πŸ¦™ OllamaLocal inference with easy setup
πŸ–₯️ LM StudioDesktop app for local inference
⚑ vLLMCloud deployments with high throughput
☁️ ModalServerless cloud deployment
πŸ—οΈ BasetenProduction ML infrastructure
πŸš€ FalFast inference API
" + ] }, { "cell_type": "markdown", "metadata": { "id": "z48z7BEZCMHO" }, - "source": "### πŸ’¬ Need help?\nJoin the [Liquid AI Discord Community](https://discord.com/invite/liquid-ai) and ask!" + "source": [ + "### πŸ’¬ Need help?\n", + "Join the [Liquid AI Discord Community](https://discord.com/invite/liquid-ai) and ask!" + ] }, { "cell_type": "markdown", @@ -43,7 +119,10 @@ "outputId": "ec580e3b-30c5-4192-bf3d-e50587d801f2" }, "outputs": [], - "source": "!uv pip install \"trl[peft]\" bitsandbytes trackio math_verify liger-kernel flash-attn" + "source": [ + "!uv pip install \"trl[peft]\" bitsandbytes trackio math_verify liger-kernel\n", + "!uv pip install flash-attn" + ] }, { "cell_type": "code", @@ -69,7 +148,15 @@ "metadata": { "id": "cTEw4xlFrhnQ" }, - "source": "# πŸ“₯ Load the Dataset\n\nWe load the [**AI-MO/NuminaMath-TIR**](https://huggingface.co/datasets/AI-MO/NuminaMath-TIR) dataset from the Hugging Face Hub. This dataset focuses on **mathematical reasoning**, featuring problems that require step-by-step logical solutions.\n\nBy fine-tuning a model with GRPO on this dataset, it can learn to **generate structured reasoning steps**, enhancing both the model's **accuracy** and **interpretability** on math-related tasks.\n\nFor efficiency, we'll load only a **small portion of the training split**:" + "source": [ + "# πŸ“₯ Load the Dataset\n", + "\n", + "We load the [**AI-MO/NuminaMath-TIR**](https://huggingface.co/datasets/AI-MO/NuminaMath-TIR) dataset from the Hugging Face Hub. This dataset focuses on **mathematical reasoning**, featuring problems that require step-by-step logical solutions.\n", + "\n", + "By fine-tuning a model with GRPO on this dataset, it can learn to **generate structured reasoning steps**, enhancing both the model's **accuracy** and **interpretability** on math-related tasks.\n", + "\n", + "For efficiency, we'll load only a **small portion of the training split**:" + ] }, { "cell_type": "code", @@ -227,7 +314,11 @@ "metadata": { "id": "DiqBlxK_A0SD" }, - "source": "## πŸ”„ Transform the Dataset\n\nWe adapt our dataset to a conversational format using a custom system prompt, guiding the LLM to generate both step-by-step reasoning and the final answer." + "source": [ + "## πŸ”„ Transform the Dataset\n", + "\n", + "We adapt our dataset to a conversational format using a custom system prompt, guiding the LLM to generate both step-by-step reasoning and the final answer." + ] }, { "cell_type": "code", @@ -302,7 +393,9 @@ "metadata": { "id": "g5BfIX8yCMHP" }, - "source": "# 🧠 Load the Model" + "source": [ + "# 🧠 Load the Model" + ] }, { "cell_type": "markdown", @@ -344,7 +437,9 @@ "metadata": { "id": "wpa2Y2q0CMHQ" }, - "source": "## πŸŽ›οΈ Define LoRA Adapters" + "source": [ + "## πŸŽ›οΈ Define LoRA Adapters" + ] }, { "cell_type": "markdown", @@ -376,7 +471,24 @@ "metadata": { "id": "bh4CXgFjCMHQ" }, - "source": "## 🎁 Load Reward Functions\n\nGRPO requires **reward functions** to guide the learning process. We use pre-defined rewards from `trl.rewards`:\n\n- `think_format_reward`: Rewards completions that correctly follow the `...` format\n- `reasoning_accuracy_reward`: Evaluates the correctness of the model's solution\n\nYou can also create custom reward functions - they're simply Python functions that take generated completions and return a list of floats.\n\n```python\ndef think_format_reward(completions: list[list[dict[str, str]]], **kwargs) -> list[float]:\n pattern = r\"^(?!.*)(.*?).*$\"\n completion_contents = [completion[0][\"content\"] for completion in completions]\n matches = [re.match(pattern, content, re.DOTALL | re.MULTILINE) for content in completion_contents]\n return [1.0 if match else 0.0 for match in matches]\n```" + "source": [ + "## 🎁 Load Reward Functions\n", + "\n", + "GRPO requires **reward functions** to guide the learning process. We use pre-defined rewards from `trl.rewards`:\n", + "\n", + "- `think_format_reward`: Rewards completions that correctly follow the `...` format\n", + "- `reasoning_accuracy_reward`: Evaluates the correctness of the model's solution\n", + "\n", + "You can also create custom reward functions - they're simply Python functions that take generated completions and return a list of floats.\n", + "\n", + "```python\n", + "def think_format_reward(completions: list[list[dict[str, str]]], **kwargs) -> list[float]:\n", + " pattern = r\"^(?!.*)(.*?).*$\"\n", + " completion_contents = [completion[0][\"content\"] for completion in completions]\n", + " matches = [re.match(pattern, content, re.DOTALL | re.MULTILINE) for content in completion_contents]\n", + " return [1.0 if match else 0.0 for match in matches]\n", + "```" + ] }, { "cell_type": "code", @@ -389,12 +501,23 @@ "from trl.rewards import think_format_reward, reasoning_accuracy_reward" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !modal_skip_rest" + ] + }, { "cell_type": "markdown", "metadata": { "id": "prKnAp-Esyiq" }, - "source": "# πŸš€ Train the Model" + "source": [ + "# πŸš€ Train the Model" + ] }, { "cell_type": "code", @@ -459,7 +582,11 @@ "metadata": { "id": "ibO4f7tuLboQ" }, - "source": "# πŸ’Ύ Save the Model\n\nSave the fine-tuned model locally:" + "source": [ + "# πŸ’Ύ Save the Model\n", + "\n", + "Save the fine-tuned model locally:" + ] }, { "cell_type": "code", @@ -493,4 +620,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/notebooks/quickstart_snippets.ipynb b/notebooks/quickstart_snippets.ipynb index 85e4255..dc92c7e 100644 --- a/notebooks/quickstart_snippets.ipynb +++ b/notebooks/quickstart_snippets.ipynb @@ -32,7 +32,30 @@ "snippet": "text-transformers" }, "outputs": [], - "source": "from transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_id,\n device_map=\"auto\",\n dtype=\"bfloat16\",\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": \"What is machine learning?\"}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, do_sample=True, temperature=0.1, top_k=50, repetition_penalty=1.05, max_new_tokens=512)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)" + "source": [ + "# !modal_skip_rest\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer\n", + "\n", + "model_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\n", + "model = AutoModelForCausalLM.from_pretrained(\n", + " model_id,\n", + " device_map=\"auto\",\n", + " dtype=\"bfloat16\",\n", + ")\n", + "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", + "\n", + "inputs = tokenizer.apply_chat_template(\n", + " [{\"role\": \"user\", \"content\": \"What is machine learning?\"}],\n", + " add_generation_prompt=True,\n", + " return_tensors=\"pt\",\n", + " return_dict=True,\n", + ").to(model.device)\n", + "\n", + "output = model.generate(**inputs, do_sample=True, temperature=0.1, top_k=50, repetition_penalty=1.05, max_new_tokens=512)\n", + "input_length = inputs[\"input_ids\"].shape[1]\n", + "response = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\n", + "print(response)" + ] }, { "cell_type": "code", @@ -41,7 +64,21 @@ "snippet": "text-vllm" }, "outputs": [], - "source": "from vllm import LLM, SamplingParams\n\nllm = LLM(model=\"LiquidAI/LFM2.5-1.2B-Instruct\")\n\nsampling_params = SamplingParams(\n temperature=0.1,\n top_k=50,\n repetition_penalty=1.05,\n max_tokens=512,\n)\n\noutput = llm.chat(\"What is machine learning?\", sampling_params)\nprint(output[0].outputs[0].text)" + "source": [ + "from vllm import LLM, SamplingParams\n", + "\n", + "llm = LLM(model=\"LiquidAI/LFM2.5-1.2B-Instruct\")\n", + "\n", + "sampling_params = SamplingParams(\n", + " temperature=0.1,\n", + " top_k=50,\n", + " repetition_penalty=1.05,\n", + " max_tokens=512,\n", + ")\n", + "\n", + "output = llm.chat(\"What is machine learning?\", sampling_params)\n", + "print(output[0].outputs[0].text)" + ] }, { "cell_type": "markdown", @@ -57,7 +94,46 @@ "snippet": "vl-transformers" }, "outputs": [], - "source": "from transformers import AutoProcessor, AutoModelForImageTextToText\nfrom transformers.image_utils import load_image\n\nmodel_id = \"LiquidAI/LFM2.5-VL-1.6B\"\nmodel = AutoModelForImageTextToText.from_pretrained(\n model_id,\n device_map=\"auto\",\n dtype=\"bfloat16\",\n)\n# IMPORTANT: tie lm_head to input embeddings (transformers v5 bug)\nmodel.lm_head.weight = model.get_input_embeddings().weight\n\nprocessor = AutoProcessor.from_pretrained(model_id)\n\nurl = \"https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg\"\nimage = load_image(url)\n\nconversation = [\n {\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image\", \"image\": image},\n {\"type\": \"text\", \"text\": \"What is in this image?\"},\n ],\n },\n]\n\ninputs = processor.apply_chat_template(\n conversation,\n add_generation_prompt=True,\n return_tensors=\"pt\",\n return_dict=True,\n tokenize=True,\n).to(model.device)\n\noutputs = model.generate(**inputs, do_sample=True, temperature=0.1, min_p=0.15, repetition_penalty=1.05, max_new_tokens=256)\nresponse = processor.batch_decode(outputs, skip_special_tokens=True)[0]\nprint(response)" + "source": [ + "from transformers import AutoProcessor, AutoModelForImageTextToText\n", + "from transformers.image_utils import load_image\n", + "\n", + "model_id = \"LiquidAI/LFM2.5-VL-1.6B\"\n", + "model = AutoModelForImageTextToText.from_pretrained(\n", + " model_id,\n", + " device_map=\"auto\",\n", + " dtype=\"bfloat16\",\n", + ")\n", + "# IMPORTANT: tie lm_head to input embeddings (transformers v5 bug)\n", + "model.lm_head.weight = model.get_input_embeddings().weight\n", + "\n", + "processor = AutoProcessor.from_pretrained(model_id)\n", + "\n", + "url = \"https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg\"\n", + "image = load_image(url)\n", + "\n", + "conversation = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"type\": \"image\", \"image\": image},\n", + " {\"type\": \"text\", \"text\": \"What is in this image?\"},\n", + " ],\n", + " },\n", + "]\n", + "\n", + "inputs = processor.apply_chat_template(\n", + " conversation,\n", + " add_generation_prompt=True,\n", + " return_tensors=\"pt\",\n", + " return_dict=True,\n", + " tokenize=True,\n", + ").to(model.device)\n", + "\n", + "outputs = model.generate(**inputs, do_sample=True, temperature=0.1, min_p=0.15, repetition_penalty=1.05, max_new_tokens=256)\n", + "response = processor.batch_decode(outputs, skip_special_tokens=True)[0]\n", + "print(response)" + ] }, { "cell_type": "code", @@ -66,7 +142,34 @@ "snippet": "vl-vllm" }, "outputs": [], - "source": "from vllm import LLM, SamplingParams\n\nIMAGE_URL = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n\nllm = LLM(\n model=\"LiquidAI/LFM2.5-VL-1.6B\",\n max_model_len=1024,\n)\n\nsampling_params = SamplingParams(\n temperature=0.1,\n min_p=0.15,\n repetition_penalty=1.05,\n max_tokens=256,\n)\n\nmessages = [{\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image_url\", \"image_url\": {\"url\": IMAGE_URL}},\n {\"type\": \"text\", \"text\": \"Describe what you see in this image.\"},\n ],\n}]\n\noutputs = llm.chat(messages, sampling_params)\nprint(outputs[0].outputs[0].text)" + "source": [ + "from vllm import LLM, SamplingParams\n", + "\n", + "IMAGE_URL = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "\n", + "llm = LLM(\n", + " model=\"LiquidAI/LFM2.5-VL-1.6B\",\n", + " max_model_len=1024,\n", + ")\n", + "\n", + "sampling_params = SamplingParams(\n", + " temperature=0.1,\n", + " min_p=0.15,\n", + " repetition_penalty=1.05,\n", + " max_tokens=256,\n", + ")\n", + "\n", + "messages = [{\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"type\": \"image_url\", \"image_url\": {\"url\": IMAGE_URL}},\n", + " {\"type\": \"text\", \"text\": \"Describe what you see in this image.\"},\n", + " ],\n", + "}]\n", + "\n", + "outputs = llm.chat(messages, sampling_params)\n", + "print(outputs[0].outputs[0].text)" + ] } ], "metadata": { @@ -80,4 +183,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git "a/notebooks/\360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" "b/notebooks/\360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" index bc212de..d603510 100644 --- "a/notebooks/\360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" +++ "b/notebooks/\360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" @@ -1,23 +1,10 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, "cells": [ { "cell_type": "markdown", + "metadata": { + "id": "a3PTFH-H9Ozk" + }, "source": [ "# πŸ’§ LFM2.5 - SFT with TRL\n", "\n", @@ -33,42 +20,46 @@ "- **GPU Runtime**: Select GPU in `Runtime` β†’ `Change runtime type`\n", "- **Hugging Face Account**: For accessing models and datasets\n", "\n" - ], - "metadata": { - "id": "a3PTFH-H9Ozk" - } + ] }, { "cell_type": "markdown", + "metadata": { + "id": "x0RPLu2h9ome" + }, "source": [ "# πŸ“¦ Installation & Setup\n", "\n", "First, let's install all the required packages:\n" - ], - "metadata": { - "id": "x0RPLu2h9ome" - } + ] }, { "cell_type": "code", - "source": "!uv pip install transformers==4.54.0 trl>=0.18.2 peft>=0.15.2", + "execution_count": null, "metadata": { "id": "3FIcp_wo9nsR" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!uv pip install transformers==4.54.0 trl>=0.18.2 peft>=0.15.2 IPython" + ] }, { "cell_type": "markdown", - "source": [ - "Let's now verify the packages are installed correctly" - ], "metadata": { "id": "41UEf1uxCd6m" - } + }, + "source": [ + "Let's now verify the packages are installed correctly" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bSJgYtHT_Os4" + }, + "outputs": [], "source": [ "import torch\n", "import transformers\n", @@ -79,25 +70,25 @@ "print(f\"πŸ“¦ PyTorch version: {torch.__version__}\")\n", "print(f\"πŸ€— Transformers version: {transformers.__version__}\")\n", "print(f\"πŸ“Š TRL version: {trl.__version__}\")" - ], - "metadata": { - "id": "bSJgYtHT_Os4" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "v_uXLzxQ_rnK" + }, "source": [ "# Loading the model from Transformers πŸ€—\n", "\n" - ], - "metadata": { - "id": "v_uXLzxQ_rnK" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iA3erKM4-HhS" + }, + "outputs": [], "source": [ "from transformers import AutoTokenizer, AutoModelForCausalLM\n", "from IPython.display import display, HTML, Markdown\n", @@ -132,37 +123,37 @@ "print(f\"πŸ”’ Parameters: {model.num_parameters():,}\")\n", "print(f\"πŸ“– Vocab size: {len(tokenizer)}\")\n", "print(f\"πŸ’Ύ Model size: ~{model.num_parameters() * 2 / 1e9:.1f} GB (bfloat16)\")" - ], - "metadata": { - "id": "iA3erKM4-HhS" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "6ABA6Yrm_lql" + }, "source": [ "# 🎯 Part 1: Supervised Fine-Tuning (SFT)\n", "\n", "SFT teaches the model to follow instructions by training on input-output pairs (instruction vs response). This is the foundation for creating instruction-following models." - ], - "metadata": { - "id": "6ABA6Yrm_lql" - } + ] }, { "cell_type": "markdown", + "metadata": { + "id": "KufdgeypHtst" + }, "source": [ "## Load an SFT Dataset\n", "\n", "We will use [HuggingFaceTB/smoltalk](https://huggingface.co/datasets/HuggingFaceTB/smoltalk), limiting ourselves to the first 5k samples for brevity. Feel free to change the limit by changing the slicing index in the parameter `split`." - ], - "metadata": { - "id": "KufdgeypHtst" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XCe8O06-_Cps" + }, + "outputs": [], "source": [ "from datasets import load_dataset\n", "\n", @@ -174,27 +165,36 @@ "print(f\" πŸ“š Train samples: {len(train_dataset_sft)}\")\n", "print(f\" πŸ§ͺ Eval samples: {len(eval_dataset_sft)}\")\n", "print(f\"\\nπŸ“ Single Sample: {train_dataset_sft[0]['messages']}\")" - ], - "metadata": { - "id": "XCe8O06-_Cps" - }, + ] + }, + { + "cell_type": "code", "execution_count": null, - "outputs": [] + "metadata": {}, + "outputs": [], + "source": [ + "# !modal_skip_rest" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "n5pI5JWpIlFQ" + }, "source": [ "## Launch Training\n", "\n", "We are now ready to launch an SFT run with `SFTTrainer`, feel free to modify `SFTConfig` to play around with different configurations.\n", "\n" - ], - "metadata": { - "id": "n5pI5JWpIlFQ" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ixD8Po-eAbPp" + }, + "outputs": [], "source": [ "from trl import SFTConfig, SFTTrainer\n", "\n", @@ -230,39 +230,39 @@ "\n", "sft_trainer.save_model()\n", "print(f\"πŸ’Ύ SFT model saved to: {sft_config.output_dir}\")" - ], - "metadata": { - "id": "ixD8Po-eAbPp" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "08Y3TxKrBRXo" + }, "source": [ "# πŸŽ›οΈ Part 2: LoRA + SFT (Parameter-Efficient Fine-tuning)\n", "\n", "LoRA (Low-Rank Adaptation) allows efficient fine-tuning by only training a small number of additional parameters. Perfect for limited compute resources!\n" - ], - "metadata": { - "id": "08Y3TxKrBRXo" - } + ] }, { "cell_type": "markdown", + "metadata": { + "id": "-MfWfc-Pvl9q" + }, "source": [ "## Wrap the model with PEFT\n", "\n", "We specify target modules that will be finetuned while the rest of the models weights remains frozen. Feel free to modify the `r` (rank) value:\n", "- higher -> better approximation of full-finetuning\n", "- lower -> needs even less compute resources" - ], - "metadata": { - "id": "-MfWfc-Pvl9q" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "puYp_gTpBSsf" + }, + "outputs": [], "source": [ "from peft import LoraConfig, get_peft_model, TaskType\n", "\n", @@ -288,26 +288,26 @@ "print(f\"πŸŽ›οΈ LoRA rank: {lora_config.r}\")\n", "print(f\"πŸ“Š LoRA alpha: {lora_config.lora_alpha}\")\n", "print(f\"🎯 Target modules: {lora_config.target_modules}\")" - ], - "metadata": { - "id": "puYp_gTpBSsf" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "L1Hem_DOwHgY" + }, "source": [ "## Launch Training\n", "\n", "Now ready to launch the SFT training, but this time with the LoRA-wrapped model" - ], - "metadata": { - "id": "L1Hem_DOwHgY" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u-VYQysHBY8-" + }, + "outputs": [], "source": [ "from trl import SFTConfig, SFTTrainer\n", "\n", @@ -342,38 +342,49 @@ "\n", "lora_sft_trainer.save_model()\n", "print(f\"πŸ’Ύ LoRA model saved to: {lora_sft_config.output_dir}\")" - ], - "metadata": { - "id": "u-VYQysHBY8-" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "xI1-N-_Ev0cC" + }, "source": [ "## Save merged model\n", "\n", "Merge the extra weights learned with LoRA back into the model to obtain a \"normal\" model checkpoint." - ], - "metadata": { - "id": "xI1-N-_Ev0cC" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_rizEFUsvwce" + }, + "outputs": [], "source": [ "print(\"\\nπŸ”„ Merging LoRA weights...\")\n", "merged_model = lora_model.merge_and_unload()\n", "merged_model.save_pretrained(\"./lfm2-lora-merged\")\n", "tokenizer.save_pretrained(\"./lfm2-lora-merged\")\n", "print(\"πŸ’Ύ Merged model saved to: ./lfm2-lora-merged\")" - ], - "metadata": { - "id": "_rizEFUsvwce" - }, - "execution_count": null, - "outputs": [] + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git "a/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" "b/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" index 08abb1c..a2e3350 100644 --- "a/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" +++ "b/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" @@ -36,15 +36,17 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, - "id": "fmOBSWrCAw5S", "colab": { "base_uri": "https://localhost:8080/" }, + "collapsed": true, + "id": "fmOBSWrCAw5S", "outputId": "01173385-066c-4114-d217-6d6e1d91f12b" }, "outputs": [], - "source": "!uv pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe datasets trl" + "source": [ + "!uv pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe datasets trl torchvision peft" + ] }, { "cell_type": "markdown", @@ -59,16 +61,16 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "046FXiTJNZjh", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "046FXiTJNZjh", "outputId": "12ea24e3-3515-43dd-9b76-2ba7c33e4025" }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "πŸ“¦ PyTorch version: 2.9.0+cu126\n", "πŸ€— Transformers version: 5.0.0.dev0\n", @@ -101,7 +103,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "x5I6sA6NBMxF", "colab": { "base_uri": "https://localhost:8080/", "height": 412, @@ -196,145 +197,146 @@ "475e285df2a242588c35b56acf5e2281" ] }, + "id": "x5I6sA6NBMxF", "outputId": "7b10dd2c-90ac-4118-8d8f-2d5f9389005d" }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "πŸ“š Loading processor...\n" ] }, { - "output_type": "display_data", "data": { - "text/plain": [ - "processor_config.json: 0%| | 0.00/828 [00:00" - ], "text/html": [ "\n", "
\n", @@ -733,13 +740,17 @@ " \n", " \n", "

" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "πŸŽ‰ SFT training completed!\n", "πŸ’Ύ Saving to: lfm2-vl-med\n" @@ -798,7 +809,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "QHB1ACVMzFZ5", "colab": { "base_uri": "https://localhost:8080/", "height": 84, @@ -816,33 +826,34 @@ "ba5474abed8646f0997c998eb60c6584" ] }, + "id": "QHB1ACVMzFZ5", "outputId": "6a91c28c-eac7-4ed9-ba89-d9ffb78595d6" }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "πŸ”„ Merging LoRA weights...\n" ] }, { - "output_type": "display_data", "data": { - "text/plain": [ - "Writing model shards: 0%| | 0/1 [00:00=0.18.2 peft>=0.15.2", + "execution_count": null, "metadata": { "id": "3FIcp_wo9nsR" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!uv pip install transformers==4.54.0 trl>=0.18.2 peft>=0.15.2" + ] }, { "cell_type": "markdown", - "source": [ - "Let's now verify the packages are installed correctly" - ], "metadata": { "id": "41UEf1uxCd6m" - } + }, + "source": [ + "Let's now verify the packages are installed correctly" + ] }, { "cell_type": "code", - "source": [ - "import torch\n", - "import transformers\n", - "import trl\n", - "import os\n", - "os.environ[\"WANDB_DISABLED\"] = \"true\"\n", - "\n", - "print(f\"πŸ“¦ PyTorch version: {torch.__version__}\")\n", - "print(f\"πŸ€— Transformers version: {transformers.__version__}\")\n", - "print(f\"πŸ“Š TRL version: {trl.__version__}\")" - ], + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -86,71 +62,60 @@ "id": "bSJgYtHT_Os4", "outputId": "d483d722-d85a-4de3-d266-347c75abbfce" }, - "execution_count": null, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "πŸ“¦ PyTorch version: 2.6.0+cu124\n", "πŸ€— Transformers version: 4.54.0\n", "πŸ“Š TRL version: 0.19.1\n" ] } + ], + "source": [ + "import torch\n", + "import transformers\n", + "import trl\n", + "import os\n", + "os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\n", + "print(f\"πŸ“¦ PyTorch version: {torch.__version__}\")\n", + "print(f\"πŸ€— Transformers version: {transformers.__version__}\")\n", + "print(f\"πŸ“Š TRL version: {trl.__version__}\")" ] }, { "cell_type": "markdown", + "metadata": { + "id": "v_uXLzxQ_rnK" + }, "source": [ "# Loading the model from Transformers πŸ€—\n", "\n" - ], - "metadata": { - "id": "v_uXLzxQ_rnK" - } + ] }, { "cell_type": "code", - "source": [ - "from transformers import AutoTokenizer, AutoModelForCausalLM\n", - "import torch\n", - "\n", - "model_name = \"LiquidAI/LFM2.5-1.2B-Instruct\"\n", - "\n", - "print(\"πŸ“š Loading tokenizer...\")\n", - "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", - "\n", - "print(\"🧠 Loading model...\")\n", - "model = AutoModelForCausalLM.from_pretrained(\n", - " model_name,\n", - " device_map=\"auto\",\n", - " torch_dtype=\"auto\",\n", - ")\n", - "\n", - "print(\"βœ… Local model loaded successfully!\")\n", - "print(f\"πŸ”’ Parameters: {model.num_parameters():,}\")\n", - "print(f\"πŸ“– Vocab size: {len(tokenizer)}\")\n", - "print(f\"πŸ’Ύ Model size: ~{model.num_parameters() * 2 / 1e9:.1f} GB (bfloat16)\")" - ], + "execution_count": null, "metadata": { - "id": "iA3erKM4-HhS", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "iA3erKM4-HhS", "outputId": "e19261f7-5e96-4756-e001-80af1d9d37b5" }, - "execution_count": null, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "πŸ“š Loading tokenizer...\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", @@ -161,8 +126,8 @@ ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "🧠 Loading model...\n", "βœ… Local model loaded successfully!\n", @@ -171,53 +136,56 @@ "πŸ’Ύ Model size: ~2.3 GB (bfloat16)\n" ] } + ], + "source": [ + "from transformers import AutoTokenizer, AutoModelForCausalLM\n", + "import torch\n", + "\n", + "model_name = \"LiquidAI/LFM2.5-1.2B-Instruct\"\n", + "\n", + "print(\"πŸ“š Loading tokenizer...\")\n", + "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + "\n", + "print(\"🧠 Loading model...\")\n", + "model = AutoModelForCausalLM.from_pretrained(\n", + " model_name,\n", + " device_map=\"auto\",\n", + " torch_dtype=\"auto\",\n", + ")\n", + "\n", + "print(\"βœ… Local model loaded successfully!\")\n", + "print(f\"πŸ”’ Parameters: {model.num_parameters():,}\")\n", + "print(f\"πŸ“– Vocab size: {len(tokenizer)}\")\n", + "print(f\"πŸ’Ύ Model size: ~{model.num_parameters() * 2 / 1e9:.1f} GB (bfloat16)\")" ] }, { "cell_type": "markdown", + "metadata": { + "id": "o83NqFPNA_nk" + }, "source": [ "# 🎯 Direct Preference Optimization (DPO + LoRA)\n", "\n", "DPO aligns the model with human preferences by learning from preference pairs (chosen vs rejected responses). This typically follows SFT training.\n", "\n", "DPO might be too compute heavy if you're running on one of the free-tier colab GPUs. Hence we use LoRA (Low-Rank Adaptation) to finetune the model by only training a small number of additional parameters. Perfect for limited compute resources!" - ], - "metadata": { - "id": "o83NqFPNA_nk" - } + ] }, { "cell_type": "markdown", + "metadata": { + "id": "0w-40n_XJL9H" + }, "source": [ "## Load a DPO Dataset\n", "\n", "We will use [mlabonne/orpo-dpo-mix-40k](https://huggingface.co/datasets/mlabonne/orpo-dpo-mix-40k), limiting ourselves to the first 2k samples for brevity. Feel free to change the limit by changing the slicing index in the parameter `split`. The size of the validation data can be adjusted by changing `test_size`." - ], - "metadata": { - "id": "0w-40n_XJL9H" - } + ] }, { "cell_type": "code", - "source": [ - "from datasets import load_dataset\n", - "\n", - "print(\"πŸ“₯ Loading DPO dataset...\")\n", - "\n", - "dataset_dpo = load_dataset(\"mlabonne/orpo-dpo-mix-40k\", split=\"train[:2000]\")\n", - "dataset_dpo = dataset_dpo.train_test_split(test_size=0.1, seed=42)\n", - "train_dataset_dpo, eval_dataset_dpo = dataset_dpo['train'], dataset_dpo['test']\n", - "\n", - "print(\"βœ… DPO Dataset loaded:\")\n", - "print(f\" πŸ“š Train samples: {len(train_dataset_dpo)}\")\n", - "print(f\" πŸ§ͺ Eval samples: {len(eval_dataset_dpo)}\")\n", - "\n", - "sample = train_dataset_dpo[0]\n", - "print(\"\\nπŸ“ Single Sample:\")\n", - "print(f\" Prompt: {sample['prompt'][:100]}...\")\n", - "print(f\" βœ… Chosen: {sample['chosen'][:100]}...\")\n", - "print(f\" ❌ Rejected: {sample['rejected'][:100]}...\")" - ], + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -225,11 +193,10 @@ "id": "Ajq6EABUBAv_", "outputId": "1b6e9f8f-e7a7-472d-e50b-b40b450d336e" }, - "execution_count": null, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "πŸ“₯ Loading DPO dataset...\n", "βœ… DPO Dataset loaded:\n", @@ -242,10 +209,32 @@ " ❌ Rejected: [{'content': 'Classify the following instruments into their respective families (brass, strings, woodwinds, or percussion): Didgeridoo, Cuica, Euphonium, Guzheng', 'role': 'user'}, {'content': 'The classification of the mentioned instruments is as follows:\\n\\n1. Didgeridoo: Brass\\n2. Cuica: Percussion\\n3. Euphonium: Brass\\n4. Guzheng: Strings', 'role': 'assistant'}, {'content': 'Explain the construction and sound production process of each instrument mentioned, and how these processes relate to their classification in their respective families.', 'role': 'user'}, {'content': \"1. Didgeridoo: The didgeridoo is traditionally made from eucalyptus trees which have been naturally hollowed out by termites. The mouthpiece can be made of beeswax or shaped directly from the wood. The player blows into the instrument without interruption while using a special breathing technique called circular breathing. The vibration of the lips and the resonances in the tube produce the sound. Although the didgeridoo is not a typical brass instrument, it is often classified as such due to the lip-vibrated method of sound production, similar to a brass instrument.\\n\\n2. Cuica: The cuica is a Brazilian friction drum with a large pitch range, produced by changing tension on the head of the drum. It has a stick fixed in the center of the drum head, and the player rubs this stick with a wet cloth and simultaneously modulates the pitch by pressing the thumb on the drum head. This results in a sound that can be used to mimic a variety of effects, often the call of wild animals. It is classified as a percussion instrument due to the way the sound is produced by striking or rubbing.\\n\\n3. Euphonium: The euphonium is a conical-bore, tenor-voiced brass instrument. The mouthpiece of the euphonium is larger than that of a trumpet, and this, along with the conical bore, contributes to its rich, dark tone. The player buzzes their lips into the mouthpiece, creating vibrations that travel through the instrument and are amplified by the bell. The pitch can be altered by changing the lip tension and by using the valves to change the length of the tubing. Its classification as a brass instrument is due to the material it's made from and the method of sound production.\\n\\n4. Guzheng: The guzheng, also known as the Chinese zither, is a string instrument with movable bridges and 21 strings. It is made of a large, resonant wooden body, over which the strings are stretched. The player plucks the strings with their fingers, often with the aid of plectra attached to four of the fingers on the right or both hands. The left hand can also press the strings to the left of the bridges to change the pitch or produce vibrato. It is classified as a string instrument because the sound is produced by vibrating strings.\", 'role': 'assistant'}, {'content': 'Discuss the technique of circular breathing used in playing the Didgeridoo and its impact on the sound produced.', 'role': 'user'}, {'content': \"Circular breathing is a technique used by players of some wind instruments to produce a continuous tone without interruption. This technique is used extensively in playing the didgeridoo. It involves inhaling through the nose while maintaining airflow through the instrument, using air stored in the cheeks.\\n\\nHere's a basic breakdown of how it works:\\n\\n1. Fill your cheeks with air and begin to play the instrument.\\n2. While you're blowing this air out through your mouth into the instrument, start to breathe in through your nose.\\n3. As your lungs fill with air, continue to push the air out of your mouth using your cheeks and tongue.\\n4. Once your cheeks are empty, switch back to blowing air from your lungs into the instrument, and refill your cheeks.\\n\\nThis technique allows the player to sustain a drone for a long period of time, which is characteristic of didgeridoo music. It creates a continuous, rhythmic sound and allows for the layering of complex rhythms and sound effects. The circular breathing technique contributes to the mesmerizing, trance-like quality of the didgeridoo's sound. It also allows for the performance of more complex pieces that require extended periods of sound without breaks for breath.\", 'role': 'assistant'}, {'content': 'Can you elaborate on the physiological and mental challenges a musician might face while learning and mastering the technique of circular breathing?', 'role': 'user'}, {'content': \"Learning and mastering the technique of circular breathing can pose numerous physiological and mental challenges for instrumentalists. Here are some of the challenges a musician may face:\\n\\n1. Coordination: Circular breathing requires a high degree of muscular and mental coordination between respiratory, oral, and facial muscles. This includes skills like inhalation through the nose, simultaneous sound production, and control over airflow from the lungs and cheeks. The learning curve can be steep, and several hours of practice may be required to achieve the basic level of proficiency.\\n\\n2. Breath control: Attaining consistent breath control is crucial in circular breathing. A musician must be able to pulse and maintain a steady airflow through the nose while precisely regulating the rate and amount of air entering the lungs. This requires a high level of breath control and lung capacity.\\n\\n3. Cheek strength and control: Circular breathing demands significant strength and control of the cheek muscles. The cheeks must act as a second lung, storing air while the player inhales through the nose and releasing it back into the instrument when blowing from the lungs. This requires practice and muscle conditioning to build the endurance necessary for prolonged circular breathing sessions.\\n\\n4. Embouchure: Keeping a stable embouchure β€” the position and tension of the lips and mouth β€” is important while incorporating the circular breathing technique. It may take time to adjust to this combination and prevent air loss or a change in the quality of the sound while switching from blowing from the lungs to inhaling through the nose.\\n\\n5. Mental focus and concentration: Circular breathing involves sustaining multiple tasks simultaneously and requires a high concentration level. Musicians must maintain focus on the sound production while switching between inhaling and exhaling, ensuring an uninterrupted airflow and sound throughout the performance.\\n\\nIn addition to these physiological challenges, mastering circular breathing can also have mental benefits. It can enhance an individual's mindfulness, concentration, and breath awareness, leading to improved performance in various aspects of life. However, the learning process may be frustrating at times due to the high level of coordination and practice involved. Overcoming these challenges can be rewarding and offer a sense of accomplishment as the musician achieves greater proficiency and control in the circular breathing technique.\", 'role': 'assistant'}]...\n" ] } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "print(\"πŸ“₯ Loading DPO dataset...\")\n", + "\n", + "dataset_dpo = load_dataset(\"mlabonne/orpo-dpo-mix-40k\", split=\"train[:2000]\")\n", + "dataset_dpo = dataset_dpo.train_test_split(test_size=0.1, seed=42)\n", + "train_dataset_dpo, eval_dataset_dpo = dataset_dpo['train'], dataset_dpo['test']\n", + "\n", + "print(\"βœ… DPO Dataset loaded:\")\n", + "print(f\" πŸ“š Train samples: {len(train_dataset_dpo)}\")\n", + "print(f\" πŸ§ͺ Eval samples: {len(eval_dataset_dpo)}\")\n", + "\n", + "sample = train_dataset_dpo[0]\n", + "print(\"\\nπŸ“ Single Sample:\")\n", + "print(f\" Prompt: {sample['prompt'][:100]}...\")\n", + "print(f\" βœ… Chosen: {sample['chosen'][:100]}...\")\n", + "print(f\" ❌ Rejected: {sample['rejected'][:100]}...\")" ] }, { "cell_type": "markdown", + "metadata": { + "id": "v8or_m-UdKGc" + }, "source": [ "## Wrap the model with PEFT\n", "\n", @@ -254,13 +243,31 @@ "- lower -> needs even less compute resources\n", "\n", "You can skip this part if you have a premium GPU and want to go for a full finetune." - ], - "metadata": { - "id": "v8or_m-UdKGc" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "z7x5RhC5eLmc", + "outputId": "f6553df2-ba07-4839-9c73-2c55dec64495" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "trainable params: 5,554,176 || all params: 1,175,894,784 || trainable%: 0.4723\n", + "βœ… LoRA configuration applied!\n", + "πŸŽ›οΈ LoRA rank: 8\n", + "πŸ“Š LoRA alpha: 16\n", + "🎯 Target modules: {'w2', 'out_proj', 'in_proj', 'q_proj', 'k_proj', 'w3', 'v_proj', 'w1'}\n" + ] + } + ], "source": [ "from peft import LoraConfig, get_peft_model, TaskType\n", "\n", @@ -286,126 +293,74 @@ "print(f\"πŸŽ›οΈ LoRA rank: {lora_config.r}\")\n", "print(f\"πŸ“Š LoRA alpha: {lora_config.lora_alpha}\")\n", "print(f\"🎯 Target modules: {lora_config.target_modules}\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "z7x5RhC5eLmc", - "outputId": "f6553df2-ba07-4839-9c73-2c55dec64495" - }, + ] + }, + { + "cell_type": "code", "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "trainable params: 5,554,176 || all params: 1,175,894,784 || trainable%: 0.4723\n", - "βœ… LoRA configuration applied!\n", - "πŸŽ›οΈ LoRA rank: 8\n", - "πŸ“Š LoRA alpha: 16\n", - "🎯 Target modules: {'w2', 'out_proj', 'in_proj', 'q_proj', 'k_proj', 'w3', 'v_proj', 'w1'}\n" - ] - } + "metadata": {}, + "outputs": [], + "source": [ + "# !modal_skip_rest" ] }, { "cell_type": "markdown", + "metadata": { + "id": "yxuxfHu7Jopx" + }, "source": [ "## Launch Training\n", "\n", "We are now ready to launch a DPO run with `DPOTrainer`, feel free to modify `DPOConfig` to play around with different configurations.\n", "\n", "Replace `lora_model` with `model` if you have a premium-tier in colab and want to run a full finetune." - ], - "metadata": { - "id": "yxuxfHu7Jopx" - } + ] }, { "cell_type": "code", - "source": [ - "from trl import DPOConfig, DPOTrainer\n", - "\n", - "# DPO Training configuration\n", - "dpo_config = DPOConfig(\n", - " output_dir=\"./lfm2-dpo\",\n", - " num_train_epochs=1,\n", - " per_device_train_batch_size=1,\n", - " learning_rate=1e-6,\n", - " lr_scheduler_type=\"linear\",\n", - " logging_steps=10,\n", - " save_strategy=\"epoch\",\n", - " eval_strategy=\"epoch\",\n", - " bf16=False # <- not all colab GPUs support bf16\n", - ")\n", - "\n", - "# Create DPO trainer\n", - "print(\"πŸ—οΈ Creating DPO trainer...\")\n", - "dpo_trainer = DPOTrainer(\n", - " model=lora_model,\n", - " args=dpo_config,\n", - " train_dataset=train_dataset_dpo,\n", - " eval_dataset=eval_dataset_dpo,\n", - " processing_class=tokenizer,\n", - ")\n", - "\n", - "# Start DPO training\n", - "print(\"\\nπŸš€ Starting DPO training...\")\n", - "dpo_trainer.train()\n", - "\n", - "print(\"πŸŽ‰ DPO training completed!\")\n", - "\n", - "# Save the DPO model\n", - "dpo_trainer.save_model()\n", - "print(f\"πŸ’Ύ DPO model saved to: {dpo_config.output_dir}\")" - ], + "execution_count": null, "metadata": { - "id": "nGBgkf9LBHp6", "colab": { "base_uri": "https://localhost:8080/", "height": 263 }, + "id": "nGBgkf9LBHp6", "outputId": "5242d3ac-16e2-4ff3-c4b6-fdfb32c6cf23" }, - "execution_count": null, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n", "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "πŸ—οΈ Creating DPO trainer...\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\n", "πŸš€ Starting DPO training...\n" ] }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "\n", "

\n", @@ -445,66 +400,122 @@ " \n", " \n", "

" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "πŸŽ‰ DPO training completed!\n", "πŸ’Ύ DPO model saved to: ./lfm2-dpo\n" ] } + ], + "source": [ + "from trl import DPOConfig, DPOTrainer\n", + "\n", + "# DPO Training configuration\n", + "dpo_config = DPOConfig(\n", + " output_dir=\"./lfm2-dpo\",\n", + " num_train_epochs=1,\n", + " per_device_train_batch_size=1,\n", + " learning_rate=1e-6,\n", + " lr_scheduler_type=\"linear\",\n", + " logging_steps=10,\n", + " save_strategy=\"epoch\",\n", + " eval_strategy=\"epoch\",\n", + " bf16=False # <- not all colab GPUs support bf16\n", + ")\n", + "\n", + "# Create DPO trainer\n", + "print(\"πŸ—οΈ Creating DPO trainer...\")\n", + "dpo_trainer = DPOTrainer(\n", + " model=lora_model,\n", + " args=dpo_config,\n", + " train_dataset=train_dataset_dpo,\n", + " eval_dataset=eval_dataset_dpo,\n", + " processing_class=tokenizer,\n", + ")\n", + "\n", + "# Start DPO training\n", + "print(\"\\nπŸš€ Starting DPO training...\")\n", + "dpo_trainer.train()\n", + "\n", + "print(\"πŸŽ‰ DPO training completed!\")\n", + "\n", + "# Save the DPO model\n", + "dpo_trainer.save_model()\n", + "print(f\"πŸ’Ύ DPO model saved to: {dpo_config.output_dir}\")" ] }, { "cell_type": "markdown", + "metadata": { + "id": "WIrCmjUsfNIg" + }, "source": [ "## Save merged model\n", "\n", "If you have used LoRA. Merge the extra weights learned with LoRA back into the model to obtain a \"normal\" model checkpoint." - ], - "metadata": { - "id": "WIrCmjUsfNIg" - } + ] }, { "cell_type": "code", - "source": [ - "print(\"\\nπŸ”„ Merging LoRA weights...\")\n", - "merged_model = lora_model.merge_and_unload()\n", - "merged_model.save_pretrained(\"./lfm2-lora-merged\")\n", - "tokenizer.save_pretrained(\"./lfm2-lora-merged\")\n", - "print(\"πŸ’Ύ Merged model saved to: ./lfm2-lora-merged\")" - ], + "execution_count": null, "metadata": { - "id": "RUJoU3BwfPTg", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "RUJoU3BwfPTg", "outputId": "45e09b8f-9089-4237-ffff-c4d59db6ff89" }, - "execution_count": null, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\n", "πŸ”„ Merging LoRA weights...\n", "πŸ’Ύ Merged model saved to: ./lfm2-lora-merged\n" ] } + ], + "source": [ + "print(\"\\nπŸ”„ Merging LoRA weights...\")\n", + "merged_model = lora_model.merge_and_unload()\n", + "merged_model.save_pretrained(\"./lfm2-lora-merged\")\n", + "tokenizer.save_pretrained(\"./lfm2-lora-merged\")\n", + "print(\"πŸ’Ύ Merged model saved to: ./lfm2-lora-merged\")" ] } - ] -} \ No newline at end of file + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/util/modal_runner.py b/util/modal_runner.py new file mode 100644 index 0000000..b75bada --- /dev/null +++ b/util/modal_runner.py @@ -0,0 +1,62 @@ +import modal +import subprocess, sys, os, tempfile + +app = modal.App("ci-runner") + +@app.function( + gpu="A10G", + timeout=600, + image=modal.Image.debian_slim(python_version="3.12") + .apt_install("curl", "wget", "zstd", "git") + .pip_install("uv", "typing_extensions>=4.14.0"), +) +def run_code(code: str, pip_packages: list[list[str]] = [], setup_commands: list[str] = []) -> dict: + + # Disable vLLM v1 multiprocessing (incompatible with containers) + os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0" + + # Restore real stdout/stderr β€” Modal wraps them with objects that + # lack fileno(), which breaks subprocesses and libraries like vLLM. + real_stdout = open("/dev/stdout", "w") + real_stderr = open("/dev/stderr", "w") + sys.stdout = real_stdout + sys.stderr = real_stderr + + for group in pip_packages: + print(f"[ci-runner] Installing: {group}") + subprocess.check_call( + ["uv", "pip", "install", "--system", *group], + stdout=real_stdout, stderr=real_stderr, + ) + + for cmd in setup_commands: + print(f"[ci-runner] Setup: {cmd}") + subprocess.run(cmd, shell=True, check=True, stdout=real_stdout, stderr=real_stderr) + + # Remove Modal's bundled deps from sys.path to prevent shadowing + clean_path = [p for p in sys.path if "/__modal/deps" not in p] + + # Write code to a temp file and run as a subprocess so that + # stdout/stderr are real file descriptors (required by vLLM, etc.) + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write(code) + script_path = f.name + + env = os.environ.copy() + env["PYTHONPATH"] = ":".join(clean_path) + + proc = subprocess.run( + [sys.executable, script_path], + capture_output=True, + text=True, + timeout=540, + ) + + os.unlink(script_path) + + return { + "success": proc.returncode == 0, + "stdout": proc.stdout, + "stderr": proc.stderr, + "error": None if proc.returncode == 0 else f"Exit code {proc.returncode}\n{proc.stderr[-2000:] if proc.stderr else ''}", + } \ No newline at end of file diff --git a/util/run_notebook_test.py b/util/run_notebook_test.py new file mode 100644 index 0000000..4d3368c --- /dev/null +++ b/util/run_notebook_test.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +""" +Run code cells from a notebook on a Modal GPU as a single combined script. + +Cell 0 is treated as the dependency cell (pip packages). +Remaining non-skipped cells are concatenated into one script and executed together, +preserving shared state (variables, imports) across cells just like a real notebook. + +Usage: + python run_notebook_test.py --notebook notebooks/LFM2_Inference_with_Transformers.ipynb + python run_notebook_test.py --notebook notebooks/LFM2_Inference_with_Transformers.ipynb --gpu A10G + python run_notebook_test.py --notebook notebooks/LFM2_Inference_with_Transformers.ipynb --dry-run +""" + +import argparse +import re +import sys +import time +from pathlib import Path +import json +import sys +from pathlib import Path +import modal + +sys.path.insert(0, str(Path(__file__).resolve().parent / "tests")) + + +def extract_code_cells(notebook_path: Path) -> list[dict]: + """Return a list of code cell dicts with 'source', 'index', and 'skipped' keys.""" + with open(notebook_path) as f: + nb = json.load(f) + + cells = [] + code_index = 0 + for cell in nb.get("cells", []): + if cell["cell_type"] != "code": + continue + + source = "".join(cell["source"]) + skip = ( + "# test:skip" in source + or cell.get("metadata", {}).get("test_skip", False) + ) + + cells.append({ + "index": code_index, + "source": source, + "skipped": skip, + }) + code_index += 1 + + return cells + +def parse_packages_from_cell(source: str) -> tuple[list[list[str]], list[str]]: + """Parse dependency cell into pip package groups and shell setup commands. + + Returns (pip_package_groups, setup_commands). Each ``pip install`` line + becomes its own group so that install ordering is preserved (e.g. packages + that need torch at build time can be installed in a later group). + Non-pip ``!`` lines become setup commands that run before the Python script. + Lines after ``# !modal_skip_rest`` are ignored; lines with + ``# !modal_skip`` are skipped individually. + """ + package_groups: list[list[str]] = [] + setup_commands = [] + for line in source.splitlines(): + line = line.strip() + # Stop processing if we hit a modal_skip_rest directive + if "!modal_skip_rest" in line: + break + # Skip individual lines marked with modal_skip + if "!modal_skip" in line: + continue + # Match: !pip install ..., !uv pip install ..., pip install ... + match = re.match(r"^!?\s*(?:uv\s+)?pip\s+install\s+(.+)", line) + if match: + group = [] + for token in re.split(r"[,\s]+", match.group(1)): + token = token.strip('"').strip("'").strip(",") + if token and not token.startswith("-"): + group.append(token) + if group: + package_groups.append(group) + elif line.startswith("!"): + setup_commands.append(line[1:].strip()) + return package_groups, setup_commands + + +def filter_cells(code_cells: list[dict]) -> list[dict]: + """Apply !modal_skip and !modal_skip_rest directives. + + - !modal_skip: skip only that cell + - !modal_skip_rest: skip that cell and all remaining cells + """ + filtered = [] + for cell in code_cells: + source = cell["source"] + if "!modal_skip_rest" in source: + break + if "!modal_skip" in source and "!modal_skip_rest" not in source: + continue + filtered.append(cell) + return filtered + + +def preprocess_cell(source: str) -> tuple[str, list[str]]: + """Transform cell source so shell lines become valid Python. + + Returns (transformed_source, setup_commands). + - ``%%bash`` cells are collected as setup commands and excluded from the script. + - ``!`` lines are converted to ``subprocess.run()`` calls. + - Pure Python lines are unchanged. + """ + lines = source.splitlines() + setup_commands: list[str] = [] + + # Detect %%bash magic (may follow comment lines) + non_comment = [l for l in lines if l.strip() and not l.strip().startswith("#")] + if non_comment and non_comment[0].strip() == "%%bash": + bash_lines: list[str] = [] + found_magic = False + for line in lines: + if not found_magic: + if line.strip() == "%%bash": + found_magic = True + continue + bash_lines.append(line) + if bash_lines: + setup_commands.append("\n".join(bash_lines)) + return "", setup_commands + + # Convert ! lines to subprocess.run(), joining backslash continuations + transformed: list[str] = [] + i = 0 + while i < len(lines): + stripped = lines[i].lstrip() + if stripped.startswith("!"): + indent = lines[i][: len(lines[i]) - len(stripped)] + cmd_parts = [stripped[1:]] + # Collect continuation lines ending with backslash + while cmd_parts[-1].rstrip().endswith("\\") and i + 1 < len(lines): + i += 1 + cmd_parts.append(lines[i]) + full_cmd = "\n".join(cmd_parts) + # Ensure uv/pip installs use --system (no venv in containers) + if re.match(r"(?:uv\s+)?pip\s+install", full_cmd) and "--system" not in full_cmd: + full_cmd = full_cmd.replace("pip install", "pip install --system", 1) + transformed.append(f"{indent}subprocess.run({full_cmd!r}, shell=True, check=True)") + else: + transformed.append(lines[i]) + i += 1 + return "\n".join(transformed), setup_commands + + +def combine_cells(code_cells: list[dict]) -> tuple[str, list[str]]: + """Concatenate code cells into a single script with cell markers. + + Returns (combined_code, setup_commands) after preprocessing each cell. + """ + parts = [] + all_setup: list[str] = [] + has_subprocess_import = False + + for cell in code_cells: + source, setup_cmds = preprocess_cell(cell["source"]) + all_setup.extend(setup_cmds) + if source.strip(): + if not has_subprocess_import and "subprocess.run(" in source: + has_subprocess_import = True + parts.append(f"# --- cell {cell['index']} ---") + parts.append(source) + + code = "\n\n".join(parts) + # Prepend subprocess import if we generated subprocess.run() calls + if has_subprocess_import: + code = "import subprocess\n\n" + code + return code, all_setup + + +def main(): + parser = argparse.ArgumentParser(description="Run notebook code cells on Modal as a single script") + parser.add_argument("--notebook", required=True, help="Path to .ipynb file") + parser.add_argument("--gpu", default="A10G", help="GPU type (default: A10G)") + parser.add_argument("--dry-run", action="store_true", help="Print combined script and packages without running") + parser.add_argument("--skip-packages", nargs="+", default=[], metavar="PKG", + help="Package names to exclude from installation (e.g. --skip-packages flash-attn)") + args = parser.parse_args() + + notebook_path = Path(args.notebook) + if not notebook_path.exists(): + print(f"Error: notebook not found: {notebook_path}") + sys.exit(1) + + cells = extract_code_cells(notebook_path) + if not cells: + print("No code cells found.") + sys.exit(1) + + # Cell 0 = dependencies + dep_cell = cells[0] + pip_packages, dep_setup = parse_packages_from_cell(dep_cell["source"]) + + # Filter out skipped packages + if args.skip_packages: + skip_set = set(args.skip_packages) + pip_packages = [ + [pkg for pkg in group if pkg not in skip_set] + for group in pip_packages + ] + pip_packages = [g for g in pip_packages if g] # drop empty groups + code_cells = [c for c in cells[1:] if not c["skipped"]] + code_cells = filter_cells(code_cells) + + if not code_cells: + print("No runnable code cells after dependency cell.") + sys.exit(1) + + combined, cell_setup = combine_cells(code_cells) + setup_commands = dep_setup + cell_setup + total_after_dep = len(cells) - 1 + skipped = total_after_dep - len(code_cells) + + print(f"{'=' * 50}") + print(f"Notebook: {notebook_path.name}") + all_packages = [pkg for group in pip_packages for pkg in group] + print(f"Packages: {all_packages or '(none)'} ({len(pip_packages)} install group(s))") + if setup_commands: + print(f"Setup: {len(setup_commands)} command(s)") + print(f"GPU: {args.gpu}") + print(f"Cells: {len(code_cells)} to run, {skipped} skipped") + print(f"{'=' * 50}\n") + + if args.dry_run: + if setup_commands: + print("=== SETUP COMMANDS ===") + for i, cmd in enumerate(setup_commands): + print(f"\n--- setup {i} ---") + print(cmd) + print(f"\n=== PYTHON SCRIPT ===\n") + print(combined) + return + + try: + run_code = modal.Function.from_name("ci-runner", "run_code") + except modal.exception.NotFoundError: + print("Error: Modal app 'ci-runner' not deployed.") + print("Run this first: modal deploy modal_runner.py") + sys.exit(1) + + print("Submitting to Modal...") + start_time = time.time() + result = run_code.remote(code=combined, pip_packages=pip_packages, setup_commands=setup_commands) + + print("\n--- STDOUT ---") + print(result["stdout"] or "(empty)") + + if result["stderr"]: + print("\n--- STDERR ---") + print(result["stderr"]) + + if result["error"]: + print(f"\nβœ— FAILED: {result['error']}") + sys.exit(1) + + elapsed = time.time() - start_time + print(f"\nRuntime: {elapsed:.2f}s") + print(f"βœ“ All {len(code_cells)} cells passed") + + +if __name__ == "__main__": + main() From 13e44ce1a5d17f8c1bdf0ec3f07a15de73dd7971 Mon Sep 17 00:00:00 2001 From: Jonathan Buchanan Date: Mon, 16 Mar 2026 18:11:06 -0700 Subject: [PATCH 2/5] fix: check for skip all in dependency cell --- notebooks/quickstart_snippets.ipynb | 10 +++++++++- util/run_notebook_test.py | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/notebooks/quickstart_snippets.ipynb b/notebooks/quickstart_snippets.ipynb index dc92c7e..9d7d3f2 100644 --- a/notebooks/quickstart_snippets.ipynb +++ b/notebooks/quickstart_snippets.ipynb @@ -25,6 +25,15 @@ "## Text Model Snippets" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !modal_skip_rest" + ] + }, { "cell_type": "code", "execution_count": null, @@ -33,7 +42,6 @@ }, "outputs": [], "source": [ - "# !modal_skip_rest\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "\n", "model_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\n", diff --git a/util/run_notebook_test.py b/util/run_notebook_test.py index 4d3368c..8a335ef 100644 --- a/util/run_notebook_test.py +++ b/util/run_notebook_test.py @@ -200,6 +200,10 @@ def main(): dep_cell = cells[0] pip_packages, dep_setup = parse_packages_from_cell(dep_cell["source"]) + if "!modal_skip_rest" in dep_cell["source"]: + print(f"Notebook {notebook_path.name}: dependency cell has !modal_skip_rest β€” skipping entirely.") + sys.exit(0) + # Filter out skipped packages if args.skip_packages: skip_set = set(args.skip_packages) From 1e74fac1b3c309488ae1c15c81d9d81a8de28643 Mon Sep 17 00:00:00 2001 From: Jonathan Buchanan Date: Tue, 17 Mar 2026 14:35:42 -0700 Subject: [PATCH 3/5] refactor: add readmd to util, move into separate workflow, remove only checking first cell dependncies --- .github/workflows/check-notebooks.yaml | 43 ------------- .github/workflows/run-notebooks.yaml | 58 ++++++++++++++++++ util/README.md | 83 ++++++++++++++++++++++++++ util/run_notebook_test.py | 60 +++++++++++++------ 4 files changed, 183 insertions(+), 61 deletions(-) create mode 100644 .github/workflows/run-notebooks.yaml create mode 100644 util/README.md diff --git a/.github/workflows/check-notebooks.yaml b/.github/workflows/check-notebooks.yaml index bff3725..6641df6 100644 --- a/.github/workflows/check-notebooks.yaml +++ b/.github/workflows/check-notebooks.yaml @@ -68,46 +68,3 @@ jobs: echo "" echo "βœ… All required checks passed!" echo "πŸ“Š Validated $(find . -name '*.ipynb' | wc -l) notebooks" - - discover-notebooks: - runs-on: ubuntu-latest - outputs: - notebooks: ${{ steps.list.outputs.notebooks }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: List notebooks - id: list - run: | - notebooks=$(ls notebooks/*.ipynb | xargs -I{} basename {} | jq -R -s -c 'split("\n") | map(select(. != ""))') - echo "notebooks=$notebooks" >> "$GITHUB_OUTPUT" - echo "Found notebooks: $notebooks" - - run-notebooks: - needs: discover-notebooks - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - notebook: ${{ fromJSON(needs.discover-notebooks.outputs.notebooks) }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Install modal - run: uv pip install --system modal - - - name: Set up Modal token - run: modal token set --token-id ${{ secrets.MODAL_TOKEN_ID }} --token-secret ${{ secrets.MODAL_TOKEN_SECRET }} - - - name: Run notebook on Modal - run: python util/run_notebook_test.py --notebook "notebooks/${{ matrix.notebook }}" --skip-packages flash-attn \ No newline at end of file diff --git a/.github/workflows/run-notebooks.yaml b/.github/workflows/run-notebooks.yaml new file mode 100644 index 0000000..86666b1 --- /dev/null +++ b/.github/workflows/run-notebooks.yaml @@ -0,0 +1,58 @@ +name: Run notebooks + +on: + push: + branches: + - main + paths: + - 'notebooks/**' + pull_request: + branches: + - main + paths: + - 'notebooks/**' + workflow_dispatch: + +jobs: + discover-notebooks: + runs-on: ubuntu-latest + outputs: + notebooks: ${{ steps.list.outputs.notebooks }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: List notebooks + id: list + run: | + notebooks=$(ls notebooks/*.ipynb | xargs -I{} basename {} | jq -R -s -c 'split("\n") | map(select(. != ""))') + echo "notebooks=$notebooks" >> "$GITHUB_OUTPUT" + echo "Found notebooks: $notebooks" + + run-notebooks: + needs: discover-notebooks + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + notebook: ${{ fromJSON(needs.discover-notebooks.outputs.notebooks) }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Install modal + run: uv pip install --system modal + + - name: Set up Modal token + run: modal token set --token-id ${{ secrets.MODAL_TOKEN_ID }} --token-secret ${{ secrets.MODAL_TOKEN_SECRET }} + + - name: Run notebook on Modal + run: python util/run_notebook_test.py --notebook "notebooks/${{ matrix.notebook }}" --skip-packages flash-attn diff --git a/util/README.md b/util/README.md new file mode 100644 index 0000000..8924740 --- /dev/null +++ b/util/README.md @@ -0,0 +1,83 @@ +# Notebook Testing Utilities + +This directory contains tools for running Jupyter notebooks as smoke tests on [Modal](https://modal.com/) GPUs. The system extracts code cells from `.ipynb` files, collects their dependencies, and executes them remotely to verify they work end-to-end. + +## Overview + +| File | Purpose | +|---|---| +| `modal_runner.py` | Modal app that runs Python code on a remote GPU container | +| `run_notebook_test.py` | CLI that parses a notebook, extracts dependencies, and submits the combined script to Modal | + +## How it works + +1. **`run_notebook_test.py`** reads a notebook and extracts all code cells. +2. Pip install lines (`!pip install ...`, `!uv pip install ...`) are extracted from **every** cell, collected as image-level dependencies, and stripped from the script so they don't run at execution time. +3. Shell commands (`!` lines) are converted to `subprocess.run()` calls; `%%bash` cells become setup commands. +4. The remaining cells are concatenated into a single Python script, preserving shared state (variables, imports) across cells just like a real notebook. +5. The script, dependency list, and setup commands are sent to **`modal_runner.py`**'s `run_code` function via Modal's remote invocation. + +**`modal_runner.py`** runs on an A10G GPU (default) with a 10-minute timeout. It: +- Installs pip packages in order (each `pip install` line becomes a separate install group to preserve ordering) +- Runs any setup commands (shell commands, `%%bash` cells) +- Executes the combined Python script as a subprocess and captures stdout/stderr + +## Usage + +### Prerequisites + +- A Modal account with a deployed `ci-runner` app +- Modal token configured locally + +### Deploy the Modal app (one-time) + +```bash +modal deploy util/modal_runner.py +``` + +### Run a notebook + +```bash +python util/run_notebook_test.py --notebook notebooks/MyNotebook.ipynb +``` + +### Options + +| Flag | Description | Default | +|---|---|---| +| `--notebook` | Path to `.ipynb` file (required) | β€” | +| `--gpu` | GPU type | `A10G` | +| `--dry-run` | Print the combined script and packages without running | off | +| `--skip-packages PKG ...` | Package names to exclude from installation | none | + +### Examples + +```bash +# Dry run to inspect what would be executed +python util/run_notebook_test.py --notebook notebooks/LFM2_Inference.ipynb --dry-run + +# Skip a package that doesn't build in CI +python util/run_notebook_test.py --notebook notebooks/LFM2_Inference.ipynb --skip-packages flash-attn + +# Use a different GPU +python util/run_notebook_test.py --notebook notebooks/LFM2_Inference.ipynb --gpu A100 +``` + +## Notebook directives + +Control which cells and lines are included in the test run: + +| Directive | Scope | Effect | +|---|---|---| +| `# test:skip` | Cell | Skip the entire cell | +| `# !modal_skip` | Line or cell | Skip that line (in dependency parsing) or cell (in cell filtering) | +| `# !modal_skip_rest` | Line or cell | Stop processing β€” ignore this and all subsequent lines/cells | + +These can be placed in comments within any code cell. + +## CI workflow + +The GitHub Actions workflow (`.github/workflows/run-notebooks.yaml`) automatically: +1. Discovers all `.ipynb` files in the `notebooks/` directory +2. Runs each notebook in parallel as a separate matrix job +3. Triggers on pushes/PRs that change `notebooks/**`, or manually via `workflow_dispatch` diff --git a/util/run_notebook_test.py b/util/run_notebook_test.py index 8a335ef..a778581 100644 --- a/util/run_notebook_test.py +++ b/util/run_notebook_test.py @@ -2,8 +2,9 @@ """ Run code cells from a notebook on a Modal GPU as a single combined script. -Cell 0 is treated as the dependency cell (pip packages). -Remaining non-skipped cells are concatenated into one script and executed together, +Pip install lines are extracted from ALL cells, collected as image dependencies, +and stripped from the script so they don't run at execution time. +Non-skipped cells are concatenated into one script and executed together, preserving shared state (variables, imports) across cells just like a real notebook. Usage: @@ -18,8 +19,6 @@ import time from pathlib import Path import json -import sys -from pathlib import Path import modal sys.path.insert(0, str(Path(__file__).resolve().parent / "tests")) @@ -86,6 +85,29 @@ def parse_packages_from_cell(source: str) -> tuple[list[list[str]], list[str]]: return package_groups, setup_commands +def strip_pip_lines(source: str) -> str: + """Remove pip install lines (and their continuations) from cell source. + + This is used after packages have been extracted so that pip installs + run as image-level dependencies rather than at script execution time. + """ + lines = source.splitlines() + cleaned: list[str] = [] + i = 0 + while i < len(lines): + stripped = lines[i].strip() + if re.match(r"^!?\s*(?:uv\s+)?pip\s+install\s+", stripped): + # Skip this line and any backslash-continuation lines + while stripped.endswith("\\") and i + 1 < len(lines): + i += 1 + stripped = lines[i].strip() + i += 1 + continue + cleaned.append(lines[i]) + i += 1 + return "\n".join(cleaned) + + def filter_cells(code_cells: list[dict]) -> list[dict]: """Apply !modal_skip and !modal_skip_rest directives. @@ -196,13 +218,22 @@ def main(): print("No code cells found.") sys.exit(1) - # Cell 0 = dependencies - dep_cell = cells[0] - pip_packages, dep_setup = parse_packages_from_cell(dep_cell["source"]) + # Filter out skipped / modal_skip cells + code_cells = [c for c in cells if not c["skipped"]] + code_cells = filter_cells(code_cells) + + if not code_cells: + print("No runnable code cells.") + sys.exit(1) - if "!modal_skip_rest" in dep_cell["source"]: - print(f"Notebook {notebook_path.name}: dependency cell has !modal_skip_rest β€” skipping entirely.") - sys.exit(0) + # Extract pip packages from ALL cells, then strip those lines from source + pip_packages: list[list[str]] = [] + dep_setup: list[str] = [] + for cell in code_cells: + pkgs, setup = parse_packages_from_cell(cell["source"]) + pip_packages.extend(pkgs) + dep_setup.extend(setup) + cell["source"] = strip_pip_lines(cell["source"]) # Filter out skipped packages if args.skip_packages: @@ -212,17 +243,10 @@ def main(): for group in pip_packages ] pip_packages = [g for g in pip_packages if g] # drop empty groups - code_cells = [c for c in cells[1:] if not c["skipped"]] - code_cells = filter_cells(code_cells) - - if not code_cells: - print("No runnable code cells after dependency cell.") - sys.exit(1) combined, cell_setup = combine_cells(code_cells) setup_commands = dep_setup + cell_setup - total_after_dep = len(cells) - 1 - skipped = total_after_dep - len(code_cells) + skipped = len(cells) - len(code_cells) print(f"{'=' * 50}") print(f"Notebook: {notebook_path.name}") From f6023c85b370c9d06f33bf7e60635019f87c0af0 Mon Sep 17 00:00:00 2001 From: Jonathan Buchanan Date: Tue, 17 Mar 2026 15:15:34 -0700 Subject: [PATCH 4/5] fix: ollama and llama tests after changing how deps being processed --- notebooks/LFM2_Inference_with_Ollama.ipynb | 2 +- util/run_notebook_test.py | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/notebooks/LFM2_Inference_with_Ollama.ipynb b/notebooks/LFM2_Inference_with_Ollama.ipynb index eef7913..dbf9fc8 100644 --- a/notebooks/LFM2_Inference_with_Ollama.ipynb +++ b/notebooks/LFM2_Inference_with_Ollama.ipynb @@ -25,7 +25,7 @@ "outputs": [], "source": [ "# Colab specific settings\n", - "# !modal_skip_rest\n", + "# !modal_skip\n", "!sudo apt install zstd\n", "!sudo apt update\n", "!sudo apt install -y pciutils" diff --git a/util/run_notebook_test.py b/util/run_notebook_test.py index a778581..6f59588 100644 --- a/util/run_notebook_test.py +++ b/util/run_notebook_test.py @@ -223,16 +223,16 @@ def main(): code_cells = filter_cells(code_cells) if not code_cells: - print("No runnable code cells.") - sys.exit(1) + print(f"Notebook {notebook_path.name}: no runnable code cells (all skipped) β€” passing.") + sys.exit(0) - # Extract pip packages from ALL cells, then strip those lines from source + # Extract pip packages from ALL cells, then strip those lines from source. + # Only collect packages here β€” shell commands (! lines, %%bash) in regular + # cells are handled by preprocess_cell inside combine_cells. pip_packages: list[list[str]] = [] - dep_setup: list[str] = [] for cell in code_cells: - pkgs, setup = parse_packages_from_cell(cell["source"]) + pkgs, _setup = parse_packages_from_cell(cell["source"]) pip_packages.extend(pkgs) - dep_setup.extend(setup) cell["source"] = strip_pip_lines(cell["source"]) # Filter out skipped packages @@ -244,8 +244,7 @@ def main(): ] pip_packages = [g for g in pip_packages if g] # drop empty groups - combined, cell_setup = combine_cells(code_cells) - setup_commands = dep_setup + cell_setup + combined, setup_commands = combine_cells(code_cells) skipped = len(cells) - len(code_cells) print(f"{'=' * 50}") From 1804a98c4df152444a3443b010946d100ddfe713 Mon Sep 17 00:00:00 2001 From: Jonathan Buchanan Date: Wed, 18 Mar 2026 11:27:02 -0700 Subject: [PATCH 5/5] add: training execution full coverage --- notebooks/grpo_for_verifiable_tasks.ipynb | 13 ++-------- ...360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" | 6 ++--- ...\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" | 4 +-- .../\360\237\222\247_LFM2_DPO_with_TRL.ipynb" | 9 ------- util/run_notebook_test.py | 26 +++++++++++++++++++ 5 files changed, 32 insertions(+), 26 deletions(-) diff --git a/notebooks/grpo_for_verifiable_tasks.ipynb b/notebooks/grpo_for_verifiable_tasks.ipynb index 3d68b58..7fa1489 100644 --- a/notebooks/grpo_for_verifiable_tasks.ipynb +++ b/notebooks/grpo_for_verifiable_tasks.ipynb @@ -120,7 +120,7 @@ }, "outputs": [], "source": [ - "!uv pip install \"trl[peft]\" bitsandbytes trackio math_verify liger-kernel\n", + "!uv pip install \"trl[peft]\" \"trl[rewards]\" bitsandbytes trackio math_verify liger-kernel\n", "!uv pip install flash-attn" ] }, @@ -501,15 +501,6 @@ "from trl.rewards import think_format_reward, reasoning_accuracy_reward" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# !modal_skip_rest" - ] - }, { "cell_type": "markdown", "metadata": { @@ -533,7 +524,7 @@ "training_args = GRPOConfig(\n", " # Training schedule / optimization\n", " learning_rate=2e-5, # Learning rate for the optimizer\n", - " #num_train_epochs=1,\n", + " num_train_epochs=1,\n", " max_steps=30, # Number of dataset passes. For full trainings, use `num_train_epochs` instead\n", "\n", " # Parameters that control GRPO training (you can adapt them)\n", diff --git "a/notebooks/\360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" "b/notebooks/\360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" index d603510..63aee05 100644 --- "a/notebooks/\360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" +++ "b/notebooks/\360\237\222\247_LFM2_5_SFT_with_TRL.ipynb" @@ -172,9 +172,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# !modal_skip_rest" - ] + "source": [] }, { "cell_type": "markdown", @@ -313,7 +311,7 @@ "\n", "lora_sft_config = SFTConfig(\n", " output_dir=\"./lfm2-sft-lora\",\n", - " num_train_epochs=1,\n", + " num_train_epochs=.01,\n", " per_device_train_batch_size=1,\n", " learning_rate=5e-5,\n", " lr_scheduler_type=\"linear\",\n", diff --git "a/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" "b/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" index a2e3350..7086649 100644 --- "a/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" +++ "b/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" @@ -45,7 +45,7 @@ }, "outputs": [], "source": [ - "!uv pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe datasets trl torchvision peft" + "!uv pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe datasets trl torchvision peft \"torchao>=0.4.0\"" ] }, { @@ -638,7 +638,7 @@ "metadata": {}, "outputs": [], "source": [ - "# !modal_skip_rest" + "# ignore this line" ] }, { diff --git "a/notebooks/\360\237\222\247_LFM2_DPO_with_TRL.ipynb" "b/notebooks/\360\237\222\247_LFM2_DPO_with_TRL.ipynb" index 56b8fb1..f030814 100644 --- "a/notebooks/\360\237\222\247_LFM2_DPO_with_TRL.ipynb" +++ "b/notebooks/\360\237\222\247_LFM2_DPO_with_TRL.ipynb" @@ -295,15 +295,6 @@ "print(f\"🎯 Target modules: {lora_config.target_modules}\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# !modal_skip_rest" - ] - }, { "cell_type": "markdown", "metadata": { diff --git a/util/run_notebook_test.py b/util/run_notebook_test.py index 6f59588..3775de8 100644 --- a/util/run_notebook_test.py +++ b/util/run_notebook_test.py @@ -108,6 +108,23 @@ def strip_pip_lines(source: str) -> str: return "\n".join(cleaned) +TRAINER_PATTERNS = re.compile(r"\b(?:SFTTrainer|GRPOTrainer|DPOTrainer|Trainer)\s*\(") + + +def is_training_notebook(code_cells: list[dict]) -> bool: + """Return True if any cell instantiates a HF Trainer.""" + return any(TRAINER_PATTERNS.search(c["source"]) for c in code_cells) + +# Default to lower epoch to speed up testing pipeline +def patch_training_epochs(source: str, epochs: float = 0.01) -> str: + """Replace num_train_epochs= with a minimal value for smoke testing.""" + return re.sub( + r"num_train_epochs\s*=\s*[0-9.]+", + f"num_train_epochs={epochs}", + source, + ) + + def filter_cells(code_cells: list[dict]) -> list[dict]: """Apply !modal_skip and !modal_skip_rest directives. @@ -247,8 +264,17 @@ def main(): combined, setup_commands = combine_cells(code_cells) skipped = len(cells) - len(code_cells) + # Auto-detect training notebooks: upgrade GPU and patch epochs for smoke testing + is_training = is_training_notebook(code_cells) + if is_training: + if args.gpu == "A10G": # only override if user didn't specify + args.gpu = "H100" + combined = patch_training_epochs(combined, epochs=0.01) + print(f"{'=' * 50}") print(f"Notebook: {notebook_path.name}") + if is_training: + print(f"Type: training (auto: GPUβ†’{args.gpu}, epochsβ†’0.01)") all_packages = [pkg for group in pip_packages for pkg in group] print(f"Packages: {all_packages or '(none)'} ({len(pip_packages)} install group(s))") if setup_commands: