
Commit fb15bd8

walidsobhie-code committed

feat: add ready-to-run notebooks for Kaggle and Colab

- Colab 128K context fine-tuning notebook
- Kaggle 128K context fine-tuning notebook
- Colab HumanEval benchmark notebook

1 parent 10182dc commit fb15bd8

3 files changed

Lines changed: 372 additions & 0 deletions

Colab 128K context fine-tuning notebook

Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 🎯 Stack 2.9 — 128K Context Fine-tuning\n",
    "Fine-tune Qwen2.5-Coder-1.5B from 32K → 128K context\n",
    "\n",
    "**Runtime:** GPU (T4 16GB recommended) | **Time:** ~2-3 hours"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: Clone Stack 2.9 & Install Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!git clone https://github.com/my-ai-stack/stack-2.9.git\n",
    "%cd stack-2.9\n",
    "!pip install -q transformers peft datasets bitsandbytes accelerate huggingface_hub\n",
    "!pip install -q scipy torch --upgrade"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: Log in to HuggingFace (to push weights later)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import login\n",
    "# Get your token at: https://huggingface.co/settings/tokens\n",
    "login(token=\"YOUR_HF_TOKEN\")  # ← Replace with your token"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Mount Google Drive (optional — for saving checkpoints)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.colab import drive\n",
    "drive.mount('/content/drive')\n",
    "OUTPUT_DIR = \"/content/drive/MyDrive/stack-2.9-128k-output\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4: Run 128K Context Fine-tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "# Note: output is buffered and printed only after the run finishes\n",
    "result = subprocess.run([\n",
    "    \"python3\", \"training/train_extended_context.py\",\n",
    "    \"--model-path\", \"my-ai-stack/Stack-2-9-finetuned\",\n",
    "    \"--data-path\", \"training/training-data/tool_examples_combined.jsonl\",\n",
    "    \"--output-dir\", OUTPUT_DIR,\n",
    "    \"--context-length\", \"131072\",\n",
    "    \"--lora-rank\", \"64\",\n",
    "    \"--epochs\", \"3\",\n",
    "    \"--push-to-hub\",\n",
    "    \"--hub-model-id\", \"YOUR_USERNAME/stack-2.9-128k\"\n",
    "], cwd=\"/content/stack-2.9\", capture_output=True, text=True)\n",
    "print(result.stdout)\n",
    "print(result.stderr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "## Alternative: Run on the Base Qwen Model (if the HF model isn't available)\n",
    "\n",
    "If the fine-tuned model isn't available, use the base model:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Change --model-path to:\n",
    "#   \"Qwen/Qwen2.5-Coder-1.5B\"\n",
    "# and keep --push-to-hub, using your own --hub-model-id"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": [],
   "machine_shape": "hm"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
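After training completes, a quick sanity check is to load the pushed checkpoint back and generate a short completion. A minimal sketch, assuming the training script uploads a merged, standalone model under the `--hub-model-id` placeholder above (if it pushes only a LoRA adapter, load the base model first and attach the adapter with `peft.PeftModel.from_pretrained`):

```python
# Minimal smoke test for the pushed checkpoint (assumes a merged model,
# not a bare LoRA adapter, lives at YOUR_USERNAME/stack-2.9-128k).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "YOUR_USERNAME/stack-2.9-128k"  # placeholder from the notebook
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # a 1.5B model in fp16 fits a T4
    device_map="auto",
)

prompt = "def fibonacci(n):"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```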

notebooks/colab_humaneval.ipynb

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 📊 Stack 2.9 — HumanEval Benchmark\n",
    "Run the full HumanEval (164 problems) pass@k evaluation\n",
    "\n",
    "**Runtime:** GPU (T4 16GB) | **Time:** ~30-60 min"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: Clone & Install"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!git clone https://github.com/my-ai-stack/stack-2.9.git\n",
    "%cd stack-2.9\n",
    "!pip install -q transformers peft datasets human-eval accelerate"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: Run HumanEval Benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_PATH = \"my-ai-stack/Stack-2-9-finetuned\"  # or your 128K fine-tuned model ID\n",
    "\n",
    "import subprocess\n",
    "result = subprocess.run([\n",
    "    \"python3\", \"training/evaluate_model.py\",\n",
    "    \"--model-path\", MODEL_PATH,\n",
    "    \"--benchmark\", \"humaneval\",\n",
    "    \"--num-samples\", \"10\",\n",
    "    \"--max-new-tokens\", \"256\",\n",
    "    \"--output\", \"/tmp/humaneval_results.json\"\n",
    "], cwd=\"/content/stack-2.9\", capture_output=True, text=True)\n",
    "\n",
    "print(result.stdout[-5000:] if result.stdout else \"No output\")\n",
    "print(\"STDERR:\", result.stderr[-1000:] if result.stderr else \"None\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Parse Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "with open(\"/tmp/humaneval_results.json\") as f:\n",
    "    results = json.load(f)\n",
    "\n",
    "print(\"=== HumanEval Results ===\")\n",
    "for k, v in results.items():\n",
    "    if isinstance(v, float):\n",
    "        print(f\"  pass@{k}: {v:.1%}\")\n",
    "    else:\n",
    "        print(f\"  {k}: {v}\")"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": [],
   "machine_shape": "hm"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
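The `evaluate_model.py` entry point above is specific to this repo, but the `human-eval` package installed in Step 1 can also be driven directly if you want a second opinion on the numbers. A minimal sketch of that route — `generate_one` is a hypothetical stand-in for your own sampling code; the rest is the package's documented API:

```python
# Generate completions for all 164 HumanEval problems and score them with
# the human-eval package. `generate_one` is a hypothetical placeholder for
# a model-sampling function you supply.
from human_eval.data import read_problems, write_jsonl

def generate_one(prompt: str) -> str:
    raise NotImplementedError  # return a model completion for `prompt`

problems = read_problems()  # dict keyed by task_id, each with a "prompt"
samples = [
    dict(task_id=task_id, completion=generate_one(problems[task_id]["prompt"]))
    for task_id in problems
    for _ in range(10)  # matches --num-samples 10 above
]
write_jsonl("samples.jsonl", samples)

# Score from the shell. This executes model-written code, so read the
# package's safety notes before enabling execution:
# !evaluate_functional_correctness samples.jsonl
```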
Kaggle 128K context fine-tuning notebook

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 🎯 Stack 2.9 — 128K Context Fine-tuning\n",
    "Fine-tune Qwen2.5-Coder-1.5B from 32K → 128K context\n",
    "\n",
    "**Runtime:** GPU (P100 16GB) | **Time:** ~2-3 hours\n",
    "\n",
    "![Kaggle](https://img.shields.io/badge/Kaggle-P100-blue) ![Context](https://img.shields.io/badge/Context-128K-green)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: Clone Repo & Install"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!git clone https://github.com/my-ai-stack/stack-2.9.git\n",
    "%cd stack-2.9\n",
    "!pip install -q transformers peft datasets bitsandbytes accelerate huggingface_hub\n",
    "!pip install -q scipy torch --upgrade"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: HuggingFace Login"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import login\n",
    "import os\n",
    "# Store your HF token as a Kaggle secret named HF_TOKEN: https://www.kaggle.com/docs/secrets\n",
    "from kaggle_secrets import UserSecretsClient\n",
    "os.environ[\"HF_TOKEN\"] = UserSecretsClient().get_secret(\"HF_TOKEN\")\n",
    "login(token=os.environ[\"HF_TOKEN\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Run Fine-tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "\n",
    "# Note: output is buffered and printed only after the run finishes\n",
    "result = subprocess.run([\n",
    "    \"python3\", \"training/train_extended_context.py\",\n",
    "    \"--model-path\", \"Qwen/Qwen2.5-Coder-1.5B\",\n",
    "    \"--data-path\", \"training/training-data/tool_examples_combined.jsonl\",\n",
    "    \"--output-dir\", \"/kaggle/working/stack-2.9-128k\",\n",
    "    \"--context-length\", \"131072\",\n",
    "    \"--lora-rank\", \"64\",\n",
    "    \"--epochs\", \"3\",\n",
    "    \"--push-to-hub\",\n",
    "    \"--hub-model-id\", \"YOUR_USERNAME/stack-2.9-128k\"\n",
    "], cwd=\"/kaggle/working/stack-2.9\", env=dict(os.environ, HF_TOKEN=os.environ[\"HF_TOKEN\"]), capture_output=True, text=True)\n",
    "\n",
    "print(result.stdout[-3000:] if result.stdout else \"No stdout\")\n",
    "print(result.stderr[-1000:] if result.stderr else \"No stderr\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4: Download Checkpoints"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# After training, inspect the merged model\n",
    "!ls -la /kaggle/working/stack-2.9-128k/merged/\n",
    "# The merged/ folder contains the full 128K model, ready to use"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "kaggle": {
   "accelerator": "GPU",
   "dataSources": [],
   "dockerImageVersion": "gpu",
   "gpuRequirements": {
    "top": "p100"
   },
   "kernelImage": {
    "id": "docker",
    "name": "docker"
   }
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
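Before downloading the whole `merged/` folder, it's worth confirming that the saved config actually advertises the extended window. A minimal sketch, assuming standard Qwen2-style config fields (`rope_scaling` appears only if the training script applied RoPE scaling):

```python
# Check that the merged checkpoint's config reflects the 128K context.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("/kaggle/working/stack-2.9-128k/merged")
print("max_position_embeddings:", config.max_position_embeddings)  # expect 131072
print("rope_scaling:", getattr(config, "rope_scaling", None))
```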
