1 1 {
2- "cells" : [
3- {
4- "cell_type" : " markdown" ,
5- "metadata" : {},
6- "source" : [
7- " # 🎯 Stack 2.9 — 128K Context Fine-tuning\n " ,
8- " \n " ,
9- " Fine-tune **Qwen2.5-Coder-1.5B** with **packed 128K context windows**.\n " ,
10- " \n " ,
11- " **Key innovation:** Instead of training on short ~500-token examples, we **pack 200+ examples** into each 128K window. This multiplies training signal and teaches the model to track tool state across long, multi-turn interactions.\n " ,
12- " \n " ,
13- " **Runtime:** Runtime → Change runtime type → **GPU (T4 16GB recommended)**\n " ,
14- " **Time:** ~6-8 hours on free Colab T4"
15- ]
2+ "cells" : [
3+ {
4+ "cell_type" : " markdown" ,
5+ "metadata" : {},
6+ "source" : [
7+ " # \ud83c\udfaf Stack 2.9 \u2014 128K Context Fine-tuning\n " ,
8+ " \n " ,
9+ " Fine-tune **Qwen2.5-Coder-1.5B** with **packed 128K context windows**.\n " ,
10+ " 1500 tool-calling examples \u2192 packed into 128K-token sequences for a much denser training signal.\n " ,
11+ " \n " ,
12+ " **Runtime:** Runtime \u2192 Change runtime type \u2192 **GPU (T4 16GB)**\n " ,
13+ " **Time:** ~6-8 hours on free Colab T4"
14+ ]
15+ },
16+ {
17+ "cell_type" : " markdown" ,
18+ "metadata" : {},
19+ "source" : [
20+ " ## Step 1: Clone Stack 2.9 & Install Dependencies"
21+ ]
22+ },
23+ {
24+ "cell_type" : " code" ,
25+ "execution_count" : null ,
26+ "metadata" : {},
27+ "outputs" : [],
28+ "source" : [
29+ " # Clone and install in ONE cell \u2014 use ! for shell commands\n " ,
30+ " !git clone https://github.com/my-ai-stack/stack-2.9.git\n " ,
31+ " !pip install -q transformers peft datasets 'bitsandbytes>=0.46.1' accelerate huggingface_hub scipy # quote the version spec so the shell does not treat >= as a redirect\n " ,
32+ " !pip install -q torch --upgrade\n " ,
33+ " import os\n " ,
34+ " os.chdir('/content/stack-2.9') # Change to repo dir for subsequent cells\n " ,
35+ " print('\u2705 Cloned & installed. Working dir:', os.getcwd())"
36+ ]
37+ },
38+ {
39+ "cell_type" : " markdown" ,
40+ "metadata" : {},
41+ "source" : [
42+ " ## Step 2: Login to HuggingFace"
43+ ]
44+ },
45+ {
46+ "cell_type" : " code" ,
47+ "execution_count" : null ,
48+ "metadata" : {},
49+ "outputs" : [],
50+ "source" : [
51+ " from huggingface_hub import login\n " ,
52+ " # \ud83d\udc47 Put YOUR HF token here\n " ,
53+ " login(token=\" YOUR_HF_TOKEN_HERE\" )\n " ,
54+ " print('\u2705 Logged into HuggingFace')"
55+ ]
56+ },
57+ {
58+ "cell_type" : " markdown" ,
59+ "metadata" : {},
60+ "source" : [
61+ " ## Step 3: Mount Google Drive"
62+ ]
63+ },
64+ {
65+ "cell_type" : " code" ,
66+ "execution_count" : null ,
67+ "metadata" : {},
68+ "outputs" : [],
69+ "source" : [
70+ " from google.colab import drive\n " ,
71+ " drive.mount('/content/drive')\n " ,
72+ " OUTPUT_DIR = '/content/drive/MyDrive/stack-2.9-128k-output'\n " ,
73+ " import os; os.makedirs(OUTPUT_DIR, exist_ok=True)\n " ,
74+ " print(f'\ud83d\udcc1 Output: {OUTPUT_DIR}')"
75+ ]
76+ },
77+ {
78+ "cell_type" : " markdown" ,
79+ "metadata" : {},
80+ "source" : [
81+ " ## Step 4: Download Training Data"
82+ ]
83+ },
84+ {
85+ "cell_type" : " code" ,
86+ "execution_count" : null ,
87+ "metadata" : {},
88+ "outputs" : [],
89+ "source" : [
90+ " import huggingface_hub, shutil, json\n " ,
91+ " \n " ,
92+ " print('Downloading from HuggingFace...')\n " ,
93+ " path = huggingface_hub.hf_hub_download(\n " ,
94+ " repo_id='walidsobhie/stack-2-9-tool-examples',\n " ,
95+ " filename='tool_examples_combined.jsonl',\n " ,
96+ " repo_type='dataset',\n " ,
97+ " local_dir='/content/',\n " ,
98+ " )\n " ,
99+ " shutil.move(path, '/content/tool_examples.jsonl')\n " ,
100+ " \n " ,
101+ " with open('/content/tool_examples.jsonl') as f:\n " ,
102+ " lines = f.readlines()\n " ,
103+ " print(f'\u2705 {len(lines)} examples ready')"
104+ ]
105+ },
106+ {
107+ "cell_type" : " markdown" ,
108+ "metadata" : {},
109+ "source" : [
110+ " ## Step 5: Train! \ud83c\udfaf "
111+ ]
112+ },
113+ {
114+ "cell_type" : " code" ,
115+ "execution_count" : null ,
116+ "metadata" : {},
117+ "outputs" : [],
118+ "source" : [
119+ " import subprocess\n " ,
120+ " \n " ,
121+ " result = subprocess.run([\n " ,
122+ " \" python3\" , \" training/train_extended_context.py\" ,\n " ,
123+ " \" --model-path\" , \" Qwen/Qwen2.5-Coder-1.5B\" ,\n " ,
124+ " \" --data-path\" , \" /content/tool_examples.jsonl\" ,\n " ,
125+ " \" --output-dir\" , OUTPUT_DIR,\n " ,
126+ " \" --context-length\" , \" 131072\" ,\n " ,
127+ " \" --lora-rank\" , \" 32\" ,\n " ,
128+ " \" --epochs\" , \" 3\" ,\n " ,
129+ " \" --batch-size\" , \" 1\" ,\n " ,
130+ " \" --grad-accum\" , \" 16\" ,\n " ,
131+ " \" --lr\" , \" 2e-4\" ,\n " ,
132+ " \" --use-packing\" ,\n " ,
133+ " \" --push-to-hub\" ,\n " ,
134+ " \" --hub-model-id\" , \" walidsobhie/stack-2.9-128k-context\"\n " ,
135+ " ], cwd=\" /content/stack-2.9\" , capture_output=True, text=True)\n " ,
136+ " \n " ,
137+ " print('STDOUT:\\n', result.stdout[-2000:] if result.stdout else '')\n " ,
138+ " print('STDERR:\\n', result.stderr[-2000:] if result.stderr else '')"
139+ ]
140+ }
141+ ],
142+ "metadata" : {
143+ "colab" : {
144+ "provenance" : [],
145+ "name" : " stack-2.9-128k-packed-training"
146+ },
147+ "kernelspec" : {
148+ "name" : " python3" ,
149+ "display_name" : " Python 3"
150+ },
151+ "language_info" : {
152+ "name" : " python" ,
153+ "version" : " 3.10.0"
154+ }
16 155 },
17- {
18- "cell_type" : " markdown" ,
19- "metadata" : {},
20- "source" : [" ## Step 1: Clone Stack 2.9 & Install Dependencies" ]
21- },
22- {
23- "cell_type" : " code" ,
24- "execution_count" : null ,
25- "metadata" : {},
26- "outputs" : [],
27- "source" : [
28- " # Clone the repo (gets the fixed training script)\n " ,
29- " !git clone https://github.com/my-ai-stack/stack-2.9.git\n " ,
30- " cd stack-2.9\n " ,
31- " \n " ,
32- " # Install all dependencies\n " ,
33- " !pip install -q transformers peft datasets bitsandbytes>=0.46.1 accelerate huggingface_hub scipy\n " ,
34- " !pip install -q torch --upgrade\n " ,
35- " \n " ,
36- " print('✅ Dependencies installed')"
37- ]
38- },
39- {
40- "cell_type" : " markdown" ,
41- "metadata" : {},
42- "source" : [" ## Step 2: Login to HuggingFace\n\n Get your token at: https://huggingface.co/settings/tokens" ]
43- },
44- {
45- "cell_type" : " code" ,
46- "execution_count" : null ,
47- "metadata" : {},
48- "outputs" : [],
49- "source" : [
50- " from huggingface_hub import login\n " ,
51- " # 👇 Replace with YOUR HuggingFace token\n " ,
52- " login(token=\" YOUR_HF_TOKEN_HERE\" ) # ← 🔴 PUT YOUR HF TOKEN HERE\n " ,
53- " print('✅ Logged into HuggingFace')"
54- ]
55- },
56- {
57- "cell_type" : " markdown" ,
58- "metadata" : {},
59- "source" : [
60- " ## Step 3: Mount Google Drive\n\n Training checkpoints and the final adapter will be saved here."
61- ]
62- },
63- {
64- "cell_type" : " code" ,
65- "execution_count" : null ,
66- "metadata" : {},
67- "outputs" : [],
68- "source" : [
69- " from google.colab import drive\n " ,
70- " drive.mount('/content/drive')\n " ,
71- " OUTPUT_DIR = '/content/drive/MyDrive/stack-2.9-128k-output'\n " ,
72- " import os; os.makedirs(OUTPUT_DIR, exist_ok=True)\n " ,
73- " print(f'📁 Output directory: {OUTPUT_DIR}')"
74- ]
75- },
76- {
77- "cell_type" : " markdown" ,
78- "metadata" : {},
79- "source" : [
80- " ## Step 4: Download Training Data\n\n We use the dataset uploaded to HuggingFace Hub — 1500 tool-calling examples, packed into 128K sequences."
81- ]
82- },
83- {
84- "cell_type" : " code" ,
85- "execution_count" : null ,
86- "metadata" : {},
87- "outputs" : [],
88- "source" : [
89- " import huggingface_hub\n " ,
90- " \n " ,
91- " DATA_FILE = '/content/tool_examples.jsonl'\n " ,
92- " \n " ,
93- " print('Downloading training data from HuggingFace...')\n " ,
94- " hf_id = 'walidsobhie/stack-2-9-tool-examples'\n " ,
95- " path = huggingface_hub.hf_hub_download(\n " ,
96- " repo_id=hf_id,\n " ,
97- " filename='tool_examples_combined.jsonl',\n " ,
98- " repo_type='dataset',\n " ,
99- " local_dir='/content/',\n " ,
100- " local_dir_use_symlinks=False,\n " ,
101- " )\n " ,
102- " import shutil\n " ,
103- " shutil.move(path, DATA_FILE)\n " ,
104- " print(f'✅ Dataset ready: {DATA_FILE}')\n " ,
105- " \n " ,
106- " # Quick sanity check\n " ,
107- " import json\n " ,
108- " with open(DATA_FILE) as f:\n " ,
109- " lines = f.readlines()\n " ,
110- " print(f' Total examples: {len(lines)}')\n " ,
111- " ex = json.loads(lines[0])\n " ,
112- " print(f' Keys: {list(ex.keys())}')"
113- ]
114- },
115- {
116- "cell_type" : " markdown" ,
117- "metadata" : {},
118- "source" : [
119- " ## Step 5: Run 128K Packed Context Fine-tuning\n\n **This cell runs the full training. On free Colab T4 it takes ~6-8 hours.**\n " ,
120- " \n " ,
121- " If Colab disconnects, your checkpoints are safe in Google Drive. Reconnect and re-run this cell — it will resume from the last checkpoint."
122- ]
123- },
124- {
125- "cell_type" : " code" ,
126- "execution_count" : null ,
127- "metadata" : {},
128- "outputs" : [],
129- "source" : [
130- " import subprocess\n " ,
131- " \n " ,
132- " # Run the fixed training script with packing enabled\n " ,
133- " result = subprocess.run([\n " ,
134- " \" python3\" , \" training/train_extended_context.py\" ,\n " ,
135- " \" --model-path\" , \" Qwen/Qwen2.5-Coder-1.5B\" ,\n " ,
136- " \" --data-path\" , \" /content/tool_examples.jsonl\" ,\n " ,
137- " \" --output-dir\" , OUTPUT_DIR,\n " ,
138- " \" --context-length\" , \" 131072\" ,\n " ,
139- " \" --lora-rank\" , \" 32\" ,\n " ,
140- " \" --epochs\" , \" 3\" ,\n " ,
141- " \" --batch-size\" , \" 1\" ,\n " ,
142- " \" --grad-accum\" , \" 16\" ,\n " ,
143- " \" --lr\" , \" 2e-4\" ,\n " ,
144- " \" --use-packing\" ,\n " ,
145- " \" --push-to-hub\" ,\n " ,
146- " \" --hub-model-id\" , \" walidsobhie/stack-2.9-128k-context\"\n " ,
147- " ], cwd=\" /content/stack-2.9\" )\n " ,
148- " \n " ,
149- " print('STDOUT:', result.stdout)\n " ,
150- " print('STDERR:', result.stderr[-3000:] if result.stderr else '(none)')"
151- ]
152- }
153- ],
154- "metadata" : {
155- "colab" : {
156- "provenance" : [],
157- "name" : " stack-2.9-128k-packed-training"
158- },
159- "kernelspec" : {
160- "name" : " python3" ,
161- "display_name" : " Python 3"
162- },
163- "language_info" : {
164- "name" : " python" ,
165- "version" : " 3.10.0"
166- }
167- },
168- "nbformat" : 4 ,
169- "nbformat_minor" : 4
170- }
156+ "nbformat" : 4 ,
157+ "nbformat_minor" : 4
158+ }