\n",
@@ -733,13 +740,17 @@
" \n",
" \n",
"
"
+ ],
+ "text/plain": [
+ ""
]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"π SFT training completed!\n",
"πΎ Saving to: lfm2-vl-med\n"
@@ -798,7 +809,6 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "QHB1ACVMzFZ5",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 84,
@@ -816,33 +826,34 @@
"ba5474abed8646f0997c998eb60c6584"
]
},
+ "id": "QHB1ACVMzFZ5",
"outputId": "6a91c28c-eac7-4ed9-ba89-d9ffb78595d6"
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"π Merging LoRA weights...\n"
]
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "Writing model shards: 0%| | 0/1 [00:00, ?it/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "50ea07e2ec724376b68733e61f154736",
"version_major": 2,
- "version_minor": 0,
- "model_id": "50ea07e2ec724376b68733e61f154736"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Writing model shards: 0%| | 0/1 [00:00, ?it/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"πΎ Model saved to: ./lfm2-vl-med\n"
]
@@ -862,8 +873,8 @@
"accelerator": "GPU",
"colab": {
"gpuType": "A100",
- "provenance": [],
- "machine_shape": "hm"
+ "machine_shape": "hm",
+ "provenance": []
},
"kernelspec": {
"display_name": "Python 3",
@@ -874,98 +885,25 @@
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
- "50ea07e2ec724376b68733e61f154736": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_667564fe3c804219a40daf05c01b7c8a",
- "IPY_MODEL_9a7b2c8b1e8343b0bf9bdf0797e8085e",
- "IPY_MODEL_9111e89dca0f4b7fbbd866f0be43cc89"
- ],
- "layout": "IPY_MODEL_7577eb2740334e14be5eb13367657d0b"
- }
- },
- "667564fe3c804219a40daf05c01b7c8a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_219db81514d347d39a9ade11620db5b6",
- "placeholder": "β",
- "style": "IPY_MODEL_8ed57dced20e4e83a3424b2212777aed",
- "value": "Writingβmodelβshards:β100%"
- }
- },
- "9a7b2c8b1e8343b0bf9bdf0797e8085e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_034be1ed88444496af62403b4f0bdab3",
- "max": 1,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_f85783c3f9094aecb4c7d4eea369e841",
- "value": 1
- }
- },
- "9111e89dca0f4b7fbbd866f0be43cc89": {
+ "02a8521e92144c7b8b01ddabf77047d1": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d2918b5864b342e3adca46b781d944d7",
- "placeholder": "β",
- "style": "IPY_MODEL_ba5474abed8646f0997c998eb60c6584",
- "value": "β1/1β[00:06<00:00,ββ6.11s/it]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "7577eb2740334e14be5eb13367657d0b": {
+ "034be1ed88444496af62403b4f0bdab3": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1014,10 +952,86 @@
"width": null
}
},
- "219db81514d347d39a9ade11620db5b6": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
+ "03bf89932cce4b9ab5c0d0110d7c23b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "06b1c189384544a2b48094e2fe8e4145": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e06ed849004eedb3d40022bdf2639c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_87ca309f76ce4495b6bd4007f040caa6",
+ "placeholder": "β",
+ "style": "IPY_MODEL_336df5b38a024a1387cfbf1294559a06",
+ "value": "β828/828β[00:00<00:00,β108kB/s]"
+ }
+ },
+ "0b1634525cb74f71b1ff1929bb040df7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_78fe7f45f5ef45ff89a8927945d783a6",
+ "max": 3193334216,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_f042d469768e4e71b3a9b2f0ce412e19",
+ "value": 3193334216
+ }
+ },
+ "0c51ed97d58c409fa4495ac7a9fc7d2a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1066,25 +1080,136 @@
"width": null
}
},
- "8ed57dced20e4e83a3424b2212777aed": {
+ "0e17ab61aa0b44c4a12807b34698c3db": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "034be1ed88444496af62403b4f0bdab3": {
+ "0f37cfe8625f4ded90e23278965004b2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_178764f0a7084f8c93ec0e528f1e682d",
+ "placeholder": "β",
+ "style": "IPY_MODEL_25f786c2efc74fc7a930f916903dadfd",
+ "value": "β4.73M/?β[00:00<00:00,β22.7MB/s]"
+ }
+ },
+ "116426cbc3e048cb8a5e82560cdd54e0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "173ff5e74e8f4e50899f8528cb092c77": {
"model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
"model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1772514a2b6a4b25ae19bc7836bbc5f6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_173ff5e74e8f4e50899f8528cb092c77",
+ "placeholder": "β",
+ "style": "IPY_MODEL_475e285df2a242588c35b56acf5e2281",
+ "value": "β136/136β[00:00<00:00,β18.2kB/s]"
+ }
+ },
+ "178764f0a7084f8c93ec0e528f1e682d": {
+ "model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1133,10 +1258,25 @@
"width": null
}
},
- "f85783c3f9094aecb4c7d4eea369e841": {
+ "19a5ca0e1a6448e9a131add20d4995d2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "20fb5a2767a24d49bcbb40c17ee1b480": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -1149,10 +1289,10 @@
"description_width": ""
}
},
- "d2918b5864b342e3adca46b781d944d7": {
+ "219db81514d347d39a9ade11620db5b6": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1201,10 +1341,10 @@
"width": null
}
},
- "ba5474abed8646f0997c998eb60c6584": {
+ "25f786c2efc74fc7a930f916903dadfd": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -1216,32 +1356,25 @@
"description_width": ""
}
},
- "842c3c9b10554231bdc9014a8355896f": {
+ "273a94eb6a034e10a9060cbe12e1b6ff": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_40b02c35acb74d8a8f3f97bd332a0199",
- "IPY_MODEL_2d6dbe8d81a9483fb5c8abadd22a1bab",
- "IPY_MODEL_06e06ed849004eedb3d40022bdf2639c"
- ],
- "layout": "IPY_MODEL_5c91591cc3b2450c963e97980b8c15bd"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "40b02c35acb74d8a8f3f97bd332a0199": {
+ "2c4e3d7592c84b109fd2481b93fe3823": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -1253,16 +1386,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_72f5d3ca07814e8282cb036fcc48ac38",
+ "layout": "IPY_MODEL_5164cce340634f108332e1fb1b7dcd66",
"placeholder": "β",
- "style": "IPY_MODEL_b834ae54dc984f78a366c42b6cdecfdd",
- "value": "processor_config.json:β100%"
+ "style": "IPY_MODEL_02a8521e92144c7b8b01ddabf77047d1",
+ "value": "Loadingβweights:β100%"
}
},
"2d6dbe8d81a9483fb5c8abadd22a1bab": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -1283,31 +1416,47 @@
"value": 828
}
},
- "06e06ed849004eedb3d40022bdf2639c": {
+ "336df5b38a024a1387cfbf1294559a06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "36ea958e73cf4abc903c2be3143849f8": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_87ca309f76ce4495b6bd4007f040caa6",
- "placeholder": "β",
- "style": "IPY_MODEL_336df5b38a024a1387cfbf1294559a06",
- "value": "β828/828β[00:00<00:00,β108kB/s]"
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b6b60344cd624c27b09a47a7f09cb3e2",
+ "IPY_MODEL_a5a9d94bbe9c42459383b88f33bb135f",
+ "IPY_MODEL_8289401ce6964124befa97804398aaf9"
+ ],
+ "layout": "IPY_MODEL_99693f4890934fbf8e0bb655136163b6"
}
},
- "5c91591cc3b2450c963e97980b8c15bd": {
+ "3c4700b10c7c433f9d817df80f371341": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1356,77 +1505,56 @@
"width": null
}
},
- "72f5d3ca07814e8282cb036fcc48ac38": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
+ "3d20038999654f41910c5e097e873ca6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_8a927e60416047e5947a6c59d96312a2",
+ "IPY_MODEL_83f466f8a1ba4896a38fd15f335526ac",
+ "IPY_MODEL_42718666a8a141078b8a262bd31121c8"
+ ],
+ "layout": "IPY_MODEL_7259c4ef5abb44d4b6911b6cfdb023e7"
}
},
- "b834ae54dc984f78a366c42b6cdecfdd": {
+ "3df47eda247c4334bd1d2fc2d69e0a0b": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a2789cabde4643dcb440375ed88e7aed",
+ "max": 589,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_7563318658d14ed8a49be73d57af990e",
+ "value": 589
}
},
- "da1d76234b2d4a30a7f72355d6b5b351": {
+ "3e30c4b0fbe84d8596a189f7d9c4016d": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1475,26 +1603,10 @@
"width": null
}
},
- "d32837fabb13404db97963fc187fe5b6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "87ca309f76ce4495b6bd4007f040caa6": {
+ "3f24e66c05f54298b1ade01f50c6780c": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1526,109 +1638,49 @@
"justify_items": null,
"left": null,
"margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "336df5b38a024a1387cfbf1294559a06": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b373bfda9e814a97b05a4348af5bb304": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_79500c893a164c86ae35704a42555c5b",
- "IPY_MODEL_fdee5526846d45d096bca9a5532829a6",
- "IPY_MODEL_4b7995b1e3e2480ab7a23a022a53a9e9"
- ],
- "layout": "IPY_MODEL_631d85f42f7741f3a5929d5fd777813e"
- }
- },
- "79500c893a164c86ae35704a42555c5b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9e8b05ac39124c6481c2728fb90e243e",
- "placeholder": "β",
- "style": "IPY_MODEL_03bf89932cce4b9ab5c0d0110d7c23b6",
- "value": "chat_template.jinja:β"
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": "20px"
}
},
- "fdee5526846d45d096bca9a5532829a6": {
+ "3f2f0594893846beaa27d524cdb43084": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d7f6a24450054d1baeb50111eb4e3264",
- "max": 1,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_0e17ab61aa0b44c4a12807b34698c3db",
- "value": 1
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_74c62184daff4b3caeafe913e9314f2b",
+ "IPY_MODEL_fd9e5bd6e1204e7bb45591979a5c980e",
+ "IPY_MODEL_1772514a2b6a4b25ae19bc7836bbc5f6"
+ ],
+ "layout": "IPY_MODEL_bd40f03f61ea487c8ac299e0faac9c10"
}
},
- "4b7995b1e3e2480ab7a23a022a53a9e9": {
+ "40b02c35acb74d8a8f3f97bd332a0199": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -1640,16 +1692,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_4ae037c83cb9423d86c2f3f3550531dc",
+ "layout": "IPY_MODEL_72f5d3ca07814e8282cb036fcc48ac38",
"placeholder": "β",
- "style": "IPY_MODEL_71d93d3a640548bb8fbdcfee9c5a596b",
- "value": "β2.22k/?β[00:00<00:00,β236kB/s]"
+ "style": "IPY_MODEL_b834ae54dc984f78a366c42b6cdecfdd",
+ "value": "processor_config.json:β100%"
}
},
- "631d85f42f7741f3a5929d5fd777813e": {
+ "41245ab726a14001a5ad646ae6488576": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1698,10 +1750,46 @@
"width": null
}
},
- "9e8b05ac39124c6481c2728fb90e243e": {
+ "42718666a8a141078b8a262bd31121c8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9a402bfdaf9f4e229e9d5bce94104abe",
+ "placeholder": "β",
+ "style": "IPY_MODEL_19a5ca0e1a6448e9a131add20d4995d2",
+ "value": "β843/843β[00:00<00:00,β98.5kB/s]"
+ }
+ },
+ "475e285df2a242588c35b56acf5e2281": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ae037c83cb9423d86c2f3f3550531dc": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1750,25 +1838,53 @@
"width": null
}
},
- "03bf89932cce4b9ab5c0d0110d7c23b6": {
+ "4b7995b1e3e2480ab7a23a022a53a9e9": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae037c83cb9423d86c2f3f3550531dc",
+ "placeholder": "β",
+ "style": "IPY_MODEL_71d93d3a640548bb8fbdcfee9c5a596b",
+ "value": "β2.22k/?β[00:00<00:00,β236kB/s]"
}
},
- "d7f6a24450054d1baeb50111eb4e3264": {
+ "50ea07e2ec724376b68733e61f154736": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_667564fe3c804219a40daf05c01b7c8a",
+ "IPY_MODEL_9a7b2c8b1e8343b0bf9bdf0797e8085e",
+ "IPY_MODEL_9111e89dca0f4b7fbbd866f0be43cc89"
+ ],
+ "layout": "IPY_MODEL_7577eb2740334e14be5eb13367657d0b"
+ }
+ },
+ "5164cce340634f108332e1fb1b7dcd66": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1814,13 +1930,13 @@
"right": null,
"top": null,
"visibility": null,
- "width": "20px"
+ "width": null
}
},
- "0e17ab61aa0b44c4a12807b34698c3db": {
+ "58ff44c83d254a31bb3c7e47452440d2": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -1833,10 +1949,10 @@
"description_width": ""
}
},
- "4ae037c83cb9423d86c2f3f3550531dc": {
+ "5c91591cc3b2450c963e97980b8c15bd": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1885,92 +2001,10 @@
"width": null
}
},
- "71d93d3a640548bb8fbdcfee9c5a596b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3d20038999654f41910c5e097e873ca6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_8a927e60416047e5947a6c59d96312a2",
- "IPY_MODEL_83f466f8a1ba4896a38fd15f335526ac",
- "IPY_MODEL_42718666a8a141078b8a262bd31121c8"
- ],
- "layout": "IPY_MODEL_7259c4ef5abb44d4b6911b6cfdb023e7"
- }
- },
- "8a927e60416047e5947a6c59d96312a2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_640b950749a14c05b7d458b25012734d",
- "placeholder": "β",
- "style": "IPY_MODEL_fcf8fb45e50240ab898c8043f433d258",
- "value": "tokenizer_config.json:β100%"
- }
- },
- "83f466f8a1ba4896a38fd15f335526ac": {
+ "5faad210e20840ce88b8c54a429e5ae7": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f7ce8770e82743b2959f1848c19b0341",
- "max": 843,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_58ff44c83d254a31bb3c7e47452440d2",
- "value": 843
- }
- },
- "42718666a8a141078b8a262bd31121c8": {
- "model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
- "model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -1982,16 +2016,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_9a402bfdaf9f4e229e9d5bce94104abe",
+ "layout": "IPY_MODEL_41245ab726a14001a5ad646ae6488576",
"placeholder": "β",
- "style": "IPY_MODEL_19a5ca0e1a6448e9a131add20d4995d2",
- "value": "β843/843β[00:00<00:00,β98.5kB/s]"
+ "style": "IPY_MODEL_bddbfb8a7db64434ab25a345176a1d67",
+ "value": "model.safetensors:β100%"
}
},
- "7259c4ef5abb44d4b6911b6cfdb023e7": {
+ "631d85f42f7741f3a5929d5fd777813e": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2042,8 +2076,8 @@
},
"640b950749a14c05b7d458b25012734d": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2092,10 +2126,31 @@
"width": null
}
},
- "fcf8fb45e50240ab898c8043f433d258": {
+ "667564fe3c804219a40daf05c01b7c8a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_219db81514d347d39a9ade11620db5b6",
+ "placeholder": "β",
+ "style": "IPY_MODEL_8ed57dced20e4e83a3424b2212777aed",
+ "value": "Writingβmodelβshards:β100%"
+ }
+ },
+ "71d93d3a640548bb8fbdcfee9c5a596b": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -2107,10 +2162,10 @@
"description_width": ""
}
},
- "f7ce8770e82743b2959f1848c19b0341": {
+ "7259c4ef5abb44d4b6911b6cfdb023e7": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2159,26 +2214,10 @@
"width": null
}
},
- "58ff44c83d254a31bb3c7e47452440d2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "9a402bfdaf9f4e229e9d5bce94104abe": {
+ "72f5d3ca07814e8282cb036fcc48ac38": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2227,10 +2266,10 @@
"width": null
}
},
- "19a5ca0e1a6448e9a131add20d4995d2": {
+ "73cbe76f421e4b01b66313a231b2381e": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -2242,32 +2281,10 @@
"description_width": ""
}
},
- "c2c6cab229be45df9a58bfaa1bcf5054": {
+ "74c62184daff4b3caeafe913e9314f2b": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_b6f155daf1944fd2be59b35cb8adb098",
- "IPY_MODEL_fabab603710d42708d15cba390d279ad",
- "IPY_MODEL_0f37cfe8625f4ded90e23278965004b2"
- ],
- "layout": "IPY_MODEL_3e30c4b0fbe84d8596a189f7d9c4016d"
- }
- },
- "b6f155daf1944fd2be59b35cb8adb098": {
- "model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
- "model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -2279,61 +2296,32 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_ce8dad90d69648038ba92e3eb4244792",
+ "layout": "IPY_MODEL_ae1ca3ff0ce840f286a2b2729ddf08e0",
"placeholder": "β",
- "style": "IPY_MODEL_93bf5fe2c57e4cb390562a031d56d03a",
- "value": "tokenizer.json:β"
- }
- },
- "fabab603710d42708d15cba390d279ad": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3f24e66c05f54298b1ade01f50c6780c",
- "max": 1,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_20fb5a2767a24d49bcbb40c17ee1b480",
- "value": 1
+ "style": "IPY_MODEL_83f5d32415c34234aa30b5b3ddb83efc",
+ "value": "generation_config.json:β100%"
}
},
- "0f37cfe8625f4ded90e23278965004b2": {
+ "7563318658d14ed8a49be73d57af990e": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_178764f0a7084f8c93ec0e528f1e682d",
- "placeholder": "β",
- "style": "IPY_MODEL_25f786c2efc74fc7a930f916903dadfd",
- "value": "β4.73M/?β[00:00<00:00,β22.7MB/s]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
}
},
- "3e30c4b0fbe84d8596a189f7d9c4016d": {
+ "7577eb2740334e14be5eb13367657d0b": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2382,10 +2370,10 @@
"width": null
}
},
- "ce8dad90d69648038ba92e3eb4244792": {
+ "7637edb369574df8b9920088160d1640": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2434,25 +2422,10 @@
"width": null
}
},
- "93bf5fe2c57e4cb390562a031d56d03a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3f24e66c05f54298b1ade01f50c6780c": {
+ "772dcadbd43649b2946020d07335e354": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2498,29 +2471,34 @@
"right": null,
"top": null,
"visibility": null,
- "width": "20px"
+ "width": null
}
},
- "20fb5a2767a24d49bcbb40c17ee1b480": {
+ "778f6ecad3bf4478b2c488770a57b31b": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3c4700b10c7c433f9d817df80f371341",
+ "placeholder": "β",
+ "style": "IPY_MODEL_9c6c1b4ae5084dcaa62b757bb2ec2851",
+ "value": "β3.19G/3.19Gβ[00:05<00:00,β799MB/s]"
}
},
- "178764f0a7084f8c93ec0e528f1e682d": {
+ "78fe7f45f5ef45ff89a8927945d783a6": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2569,92 +2547,10 @@
"width": null
}
},
- "25f786c2efc74fc7a930f916903dadfd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "36ea958e73cf4abc903c2be3143849f8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_b6b60344cd624c27b09a47a7f09cb3e2",
- "IPY_MODEL_a5a9d94bbe9c42459383b88f33bb135f",
- "IPY_MODEL_8289401ce6964124befa97804398aaf9"
- ],
- "layout": "IPY_MODEL_99693f4890934fbf8e0bb655136163b6"
- }
- },
- "b6b60344cd624c27b09a47a7f09cb3e2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_772dcadbd43649b2946020d07335e354",
- "placeholder": "β",
- "style": "IPY_MODEL_73cbe76f421e4b01b66313a231b2381e",
- "value": "config.json:β"
- }
- },
- "a5a9d94bbe9c42459383b88f33bb135f": {
+ "79500c893a164c86ae35704a42555c5b": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8e426fea7503472cbdf9001d84aaedbc",
- "max": 1,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_116426cbc3e048cb8a5e82560cdd54e0",
- "value": 1
- }
- },
- "8289401ce6964124befa97804398aaf9": {
- "model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
- "model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -2666,16 +2562,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_7dcd5d13a3b144e9bda5e8084c76426c",
+ "layout": "IPY_MODEL_9e8b05ac39124c6481c2728fb90e243e",
"placeholder": "β",
- "style": "IPY_MODEL_273a94eb6a034e10a9060cbe12e1b6ff",
- "value": "β2.38k/?β[00:00<00:00,β248kB/s]"
+ "style": "IPY_MODEL_03bf89932cce4b9ab5c0d0110d7c23b6",
+ "value": "chat_template.jinja:β"
}
},
- "99693f4890934fbf8e0bb655136163b6": {
+ "7dcd5d13a3b144e9bda5e8084c76426c": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2724,10 +2620,92 @@
"width": null
}
},
- "772dcadbd43649b2946020d07335e354": {
+ "8289401ce6964124befa97804398aaf9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7dcd5d13a3b144e9bda5e8084c76426c",
+ "placeholder": "β",
+ "style": "IPY_MODEL_273a94eb6a034e10a9060cbe12e1b6ff",
+ "value": "β2.38k/?β[00:00<00:00,β248kB/s]"
+ }
+ },
+ "83f466f8a1ba4896a38fd15f335526ac": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f7ce8770e82743b2959f1848c19b0341",
+ "max": 843,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_58ff44c83d254a31bb3c7e47452440d2",
+ "value": 843
+ }
+ },
+ "83f5d32415c34234aa30b5b3ddb83efc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "842c3c9b10554231bdc9014a8355896f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_40b02c35acb74d8a8f3f97bd332a0199",
+ "IPY_MODEL_2d6dbe8d81a9483fb5c8abadd22a1bab",
+ "IPY_MODEL_06e06ed849004eedb3d40022bdf2639c"
+ ],
+ "layout": "IPY_MODEL_5c91591cc3b2450c963e97980b8c15bd"
+ }
+ },
+ "87ca309f76ce4495b6bd4007f040caa6": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2776,25 +2754,31 @@
"width": null
}
},
- "73cbe76f421e4b01b66313a231b2381e": {
+ "8a927e60416047e5947a6c59d96312a2": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_640b950749a14c05b7d458b25012734d",
+ "placeholder": "β",
+ "style": "IPY_MODEL_fcf8fb45e50240ab898c8043f433d258",
+ "value": "tokenizer_config.json:β100%"
}
},
"8e426fea7503472cbdf9001d84aaedbc": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2843,26 +2827,25 @@
"width": "20px"
}
},
- "116426cbc3e048cb8a5e82560cdd54e0": {
+ "8ed57dced20e4e83a3424b2212777aed": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "7dcd5d13a3b144e9bda5e8084c76426c": {
+ "9101f321004e45b589b22084e654a1fe": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2911,47 +2894,10 @@
"width": null
}
},
- "273a94eb6a034e10a9060cbe12e1b6ff": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "c8b8972136184e0ab8b7d78c0a4d094c": {
+ "9111e89dca0f4b7fbbd866f0be43cc89": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_5faad210e20840ce88b8c54a429e5ae7",
- "IPY_MODEL_0b1634525cb74f71b1ff1929bb040df7",
- "IPY_MODEL_778f6ecad3bf4478b2c488770a57b31b"
- ],
- "layout": "IPY_MODEL_7637edb369574df8b9920088160d1640"
- }
- },
- "5faad210e20840ce88b8c54a429e5ae7": {
- "model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
- "model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -2963,61 +2909,31 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_41245ab726a14001a5ad646ae6488576",
+ "layout": "IPY_MODEL_d2918b5864b342e3adca46b781d944d7",
"placeholder": "β",
- "style": "IPY_MODEL_bddbfb8a7db64434ab25a345176a1d67",
- "value": "model.safetensors:β100%"
- }
- },
- "0b1634525cb74f71b1ff1929bb040df7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_78fe7f45f5ef45ff89a8927945d783a6",
- "max": 3193334216,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_f042d469768e4e71b3a9b2f0ce412e19",
- "value": 3193334216
+ "style": "IPY_MODEL_ba5474abed8646f0997c998eb60c6584",
+ "value": "β1/1β[00:06<00:00,ββ6.11s/it]"
}
},
- "778f6ecad3bf4478b2c488770a57b31b": {
+ "93bf5fe2c57e4cb390562a031d56d03a": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3c4700b10c7c433f9d817df80f371341",
- "placeholder": "β",
- "style": "IPY_MODEL_9c6c1b4ae5084dcaa62b757bb2ec2851",
- "value": "β3.19G/3.19Gβ[00:05<00:00,β799MB/s]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "7637edb369574df8b9920088160d1640": {
+ "99693f4890934fbf8e0bb655136163b6": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3066,10 +2982,10 @@
"width": null
}
},
- "41245ab726a14001a5ad646ae6488576": {
+ "9a402bfdaf9f4e229e9d5bce94104abe": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3118,10 +3034,34 @@
"width": null
}
},
- "bddbfb8a7db64434ab25a345176a1d67": {
+ "9a7b2c8b1e8343b0bf9bdf0797e8085e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_034be1ed88444496af62403b4f0bdab3",
+ "max": 1,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_f85783c3f9094aecb4c7d4eea369e841",
+ "value": 1
+ }
+ },
+ "9c6c1b4ae5084dcaa62b757bb2ec2851": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -3133,10 +3073,10 @@
"description_width": ""
}
},
- "78fe7f45f5ef45ff89a8927945d783a6": {
+ "9e8b05ac39124c6481c2728fb90e243e": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3185,26 +3125,10 @@
"width": null
}
},
- "f042d469768e4e71b3a9b2f0ce412e19": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "3c4700b10c7c433f9d817df80f371341": {
+ "a2789cabde4643dcb440375ed88e7aed": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3253,47 +3177,10 @@
"width": null
}
},
- "9c6c1b4ae5084dcaa62b757bb2ec2851": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "ace560bb9d614871849c67d76023a9ac": {
+ "a55033559b404853b19ab1ca973ac782": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_2c4e3d7592c84b109fd2481b93fe3823",
- "IPY_MODEL_3df47eda247c4334bd1d2fc2d69e0a0b",
- "IPY_MODEL_a55033559b404853b19ab1ca973ac782"
- ],
- "layout": "IPY_MODEL_0c51ed97d58c409fa4495ac7a9fc7d2a"
- }
- },
- "2c4e3d7592c84b109fd2481b93fe3823": {
- "model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
- "model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -3305,16 +3192,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_5164cce340634f108332e1fb1b7dcd66",
+ "layout": "IPY_MODEL_9101f321004e45b589b22084e654a1fe",
"placeholder": "β",
- "style": "IPY_MODEL_02a8521e92144c7b8b01ddabf77047d1",
- "value": "Loadingβweights:β100%"
+ "style": "IPY_MODEL_c766b170a5d94950ba7f610ee882e283",
+ "value": "β589/589β[00:01<00:00,β688.52it/s,βMaterializingβparam=model.vision_tower.vision_model.post_layernorm.weight]"
}
},
- "3df47eda247c4334bd1d2fc2d69e0a0b": {
+ "a5a9d94bbe9c42459383b88f33bb135f": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -3327,39 +3214,40 @@
"bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_a2789cabde4643dcb440375ed88e7aed",
- "max": 589,
+ "layout": "IPY_MODEL_8e426fea7503472cbdf9001d84aaedbc",
+ "max": 1,
"min": 0,
"orientation": "horizontal",
- "style": "IPY_MODEL_7563318658d14ed8a49be73d57af990e",
- "value": 589
+ "style": "IPY_MODEL_116426cbc3e048cb8a5e82560cdd54e0",
+ "value": 1
}
},
- "a55033559b404853b19ab1ca973ac782": {
+ "ace560bb9d614871849c67d76023a9ac": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9101f321004e45b589b22084e654a1fe",
- "placeholder": "β",
- "style": "IPY_MODEL_c766b170a5d94950ba7f610ee882e283",
- "value": "β589/589β[00:01<00:00,β688.52it/s,βMaterializingβparam=model.vision_tower.vision_model.post_layernorm.weight]"
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_2c4e3d7592c84b109fd2481b93fe3823",
+ "IPY_MODEL_3df47eda247c4334bd1d2fc2d69e0a0b",
+ "IPY_MODEL_a55033559b404853b19ab1ca973ac782"
+ ],
+ "layout": "IPY_MODEL_0c51ed97d58c409fa4495ac7a9fc7d2a"
}
},
- "0c51ed97d58c409fa4495ac7a9fc7d2a": {
+ "ae1ca3ff0ce840f286a2b2729ddf08e0": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3408,62 +3296,89 @@
"width": null
}
},
- "5164cce340634f108332e1fb1b7dcd66": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
+ "b373bfda9e814a97b05a4348af5bb304": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_79500c893a164c86ae35704a42555c5b",
+ "IPY_MODEL_fdee5526846d45d096bca9a5532829a6",
+ "IPY_MODEL_4b7995b1e3e2480ab7a23a022a53a9e9"
+ ],
+ "layout": "IPY_MODEL_631d85f42f7741f3a5929d5fd777813e"
+ }
+ },
+ "b6b60344cd624c27b09a47a7f09cb3e2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_772dcadbd43649b2946020d07335e354",
+ "placeholder": "β",
+ "style": "IPY_MODEL_73cbe76f421e4b01b66313a231b2381e",
+ "value": "config.json:β"
+ }
+ },
+ "b6f155daf1944fd2be59b35cb8adb098": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ce8dad90d69648038ba92e3eb4244792",
+ "placeholder": "β",
+ "style": "IPY_MODEL_93bf5fe2c57e4cb390562a031d56d03a",
+ "value": "tokenizer.json:β"
+ }
+ },
+ "b834ae54dc984f78a366c42b6cdecfdd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "02a8521e92144c7b8b01ddabf77047d1": {
+ "ba5474abed8646f0997c998eb60c6584": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -3475,10 +3390,10 @@
"description_width": ""
}
},
- "a2789cabde4643dcb440375ed88e7aed": {
+ "bd40f03f61ea487c8ac299e0faac9c10": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3527,26 +3442,47 @@
"width": null
}
},
- "7563318658d14ed8a49be73d57af990e": {
+ "bddbfb8a7db64434ab25a345176a1d67": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "9101f321004e45b589b22084e654a1fe": {
+ "c2c6cab229be45df9a58bfaa1bcf5054": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b6f155daf1944fd2be59b35cb8adb098",
+ "IPY_MODEL_fabab603710d42708d15cba390d279ad",
+ "IPY_MODEL_0f37cfe8625f4ded90e23278965004b2"
+ ],
+ "layout": "IPY_MODEL_3e30c4b0fbe84d8596a189f7d9c4016d"
+ }
+ },
+ "c757f43343d949df8f384bd0c21b505f": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3597,8 +3533,8 @@
},
"c766b170a5d94950ba7f610ee882e283": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -3610,10 +3546,10 @@
"description_width": ""
}
},
- "3f2f0594893846beaa27d524cdb43084": {
+ "c8b8972136184e0ab8b7d78c0a4d094c": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -3625,83 +3561,17 @@
"_view_name": "HBoxView",
"box_style": "",
"children": [
- "IPY_MODEL_74c62184daff4b3caeafe913e9314f2b",
- "IPY_MODEL_fd9e5bd6e1204e7bb45591979a5c980e",
- "IPY_MODEL_1772514a2b6a4b25ae19bc7836bbc5f6"
+ "IPY_MODEL_5faad210e20840ce88b8c54a429e5ae7",
+ "IPY_MODEL_0b1634525cb74f71b1ff1929bb040df7",
+ "IPY_MODEL_778f6ecad3bf4478b2c488770a57b31b"
],
- "layout": "IPY_MODEL_bd40f03f61ea487c8ac299e0faac9c10"
- }
- },
- "74c62184daff4b3caeafe913e9314f2b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ae1ca3ff0ce840f286a2b2729ddf08e0",
- "placeholder": "β",
- "style": "IPY_MODEL_83f5d32415c34234aa30b5b3ddb83efc",
- "value": "generation_config.json:β100%"
- }
- },
- "fd9e5bd6e1204e7bb45591979a5c980e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c757f43343d949df8f384bd0c21b505f",
- "max": 136,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_06b1c189384544a2b48094e2fe8e4145",
- "value": 136
- }
- },
- "1772514a2b6a4b25ae19bc7836bbc5f6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_173ff5e74e8f4e50899f8528cb092c77",
- "placeholder": "β",
- "style": "IPY_MODEL_475e285df2a242588c35b56acf5e2281",
- "value": "β136/136β[00:00<00:00,β18.2kB/s]"
+ "layout": "IPY_MODEL_7637edb369574df8b9920088160d1640"
}
},
- "bd40f03f61ea487c8ac299e0faac9c10": {
+ "ce8dad90d69648038ba92e3eb4244792": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3750,10 +3620,10 @@
"width": null
}
},
- "ae1ca3ff0ce840f286a2b2729ddf08e0": {
+ "d2918b5864b342e3adca46b781d944d7": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3802,25 +3672,78 @@
"width": null
}
},
- "83f5d32415c34234aa30b5b3ddb83efc": {
+ "d32837fabb13404db97963fc187fe5b6": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "c757f43343d949df8f384bd0c21b505f": {
+ "d7f6a24450054d1baeb50111eb4e3264": {
"model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
"model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": "20px"
+ }
+ },
+ "da1d76234b2d4a30a7f72355d6b5b351": {
+ "model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3869,10 +3792,10 @@
"width": null
}
},
- "06b1c189384544a2b48094e2fe8e4145": {
+ "f042d469768e4e71b3a9b2f0ce412e19": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -3885,10 +3808,10 @@
"description_width": ""
}
},
- "173ff5e74e8f4e50899f8528cb092c77": {
+ "f7ce8770e82743b2959f1848c19b0341": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3937,10 +3860,50 @@
"width": null
}
},
- "475e285df2a242588c35b56acf5e2281": {
+ "f85783c3f9094aecb4c7d4eea369e841": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "fabab603710d42708d15cba390d279ad": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3f24e66c05f54298b1ade01f50c6780c",
+ "max": 1,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_20fb5a2767a24d49bcbb40c17ee1b480",
+ "value": 1
+ }
+ },
+ "fcf8fb45e50240ab898c8043f433d258": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -3951,10 +3914,58 @@
"_view_name": "StyleView",
"description_width": ""
}
+ },
+ "fd9e5bd6e1204e7bb45591979a5c980e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c757f43343d949df8f384bd0c21b505f",
+ "max": 136,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_06b1c189384544a2b48094e2fe8e4145",
+ "value": 136
+ }
+ },
+ "fdee5526846d45d096bca9a5532829a6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d7f6a24450054d1baeb50111eb4e3264",
+ "max": 1,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_0e17ab61aa0b44c4a12807b34698c3db",
+ "value": 1
+ }
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
-}
\ No newline at end of file
+}
diff --git "a/notebooks/\360\237\222\247_LFM2_DPO_with_TRL.ipynb" "b/notebooks/\360\237\222\247_LFM2_DPO_with_TRL.ipynb"
index b283012..f030814 100644
--- "a/notebooks/\360\237\222\247_LFM2_DPO_with_TRL.ipynb"
+++ "b/notebooks/\360\237\222\247_LFM2_DPO_with_TRL.ipynb"
@@ -1,23 +1,10 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": [],
- "gpuType": "T4"
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- },
- "accelerator": "GPU"
- },
"cells": [
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "a3PTFH-H9Ozk"
+ },
"source": [
"# π§ LFM2 - DPO with TRL\n",
"\n",
@@ -32,53 +19,42 @@
"- **GPU Runtime**: Select GPU in `Runtime` β `Change runtime type`\n",
"- **Hugging Face Account**: For accessing models and datasets\n",
"\n"
- ],
- "metadata": {
- "id": "a3PTFH-H9Ozk"
- }
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "x0RPLu2h9ome"
+ },
"source": [
"# π¦ Installation & Setup\n",
"\n",
"First, let's install all the required packages:\n"
- ],
- "metadata": {
- "id": "x0RPLu2h9ome"
- }
+ ]
},
{
"cell_type": "code",
- "source": "!uv pip install transformers==4.54.0 trl>=0.18.2 peft>=0.15.2",
+ "execution_count": null,
"metadata": {
"id": "3FIcp_wo9nsR"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "!uv pip install transformers==4.54.0 \"trl>=0.18.2\" \"peft>=0.15.2\""
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "Let's now verify the packages are installed correctly"
- ],
"metadata": {
"id": "41UEf1uxCd6m"
- }
+ },
+ "source": [
+ "Let's now verify the packages are installed correctly"
+ ]
},
{
"cell_type": "code",
- "source": [
- "import torch\n",
- "import transformers\n",
- "import trl\n",
- "import os\n",
- "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
- "\n",
- "print(f\"π¦ PyTorch version: {torch.__version__}\")\n",
- "print(f\"π€ Transformers version: {transformers.__version__}\")\n",
- "print(f\"π TRL version: {trl.__version__}\")"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -86,71 +62,60 @@
"id": "bSJgYtHT_Os4",
"outputId": "d483d722-d85a-4de3-d266-347c75abbfce"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"π¦ PyTorch version: 2.6.0+cu124\n",
"π€ Transformers version: 4.54.0\n",
"π TRL version: 0.19.1\n"
]
}
+ ],
+ "source": [
+ "import torch\n",
+ "import transformers\n",
+ "import trl\n",
+ "import os\n",
+ "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+ "\n",
+ "print(f\"π¦ PyTorch version: {torch.__version__}\")\n",
+ "print(f\"π€ Transformers version: {transformers.__version__}\")\n",
+ "print(f\"π TRL version: {trl.__version__}\")"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "v_uXLzxQ_rnK"
+ },
"source": [
"# Loading the model from Transformers π€\n",
"\n"
- ],
- "metadata": {
- "id": "v_uXLzxQ_rnK"
- }
+ ]
},
{
"cell_type": "code",
- "source": [
- "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
- "import torch\n",
- "\n",
- "model_name = \"LiquidAI/LFM2.5-1.2B-Instruct\"\n",
- "\n",
- "print(\"π Loading tokenizer...\")\n",
- "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
- "\n",
- "print(\"π§ Loading model...\")\n",
- "model = AutoModelForCausalLM.from_pretrained(\n",
- " model_name,\n",
- " device_map=\"auto\",\n",
- " torch_dtype=\"auto\",\n",
- ")\n",
- "\n",
- "print(\"β
Local model loaded successfully!\")\n",
- "print(f\"π’ Parameters: {model.num_parameters():,}\")\n",
- "print(f\"π Vocab size: {len(tokenizer)}\")\n",
- "print(f\"πΎ Model size: ~{model.num_parameters() * 2 / 1e9:.1f} GB (bfloat16)\")"
- ],
+ "execution_count": null,
"metadata": {
- "id": "iA3erKM4-HhS",
"colab": {
"base_uri": "https://localhost:8080/"
},
+ "id": "iA3erKM4-HhS",
"outputId": "e19261f7-5e96-4756-e001-80af1d9d37b5"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"π Loading tokenizer...\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
"The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
@@ -161,8 +126,8 @@
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"π§ Loading model...\n",
"β
Local model loaded successfully!\n",
@@ -171,53 +136,56 @@
"πΎ Model size: ~2.3 GB (bfloat16)\n"
]
}
+ ],
+ "source": [
+ "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
+ "import torch\n",
+ "\n",
+ "model_name = \"LiquidAI/LFM2.5-1.2B-Instruct\"\n",
+ "\n",
+ "print(\"π Loading tokenizer...\")\n",
+ "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+ "\n",
+ "print(\"π§ Loading model...\")\n",
+ "model = AutoModelForCausalLM.from_pretrained(\n",
+ " model_name,\n",
+ " device_map=\"auto\",\n",
+ " torch_dtype=\"auto\",\n",
+ ")\n",
+ "\n",
+ "print(\"β
Local model loaded successfully!\")\n",
+ "print(f\"π’ Parameters: {model.num_parameters():,}\")\n",
+ "print(f\"π Vocab size: {len(tokenizer)}\")\n",
+ "print(f\"πΎ Model size: ~{model.num_parameters() * 2 / 1e9:.1f} GB (bfloat16)\")"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "o83NqFPNA_nk"
+ },
"source": [
"# π― Direct Preference Optimization (DPO + LoRA)\n",
"\n",
"DPO aligns the model with human preferences by learning from preference pairs (chosen vs rejected responses). This typically follows SFT training.\n",
"\n",
"DPO might be too compute heavy if you're running on one of the free-tier colab GPUs. Hence we use LoRA (Low-Rank Adaptation) to finetune the model by only training a small number of additional parameters. Perfect for limited compute resources!"
- ],
- "metadata": {
- "id": "o83NqFPNA_nk"
- }
+ ]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "0w-40n_XJL9H"
+ },
"source": [
"## Load a DPO Dataset\n",
"\n",
"We will use [mlabonne/orpo-dpo-mix-40k](https://huggingface.co/datasets/mlabonne/orpo-dpo-mix-40k), limiting ourselves to the first 2k samples for brevity. Feel free to change the limit by changing the slicing index in the parameter `split`. The size of the validation data can be adjusted by changing `test_size`."
- ],
- "metadata": {
- "id": "0w-40n_XJL9H"
- }
+ ]
},
{
"cell_type": "code",
- "source": [
- "from datasets import load_dataset\n",
- "\n",
- "print(\"π₯ Loading DPO dataset...\")\n",
- "\n",
- "dataset_dpo = load_dataset(\"mlabonne/orpo-dpo-mix-40k\", split=\"train[:2000]\")\n",
- "dataset_dpo = dataset_dpo.train_test_split(test_size=0.1, seed=42)\n",
- "train_dataset_dpo, eval_dataset_dpo = dataset_dpo['train'], dataset_dpo['test']\n",
- "\n",
- "print(\"β
DPO Dataset loaded:\")\n",
- "print(f\" π Train samples: {len(train_dataset_dpo)}\")\n",
- "print(f\" π§ͺ Eval samples: {len(eval_dataset_dpo)}\")\n",
- "\n",
- "sample = train_dataset_dpo[0]\n",
- "print(\"\\nπ Single Sample:\")\n",
- "print(f\" Prompt: {sample['prompt'][:100]}...\")\n",
- "print(f\" β
Chosen: {sample['chosen'][:100]}...\")\n",
- "print(f\" β Rejected: {sample['rejected'][:100]}...\")"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -225,11 +193,10 @@
"id": "Ajq6EABUBAv_",
"outputId": "1b6e9f8f-e7a7-472d-e50b-b40b450d336e"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"π₯ Loading DPO dataset...\n",
"β
DPO Dataset loaded:\n",
@@ -242,10 +209,32 @@
" β Rejected: [{'content': 'Classify the following instruments into their respective families (brass, strings, woodwinds, or percussion): Didgeridoo, Cuica, Euphonium, Guzheng', 'role': 'user'}, {'content': 'The classification of the mentioned instruments is as follows:\\n\\n1. Didgeridoo: Brass\\n2. Cuica: Percussion\\n3. Euphonium: Brass\\n4. Guzheng: Strings', 'role': 'assistant'}, {'content': 'Explain the construction and sound production process of each instrument mentioned, and how these processes relate to their classification in their respective families.', 'role': 'user'}, {'content': \"1. Didgeridoo: The didgeridoo is traditionally made from eucalyptus trees which have been naturally hollowed out by termites. The mouthpiece can be made of beeswax or shaped directly from the wood. The player blows into the instrument without interruption while using a special breathing technique called circular breathing. The vibration of the lips and the resonances in the tube produce the sound. Although the didgeridoo is not a typical brass instrument, it is often classified as such due to the lip-vibrated method of sound production, similar to a brass instrument.\\n\\n2. Cuica: The cuica is a Brazilian friction drum with a large pitch range, produced by changing tension on the head of the drum. It has a stick fixed in the center of the drum head, and the player rubs this stick with a wet cloth and simultaneously modulates the pitch by pressing the thumb on the drum head. This results in a sound that can be used to mimic a variety of effects, often the call of wild animals. It is classified as a percussion instrument due to the way the sound is produced by striking or rubbing.\\n\\n3. Euphonium: The euphonium is a conical-bore, tenor-voiced brass instrument. The mouthpiece of the euphonium is larger than that of a trumpet, and this, along with the conical bore, contributes to its rich, dark tone. 
The player buzzes their lips into the mouthpiece, creating vibrations that travel through the instrument and are amplified by the bell. The pitch can be altered by changing the lip tension and by using the valves to change the length of the tubing. Its classification as a brass instrument is due to the material it's made from and the method of sound production.\\n\\n4. Guzheng: The guzheng, also known as the Chinese zither, is a string instrument with movable bridges and 21 strings. It is made of a large, resonant wooden body, over which the strings are stretched. The player plucks the strings with their fingers, often with the aid of plectra attached to four of the fingers on the right or both hands. The left hand can also press the strings to the left of the bridges to change the pitch or produce vibrato. It is classified as a string instrument because the sound is produced by vibrating strings.\", 'role': 'assistant'}, {'content': 'Discuss the technique of circular breathing used in playing the Didgeridoo and its impact on the sound produced.', 'role': 'user'}, {'content': \"Circular breathing is a technique used by players of some wind instruments to produce a continuous tone without interruption. This technique is used extensively in playing the didgeridoo. It involves inhaling through the nose while maintaining airflow through the instrument, using air stored in the cheeks.\\n\\nHere's a basic breakdown of how it works:\\n\\n1. Fill your cheeks with air and begin to play the instrument.\\n2. While you're blowing this air out through your mouth into the instrument, start to breathe in through your nose.\\n3. As your lungs fill with air, continue to push the air out of your mouth using your cheeks and tongue.\\n4. Once your cheeks are empty, switch back to blowing air from your lungs into the instrument, and refill your cheeks.\\n\\nThis technique allows the player to sustain a drone for a long period of time, which is characteristic of didgeridoo music. 
It creates a continuous, rhythmic sound and allows for the layering of complex rhythms and sound effects. The circular breathing technique contributes to the mesmerizing, trance-like quality of the didgeridoo's sound. It also allows for the performance of more complex pieces that require extended periods of sound without breaks for breath.\", 'role': 'assistant'}, {'content': 'Can you elaborate on the physiological and mental challenges a musician might face while learning and mastering the technique of circular breathing?', 'role': 'user'}, {'content': \"Learning and mastering the technique of circular breathing can pose numerous physiological and mental challenges for instrumentalists. Here are some of the challenges a musician may face:\\n\\n1. Coordination: Circular breathing requires a high degree of muscular and mental coordination between respiratory, oral, and facial muscles. This includes skills like inhalation through the nose, simultaneous sound production, and control over airflow from the lungs and cheeks. The learning curve can be steep, and several hours of practice may be required to achieve the basic level of proficiency.\\n\\n2. Breath control: Attaining consistent breath control is crucial in circular breathing. A musician must be able to pulse and maintain a steady airflow through the nose while precisely regulating the rate and amount of air entering the lungs. This requires a high level of breath control and lung capacity.\\n\\n3. Cheek strength and control: Circular breathing demands significant strength and control of the cheek muscles. The cheeks must act as a second lung, storing air while the player inhales through the nose and releasing it back into the instrument when blowing from the lungs. This requires practice and muscle conditioning to build the endurance necessary for prolonged circular breathing sessions.\\n\\n4. 
Embouchure: Keeping a stable embouchure β the position and tension of the lips and mouth β is important while incorporating the circular breathing technique. It may take time to adjust to this combination and prevent air loss or a change in the quality of the sound while switching from blowing from the lungs to inhaling through the nose.\\n\\n5. Mental focus and concentration: Circular breathing involves sustaining multiple tasks simultaneously and requires a high concentration level. Musicians must maintain focus on the sound production while switching between inhaling and exhaling, ensuring an uninterrupted airflow and sound throughout the performance.\\n\\nIn addition to these physiological challenges, mastering circular breathing can also have mental benefits. It can enhance an individual's mindfulness, concentration, and breath awareness, leading to improved performance in various aspects of life. However, the learning process may be frustrating at times due to the high level of coordination and practice involved. Overcoming these challenges can be rewarding and offer a sense of accomplishment as the musician achieves greater proficiency and control in the circular breathing technique.\", 'role': 'assistant'}]...\n"
]
}
+ ],
+ "source": [
+ "from datasets import load_dataset\n",
+ "\n",
+ "print(\"π₯ Loading DPO dataset...\")\n",
+ "\n",
+ "dataset_dpo = load_dataset(\"mlabonne/orpo-dpo-mix-40k\", split=\"train[:2000]\")\n",
+ "dataset_dpo = dataset_dpo.train_test_split(test_size=0.1, seed=42)\n",
+ "train_dataset_dpo, eval_dataset_dpo = dataset_dpo['train'], dataset_dpo['test']\n",
+ "\n",
+ "print(\"β
DPO Dataset loaded:\")\n",
+ "print(f\" π Train samples: {len(train_dataset_dpo)}\")\n",
+ "print(f\" π§ͺ Eval samples: {len(eval_dataset_dpo)}\")\n",
+ "\n",
+ "sample = train_dataset_dpo[0]\n",
+ "print(\"\\nπ Single Sample:\")\n",
+ "print(f\" Prompt: {sample['prompt'][:100]}...\")\n",
+ "print(f\" β
Chosen: {sample['chosen'][:100]}...\")\n",
+ "print(f\" β Rejected: {sample['rejected'][:100]}...\")"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "v8or_m-UdKGc"
+ },
"source": [
"## Wrap the model with PEFT\n",
"\n",
@@ -254,13 +243,31 @@
"- lower -> needs even less compute resources\n",
"\n",
"You can skip this part if you have a premium GPU and want to go for a full finetune."
- ],
- "metadata": {
- "id": "v8or_m-UdKGc"
- }
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "z7x5RhC5eLmc",
+ "outputId": "f6553df2-ba07-4839-9c73-2c55dec64495"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "trainable params: 5,554,176 || all params: 1,175,894,784 || trainable%: 0.4723\n",
+ "β
LoRA configuration applied!\n",
+ "ποΈ LoRA rank: 8\n",
+ "π LoRA alpha: 16\n",
+ "π― Target modules: {'w2', 'out_proj', 'in_proj', 'q_proj', 'k_proj', 'w3', 'v_proj', 'w1'}\n"
+ ]
+ }
+ ],
"source": [
"from peft import LoraConfig, get_peft_model, TaskType\n",
"\n",
@@ -286,126 +293,65 @@
"print(f\"ποΈ LoRA rank: {lora_config.r}\")\n",
"print(f\"π LoRA alpha: {lora_config.lora_alpha}\")\n",
"print(f\"π― Target modules: {lora_config.target_modules}\")"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "z7x5RhC5eLmc",
- "outputId": "f6553df2-ba07-4839-9c73-2c55dec64495"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "trainable params: 5,554,176 || all params: 1,175,894,784 || trainable%: 0.4723\n",
- "β
LoRA configuration applied!\n",
- "ποΈ LoRA rank: 8\n",
- "π LoRA alpha: 16\n",
- "π― Target modules: {'w2', 'out_proj', 'in_proj', 'q_proj', 'k_proj', 'w3', 'v_proj', 'w1'}\n"
- ]
- }
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "yxuxfHu7Jopx"
+ },
"source": [
"## Launch Training\n",
"\n",
"We are now ready to launch a DPO run with `DPOTrainer`, feel free to modify `DPOConfig` to play around with different configurations.\n",
"\n",
"Replace `lora_model` with `model` if you have a premium-tier in colab and want to run a full finetune."
- ],
- "metadata": {
- "id": "yxuxfHu7Jopx"
- }
+ ]
},
{
"cell_type": "code",
- "source": [
- "from trl import DPOConfig, DPOTrainer\n",
- "\n",
- "# DPO Training configuration\n",
- "dpo_config = DPOConfig(\n",
- " output_dir=\"./lfm2-dpo\",\n",
- " num_train_epochs=1,\n",
- " per_device_train_batch_size=1,\n",
- " learning_rate=1e-6,\n",
- " lr_scheduler_type=\"linear\",\n",
- " logging_steps=10,\n",
- " save_strategy=\"epoch\",\n",
- " eval_strategy=\"epoch\",\n",
- " bf16=False # <- not all colab GPUs support bf16\n",
- ")\n",
- "\n",
- "# Create DPO trainer\n",
- "print(\"ποΈ Creating DPO trainer...\")\n",
- "dpo_trainer = DPOTrainer(\n",
- " model=lora_model,\n",
- " args=dpo_config,\n",
- " train_dataset=train_dataset_dpo,\n",
- " eval_dataset=eval_dataset_dpo,\n",
- " processing_class=tokenizer,\n",
- ")\n",
- "\n",
- "# Start DPO training\n",
- "print(\"\\nπ Starting DPO training...\")\n",
- "dpo_trainer.train()\n",
- "\n",
- "print(\"π DPO training completed!\")\n",
- "\n",
- "# Save the DPO model\n",
- "dpo_trainer.save_model()\n",
- "print(f\"πΎ DPO model saved to: {dpo_config.output_dir}\")"
- ],
+ "execution_count": null,
"metadata": {
- "id": "nGBgkf9LBHp6",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 263
},
+ "id": "nGBgkf9LBHp6",
"outputId": "5242d3ac-16e2-4ff3-c4b6-fdfb32c6cf23"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n",
"Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"ποΈ Creating DPO trainer...\n"
]
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"\n",
"π Starting DPO training...\n"
]
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- ""
- ],
"text/html": [
"\n",
" \n",
@@ -445,66 +391,122 @@
" \n",
" \n",
"
"
+ ],
+ "text/plain": [
+ ""
]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n"
]
},
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"π DPO training completed!\n",
"πΎ DPO model saved to: ./lfm2-dpo\n"
]
}
+ ],
+ "source": [
+ "from trl import DPOConfig, DPOTrainer\n",
+ "\n",
+ "# DPO Training configuration\n",
+ "dpo_config = DPOConfig(\n",
+ " output_dir=\"./lfm2-dpo\",\n",
+ " num_train_epochs=1,\n",
+ " per_device_train_batch_size=1,\n",
+ " learning_rate=1e-6,\n",
+ " lr_scheduler_type=\"linear\",\n",
+ " logging_steps=10,\n",
+ " save_strategy=\"epoch\",\n",
+ " eval_strategy=\"epoch\",\n",
+ " bf16=False # <- not all colab GPUs support bf16\n",
+ ")\n",
+ "\n",
+ "# Create DPO trainer\n",
+ "print(\"ποΈ Creating DPO trainer...\")\n",
+ "dpo_trainer = DPOTrainer(\n",
+ " model=lora_model,\n",
+ " args=dpo_config,\n",
+ " train_dataset=train_dataset_dpo,\n",
+ " eval_dataset=eval_dataset_dpo,\n",
+ " processing_class=tokenizer,\n",
+ ")\n",
+ "\n",
+ "# Start DPO training\n",
+ "print(\"\\nπ Starting DPO training...\")\n",
+ "dpo_trainer.train()\n",
+ "\n",
+ "print(\"π DPO training completed!\")\n",
+ "\n",
+ "# Save the DPO model\n",
+ "dpo_trainer.save_model()\n",
+ "print(f\"πΎ DPO model saved to: {dpo_config.output_dir}\")"
]
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "WIrCmjUsfNIg"
+ },
"source": [
"## Save merged model\n",
"\n",
"If you have used LoRA. Merge the extra weights learned with LoRA back into the model to obtain a \"normal\" model checkpoint."
- ],
- "metadata": {
- "id": "WIrCmjUsfNIg"
- }
+ ]
},
{
"cell_type": "code",
- "source": [
- "print(\"\\nπ Merging LoRA weights...\")\n",
- "merged_model = lora_model.merge_and_unload()\n",
- "merged_model.save_pretrained(\"./lfm2-lora-merged\")\n",
- "tokenizer.save_pretrained(\"./lfm2-lora-merged\")\n",
- "print(\"πΎ Merged model saved to: ./lfm2-lora-merged\")"
- ],
+ "execution_count": null,
"metadata": {
- "id": "RUJoU3BwfPTg",
"colab": {
"base_uri": "https://localhost:8080/"
},
+ "id": "RUJoU3BwfPTg",
"outputId": "45e09b8f-9089-4237-ffff-c4d59db6ff89"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"\n",
"π Merging LoRA weights...\n",
"πΎ Merged model saved to: ./lfm2-lora-merged\n"
]
}
+ ],
+ "source": [
+ "print(\"\\nπ Merging LoRA weights...\")\n",
+ "merged_model = lora_model.merge_and_unload()\n",
+ "merged_model.save_pretrained(\"./lfm2-lora-merged\")\n",
+ "tokenizer.save_pretrained(\"./lfm2-lora-merged\")\n",
+ "print(\"πΎ Merged model saved to: ./lfm2-lora-merged\")"
]
}
- ]
-}
\ No newline at end of file
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/util/README.md b/util/README.md
new file mode 100644
index 0000000..8924740
--- /dev/null
+++ b/util/README.md
@@ -0,0 +1,83 @@
+# Notebook Testing Utilities
+
+This directory contains tools for running Jupyter notebooks as smoke tests on [Modal](https://modal.com/) GPUs. The system extracts code cells from `.ipynb` files, collects their dependencies, and executes them remotely to verify they work end-to-end.
+
+## Overview
+
+| File | Purpose |
+|---|---|
+| `modal_runner.py` | Modal app that runs Python code on a remote GPU container |
+| `run_notebook_test.py` | CLI that parses a notebook, extracts dependencies, and submits the combined script to Modal |
+
+## How it works
+
+1. **`run_notebook_test.py`** reads a notebook and extracts all code cells.
+2. Pip install lines (`!pip install ...`, `!uv pip install ...`) are extracted from **every** cell, collected as image-level dependencies, and stripped from the script so they don't run at execution time.
+3. Shell commands (`!` lines) are converted to `subprocess.run()` calls; `%%bash` cells become setup commands.
+4. The remaining cells are concatenated into a single Python script, preserving shared state (variables, imports) across cells just like a real notebook.
+5. The script, dependency list, and setup commands are sent to **`modal_runner.py`**'s `run_code` function via Modal's remote invocation.
+
+**`modal_runner.py`** runs on an A10G GPU (default) with a 10-minute timeout. It:
+- Installs pip packages in order (each `pip install` line becomes a separate install group to preserve ordering)
+- Runs any setup commands (shell commands, `%%bash` cells)
+- Executes the combined Python script as a subprocess and captures stdout/stderr
+
+## Usage
+
+### Prerequisites
+
+- A Modal account with a deployed `ci-runner` app
+- Modal token configured locally
+
+### Deploy the Modal app (one-time)
+
+```bash
+modal deploy util/modal_runner.py
+```
+
+### Run a notebook
+
+```bash
+python util/run_notebook_test.py --notebook notebooks/MyNotebook.ipynb
+```
+
+### Options
+
+| Flag | Description | Default |
+|---|---|---|
+| `--notebook` | Path to `.ipynb` file (required) | — |
+| `--gpu` | GPU type | `A10G` |
+| `--dry-run` | Print the combined script and packages without running | off |
+| `--skip-packages PKG ...` | Package names to exclude from installation | none |
+
+### Examples
+
+```bash
+# Dry run to inspect what would be executed
+python util/run_notebook_test.py --notebook notebooks/LFM2_Inference.ipynb --dry-run
+
+# Skip a package that doesn't build in CI
+python util/run_notebook_test.py --notebook notebooks/LFM2_Inference.ipynb --skip-packages flash-attn
+
+# Use a different GPU
+python util/run_notebook_test.py --notebook notebooks/LFM2_Inference.ipynb --gpu A100
+```
+
+## Notebook directives
+
+Control which cells and lines are included in the test run:
+
+| Directive | Scope | Effect |
+|---|---|---|
+| `# test:skip` | Cell | Skip the entire cell |
+| `# !modal_skip` | Line or cell | Skip that line (in dependency parsing) or cell (in cell filtering) |
+| `# !modal_skip_rest` | Line or cell | Stop processing — ignore this and all subsequent lines/cells |
+
+These can be placed in comments within any code cell.
+
+## CI workflow
+
+The GitHub Actions workflow (`.github/workflows/run-notebooks.yaml`) automatically:
+1. Discovers all `.ipynb` files in the `notebooks/` directory
+2. Runs each notebook in parallel as a separate matrix job
+3. Triggers on pushes/PRs that change `notebooks/**`, or manually via `workflow_dispatch`
diff --git a/util/modal_runner.py b/util/modal_runner.py
new file mode 100644
index 0000000..b75bada
--- /dev/null
+++ b/util/modal_runner.py
@@ -0,0 +1,62 @@
+import modal
+import subprocess, sys, os, tempfile
+
+app = modal.App("ci-runner")
+
+@app.function(
+    gpu="A10G",
+    timeout=600,
+    image=modal.Image.debian_slim(python_version="3.12")
+    .apt_install("curl", "wget", "zstd", "git")
+    .pip_install("uv", "typing_extensions>=4.14.0"),
+)
+def run_code(code: str, pip_packages: list[list[str]] = [], setup_commands: list[str] = []) -> dict:
+    """Install dependencies, run setup commands, then execute ``code`` as a script.
+
+    Args:
+        code: Python source written to a temp file and run in a fresh
+            interpreter subprocess.
+        pip_packages: Install groups; each inner list is passed to one
+            ``uv pip install --system`` call, in order.
+        setup_commands: Shell commands run with ``shell=True`` after the installs.
+
+    Returns:
+        Dict with ``success`` (exit code was 0), ``stdout``, ``stderr`` and
+        ``error`` (``None`` on success, otherwise the exit code followed by
+        the last 2000 characters of stderr).
+
+    Raises:
+        subprocess.CalledProcessError: An install or setup command failed.
+        subprocess.TimeoutExpired: The script ran longer than 540 seconds.
+    """
+    # Disable vLLM v1 multiprocessing (incompatible with containers)
+    os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
+
+    # Restore real stdout/stderr — Modal wraps them with objects that
+    # lack fileno(), which breaks subprocesses and libraries like vLLM.
+    real_stdout = open("/dev/stdout", "w")
+    real_stderr = open("/dev/stderr", "w")
+    sys.stdout = real_stdout
+    sys.stderr = real_stderr
+
+    for group in pip_packages:
+        print(f"[ci-runner] Installing: {group}")
+        subprocess.check_call(
+            ["uv", "pip", "install", "--system", *group],
+            stdout=real_stdout, stderr=real_stderr,
+        )
+
+    for cmd in setup_commands:
+        print(f"[ci-runner] Setup: {cmd}")
+        subprocess.run(cmd, shell=True, check=True, stdout=real_stdout, stderr=real_stderr)
+
+    # Remove Modal's bundled deps from sys.path to prevent shadowing
+    clean_path = [p for p in sys.path if "/__modal/deps" not in p]
+
+    # Write code to a temp file and run as a subprocess so that
+    # stdout/stderr are real file descriptors (required by vLLM, etc.)
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
+        f.write(code)
+        script_path = f.name
+
+    env = os.environ.copy()
+    env["PYTHONPATH"] = ":".join(clean_path)
+
+    try:
+        proc = subprocess.run(
+            [sys.executable, script_path],
+            capture_output=True,
+            text=True,
+            timeout=540,
+            # Hand the cleaned PYTHONPATH to the child; without this the
+            # environment built above is never used.
+            env=env,
+        )
+    finally:
+        # Remove the temp script even when the run times out or raises.
+        os.unlink(script_path)
+
+    return {
+        "success": proc.returncode == 0,
+        "stdout": proc.stdout,
+        "stderr": proc.stderr,
+        "error": None if proc.returncode == 0 else f"Exit code {proc.returncode}\n{proc.stderr[-2000:] if proc.stderr else ''}",
+    }
\ No newline at end of file
diff --git a/util/run_notebook_test.py b/util/run_notebook_test.py
new file mode 100644
index 0000000..3775de8
--- /dev/null
+++ b/util/run_notebook_test.py
@@ -0,0 +1,324 @@
+#!/usr/bin/env python3
+"""
+Run code cells from a notebook on a Modal GPU as a single combined script.
+
+Pip install lines are extracted from ALL cells, collected as image dependencies,
+and stripped from the script so they don't run at execution time.
+Non-skipped cells are concatenated into one script and executed together,
+preserving shared state (variables, imports) across cells just like a real notebook.
+
+Usage:
+ python run_notebook_test.py --notebook notebooks/LFM2_Inference_with_Transformers.ipynb
+ python run_notebook_test.py --notebook notebooks/LFM2_Inference_with_Transformers.ipynb --gpu A10G
+ python run_notebook_test.py --notebook notebooks/LFM2_Inference_with_Transformers.ipynb --dry-run
+"""
+
+import argparse
+import re
+import sys
+import time
+from pathlib import Path
+import json
+import modal
+
+sys.path.insert(0, str(Path(__file__).resolve().parent / "tests"))
+
+
+def extract_code_cells(notebook_path: Path) -> list[dict]:
+    """Load a notebook and describe each of its code cells.
+
+    Args:
+        notebook_path: Path to the ``.ipynb`` file (JSON).
+
+    Returns:
+        One dict per code cell, in notebook order, with keys:
+        ``index`` (0-based position among code cells only), ``source``
+        (the cell's source joined into one string) and ``skipped`` (truthy
+        when the source contains ``# test:skip`` or the cell metadata has a
+        truthy ``test_skip`` entry).
+    """
+    # Notebook files are UTF-8 JSON; do not rely on the platform default
+    # encoding, which fails on non-ASCII content (e.g. emoji) on some systems.
+    with open(notebook_path, encoding="utf-8") as f:
+        nb = json.load(f)
+
+    cells = []
+    code_index = 0
+    for cell in nb.get("cells", []):
+        if cell["cell_type"] != "code":
+            continue
+
+        source = "".join(cell["source"])
+        skip = (
+            "# test:skip" in source
+            or cell.get("metadata", {}).get("test_skip", False)
+        )
+
+        cells.append({
+            "index": code_index,
+            "source": source,
+            "skipped": skip,
+        })
+        code_index += 1
+
+    return cells
+
+def parse_packages_from_cell(source: str) -> tuple[list[list[str]], list[str]]:
+    """Parse dependency cell into pip package groups and shell setup commands.
+
+    Returns (pip_package_groups, setup_commands). Each ``pip install`` command
+    becomes its own group so that install ordering is preserved (e.g. packages
+    that need torch at build time can be installed in a later group).
+    Non-pip ``!`` lines become setup commands that run before the Python script.
+    Lines after ``# !modal_skip_rest`` are ignored; lines with
+    ``# !modal_skip`` are skipped individually.
+
+    A ``pip install`` command may span several lines via trailing backslashes;
+    the continuation lines are folded into the same group (matching what
+    ``strip_pip_lines`` removes). Tokens starting with ``-`` are dropped, and
+    everything from a token starting with ``#`` onward is treated as a comment.
+    Commas inside a version specifier (``pkg>=1.0,<2.0``) are kept intact.
+
+    NOTE(review): values of options that take an argument (e.g. the URL after
+    ``--index-url``) are still collected as if they were packages.
+    """
+    package_groups: list[list[str]] = []
+    setup_commands: list[str] = []
+    lines = source.splitlines()
+    i = 0
+    while i < len(lines):
+        line = lines[i].strip()
+        i += 1
+        # Stop processing if we hit a modal_skip_rest directive
+        if "!modal_skip_rest" in line:
+            break
+        # Skip individual lines marked with modal_skip
+        if "!modal_skip" in line:
+            continue
+        # Match: !pip install ..., !uv pip install ..., pip install ...
+        match = re.match(r"^!?\s*(?:uv\s+)?pip\s+install\s+(.+)", line)
+        if match:
+            args = match.group(1)
+            # Fold backslash-continued lines into this install command.
+            while args.endswith("\\") and i < len(lines):
+                args = args[:-1] + " " + lines[i].strip()
+                i += 1
+            args = args.rstrip("\\")
+            group = []
+            # Split on whitespace, and on commas that do not introduce another
+            # version clause (so "a, b" splits but "a>=1,<2" stays whole).
+            for token in re.split(r"\s+|,(?![<>=!~])", args):
+                if token.startswith("#"):
+                    break  # rest of the line is a shell comment
+                token = token.strip('"').strip("'").strip(",")
+                if token and not token.startswith("-"):
+                    group.append(token)
+            if group:
+                package_groups.append(group)
+        elif line.startswith("!"):
+            setup_commands.append(line[1:].strip())
+    return package_groups, setup_commands
+
+
+def strip_pip_lines(source: str) -> str:
+    """Return ``source`` with every pip-install command removed.
+
+    A command is a line matching ``[!][uv ]pip install ...`` together with any
+    following lines reached through trailing-backslash continuations. All
+    other lines are kept verbatim and re-joined with newlines. Used once the
+    packages have been collected, so installs happen as dependencies instead
+    of at script execution time.
+    """
+    pip_install = re.compile(r"^!?\s*(?:uv\s+)?pip\s+install\s+")
+    kept: list[str] = []
+    remaining = iter(source.splitlines())
+    for raw in remaining:
+        text = raw.strip()
+        if pip_install.match(text) is None:
+            kept.append(raw)
+            continue
+        # Swallow continuation lines for as long as the current one ends in "\".
+        while text.endswith("\\"):
+            following = next(remaining, None)
+            if following is None:
+                break
+            text = following.strip()
+    return "\n".join(kept)
+
+
+# Matches a call to one of the listed trainer classes, e.g. ``SFTTrainer(``.
+TRAINER_PATTERNS = re.compile(r"\b(?:SFTTrainer|GRPOTrainer|DPOTrainer|Trainer)\s*\(")
+
+
+def is_training_notebook(code_cells: list[dict]) -> bool:
+    """Tell whether at least one cell's ``source`` matches ``TRAINER_PATTERNS``."""
+    for cell in code_cells:
+        if TRAINER_PATTERNS.search(cell["source"]) is not None:
+            return True
+    return False
+
+# Smoke tests use a tiny epoch count by default to keep the pipeline fast.
+def patch_training_epochs(source: str, epochs: float = 0.01) -> str:
+    """Rewrite every numeric ``num_train_epochs=<n>`` in ``source`` to ``epochs``.
+
+    Only literal digit/dot values are matched; assignments from a variable
+    are left untouched.
+    """
+    replacement = f"num_train_epochs={epochs}"
+    epochs_assignment = re.compile(r"num_train_epochs\s*=\s*[0-9.]+")
+    return epochs_assignment.sub(replacement, source)
+
+
+def filter_cells(code_cells: list[dict]) -> list[dict]:
+    """Drop cells according to the ``!modal_skip`` / ``!modal_skip_rest`` directives.
+
+    - A cell whose source contains ``!modal_skip_rest`` is dropped together
+      with every cell after it.
+    - A cell whose source contains ``!modal_skip`` is dropped on its own.
+    """
+    kept = []
+    for cell in code_cells:
+        text = cell["source"]
+        if "!modal_skip_rest" in text:
+            break  # nothing from here on is run
+        if "!modal_skip" not in text:
+            kept.append(cell)
+    return kept
+
+
+def preprocess_cell(source: str) -> tuple[str, list[str]]:
+    """Transform cell source so shell lines become valid Python.
+
+    Returns (transformed_source, setup_commands).
+    - ``%%bash`` cells are collected as setup commands and excluded from the script.
+    - ``!`` lines are converted to ``subprocess.run()`` calls.
+    - Pure Python lines are unchanged.
+
+    For a ``%%bash`` cell the transformed source is the empty string and the
+    lines after the magic form a single multi-line setup command. For any
+    other cell ``setup_commands`` is returned empty.
+    """
+    lines = source.splitlines()
+    setup_commands: list[str] = []
+
+    # Detect %%bash magic (may follow comment lines)
+    non_comment = [l for l in lines if l.strip() and not l.strip().startswith("#")]
+    if non_comment and non_comment[0].strip() == "%%bash":
+        bash_lines: list[str] = []
+        found_magic = False
+        for line in lines:
+            # Skip everything up to and including the %%bash line itself.
+            if not found_magic:
+                if line.strip() == "%%bash":
+                    found_magic = True
+                continue
+            bash_lines.append(line)
+        if bash_lines:
+            setup_commands.append("\n".join(bash_lines))
+        return "", setup_commands
+
+    # Convert ! lines to subprocess.run(), joining backslash continuations
+    transformed: list[str] = []
+    i = 0
+    while i < len(lines):
+        stripped = lines[i].lstrip()
+        if stripped.startswith("!"):
+            # Keep the line's original indentation so the generated call stays
+            # inside whatever block the shell line was written in.
+            indent = lines[i][: len(lines[i]) - len(stripped)]
+            cmd_parts = [stripped[1:]]
+            # Collect continuation lines ending with backslash
+            while cmd_parts[-1].rstrip().endswith("\\") and i + 1 < len(lines):
+                i += 1
+                cmd_parts.append(lines[i])
+            full_cmd = "\n".join(cmd_parts)
+            # Ensure uv/pip installs use --system (no venv in containers)
+            # re.match anchors at the start of full_cmd, so a command written
+            # with a space after "!" (e.g. "! pip install x") is not rewritten.
+            # NOTE(review): --system is also added to plain "pip install";
+            # confirm the pip in the target image accepts that option.
+            if re.match(r"(?:uv\s+)?pip\s+install", full_cmd) and "--system" not in full_cmd:
+                full_cmd = full_cmd.replace("pip install", "pip install --system", 1)
+            # !r embeds the command as a quoted Python string literal.
+            transformed.append(f"{indent}subprocess.run({full_cmd!r}, shell=True, check=True)")
+        else:
+            transformed.append(lines[i])
+        i += 1
+    return "\n".join(transformed), setup_commands
+
+
+def combine_cells(code_cells: list[dict]) -> tuple[str, list[str]]:
+    """Build one script out of the given cells.
+
+    Every cell is run through ``preprocess_cell``. Cells whose transformed
+    source is non-blank are emitted after a ``# --- cell N ---`` marker, with
+    blank lines between pieces. If any emitted piece contains
+    ``subprocess.run(``, ``import subprocess`` is placed at the very top.
+
+    Returns (combined_code, setup_commands), where ``setup_commands`` gathers
+    the setup commands of all cells in order.
+    """
+    chunks: list[str] = []
+    setup_commands: list[str] = []
+    needs_subprocess = False
+
+    for cell in code_cells:
+        body, cell_setup = preprocess_cell(cell["source"])
+        setup_commands += cell_setup
+        if not body.strip():
+            continue
+        needs_subprocess = needs_subprocess or "subprocess.run(" in body
+        chunks.append(f"# --- cell {cell['index']} ---")
+        chunks.append(body)
+
+    script = "\n\n".join(chunks)
+    # The generated subprocess.run() calls need the module in scope.
+    if needs_subprocess:
+        script = "import subprocess\n\n" + script
+    return script, setup_commands
+
+
+def main():
+    """CLI entry point: turn a notebook into one script and run it on Modal.
+
+    Steps: parse arguments, load the notebook's code cells, drop skipped
+    cells, collect and strip pip-install lines, combine the remaining cells,
+    shrink training epochs for training notebooks, then either print the
+    result (``--dry-run``) or submit it to the deployed ``ci-runner`` app.
+
+    Exits with status 1 when the notebook is missing, has no code cells, the
+    Modal app is not found, or the remote run reports an error; exits with
+    status 0 when every code cell is skipped.
+    """
+    parser = argparse.ArgumentParser(description="Run notebook code cells on Modal as a single script")
+    parser.add_argument("--notebook", required=True, help="Path to .ipynb file")
+    parser.add_argument("--gpu", default="A10G", help="GPU type (default: A10G)")
+    parser.add_argument("--dry-run", action="store_true", help="Print combined script and packages without running")
+    parser.add_argument("--skip-packages", nargs="+", default=[], metavar="PKG",
+                        help="Package names to exclude from installation (e.g. --skip-packages flash-attn)")
+    args = parser.parse_args()
+
+    notebook_path = Path(args.notebook)
+    if not notebook_path.exists():
+        print(f"Error: notebook not found: {notebook_path}")
+        sys.exit(1)
+
+    cells = extract_code_cells(notebook_path)
+    if not cells:
+        print("No code cells found.")
+        sys.exit(1)
+
+    # Filter out skipped / modal_skip cells
+    code_cells = [c for c in cells if not c["skipped"]]
+    code_cells = filter_cells(code_cells)
+
+    if not code_cells:
+        print(f"Notebook {notebook_path.name}: no runnable code cells (all skipped) — passing.")
+        sys.exit(0)
+
+    # Extract pip packages from ALL cells, then strip those lines from source.
+    # Only collect packages here — shell commands (! lines, %%bash) in regular
+    # cells are handled by preprocess_cell inside combine_cells.
+    pip_packages: list[list[str]] = []
+    for cell in code_cells:
+        pkgs, _setup = parse_packages_from_cell(cell["source"])
+        pip_packages.extend(pkgs)
+        cell["source"] = strip_pip_lines(cell["source"])
+
+    # Filter out skipped packages
+    if args.skip_packages:
+        skip_set = set(args.skip_packages)
+        pip_packages = [
+            [pkg for pkg in group if pkg not in skip_set]
+            for group in pip_packages
+        ]
+        pip_packages = [g for g in pip_packages if g]  # drop empty groups
+
+    combined, setup_commands = combine_cells(code_cells)
+    skipped = len(cells) - len(code_cells)
+
+    # Auto-detect training notebooks: upgrade GPU and patch epochs for smoke testing
+    is_training = is_training_notebook(code_cells)
+    if is_training:
+        if args.gpu == "A10G":  # only override if user didn't specify
+            args.gpu = "H100"
+        combined = patch_training_epochs(combined, epochs=0.01)
+
+    print(f"{'=' * 50}")
+    print(f"Notebook: {notebook_path.name}")
+    if is_training:
+        print(f"Type: training (auto: GPU→{args.gpu}, epochs→0.01)")
+    all_packages = [pkg for group in pip_packages for pkg in group]
+    print(f"Packages: {all_packages or '(none)'} ({len(pip_packages)} install group(s))")
+    if setup_commands:
+        print(f"Setup: {len(setup_commands)} command(s)")
+    print(f"GPU: {args.gpu}")
+    print(f"Cells: {len(code_cells)} to run, {skipped} skipped")
+    print(f"{'=' * 50}\n")
+
+    if args.dry_run:
+        if setup_commands:
+            print("=== SETUP COMMANDS ===")
+            for i, cmd in enumerate(setup_commands):
+                print(f"\n--- setup {i} ---")
+                print(cmd)
+        print(f"\n=== PYTHON SCRIPT ===\n")
+        print(combined)
+        return
+
+    # NOTE(review): args.gpu is only printed above; it is not part of the
+    # remote call below, so the GPU choice does not reach Modal from here.
+    try:
+        # from_name() resolves lazily, so a missing deployment is reported on
+        # first use; the remote call therefore has to sit inside this try.
+        run_code = modal.Function.from_name("ci-runner", "run_code")
+        print("Submitting to Modal...")
+        start_time = time.time()
+        result = run_code.remote(code=combined, pip_packages=pip_packages, setup_commands=setup_commands)
+    except modal.exception.NotFoundError:
+        print("Error: Modal app 'ci-runner' not deployed.")
+        print("Run this first: modal deploy modal_runner.py")
+        sys.exit(1)
+    elapsed = time.time() - start_time
+
+    print("\n--- STDOUT ---")
+    print(result["stdout"] or "(empty)")
+
+    if result["stderr"]:
+        print("\n--- STDERR ---")
+        print(result["stderr"])
+
+    if result["error"]:
+        print(f"\n❌ FAILED: {result['error']}")
+        sys.exit(1)
+
+    print(f"\nRuntime: {elapsed:.2f}s")
+    print(f"✅ All {len(code_cells)} cells passed")
+
+
+if __name__ == "__main__":
+ main()