From dd0b7f2ee00d8a87c71acd0dc10415cb48d80b80 Mon Sep 17 00:00:00 2001 From: "enzymezoo.code" Date: Thu, 17 Jul 2025 12:12:41 -0500 Subject: [PATCH 1/4] Add stable audio 2.5 and audio inpaint --- nbs/Stable_Audio_API.ipynb | 112 +++++++++++++++++++++++++++++++++++-- 1 file changed, 108 insertions(+), 4 deletions(-) diff --git a/nbs/Stable_Audio_API.ipynb b/nbs/Stable_Audio_API.ipynb index 2be251ea..0d494d11 100644 --- a/nbs/Stable_Audio_API.ipynb +++ b/nbs/Stable_Audio_API.ipynb @@ -54,7 +54,7 @@ "steps = 50 #@param {type:\"number\"}\n", "cfg_scale = 7.0 #@param {type:\"number\"}\n", "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", - "\n", + "model = \"stable-audio-2.5\" #@param ['stable-audio-2', 'stable-audio-2.5'] {type:\"string\"}\n", "response = requests.post(\n", " \"https://api.stability.ai/v2beta/audio/stable-audio-2/text-to-audio\",\n", " headers={\"Authorization\": f\"Bearer {STABILITY_KEY}\", \"Accept\": \"audio/*\"},\n", @@ -66,6 +66,7 @@ " \"steps\": steps,\n", " \"cfg_scale\" : cfg_scale,\n", " \"output_format\": output_format,\n", + " \"model\": model\n", " }\n", ")\n", "if not response.ok:\n", @@ -152,7 +153,8 @@ "steps = 50 #@param {type:\"number\"}\n", "cfg_scale = 7.0 #@param {type:\"number\"}\n", "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", - "strength = 1.0 #@param {type:\"number\"}\n", + "strength = 0.5 #@param {type:\"number\"}\n", + "model = \"stable-audio-2.5\" #@param ['stable-audio-2', 'stable-audio-2.5'] {type:\"string\"}\n", "\n", "response = requests.post(\n", " \"https://api.stability.ai/v2beta/audio/stable-audio-2/audio-to-audio\",\n", @@ -166,6 +168,7 @@ " \"cfg_scale\" : cfg_scale,\n", " \"output_format\": output_format,\n", " \"strength\": strength,\n", + " \"model\": model\n", " }\n", ")\n", "if not response.ok:\n", @@ -190,8 +193,109 @@ "metadata": { "id": "TDWW5DQbDnNo" }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"Saved inpaint.mp3\n", + "\n", + "Original audio:\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Generation result:\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#@title Inpaint\n", + "\n", + "#@markdown - Drag and drop a .wav or .mp3 to file folder on left\n", + "#@markdown - Right click on it and choose Copy path\n", + "#@markdown - Paste that path into audio field below\n", + "#@markdown

\n", + "\n", + "prompt = \"Lofi hip hop beat, chillhop\" #@param {type:\"string\"}\n", + "audio = \"/content/piano.mp3\" #@param {type:\"string\"}\n", + "duration = 45 #@param {type:\"number\"}\n", + "seed = 0 #@param {type:\"number\"}\n", + "steps = 50 #@param {type:\"number\"}\n", + "cfg_scale = 7.0 #@param {type:\"number\"}\n", + "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", + "strength = 1.0 #@param {type:\"number\"}\n", + "mask_start = 15.0 #@param {type:\"number\"}\n", + "mask_end = 40.0 #@param {type:\"number\"}\n", + "\n", + "response = requests.post(\n", + " \"https://api.stability.ai/v2beta/audio/stable-audio-2/inpaint\",\n", + " headers={\"Authorization\": f\"Bearer {STABILITY_KEY}\", \"Accept\": \"audio/*\"},\n", + " files={\"audio\": open(audio, \"rb\")},\n", + " data={\n", + " \"prompt\" : prompt,\n", + " \"duration\": duration,\n", + " \"seed\": seed,\n", + " \"steps\": steps,\n", + " \"cfg_scale\" : cfg_scale,\n", + " \"output_format\": output_format,\n", + " \"strength\": strength,\n", + " \"mask_start\": mask_start,\n", + " \"mask_end\": mask_end\n", + " }\n", + ")\n", + "if not response.ok:\n", + " raise Exception(f\"HTTP {response.status_code}: {response.text}\")\n", + "\n", + "# Save and show the result\n", + "filename = f\"inpaint.mp3\"\n", + "with open(filename, \"wb\") as f:\n", + " f.write(response.content)\n", + "print(f\"Saved {filename}\")\n", + "\n", + "print(\"\\nOriginal audio:\")\n", + "IPython.display.display(IPython.display.Audio(audio))\n", + "\n", + "print(\"\\nGeneration result:\")\n", + "IPython.display.display(IPython.display.Audio(filename))" + ] } ], "metadata": { From 6020feb7f1d11471a1949e931d76f53fb8aa349f Mon Sep 17 00:00:00 2001 From: "enzymezoo.code" Date: Thu, 17 Jul 2025 12:13:22 -0500 Subject: [PATCH 2/4] Fix upscale pricing --- nbs/Stable_Image_API_Public.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nbs/Stable_Image_API_Public.ipynb 
b/nbs/Stable_Image_API_Public.ipynb index a604243b..f247064d 100644 --- a/nbs/Stable_Image_API_Public.ipynb +++ b/nbs/Stable_Image_API_Public.ipynb @@ -1018,7 +1018,9 @@ "- Creative Upscaler: This service will transform a low quality, low resolution image into a stunning work of art with intricate details at 9mp resolution - regardless of the input resolution. Provide an input image of poor quality and add a `prompt` that describes the desired output. High `creativity` (up to 0.35) will yield dramatic changes to the image.\n", "This service will use 25 credits.\n", "\n", - "- Conservative Upscaler: Upscale and image to 4K resolution while minimizing alterations to the image. This service will use 3 credits.\n", + "- Conservative Upscaler: Upscale a low-resolution image to 4K without reinterpreting it. This service will use 25 credits.\n", + "\n", + "- Fast Upscaler: Simple, low-cost upscaler to increase image resolution by 4x, up to 4 megapixels. This service will use 1 credit.\n", "\n", "See https://platform.stability.ai/docs/api-reference#tag/Upscale\n", "\n", From 45b1dbd29163751873c3aa4aef97c1e529f1796b Mon Sep 17 00:00:00 2001 From: "enzymezoo.code" Date: Wed, 30 Jul 2025 09:45:31 -0500 Subject: [PATCH 3/4] Fix default cfg scale and steps for Stable Audio 2.5 --- nbs/Stable_Audio_API.ipynb | 188 +++++++++++++++++++++++++++++++++++-- 1 file changed, 178 insertions(+), 10 deletions(-) diff --git a/nbs/Stable_Audio_API.ipynb b/nbs/Stable_Audio_API.ipynb index 0d494d11..1e833506 100644 --- a/nbs/Stable_Audio_API.ipynb +++ b/nbs/Stable_Audio_API.ipynb @@ -46,7 +46,7 @@ } ], "source": [ - "#@title Text to Audio\n", + "#@title Stable Audio 2: Text to Audio\n", "\n", "prompt = \"Genre: UK Bass | Instruments: 707 Drum Machine, Strings, 808 bass stabs, Beautiful Synths\" #@param {type:\"string\"}\n", "duration = 190 #@param {type:\"number\"}\n", @@ -54,7 +54,6 @@ "steps = 50 #@param {type:\"number\"}\n", "cfg_scale = 7.0 #@param {type:\"number\"}\n", "output_format = \"mp3\" 
#@param ['mp3', 'wav'] {type:\"string\"}\n", - "model = \"stable-audio-2.5\" #@param ['stable-audio-2', 'stable-audio-2.5'] {type:\"string\"}\n", "response = requests.post(\n", " \"https://api.stability.ai/v2beta/audio/stable-audio-2/text-to-audio\",\n", " headers={\"Authorization\": f\"Bearer {STABILITY_KEY}\", \"Accept\": \"audio/*\"},\n", @@ -66,7 +65,72 @@ " \"steps\": steps,\n", " \"cfg_scale\" : cfg_scale,\n", " \"output_format\": output_format,\n", - " \"model\": model\n", + " \"model\": \"stable-audio-2\"\n", + " }\n", + ")\n", + "if not response.ok:\n", + " raise Exception(f\"HTTP {response.status_code}: {response.text}\")\n", + "\n", + "# Save and show the result\n", + "filename = f\"txt2audio.mp3\"\n", + "with open(filename, \"wb\") as f:\n", + " f.write(response.content)\n", + "print(f\"Saved {filename}\")\n", + "\n", + "IPython.display.display(IPython.display.Audio(filename))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved txt2audio.mp3\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#@title Stable Audio 2.5: Text to Audio\n", + "\n", + "prompt = \"Genre: UK Bass | Instruments: 707 Drum Machine, Strings, 808 bass stabs, Beautiful Synths\" #@param {type:\"string\"}\n", + "duration = 190 #@param {type:\"number\"}\n", + "seed = 0 #@param {type:\"number\"}\n", + "steps = 8 #@param {type:\"number\"}\n", + "cfg_scale = 1.0 #@param {type:\"number\"}\n", + "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", + "response = requests.post(\n", + " \"https://api.stability.ai/v2beta/audio/stable-audio-2/text-to-audio\",\n", + " headers={\"Authorization\": f\"Bearer {STABILITY_KEY}\", \"Accept\": \"audio/*\"},\n", + " files={\"image\": None},\n", + " data={\n", + " \"prompt\" : prompt,\n", + " 
\"duration\": duration,\n", + " \"seed\": seed,\n", + " \"steps\": steps,\n", + " \"cfg_scale\" : cfg_scale,\n", + " \"output_format\": output_format,\n", + " \"model\": \"stable-audio-2.5\"\n", " }\n", ")\n", "if not response.ok:\n", @@ -139,7 +203,7 @@ } ], "source": [ - "#@title Audio to Audio\n", + "#@title Stable Audio 2: Audio to Audio\n", "\n", "#@markdown - Drag and drop a .wav or .mp3 to file folder on left\n", "#@markdown - Right click on it and choose Copy path\n", @@ -154,7 +218,6 @@ "cfg_scale = 7.0 #@param {type:\"number\"}\n", "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", "strength = 0.5 #@param {type:\"number\"}\n", - "model = \"stable-audio-2.5\" #@param ['stable-audio-2', 'stable-audio-2.5'] {type:\"string\"}\n", "\n", "response = requests.post(\n", " \"https://api.stability.ai/v2beta/audio/stable-audio-2/audio-to-audio\",\n", @@ -168,7 +231,112 @@ " \"cfg_scale\" : cfg_scale,\n", " \"output_format\": output_format,\n", " \"strength\": strength,\n", - " \"model\": model\n", + " \"model\": \"stable-audio-2\"\n", + " }\n", + ")\n", + "if not response.ok:\n", + " raise Exception(f\"HTTP {response.status_code}: {response.text}\")\n", + "\n", + "# Save and show the result\n", + "filename = f\"audio2audio.mp3\"\n", + "with open(filename, \"wb\") as f:\n", + " f.write(response.content)\n", + "print(f\"Saved {filename}\")\n", + "\n", + "print(\"\\nOriginal audio:\")\n", + "IPython.display.display(IPython.display.Audio(audio))\n", + "\n", + "print(\"\\nGeneration result:\")\n", + "IPython.display.display(IPython.display.Audio(filename))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved audio2audio.mp3\n", + "\n", + "Original audio:\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\n", + "Generation result:\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#@title Stable Audio 2.5: Audio to Audio\n", + "\n", + "#@markdown - Drag and drop a .wav or .mp3 to file folder on left\n", + "#@markdown - Right click on it and choose Copy path\n", + "#@markdown - Paste that path into audio field below\n", + "#@markdown

\n", + "\n", + "prompt = \"Lofi hip hop beat, chillhop\" #@param {type:\"string\"}\n", + "audio = \"/content/piano.mp3\" #@param {type:\"string\"}\n", + "duration = 45 #@param {type:\"number\"}\n", + "seed = 0 #@param {type:\"number\"}\n", + "steps = 8 #@param {type:\"number\"}\n", + "cfg_scale = 1.0 #@param {type:\"number\"}\n", + "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", + "strength = 0.5 #@param {type:\"number\"}\n", + "\n", + "response = requests.post(\n", + " \"https://api.stability.ai/v2beta/audio/stable-audio-2/audio-to-audio\",\n", + " headers={\"Authorization\": f\"Bearer {STABILITY_KEY}\", \"Accept\": \"audio/*\"},\n", + " files={\"audio\": open(audio, \"rb\")},\n", + " data={\n", + " \"prompt\" : prompt,\n", + " \"duration\": duration,\n", + " \"seed\": seed,\n", + " \"steps\": steps,\n", + " \"cfg_scale\" : cfg_scale,\n", + " \"output_format\": output_format,\n", + " \"strength\": strength,\n", + " \"model\": \"stable-audio-2.5\"\n", " }\n", ")\n", "if not response.ok:\n", @@ -258,8 +426,8 @@ "audio = \"/content/piano.mp3\" #@param {type:\"string\"}\n", "duration = 45 #@param {type:\"number\"}\n", "seed = 0 #@param {type:\"number\"}\n", - "steps = 50 #@param {type:\"number\"}\n", - "cfg_scale = 7.0 #@param {type:\"number\"}\n", + "steps = 8 #@param {type:\"number\"}\n", + "cfg_scale = 1.0 #@param {type:\"number\"}\n", "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", "strength = 1.0 #@param {type:\"number\"}\n", "mask_start = 15.0 #@param {type:\"number\"}\n", @@ -303,7 +471,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "venv", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -317,7 +485,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.13.3" } }, "nbformat": 4, From 495785aee2ba2bf45b5e0f70b2420454c1502c61 Mon Sep 17 00:00:00 2001 From: "enzymezoo.code" Date: Wed, 30 Jul 2025 09:51:20 -0500 
Subject: [PATCH 4/4] Drop strength for inpaint --- nbs/Stable_Audio_API.ipynb | 2 -- 1 file changed, 2 deletions(-) diff --git a/nbs/Stable_Audio_API.ipynb b/nbs/Stable_Audio_API.ipynb index 1e833506..08ef25f4 100644 --- a/nbs/Stable_Audio_API.ipynb +++ b/nbs/Stable_Audio_API.ipynb @@ -429,7 +429,6 @@ "steps = 8 #@param {type:\"number\"}\n", "cfg_scale = 1.0 #@param {type:\"number\"}\n", "output_format = \"mp3\" #@param ['mp3', 'wav'] {type:\"string\"}\n", - "strength = 1.0 #@param {type:\"number\"}\n", "mask_start = 15.0 #@param {type:\"number\"}\n", "mask_end = 40.0 #@param {type:\"number\"}\n", "\n", @@ -444,7 +443,6 @@ " \"steps\": steps,\n", " \"cfg_scale\" : cfg_scale,\n", " \"output_format\": output_format,\n", - " \"strength\": strength,\n", " \"mask_start\": mask_start,\n", " \"mask_end\": mask_end\n", " }\n",