Background generation updates

oskip0906 · oskip0906 · commit 5986b49a04c0 · 2025-04-22T21:30:13.000-04:00
diff --git a/README.md b/README.md
@@ -104,7 +104,9 @@ _This script is useful for **testing and fine-tuning** object placements before
 
 All the images rendered are just 2 objects on a white surface with a light grey "sky". If you would like to add a custom (more realistic) background to each image in your image output folder, you can navigate to the `generate_backgrounds.sh` script.
 
-- Here, you can customize the `PROMPT` argument to guide the background generation process.
+- Here, you can customize the `PROMPT` (what you want the scene to include) and `NEGATIVE_PROMPT` (what you want to avoid in the scene) variables to guide the background generation process.
+
+- Make sure to set the `HF_TOKEN` variable to a valid Hugging Face token; the script uses it to log in to Hugging Face before running the model.
 
 - The process utilizes the [**Stable Diffusion XL inpainting model**](https://huggingface.co/diffusers/stable-diffusion-xl-1.0-inpainting-0.1) to modify specific regions of images with custom backgrounds.
 
diff --git a/docs/index.rst b/docs/index.rst
@@ -159,7 +159,7 @@ Adding AI-generated Backgrounds (Experimental)
 
 To enhance image realism, use ``generate_backgrounds.sh`` to add AI-generated backgrounds via the Stable Diffusion XL inpainting model:
 
-Customize the background generation with the ``PROMPT`` variable.
+Customize the background generation with the ``PROMPT`` (what you want the scene to include) and ``NEGATIVE_PROMPT`` (what you want to avoid in the scene) variables.
 
 .. tip::
    
diff --git a/scripts/generate_backgrounds.sh b/scripts/generate_backgrounds.sh
@@ -5,16 +5,23 @@ cd "../src"
 # Path to Python executable
 PYTHON_EXECUTABLE="python"
 
-# Prompt for the model (required)
+# Prompts for the generation (PROMPT: what to include; NEGATIVE_PROMPT: what to avoid)
 PROMPT=""
+NEGATIVE_PROMPT=""
+
+# Hugging Face Token
+HF_TOKEN=""
 
 # Install dependencies
-if ! python -c "import torch, torchvision, torchaudio" &> /dev/null; then
-    pip install torch --index-url https://download.pytorch.org/whl/cu121/torch_stable.html
+if ! python -c "import torch" &> /dev/null; then
+    pip install torch --index-url https://download.pytorch.org/whl/cu121/
 fi
 
 if ! pip freeze | grep -q -f requirements.txt; then
     pip install -r requirements.txt
 fi
 
-$PYTHON_EXECUTABLE generate_background.py "$PROMPT"
+# Hugging Face authentication
+huggingface-cli login --token "$HF_TOKEN"
+
+$PYTHON_EXECUTABLE generate_background.py "$PROMPT" "$NEGATIVE_PROMPT"
diff --git a/src/generate_background.py b/src/generate_background.py
@@ -1,7 +1,7 @@
 """Generate AI-enhanced images for each image in the output directory."""
 import torch
 from diffusers import AutoPipelineForInpainting
-from PIL import Image
+from PIL import Image, ImageFilter
 import os
 import json
 import sys
@@ -30,6 +30,7 @@ def generate_background(image_file: str,
                     mask_file: str, 
                     output_path: str, 
                     prompt: str, 
+                    negative_prompt: str,
                     device: str, 
                     pipe: AutoPipelineForInpainting) -> None:
     """ Generate and save an enhanced image using a diffusion model for a given image and mask.
@@ -45,12 +46,14 @@ def generate_background(image_file: str,
     # Open both image and mask
     image = Image.open(image_file).convert("RGB")
     mask = Image.open(mask_file).convert("L")
+    mask = mask.filter(ImageFilter.GaussianBlur(radius=2))
     mask = mask.point(lambda x: 0 if x < 254 else 255)
     # Use a generator
     generator = torch.Generator(device=device).manual_seed(42)
     # Perform inpainting
     output_image = pipe(
         prompt=prompt,
+        negative_prompt=negative_prompt,
         image=image,
         mask_image=mask,
         guidance_scale=5,
@@ -61,7 +64,7 @@ def generate_background(image_file: str,
     filename = os.path.basename(image_file)
     output_image.save(os.path.join(output_path, filename))
 
-def main(prompt: str, device: str) -> None:
+def main(prompt: str, negative_prompt: str, device: str) -> None:
     """Generate enhanced images for each image in the output directory.
     
     Args: 
@@ -86,7 +89,7 @@ def main(prompt: str, device: str) -> None:
             enhanced_path_dir = os.path.join(enhanced_image_dir, relative_path)
             if not os.path.exists(enhanced_path_dir):
                 os.makedirs(enhanced_path_dir)
-            generate_background(image_file, mask_file, enhanced_path_dir, prompt, device, pipe)
+            generate_background(image_file, mask_file, enhanced_path_dir, prompt, negative_prompt, device, pipe)
 
 if __name__ == '__main__':
     # Check if CUDA is available
@@ -96,6 +99,7 @@ def main(prompt: str, device: str) -> None:
         print("CUDA is not available. Using CPU.")
         device = "cpu"
     # Extract prompt
-    prompt = sys.argv[-1] 
-    main(prompt, device)
+    prompt = sys.argv[-2] 
+    negative_prrompt = sys.argv[-1]
+    main(prompt, negative_prrompt, device)