Skip to content

Commit 5a88d7f

Browse files
charliewwdev and claude committed
fix trailer V2 quality: portrait aspect ratio, inference params, ncnn upscale
- Fix character stretching: portrait generation 480x832 → 832x480 to match I2V landscape shots, add cinematic wide-angle composition to prompts
- Increase I2V inference steps 30 → 50 and reduce guidance_scale 6.0 → 5.0 to match Wan 2.2 TI2V-5B defaults for better quality
- Add realesrgan-ncnn-vulkan backend fallback to VideoUpscaler for when Python realesrgan/basicsr won't compile (e.g. Python 3.14)
- Update .gitignore for tools/ dir and new scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent cf4b92b commit 5a88d7f

4 files changed

Lines changed: 164 additions & 28 deletions

File tree

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,16 @@ build/
1616

1717
scripts/*
1818
!scripts/animate.py
19+
!scripts/story.py
20+
!scripts/produce_trailer_v2.py
1921
!scripts/__init__.py
2022

2123
*.ipynb
2224
*.safetensors
2325
*.ckpt
2426

27+
tools/
28+
2529
models/*
2630
!models/StableDiffusion/
2731
models/StableDiffusion/*

animatediff/postprocess/upscale.py

Lines changed: 150 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,66 @@
66
- RealESRGAN_x4plus_anime_6B: 17MB, anime images
77
- RealESRGAN_x4plus: 64MB, general purpose
88
9-
MPS/Apple Silicon: works with half=False and tiling.
9+
Backends:
10+
- python: Uses realesrgan pip package (requires basicsr, may not compile on Python 3.14)
11+
- ncnn: Uses realesrgan-ncnn-vulkan binary (standalone, no Python deps)
12+
- auto: Tries python first, falls back to ncnn
13+
14+
MPS/Apple Silicon: Python backend uses half=False + tiling. ncnn uses Vulkan GPU natively.
1015
"""
1116

1217
import logging
13-
from typing import List, Optional, Literal
18+
import os
19+
import shutil
20+
import subprocess
21+
import tempfile
22+
from pathlib import Path
23+
from typing import List, Literal
1424

1525
import numpy as np
1626
from PIL import Image
1727

1828
logger = logging.getLogger(__name__)
1929

20-
# Model download URLs
30+
# Model download URLs (Python backend)
2131
# Maps each supported model key to the release asset holding its PyTorch weights.
# Only the Python backend downloads from these URLs.
MODEL_URLS = {
    # Anime-video model
    "animevideov3": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth",
    # RealESRGAN_x4plus_anime_6B: anime still images
    "anime_6B": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
    # RealESRGAN_x4plus: general purpose
    "general": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
}
2636

37+
# ncnn model name mapping
38+
# Translates this module's model keys into the model names passed to the
# realesrgan-ncnn-vulkan binary via its ``-n`` option.
NCNN_MODEL_NAMES = {
    "animevideov3": "realesr-animevideov3",
    "anime_6B": "realesrgan-x4plus-anime",
    "general": "realesrgan-x4plus",
}
43+
44+
# Default ncnn binary location (relative to project root)
45+
# Third ancestor of this module's resolved path, used as the project root.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
# Directory expected to hold the realesrgan-ncnn-vulkan binary and its models.
NCNN_DIR = PROJECT_ROOT.joinpath("tools", "realesrgan-ncnn-vulkan")
47+
48+
49+
def _find_ncnn_binary() -> str | None:
50+
"""Find realesrgan-ncnn-vulkan binary."""
51+
# Check project tools/ directory
52+
candidate = NCNN_DIR / "realesrgan-ncnn-vulkan"
53+
if candidate.is_file() and os.access(str(candidate), os.X_OK):
54+
return str(candidate)
55+
# Check PATH
56+
which = shutil.which("realesrgan-ncnn-vulkan")
57+
if which:
58+
return which
59+
return None
60+
61+
62+
def _find_ncnn_models_dir() -> str | None:
    """Return the ``models`` directory under ``NCNN_DIR`` as a string.

    Returns:
        The directory path, or ``None`` when that directory does not exist.
    """
    models_path = NCNN_DIR.joinpath("models")
    return str(models_path) if models_path.is_dir() else None
68+
2769

2870
class VideoUpscaler:
2971
"""Upscale video frames using Real-ESRGAN."""
@@ -34,25 +76,50 @@ def __init__(
3476
scale: int = 2,
3577
tile: int = 0,
3678
device: str = "cpu",
79+
backend: Literal["auto", "python", "ncnn"] = "auto",
3780
):
3881
self.model_name = model_name
3982
self.scale = scale
4083
self.tile = tile
4184
self.device = device
85+
self.backend = backend
4286
self._upsampler = None
87+
self._resolved_backend = None
88+
89+
def _resolve_backend(self) -> str:
    """Decide which backend to use and cache the decision on the instance.

    An explicit ``backend`` of "python" or "ncnn" is used as-is. Any other
    value is handled as "auto": the Python ``realesrgan``/``basicsr``
    packages are preferred, and the ncnn binary is the fallback when they
    cannot be imported.

    Returns:
        "python" or "ncnn".

    Raises:
        ImportError: In auto mode, when the Python packages cannot be
            imported and no ncnn binary is found.
    """
    if self._resolved_backend:
        return self._resolved_backend

    if self.backend in ("python", "ncnn"):
        self._resolved_backend = self.backend
        return self._resolved_backend

    if self.backend != "auto":
        # Unknown values have always fallen through to auto-detection; keep
        # that behaviour but surface what is most likely a typo.
        logger.warning(
            "Unknown upscale backend %r; falling back to auto-detection",
            self.backend,
        )

    try:
        # Probe only: importing is the cheapest way to learn whether the
        # Python backend is usable in this environment.
        import realesrgan  # noqa: F401
        import basicsr  # noqa: F401
    except ImportError as exc:
        if not _find_ncnn_binary():
            # Chain the original failure so the reason the Python backend
            # was rejected stays visible in the traceback.
            raise ImportError(
                "No upscale backend available. Either:\n"
                "  1. pip install realesrgan (requires basicsr)\n"
                "  2. Place realesrgan-ncnn-vulkan binary in tools/realesrgan-ncnn-vulkan/"
            ) from exc
        self._resolved_backend = "ncnn"
        logger.info("Upscale backend: ncnn-vulkan (Python realesrgan unavailable)")
    else:
        self._resolved_backend = "python"
        logger.info("Upscale backend: python (realesrgan pip package)")

    return self._resolved_backend
43115

44116
def _ensure_loaded(self):
45-
"""Lazy-load the upscaler model."""
117+
"""Lazy-load the Python upscaler model."""
46118
if self._upsampler is not None:
47119
return
48120

49-
try:
50-
from realesrgan import RealESRGANer
51-
from basicsr.utils.download_util import load_file_from_url
52-
except ImportError:
53-
raise ImportError(
54-
"Real-ESRGAN not installed. Install with: pip install realesrgan"
55-
)
121+
from realesrgan import RealESRGANer
122+
from basicsr.utils.download_util import load_file_from_url
56123

57124
model_url = MODEL_URLS.get(self.model_name, MODEL_URLS["animevideov3"])
58125
model_path = load_file_from_url(url=model_url, model_dir="weights/realesrgan", progress=True)
@@ -110,26 +177,91 @@ def _ensure_loaded(self):
110177

111178
logger.info(f"Loaded Real-ESRGAN model: {self.model_name} (device={self.device}, tile={tile})")
112179

113-
def upscale_frame(self, frame: Image.Image) -> Image.Image:
114-
"""Upscale a single frame."""
180+
def _upscale_frame_python(self, frame: Image.Image) -> Image.Image:
    """Upscale a single frame using the Python Real-ESRGAN backend.

    Args:
        frame: Input frame. It is converted to RGB before processing, so
            other image modes are accepted as well.

    Returns:
        The upscaled frame as an RGB image.
    """
    self._ensure_loaded()
    import cv2

    # Normalise the mode first: the RGB<->BGR swaps below need a
    # three-channel array, which greyscale or palette frames do not give.
    img = np.array(frame.convert("RGB"))
    # Real-ESRGAN expects BGR
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    output, _ = self._upsampler.enhance(img_bgr, outscale=self.scale)
    output_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
    return Image.fromarray(output_rgb)
126190

191+
def _upscale_frames_ncnn(self, frames: List[Image.Image]) -> List[Image.Image]:
    """Upscale frames with the realesrgan-ncnn-vulkan binary (directory mode).

    The frames are written as numbered PNGs into a temporary input
    directory, the binary is run once over that directory, and the results
    are read back in the same order.

    Args:
        frames: Frames to upscale. An empty list yields an empty list
            without invoking the binary.

    Returns:
        The upscaled frames as RGB images, in input order.

    Raises:
        RuntimeError: If the binary or its models directory cannot be
            found, the process exits non-zero, or an expected output file
            is missing.
        subprocess.TimeoutExpired: If the process runs longer than one hour.
    """
    if not frames:
        # Nothing to process; do not run the tool on an empty directory.
        return []

    binary = _find_ncnn_binary()
    if not binary:
        raise RuntimeError("realesrgan-ncnn-vulkan binary not found")

    models_dir = _find_ncnn_models_dir()
    if not models_dir:
        raise RuntimeError(
            f"ncnn models not found. Expected at: {NCNN_DIR / 'models'}"
        )

    # Unknown model keys fall back to the anime-video model.
    ncnn_model_name = NCNN_MODEL_NAMES.get(self.model_name, "realesr-animevideov3")

    with tempfile.TemporaryDirectory() as tmpdir:
        in_dir = os.path.join(tmpdir, "input")
        out_dir = os.path.join(tmpdir, "output")
        os.makedirs(in_dir)
        os.makedirs(out_dir)

        # Zero-padded names let each output be matched to its input by index.
        for i, frame in enumerate(frames):
            frame.save(os.path.join(in_dir, f"{i:06d}.png"))

        # NOTE(review): the binary may only accept certain -s values for a
        # given model — confirm self.scale is valid for ncnn_model_name.
        cmd = [
            binary,
            "-i", in_dir,
            "-o", out_dir,
            "-s", str(self.scale),
            "-n", ncnn_model_name,
            "-m", models_dir,
            "-f", "png",
        ]
        logger.info(f"Running ncnn upscale: {' '.join(cmd)}")

        result = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
        if result.returncode != 0:
            # Report the tail of stderr: presumably the tool writes progress
            # there first, so the cause of a failure would be at the end
            # (TODO confirm against the binary's output).
            raise RuntimeError(
                f"realesrgan-ncnn-vulkan failed (exit {result.returncode}): "
                f"{result.stderr[-500:]}"
            )

        upscaled = []
        for i in range(len(frames)):
            out_path = os.path.join(out_dir, f"{i:06d}.png")
            if not os.path.exists(out_path):
                raise RuntimeError(f"ncnn output missing: {out_path}")
            # convert() returns a fully loaded copy, so the file can be
            # closed before the temporary directory is removed.
            with Image.open(out_path) as img:
                upscaled.append(img.convert("RGB"))

    return upscaled
243+
244+
def upscale_frame(self, frame: Image.Image) -> Image.Image:
    """Upscale one frame using whichever backend this instance resolves to.

    Args:
        frame: The frame to upscale.

    Returns:
        The upscaled frame.
    """
    if self._resolve_backend() != "python":
        # The ncnn path works on batches, so send a one-element batch and
        # take its only result.
        return self._upscale_frames_ncnn([frame])[0]
    return self._upscale_frame_python(frame)
252+
127253
def upscale_frames(self, frames: List[Image.Image]) -> List[Image.Image]:
    """Upscale every frame of a video.

    The ncnn backend handles the whole list in one call; the Python backend
    goes frame by frame and logs progress after every tenth frame.

    Args:
        frames: Frames to upscale, in playback order.

    Returns:
        The upscaled frames, in the same order.
    """
    backend = self._resolve_backend()
    logger.info(f"Upscaling {len(frames)} frames with {self.model_name} ({self.scale}x, backend={backend})")

    if backend == "ncnn":
        return self._upscale_frames_ncnn(frames)

    upscaled = []
    for i, frame in enumerate(frames):
        upscaled.append(self._upscale_frame_python(frame))
        finished = i + 1
        if finished % 10 == 0:
            logger.info(f" Upscaled {i+1}/{len(frames)} frames")
    return upscaled

examples/fanren_trailer_v2.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,28 +23,28 @@
2323
"韩立_early": {
2424
"description": "young thin boy age 14, simple brown cloth, messy black hair, poor but determined dark eyes, thin face",
2525
"image_path": null,
26-
"portrait_prompt": "portrait of a young thin Chinese boy age 14, simple ragged brown cloth, messy black hair, poor but determined dark eyes, thin face, anime style, xianxia, high detail, upper body, looking at camera",
26+
"portrait_prompt": "cinematic wide angle portrait of a young thin Chinese boy age 14, simple ragged brown cloth, messy black hair, poor but determined dark eyes, thin face, anime style, xianxia, high detail, upper body centered in frame, landscape composition, looking at camera",
2727
"lora_path": null,
2828
"lora_scale": 1.0
2929
},
3030
"韩立_late": {
3131
"description": "young male cultivator age 16, grey disciple robes, hair tied in topknot, determined gaze, faint golden aura",
3232
"image_path": null,
33-
"portrait_prompt": "portrait of a young Chinese male cultivator age 16, grey disciple robes with dark trim, hair tied in topknot, determined gaze, faint golden spiritual aura, anime style, xianxia, high detail, upper body, looking at camera",
33+
"portrait_prompt": "cinematic wide angle portrait of a young Chinese male cultivator age 16, grey disciple robes with dark trim, hair tied in topknot, determined gaze, faint golden spiritual aura, anime style, xianxia, high detail, upper body centered in frame, landscape composition, looking at camera",
3434
"lora_path": null,
3535
"lora_scale": 1.0
3636
},
3737
"墨大夫": {
3838
"description": "sinister old man with long white beard, dark green robes, sharp cunning eyes, hunched posture, alchemist",
3939
"image_path": null,
40-
"portrait_prompt": "portrait of a sinister old Chinese man alchemist, long white beard, dark green robes, sharp cunning eyes, hunched posture, dim candlelit background, anime style, xianxia, high detail, upper body, looking at camera",
40+
"portrait_prompt": "cinematic wide angle portrait of a sinister old Chinese man alchemist, long white beard, dark green robes, sharp cunning eyes, hunched posture, dim candlelit background, anime style, xianxia, high detail, upper body centered in frame, landscape composition, looking at camera",
4141
"lora_path": null,
4242
"lora_scale": 1.0
4343
},
4444
"厉飞雨": {
4545
"description": "handsome young man in grey disciple robes, short black hair, warm smile, loyal friend, carrying a sword",
4646
"image_path": null,
47-
"portrait_prompt": "portrait of a handsome young Chinese man in grey disciple robes, short black hair, warm friendly smile, sword on back, anime style, xianxia, high detail, upper body, looking at camera",
47+
"portrait_prompt": "cinematic wide angle portrait of a handsome young Chinese man in grey disciple robes, short black hair, warm friendly smile, sword on back, anime style, xianxia, high detail, upper body centered in frame, landscape composition, looking at camera",
4848
"lora_path": null,
4949
"lora_scale": 1.0
5050
}

scripts/produce_trailer_v2.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,8 @@ def phase2_generate_on_gpu():
288288
output = pipeline.generate(
289289
prompt=prompt,
290290
negative_prompt="blurry, low quality, distorted, deformed, ugly, watermark, multiple people",
291-
width=480,
292-
height=832,
291+
width=832,
292+
height=480,
293293
num_frames=17,
294294
num_inference_steps=30,
295295
guidance_scale=6.0,
@@ -361,10 +361,10 @@ def phase2_generate_on_gpu():
361361
print(f" Shot {i:2d}: I2V with ref={char_name}")
362362
else:
363363
print(f" Shot {i:2d}: ⚠️ No portrait for {char_name}, using black frame")
364-
ref_image = Image.new("RGB", (480, 832), (0, 0, 0))
364+
ref_image = Image.new("RGB", (832, 480), (0, 0, 0))
365365
else:
366366
print(f" Shot {i:2d}: No characters, using black frame")
367-
ref_image = Image.new("RGB", (480, 832), (0, 0, 0))
367+
ref_image = Image.new("RGB", (832, 480), (0, 0, 0))
368368

369369
num_frames = shot.get("num_frames", 0)
370370
if num_frames == 0:
@@ -382,8 +382,8 @@ def phase2_generate_on_gpu():
382382
width=832,
383383
height=480,
384384
num_frames=num_frames,
385-
num_inference_steps=30,
386-
guidance_scale=6.0,
385+
num_inference_steps=50,
386+
guidance_scale=5.0,
387387
seed=shot.get("seed", -1),
388388
image=ref_image,
389389
)

0 commit comments

Comments
 (0)