Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5c5d5a6
chore: gitignore local cgw tooling; add PR audit doc
forkni May 16, 2026
275f293
feat: add FP8 QDQ finite-scale gate and fused-MHA layer count
forkni May 16, 2026
90091d5
feat: add quality regression harness with FP16-TRT goldens and SSIM/L…
forkni May 16, 2026
d1763bb
feat: port varshith15 kvo_cache patch onto diffusers 0.38.0 via runti…
forkni May 16, 2026
62548fb
fix: replace emoji chars in SDXL ONNX size warning to avoid cp1252 en…
forkni May 16, 2026
67c74c2
feat: seed quality-harness goldens, manifest, thresholds; fix FP8 CFG…
forkni May 16, 2026
4b4aaf7
chore: stage pre-existing formatter diffs (quote/whitespace normalisa…
forkni May 16, 2026
09156f7
fix: per-input-aware FP8 calibration tile for static-dim0 ONNX inputs
forkni May 16, 2026
1a8065f
fix: kvo_cache patch breaks ControlNet ONNX export — sentinel for bac…
forkni May 17, 2026
4c2a742
feat: add CUDA IPC output direction via cuda-link (SD-to-TD zero-copy…
forkni May 18, 2026
72dc7cc
feat: add CUDA IPC input direction via cuda-link (TD->SD zero-copy GP…
forkni May 18, 2026
52c4a68
docs: add CUDA IPC input direction plan with next-session log review …
forkni May 18, 2026
eecb9f5
fix: use relative imports in vendored _compat/cuda_ipc (CUDARuntimeTy…
forkni May 18, 2026
02911e5
docs: add plans for CUDARuntimeTypes fix and zero-copy GPU input
forkni May 18, 2026
59f2caa
docs: add plan for ControlNet zero-copy GPU input
forkni May 18, 2026
07045be
fix: harden CUDA IPC capture mode and emit ControlNet IPC keys in str…
forkni May 18, 2026
dbd293b
fix: disable CUDA-graph capture for ControlNet TRT engines (resolves …
forkni May 18, 2026
a3b1a45
perf: tighten CUDALINK_WAIT_SPIN_US + BARRIER_STALE_NS defaults for C…
forkni May 23, 2026
90941b2
refactor: collapse duplicate builder_optimization_level override in b…
forkni May 23, 2026
b9130e2
chore: vendor cuda-link v1.5.1 and migrate wrapper to Exporter API
forkni May 23, 2026
f631c90
perf: CUDA GPU performance audit and implementation (P1-P6)
forkni May 24, 2026
2a9ed08
feat: add CN CUDA-IPC zero-copy consumer + harden P3 GPU Canny
forkni May 24, 2026
3dd66a0
fix: gate CN preview send-back behind send_controlnet_preview flag (d…
forkni May 24, 2026
72e0103
perf: skip GPU timing sync entirely when similar image filter is off
forkni May 24, 2026
d7f95cd
perf: instrument hot-path eager ops and document profiling audit
forkni May 24, 2026
ff86eb5
perf: fix profile_ncu.py for production engine; document UNet wave-li…
forkni May 24, 2026
6982b51
docs: document wave-limited verdict and 99% GPU load clarification
forkni May 24, 2026
5fbc04a
chore: update cuda-link _compat vendor mirrors to v1.7.2
forkni May 31, 2026
bf3413e
feat: surface CUDA-IPC zero-copy degradation as console health indicator
forkni May 31, 2026
0dcb158
feat: gate IPC health tracker behind par.Debugmode UI parameter
forkni May 31, 2026
bcc35fb
refactor: retire _compat vendored mirrors; depend solely on pip cuda-…
forkni Jun 2, 2026
b3dbacd
chore: commit Phase A file deletions missing from bcc35fb (_compat/, …
forkni Jun 2, 2026
8167eb2
docs: add ADR-0001 — cuda-link as external pip dependency, not vendored
forkni Jun 2, 2026
609d8f8
chore: remove redundant _compat/td_exporter glue layer (now fully ext…
forkni Jun 2, 2026
da09e3f
style: normalize profile_ncu.py arg-list formatting
forkni Jun 2, 2026
c201a5f
docs: add clean-shutdown-watchdog and copy-sdtd-code-crash-fix plans
forkni Jun 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,12 @@ controlnet_test_*
demo/realtime-img2img/uploads/
.cgw.conf

# Local-only git workflow tooling (cgw scripts, hooks, example — never committed)
scripts/git/
hooks/cc-block-dangerous-git.sh
.githooks/
cgw.conf.example

# Local Claude / session state (per-user, never committed)
.claude/

Expand Down Expand Up @@ -261,3 +267,6 @@ SESSION_LOG.md

# Profiling/audit CSV exports (Nsight summaries, kernel stats — generated artifacts)
audit_reports/

# Quality harness run outputs (generated; goldens/ is committed, outputs/ is not)
tests/quality/outputs/
15 changes: 14 additions & 1 deletion configs/td_config.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,27 @@ use_ipadapter: false



# CUDA IPC zero-copy GPU-to-GPU output (SD→TD via cuda-link)
use_cuda_ipc_output: false # set true to enable
cuda_ipc_shm_name: 'StreamDiffusionTD_512-512_output_ipc'
cuda_ipc_num_slots: 3
output_type: 'np'

# CUDA IPC zero-copy GPU-to-GPU input (TD→SD via cuda-link)
# When true, SD reads input frames from td_settings.cuda_ipc_input_shm_name
# instead of the legacy CPU SharedMemory at td_settings.input_mem_name.
use_cuda_ipc_input: false

# TouchDesigner specific settings
td_settings:
# OSC communication
osc_receive_port: 8576
osc_transmit_port: 8588

# Memory interface
# Legacy CPU SharedMemory names (input_mem_name is used when use_cuda_ipc_input is false; output_mem_name when use_cuda_ipc_output is false)
input_mem_name: 'StreamDiffusionTD_512-512'
# CUDA IPC shared-memory name for the TD→SD input direction (read when use_cuda_ipc_input is true)
cuda_ipc_input_shm_name: 'StreamDiffusionTD_512-512_input_ipc'
output_mem_name: 'StreamDiffusionTD_512-512_out'

# Debug settings
Expand Down
37 changes: 21 additions & 16 deletions demo/realtime-img2img/app_config.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,52 @@
"""
Application configuration and settings for realtime-img2img
"""
import yaml

import logging
from pathlib import Path

import yaml


def load_controlnet_registry():
"""Load ControlNet registry from config file"""
try:
registry_path = Path(__file__).parent / "controlnet_registry.yaml"
with open(registry_path, 'r') as f:
with open(registry_path, "r") as f:
config_data = yaml.safe_load(f)

# Extract the available_controlnets section
return config_data.get('available_controlnets', {})
return config_data.get("available_controlnets", {})
except Exception as e:
logging.exception(f"load_controlnet_registry: Failed to load ControlNet registry: {e}")
# Fallback to empty registry
return {}


def load_default_settings():
"""Load default settings from YAML config file"""
try:
registry_path = Path(__file__).parent / "controlnet_registry.yaml"
with open(registry_path, 'r') as f:
with open(registry_path, "r") as f:
config_data = yaml.safe_load(f)
return config_data.get('defaults', {})

return config_data.get("defaults", {})
except Exception as e:
logging.exception(f"load_default_settings: Failed to load default settings: {e}")
# Fallback to hardcoded defaults
return {
'guidance_scale': 1.1,
'delta': 0.7,
'num_inference_steps': 50,
'seed': 2,
't_index_list': [35, 45],
'ipadapter_scale': 1.0,
'normalize_prompt_weights': True,
'normalize_seed_weights': True,
'prompt': "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
"guidance_scale": 1.1,
"delta": 0.7,
"num_inference_steps": 50,
"seed": 2,
"t_index_list": [35, 45],
"ipadapter_scale": 1.0,
"normalize_prompt_weights": True,
"normalize_seed_weights": True,
"prompt": "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece",
}


# Load configuration at module level
AVAILABLE_CONTROLNETS = load_controlnet_registry()
DEFAULT_SETTINGS = load_default_settings()
6 changes: 2 additions & 4 deletions demo/realtime-img2img/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import NamedTuple
import argparse
import os
from typing import NamedTuple


class Args(NamedTuple):
Expand Down Expand Up @@ -45,9 +45,7 @@ def pretty_print(self):
parser.add_argument("--host", type=str, default=default_host, help="Host address")
parser.add_argument("--port", type=int, default=default_port, help="Port number")
parser.add_argument("--reload", action="store_true", help="Reload code on change")
parser.add_argument(
"--mode", type=str, default=default_mode, help="App Inferece Mode: txt2img, img2img"
)
parser.add_argument("--mode", type=str, default=default_mode, help="App Inferece Mode: txt2img, img2img")
parser.add_argument(
"--max-queue-size",
dest="max_queue_size",
Expand Down
14 changes: 7 additions & 7 deletions demo/realtime-img2img/connection_manager.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import asyncio
import logging
from types import SimpleNamespace
from typing import Dict, Union
from uuid import UUID
import asyncio

from fastapi import WebSocket
from starlette.websockets import WebSocketState
import logging
from types import SimpleNamespace


Connections = Dict[UUID, Dict[str, Union[WebSocket, asyncio.Queue]]]

Expand All @@ -20,9 +22,7 @@ def __init__(self):
self.active_connections: Connections = {}
self.latest_data: Dict[UUID, SimpleNamespace] = {} # Store latest parameters for HTTP streaming

async def connect(
self, user_id: UUID, websocket: WebSocket, max_queue_size: int = 0
):
async def connect(self, user_id: UUID, websocket: WebSocket, max_queue_size: int = 0):
await websocket.accept()
user_count = self.get_user_count()
print(f"User count: {user_count}")
Expand Down Expand Up @@ -61,7 +61,7 @@ async def get_latest_data(self, user_id: UUID) -> SimpleNamespace:
return await queue.get()
except asyncio.QueueEmpty:
return None

def get_latest_data_sync(self, user_id: UUID) -> SimpleNamespace:
"""Get the latest data without consuming it from the queue (for HTTP streaming)"""
return self.latest_data.get(user_id)
Expand Down
Loading