Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 146 additions & 0 deletions skills/transformation/depth-estimation/models.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
{
"studio": {
"title": "3D Depth Vision Studio",
"subtitle": "Convert 2D video to 3D depth maps • Privacy-first scene understanding",
"icon": "layers"
},
"models_dir": "~/.aegis-ai/models/feature-extraction",
"models": {
"depth-anything-v2-small": {
"name": "Depth Anything V2 Small",
"type": "depth_estimation",
"description": "Real-time monocular depth estimation — Apple Neural Engine optimized",
"input_size": [518, 392],
"platforms": {
"darwin": {
"repository": "apple/coreml-depth-anything-v2-small",
"format": "mlpackage",
"variants": {
"DepthAnythingV2SmallF16": {
"precision": "float16",
"size_mb": 49.8,
"description": "Float16 — optimized for Neural Engine"
},
"DepthAnythingV2SmallF16INT8": {
"precision": "float16_int8",
"size_mb": 25.0,
"description": "Float16 + INT8 quantization — smallest"
},
"DepthAnythingV2SmallF32": {
"precision": "float32",
"size_mb": 99.2,
"description": "Float32 — highest precision"
},
"DepthAnythingV2SmallF16P6": {
"precision": "float16_p6",
"size_mb": 18.0,
"description": "Float16 palettized 6-bit"
},
"DepthAnythingV2SmallF16P8": {
"precision": "float16_p8",
"size_mb": 24.0,
"description": "Float16 palettized 8-bit"
},
"DepthAnythingV2SmallF32INT8": {
"precision": "float32_int8",
"size_mb": 24.0,
"description": "Float32 + INT8 quantization"
},
"DepthAnythingV2SmallF32P6": {
"precision": "float32_p6",
"size_mb": 18.0,
"description": "Float32 palettized 6-bit"
},
"DepthAnythingV2SmallF32P8": {
"precision": "float32_p8",
"size_mb": 24.0,
"description": "Float32 palettized 8-bit"
}
}
},
"linux": {
"repository": "depth-anything/Depth-Anything-V2-Small",
"format": "pth",
"variants": {
"depth_anything_v2_vits": {
"precision": "float32",
"size_mb": 99.0,
"description": "PyTorch ViT-S — CUDA/CPU"
}
}
},
"win32": {
"repository": "depth-anything/Depth-Anything-V2-Small",
"format": "pth",
"variants": {
"depth_anything_v2_vits": {
"precision": "float32",
"size_mb": 99.0,
"description": "PyTorch ViT-S — CUDA/CPU"
}
}
}
}
},
"depth-anything-v2-base": {
"name": "Depth Anything V2 Base",
"type": "depth_estimation",
"description": "Higher accuracy depth estimation — larger model",
"input_size": [518, 392],
"platforms": {
"linux": {
"repository": "depth-anything/Depth-Anything-V2-Base",
"format": "pth",
"variants": {
"depth_anything_v2_vitb": {
"precision": "float32",
"size_mb": 390.0,
"description": "PyTorch ViT-B — CUDA/CPU"
}
}
},
"win32": {
"repository": "depth-anything/Depth-Anything-V2-Base",
"format": "pth",
"variants": {
"depth_anything_v2_vitb": {
"precision": "float32",
"size_mb": 390.0,
"description": "PyTorch ViT-B — CUDA/CPU"
}
}
}
}
},
"depth-anything-v2-large": {
"name": "Depth Anything V2 Large",
"type": "depth_estimation",
"description": "Highest accuracy depth estimation — largest model",
"input_size": [518, 392],
"platforms": {
"linux": {
"repository": "depth-anything/Depth-Anything-V2-Large",
"format": "pth",
"variants": {
"depth_anything_v2_vitl": {
"precision": "float32",
"size_mb": 1280.0,
"description": "PyTorch ViT-L — CUDA recommended"
}
}
},
"win32": {
"repository": "depth-anything/Depth-Anything-V2-Large",
"format": "pth",
"variants": {
"depth_anything_v2_vitl": {
"precision": "float32",
"size_mb": 1280.0,
"description": "PyTorch ViT-L — CUDA recommended"
}
}
}
}
}
}
}
178 changes: 178 additions & 0 deletions skills/transformation/depth-estimation/scripts/benchmark_coreml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
#!/usr/bin/env python3
"""
Standalone CoreML depth benchmark — spawned by Aegis IPC handler.

Usage:
python3 benchmark_coreml.py --variant DepthAnythingV2SmallF16 --runs 10 --colormap viridis

Outputs a single JSON line to stdout with benchmark results.
"""

import sys
import json
import time
import os
import argparse
import platform
from pathlib import Path


# Root directory for downloaded model packages; must agree with the
# "models_dir" entry in models.json.
MODELS_DIR = Path.home() / ".aegis-ai" / "models" / "feature-extraction"
# Fixed input resolution the Depth Anything V2 CoreML package expects.
COREML_INPUT_SIZE = (518, 392)  # width, height

# User-facing colormap name -> OpenCV cv2.COLORMAP_* enum value.
# Fixed: several IDs previously pointed at the wrong OpenCV colormap
# (inferno was 1 == BONE, plasma was 13 == MAGMA, magma was 12 == PARULA,
# turbo was 18 == TWILIGHT). Values per OpenCV's ColormapTypes enum.
COLORMAP_MAP = {
    "inferno": 14, "viridis": 16, "plasma": 15, "magma": 13,
    "jet": 2, "turbo": 20, "hot": 11, "cool": 8,
}

# --compute-units CLI value -> coremltools.ComputeUnit attribute name.
# coremltools has no Neural-Engine-only unit, so "npu" maps to ALL.
COMPUTE_UNIT_MAP = {
    "all": "ALL",
    "cpu": "CPU_ONLY",
    "gpu": "CPU_AND_GPU",
    "cpu_npu": "CPU_AND_NE",
    "npu": "ALL",
}


def _log(msg):
print(f"[DepthBenchmark] {msg}", file=sys.stderr, flush=True)


def download_test_image(url):
    """Download a test image from *url* and return it as a numpy BGR array.

    Falls back to a deterministic synthetic gradient image when the
    download fails or the file cannot be decoded, so the benchmark still
    runs (and produces repeatable timings) without network access.

    Fixes vs. previous version: the fallback was random noise despite the
    log message claiming a gradient (and made repeated benchmark inputs
    non-deterministic); the temp path was hardcoded to /tmp instead of
    using the platform temp directory.
    """
    import cv2
    import numpy as np
    import tempfile
    import urllib.request

    _log(f"Downloading test image: {url}")
    # Use the platform temp dir rather than a hardcoded /tmp path.
    tmp_path = os.path.join(tempfile.gettempdir(), "aegis_depth_bench_test.jpg")

    try:
        urllib.request.urlretrieve(url, tmp_path)
        img = cv2.imread(tmp_path)  # None when the file is not a decodable image
        if img is not None:
            return img
    except Exception as e:
        # Best-effort download: log and fall through to the synthetic image.
        _log(f"Download failed: {e}")

    # Fallback: deterministic 640x480 horizontal gradient, replicated across
    # all three BGR channels — identical input on every offline run.
    _log("Using synthetic test image (640x480 gradient)")
    row = np.linspace(0, 255, 640, dtype=np.uint8)
    return np.repeat(np.tile(row, (480, 1))[:, :, np.newaxis], 3, axis=2)


def run_benchmark(args: argparse.Namespace) -> None:
    """Benchmark one CoreML Depth Anything V2 variant; print JSON to stdout.

    Loads the requested ``.mlpackage``, runs one warm-up inference plus
    ``args.runs`` timed iterations on a test image, and emits a single
    JSON line with timing statistics and a base64-encoded depth-map
    preview. Exits with status 1 (after printing a JSON error line) when
    the model package is missing.
    """
    # Heavy dependencies are imported lazily so the script can emit the
    # macOS-only JSON error in __main__ without requiring them.
    import cv2
    import numpy as np
    import coremltools as ct
    from PIL import Image

    variant_id = args.variant
    model_path = MODELS_DIR / f"{variant_id}.mlpackage"

    if not model_path.exists():
        # Errors go to stdout as JSON so the parent IPC handler can parse them.
        print(json.dumps({"error": f"Model not found: {model_path}"}))
        sys.exit(1)

    # Load model
    _log(f"Loading CoreML model: {variant_id}")
    # Unknown --compute-units values fall back to ALL at both lookup steps.
    compute_unit_key = COMPUTE_UNIT_MAP.get(args.compute_units, "ALL")
    compute_unit = getattr(ct.ComputeUnit, compute_unit_key, ct.ComputeUnit.ALL)

    t0 = time.perf_counter()
    model = ct.models.MLModel(str(model_path), compute_units=compute_unit)
    load_time_ms = (time.perf_counter() - t0) * 1000
    _log(f"Model loaded in {load_time_ms:.0f}ms (compute_units={compute_unit_key})")

    # Get test image (downloaded, or synthetic fallback)
    test_image = download_test_image(args.test_image_url)
    original_h, original_w = test_image.shape[:2]
    input_w, input_h = COREML_INPUT_SIZE

    # Prepare input: OpenCV loads BGR, but the model expects an RGB PIL
    # image at the fixed network resolution.
    rgb = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (input_w, input_h), interpolation=cv2.INTER_LINEAR)
    pil_image = Image.fromarray(resized, mode="RGB")

    # Fallback 16 matches COLORMAP_MAP's "viridis" entry.
    colormap_id = COLORMAP_MAP.get(args.colormap, 16)

    # Warm-up run — excluded from timing (first predict pays one-time costs).
    _log("Warm-up inference...")
    model.predict({"image": pil_image})

    # Benchmark runs
    _log(f"Running {args.runs} benchmark iterations...")
    times = []
    last_depth_colored = None

    for i in range(args.runs):
        t0 = time.perf_counter()
        prediction = model.predict({"image": pil_image})
        elapsed_ms = (time.perf_counter() - t0) * 1000
        times.append(elapsed_ms)

        if i == 0:
            # Process first result for extraction preview
            # NOTE(review): assumes the first output key holds the depth map —
            # true for single-output depth models; confirm for new variants.
            output_key = list(prediction.keys())[0]
            depth_map = np.array(prediction[output_key])
            if depth_map.ndim > 2:
                depth_map = np.squeeze(depth_map)
            # Min-max normalize to [0, 1]; epsilon guards a constant depth map.
            depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)
            depth_uint8 = (depth_norm * 255).astype(np.uint8)
            last_depth_colored = cv2.applyColorMap(depth_uint8, colormap_id)
            # Upscale preview back to the original test-image resolution.
            last_depth_colored = cv2.resize(last_depth_colored, (original_w, original_h))

    # Compute stats
    import statistics
    times_sorted = sorted(times)
    avg_ms = statistics.mean(times)
    std_ms = statistics.stdev(times) if len(times) > 1 else 0

    result = {
        "model_id": "depth-anything-v2-small",
        "variant_id": variant_id,
        "num_runs": args.runs,
        "successful_runs": len(times),
        "avg_time_ms": round(avg_ms, 2),
        "min_time_ms": round(times_sorted[0], 2),
        "max_time_ms": round(times_sorted[-1], 2),
        "std_time_ms": round(std_ms, 2),
        "fps": round(1000.0 / avg_ms, 2) if avg_ms > 0 else 0,
        "model_load_ms": round(load_time_ms, 2),
        "compute_units": args.compute_units,
    }

    # Encode extraction result as base64 for preview
    if last_depth_colored is not None:
        import base64
        _, buf = cv2.imencode(".jpg", last_depth_colored, [cv2.IMWRITE_JPEG_QUALITY, 85])
        result["extraction_result"] = {
            "success": True,
            "feature_type": "depth_estimation",
            "feature_data": base64.b64encode(buf).decode("ascii"),
            "processing_time": round(times[0], 2),
            "metadata": {
                "model": variant_id,
                "colormap": args.colormap,
                "compute_units": args.compute_units,
                "input_size": list(COREML_INPUT_SIZE),
            },
        }

    _log(f"Benchmark complete: {avg_ms:.1f}ms avg ({result['fps']:.1f} FPS)")
    # Single JSON line on stdout is the IPC contract with the spawning process.
    print(json.dumps(result), flush=True)


if __name__ == "__main__":
    if platform.system() != "Darwin":
        # CoreML models only run on macOS — report the failure as a JSON
        # line (the IPC contract) and exit non-zero.
        print(json.dumps({"error": "CoreML benchmark requires macOS"}))
        sys.exit(1)

    cli = argparse.ArgumentParser()
    cli.add_argument("--variant", default="DepthAnythingV2SmallF16")
    cli.add_argument("--runs", type=int, default=10)
    cli.add_argument("--colormap", default="viridis")
    cli.add_argument("--compute-units", default="all")
    cli.add_argument("--test-image-url", default="https://ultralytics.com/images/bus.jpg")

    run_benchmark(cli.parse_args())