Graphify-Labs · antonioscarinci · Jun 27, 2026 · Jun 27, 2026 · Jun 28, 2026
diff --git a/.gitattributes b/.gitattributes
@@ -4,3 +4,19 @@
 worked/**/*.html linguist-vendored=true
 graphify-out/**/*.html linguist-vendored=true
 *.html linguist-detectable=false
+
+# Normalize line endings
+* text=auto
+
+# Scripts always get LF
+*.sh text eol=lf
+*.py text eol=lf
+
+# Windows batch files always get CRLF
+*.bat text eol=crlf
+*.cmd text eol=crlf
+
+# Binary files
+*.pdf binary
+*.png binary
+*.jpg binary
diff --git a/graphify/__main__.py b/graphify/__main__.py
@@ -2932,13 +2932,18 @@ def main() -> None:
 
         p = _ap.ArgumentParser(prog="graphify save-result")
         p.add_argument("--question", required=True)
-        p.add_argument("--answer", required=True)
+        p.add_argument("--answer", default=None)
+        p.add_argument("--answer-file", dest="answer_file", default=None)
         p.add_argument("--type", dest="query_type", default="query")
         p.add_argument("--nodes", nargs="*", default=[])
         p.add_argument("--outcome", choices=("useful", "dead_end", "corrected"), default=None)
         p.add_argument("--correction", default=None)
         p.add_argument("--memory-dir", default=str(Path(_GRAPHIFY_OUT) / "memory"))
         opts = p.parse_args(sys.argv[2:])
+        if opts.answer_file:
+            opts.answer = Path(opts.answer_file).read_text(encoding="utf-8").strip()
+        elif not opts.answer:
+            p.error("--answer or --answer-file is required")
         from graphify.ingest import save_query_result as _sqr
 
         out = _sqr(

diff --git a/graphify/export.py b/graphify/export.py
@@ -1490,6 +1490,15 @@ def to_graphml(
     for _, _, attrs in H.edges(data=True):
         for k in [k for k in attrs if k.startswith("_")]:
             del attrs[k]
+    # nx.write_graphml raises ValueError on None attribute values; replace with "".
+    for node_id in H.nodes():
+        for key, val in list(H.nodes[node_id].items()):
+            if val is None:
+                H.nodes[node_id][key] = ""
+    for u, v in H.edges():
+        for key, val in list(H.edges[u, v].items()):
+            if val is None:
+                H.edges[u, v][key] = ""
     nx.write_graphml(H, output_path)
 
 

diff --git a/graphify/skill-windows.md b/graphify/skill-windows.md
diff --git a/graphify/windows-scripts/add_transcripts_to_detect.py b/graphify/windows-scripts/add_transcripts_to_detect.py
@@ -0,0 +1,28 @@
+"""Merge video transcripts into the detection manifest as documents."""
+import sys
+import json
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+transcripts_file = Path('graphify-out/.graphify_transcripts.json')
+detect_file = Path('graphify-out/.graphify_detect.json')
+
+# Nothing to merge when the transcripts list was never written.
+if not transcripts_file.exists():
+    print('No transcripts file found, skipping.')
+    raise SystemExit(0)
+
+transcript_paths = json.loads(transcripts_file.read_text(encoding='utf-8'))
+if not transcript_paths:
+    print('No transcripts produced.')
+    raise SystemExit(0)
+
+detect = json.loads(detect_file.read_text(encoding='utf-8'))
+# Register transcripts under the 'document' category: that is the key the
+# semantic-extraction cache check reads, so a separate 'docs' list would
+# never be picked up for extraction.
+detect.setdefault('files', {}).setdefault('document', []).extend(transcript_paths)
+detect['total_files'] = detect.get('total_files', 0) + len(transcript_paths)
+detect_file.write_text(json.dumps(detect, indent=2), encoding='utf-8')
+print(f'Transcribed {len(transcript_paths)} video file(s) -> treating as docs')
diff --git a/graphify/windows-scripts/ast_extraction.py b/graphify/windows-scripts/ast_extraction.py
@@ -0,0 +1,29 @@
+"""Run AST extraction over the detected code files and store the result."""
+import sys, json
+from graphify.extract import collect_files, extract
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+manifest = json.loads(Path('graphify-out/.graphify_detect.json').read_text(encoding='utf-8'))
+input_path = manifest.get('input_path', '.')
+ast_out = Path('graphify-out/.graphify_ast.json')
+
+# Directory entries are expanded through collect_files(); anything else is taken as one file.
+code_files = []
+for entry in manifest.get('files', {}).get('code', []):
+    entry_path = Path(entry)
+    if entry_path.is_dir():
+        code_files.extend(collect_files(entry_path))
+    else:
+        code_files.append(entry_path)
+
+if not code_files:
+    # Still write an empty extraction so the output file always exists.
+    empty = {'nodes': [], 'edges': [], 'input_tokens': 0, 'output_tokens': 0}
+    ast_out.write_text(json.dumps(empty, ensure_ascii=False), encoding='utf-8')
+    print('No code files - skipping AST extraction')
+else:
+    result = extract(code_files, cache_root=Path(input_path))
+    ast_out.write_text(json.dumps(result, indent=2, ensure_ascii=False), encoding='utf-8')
+    print(f'AST: {len(result["nodes"])} nodes, {len(result["edges"])} edges')
diff --git a/graphify/windows-scripts/build_graph.py b/graphify/windows-scripts/build_graph.py
@@ -0,0 +1,54 @@
+"""Build the graph from the extraction, then write graph.json, the report and the analysis."""
+import sys, json
+from graphify.build import build_from_json
+from graphify.cluster import cluster, score_all
+from graphify.analyze import god_nodes, surprising_connections, suggest_questions
+from graphify.report import generate
+from graphify.export import to_json
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+# CLI: [input_path] [directed]; directed is on only for the word "true" (any case).
+input_path = sys.argv[1] if len(sys.argv) > 1 else '.'
+directed = len(sys.argv) > 2 and sys.argv[2].lower() == 'true'
+
+extraction = json.loads(Path('graphify-out/.graphify_extract.json').read_text(encoding='utf-8'))
+detection = json.loads(Path('graphify-out/.graphify_detect.json').read_text(encoding='utf-8'))
+
+G = build_from_json(extraction, root=input_path, directed=directed)
+if not G.number_of_nodes():
+    print('ERROR: Graph is empty - extraction produced no nodes.')
+    print('Possible causes: all files were skipped, binary-only corpus, or extraction failed.')
+    raise SystemExit(1)
+
+communities = cluster(G)
+cohesion = score_all(G, communities)
+gods = god_nodes(G)
+surprises = surprising_connections(G, communities)
+labels = {cid: f'Community {cid}' for cid in communities}
+questions = suggest_questions(G, communities, labels)
+token_usage = {'input': extraction.get('input_tokens', 0), 'output': extraction.get('output_tokens', 0)}
+
+# A falsy result from to_json is treated as a refused write; stop before the report is produced.
+if not to_json(G, communities, 'graphify-out/graph.json'):
+    print('ERROR: refused to shrink graphify-out/graph.json (existing graph has more nodes; #479).')
+    print('If this shrink is intentional (you deleted files), re-run a full build with --force.')
+    raise SystemExit(1)
+
+report_md = generate(
+    G, communities, cohesion, labels, gods, surprises, detection, token_usage, input_path,
+    suggested_questions=questions,
+)
+Path('graphify-out/GRAPH_REPORT.md').write_text(report_md, encoding='utf-8')
+
+analysis = {
+    'communities': {str(cid): members for cid, members in communities.items()},
+    'cohesion': {str(cid): score for cid, score in cohesion.items()},
+    'gods': gods,
+    'surprises': surprises,
+    'questions': questions,
+}
+analysis_json = json.dumps(analysis, indent=2, ensure_ascii=False)
+Path('graphify-out/.graphify_analysis.json').write_text(analysis_json, encoding='utf-8')
+print(f'Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges, {len(communities)} communities')
diff --git a/graphify/windows-scripts/check_code_only.py b/graphify/windows-scripts/check_code_only.py
@@ -0,0 +1,19 @@
+"""Report whether every new/changed file in the incremental manifest has a code extension."""
+import sys
+import json
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+code_exts = {'.py', '.ts', '.js', '.go', '.rs', '.java', '.cpp', '.c', '.rb', '.swift', '.kt', '.cs', '.scala', '.php', '.cc', '.cxx', '.hpp', '.h', '.kts', '.lua', '.toc'}
+
+# A missing manifest is read as an empty result, i.e. no changed files.
+manifest = Path('graphify-out/.graphify_incremental.json')
+result = json.loads(manifest.read_text(encoding='utf-8')) if manifest.exists() else {}
+
+# Flatten every group under 'new_files'; with no changed files the answer is vacuously True.
+all_changed = []
+for files in result.get('new_files', {}).values():
+    all_changed.extend(files)
+code_only = not any(Path(f).suffix.lower() not in code_exts for f in all_changed)
+print('code_only:', code_only)
diff --git a/graphify/windows-scripts/check_extraction_cache.py b/graphify/windows-scripts/check_extraction_cache.py
@@ -0,0 +1,29 @@
+"""Split content files into semantic-cache hits and files that still need extraction."""
+import sys
+import json
+from graphify.cache import check_semantic_cache
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+input_path = sys.argv[1] if len(sys.argv) > 1 else '.'
+detect = json.loads(Path('graphify-out/.graphify_detect.json').read_text(encoding='utf-8'))
+
+# Only content files go to semantic extraction; code is handled by AST (Part A).
+# Video is transcribed to document in Step 2.5 first.
+all_files = []
+for cat in ('document', 'paper', 'image'):
+    all_files.extend(detect['files'].get(cat, []))
+
+cached_nodes, cached_edges, cached_hyperedges, uncached = check_semantic_cache(all_files, root=Path(input_path))
+
+# The cache file is rewritten or removed on every run so Part C never merges stale data (#1392).
+cache_file = Path('graphify-out/.graphify_cached.json')
+if not (cached_nodes or cached_edges or cached_hyperedges):
+    cache_file.unlink(missing_ok=True)
+else:
+    payload = {'nodes': cached_nodes, 'edges': cached_edges, 'hyperedges': cached_hyperedges}
+    cache_file.write_text(json.dumps(payload, ensure_ascii=False), encoding='utf-8')
+Path('graphify-out/.graphify_uncached.txt').write_text('\n'.join(uncached), encoding='utf-8')
+hits = len(all_files) - len(uncached)
+print(f'Cache: {hits} files hit, {len(uncached)} files need extraction')
diff --git a/graphify/windows-scripts/check_graph_exists.py b/graphify/windows-scripts/check_graph_exists.py
@@ -0,0 +1,10 @@
+"""Exit with status 1 when graphify-out/graph.json does not exist."""
+import sys
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+graph_missing = not Path('graphify-out/graph.json').exists()
+if graph_missing:
+    print('ERROR: No graph found. Run /graphify <path> first to build the graph.')
+    raise SystemExit(1)
diff --git a/graphify/windows-scripts/check_graph_health.py b/graphify/windows-scripts/check_graph_health.py
@@ -0,0 +1,32 @@
+"""Print the extraction diagnostics followed by a one-line graph health verdict."""
+import sys, json
+from pathlib import Path
+from graphify.diagnostics import diagnose_extraction, format_diagnostic_report
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+input_path = sys.argv[1] if len(sys.argv) > 1 else '.'
+directed = len(sys.argv) > 2 and sys.argv[2].lower() == 'true'
+
+extraction = json.loads(Path('graphify-out/.graphify_extract.json').read_text(encoding='utf-8'))
+summary = diagnose_extraction(extraction, directed=directed, root=input_path)
+print(format_diagnostic_report(summary))
+
+# (summary key, label) pairs; a key is reported only when its value is truthy.
+checks = (
+    ('dangling_endpoint_edges', 'dangling-endpoint edges'),
+    ('missing_endpoint_edges', 'missing-endpoint edges'),
+    ('self_loop_edges', 'self-loop edges'),
+    ('directed_same_endpoint_collapsed_edges', 'collapsed (directed) edges'),
+    ('undirected_same_endpoint_collapsed_edges', 'collapsed (undirected) edges'),
+)
+flags = []
+for key, label in checks:
+    if not summary.get(key, 0):
+        continue
+    flags.append(f'{summary[key]} {label}')
+
+if not flags:
+    print('Graph health: OK (no dangling/missing/collapsed edges).')
+else:
+    print('GRAPH HEALTH WARNING: ' + '; '.join(flags) + ' - graph may be incomplete/corrupt.')
diff --git a/graphify/windows-scripts/check_graphify_installed.py b/graphify/windows-scripts/check_graphify_installed.py
@@ -0,0 +1,4 @@
+import sys
+import graphify  # presence check only: a missing package raises ImportError here, so the script exits non-zero
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
diff --git a/graphify/windows-scripts/cluster_only.py b/graphify/windows-scripts/cluster_only.py
@@ -0,0 +1,46 @@
+"""Re-cluster the existing graph.json and regenerate the report and analysis files."""
+import sys
+import json
+from graphify.cluster import cluster, score_all
+from graphify.analyze import god_nodes, surprising_connections
+from graphify.report import generate
+from graphify.export import to_json
+from networkx.readwrite import json_graph
+import networkx as nx
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+data = json.loads(Path('graphify-out/graph.json').read_text(encoding='utf-8'))
+G = json_graph.node_link_graph(data, edges='links')
+
+# This script runs no detection or extraction, so the report is generated
+# from fixed placeholder stats and zero token usage.
+detection = {'total_files': 0, 'total_words': 99999, 'needs_graph': True, 'warning': None,
+             'files': {'code': [], 'document': [], 'paper': []}}
+tokens = {'input': 0, 'output': 0}
+
+communities = cluster(G)
+cohesion = score_all(G, communities)
+gods = god_nodes(G)
+surprises = surprising_connections(G, communities)
+labels = {cid: 'Community ' + str(cid) for cid in communities}
+
+# Persist the graph first and treat a falsy result as "nothing was written",
+# so the report and analysis are never regenerated for a graph.json that
+# was left untouched.
+if not to_json(G, communities, 'graphify-out/graph.json'):
+    print('ERROR: graphify-out/graph.json was not written; report and analysis left unchanged.')
+    raise SystemExit(1)
+
+report = generate(G, communities, cohesion, labels, gods, surprises, detection, tokens, '.')
+Path('graphify-out/GRAPH_REPORT.md').write_text(report, encoding='utf-8')
+
+analysis = {
+    'communities': {str(k): v for k, v in communities.items()},
+    'cohesion': {str(k): v for k, v in cohesion.items()},
+    'gods': gods,
+    'surprises': surprises,
+}
+Path('graphify-out/.graphify_analysis.json').write_text(json.dumps(analysis, indent=2, ensure_ascii=False), encoding='utf-8')
+print(f'Re-clustered: {len(communities)} communities')
diff --git a/graphify/windows-scripts/collect_chunk_results.py b/graphify/windows-scripts/collect_chunk_results.py
@@ -0,0 +1,76 @@
+"""Merge the per-chunk result files into a single semantic extraction file."""
+import sys
+import json
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+out_dir = Path('graphify-out')
+chunk_files = sorted(out_dir.glob('.graphify_chunk_*.json'))
+
+if not chunk_files:
+    print('ERROR: No chunk files found in graphify-out/. Subagents may not have written results.')
+    print('Re-run and ensure subagent_type="general-purpose" is used.')
+    sys.exit(1)
+
+all_nodes, all_edges, all_hyperedges = [], [], []
+total_input = total_output = 0
+failed = []
+
+for cf in chunk_files:
+    try:
+        data = json.loads(cf.read_text(encoding='utf-8'))
+        if not isinstance(data, dict) or 'nodes' not in data or 'edges' not in data:
+            raise ValueError('missing nodes or edges keys')
+        # Stage everything in locals first so a chunk that fails part-way
+        # contributes nothing to the merged lists or token totals.
+        nodes = list(data['nodes'])
+        edges = list(data['edges'])
+        hyperedges = list(data.get('hyperedges', []))
+        new_input = total_input + data.get('input_tokens', 0)
+        new_output = total_output + data.get('output_tokens', 0)
+    except Exception as e:
+        print(f'WARNING: {cf.name} failed validation: {e}')
+        failed.append(cf.name)
+        continue
+    all_nodes.extend(nodes)
+    all_edges.extend(edges)
+    all_hyperedges.extend(hyperedges)
+    total_input, total_output = new_input, new_output
+
+total = len(chunk_files)
+if len(failed) > total / 2:
+    print(f'ERROR: {len(failed)}/{total} chunks failed. Re-run and ensure subagent_type="general-purpose".')
+    sys.exit(1)
+
+if failed:
+    print(f'WARNING: {len(failed)} chunk(s) invalid: {", ".join(failed)}')
+
+# De-duplicate by node id, keeping the first occurrence. Entries that are not
+# objects, or whose id is missing/unhashable, are skipped instead of crashing.
+seen: set = set()
+deduped_nodes = []
+skipped = 0
+for n in all_nodes:
+    node_id = n.get('id') if isinstance(n, dict) else None
+    if node_id is None or isinstance(node_id, (list, dict)):
+        skipped += 1
+        continue
+    if node_id not in seen:
+        seen.add(node_id)
+        deduped_nodes.append(n)
+if skipped:
+    print(f'WARNING: skipped {skipped} node(s) without a usable id')
+
+Path('graphify-out/.graphify_semantic_new.json').write_text(json.dumps({
+    'nodes': deduped_nodes,
+    'edges': all_edges,
+    'hyperedges': all_hyperedges,
+    'input_tokens': total_input,
+    'output_tokens': total_output,
+}, indent=2), encoding='utf-8')
+print(f'Collected {total - len(failed)}/{total} chunks: {len(deduped_nodes)} nodes, {len(all_edges)} edges')
+
+# Chunk files are removed only after the merged file has been written.
+for cf in chunk_files:
+    cf.unlink(missing_ok=True)
diff --git a/graphify/windows-scripts/detect_files.py b/graphify/windows-scripts/detect_files.py
@@ -0,0 +1,24 @@
+"""Detect corpus files, drop anything under an 'assets' folder, and save the manifest."""
+import sys, json
+from graphify.detect import detect
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+def _is_assets(path_str: str) -> bool:
+    """Return True when any component of *path_str* is named 'assets' (case-insensitive)."""
+    return any(part.lower() == 'assets' for part in Path(path_str).parts)
+
+# Default to the current directory when no path argument is given instead of
+# failing with an IndexError.
+input_path = Path(sys.argv[1] if len(sys.argv) > 1 else '.').resolve()
+result = detect(input_path)
+result['input_path'] = str(input_path)
+
+# Discard files inside any folder named 'assets', then recount what is left.
+for category in result.get('files', {}):
+    result['files'][category] = [f for f in result['files'][category] if not _is_assets(f)]
+result['total_files'] = sum(len(v) for v in result.get('files', {}).values())
+
+Path('graphify-out').mkdir(exist_ok=True)
+Path('graphify-out/.graphify_detect.json').write_text(json.dumps(result, ensure_ascii=False), encoding='utf-8')
diff --git a/graphify/windows-scripts/detect_incremental.py b/graphify/windows-scripts/detect_incremental.py
@@ -0,0 +1,46 @@
+"""Detect new/changed files, save the incremental manifest, and scope detection to the changes."""
+import sys, json
+from graphify.detect import detect_incremental, save_manifest
+from pathlib import Path
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+
+def _is_assets(path_str: str) -> bool:
+    """Return True when any component of *path_str* is named 'assets' (case-insensitive)."""
+    return any(part.lower() == 'assets' for part in Path(path_str).parts)
+
+# Default to the current directory when no path argument is given instead of
+# failing with an IndexError.
+root_arg = sys.argv[1] if len(sys.argv) > 1 else '.'
+result = detect_incremental(Path(root_arg), kind='ast')
+
+# Discard files inside any folder named 'assets', then recount both file sets.
+for category in result.get('files', {}):
+    result['files'][category] = [f for f in result['files'][category] if not _is_assets(f)]
+for category in result.get('new_files', {}):
+    result['new_files'][category] = [f for f in result['new_files'][category] if not _is_assets(f)]
+result['new_total'] = sum(len(v) for v in result.get('new_files', {}).values())
+result['total_files'] = sum(len(v) for v in result.get('files', {}).values())
+
+new_total = result['new_total']
+print(json.dumps(result, indent=2))
+# Make sure the output directory exists before writing into it.
+Path('graphify-out').mkdir(exist_ok=True)
+Path('graphify-out/.graphify_incremental.json').write_text(json.dumps(result, ensure_ascii=False), encoding='utf-8')
+if new_total == 0:
+    print('No files changed since last run. Nothing to update.')
+    raise SystemExit(0)
+print(f'{new_total} new/changed file(s) to re-extract.')
+
+# Write .graphify_detect.json scoped to new files only so all downstream
+# scripts (check_extraction_cache, print_timing_estimate, build_graph, etc.)
+# operate on the changed file set, not the full corpus.
+detect_for_update = {
+    'files': result['new_files'],
+    'all_files': result.get('files', {}),  # full corpus for manifest-saving
+    'total_files': result['new_total'],
+    'total_words': result.get('total_words', 0),
+    'skipped_sensitive': result.get('skipped_sensitive', []),
+    'input_path': result.get('input_path', str(Path(root_arg).resolve())),
+}
+Path('graphify-out/.graphify_detect.json').write_text(json.dumps(detect_for_update, ensure_ascii=False), encoding='utf-8')