-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmandali.py
More file actions
5706 lines (4703 loc) · 234 KB
/
mandali.py
File metadata and controls
5706 lines (4703 loc) · 234 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
"""
Mandali (मंडली) — Autonomous Multi-Agent Orchestrator
======================================================
A circle of specialized AI agents that deliberate and act together.
Orchestrator is a passive monitor, not an active driver.
Usage:
python mandali.py --plan path/to/plan.md --out-path ./output
python mandali.py --prompt "Read phases/_CONTEXT.md and phases/_INDEX.md. Complete all phases." --out-path ./output
python mandali.py --prompt "Build a feature" --generate-plan --out-path ./output
Requirements:
pip install github-copilot-sdk pyyaml rich
"""
import argparse
import asyncio
import json
import os
import re
import shutil
import subprocess
import sys
import threading
import urllib.request
import yaml
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any
from dataclasses import dataclass, field
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from rich.prompt import Prompt, Confirm
from rich.markup import escape
console = Console()
# GitHub Copilot SDK
from copilot import CopilotClient
# Fallback version string, used when mandali is run from a source checkout.
__version__ = "0.1.0"
try:
    from importlib.metadata import version as _pkg_version
    # Prefer the installed distribution's version when available.
    __version__ = _pkg_version("mandali")
except Exception:
    pass  # Not installed as package — use hardcoded fallback
# Repository queried by the background update check (raw pyproject.toml fetch).
GITHUB_REPO = "nmallick1/mandali"
def _check_for_updates():
    """Check GitHub for a newer version (runs in background thread, never blocks)."""
    try:
        url = f"https://raw.githubusercontent.com/{GITHUB_REPO}/main/pyproject.toml"
        request = urllib.request.Request(
            url, headers={"User-Agent": "mandali-update-check"}
        )
        with urllib.request.urlopen(request, timeout=3) as response:
            body = response.read().decode("utf-8")
        found = re.search(r'^version\s*=\s*"([^"]+)"', body, re.MULTILINE)
        if found is None:
            return
        remote = found.group(1)
        if remote == __version__:
            return
        # Use print() — not console.print() — to avoid Rich thread-safety issues
        print(
            f" Update available: {__version__} → {remote}. "
            f"Run: pip install --upgrade git+https://github.com/{GITHUB_REPO}.git"
        )
    except Exception:
        pass  # Network issues, rate limits — silently ignore
def check_for_updates_async():
    """Fire-and-forget update check in a daemon thread."""
    worker = threading.Thread(target=_check_for_updates, daemon=True)
    worker.start()
def get_copilot_cli_path() -> str:
    """
    Discover the copilot CLI path, handling Windows specifics.

    On Windows, we need to use the .cmd wrapper or the node loader directly.
    Exits with clear instructions if the CLI is not found.

    Returns:
        Absolute path (or resolvable command name) of the copilot CLI.
    """
    # Check environment variable first; an explicitly-set but invalid path is
    # a hard error so the user notices the misconfiguration.
    env_path = os.environ.get("COPILOT_CLI_PATH")
    if env_path:
        if os.path.isfile(env_path) or shutil.which(env_path):
            return env_path
        # Fixed level "ERROR" -> "ERR": log() only styles the "ERR" key.
        log(f"COPILOT_CLI_PATH is set to '{env_path}' but it was not found.", "ERR")
        sys.exit(1)

    # Try to find copilot in PATH. On Windows the .cmd wrapper must be
    # checked before the bare name so the node loader is invoked correctly.
    candidates = ["copilot.cmd", "copilot"] if sys.platform == "win32" else ["copilot"]
    for candidate in candidates:
        resolved = shutil.which(candidate)
        if resolved:
            return resolved

    # Not found — give clear instructions
    log("GitHub Copilot CLI not found in PATH.", "ERR")
    console.print(Panel(
        "[bold]GitHub Copilot CLI is required but was not found.[/bold]\n\n"
        "Install it with:\n"
        " [cyan]winget install GitHub.Copilot[/cyan] (Windows)\n"
        " [cyan]npm install -g @github/copilot[/cyan] (any platform)\n\n"
        "Or set the path manually:\n"
        " [cyan]export COPILOT_CLI_PATH=/path/to/copilot[/cyan] (Linux/macOS)\n"
        " [cyan]set COPILOT_CLI_PATH=C:\\path\\to\\copilot.cmd[/cyan] (Windows)\n\n"
        "After installing, verify with:\n"
        " [cyan]copilot --version[/cyan]\n\n"
        "For more info: [link]https://github.com/nmallick1/mandali#prerequisites[/link]",
        title="⚠️ Missing Prerequisite",
        border_style="yellow",
    ))
    sys.exit(1)
# ============================================================================
# Configuration
# ============================================================================
SCRIPT_DIR = Path(__file__).parent           # Directory containing this script
CONFIG_FILE = SCRIPT_DIR / "config.yaml"     # Orchestrator configuration
PERSONAS_DIR = SCRIPT_DIR / "personas"       # Static persona definitions
STALL_TIMEOUT_SECONDS = 300  # 5 minutes without activity = stall
POLL_INTERVAL_SECONDS = 10  # Check status every 10 seconds
QUIET_MODE = False  # Set by --quiet flag; suppresses non-essential output

# Lock for serializing file writes to prevent race conditions.
# (threading is already imported at the top of the file — the duplicate
# function-level import that used to live here was removed.)
_satisfaction_lock = threading.Lock()

# Debug logging — enabled via --debug flag, writes JSONL to mandali-artifacts/debug.jsonl
_debug_enabled = False
_debug_file = None
def _debug_log(event: str, data: dict):
    """Write a debug event to the JSONL log file if debugging is enabled."""
    # Both the flag and a target file must be set, otherwise do nothing.
    if not (_debug_enabled and _debug_file):
        return
    record = {
        "timestamp": datetime.now().isoformat(),
        "event": event,
    }
    record.update(data)
    try:
        with open(_debug_file, 'a', encoding='utf-8') as handle:
            handle.write(json.dumps(record, default=str) + "\n")
    except Exception:
        pass  # Debug logging must never crash the app
# ============================================================================
# Dynamic Persona Constants
# ============================================================================
# Required YAML frontmatter keys for every .persona.md file; validated by
# parse_persona_frontmatter() below.
PERSONA_FRONTMATTER_KEYS = ['id', 'name', 'domain', 'role', 'mention']
# Universal behavioral skeleton for dynamically generated personas.
# Slots use {placeholder} syntax, filled by render_persona().
# Runtime tokens use {{TOKEN}} syntax, filled by the orchestrator at launch.
PERSONA_SKELETON_TEMPLATE = """---
id: {id}
name: {name}
domain: {domain}
role: {role}
mention: "{mention}"
---
# {name} - {role_name}
> {role_description}
## Team
{{TEAM_ROSTER}}
## Engagement
{engagement_rules}
- **Before responding**: check last {{CONVERSATION_CHECK_LINES}} lines of conversation for relevant context
## Key Files
You have access to ALL tools available in your environment — use whatever tools are needed to accomplish your work. Key files for this project:
- `_CONTEXT.md` — global context, architecture decisions, non-negotiables
- `_INDEX.md` — phase tracking, progress status
- `phase-*.md` — detailed tasks per phase
- `conversation.txt` — team communication
- `DecisionsTracker.md` — deviation log for human review
## Decision Tracking
Record deviations in `DecisionsTracker.md` (path in your initial prompt). This is a **deviation log for human review** — a human reads it to diff "what I asked for" vs "what I got." Record when:
{decision_tracking_triggers}
**Catch-all:** Record any choice a human comparing plan to implementation would be surprised by, including choices where the plan was silent. Read existing decisions first — don't re-litigate settled choices. Use the template format with `[HH:MM:SS]` timestamps.
---
## Phased Development Workflow
1. Read `_CONTEXT.md` first → `_INDEX.md` → current phase file
2. Complete each phase fully before moving to the next
3. Verify quality gates before declaring phase complete
### Phase 0A: Context Building
Before discussion, build complete understanding:
1. Read `_CONTEXT.md` first — understand the user's original ask, the problem being solved, and the big picture
2. Read the full plan and explore relevant materials — understand scope, constraints, existing work
3. Identify domain-specific concerns this plan may not have considered
4. Post: `@Team - I have reviewed the plan and materials. Ready for design discussion.`
Wait for ALL agents to confirm before design discussion begins.
### Phase 0B: Design Discussion
{phase_0b_actions}
---
## Domain Expertise
{domain_expertise}
## Non-Negotiables
{non_negotiables}
## Quality Definition
{quality_definition}
## Core Rules
{core_rules}
## Self-Unblocking (2-Strike Rule)
After raising a concern twice without resolution, you MUST either:
1. Propose a concrete resolution (a specific deliverable, fix, or alternative approach), or
2. Yield and record the disagreement in `DecisionsTracker.md`
No endless stalemates. The goal is progress, not being right.
## Domain Ownership & Conflict Resolution
- **Own**: {domain_ownership}
- **Defer to**: {defer_to}
- **Shared jurisdiction**: {shared_jurisdiction}
- **Conflict resolution**: {conflict_resolution_stance}
## Phase Responsibilities
{phase_responsibilities}
---
## Self-Validation
Verify your own work actually produces the expected result before declaring done. Don't rely solely on others to catch your mistakes. If your deliverable can be checked, check it.
## Incremental Review
Review after each phase, not just at the end. Problems found early are cheaper to fix. After each phase, verify that earlier deliverables still hold — new work can break prior work.
---
## Satisfaction Criteria
ALL must be true to declare SATISFIED:
{satisfaction_criteria}
**⚠️ Do NOT declare SATISFIED after one phase. Only when ALL phases are done or STOP directive reached.**
## Response Format
```
@Team - [Brief status]
PHASE: [current] | STATUS: [In Progress / Complete / Blocked]
{response_format_fields}
SATISFACTION_STATUS: WORKING | SATISFIED | BLOCKED - [reason] | PAUSED
```
"""
CLASSIFIER_PROMPT = """You are a task classifier. Respond with EXACTLY the format shown below. Nothing else.
## Decision Rule
Ask ONE question: **Does the task require producing or modifying software as a deliverable?**
- YES → software-development (building APIs, CLIs, libraries, infrastructure-as-code, scripts, configurations)
- NO → non-software (reports, analysis, writing, research, design, planning — even if the SUBJECT is about software)
- BOTH → mixed (software deliverables AND non-software deliverables in different domains)
The subject of the task does not determine the type. Only the deliverable does.
- "Analyze AI code review tools" → non-software (deliverable is a report ABOUT software, not software itself)
- "Build an AI code review tool" → software-development (deliverable IS software)
- "Build a dashboard AND write a market analysis" → mixed
## Domain Rules
- Use lowercase slugs (e.g., market-research, technical-writing, data-analysis)
- For pure software tasks: DOMAIN_1 is software-development
- Only list domains needing dedicated expertise. Skip generic skills.
- 1-3 domains max. Use NONE for unused slots.
## Output Format (copy this exactly, fill in values)
TASK_TYPE: <software-development or non-software or mixed>
DOMAIN_1: <primary-domain-slug>
DOMAIN_2: <supporting-domain-slug or NONE>
DOMAIN_3: <supporting-domain-slug or NONE>
IMPORTANT: TASK_TYPE must be one of exactly three values: software-development, non-software, mixed.
Respond with ONLY the 4 lines above. No explanation, no JSON, no markdown fences.
"""
PERSONA_GENERATOR_PROMPT = """
## Quality Bar (ALL personas MUST embody these traits)
1. High standards, zero ego — critique the work, not the person
2. Goes beyond the ask — anticipate issues, suggest improvements proactively
3. Domain depth — bring genuine specialist knowledge, not generic platitudes
4. Adversarial rigor — challenge assumptions, demand evidence, test boundaries
5. Opinionated but flexible — have strong defaults, yield to better arguments
6. Concrete over abstract — code samples, specific metrics, real examples over vague guidance
7. Self-aware scope — know what you own, what you don't, and say so explicitly
8. Progress-oriented — unblock yourself and others, never stall for perfection
9. Deliverable-focused — every action should advance a concrete output
10. Honest about uncertainty — say "I don't know" rather than fabricate domain expertise
11. Incremental verification — verify work at each step, don't batch-validate at the end
12. Collaborative by default — engage with teammates' work, build on each other's output
13. Domain-appropriate quality gates — enforce professional standards for your domain even when the user doesn't ask explicitly (e.g., code personas: performance, resource consumption, unhandled exceptions; analytical personas: validated facts, cited sources, reproducible methodology; writing personas: accuracy, logical structure, audience-appropriate tone)
## Role Types
- **Doer**: Primary implementer for the domain. Produces deliverables, proposes approaches, executes. May also review and challenge others' work that touches their domain.
- **Critic**: Quality challenger for the domain. Challenges methodology, verifies rigor, catches blind spots. NOT passive — actively contributes solutions, writes fixes, and demonstrates better approaches when critiquing. A Critic who only points out problems without helping resolve them is failing.
- **Scope-keeper**: Cross-domain awareness. Ensures domains don't drift, resolves boundary disputes, maintains coherent big picture.
Note: These roles define primary orientation, not rigid boundaries. A QA Doer writes tests AND critiques the dev's work from a quality perspective. A Security Critic spots vulnerabilities AND suggests fixes or writes patches. Every persona is expected to be hands-on in their domain.
## Persona File Structure
The .persona.md file MUST include:
- YAML frontmatter with: id, name, domain, role, mention
- Role name and description
- Engagement rules (when to speak, when to stay quiet)
- Decision tracking triggers
- Phase 0B actions (design discussion behavior — MUST include negotiating domain-specific quality gates beyond user's explicit requirements)
- Domain expertise (2-4 paragraphs of deep domain knowledge)
- Non-negotiables (absolute rules this persona enforces)
- Quality definition (what "good" means in this domain)
- Core operating principles (5-10 rules)
- Domain ownership, defer-to, and shared jurisdiction
- Conflict resolution stance
- Phase responsibilities
- Satisfaction criteria (checklist — ALL must be true to declare SATISFIED)
- Response format fields
## Important
- Generate personas with REAL domain depth — not generic "I review things" descriptions
- The persona must be useful for the SPECIFIC domain, not a generic template with domain name swapped in
- Engagement rules must be specific to the domain's concerns
- Satisfaction criteria must reflect domain-specific quality gates
- The id must be unique and descriptive (not "persona-1")
"""
DEDUP_AGENT_PROMPT = """You are a deduplication analyst for a multi-agent collaboration system.
Your job: Analyze a set of persona definitions for overlap. You receive full persona file contents (not summaries) because you must distinguish surface overlap from real functional overlap.
Two personas that both mention "data quality" may serve completely different functions:
- A "Data Quality Reviewer" focused on pipeline validation is different from
- A "Data Quality Reviewer" focused on statistical methodology review
Read the FULL definitions carefully before deciding.
Output ONLY valid JSON (no markdown fences) with this structure:
{
"keep": [
{"id": "<persona-id>", "reason": "<why this persona is unique and needed>"}
],
"drop": [
{"id": "<persona-id>", "reason": "<why this persona is redundant>", "covered_by": "<id of persona that covers this>"}
],
"merge": [
{
"sources": ["<persona-id-1>", "<persona-id-2>"],
"merged_name": "<suggested name for merged persona>",
"reason": "<why these should be combined>",
"merge_guidance": "<what to keep from each source>"
}
]
}
Rules:
- Default to KEEP unless you find genuine functional overlap (not just label similarity)
- MERGE when two personas have significantly overlapping expertise and responsibilities, making them compete for the same work
- DROP when one persona is strictly a subset of another (the broader one covers everything the narrow one does)
- Static team members (Dev, PM, QA, Security, SRE) cannot be dropped or merged — only consider overlap between dynamic personas, and between dynamic and static
- When a dynamic persona overlaps with a static persona, DROP the dynamic one (static personas are hand-tuned and take priority)
- Be conservative: when in doubt, KEEP both. False dedup is worse than mild redundancy.
"""
# ============================================================================
# Data Classes
# ============================================================================
@dataclass
class PersonaAgent:
    """Represents an autonomous AI persona.

    One instance per team member; session/task are populated by the
    orchestrator when the agent is launched.
    """
    id: str  # Stable persona identifier (e.g. "dev", "pm")
    name: str  # Human-readable display name
    mention: str  # @Dev, @PM, etc.
    session: Any = None  # Copilot SDK session object, created at launch
    task: Optional[asyncio.Task] = None  # Background task
    prompt_file: Optional[str] = None  # Path to persona file (dynamic personas)
    dynamic: bool = False  # True for dynamically generated personas
    domain: Optional[str] = None  # Domain (for dynamic personas)
    session_lock: asyncio.Lock = field(default_factory=asyncio.Lock)  # Serializes session access
    model: Optional[str] = None  # Per-persona model override (falls back to orchestrator model)
@dataclass
class Workspace:
    """Shared workspace for agent communication.

    Encapsulates the on-disk layout: agent deliverables in `path`,
    orchestration files in `mandali-artifacts/`, and phased plan files in
    `phases/`. Supports both the phased plan structure (_CONTEXT.md /
    _INDEX.md / phase-*.md) and a legacy single plan.md.
    """
    path: Path  # Main output directory where agents create feature files
    artifacts_path: Path  # mandali-artifacts subfolder for orchestration files
    phases_path: Path  # phases subfolder for phased plan files
    conversation_file: Path  # Shared team conversation transcript
    satisfaction_file: Path  # Agent satisfaction status file
    decisions_file: Path  # DecisionsTracker.md deviation log
    plan_file: Path  # Legacy single-file plan OR _INDEX.md for phased plans
    context_file: Path  # _CONTEXT.md for phased plans
    index_file: Path  # _INDEX.md for phased plans
    metrics_file: Path  # metrics.json collaboration metrics

    @classmethod
    def create(cls, out_path: Path) -> 'Workspace':
        """Create a Workspace from an output path.

        Only computes paths — call ensure_exists() to materialize them.
        """
        artifacts = out_path / "mandali-artifacts"
        phases = out_path / "phases"
        return cls(
            path=out_path,
            artifacts_path=artifacts,
            phases_path=phases,
            conversation_file=artifacts / "conversation.txt",
            satisfaction_file=artifacts / "satisfaction.txt",
            decisions_file=artifacts / "DecisionsTracker.md",
            plan_file=artifacts / "plan.md",  # Legacy fallback
            context_file=phases / "_CONTEXT.md",
            index_file=phases / "_INDEX.md",
            metrics_file=artifacts / "metrics.json"
        )

    def ensure_exists(self):
        """Create directories and seed files so agents can start writing."""
        self.path.mkdir(parents=True, exist_ok=True)
        self.artifacts_path.mkdir(parents=True, exist_ok=True)
        self.phases_path.mkdir(parents=True, exist_ok=True)
        self.conversation_file.touch()
        self.satisfaction_file.touch()
        # Copy DecisionsTracker template if it doesn't already exist
        if not self.decisions_file.exists():
            template = SCRIPT_DIR / "DecisionsTracker.md"
            if template.exists():
                shutil.copy2(template, self.decisions_file)
            else:
                self.decisions_file.touch()

    def is_phased_plan(self) -> bool:
        """Check if this workspace uses phased plan structure."""
        return self.index_file.exists() and self.context_file.exists()

    def get_plan_content(self) -> str:
        """Get plan content, preferring phased structure.

        Returns _CONTEXT.md + _INDEX.md + all phase-*.md files (in sorted
        order) for phased plans, the single plan.md for legacy plans, or
        an empty string when no plan exists.
        """
        if self.is_phased_plan():
            # For phased plans, return _CONTEXT.md + _INDEX.md + all phase files
            content_parts = []
            # Read _CONTEXT.md first
            if self.context_file.exists():
                content_parts.append(f"# === _CONTEXT.md (READ FIRST) ===\n\n{self.context_file.read_text(encoding='utf-8')}")
            # Read _INDEX.md
            if self.index_file.exists():
                content_parts.append(f"\n\n# === _INDEX.md ===\n\n{self.index_file.read_text(encoding='utf-8')}")
            # Read all phase files
            phase_files = sorted(self.phases_path.glob("phase-*.md"))
            for pf in phase_files:
                content_parts.append(f"\n\n# === {pf.name} ===\n\n{pf.read_text(encoding='utf-8')}")
            return "\n".join(content_parts)
        elif self.plan_file.exists():
            # Fallback to single-file plan
            return self.plan_file.read_text(encoding='utf-8')
        else:
            return ""
@dataclass
class Metrics:
    """Collaboration metrics, serialized to metrics.json at end of run."""
    start_time: str = ""  # Run start timestamp (string form)
    end_time: str = ""  # Run end timestamp (string form)
    total_messages: int = 0  # Messages exchanged across the team
    human_escalations: int = 0  # Times a human was pulled in
    nudges: int = 0  # Times orchestrator nudged inactive agents
    decisions_logged: int = 0  # Decisions recorded during the run
    victory: bool = False  # Whether the run ended successfully
    verification_rounds: int = 0  # Number of verification passes performed
    verification_passed: bool = False  # Outcome of verification
    per_agent: Dict[str, Dict] = field(default_factory=dict)  # Per-agent stats — presumably keyed by persona id; confirm against writers
@dataclass
class TaskClassification:
    """Result of classifying a task into type and domains."""
    task_type: str  # "software-development" | "non-software" | "mixed"
    domains: list  # [{"name": "analytics", "role_in_task": "primary"}, ...]
    interview_summary: dict = field(default_factory=dict)  # NOTE(review): shape not shown here — confirm against the interview flow
# ============================================================================
# Utilities
# ============================================================================
def log(msg: str, level: str = "INFO"):
    """Log with timestamp and styled output. Suppressed in --quiet mode except HUMAN/ERR/WARN."""
    # Non-essential levels are dropped entirely in quiet mode.
    if QUIET_MODE and level in ("INFO", "OK", "AGENT"):
        return
    style_map = {
        "INFO": ("ℹ️", "bright_blue"),
        "OK": ("✅", "green"),
        "WARN": ("⚠️", "yellow"),
        "ERR": ("❌", "red bold"),
        "AGENT": ("🤖", "cyan"),
        "HUMAN": ("👤", "magenta"),
    }
    # Unknown levels fall back to a plain white bullet.
    symbol, style = style_map.get(level, ("•", "white"))
    stamp = datetime.now().strftime("%H:%M:%S")
    console.print(f"[dim]{stamp}[/dim] {symbol} [{style}]{escape(msg)}[/{style}]")
def load_config() -> dict:
    """Parse and return the orchestrator configuration from config.yaml."""
    return yaml.safe_load(CONFIG_FILE.read_text(encoding='utf-8'))
def load_mcp_config() -> dict:
    """Load MCP server configuration from ~/.copilot/mcp-config.json.

    The Copilot SDK does NOT automatically inherit MCP config from the CLI.
    We must explicitly load and pass it to each session.
    """
    # Check multiple possible locations
    candidates = (
        Path.home() / ".copilot" / "mcp-config.json",  # User config (primary)
        Path.cwd() / ".copilot" / "mcp-config.json",   # Project config
    )
    for config_path in candidates:
        if not config_path.exists():
            continue
        try:
            with open(config_path, 'r', encoding='utf-8') as handle:
                parsed = json.load(handle)
        except (json.JSONDecodeError, IOError) as e:
            log(f"Failed to load MCP config from {config_path}: {e}", "WARN")
            continue
        servers = parsed.get("mcpServers", {})
        if servers:
            log(f"Loaded MCP config from {config_path} ({len(servers)} servers)", "OK")
            return servers
    log("No MCP config found - agents will have limited tool access", "WARN")
    return {}
# Global MCP config (loaded once at startup)
MCP_SERVERS_CONFIG: dict = {}

def _build_session_config(model: str, system_message: str, working_directory: str = None) -> dict:
    """Build a session config with full tool access (MCP servers, skills, extensions).

    All sessions — persona agents and orchestrator housekeeping agents alike —
    get the same tool access. The system prompt controls behavior, not tool availability.
    """
    session_config = {
        "model": model,
        "system_message": system_message,
    }
    if working_directory:
        session_config["working_directory"] = working_directory
    # Point the session at the user's Copilot config dir when present.
    user_config_dir = Path.home() / ".copilot"
    if user_config_dir.exists():
        session_config["config_dir"] = str(user_config_dir)
    if MCP_SERVERS_CONFIG:
        session_config["mcp_servers"] = MCP_SERVERS_CONFIG
    return session_config
def load_persona_prompt(persona_id: str, prompt_file: str = None,
                        team_roster: list = None, team_size: int = None) -> str:
    """Load a persona prompt file, optionally replacing runtime tokens.

    For static personas: loads from personas/ directory.
    For dynamic personas: loads from the specified prompt_file path, strips YAML frontmatter.
    Replaces {{TEAM_ROSTER}} and {{CONVERSATION_CHECK_LINES}} tokens if team info provided.
    """
    source = Path(prompt_file) if prompt_file else PERSONAS_DIR / f"{persona_id}.persona.md"
    text = source.read_text(encoding='utf-8')
    # Dynamic persona files carry YAML frontmatter that must not reach the prompt.
    if prompt_file and text.startswith('---'):
        text = strip_persona_frontmatter(text)
    # Substitute runtime tokens when team information is available.
    if team_roster is not None:
        roster = format_team_roster(team_roster, current_persona_id=persona_id)
        text = text.replace('{{TEAM_ROSTER}}', roster)
    if team_size is not None:
        window = compute_conversation_check_lines(team_size)
        text = text.replace('{{CONVERSATION_CHECK_LINES}}', str(window))
    return text
def _strip_code_fences(text: str) -> str:
    """Strip markdown code fences from LLM response text.

    Handles ```json, ```JSON, bare ```, and no-newline variants.
    """
    result = text.strip()
    if result.startswith("```"):
        first_newline = result.find("\n")
        if first_newline != -1:
            # Drop the whole opening fence line, language tag and all.
            result = result[first_newline + 1:]
        else:
            # No newline: strip the backticks, then any leading language tag.
            result = result[3:]
            for tag in ("json", "JSON", "yaml", "YAML"):
                if result.startswith(tag):
                    result = result[len(tag):]
                    break
    # Remove closing fence
    if result.rstrip().endswith("```"):
        result = result.rstrip()[:-3].strip()
    return result
def render_persona(skeleton: str, slots: dict) -> str:
    """Fill placeholder slots in a persona skeleton template.

    Uses simple string replacement so that {{RUNTIME_TOKENS}} are preserved
    (they don't match any {single_brace_key} pattern).

    Args:
        skeleton: Template text containing {slot_name} placeholders.
        slots: Mapping of slot name -> replacement value (stringified).

    Returns:
        The skeleton with every provided slot substituted.
    """
    # Removed the redundant function-local `import re` — re is imported at
    # module level already.
    result = skeleton
    for key, value in slots.items():
        result = result.replace(f'{{{key}}}', str(value))
    # Warn about unreplaced single-brace placeholders (not {{runtime}} tokens).
    # Only lowercase/underscore names are matched, per the slot naming convention.
    unreplaced = re.findall(r'(?<!\{)\{([a-z_]+)\}(?!\})', result)
    if unreplaced:
        log(f"Unreplaced placeholders in persona: {unreplaced}", "WARN")
    return result
def parse_persona_frontmatter(filepath: Path) -> dict:
    """Extract YAML frontmatter from a .persona.md file.

    Returns dict with keys: id, name, domain, role, mention.

    Raises:
        ValueError: If frontmatter is missing, unterminated, empty/invalid,
            or lacks any of the required PERSONA_FRONTMATTER_KEYS.
    """
    # Removed the redundant function-local `import yaml` — yaml is imported
    # at module level already.
    content = filepath.read_text(encoding='utf-8')
    lines = content.split('\n')
    if not lines or lines[0].strip() != '---':
        raise ValueError(f"No YAML frontmatter found in {filepath}")
    # Find closing --- on its own line with no indentation (skip line 0)
    end_line = None
    for i, line in enumerate(lines[1:], 1):
        if line.rstrip() == '---':
            end_line = i
            break
    if end_line is None:
        raise ValueError(f"No closing frontmatter delimiter in {filepath}")
    frontmatter_str = '\n'.join(lines[1:end_line])
    frontmatter = yaml.safe_load(frontmatter_str)
    if frontmatter is None:
        raise ValueError(f"Empty or invalid YAML frontmatter in {filepath}")
    # Validate required keys
    missing = [k for k in PERSONA_FRONTMATTER_KEYS if k not in frontmatter]
    if missing:
        raise ValueError(f"Missing frontmatter keys in {filepath}: {missing}")
    return {k: frontmatter[k] for k in PERSONA_FRONTMATTER_KEYS}
def strip_persona_frontmatter(content: str) -> str:
    """Remove YAML frontmatter from persona file content, returning just the prompt."""
    lines = content.split('\n')
    # No opening delimiter: nothing to strip.
    if not lines or lines[0].strip() != '---':
        return content
    # Return everything after the closing delimiter line.
    for index in range(1, len(lines)):
        if lines[index].rstrip() == '---':
            return '\n'.join(lines[index + 1:]).lstrip('\n')
    # Unterminated frontmatter: leave content untouched.
    return content
def compute_conversation_check_lines(team_size: int) -> int:
    """Adaptive conversation window: scales with team size (floor of 50 lines)."""
    scaled = team_size * 15
    return scaled if scaled > 50 else 50
def format_team_roster(team: list, current_persona_id: str = None) -> str:
    """Format team roster as @mention list for persona files.

    Marks the current persona with '(you)' for self-awareness.
    """
    def _label(member: dict) -> str:
        # Fall back to "@Name" when no explicit mention handle is set.
        handle = member.get('mention', f"@{member['name']}")
        return f"{handle} (you)" if member['id'] == current_persona_id else handle

    return ', '.join(_label(member) for member in team)
def build_orchestrator_message(team_roster: list, plan_location: str, task_type: str = "software-development",
                               review_notes_path: str = None) -> str:
    """Generate the Phase 0A/0B/Communication conversation message dynamically.

    Replaces the hardcoded @PM/@Dev/@Security/@QA/@SRE block with role-based
    instructions built from the actual team roster.

    Args:
        team_roster: List of persona dicts; each has at least 'id' and 'name',
            optionally 'mention' and 'role' ('Critic', 'Doer', 'Scope-keeper').
        plan_location: Path/description of the plan the team will implement.
        task_type: One of "software-development", "non-software", or "mixed".
        review_notes_path: Optional path to pre-execution review notes; when
            set, the message tells agents to read it before discussion.

    Returns:
        The full orchestrator kickoff message (markdown) as a single string.
    """
    # Determine lead
    has_pm = any(m['id'] == 'pm' for m in team_roster)
    if task_type == "non-software" and not has_pm:
        # Pure non-code: Scope-keeper leads
        scope_keepers = [m for m in team_roster if m.get('role') == 'Scope-keeper']
        lead = scope_keepers[0] if scope_keepers else team_roster[0]
    else:
        # Code or mixed: PM leads (or first persona if no PM)
        pm = next((m for m in team_roster if m['id'] == 'pm'), None)
        lead = pm or team_roster[0]
    lead_mention = lead.get('mention', f"@{lead['name']}")
    # Build Phase 0B role-based instructions. Steps are numbered sequentially;
    # critics (if any) are always step 2, so the literal "2." below is safe.
    critics = [m for m in team_roster if m.get('role') == 'Critic']
    doers = [m for m in team_roster if m.get('role') == 'Doer' and m['id'] != lead['id']]
    phase_0b_steps = [f"1. **{lead_mention}**: Present the plan, clarify acceptance criteria, lead the discussion"]
    if critics:
        critic_mentions = ', '.join(m.get('mention', f"@{m['name']}") for m in critics)
        phase_0b_steps.append(f"2. Each Critic ({critic_mentions}): Raise domain-specific concerns NOW")
    if doers:
        doer_mentions = ', '.join(m.get('mention', f"@{m['name']}") for m in doers)
        step_num = len(phase_0b_steps) + 1
        phase_0b_steps.append(f"{step_num}. Each Doer ({doer_mentions}): Propose approach, identify risks, suggest adjustments")
    # For static code team, add specific role callouts
    # NOTE(review): this hardcodes "@Security" rather than using the persona's
    # 'mention' field like the other steps — confirm the security persona's
    # mention is always exactly "@Security".
    security = next((m for m in team_roster if m['id'] == 'security'), None)
    if security:
        step_num = len(phase_0b_steps) + 1
        phase_0b_steps.append(f"{step_num}. **@Security**: Raise ALL security concerns NOW (not during implementation)")
    step_num = len(phase_0b_steps) + 1
    phase_0b_steps.append(f"{step_num}. ALL agents must participate and acknowledge the plan")
    phase_0b_text = '\n'.join(phase_0b_steps)
    # Security gate for mixed/code tasks
    security_gate = ""
    if security and task_type in ("software-development", "mixed"):
        security_gate = "\n- @Security must approve the security approach BEFORE implementation begins"
    # Phase 0B deliverables (gap analysis)
    deliverables = f"""
### Design Discussion Deliverables
Design discussion produces updated artifacts, not just conversation:
1. If the team identified gaps, missing phases, or restructuring:
   - {lead_mention} updates _INDEX.md to reflect agreed structure
   - Affected phase files are edited (added tasks, modified criteria, reordered work)
   - New phase files are created if the team agreed to add phases
2. All decisions and filled gaps recorded in DecisionsTracker.md
3. {lead_mention} declares: "@Team design discussion complete. Plan files updated. Begin Phase 1"
"""
    # Communication section with full mention list
    all_mentions = ', '.join(m.get('mention', f"@{m['name']}") for m in team_roster)
    all_mentions += ', @Team, @AllAgents'
    # Phased workflow section
    phased_workflow = f"""
## Phased Plan Workflow (if using phases/ structure)
After each phase is complete:
1. {lead_mention} updates `_INDEX.md` with: ✅ Complete, commit hash
2. {lead_mention} verifies `DecisionsTracker.md` has entries for any deviations made during this phase — if choices were made that differ from the plan or where the plan was silent, they must be recorded before moving on
3. {lead_mention} announces: "@Team Phase X complete, proceeding to Phase Y"
4. If plan says "STOP after Phase X", team stops and reports to human
"""
    # Review notes reference (if plan review produced recommendations)
    review_notes_ref = ""
    if review_notes_path:
        review_notes_ref = f"\n- **Before starting discussion**: Read `{review_notes_path}` — it contains pre-execution review notes to consider"
    return f"""@AllAgents - Welcome to Mandali!
You are an autonomous team implementing {plan_location}
---
## PHASE 0A: CONTEXT BUILDING (Before Design Discussion)
Before discussing the design, each agent MUST build a complete understanding:
### Required Actions for EACH Agent:
1. **Read _CONTEXT.md FIRST** (if phased plan) - contains global architecture, security, non-negotiables
2. **Read _INDEX.md** (if phased plan) - shows phase status and dependencies
3. **Read the relevant phase file(s)** - understand tasks and quality gates
4. **Explore the codebase** - understand project structure, patterns, conventions
5. **Launch background agents** if needed to explore large codebases efficiently
6. **Understand dependencies** - what exists, what needs to be built
### Your Tools:
- Use `view` to read files
- Use `glob` and `grep` to explore the codebase
- Use `task` tool with agent_type="explore" for parallel codebase exploration
- Take your time - understanding the full picture is critical
### When Ready:
Each agent should post: "@Team - I have reviewed the plan and codebase. Ready for design discussion."
**Wait for ALL agents to confirm readiness before starting design discussion.**
---
## PHASE 0B: DESIGN DISCUSSION (After All Agents Ready)
Once ALL agents confirm readiness, begin design discussion:
{phase_0b_text}
**Rules for Design Discussion:**
- ALL agents must participate and acknowledge the plan{security_gate}{review_notes_ref}
- Team may reorder phases, add sub-phases, or adjust scope
{deliverables}
---
{phased_workflow}
---
## Communication
- Use @mentions: {all_mentions}
- End each message with SATISFACTION_STATUS
## Victory Condition
All agents SATISFIED = Implementation complete.
---
@AllAgents - Begin by reading the plan and exploring the codebase.
Post when you're ready for design discussion.
"""
async def _send_and_get_response(client, model: str, system_prompt: str, message: str,
                                 timeout_seconds: int = 120) -> str:
    """Send a single message to an LLM session and return the response text.

    Uses the event-based SDK pattern (create_session + on + send).
    No tools/MCP/skills — this is for pure text-in/text-out calls
    (classification, persona generation, dedup, merge).

    Args:
        client: SDK client exposing an async ``create_session`` method.
        model: Model identifier to pass to the session.
        system_prompt: System message for the session.
        message: User prompt to send.
        timeout_seconds: Maximum time to wait for the session to go idle.

    Returns:
        Concatenated text of all "assistant.message" events.

    Raises:
        TimeoutError: If no response within timeout_seconds.
    """
    session = await client.create_session({
        "model": model,
        "system_message": system_prompt,
    })
    response_parts = []
    done = asyncio.Event()

    def on_event(event):
        # Accumulate streamed assistant chunks; "session.idle" signals completion.
        if event.type.value == "assistant.message":
            response_parts.append(event.data.content)
        elif event.type.value == "session.idle":
            done.set()

    unsubscribe = session.on(on_event)
    try:
        await session.send({"prompt": message})
        await asyncio.wait_for(done.wait(), timeout=timeout_seconds)
    except asyncio.TimeoutError as exc:
        # Preserve the original cause chain when translating to builtin TimeoutError.
        raise TimeoutError(f"LLM session timed out after {timeout_seconds}s") from exc
    finally:
        # Nested finally: the session is destroyed even if unsubscribe() raises,
        # so a callback-teardown error can never leak the session.
        try:
            unsubscribe()
        finally:
            await session.destroy()
    response = ''.join(response_parts)
    _debug_log("llm_call", {
        "system_prompt_preview": system_prompt[:200],
        "message_preview": message[:500],
        "response_preview": response[:1000],
        "response_length": len(response),
        "model": model,
    })
    return response
async def classify_task(client, model: str, user_prompt: str, interview_summary: dict) -> 'TaskClassification':
"""Classify a task into type and domains using LLM analysis.
Returns TaskClassification with task_type and ordered domains.
On unparseable response, retries in the same session (LLM already
has the analysis, just needs to reformat).
Conservative default: classifies as 'software-development' when uncertain.
"""
# Extract only deliverable-relevant fields from interview summary.
# The full summary contains domain jargon that confuses the classifier.
slim_summary = {}
for key in ("outcome", "project_name", "success_criteria", "scope",
"output_directory", "constraints", "implicit_requirements"):
if key in interview_summary:
slim_summary[key] = interview_summary[key]
# Put the user's original prompt LAST — recency bias helps the LLM
# focus on the actual ask rather than the elaborated interview content.
message = (
f"Classify this task.\n\n"
f"## Interview Context (deliverables only)\n{json.dumps(slim_summary, indent=2)}\n\n"
f"## User's Original Prompt (this is the PRIMARY signal for classification)\n{user_prompt}"
)
VALID_TASK_TYPES = ("software-development", "non-software", "mixed")
SW_DEV_DOMAIN = [{"name": "software-development", "role_in_task": "primary"}]
# Manage session manually for same-session retry
session = await client.create_session({
"model": model,
"system_message": CLASSIFIER_PROMPT,
})
async def _send_and_collect(msg: str) -> str:
parts = []
done = asyncio.Event()
def on_event(event):
if event.type.value == "assistant.message":
parts.append(event.data.content)
elif event.type.value == "session.idle":
done.set()
unsub = session.on(on_event)
try:
await session.send({"prompt": msg})
await asyncio.wait_for(done.wait(), timeout=120)
finally:
unsub()
return ''.join(parts)
task_type = None
domains = SW_DEV_DOMAIN
try:
response_text = await _send_and_collect(message)
text = response_text.strip()
_debug_log("llm_call", {
"system_prompt_preview": CLASSIFIER_PROMPT[:200],
"message_preview": message[:500],
"response_preview": text[:1000],
"response_length": len(text),
"model": model,