From fddc77dcb87611551019f8978863887990236ac0 Mon Sep 17 00:00:00 2001 From: nullhack Date: Fri, 1 May 2026 12:13:04 -0400 Subject: [PATCH] feat(smith): implement connect/disconnect/update/status CLI commands with local bundled templates --- .dockerignore | 212 ----- .github/workflows/ci.yml | 32 +- .github/workflows/dependency-review.yml | 6 +- .github/workflows/pypi-publish.yml | 61 +- .github/workflows/tag-release.yml | 33 +- .gitignore | 15 +- AGENTS.md | 256 ------- CHANGELOG.md | 367 +-------- README.md | 179 ++--- TODO.md | 4 - docs/{c4 => adr}/.gitkeep | 0 .../ADR_20260501_argparse-cli-framework.md | 55 ++ ...1_atomic-file-writes-via-temp-directory.md | 57 ++ ...0501_github-bundled-template-resolution.md | 64 ++ .../ADR_20260501_hexagonal-architecture.md | 51 ++ ...60501_local-bundled-template-resolution.md | 66 ++ docs/adr/ADR_20260501_no-smart-merge.md | 53 ++ docs/adr/ADR_20260501_smith-yaml-metadata.md | 55 ++ docs/architecture.md | 19 - docs/discovery.md | 39 - docs/discovery_journal.md | 67 -- docs/features/{backlog => }/.gitkeep | 0 .../features/backlog/smith-assimilate.feature | 117 --- docs/features/backlog/smith-commands.feature | 297 +++++++ docs/features/backlog/smith-new.feature | 110 --- .../completed/display-version.feature | 60 -- docs/index.html | 510 +++++++++++-- .../IN_20260422_scope-discovery.md | 65 ++ .../IN_20260501_local-bundle-reversal.md | 67 ++ ...N_20260501_smith-commands-specification.md | 158 ++++ .../IN_20260501_stakeholder-reinterview.md | 98 +++ ..._20260501_temple8-dependency-resolution.md | 72 ++ .../completed => post-mortem}/.gitkeep | 0 .../2026-04-14-ping-pong-cli-workflow-gaps.md | 176 ----- ...ping-pong-cli-package-and-design-review.md | 108 --- .../PM_20260501_conflict-exit-code-removal.md | 26 + ...260501_coverage-test-in-features-folder.md | 28 + ..._20260501_missing-feature-test-template.md | 39 + .../PM_20260501_missing-overwrite-flag.md | 54 ++ .../PM_20260501_moscow-gherkin-tags.md | 23 + 
.../PM_20260501_reviewer-fixing-code.md | 41 + .../PM_20260501_se-dirtying-living-docs.md | 27 + .../artificial-intelligence/liu_et_al_2023.md | 45 ++ .../design/accessibility/w3c_wcag21_2018.md | 48 ++ docs/research/design/visual/airey_2010.md | 48 ++ docs/research/design/visual/albers_1963.md | 48 ++ docs/research/design/visual/arnheim_1954.md | 48 ++ docs/research/design/visual/biederman_1987.md | 48 ++ docs/research/design/visual/hicks_2011.md | 47 ++ docs/research/design/visual/itten_1961.md | 48 ++ docs/research/design/visual/kare_1984.md | 47 ++ docs/research/design/visual/lupton_2010.md | 47 ++ .../design/visual/muller_brockmann_1981.md | 47 ++ docs/research/design/visual/rand_1985.md | 46 ++ .../research/design/visual/wertheimer_1923.md | 48 ++ .../documentation/procida_2021.md | 45 ++ .../domain-modeling/brandolini_2012.md | 45 ++ .../domain-modeling/evans_2003.md | 45 ++ .../domain-modeling/vernon_2013.md | 45 ++ .../cognitive/craik_lockhart_1972.md | 48 ++ .../cognitive/fisher_geiselman_1987.md | 47 ++ .../psychology/cognitive/flanagan_1954.md | 47 ++ .../psychology/cognitive/gollwitzer_1999.md | 48 ++ .../cognitive/hattie_timperley_2007.md | 48 ++ .../psychology/cognitive/kahneman_2011.md | 47 ++ .../psychology/cognitive/klein_1998.md | 47 ++ .../cognitive/mcdaniel_einstein_2000.md | 47 ++ .../psychology/cognitive/miller_1956.md | 48 ++ .../cognitive/reynolds_gutman_1988.md | 46 ++ .../cognitive/tversky_kahneman_1974.md | 46 ++ .../psychology/social/cialdini_2001.md | 47 ++ .../psychology/social/mellers_et_al_2001.md | 46 ++ .../psychology/social/rogers_farson_1957.md | 46 ++ .../psychology/social/tetlock_1985.md | 45 ++ .../architecture/bass_et_al_2021.md | 48 ++ .../architecture/boehm_1991.md | 47 ++ .../architecture/brown_2018.md | 48 ++ .../architecture/cockburn_2005.md | 47 ++ .../architecture/conway_1968.md | 47 ++ .../architecture/fielding_2000.md | 47 ++ .../architecture/fowler_2003.md | 47 ++ .../architecture/hohpe_woolf_2003.md | 47 ++ 
.../kazman_klein_clements_2000.md | 47 ++ .../architecture/kruchten_1995.md | 47 ++ .../architecture/martin_2012_clean.md | 48 ++ .../architecture/nygard_2011.md | 48 ++ .../architecture/parnas_1972.md | 47 ++ .../architecture/skelton_pais_2019.md | 48 ++ .../process/beck_1999_yagni.md | 45 ++ .../process/beyer_et_al_2016.md | 45 ++ .../process/calver_2020.md | 45 ++ .../process/clegg_barker_1994.md | 45 ++ .../process/fagan_1976.md | 45 ++ .../process/preston-werner_2013.md | 45 ++ .../process/reinertsen_2009.md | 45 ++ .../software-engineering/quality/bay_2008.md | 46 ++ .../software-engineering/quality/beck_2002.md | 48 ++ .../quality/demillo_lipton_sayward_1978.md | 45 ++ .../quality/feathers_2004.md | 46 ++ .../quality/fowler_1999.md | 46 ++ .../quality/freeman_pryce_2009.md | 45 ++ .../quality/gamma_et_al_1994.md | 46 ++ .../quality/google_testing_2013.md | 45 ++ .../quality/maciver_2016.md | 47 ++ .../quality/martin_2000_solid.md | 47 ++ .../quality/martin_2017_first_class_tests.md | 45 ++ .../quality/meszaros_2007.md | 46 ++ .../quality/north_2006.md | 45 ++ .../quality/shvets_2014.md | 46 ++ .../requirements/christel_kang_1992.md | 45 ++ .../requirements/kano_et_al_1984.md | 45 ++ .../requirements/wake_2003.md | 45 ++ .../requirements/wynne_2015.md | 45 ++ docs/scientific-research/README.md | 16 - docs/scientific-research/ai-agents.md | 118 --- docs/scientific-research/architecture.md | 86 --- docs/scientific-research/cognitive-science.md | 150 ---- docs/scientific-research/documentation.md | 69 -- docs/scientific-research/domain-modeling.md | 115 --- docs/scientific-research/oop-design.md | 64 -- .../refactoring-empirical.md | 100 --- .../requirements-elicitation.md | 246 ------ .../scientific-research/software-economics.md | 24 - docs/scientific-research/testing.md | 137 ---- docs/{features/in-progress => spec}/.gitkeep | 0 docs/spec/context_map.md | 84 ++ docs/spec/domain_model.md | 123 +++ docs/spec/glossary.md | 255 +++++++ 
docs/spec/product_definition.md | 156 ++++ docs/spec/system.md | 157 ++++ docs/spec/technical_design.md | 722 ++++++++++++++++++ docs/spec/workflow-design.md | 565 ++++++++++++++ pyproject.toml | 43 +- scripts/flowr-utils.sh | 121 +++ scripts/generate-flowviz-data.py | 248 ++++++ scripts/generate-svg.sh | 92 +++ scripts/update-bundle.sh | 65 ++ smith/__init__.py | 2 +- smith/__main__.py | 24 +- smith/application/__init__.py | 1 + smith/application/connect.py | 30 + smith/application/disconnect.py | 32 + smith/application/status.py | 32 + smith/application/update.py | 32 + smith/data/.flowr/.gitignore | 2 + .../data/.flowr/flows/architecture-flow.yaml | 145 ++++ smith/data/.flowr/flows/branding-flow.yaml | 61 ++ smith/data/.flowr/flows/delivery-flow.yaml | 80 ++ smith/data/.flowr/flows/development-flow.yaml | 72 ++ smith/data/.flowr/flows/discovery-flow.yaml | 101 +++ .../flows/feature-development-flow.yaml | 40 + smith/data/.flowr/flows/main-flow.yaml | 31 + smith/data/.flowr/flows/planning-flow.yaml | 156 ++++ smith/data/.flowr/flows/post-mortem-flow.yaml | 66 ++ smith/data/.flowr/flows/review-gate-flow.yaml | 63 ++ .../data/.flowr/flows/setup-project-flow.yaml | 89 +++ smith/data/.flowr/flows/tdd-cycle-flow.yaml | 51 ++ smith/data/.flowr/sessions/current.yaml | 10 + smith/data/.opencode/agents/design-agent.md | 10 + smith/data/.opencode/agents/domain-expert.md | 10 + smith/data/.opencode/agents/product-owner.md | 10 + smith/data/.opencode/agents/reviewer.md | 10 + smith/data/.opencode/agents/setup-agent.md | 10 + .../.opencode/agents/software-engineer.md | 10 + .../data/.opencode/agents/system-architect.md | 10 + .../knowledge/agent-design/principles.md | 100 +++ .../.opencode/knowledge/architecture/adr.md | 79 ++ .../knowledge/architecture/assessment.md | 69 ++ .../knowledge/architecture/contract-design.md | 72 ++ .../architecture/quality-attributes.md | 60 ++ .../knowledge/architecture/reconciliation.md | 57 ++ .../architecture/technical-design.md | 61 ++ 
.../knowledge/design/color-systems.md | 126 +++ .../knowledge/design/identity-design.md | 36 + .../knowledge/design/project-assets.md | 76 ++ .../knowledge/design/visual-harmony.md | 69 ++ .../domain-modeling/context-mapping.md | 64 ++ .../domain-modeling/event-storming.md | 58 ++ .../knowledge/knowledge-design/principles.md | 123 +++ .../knowledge/requirements/decomposition.md | 31 + .../knowledge/requirements/gherkin.md | 106 +++ .../requirements/interview-techniques.md | 86 +++ .../knowledge/requirements/invest.md | 77 ++ .../knowledge/requirements/moscow.md | 55 ++ .../knowledge/requirements/post-mortem.md | 58 ++ .../knowledge/requirements/pre-mortem.md | 67 ++ .../requirements/ubiquitous-language.md | 61 ++ .../.opencode/knowledge/requirements/wsjf.md | 86 +++ .../knowledge/skill-design/principles.md | 107 +++ .../knowledge/software-craft/code-review.md | 80 ++ .../software-craft/design-patterns.md | 106 +++ .../software-craft/git-conventions.md | 90 +++ .../software-craft/object-calisthenics.md | 50 ++ .../software-craft/refactoring-techniques.md | 137 ++++ .../knowledge/software-craft/refactoring.md | 73 ++ .../software-craft/smell-catalogue.md | 87 +++ .../knowledge/software-craft/solid.md | 57 ++ .../knowledge/software-craft/stub-design.md | 45 ++ .../.opencode/knowledge/software-craft/tdd.md | 101 +++ .../knowledge/software-craft/test-design.md | 58 ++ .../knowledge/software-craft/versioning.md | 34 + .../knowledge/workflow/flowr-spec.md | 144 ++++ .../.opencode/skills/accept-feature/SKILL.md | 15 + .../skills/analyze-root-cause/SKILL.md | 13 + .../skills/assess-architecture/SKILL.md | 16 + .../skills/break-down-feature/SKILL.md | 16 + .../skills/commit-implementation/SKILL.md | 14 + .../skills/conduct-interview/SKILL.md | 18 + .../skills/confirm-baseline/SKILL.md | 13 + .../data/.opencode/skills/create-pr/SKILL.md | 15 + .../.opencode/skills/create-py-stubs/SKILL.md | 15 + .../skills/decide-batch-action/SKILL.md | 10 + 
.../.opencode/skills/define-done/SKILL.md | 13 + .../skills/define-product-scope/SKILL.md | 14 + .../define-ubiquitous-language/SKILL.md | 14 + .../.opencode/skills/design-assets/SKILL.md | 32 + .../.opencode/skills/design-colors/SKILL.md | 21 + .../skills/design-technical-solution/SKILL.md | 23 + .../skills/determine-action-items/SKILL.md | 13 + .../skills/document-post-mortem/SKILL.md | 12 + .../data/.opencode/skills/draft-adr/SKILL.md | 15 + .../.opencode/skills/extract-lessons/SKILL.md | 13 + .../skills/facilitate-event-storming/SKILL.md | 16 + .../skills/implement-minimum/SKILL.md | 14 + .../.opencode/skills/map-contexts/SKILL.md | 18 + .../.opencode/skills/merge-local/SKILL.md | 20 + .../.opencode/skills/model-domain/SKILL.md | 16 + smith/data/.opencode/skills/refactor/SKILL.md | 27 + .../skills/review-architecture/SKILL.md | 17 + .../skills/review-conventions/SKILL.md | 16 + .../.opencode/skills/review-design/SKILL.md | 22 + .../skills/review-structure/SKILL.md | 17 + .../.opencode/skills/select-feature/SKILL.md | 17 + .../.opencode/skills/setup-apply/SKILL.md | 24 + .../.opencode/skills/setup-assess/SKILL.md | 31 + .../.opencode/skills/setup-branding/SKILL.md | 19 + .../.opencode/skills/setup-configure/SKILL.md | 42 + .../.opencode/skills/setup-verify/SKILL.md | 34 + .../.opencode/skills/specify-feature/SKILL.md | 19 + .../skills/structure-project/SKILL.md | 13 + .../skills/write-bdd-features/SKILL.md | 18 + .../data/.opencode/skills/write-test/SKILL.md | 15 + smith/data/.templates/CHANGELOG.md.template | 9 + .../docs/adr/ADR_YYYYMMDD_.md.template | 41 + .../docs/assets/banner.svg.template | 4 + .../.templates/docs/assets/logo.svg.template | 4 + .../docs/branding/branding.md.template | 52 ++ .../.templates/docs/context_map.md.template | 47 ++ .../.templates/docs/domain_model.md.template | 96 +++ .../docs/features/feature.feature.template | 50 ++ .../data/.templates/docs/glossary.md.template | 36 + .../IN_YYYYMMDD_.md.template | 59 ++ 
.../PM_YYYYMMDD_.md.template | 21 + .../docs/product_definition.md.template | 131 ++++ .../docs/research/TEMPLATE.md.template | 49 ++ smith/data/.templates/docs/system.md.template | 114 +++ .../docs/technical_design.md.template | 139 ++++ .../features/_test.py.template | 11 + smith/data/AGENTS.md | 136 ++++ smith/data/__init__.py | 1 + smith/delivery/__init__.py | 1 + smith/delivery/cli.py | 170 +++++ smith/domain/__init__.py | 1 + smith/domain/connection.py | 153 ++++ smith/domain/ports.py | 73 ++ smith/domain/value_objects.py | 62 ++ smith/infrastructure/__init__.py | 1 + smith/infrastructure/filesystem.py | 54 ++ smith/infrastructure/gitignore.py | 94 +++ smith/infrastructure/metadata.py | 40 + smith/infrastructure/template_source.py | 215 ++++++ template-config.yaml | 113 +++ tests/conftest.py | 25 +- .../cli_entrypoint/help_output_test.py | 38 + .../unrecognised_arguments_test.py | 32 + .../cli_entrypoint/version_output_test.py | 39 + tests/features/smith_commands/__init__.py | 5 + tests/features/smith_commands/conftest.py | 134 ++++ .../connect_fresh_project_test.py | 417 ++++++++++ .../smith_commands/disconnect_test.py | 153 ++++ .../smith_commands/skip_user_tracked_test.py | 220 ++++++ tests/features/smith_commands/status_test.py | 177 +++++ tests/features/smith_commands/update_test.py | 161 ++++ tests/unit/app_test.py | 18 - tests/unit/application/__init__.py | 1 + tests/unit/delivery/__init__.py | 1 + tests/unit/domain/__init__.py | 1 + tests/unit/domain/test_connection.py | 26 + tests/unit/infrastructure/__init__.py | 1 + tests/unit/infrastructure/filesystem_test.py | 84 ++ tests/unit/infrastructure/gitignore_test.py | 54 ++ tests/unit/infrastructure/metadata_test.py | 38 + .../infrastructure/template_source_test.py | 246 ++++++ tests/unit/main_test.py | 42 + uv.lock | 191 ++--- 295 files changed, 16606 insertions(+), 3063 deletions(-) delete mode 100644 .dockerignore delete mode 100644 AGENTS.md delete mode 100644 TODO.md rename docs/{c4 => 
adr}/.gitkeep (100%) create mode 100644 docs/adr/ADR_20260501_argparse-cli-framework.md create mode 100644 docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md create mode 100644 docs/adr/ADR_20260501_github-bundled-template-resolution.md create mode 100644 docs/adr/ADR_20260501_hexagonal-architecture.md create mode 100644 docs/adr/ADR_20260501_local-bundled-template-resolution.md create mode 100644 docs/adr/ADR_20260501_no-smart-merge.md create mode 100644 docs/adr/ADR_20260501_smith-yaml-metadata.md delete mode 100644 docs/architecture.md delete mode 100644 docs/discovery.md delete mode 100644 docs/discovery_journal.md rename docs/features/{backlog => }/.gitkeep (100%) delete mode 100644 docs/features/backlog/smith-assimilate.feature create mode 100644 docs/features/backlog/smith-commands.feature delete mode 100644 docs/features/backlog/smith-new.feature delete mode 100644 docs/features/completed/display-version.feature create mode 100644 docs/interview-notes/IN_20260422_scope-discovery.md create mode 100644 docs/interview-notes/IN_20260501_local-bundle-reversal.md create mode 100644 docs/interview-notes/IN_20260501_smith-commands-specification.md create mode 100644 docs/interview-notes/IN_20260501_stakeholder-reinterview.md create mode 100644 docs/interview-notes/IN_20260501_temple8-dependency-resolution.md rename docs/{features/completed => post-mortem}/.gitkeep (100%) delete mode 100644 docs/post-mortem/2026-04-14-ping-pong-cli-workflow-gaps.md delete mode 100644 docs/post-mortem/2026-04-16-ping-pong-cli-package-and-design-review.md create mode 100644 docs/post-mortem/PM_20260501_conflict-exit-code-removal.md create mode 100644 docs/post-mortem/PM_20260501_coverage-test-in-features-folder.md create mode 100644 docs/post-mortem/PM_20260501_missing-feature-test-template.md create mode 100644 docs/post-mortem/PM_20260501_missing-overwrite-flag.md create mode 100644 docs/post-mortem/PM_20260501_moscow-gherkin-tags.md create mode 100644 
docs/post-mortem/PM_20260501_reviewer-fixing-code.md create mode 100644 docs/post-mortem/PM_20260501_se-dirtying-living-docs.md create mode 100644 docs/research/computer-science/artificial-intelligence/liu_et_al_2023.md create mode 100644 docs/research/design/accessibility/w3c_wcag21_2018.md create mode 100644 docs/research/design/visual/airey_2010.md create mode 100644 docs/research/design/visual/albers_1963.md create mode 100644 docs/research/design/visual/arnheim_1954.md create mode 100644 docs/research/design/visual/biederman_1987.md create mode 100644 docs/research/design/visual/hicks_2011.md create mode 100644 docs/research/design/visual/itten_1961.md create mode 100644 docs/research/design/visual/kare_1984.md create mode 100644 docs/research/design/visual/lupton_2010.md create mode 100644 docs/research/design/visual/muller_brockmann_1981.md create mode 100644 docs/research/design/visual/rand_1985.md create mode 100644 docs/research/design/visual/wertheimer_1923.md create mode 100644 docs/research/information-science/documentation/procida_2021.md create mode 100644 docs/research/information-science/domain-modeling/brandolini_2012.md create mode 100644 docs/research/information-science/domain-modeling/evans_2003.md create mode 100644 docs/research/information-science/domain-modeling/vernon_2013.md create mode 100644 docs/research/psychology/cognitive/craik_lockhart_1972.md create mode 100644 docs/research/psychology/cognitive/fisher_geiselman_1987.md create mode 100644 docs/research/psychology/cognitive/flanagan_1954.md create mode 100644 docs/research/psychology/cognitive/gollwitzer_1999.md create mode 100644 docs/research/psychology/cognitive/hattie_timperley_2007.md create mode 100644 docs/research/psychology/cognitive/kahneman_2011.md create mode 100644 docs/research/psychology/cognitive/klein_1998.md create mode 100644 docs/research/psychology/cognitive/mcdaniel_einstein_2000.md create mode 100644 docs/research/psychology/cognitive/miller_1956.md create 
mode 100644 docs/research/psychology/cognitive/reynolds_gutman_1988.md create mode 100644 docs/research/psychology/cognitive/tversky_kahneman_1974.md create mode 100644 docs/research/psychology/social/cialdini_2001.md create mode 100644 docs/research/psychology/social/mellers_et_al_2001.md create mode 100644 docs/research/psychology/social/rogers_farson_1957.md create mode 100644 docs/research/psychology/social/tetlock_1985.md create mode 100644 docs/research/software-engineering/architecture/bass_et_al_2021.md create mode 100644 docs/research/software-engineering/architecture/boehm_1991.md create mode 100644 docs/research/software-engineering/architecture/brown_2018.md create mode 100644 docs/research/software-engineering/architecture/cockburn_2005.md create mode 100644 docs/research/software-engineering/architecture/conway_1968.md create mode 100644 docs/research/software-engineering/architecture/fielding_2000.md create mode 100644 docs/research/software-engineering/architecture/fowler_2003.md create mode 100644 docs/research/software-engineering/architecture/hohpe_woolf_2003.md create mode 100644 docs/research/software-engineering/architecture/kazman_klein_clements_2000.md create mode 100644 docs/research/software-engineering/architecture/kruchten_1995.md create mode 100644 docs/research/software-engineering/architecture/martin_2012_clean.md create mode 100644 docs/research/software-engineering/architecture/nygard_2011.md create mode 100644 docs/research/software-engineering/architecture/parnas_1972.md create mode 100644 docs/research/software-engineering/architecture/skelton_pais_2019.md create mode 100644 docs/research/software-engineering/process/beck_1999_yagni.md create mode 100644 docs/research/software-engineering/process/beyer_et_al_2016.md create mode 100644 docs/research/software-engineering/process/calver_2020.md create mode 100644 docs/research/software-engineering/process/clegg_barker_1994.md create mode 100644 
docs/research/software-engineering/process/fagan_1976.md create mode 100644 docs/research/software-engineering/process/preston-werner_2013.md create mode 100644 docs/research/software-engineering/process/reinertsen_2009.md create mode 100644 docs/research/software-engineering/quality/bay_2008.md create mode 100644 docs/research/software-engineering/quality/beck_2002.md create mode 100644 docs/research/software-engineering/quality/demillo_lipton_sayward_1978.md create mode 100644 docs/research/software-engineering/quality/feathers_2004.md create mode 100644 docs/research/software-engineering/quality/fowler_1999.md create mode 100644 docs/research/software-engineering/quality/freeman_pryce_2009.md create mode 100644 docs/research/software-engineering/quality/gamma_et_al_1994.md create mode 100644 docs/research/software-engineering/quality/google_testing_2013.md create mode 100644 docs/research/software-engineering/quality/maciver_2016.md create mode 100644 docs/research/software-engineering/quality/martin_2000_solid.md create mode 100644 docs/research/software-engineering/quality/martin_2017_first_class_tests.md create mode 100644 docs/research/software-engineering/quality/meszaros_2007.md create mode 100644 docs/research/software-engineering/quality/north_2006.md create mode 100644 docs/research/software-engineering/quality/shvets_2014.md create mode 100644 docs/research/software-engineering/requirements/christel_kang_1992.md create mode 100644 docs/research/software-engineering/requirements/kano_et_al_1984.md create mode 100644 docs/research/software-engineering/requirements/wake_2003.md create mode 100644 docs/research/software-engineering/requirements/wynne_2015.md delete mode 100644 docs/scientific-research/README.md delete mode 100644 docs/scientific-research/ai-agents.md delete mode 100644 docs/scientific-research/architecture.md delete mode 100644 docs/scientific-research/cognitive-science.md delete mode 100644 docs/scientific-research/documentation.md delete 
mode 100644 docs/scientific-research/domain-modeling.md delete mode 100644 docs/scientific-research/oop-design.md delete mode 100644 docs/scientific-research/refactoring-empirical.md delete mode 100644 docs/scientific-research/requirements-elicitation.md delete mode 100644 docs/scientific-research/software-economics.md delete mode 100644 docs/scientific-research/testing.md rename docs/{features/in-progress => spec}/.gitkeep (100%) create mode 100644 docs/spec/context_map.md create mode 100644 docs/spec/domain_model.md create mode 100644 docs/spec/glossary.md create mode 100644 docs/spec/product_definition.md create mode 100644 docs/spec/system.md create mode 100644 docs/spec/technical_design.md create mode 100644 docs/spec/workflow-design.md create mode 100755 scripts/flowr-utils.sh create mode 100755 scripts/generate-flowviz-data.py create mode 100755 scripts/generate-svg.sh create mode 100755 scripts/update-bundle.sh create mode 100644 smith/application/__init__.py create mode 100644 smith/application/connect.py create mode 100644 smith/application/disconnect.py create mode 100644 smith/application/status.py create mode 100644 smith/application/update.py create mode 100644 smith/data/.flowr/.gitignore create mode 100644 smith/data/.flowr/flows/architecture-flow.yaml create mode 100644 smith/data/.flowr/flows/branding-flow.yaml create mode 100644 smith/data/.flowr/flows/delivery-flow.yaml create mode 100644 smith/data/.flowr/flows/development-flow.yaml create mode 100644 smith/data/.flowr/flows/discovery-flow.yaml create mode 100644 smith/data/.flowr/flows/feature-development-flow.yaml create mode 100644 smith/data/.flowr/flows/main-flow.yaml create mode 100644 smith/data/.flowr/flows/planning-flow.yaml create mode 100644 smith/data/.flowr/flows/post-mortem-flow.yaml create mode 100644 smith/data/.flowr/flows/review-gate-flow.yaml create mode 100644 smith/data/.flowr/flows/setup-project-flow.yaml create mode 100644 smith/data/.flowr/flows/tdd-cycle-flow.yaml 
create mode 100644 smith/data/.flowr/sessions/current.yaml create mode 100644 smith/data/.opencode/agents/design-agent.md create mode 100644 smith/data/.opencode/agents/domain-expert.md create mode 100644 smith/data/.opencode/agents/product-owner.md create mode 100644 smith/data/.opencode/agents/reviewer.md create mode 100644 smith/data/.opencode/agents/setup-agent.md create mode 100644 smith/data/.opencode/agents/software-engineer.md create mode 100644 smith/data/.opencode/agents/system-architect.md create mode 100644 smith/data/.opencode/knowledge/agent-design/principles.md create mode 100644 smith/data/.opencode/knowledge/architecture/adr.md create mode 100644 smith/data/.opencode/knowledge/architecture/assessment.md create mode 100644 smith/data/.opencode/knowledge/architecture/contract-design.md create mode 100644 smith/data/.opencode/knowledge/architecture/quality-attributes.md create mode 100644 smith/data/.opencode/knowledge/architecture/reconciliation.md create mode 100644 smith/data/.opencode/knowledge/architecture/technical-design.md create mode 100644 smith/data/.opencode/knowledge/design/color-systems.md create mode 100644 smith/data/.opencode/knowledge/design/identity-design.md create mode 100644 smith/data/.opencode/knowledge/design/project-assets.md create mode 100644 smith/data/.opencode/knowledge/design/visual-harmony.md create mode 100644 smith/data/.opencode/knowledge/domain-modeling/context-mapping.md create mode 100644 smith/data/.opencode/knowledge/domain-modeling/event-storming.md create mode 100644 smith/data/.opencode/knowledge/knowledge-design/principles.md create mode 100644 smith/data/.opencode/knowledge/requirements/decomposition.md create mode 100644 smith/data/.opencode/knowledge/requirements/gherkin.md create mode 100644 smith/data/.opencode/knowledge/requirements/interview-techniques.md create mode 100644 smith/data/.opencode/knowledge/requirements/invest.md create mode 100644 smith/data/.opencode/knowledge/requirements/moscow.md 
create mode 100644 smith/data/.opencode/knowledge/requirements/post-mortem.md create mode 100644 smith/data/.opencode/knowledge/requirements/pre-mortem.md create mode 100644 smith/data/.opencode/knowledge/requirements/ubiquitous-language.md create mode 100644 smith/data/.opencode/knowledge/requirements/wsjf.md create mode 100644 smith/data/.opencode/knowledge/skill-design/principles.md create mode 100644 smith/data/.opencode/knowledge/software-craft/code-review.md create mode 100644 smith/data/.opencode/knowledge/software-craft/design-patterns.md create mode 100644 smith/data/.opencode/knowledge/software-craft/git-conventions.md create mode 100644 smith/data/.opencode/knowledge/software-craft/object-calisthenics.md create mode 100644 smith/data/.opencode/knowledge/software-craft/refactoring-techniques.md create mode 100644 smith/data/.opencode/knowledge/software-craft/refactoring.md create mode 100644 smith/data/.opencode/knowledge/software-craft/smell-catalogue.md create mode 100644 smith/data/.opencode/knowledge/software-craft/solid.md create mode 100644 smith/data/.opencode/knowledge/software-craft/stub-design.md create mode 100644 smith/data/.opencode/knowledge/software-craft/tdd.md create mode 100644 smith/data/.opencode/knowledge/software-craft/test-design.md create mode 100644 smith/data/.opencode/knowledge/software-craft/versioning.md create mode 100644 smith/data/.opencode/knowledge/workflow/flowr-spec.md create mode 100644 smith/data/.opencode/skills/accept-feature/SKILL.md create mode 100644 smith/data/.opencode/skills/analyze-root-cause/SKILL.md create mode 100644 smith/data/.opencode/skills/assess-architecture/SKILL.md create mode 100644 smith/data/.opencode/skills/break-down-feature/SKILL.md create mode 100644 smith/data/.opencode/skills/commit-implementation/SKILL.md create mode 100644 smith/data/.opencode/skills/conduct-interview/SKILL.md create mode 100644 smith/data/.opencode/skills/confirm-baseline/SKILL.md create mode 100644 
smith/data/.opencode/skills/create-pr/SKILL.md create mode 100644 smith/data/.opencode/skills/create-py-stubs/SKILL.md create mode 100644 smith/data/.opencode/skills/decide-batch-action/SKILL.md create mode 100644 smith/data/.opencode/skills/define-done/SKILL.md create mode 100644 smith/data/.opencode/skills/define-product-scope/SKILL.md create mode 100644 smith/data/.opencode/skills/define-ubiquitous-language/SKILL.md create mode 100644 smith/data/.opencode/skills/design-assets/SKILL.md create mode 100644 smith/data/.opencode/skills/design-colors/SKILL.md create mode 100644 smith/data/.opencode/skills/design-technical-solution/SKILL.md create mode 100644 smith/data/.opencode/skills/determine-action-items/SKILL.md create mode 100644 smith/data/.opencode/skills/document-post-mortem/SKILL.md create mode 100644 smith/data/.opencode/skills/draft-adr/SKILL.md create mode 100644 smith/data/.opencode/skills/extract-lessons/SKILL.md create mode 100644 smith/data/.opencode/skills/facilitate-event-storming/SKILL.md create mode 100644 smith/data/.opencode/skills/implement-minimum/SKILL.md create mode 100644 smith/data/.opencode/skills/map-contexts/SKILL.md create mode 100644 smith/data/.opencode/skills/merge-local/SKILL.md create mode 100644 smith/data/.opencode/skills/model-domain/SKILL.md create mode 100644 smith/data/.opencode/skills/refactor/SKILL.md create mode 100644 smith/data/.opencode/skills/review-architecture/SKILL.md create mode 100644 smith/data/.opencode/skills/review-conventions/SKILL.md create mode 100644 smith/data/.opencode/skills/review-design/SKILL.md create mode 100644 smith/data/.opencode/skills/review-structure/SKILL.md create mode 100644 smith/data/.opencode/skills/select-feature/SKILL.md create mode 100644 smith/data/.opencode/skills/setup-apply/SKILL.md create mode 100644 smith/data/.opencode/skills/setup-assess/SKILL.md create mode 100644 smith/data/.opencode/skills/setup-branding/SKILL.md create mode 100644 
smith/data/.opencode/skills/setup-configure/SKILL.md create mode 100644 smith/data/.opencode/skills/setup-verify/SKILL.md create mode 100644 smith/data/.opencode/skills/specify-feature/SKILL.md create mode 100644 smith/data/.opencode/skills/structure-project/SKILL.md create mode 100644 smith/data/.opencode/skills/write-bdd-features/SKILL.md create mode 100644 smith/data/.opencode/skills/write-test/SKILL.md create mode 100644 smith/data/.templates/CHANGELOG.md.template create mode 100644 smith/data/.templates/docs/adr/ADR_YYYYMMDD_.md.template create mode 100644 smith/data/.templates/docs/assets/banner.svg.template create mode 100644 smith/data/.templates/docs/assets/logo.svg.template create mode 100644 smith/data/.templates/docs/branding/branding.md.template create mode 100644 smith/data/.templates/docs/context_map.md.template create mode 100644 smith/data/.templates/docs/domain_model.md.template create mode 100644 smith/data/.templates/docs/features/feature.feature.template create mode 100644 smith/data/.templates/docs/glossary.md.template create mode 100644 smith/data/.templates/docs/interview-notes/IN_YYYYMMDD_.md.template create mode 100644 smith/data/.templates/docs/post-mortem/PM_YYYYMMDD_.md.template create mode 100644 smith/data/.templates/docs/product_definition.md.template create mode 100644 smith/data/.templates/docs/research/TEMPLATE.md.template create mode 100644 smith/data/.templates/docs/system.md.template create mode 100644 smith/data/.templates/docs/technical_design.md.template create mode 100644 smith/data/.templates/tests/features/_test.py.template create mode 100644 smith/data/AGENTS.md create mode 100644 smith/data/__init__.py create mode 100644 smith/delivery/__init__.py create mode 100644 smith/delivery/cli.py create mode 100644 smith/domain/__init__.py create mode 100644 smith/domain/connection.py create mode 100644 smith/domain/ports.py create mode 100644 smith/domain/value_objects.py create mode 100644 smith/infrastructure/__init__.py 
create mode 100644 smith/infrastructure/filesystem.py create mode 100644 smith/infrastructure/gitignore.py create mode 100644 smith/infrastructure/metadata.py create mode 100644 smith/infrastructure/template_source.py create mode 100644 template-config.yaml create mode 100644 tests/features/cli_entrypoint/help_output_test.py create mode 100644 tests/features/cli_entrypoint/unrecognised_arguments_test.py create mode 100644 tests/features/cli_entrypoint/version_output_test.py create mode 100644 tests/features/smith_commands/__init__.py create mode 100644 tests/features/smith_commands/conftest.py create mode 100644 tests/features/smith_commands/connect_fresh_project_test.py create mode 100644 tests/features/smith_commands/disconnect_test.py create mode 100644 tests/features/smith_commands/skip_user_tracked_test.py create mode 100644 tests/features/smith_commands/status_test.py create mode 100644 tests/features/smith_commands/update_test.py delete mode 100644 tests/unit/app_test.py create mode 100644 tests/unit/application/__init__.py create mode 100644 tests/unit/delivery/__init__.py create mode 100644 tests/unit/domain/__init__.py create mode 100644 tests/unit/domain/test_connection.py create mode 100644 tests/unit/infrastructure/__init__.py create mode 100644 tests/unit/infrastructure/filesystem_test.py create mode 100644 tests/unit/infrastructure/gitignore_test.py create mode 100644 tests/unit/infrastructure/metadata_test.py create mode 100644 tests/unit/infrastructure/template_source_test.py create mode 100644 tests/unit/main_test.py diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 268dbdf..0000000 --- a/.dockerignore +++ /dev/null @@ -1,212 +0,0 @@ -# Docker ignore file for pairai -# Optimized for minimal context and security - -# Version control -.git/ -.gitignore -.gitattributes - -# Development files -.vscode/ -.idea/ -*.swp -*.swo -*~ - -# OS generated files -.DS_Store -.DS_Store? 
-._* -.Spotlight-V100 -.Trashes -ehthumbs.db -Thumbs.db - -# Python -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ -docs/tests/ -docs/coverage/ -docs/mutation/ - -# Translations -*.mo -*.pot - -# Django stuff -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff -instance/ -.webassets-cache - -# Scrapy stuff -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -Pipfile.lock - -# poetry -poetry.lock - -# pdm -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582 -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -.idea/ - -# Project specific -docs/api/ -docs/tests/ -docs/coverage/ -docs/mutation/ -.mutmut-cache/ -mutants/ -*.db -*.sqlite -*.sqlite3 - -# Docker -.dockerignore -Dockerfile* -docker-compose*.yml - -# CI/CD -.github/ -.gitlab-ci.yml -.travis.yml -.circleci/ - -# Package managers -node_modules/ -package-lock.json -yarn.lock - -# Logs -*.log -logs/ - -# Temporary files -tmp/ -temp/ -.tmp/ - -# Security 
-.secrets -credentials.json -*.pem -*.key -*.crt - -# Backup files -*.bak -*.backup -*.old \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ba51540..6045076 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,10 +33,10 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -67,21 +67,23 @@ jobs: fail-fast: false matrix: python-version: ["3.13"] + env: + UV_SYSTEM_PYTHON: "false" steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" python-version: ${{ matrix.python-version }} - name: Install dependencies - run: uv sync --locked --all-extras --dev - + run: uv sync --locked --all-extras --dev && uv pip install -e . 
+ - name: Run fast tests run: uv run task test-fast @@ -115,13 +117,15 @@ jobs: permissions: contents: read actions: read - + env: + UV_SYSTEM_PYTHON: "false" + steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -130,7 +134,7 @@ jobs: run: uv python install 3.13 - name: Install dependencies - run: uv sync --locked --all-extras --dev + run: uv sync --locked --all-extras --dev && uv pip install -e . - name: Build documentation run: uv run task doc-build @@ -168,13 +172,15 @@ jobs: if: github.event_name == 'push' && github.ref == 'refs/heads/main' permissions: contents: write + env: + UV_SYSTEM_PYTHON: "false" steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -183,7 +189,7 @@ jobs: run: uv python install 3.13 - name: Install dependencies - run: uv sync --locked --all-extras --dev + run: uv sync --locked --all-extras --dev && uv pip install -e . 
- name: Build and publish documentation run: uv run task doc-publish diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index e4712b6..185d0ca 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Dependency Review uses: actions/dependency-review-action@2031cfc080254a8a887f58cffee85186f0e49e48 # v4.9.0 @@ -36,10 +36,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 - name: Set up Python run: uv python install 3.13 diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 26d06f3..08011c1 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -4,6 +4,12 @@ on: push: tags: - "v*" + workflow_dispatch: + inputs: + tag: + description: "Tag to publish (e.g. 
v0.1.0+20260501)" + required: true + type: string permissions: contents: read @@ -17,10 +23,12 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 + with: + ref: ${{ github.event_name == 'workflow_dispatch' && inputs.tag || github.ref }} - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -28,14 +36,34 @@ jobs: - name: Set up Python 3.13 run: uv python install 3.13 + - name: Install dependencies + run: uv sync --locked --all-extras --dev + + - name: Install package in editable mode + run: uv pip install -e . + + - name: Run quality gate + run: | + uv run task lint + uv run task static-check + uv run task test + - name: Clean dist run: rm -rf dist/ - name: Build wheel and sdist run: uv build + - name: Verify package installation (wheel) + run: | + uv run --isolated --no-project --with dist/*.whl python -c "import smith; print('Wheel install successful')" + + - name: Verify package installation (sdist) + run: | + uv run --isolated --no-project --with dist/*.tar.gz python -c "import smith; print('Source dist install successful')" + - name: Upload dist artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v4.6.2 with: name: dist path: dist/ @@ -50,7 +78,7 @@ jobs: steps: - name: Download dist artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: dist path: dist/ @@ -59,27 +87,34 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1 release: - name: Create GitHub Release + name: Update GitHub Release runs-on: ubuntu-latest needs: publish permissions: - contents: write # required to create a release + contents: 
write # required to create/edit a release steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 + with: + ref: ${{ github.event_name == 'workflow_dispatch' && inputs.tag || github.ref }} - name: Download dist artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: dist path: dist/ - - name: Create GitHub Release + - name: Create or update GitHub Release env: GH_TOKEN: ${{ github.token }} + TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.tag || github.ref_name }} run: | - gh release create "${{ github.ref_name }}" \ - --title "${{ github.ref_name }}" \ - --generate-notes \ - dist/* + if gh release view "$TAG" > /dev/null 2>&1; then + gh release upload "$TAG" dist/* --clobber + else + gh release create "$TAG" \ + --title "$TAG" \ + --generate-notes \ + dist/* + fi diff --git a/.github/workflows/tag-release.yml b/.github/workflows/tag-release.yml index 4fbc251..b639b09 100644 --- a/.github/workflows/tag-release.yml +++ b/.github/workflows/tag-release.yml @@ -14,16 +14,20 @@ jobs: runs-on: ubuntu-latest permissions: contents: write + env: + UV_SYSTEM_PYTHON: "false" steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Extract version from pyproject.toml id: version run: | + # Extract semver core (major.minor.patch) from pyproject.toml VERSION=$(grep '^version' pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/') echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + # Build metadata (+YYYYMMDD) is appended at tag time, not stored in pyproject.toml echo "tag=v${VERSION}" >> "$GITHUB_OUTPUT" - name: Check if tag already exists @@ -35,6 +39,25 @@ jobs: echo "exists=false" >> "$GITHUB_OUTPUT" fi + - name: Install uv + if: 
steps.check.outputs.exists == 'false' + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Set up Python 3.13 + if: steps.check.outputs.exists == 'false' + run: uv python install 3.13 + + - name: Install dependencies + if: steps.check.outputs.exists == 'false' + run: uv sync --locked --all-extras --dev && uv pip install -e . + + - name: Run release-check + if: steps.check.outputs.exists == 'false' + run: uv run task release-check + - name: Create and push tag if: steps.check.outputs.exists == 'false' env: @@ -43,9 +66,11 @@ jobs: GIT_COMMITTER_NAME: github-actions[bot] GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com run: | - git tag "${{ steps.version.outputs.tag }}" - git push origin "${{ steps.version.outputs.tag }}" - echo "Created tag ${{ steps.version.outputs.tag }} at $(git rev-parse HEAD)" + DATE=$(date +%Y%m%d) + TAG="v${{ steps.version.outputs.version }}+${DATE}" + git tag "$TAG" + git push origin "$TAG" + echo "Created tag $TAG at $(git rev-parse HEAD)" - name: Skip (tag already exists) if: steps.check.outputs.exists == 'true' diff --git a/.gitignore b/.gitignore index 8131edd..33cbce9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ .DS_Store -.opencode/ +*.swp .coverage -.vscode/\n.idea/ +.vscode/ +.idea/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -169,4 +170,12 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ .mutmut-cache -# Trigger CI run to verify linting fixes + +# Agentic files (managed by smith, not versioned) +.opencode/ +AGENTS.md +.templates/ +.flowr/ + +# Generated flow visualization data (regenerate with: task regenerate-flowviz) +flowviz/ diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index e62ef6f..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,256 +0,0 @@ -# Python Project Template - -A Python template to quickstart any project with a production-ready workflow, quality tooling, and AI-assisted development. - -## Workflow Overview - -Features flow through 5 steps with a WIP limit of 1 feature at a time. The filesystem enforces WIP: -- `docs/features/backlog/.feature` — features waiting to be worked on -- `docs/features/in-progress/.feature` — exactly one feature being built right now -- `docs/features/completed/.feature` — accepted and shipped features - -``` -STEP 1: SCOPE (product-owner) → discovery + Gherkin stories + criteria -STEP 2: ARCH (software-engineer) → read all features + existing package files, write domain stubs (signatures only, no bodies); decisions appended to docs/architecture.md -STEP 3: TDD LOOP (software-engineer) → RED → GREEN → REFACTOR, one @id at a time -STEP 4: VERIFY (reviewer) → run all commands, review code -STEP 5: ACCEPT (product-owner) → demo, validate, move .feature to completed/ (PO only) -``` - -**PO picks the next feature from backlog. Software-engineer never self-selects.** - -**Verification is adversarial.** The reviewer's job is to try to break the feature, not to confirm it works. The default hypothesis is "it might be broken despite green checks; prove otherwise." - -## Roles - -- **Product Owner (PO)** — AI agent. Interviews the stakeholder, writes discovery docs, Gherkin features, and acceptance criteria. Accepts or rejects deliveries. **Sole owner of all `.feature` file moves** (backlog → in-progress before Step 2; in-progress → completed after Step 5 acceptance). -- **Stakeholder** — Human. 
Answers PO's questions, provides domain knowledge, approves PO syntheses to confirm discovery is complete. -- **Software Engineer** — AI agent. Architecture, test bodies, implementation, git. Never creates, edits, or moves `.feature` files. Escalates spec gaps to PO. If no `.feature` file is in `in-progress/`, stops and escalates to PO. -- **Reviewer** — AI agent. Adversarial verification. Reports spec gaps to PO. Never creates, edits, or moves `.feature` files. After APPROVED report, stops and escalates to PO for Step 5. - -## Feature File Chain of Responsibility - -`.feature` files are owned exclusively by the PO. **No other agent ever creates, edits, or moves them.** - -| Transition | Who | When | -|---|---|---| -| `backlog/` → `in-progress/` | PO only | Before Step 2 begins; only if `Status: BASELINED` | -| `in-progress/` → `completed/` | PO only | After Step 5 acceptance | - -**If an agent (SE or reviewer) finds no `.feature` in `in-progress/`**: update TODO.md with the correct `Next:` escalation line and stop. Never self-select a backlog feature. 
- -## Agents - -- **product-owner** — defines scope (Stage 1 Discovery + Stage 2 Specification), picks features, accepts deliveries -- **software-engineer** — architecture, tests, code, git, releases (Steps 2-3 + release) -- **reviewer** — runs commands and reviews code at Step 4, produces APPROVED/REJECTED report -- **designer** — creates and updates visual assets (SVG banners, logos) and maintains `docs/branding.md` -- **setup-project** — one-time setup to initialize a new project from this template - -## Skills - -| Skill | Used By | Step | -|---|---|---| -| `run-session` | all agents | every session | -| `select-feature` | product-owner | between features (idle state) | -| `define-scope` | product-owner | 1 | -| `implement` | software-engineer | 2, 3 | -| `apply-patterns` | software-engineer | 2, 3 (on-demand, when GoF pattern needed) | -| `refactor` | software-engineer | 3 (REFACTOR phase + preparatory refactoring) | -| `verify` | reviewer | 4 | -| `check-quality` | software-engineer | pre-handoff (redirects to `verify`) | -| `create-pr` | software-engineer | 5 | -| `git-release` | software-engineer | 5 (after acceptance) | -| `update-docs` | product-owner | 5 (after acceptance) + on stakeholder demand | -| `design-colors` | designer | branding, color, WCAG compliance | -| `design-assets` | designer | SVG asset creation and updates | -| `create-skill` | software-engineer | meta | -| `create-agent` | human-user | meta | - -**Branding**: Agents that generate docs, diagrams, release names, or visual assets read `docs/branding.md` if present. Absent or blank fields fall back to defaults (adjective-animal release names, Mermaid default colors, no wording constraints). `docs/branding.md` and `docs/assets/` are owned by the designer agent. - -**Session protocol**: Every agent loads `skill run-session` at session start. Load additional skills as needed for the current step. 
- -## Step 1 — SCOPE - -Step 1 has two stages: - -### Stage 1 — Discovery (PO + stakeholder, iterative) - -Discovery is a continuous process. Sessions happen whenever scope needs to be established or refined — for a new project, new features, or new information. Every session follows the same structure: - -**Session question order:** -1. **General** (5Ws + Success + Failure + Out-of-scope) — first session only, if the journal doesn't exist yet -2. **Cross-cutting** — behavior groups, bounded contexts, integration points, lifecycle events -3. **Per-feature** — one feature at a time; extract entities from `docs/discovery.md` Domain Model; gap-finding with CIT, Laddering, CI Perspective Change - -**Real-time split rule**: if the PO detects >2 concerns or >8 candidate Examples for a feature during per-feature questions, split immediately — record the split in the journal, create stub `.feature` files, continue questions for both in the same session. - -**After questions (PO alone, in order):** -1. Append answered Q&A (in groups) to `docs/discovery_journal.md` — only answered questions -2. Rewrite `.feature` description for each feature touched — others stay unchanged -3. Append session synthesis block to `docs/discovery.md` — LAST, after all `.feature` updates - -**Session status**: the journal session header begins with `Status: IN-PROGRESS` (written before questions). Updated to `Status: COMPLETE` after all writes. If a session is interrupted, the next agent detects `IN-PROGRESS` and resumes the pending writes before starting a new session. - -**Baselining**: PO writes `Status: BASELINED (YYYY-MM-DD)` in the `.feature` file when the stakeholder approves that feature's discovery and the decomposition check passes. - -Commit per session: `feat(discovery): ` - -### Stage 2 — Specification (PO alone, per feature) - -Only runs on features with `Status: BASELINED`. No stakeholder involvement. If a gap requires stakeholder input, open a new Stage 1 session first. 
- -**Step A — Stories**: derive one `Rule:` block per user story from the baselined feature description. INVEST gate: all 6 letters must pass. -Commit: `feat(stories): write user stories for ` - -**Step B — Criteria**: PO writes `Example:` blocks with `@id` tags under each `Rule:`. Pre-mortem per Rule before writing any Examples. MoSCoW triage per Example. Examples are frozen after commit. -Commit: `feat(criteria): write acceptance criteria for ` - -**Criteria are frozen**: no `Example:` changes after commit. Adding a new Example with a new `@id` replaces old. - -### Bug Handling - -When a defect is reported: -1. **PO** adds a `@bug` Example to the relevant `Rule:` in the `.feature` file and moves (or keeps) the feature in `backlog/` for normal scheduling. -2. **SE** handles the bug when the feature is selected for development (standard Step 2–3 flow): implements the specific `@bug`-tagged test in `tests/features//` and also writes a `@given` Hypothesis property test in `tests/unit/` covering the whole class of inputs. -3. Both tests are required. SE follows the normal TDD loop (Step 3). 
- -## Filesystem Structure - -``` -docs/ - discovery_journal.md ← raw Q&A, PO appends after every session - discovery.md ← synthesis changelog, PO appends after every session - architecture.md ← all architectural decisions, SE appends after Step 2 - glossary.md ← living glossary, PO updates via update-docs skill - branding.md ← project identity, colors, release naming, wording (designer owns) - assets/ ← logo.svg, banner.svg, and other visual assets (designer owns) - c4/ - context.md ← C4 Level 1 diagram, PO updates via update-docs skill - container.md ← C4 Level 2 diagram, PO updates via update-docs skill - features/ - backlog/.feature ← narrative + Rules + Examples - in-progress/.feature - completed/.feature - -tests/ - features// - _test.py ← one per Rule: block, software-engineer-written - unit/ - _test.py ← software-engineer-authored extras (no @id traceability) -``` - -Tests in `tests/unit/` are software-engineer-authored extras not covered by any `@id` criterion. Any test style is valid — plain `assert` or Hypothesis `@given`. Use Hypothesis when the test covers a **property** that holds across many inputs (mathematical invariants, parsing contracts, value object constraints). Use plain pytest for specific behaviors or single edge cases discovered during refactoring. - -- `@pytest.mark.slow` is mandatory on every `@given`-decorated test (Hypothesis is genuinely slow) -- `@example(...)` is optional but encouraged when using `@given` to document known corner cases -- No `@id` tags — tests with `@id` belong in `tests/features/`, written by software-engineer - -## Test File Layout - -``` -tests/features//_test.py -``` - -### Stub Format - -Stubs are auto-generated by pytest-beehave. The SE triggers generation at Step 2 end by running `uv run task test-fast`. 
pytest-beehave reads the in-progress `.feature` file and creates one skipped function per `@id`: - -```python -@pytest.mark.skip(reason="not yet implemented") -def test__<@id>() -> None: - """ - <@id steps raw text including new lines> - """ -``` - -### Markers -- `@pytest.mark.slow` — takes > 50ms; applied to Hypothesis tests and any test with I/O, network, or DB -- `@pytest.mark.deprecated` — auto-skipped by pytest-beehave; used for superseded Examples - -## Development Commands - -```bash -# Install dependencies -uv sync --all-extras - -# Run the application (for humans) -uv run task run - -# Run the application with timeout (for agents — prevents hanging) -timeout 10s uv run task run - -# Run tests (fast, no coverage) -uv run task test-fast - -# Run full test suite with coverage -uv run task test - -# Run tests with coverage report generation -uv run task test-build - -# Lint and format -uv run task lint - -# Type checking -uv run task static-check - -# Build documentation -uv run task doc-build -``` - -## Code Quality Standards - -- **Principles (in priority order)**: YAGNI > KISS > DRY > SOLID > Object Calisthenics > appropriate design patterns > complex code > complicate code > failing code > no code -- **Linting**: ruff format, ruff check, Google docstring convention, `noqa` forbidden -- **Type checking**: pyright, 0 errors required -- **Coverage**: 100% (measured against your actual package) -- **Function length**: ≤ 20 lines (code lines only, excluding docstrings) -- **Class length**: ≤ 50 lines (code lines only, excluding docstrings) -- **Max nesting**: 2 levels -- **Instance variables**: ≤ 2 per class *(exception: dataclasses, Pydantic models, value objects, and TypedDicts are exempt — they may carry as many fields as the domain requires)* -- **Semantic alignment**: tests must operate at the same abstraction level as the acceptance criteria they cover - -### Software-Engineer Quality Gate Priority Order - -During Step 3 (TDD Loop), correctness 
priorities are: - -1. **Design correctness** — YAGNI > KISS > DRY > SOLID > Object Calisthenics > appropriated design patterns > complex code > complicated code > failing code > no code -2. **One test green** — the specific test under work passes, plus `test-fast` still passes -3. **Reviewer code-design check** — reviewer verifies design + semantic alignment (no lint/pyright/coverage yet) -5. **Quality tooling** — `lint`, `static-check`, full `test` with coverage run only at software-engineer handoff (before Step 4) - -Design correctness is far more important than lint/pyright/coverage compliance. A well-designed codebase with minor lint issues is better than a lint-clean codebase with poor design. - -## Verification Philosophy - -- **Automated checks** (lint, typecheck, coverage) verify **syntax-level** correctness — the code is well-formed. -- **Human review** (semantic alignment, code review, manual testing) verifies **semantic-level** correctness — the code does what the user needs. -- Both are required. All-green automated checks are necessary but not sufficient for APPROVED. -- Reviewer defaults to REJECTED unless correctness is proven. - -## Release Management - -Version format: `v{major}.{minor}.{YYYYMMDD}` - -- Minor bump for new features; major bump for breaking changes -- Same-day second release: increment minor, keep same date -- Release name: defined by `docs/branding.md > Release Naming > Convention`; absent or blank defaults to version string only (no name) - -Use `@software-engineer /skill git-release` for the full release process. When requested by the stakeholder - -## Session Management - -Every session: load `skill run-session`. Read `TODO.md` first, update it at the end. - -`TODO.md` is a session bookmark — not a project journal. See `.opencode/skills/run-session/SKILL.md` for the full structure including the Cycle State block used during Step 3. 
- -## Setup - -To initialize a new project from this template: -```bash -@setup-project -``` - -The setup agent will ask for your project name, GitHub username, author info, and configure all template placeholders. diff --git a/CHANGELOG.md b/CHANGELOG.md index e81b0bd..bdc3a85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,370 +1,9 @@ # Changelog -All notable changes to this template will be documented in this file. +All notable changes to agents-smith will be documented in this file. -## [v6.4.20260420] - Minimal Prometheus - 2026-04-20 +## [0.1.0] - 20260501 ### Added -- **Branding system**: `docs/branding.md` — project identity, colour palette, release naming convention, and wording guidelines; agents read this file to personalise release names, C4 diagram colours, and docs without touching `.opencode/` (#89) -- **Designer agent** (`designer.md`): owns `docs/branding.md` and `docs/assets/`; uses `design-colors` and `design-assets` skills (#89) -- **`design-colors` skill**: step-by-step colour palette selection with WCAG 2.1 AA 4.5:1 contrast validation; Itten/Albers colour theory embedded inline (#89) -- **`design-assets` skill**: SVG banner and logo creation workflow; W3C SVG 2 spec and WCAG 1.1.1 `aria-label` requirements embedded (#89) -- **`setup-project` Step 6 Branding**: collects tagline, mission, vision, tone, theme, and colours; suggests WCAG-validated palettes when user provides a theme but no colours; writes `docs/branding.md` (#89) -- **Output Style + Rule #8** in `run-session` skill: minimalist output discipline — signal only, no tool narration, session ends with `Next:` line (#89) -### Changed -- **Skill renames to verb-noun convention**: `session-workflow → run-session`, `scope → define-scope`, `implementation → implement`, `feature-selection → select-feature`, `living-docs → update-docs`, `pr-management → create-pr`, `design-patterns → apply-patterns`, `code-quality → check-quality` — all references updated across agents, skills, and 
`AGENTS.md` (#89) -- **`docs/images/` → `docs/assets/`**: asset directory renamed; `README.md` path updated (#89) -- **`git-release` v1.1**: reads `docs/branding.md` for optional release naming and theme; release name omitted from commit/release title if convention is absent (#89) -- **`update-docs` skill**: reads `docs/branding.md` primary/accent colours to apply `%%{init:...}%%` theming to Mermaid C4 diagrams (#89) - -## [v6.2.20260419] - Autonomous Stenella - 2026-04-19 - -### Added -- **pytest-beehave integration**: `@id` tags now auto-assigned to untagged `Example:` blocks on every `pytest` run; test stubs auto-generated from `.feature` files at Step 2 end — no manual ID generation or stub writing required (#78) -- **Self-declaration defense in depth**: all 25 items numbered 1–25 in `implementation/SKILL.md`; `verify/SKILL.md` now hard-gates on completeness (count must equal 25, sequence must be gapless) before item audit begins (#78) - -### Changed -- **Naming convention**: `.feature` file paths now use `` (kebab); test directories use `` (underscore) — applied consistently across all skills, `AGENTS.md`, and docs (#78) -- **`conftest.py`**: removed manual `deprecated` marker skip hook — now owned entirely by pytest-beehave (#78) -- **`scope/SKILL.md`**: removed all manual `@id` generation instructions and `@id` uniqueness checklist items — assignment is automatic (#78) -- **`product-owner.md`**: removed `@id` from bug handling and gap-resolution table — PO writes `Example:` blocks only (#78) -- **README**: added "Why this template?" 
section; added `pytest-beehave` to tooling table; replaced static stub example with a two-part Gherkin-in → stub-out illustration (#78) -- **`verify/SKILL.md` report table**: expanded Self-Declaration Audit from 21 collapsed rows to 25 numbered rows matching the implementation template exactly (#78) - -## [v6.1.20260419] - Contextual Ambystoma - 2026-04-19 (hotfix) - -### Added -- **living-docs skill**: new PO skill for generating C4 architecture diagrams (`docs/c4/context.md`, `docs/c4/container.md`) and maintaining the living glossary (`docs/glossary.md`) after each feature acceptance (Step 5) or on stakeholder demand -- **docs/c4/**: new directory for C4 Level 1 (Context) and Level 2 (Container) Mermaid diagrams; placeholder `.gitkeep` added -- **docs/glossary.md**: new living glossary file owned by `living-docs` skill (PO); terms sourced from completed feature files, `docs/discovery.md` Domain Model, and `docs/architecture.md` decisions -- **Scientific research — documentation.md**: new file with 4 entries (#59–62): Ko et al. 2007 (information needs), Winters et al. 
2020 (docs-as-code), Procida 2021 (Diátaxis framework), Allspaw 2012 (blameless post-mortems) -- **Scientific research — domain-modeling.md**: 6 new DDD entries (#63–68): Evans DDD Reference CC-BY, Fowler UbiquitousLanguage bliki, Fowler BoundedContext bliki, Vernon IDDD, Verraes "UL is not a glossary", Evans Whirlpool process -- **Scientific research — architecture.md**: 4 new entries (#55–58): Nygard ADRs, Kruchten 4+1 View Model, Brown C4 Model, Parnas information hiding - -### Changed -- **discovery.md template**: `### Scope` section renamed to `### Context` — the section is a session-level general-context synthesis, not a complete project scope definition -- **scope/SKILL.md**: updated `### Scope` references to `### Context` in Step C instructions and template block -- **living-docs/SKILL.md**: glossary entry format updated — `**Context:**` renamed to `**Bounded context:**` (mandatory for multi-context projects); `Domain Event` added as a distinct Type value; secondary-artifact note added to preamble; source-traceability rule replaces "do not invent" rule; checklist updated accordingly -- **implementation/SKILL.md**: Step 2 Read Phase now includes `docs/glossary.md` as item 2 — SE reads existing domain terms before naming classes, methods, and modules to avoid inventing synonyms -- **create-skill/SKILL.md**: `living-docs` added to available skills table -- **AGENTS.md**: skills table updated with `living-docs`; filesystem structure section updated (`docs/c4/`, `docs/glossary.md` added; `docs/architecture/` subtree removed; TODO.md reference updated) - -### Removed -- **docs/architecture/**: folder deleted; the ADR log lives at `docs/architecture.md` (SE-owned); the old `adr-template.md` inside the folder was redundant -- **docs/workflow.md**: deleted; canonical workflow reference is `AGENTS.md` and the skills under `.opencode/skills/` -- **Dockerfile / docker-compose.yml**: removed as unused template artifacts - -## [v6.0.20260419] - Declarative Nautilus - 
2026-04-19 - -### Added -- **PO Self-Declaration**: mandatory 11-claim checklist (INVEST I/V/S/T, observable Then, no impl details, entity coverage, distinct examples, unique IDs, pre-mortem, scope boundary) written into TODO.md at end of Stage 2 Step B before criteria commit; every DISAGREE is a hard blocker (#71) -- **Reviewer Stance Declaration**: 5-claim block (adversarial mindset, manual trace, boundary check, semantic read, independence) added to verify/SKILL.md report template before APPROVED/REJECTED verdict; DISAGREE allowed with explanation, unexplained DISAGREE = REJECTED (#71) -- **session-workflow**: Step 1 Stage 2 Criteria TODO format section with full Self-Declaration template and Rule 9 enforcing the declaration before criteria commit (#71) -- **Three append-only project docs**: `docs/discovery_journal.md` (raw Q&A), `docs/discovery.md` (synthesis changelog), `docs/architecture.md` (architectural decisions) replace the old flat `docs/features/discovery.md` (#70) - -### Changed -- **Discovery model** (breaking): Phase 1 / Phase 2 / Phase 3 / Phase 4 replaced by 2-stage model — Stage 1 Discovery (unified iterative sessions, PO + stakeholder) and Stage 2 Specification (PO alone, per BASELINED feature) (#70) -- **Feature file moves** (breaking): PO is now the sole owner of all `.feature` file moves (backlog → in-progress and in-progress → completed); SE and reviewer explicitly prohibited from moving files with clear escalation protocol (#70) -- **Session protocol**: discovery journal sessions use `Status: IN-PROGRESS` / `Status: COMPLETE` markers; real-time split rule (>2 concerns or >8 candidate Examples splits within the same session); journal writes only answered Q&A in groups (#70) -- **Bug handling**: explicit protocol — PO adds `@bug @id` Example, SE writes both the `@id` test in `tests/features/` and a `@given` Hypothesis property test in `tests/unit/`; both required (#70) -- **scope/SKILL.md**: full rewrite to 2-stage model with session start 
checklist, question order (general → cross-cutting → per-feature), after-questions steps, baselining section, and bug handling section (#70) -- **feature-selection/SKILL.md**: updated "Phase 4 (Criteria)" reference to "Stage 2 Step B (Criteria)" (#70) -- **All agent files and skills**: updated to reflect new document model, terminology, and chain of responsibility (#70, #71) - -## [v5.2.20260418] - Emergent Colugo - 2026-04-18 (hotfix) - -### Fixed -- **Role naming**: Replaced stale `developer` agent-role references with `software-engineer` in `implementation/SKILL.md`, `docs/scientific-research/ai-agents.md`, `docs/scientific-research/cognitive-science.md`, and `docs/features/completed/display-version.feature` -- **session-workflow**: Replaced hardcoded agent names in `## Next` line examples with `@` placeholders; added note pointing to `AGENTS.md` as source of truth; added missing Step 2 (Architecture) example - -## [v5.1.20260418] - Emergent Colugo - 2026-04-18 - -### Added -- **refactor skill**: Standalone skill with Fowler's full catalogue, green-bar rule, two-hats rule, SOLID/OC self-declaration table, and preparatory refactoring protocol — loaded on demand at REFACTOR phase -- **feature-selection skill**: WSJF-based backlog prioritisation (Reinertsen 2009) with Kano value scoring and dependency gate — PO loads this when `TODO.md` is idle -- **ADR template**: `docs/architecture/adr-template.md` for Step 2 architectural decisions -- **Logo and banner**: visual identity added to README (SVG assets in `docs/images/`) - -### Changed -- **Architecture stubs**: Step 2 now writes stubs directly into `/` instead of an Architecture section in the feature file; stubs have no docstrings (add after GREEN when lint enforces them); folder structure is suggested, not prescribed — `ports/` and `adapters/` only created when a concrete external dependency is confirmed -- **design-patterns skill**: Narrowed to pure GoF catalogue (23 patterns, smell-triggered before/after 
examples); SOLID, OC, LoD, CQS, Python Zen moved to refactor skill self-declaration checklist -- **session-workflow**: `Next` line in TODO.md now requires `Run @` prefix so the human always knows which agent to invoke; idle state loads `skill feature-selection` instead of a vague prompt -- **verify skill**: Added orphaned-stub check (skip-marked tests that were never implemented); report template now includes structured `Next Steps` block directing the human to the correct agent -- **Scientific research**: `docs/academic_research.md` split into 9 domain files under `docs/scientific-research/` (cognitive-science, testing, architecture, oop-design, refactoring-empirical, requirements-elicitation, domain-modeling, software-economics, ai-agents) - -### Fixed -- Stale `docs/architecture/STEP2-ARCH.md` reference removed from workflow diagram and skill -- Protocol smell-check gate now marked N/A when no external dependencies are identified in scope - -## [v5.0.20260418] - Structured Phascolarctos - 2026-04-18 - -### Added -- **design-patterns skill**: Full GoF pattern catalogue with smell-triggered patterns, SOLID, Object Calisthenics, Python Zen, Law of Demeter, CQS, Tell Don't Ask — loaded on demand at Steps 2-3 -- **create-agent skill**: Research-backed agent creation guide with OpenAI/Anthropic best practices, ownership boundaries, tool surface design, and escalation rules -- **software-engineer agent**: Dedicated agent file replacing `developer.md`; owns Steps 2-3 and release -- **3-session discovery structure**: Phase 1 and Phase 2 now each use a 3-session template with template gates (§1/§2/§3 must be confirmed before proceeding); active listening protocol (3 levels) codified in scope skill - -### Changed -- **5-step workflow** (breaking): Steps restructured — TDD loop merged into Step 3, Verify is Step 4, Accept is Step 5; all agents, skills, and docs updated to match -- **Behavior groups terminology**: "Cluster" renamed to "behavior group" throughout scope skill, 
AGENTS.md, workflow.md, and templates for clearer AI focus -- **Story candidates terminology**: Phase 3 now derives "story candidates" → `Rule:` blocks, removing ambiguity from the cluster-to-story mapping -- **Test stub format** (breaking): Stubs now use `@pytest.mark.skip(reason="not yet implemented")` instead of `raise NotImplementedError`; skip marker is removed when implementing in RED phase -- **Dropped `@pytest.mark.unit` and `@pytest.mark.integration`**: Only `@pytest.mark.slow` and `@pytest.mark.deprecated` remain; folder structure (`tests/features/` vs `tests/unit/`) encodes test type -- **BASELINED gate enforced**: PO may not move a feature to `in-progress/` unless its discovery section has `Status: BASELINED`; enforced in product-owner.md and session-workflow -- **tdd skill removed**: Replaced by implementation skill with inline TDD guidance -- **gen_test_stubs.py removed**: Script deleted along with tdd skill - -### Fixed -- **pyproject.toml**: Removed broken `gen-tests` task; removed `raise NotImplementedError` from coverage exclusions; removed `unit`/`integration` marker definitions -- **Role naming**: `developer` → `software-engineer` across all files -- **Step count**: All references to "6 steps" updated to "5 steps" - -## [v4.1.20260416] - Recursive Acinonyx - 2026-04-16 - -### Added -- **Single `.feature` file per feature**: Each feature is now one `.feature` file with `Rule:` blocks for user stories and `Example:` blocks for ACs — discovery content embedded in the feature description free text; replaces the folder-per-feature structure -- **Rule-scoped test files**: `gen_test_stubs.py` rewritten to parse `Rule:` blocks; each Rule maps to one test file (`<rule>_test.py`); function naming is now `test_<rule>_<id>()` -- **Hypothesis-only `tests/unit/`**: Every test in `tests/unit/` must use `@given`; `@pytest.mark.slow` is mandatory on all Hypothesis tests; plain `assert` tests without `@given` are forbidden -- **Mandatory `## Self-Declaration` in TODO.md**: 
Developer writes the 21-item checklist into a `## Self-Declaration (@id:<uuid>)` block in `TODO.md` at `SELF-DECLARE` phase before requesting reviewer check (Rule 8 in session-workflow) - -### Changed -- **`gen_test_stubs.py`**: Scans `docs/features/{backlog,in-progress,completed}/*.feature` directly (not subfolders); generates one test file per `Rule:` block -- **`gen_todo.py`**: `find_in_progress_feature()` now finds `.feature` files directly in `in-progress/`; source path is `docs/features/in-progress/<feature>.feature` -- **`skills/tdd/SKILL.md`**: Test Tool Decision table updated to separate `tests/features/` (plain pytest, generated) from `tests/unit/` (Hypothesis only); `tests/unit/` rules section added -- **`skills/implementation/SKILL.md`**: Unit test rule tightened — `@given` required, `@pytest.mark.slow` mandatory, plain tests forbidden -- **`skills/verify/SKILL.md`**: Two new rows in section 4f: `@given` check and `@slow` check; two new rows in Standards Summary -- **`skills/scope/SKILL.md`**: All four phases rewritten for file-based workflow; `discovery-template.md` converted to `.feature` file template -- **`skills/session-workflow/SKILL.md`**: Step 4 TODO format updated with mandatory `## Self-Declaration` block template; Rule 8 added -- **Completed feature migrated**: `docs/features/completed/display-version/` (three files) merged into `docs/features/completed/display-version.feature` (single file with two `Rule:` blocks) - -### Fixed -- **OC-8 clarification**: The only valid fix for > 2 `self.x` is a new named class (Rule 3 or Rule 4); hardcoded constants, class-level variables, inlined literals, and parent-class moves are all invalid workarounds and remain FAIL - -## [v4.0.20260416] - Precise Tarsius - 2026-04-16 - -### Added -- **Per-test Design Self-Declaration**: After REFACTOR, developer fills a 20-item checklist (YAGNI → KISS → DRY → SOLID-S/O/L/I/D → OC rules 1–9) with `file:line` evidence before requesting reviewer check; reviewer independently audits 
claims using an 11-row comparison table (#58) -- **Package Verification step**: Mandatory before writing any code — read `pyproject.toml → [tool.setuptools] packages`, confirm directory exists on disk; hard stop if missing (#58) -- **SELF-DECLARE phase**: New phase added to the Red-Green-Refactor cycle between REFACTOR and REVIEWER; Cycle State now `RED | GREEN | REFACTOR | SELF-DECLARE | REVIEWER(code-design) | COMMITTED` (#58) -- **template-config.yaml**: Declarative single source of truth for all setup-project substitutions — `defaults:` block with 6 parameters, `substitutions:` map with literal `old:` strings, `{variable}` `new:` patterns, and expected `count:` per file (#58) -- **Post-mortem docs**: Two ping-pong-cli post-mortems documenting the systemic failures that drove this release (#58) - -### Changed -- **verify/SKILL.md Scope Guard**: Reviewer receives completed Design Self-Declaration and independently verifies each claim; responds using structured 11-row comparison table (#58) -- **verify/SKILL.md section 4g**: New row — `Imports use correct package name` (check imports match `[tool.setuptools] packages`); existing rows made more precise with `pyproject.toml` references (#58) -- **reviewer.md per-test Step 4 section**: Rewritten to reference `skill implementation` verification table; clarifies no commands run during Step 4 reviews (#58) -- **reviewer.md Zero-Tolerance Rule 1**: Scoped to `(Step 5 only — per-test Step 4 checks are code-design only, no commands)` (#58) -- **setup-project.md**: Reads `template-config.yaml`; each apply step delegates to the config map rather than carrying implicit pattern knowledge (#58) -- **Template app simplified**: `app/version.py` deleted; `app/__main__.py` reduced from 41 to 23 lines (#58) - -### Fixed -- **gen_todo.py path**: `parents[5]` → `parents[4]` — was resolving one directory above the project root (#58) -- **session-workflow Cycle State**: `SELF-DECLARE` phase added to documented phase list and Rule 6 
(#58) -- **code-quality/SKILL.md**: Removed "has been absorbed" migration language (#58) -- **Dockerfile stale references**: `python_package_template.python_module_template` → `app` in HEALTHCHECK and CMD (#58) -- **docker-compose.yml stale references**: `python_package_template` → `app` in volume mounts and command (#58) - -### Breaking Changes -- `project_defaults.json` deleted — replaced by `template-config.yaml` (#58) -- `app/version.py` and `tests/version_test.py` deleted — template app simplified to minimal `__main__.py` + one Hypothesis unit test (#58) - -## [v3.2.20260415] - Vigilant Mantis - 2026-04-15 - -### Added -- **Adversarial verification mandate**: Reviewer's default hypothesis is now "the code is broken despite green checks" — job is to find the failure mode, not confirm it works (#54) -- **Production-grade gate**: New step 3 in verification — app must exit cleanly AND output must change when input changes; static output regardless of input = REJECTED (#54) -- **UUID Drift bash check**: One-liner detects duplicate UUIDs across test functions; any duplicate = REJECTED with fix instructions (#54) -- **docs/academic_research.md**: 15 cognitive and social science mechanisms with full citations grounding every workflow design decision (pre-mortem, implementation intentions, adversarial collaboration, elaborative encoding, and 11 more) (#54) -- **Design pattern decision table**: Added to `developer.md` and `implementation/SKILL.md`; any detected anti-pattern = REJECTED (#54) -- **Architecture contradiction check**: Developer must cross-check ADRs against ACs before writing production code (#54) -- **PO pre-mortem**: Added at scope step and acceptance step (#54) -- **Semantic alignment rule**: Tests must operate at same abstraction level as AC (#54) -- **Integration test requirement**: Multi-component features require at least one integration test through the public entry point (#54) -- **Verification Philosophy section**: Added to AGENTS.md — automated 
checks verify syntax-level correctness; human review verifies semantic-level correctness; both required (#54) - -### Changed -- **Verification order**: Code review before automated commands; run app first as production-grade gate (#54) -- **All review sections converted to tables**: Correctness, KISS, SOLID, ObjCal, Design Patterns, Tests, Versions/Build all have PASS/FAIL/Fix columns (#54) -- **UUID Uniqueness rule**: If only Given varies it is a property — use Hypothesis `@given` + `@example`, not multiple test functions; if When/Then differs use `extend-criteria` (#54) -- **Production-grade self-check in implementation**: Developer must verify output changes with input before handoff (#54) - -## [v3.1.20260414] - Tidal Capybara - 2026-04-14 - -### Added -- **extend-criteria skill**: New skill for any agent to add acceptance criteria discovered mid-flight or post-merge, with decision rule (gap within scope vs. new feature), per-role procedures, and commit protocol -- **Source: field on acceptance criteria**: Mandatory traceability field on every criterion (`stakeholder | po | developer | reviewer | bug`) — records who originated the requirement - -### Changed -- **Test function naming**: `test_<feature>_<id>` replaces `test_<feature>_should_<behavior>` -- **Test docstring first line**: UUID only (no trailing description) — `"""<uuid>\n\nGiven: ...` -- **development commands**: All skill and agent files now use `uv run task` consistently (not bare `task`) -- **tests/ layout**: Documented as flat (no unit/ or integration/ subdirectories) -- **pytest.skip prohibition**: Aligned across files — allowed with written justification in the docstring -- **Marker decision table**: Moved to tdd/SKILL.md only (developer's decision, not PO's) -- **mv to in-progress**: Ownership reassigned to developer Step 2 (not PO scope step) -- **TODO.md status markers**: Added `[~]` (in progress) and `[-]` (cancelled) to documented legend -- **--doctest-modules**: Documented in implementation/SKILL.md (task test runs doctest 
modules) -- **verify/SKILL.md**: Report template uses flat `tests/:` path format -- **exit code wording**: `exit non-124` (was ambiguous `exit 0 or 124`) in developer.md -- **README.md**: `uv sync --all-extras` and `uv run task` commands throughout - -### Fixed -- Removed stale `docs/features/in-progress/auto-publish-docs.md` -- Split compound acceptance criterion (two outcomes in one Then) into two single-outcome criteria -- Added `@pytest.mark.slow` to Hypothesis tests in reference implementation -- Added `# Given / # When / # Then` body comments to all reference tests -- Removed duplicate assertion from `test_version_logs_correct_message` -- Moved `StringIO` import from test body to module-level imports - -## [v3.0.20260414] - Drifting Axolotl - 2026-04-14 - -### Breaking Changes -- **Workflow redesigned**: 8-phase/6-role system replaced with 6-step/3-role (Product Owner, Developer, Reviewer) -- **Roles removed**: architect, manager, repo-manager, requirements-gatherer, overseer agents deleted -- **Feature directories restructured**: `docs/features/{business,architecture}/` replaced with flat `docs/features/{backlog,in-progress,completed}/` - -### Added -- **product-owner agent**: Defines scope, acceptance criteria, picks features, accepts deliveries (Steps 1 + 6) -- **reviewer agent**: Read+bash only, runs all commands, produces APPROVED/REJECTED report (Step 5) -- **scope skill**: PO guide for writing user stories + UUID acceptance criteria -- **verify skill**: Reviewer guide for running commands and code review checklist -- **Unified docs site**: `docs/index.html` landing page linking to API Reference, Coverage, Test Results -- **ghp-import**: One-liner `task doc-publish` replaces complex inline Python - -### Changed -- **developer agent**: Owns all of Steps 2-4+6 including architecture, tests, code, and release -- **9 skills rewritten**: session-workflow, tdd, implementation, code-quality, pr-management, git-release, create-skill (lean, <150 lines each) -- 
**Test markers reduced**: from 11 (with duplicate) to 3: `unit`, `integration`, `slow` -- **doc-build**: Now generates all three outputs (pdoc API + pytest-cov HTML + pytest-html) -- **CI workflow**: Cleaned up to use `uv run task <task>` consistently -- **setup-project agent**: No longer uses setup_project.py; agent applies changes directly - -### Removed -- 11 skills deleted (architectural-analysis, delegation-coordination, epic-workflow, feature-definition, qa-enforcement, requirements-management, signature-design, workflow-coordination, prototype-script, create-agent, reference/) -- `setup_project.py` script and `.opencode/templates/` directory -- Wrong `dotenv` dependency (replaced nothing — was unused) -- `mutmut` dev dependency (YAGNI) - -## [v2.2.20260413] - Luminous Kestrel - 2026-04-13 - -### Added -- **Architecture-First Feature System** - New directory structure separating business and architecture features -- **Architectural Analysis Skill** - Systematic architecture documentation for each feature -- **8-Phase Development Cycle** - Expanded from 7-phase with dedicated Architecture Analysis phase - -### Changed -- **BDD → Acceptance Criteria** - Renamed gherkin-validation to acceptance-criteria-validation for accurate terminology -- **Consistency Updates** - Fixed phase numbering, cross-references, and documentation across all agents and skills -- **Epic-Workflow Refactor** - Converted from epic-based to feature-selection with architecture-first priority -- **Manager Agent** - Enhanced with test signature creation capabilities - -### Migration Notes -- No breaking changes in this release -- Projects can continue using existing workflow - -## [v2.1.20260413] - Polished Gecko - 2026-04-13 - -### Added -- Docker simplification and cleanup -- V2 Development Workflow with CI/CD fixes -- Template refactoring for generic app package -- Enhanced QA enforcement skills - -### Changed -- Complexity fixes for CI compliance -- CodeQL config conflict resolved - -## 
[v2.0.20260411] - Armored Pangolin - 2026-04-11 - -### 🚀 MAJOR RELEASE - V1 → V2 Architecture Transition - -This represents a fundamental architectural shift from V1 (template validation workflows) to V2 (project development workflows). - -### Breaking Changes -- **Workflow Architecture**: Complete transition from template validation (V1) to project development (V2) -- **CI/CD Pipeline**: New comprehensive GitHub Actions workflow replacing template-specific workflows -- **Branch Structure**: V2/init becomes the new development foundation -- **Agent Configuration**: Updated agent roles and capabilities for project development - -### Security Improvements -- Enhanced GitHub Actions workflow security with proper permissions blocks -- Removed risky PIP_USER environment variable from CI/CD pipeline -- Added secure error handling to shell scripts with 'set -euo pipefail' -- Implemented job-level permissions for all CI workflow operations - -### Infrastructure & DevOps -- Modernized Docker setup with security-first containerization approach -- Comprehensive CI/CD pipeline with GitHub Actions integration -- Improved workflow security following GitHub Advanced Security recommendations -- Full project development workflow implementation - -### Development Experience -- Complete project-focused development environment -- Better error handling and security practices in automation -- Enhanced development workflow with secure defaults -- Improved CI/CD reliability and security posture - -### Migration Notes -- **BREAKING**: This is a major version requiring migration from V1 template workflows -- V1 template validation workflows are replaced by V2 project development workflows -- Projects using V1 should plan migration to V2 architecture -- All security improvements follow GitHub security best practices - -## [v1.7.20260410] - Vivid Cardinal - 2026-04-10 - -### Added -- **QA-gated Epic Workflow** - Complete epic-based development with mandatory quality checkpoints at each phase 
-- **Epic-workflow Skill** - Manages epic-based development with automatic feature progression -- **EPICS.md Template** - Epic tracking and management file for generated projects - -### Changed -- Updated all agent descriptions to use industry-standard roles (Development Lead, Software Architect, QA Specialist, Business Analyst, Release Engineer) -- Removed model specifications from all agents to make template model-agnostic -- Updated AGENTS.md to properly document all 5 generated project agents and all skills -- Updated README.md with new workflow and agent roles - -### Fixed -- Documentation now accurately reflects what exists in template - -## [v1.6.20260409] - Guardian Owl - 2026-04-09 - -### Added -- **Overseer Agent** - Quality assurance agent that reviews work after each test implementation and requests changes if needed -- **Requirements Gatherer Agent** - Agent that asks questions to understand project needs, updates documentation, creates detailed analysis for architect - -### Changed -- Updated developer workflow to include `@overseer` calls after Phase 3 (TDD tests) and Phase 7 (Quality Assurance) -- Updated AGENTS.md with new agents and updated workflow examples - -## [v1.0.0] - 2026-03-12 - -### Added -- **AI-Enhanced Development Workflow** - Complete OpenCode integration for AI-powered development -- **Developer Agent** - Main development agent with 8-phase TDD workflow -- **Architect Agent** - Design review agent for SOLID principles and object calisthenics compliance -- **Repository Manager Agent** - Git operations, PRs, and themed releases management -- **Development Skills** - feature-definition, prototype-script, tdd, signature-design, implementation, code-quality -- **Repository Skills** - git-release (hybrid calver versioning with themed releases), pr-management -- **Meta Skills** - create-skill, create-agent for extending OpenCode -- **Template Management** - template-manager agent, template-test, template-release skills -- **Comprehensive 
CI Workflow** - Template validation, generated project tests, Docker builds -- **Validation Scripts** - cookiecutter.json, pyproject.toml, YAML frontmatter validation - -### Changed -- Updated README.md with modern AI-focused branding -- Updated generated project README template with AI development workflow - -### Features -- **7-Phase Development Cycle**: Feature Definition → Prototype → TDD → Signature Design → Architecture Review → Implementation → Quality Assurance -- **SOLID Principles Enforcement** - Single responsibility, dependency inversion, interface segregation -- **Object Calisthenics** - No primitives, small classes, behavior-rich objects -- **Hybrid Calver Versioning**: v1.2.20260302 format with themed releases -- **Themed Release Names**: "Swift Cheetah", "Vigilant Owl", "Creative Fox" based on PR sentiment -- **Property-Based Testing**: Hypothesis integration for robust test coverage - -### Migration Notes -- This is the first semantic version release -- No breaking changes to cookiecutter.json structure -- Generated projects now include OpenCode agents and skills -- Existing projects can regenerate to get new features +- Initial release. \ No newline at end of file diff --git a/README.md b/README.md index 3744d34..4884775 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
-Python Project Template +agents-smith

@@ -9,156 +9,69 @@ [![Stargazers][stars-shield]][stars-url] [![Issues][issues-shield]][issues-url] [![MIT License][license-shield]][license-url] -[![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?style=for-the-badge)](https://nullhack.github.io/smith/coverage/) -[![CI](https://img.shields.io/github/actions/workflow/status/nullhack/smith/ci.yml?style=for-the-badge&label=CI)](https://github.com/nullhack/smith/actions/workflows/ci.yml) +[![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?style=for-the-badge)](https://nullhack.github.io/agents-smith/coverage/) +[![CI](https://img.shields.io/github/actions/workflow/status/nullhack/agents-smith/ci.yml?style=for-the-badge&label=CI)](https://github.com/nullhack/agents-smith/actions/workflows/ci.yml) [![Python](https://img.shields.io/badge/python-3.13-blue?style=for-the-badge)](https://www.python.org/downloads/) -**From zero to hero — production-ready Python, without the ceremony.** +**AI-assisted software delivery system with flow-based agent orchestration.**
--- -## Quick Start +A delivery system that treats documentation as a first-class artifact and enforces production rigor through an AI-assisted workflow. Your team ships features, not broken promises. -```bash -git clone https://github.com/nullhack/smith -cd smith -curl -LsSf https://astral.sh/uv/install.sh | sh # skip if uv installed -uv sync --all-extras -opencode && @setup-project # personalise for your project -uv run task test && uv run task lint && uv run task static-check -``` - ---- - -## Why this template? - -Most Python templates give you a folder structure and a `Makefile`. This one gives you a **complete delivery system**: five AI agents, a structured five-step workflow, and quality gates that cannot be silenced by convention. - -The goal is to give every project — from its first commit — the same rigour that mature teams take years to establish. - -- **No feature starts without written acceptance criteria** — Gherkin `Example:` blocks traced to tests -- **No feature ships without adversarial review** — the reviewer's default hypothesis is "broken" -- **No guesswork on test stubs** — generated automatically from `.feature` files -- **No manual `@id` tags** — assigned automatically when you run tests -- **AI agents for every role** — each agent has scoped instructions and cannot exceed its authority +Developers get TDD by default with traceability from requirement to test. Product Owners get living documentation that never drifts from code. Architects get adversarial review that catches what automated checks miss. 
--- -## How it works - -### The delivery cycle - -``` -SCOPE → ARCH → TDD LOOP → VERIFY → ACCEPT -``` - -| Step | Role | Output | -|------|------|--------| -| **1 · SCOPE** | Product Owner | Discovery interviews + Gherkin stories + acceptance criteria | -| **2 · ARCH** | Software Engineer | Module stubs, ADRs, auto-generated test stubs | -| **3 · TDD LOOP** | Software Engineer | RED → GREEN → REFACTOR, one criterion at a time | -| **4 · VERIFY** | Reviewer | Adversarial check — lint, types, coverage, semantic review | -| **5 · ACCEPT** | Product Owner | Demo, validate, ship | - -**WIP limit: 1 feature at a time.** Features are `.feature` files that move through folders: +## Quick start +```bash +git clone https://github.com/nullhack/agents-smith +cd agents-smith +curl -LsSf https://astral.sh/uv/install.sh | sh # skip if uv is already installed +uv sync --all-extras +opencode && @setup-project # personalise for your project +uv run task test && uv run task lint && uv run task static-check ``` -docs/features/backlog/ ← waiting -docs/features/in-progress/ ← building (max 1) -docs/features/completed/ ← shipped -``` - -### AI agents included - -| Agent | Responsibility | -|-------|---------------| -| `@product-owner` | Scope, stories, acceptance criteria, delivery acceptance | -| `@software-engineer` | Architecture, TDD loop, git, releases | -| `@reviewer` | Adversarial verification — default position: broken | -| `@designer` | Visual identity, colour palette, SVG assets | -| `@setup-project` | One-time project initialisation | - -### Quality tooling, pre-configured - -| Tool | Role | -|------|------| -| `uv` | Package & environment management | -| `ruff` | Lint + format (Google docstrings) | -| `pyright` | Static type checking — 0 errors | -| `pytest` + `hypothesis` | Tests + property-based testing | -| `pytest-beehave` | Auto-generates test stubs from `.feature` files | -| `pytest-cov` | Coverage — 100% required | -| `pdoc` | API docs → GitHub Pages | -| `taskipy` | 
Task runner | --- ## Commands +### Development + ```bash -uv run task test # Full suite + coverage -uv run task test-fast # Fast, no coverage (use during TDD loop) -uv run task lint # ruff check + format -uv run task static-check # pyright -uv run task run # Run the app +uv run task test # full suite + coverage +uv run task test-fast # fast, no coverage (use during TDD loop) +uv run task lint # ruff format + check +uv run task static-check # pyright type checking +uv run task run # run the app +uv run task doc-build # build API docs + coverage report ``` ---- - -## Code standards - -| | | -|---|---| -| Coverage | 100% | -| Type errors | 0 | -| Function length | ≤ 20 lines | -| Class length | ≤ 50 lines | -| Max nesting | 2 levels | -| Principles | YAGNI › KISS › DRY › SOLID › Object Calisthenics | - ---- - -## Test convention +### Smith CLI -Write acceptance criteria in Gherkin: +`smith` connects your project to the agents-smith agentic workflow files. It manages the agentic file lifecycle — connect, update, and disconnect — so your project stays in sync without manual file copying. 
-```gherkin -@id:a3f2b1c4 -Example: User sees version on startup - Given the application starts - When no arguments are passed - Then the version string is printed to stdout -``` - -Run tests once — a traced, skipped stub appears automatically: - -```python -@pytest.mark.skip(reason="not yet implemented") -def test_display_version_a3f2b1c4() -> None: - """ - Given the application starts - When no arguments are passed - Then the version string is printed to stdout - """ +```bash +smith connect # write agentic files from the default template source +smith connect --from PATH # write agentic files from a local path +smith connect --from URL # write agentic files from a remote tarball +smith connect --overwrite # overwrite existing agentic files +smith update # re-write agentic files from the connected source +smith disconnect # remove all agentic files and gitignore entries +smith status # show connection state and source ``` -Each test traces to exactly one acceptance criterion. No orphan tests. No untested criteria. - ---- - -## Branding - -When you run `@setup-project`, the agent collects your project's identity — name, tagline, mission, colour palette, and release naming convention — and writes `docs/branding.md`. All agents read this file. Release names, C4 diagram colours, and generated copy all reflect your project's identity without you touching `.opencode/`. - -Absent or blank fields fall back to defaults: adjective-animal release names, Mermaid default colours, no wording constraints. - --- -## Versioning +## Documentation -`v{major}.{minor}.{YYYYMMDD}` — each release gets a unique name derived from your branding convention. By default: an adjective paired with an animal (scientific name). Configure your own theme in `docs/branding.md`. 
+- **[Product Definition](docs/product-definition.md)** — product boundaries, users, and scope +- **[System Overview](docs/system.md)** — architecture, domain model, module structure, and constraints +- **[Glossary](docs/glossary.md)** — living domain glossary --- @@ -166,16 +79,16 @@ Absent or blank fields fall back to defaults: adjective-animal release names, Me MIT — see [LICENSE](LICENSE). -**Author:** [@nullhack](https://github.com/nullhack) · [Documentation](https://nullhack.github.io/smith) +**Author:** [@nullhack](https://github.com/nullhack) · [Documentation](https://nullhack.github.io/agents-smith) -[contributors-shield]: https://img.shields.io/github/contributors/nullhack/smith.svg?style=for-the-badge -[contributors-url]: https://github.com/nullhack/smith/graphs/contributors -[forks-shield]: https://img.shields.io/github/forks/nullhack/smith.svg?style=for-the-badge -[forks-url]: https://github.com/nullhack/smith/network/members -[stars-shield]: https://img.shields.io/github/stars/nullhack/smith.svg?style=for-the-badge -[stars-url]: https://github.com/nullhack/smith/stargazers -[issues-shield]: https://img.shields.io/github/issues/nullhack/smith.svg?style=for-the-badge -[issues-url]: https://github.com/nullhack/smith/issues +[contributors-shield]: https://img.shields.io/github/contributors/nullhack/agents-smith.svg?style=for-the-badge +[contributors-url]: https://github.com/nullhack/agents-smith/graphs/contributors +[forks-shield]: https://img.shields.io/github/forks/nullhack/agents-smith.svg?style=for-the-badge +[forks-url]: https://github.com/nullhack/agents-smith/network/members +[stars-shield]: https://img.shields.io/github/stars/nullhack/agents-smith.svg?style=for-the-badge +[stars-url]: https://github.com/nullhack/agents-smith/stargazers +[issues-shield]: https://img.shields.io/github/issues/nullhack/agents-smith.svg?style=for-the-badge +[issues-url]: https://github.com/nullhack/agents-smith/issues [license-shield]: 
https://img.shields.io/badge/license-MIT-green?style=for-the-badge -[license-url]: https://github.com/nullhack/smith/blob/main/LICENSE +[license-url]: https://github.com/nullhack/agents-smith/blob/main/LICENSE \ No newline at end of file diff --git a/TODO.md b/TODO.md deleted file mode 100644 index f8f8910..0000000 --- a/TODO.md +++ /dev/null @@ -1,4 +0,0 @@ -# Current Work - -No feature in progress. -Next: Run @product-owner — select next BASELINED feature from backlog and move to in-progress for Step 2 (Architecture). diff --git a/docs/c4/.gitkeep b/docs/adr/.gitkeep similarity index 100% rename from docs/c4/.gitkeep rename to docs/adr/.gitkeep diff --git a/docs/adr/ADR_20260501_argparse-cli-framework.md b/docs/adr/ADR_20260501_argparse-cli-framework.md new file mode 100644 index 0000000..f83b83d --- /dev/null +++ b/docs/adr/ADR_20260501_argparse-cli-framework.md @@ -0,0 +1,55 @@ +# ADR_20260501_argparse-cli-framework + +## Status + +Accepted + +## Context + +smith needs a CLI framework to support four subcommands (`connect`, `disconnect`, `update`, `status`) with options (`--from <source>`, `--overwrite`). The project has a hard constraint of zero runtime dependencies. The current codebase already uses argparse (stdlib) for `--help` and `--version`. The CLI framework choice is architecturally significant because it constrains the entire command dispatch structure and is hard to change later without rewriting all command handlers. + +**Feature:** smith-commands (all four CLI commands) + +Forces: +- Zero runtime dependency constraint from `product_definition.md` and `system.md` +- Four subcommands with options — argparse supports subparsers natively +- The quality attribute ranking places Usability below Safety, Atomicity, and Clean Separation — a simpler CLI is acceptable if it meets the four-command requirement +- The delivery mechanism is CLI-only — no HTTP, no TUI, no GUI + +## Interview + +| Question | Answer | +|---|---| +| Which CLI framework should smith use? 
| argparse (stdlib) | + +## Decision + +Use argparse as the CLI framework for all four subcommands. + +## Reason + +argparse is part of the Python stdlib, satisfies the zero-runtime-dependency constraint, and supports subparsers for multi-command CLIs. The four subcommands are well within argparse's capability — no complex nested commands, no shell completion, no rich terminal output required. + +## Alternatives Considered + +- **Click**: Mature, excellent for complex CLIs, but introduces a runtime dependency (`click`). Rejected because it violates the zero-dependency constraint. +- **Typer**: Built on Click with type annotations, but also introduces a runtime dependency (`typer` + `click`). Rejected for the same reason. +- **Docopt**: Declarative CLI from docstrings, but introduces a runtime dependency and has weaker subparser support. Rejected. +- **Cleo**: Full-featured CLI framework used by Poetry, but introduces a runtime dependency and is over-engineered for four commands. Rejected. + +## Consequences + +- (+) Zero runtime dependencies maintained — `pip install agents-smith` works with no additional packages +- (+) Consistent with existing codebase (`__main__.py` already uses argparse) +- (+) Stdlib guarantee — argparse will always be available on any Python 3.13 installation +- (-) argparse subparser API is more verbose than Click/Typer — more boilerplate per command +- (-) No built-in shell completion, rich formatting, or progress bars — mitigated by keeping CLI output simple (text + exit codes) +- (-) Switching to Click/Typer later would require rewriting all command handlers — mitigated by hexagonal architecture (command handlers are thin adapters; domain logic is independent) + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| argparse subparser ergonomics lead to verbose command dispatch code | Medium | Low | Keep command handlers thin (dispatch to application use cases) | Yes | +| Future CLI complexity exceeds argparse capability | Low | Medium | Hexagonal architecture isolates CLI framework — can swap without domain changes | Yes | +| argparse `required` subparser behavior differs across Python versions | Low | Low | Python 3.13 is the only target version | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md b/docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md new file mode 100644 index 0000000..bee5219 --- /dev/null +++ b/docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md @@ -0,0 +1,57 @@ +# ADR_20260501_atomic-file-writes-via-temp-directory + +## Status + +Accepted + +## Context + +smith must guarantee atomicity for the `connect` and `update` commands: either all agentic files are written to the project directory or none are. Partial connections are explicitly forbidden by the Safety and Atomicity quality attributes. The file set includes AGENTS.md (single file), .opencode/ (directory tree), .templates/ (directory tree), and .flowr/ (directory tree). The mechanism for achieving atomicity is architecturally significant because it affects the entire write path, rollback strategy, and failure recovery. 
+ +**Feature:** smith-commands (connect, update) + +Forces: +- Atomicity quality attribute: "When smith connects, either all agentic files are written or none are" — Must priority +- Safety quality attribute: "When smith connects to a project that already has agentic files, it refuses to overwrite without explicit `--overwrite` flag" — Must priority +- Clean Separation quality attribute: "When smith disconnects, no agentic files remain" — Must priority +- Zero runtime dependency constraint — no external transaction managers +- The write set includes both single files and directory trees — the mechanism must handle both + +## Interview + +| Question | Answer | +|---|---| +| How should smith guarantee atomicity for file writes? | Temp-directory staging with atomic rename | + +## Decision + +Use temp-directory staging for all file writes: write all files to a temporary directory first, validate the complete set exists, then move files to their final locations. On any failure during the write phase, discard the temporary directory — no cleanup of partial writes needed because nothing was moved to the final location yet. + +## Reason + +Temp-directory staging is the simplest mechanism that satisfies the atomicity invariant without runtime dependencies. It uses only `tempfile.mkdtemp()` and `os.replace()` from the stdlib. The two-phase approach (stage → commit) means that failures during staging leave zero trace in the project directory, eliminating the need for complex rollback logic. + +## Alternatives Considered + +- **Transaction log with rollback**: Write a log of all operations before executing them; on failure, reverse the log. Rejected because it requires tracking individual file operations, makes rollback complex (reverse order, handle partial failures during rollback), and risks leaving the project in an inconsistent state if rollback itself fails. 
+- **Shadow directory (write to `.smith-staging/`, then rename)**: Similar to temp-directory staging but uses a fixed directory name in the project root. Rejected because it pollutes the project directory with a staging directory that must be cleaned up even on success, and could conflict with an existing `.smith-staging/` directory. +- **In-place writes with backup-and-restore**: Write files directly, keeping backups of any overwritten files. On failure, restore from backups. Rejected because it's the most complex approach — requires backup management, handles partial writes, and risks data loss if the restore fails. + +## Consequences + +- (+) Atomicity guarantee: either all files are written or none are — no partial connections possible +- (+) Simple rollback: on failure, just discard the temp directory — no cleanup of partial writes +- (+) Zero runtime dependencies — uses only stdlib (`tempfile`, `os.replace`, `shutil.move`) +- (+) Clear failure mode: if staging fails, the project directory is untouched +- (-) Disk space: staging requires temporary disk space for the full file set — mitigated by the fact that agentic files are typically small (a few KB for AGENTS.md, a few MB for .opencode/) +- (-) Two-phase write adds latency: all files must be staged before any are committed — mitigated by the small file set size +- (-) `os.replace()` is atomic on POSIX but not on Windows for cross-device moves — mitigated by ensuring temp directory is on the same filesystem as the project directory (use `dir=` parameter of `mkdtemp`) + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| Cross-device temp directory causes non-atomic rename on Windows | Low | Medium | Use `dir=` parameter of `mkdtemp` to create temp directory in the project directory's filesystem | Yes | +| Staging directory leaked if process is killed between stage and commit | Low | Low | Add a `.smith-staging` cleanup check at the start of `smith connect` and `smith status` | Yes | +| Disk full during staging causes write failure | Low | Low | Pre-check available disk space before staging; report clear error message | Yes | +| File permissions differ between temp and project directories | Low | Low | Explicitly set permissions after `os.replace()` using `os.chmod()` | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_github-bundled-template-resolution.md b/docs/adr/ADR_20260501_github-bundled-template-resolution.md new file mode 100644 index 0000000..1bda96b --- /dev/null +++ b/docs/adr/ADR_20260501_github-bundled-template-resolution.md @@ -0,0 +1,64 @@ +# ADR_20260501_github-bundled-template-resolution + +## Status + +Superseded by ADR-007 (local bundled template resolution) + +## Context + +The `BundledTemplateSource` currently resolves template files from `smith/data/` — a directory of 85 hardcoded copies of the project's own `.opencode/`, `.flowr/`, `.templates/`, and `AGENTS.md` files. This approach has several problems: + +1. **Staleness:** The bundled files are copies that must be manually updated. They will drift from the actual agents-smith templates over time. +2. **Coupling:** Every template update requires a new smith package release, even though smith is the consumer, not the owner, of these templates. +3. **Size:** 85 files add unnecessary bulk to the package distribution. +4. **Wrong ownership:** The agents-smith repository (specifically its `v8_release` branch) is the authoritative source for these files, not the smith package. 
+ +The agents-smith PyPI package (v7.2.20260423) only contains `app/__init__.py` and `app/__main__.py` — it does not expose template data as package resources, so `importlib.resources` cannot be used to read templates from an agents-smith package. + +Forces: +- Templates should always be current without requiring a new smith release +- The default template source must work reliably (network or cache) +- Network failure should not prevent `smith connect` if the cache is populated +- The solution should be simple and maintainable + +## Interview + +| Question | Answer | +|---|---| +| Should we use stdlib `urllib.request` or `requests` for HTTP? | Use `requests` — cleaner API, better error handling, worth the dependency | +| Should downloaded templates be cached locally? | Yes — cache in `~/.cache/smith/` to avoid re-downloading on every connect/update | +| Should the default GitHub branch/tag be configurable? | No — default to `v8_release` for now; will change in future but not configurable today | +| Should `smith/data/` be removed? | Yes — delete the entire directory; it contains stale copies | + +## Decision + +Resolve the bundled `agents-smith` template source by downloading the GitHub archive at runtime from `https://github.com/nullhack/agents-smith/archive/refs/heads/v8_release.tar.gz`, extracting it, and caching the files locally in `~/.cache/smith/agents-smith/`. Delete `smith/data/` entirely. Add `requests` as the only external runtime dependency. + +## Reason + +GitHub-based resolution ensures templates are always current without requiring a new smith package release. Local caching avoids redundant network requests. The `requests` library provides significantly better HTTP handling than `urllib.request` for this use case. 
+ +## Alternatives Considered + +- **importlib.resources with packaged templates (status quo):** Templates in `smith/data/` will go stale and require manual updates. The agents-smith PyPI package does not expose template data as resources. Rejected because of staleness and coupling. +- **urllib.request for HTTP downloads:** The stdlib HTTP client lacks connection pooling, timeout defaults, and clean error handling that `requests` provides. Rejected because the API is harder to use correctly and test. +- **Git submodule for agents-smith:** Adds complexity to the build process and still requires packaging template files. Rejected because it doesn't solve the staleness problem. +- **No caching (re-download every time):** Wasteful network requests on every `smith connect`/`smith update`. Rejected because of performance and usability impact on repeated commands. + +## Consequences + +- (+) Templates are always current — no need for a new smith release when templates change +- (+) `smith/data/` is removed — no stale copies, smaller package distribution +- (+) Local cache enables offline use after first download +- (-) `requests` is added as a runtime dependency — breaks the previous "zero runtime dependencies" constraint; mitigated by `requests` being the only external dependency +- (-) First `smith connect` requires network access — mitigated by clear error message on failure and cache fallback for subsequent use +- (-) Cache directory management adds implementation complexity — mitigated by using standard OS cache directories (`~/.cache/smith/`) + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| GitHub is unavailable on first `smith connect` | Low | Medium | Clear error message with exit code 1; suggest retrying or using `--from <source>` | Yes | +| Cache corruption | Low | Low | Delete cache directory and re-download; smith does not rely on cache integrity for safety | Yes | +| `v8_release` branch is renamed or deleted | Low | High | Default URL is a module-level constant that can be updated in a patch release; future enhancement could make it configurable | Yes | +| `requests` security vulnerability | Low | Medium | Pin minimum version in pyproject.toml; dependabot alerts for known CVEs | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_hexagonal-architecture.md b/docs/adr/ADR_20260501_hexagonal-architecture.md new file mode 100644 index 0000000..57df821 --- /dev/null +++ b/docs/adr/ADR_20260501_hexagonal-architecture.md @@ -0,0 +1,51 @@ +# ADR_20260501_hexagonal-architecture + +## Status + +Accepted + +## Context + +smith's domain logic enforces four critical invariants (atomicity, safety, clean separation, consistency) that must not be coupled to infrastructure details like filesystem operations, network access, or CLI argument parsing. The quality attributes rank Safety, Atomicity, and Clean Separation above Usability — the domain invariants are the core value, and the delivery mechanism (CLI) is a thin adapter. The project also needs to support multiple template source types (bundled, local path, remote URL) without changing domain logic. 
+ +Forces: +- Safety, Atomicity, and Clean Separation are Must-quality attributes that must be enforced in the domain layer +- Multiple template source types (bundled, local path, URL) require different infrastructure implementations +- Zero runtime dependency constraint means no framework can provide dependency injection +- Testability is a Should-quality attribute — domain logic must be testable without filesystem or network access +- The domain is small and cohesive (single bounded context, single aggregate) + +## Interview + +| Question | Answer | +|---|---| +| Which architectural style should smith use? | Hexagonal (Ports & Adapters) | + +## Decision + +Use Hexagonal Architecture (Ports & Adapters) with four layers: domain, application, infrastructure, delivery. Domain defines Protocol interfaces (ports); infrastructure implements them as adapters. The dependency arrow always points inward. + +## Reason + +Hexagonal architecture keeps the domain invariant enforcement independent of filesystem, network, and CLI concerns. The four quality attributes (Safety, Atomicity, Clean Separation, Consistency) are all enforced in the pure domain layer — no filesystem or network imports in domain code. Template source variations are handled by infrastructure adapters implementing a TemplateSourcePort interface, satisfying Modifiability without domain changes. + +## Alternatives Considered + +- **Layered architecture (traditional 3-tier):** Would work but doesn't enforce the strict dependency inversion needed. Domain could accidentally import infrastructure through shared layers. Rejected because it doesn't make the port/adapter boundary explicit. +- **Microservices architecture:** Over-engineered for a single-bounded-context CLI tool. Rejected because there's no inter-service communication need. +- **Event-driven architecture:** No asynchronous processing or event sourcing requirements. Rejected because smith's commands are synchronous request-response. 
+ +## Consequences + +- (+) Domain invariants are testable in isolation via port mocks — no filesystem or network in unit tests +- (+) New template source types added as infrastructure adapters without domain changes +- (+) CLI is a thin delivery adapter — can be replaced without touching domain logic +- (-) More files and indirection than a simple script — mitigated by the domain being small (single aggregate) +- (-) Protocol interfaces must be maintained alongside implementations — mitigated by keeping ports minimal (4 ports) + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? | +|------|------------|--------|------------|-----------| +| Over-engineering for a small CLI tool | Medium | Low | Domain is a single aggregate; the overhead is 4 port interfaces and 6 adapter classes — proportional to the problem | Yes | +| Port interfaces drift from actual needs | Low | Medium | Write tests against ports first (TDD); ports evolve with domain needs | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_local-bundled-template-resolution.md b/docs/adr/ADR_20260501_local-bundled-template-resolution.md new file mode 100644 index 0000000..8b13426 --- /dev/null +++ b/docs/adr/ADR_20260501_local-bundled-template-resolution.md @@ -0,0 +1,66 @@ +# ADR_20260501_local-bundled-template-resolution + +## Status + +Accepted — supersedes ADR-006 (GitHub-based bundled template resolution) + +## Context + +ADR-006 replaced the local `smith/data/` bundle with GitHub-based runtime download + local cache. This introduced problems discovered during end-to-end testing: + +1. **Runtime network dependency:** `smith connect` without `--from` requires network access on first run, violating the principle that the default source should "just work" +2. **Cache staleness:** A stale cache directory with incomplete content was served instead of re-downloading, producing incorrect results +3. 
**Complexity:** Download, extraction, caching, and cache invalidation logic added significant implementation overhead for the default use case +4. **Wrong default behavior:** The default template source should be the most reliable path, not one that depends on external infrastructure + +The agents-smith v8_release branch is the source of truth for agentic files, but smith should carry a local copy as part of its distribution rather than downloading at runtime. + +Forces: +- The default `smith connect` experience should be instant and offline-capable +- Template freshness is a release-time concern, not a runtime concern +- `requests` is still needed for UrlTemplateSource (non-default source types) +- The `smith/data/` directory must be kept in sync with agents-smith v8_release via a manual script + +## Interview + +| Question | Answer | +|---|---| +| Should bundled templates be packaged locally or downloaded at runtime? | Packaged locally — `smith connect` without `--from` must work offline | +| Should `requests` still be a dependency? | Yes — UrlTemplateSource needs it for tar.gz/zip downloads | +| How is `smith/data/` kept in sync with agents-smith? | Manual script (`scripts/update-bundle.sh`) that downloads and copies agentic files | +| Should URL sources cache downloads? | No — re-download every time; no persistent cache for any source type | +| What about the deprecated BDD examples a1b2c3d4 and e5f6g7h8? | Deprecate — they test network failure and cache fallback for bundled source, which no longer applies | + +## Decision + +Package agentic files in `smith/data/` and resolve them at runtime via `importlib.resources`. BundledTemplateSource reads from the package directory — no network calls, no caching. UrlTemplateSource (fully implemented) downloads tar.gz/zip archives via `requests`, extracts to a temp directory, applies the agentic filter, and returns FileSpec objects — no persistent cache. 
Delete the GitHub download and caching code from BundledTemplateSource. Add `scripts/update-bundle.sh` for manual sync from agents-smith v8_release. + +## Reason + +Local packaging ensures `smith connect` works offline and instantly for the default case. Runtime download complexity is unnecessary for the default source. Template freshness is maintained through release-time updates, not runtime downloads. + +## Alternatives Considered + +- **GitHub-based download + local cache (ADR-006, superseded):** Adds runtime network dependency, cache staleness risk, and implementation complexity. Rejected because the default experience should be instant and offline. +- **No `requests` dependency at all:** Would prevent UrlTemplateSource from working. Rejected because URL source support is a required feature. +- **Git submodule for agents-smith:** Adds build complexity and still requires packaging files. Rejected because it doesn't simplify the distribution. +- **Persistent cache for URL sources:** Adds cache invalidation complexity with minimal benefit since URL sources are used infrequently. Rejected for simplicity. + +## Consequences + +- (+) `smith connect` without `--from` works instantly and offline +- (+) No cache staleness or invalidation issues for the default source +- (+) Simpler implementation — no download, extraction, or caching for BundledTemplateSource +- (+) `requests` dependency is only used for UrlTemplateSource, not the default path +- (-) `smith/data/` must be kept in sync with agents-smith v8_release via manual script +- (-) Template updates require a new smith release (same as pre-ADR-006 behavior) +- (-) `smith/data/` adds ~85 files to the package distribution + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| `smith/data/` drifts from agents-smith v8_release | Medium | Low | `scripts/update-bundle.sh` syncs agentic files; CI could automate this in future | Yes | +| Package size increases by ~85 files | Low | Low | Agentic files are small text files; total size is negligible | Yes | +| UrlTemplateSource download fails | Medium | Medium | Clear error message with exit code 1; user can retry or use `--from <source>` | Yes | +| `importlib.resources` path resolution differs across Python versions | Low | Low | Use `importlib.resources.files()` which is stable in Python 3.9+ | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_no-smart-merge.md b/docs/adr/ADR_20260501_no-smart-merge.md new file mode 100644 index 0000000..b4fcbf5 --- /dev/null +++ b/docs/adr/ADR_20260501_no-smart-merge.md @@ -0,0 +1,53 @@ +# ADR_20260501_no-smart-merge + +## Status + +Accepted + +## Context + +When `smith connect` encounters an existing `.flowr/` or `.templates/` directory in the target project, a decision must be made about how to handle the conflict. These directories may contain project-specific data (flows, templates) that the engineer has customised. The stakeholder deferred this decision to the architect. + +Forces: +- Safety quality attribute: "Zero silent overwrites, ever" +- Atomicity quality attribute: "No partial connections, ever" +- Clean Separation quality attribute: "Zero orphaned files after disconnect" +- `.flowr/` and `.templates/` may contain project-specific data that the engineer wants to preserve +- Smart merge logic (comparing files, choosing which to keep) adds complexity and failure modes +- smith disconnect must be able to cleanly remove everything smith wrote — merge makes this ambiguous + +## Interview + +| Question | Answer | +|---|---| +| How should smith handle existing .flowr/ and .templates/ when connecting? 
| Refuse without --overwrite; replace entirely with --overwrite (no merge) | + +## Decision + +Treat `.flowr/` and `.templates/` identically to all other agentic files: skip user-tracked files and auto-update smith-managed files. When `--overwrite` is passed, replace managed files entirely. No smart merge logic. + +## Reason + +This decision applies the YAGNI principle over DRY. Smart merge logic would violate Atomicity (partial connections where some files are merged and others are skipped), Safety (silent modification of existing content), and Clean Separation (disconnect wouldn't know which files were smith's vs pre-existing). The simple mental model — "smith writes its files; if they exist, use `--overwrite`" — is more usable than complex merge rules. + +## Alternatives Considered + +- **Smart merge (file-by-file comparison):** Compare each file and only write files that don't exist. Rejected because it violates Atomicity (partial state: some files merged, some skipped), Safety (silently modifying existing directory content), and Clean Separation (disconnect can't determine which files were smith's). +- **Selective skip:** Skip `.flowr/` and `.templates/` if they exist, but write `AGENTS.md` and `.opencode/`. Rejected because it violates Atomicity (partial connection) and creates an inconsistent state where some agentic files are present but others are not. +- **Interactive prompt:** Ask the user what to do for each conflicting directory. Rejected because it breaks the non-interactive CLI workflow and adds complexity for a marginal benefit. 
+ +## Consequences + +- (+) Atomicity is preserved: all files or nothing +- (+) Safety is preserved: no silent overwrites without `--overwrite` +- (+) Clean Separation is preserved: disconnect removes everything smith wrote, unambiguously +- (+) Simple mental model: "smith writes its files; if they exist, use `--overwrite`" +- (+) YAGNI: no merge logic to maintain, test, or debug +- (-) Engineers who want to combine project-specific flows/templates with smith's templates must manually manage that outside of smith — mitigated by the fact that `smith update --overwrite` replaces all files, making the workflow explicit + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? | +|------|------------|--------|------------|-----------| +| Engineers lose project-specific .flowr/ or .templates/ data when using --overwrite | Medium | High | Warn before overwrite; suggest backing up the directory first. Future feature could add `--backup` flag | Yes | +| Engineers want selective merge in future | Low | Low | Can be added as a future feature without architectural changes — the TemplateSourcePort and FileSystemPort interfaces support this | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_smith-yaml-metadata.md b/docs/adr/ADR_20260501_smith-yaml-metadata.md new file mode 100644 index 0000000..1247c3b --- /dev/null +++ b/docs/adr/ADR_20260501_smith-yaml-metadata.md @@ -0,0 +1,55 @@ +# ADR_20260501_smith-yaml-metadata + +## Status + +Superseded — The stakeholder decided smith should be stateless. Connection state is now inferred from the `# smith managed` section in `.gitignore`, with source metadata stored in the section header (e.g., `# smith managed source:agents-smith`). No separate `.smith.yaml` file is created. This decision supersedes ADR-004's original recommendation of a dedicated metadata file. + +## Context + +smith needs to persist connection state between commands. 
`smith status` must report which template source was used and when the connection was established. `smith update` must know which template source to refresh from. `smith disconnect` must know what to remove. This state must survive process termination — it cannot be in-memory only. + +Forces: +- The connection state must be queryable by `smith status` without re-deriving it +- `smith update` must know the original template source (default agents-smith, or `--from `) +- The project directory is the only reliable persistence location (smith has no config directory) +- Zero runtime dependency constraint means no PyYAML or other parsing libraries +- The metadata file is created on connect and removed on disconnect, following the same lifecycle as the agentic files + +## Interview + +| Question | Answer | +|---|---| +| How should smith persist connection state? | Simple YAML file in the project root (.smith.yaml) | + +## Decision + +Use a `.smith.yaml` file in the project root to persist connection state. The file contains `template_source` and `connected_at` fields in simple `key: value` format. The file is created on `smith connect`, read by `smith status` and `smith update`, and removed on `smith disconnect`. + +## Reason + +A simple key-value YAML file in the project root is the most discoverable and debuggable persistence mechanism. It's human-readable, version-controllable, and can be parsed without PyYAML by using a simple line-splitting approach. The file follows the same lifecycle as the agentic files (created on connect, removed on disconnect). + +## Alternatives Considered + +- **SQLite database in .smith/:** Over-engineered for two fields. Rejected because it adds binary state and requires a runtime dependency for proper SQLite handling. +- **JSON file (.smith.json):** Valid alternative, but JSON is less human-readable for simple key-value data and doesn't support comments. Rejected in favor of YAML's comment support for explaining fields. 
+- **No persistence (derive state from file presence):** Fragile — cannot distinguish between "connected with default agents-smith" and "files happened to be there." Also cannot determine the original `--from` source for `smith update`. Rejected because it violates the consistency invariant. +- **Git configuration (git config):** Only works in git repositories. smith must work in non-git directories. Rejected. + +## Consequences + +- (+) Connection state is human-readable and debuggable +- (+) `.smith.yaml` follows the same lifecycle as the agentic files — created on connect, removed on disconnect +- (+) Simple format parseable without PyYAML — maintains zero runtime dependency constraint +- (+) `smith status` can report template source and connection time without re-deriving +- (-) `.smith.yaml` is visible in the project directory — mitigated by adding it to the managed .gitignore section +- (-) If a user manually edits `.smith.yaml`, state could become inconsistent — mitigated by documenting that `.smith.yaml` is managed by smith and should not be edited manually +- (-) Simple YAML format cannot represent complex nested structures — mitigated by YAGNI; only two fields are needed + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| User manually edits .smith.yaml causing inconsistent state | Low | Medium | Document in .smith.yaml comments that it is managed by smith; detect corruption on status/update commands | Yes | +| .smith.yaml conflicts with other tools using same filename | Low | Low | The `.smith.` prefix is specific to this tool; collision is unlikely | Yes | +| Simple YAML parser cannot handle edge cases | Low | Low | Only two fields with string values; no complex types needed | Yes | \ No newline at end of file diff --git a/docs/architecture.md b/docs/architecture.md deleted file mode 100644 index 2edabcd..0000000 --- a/docs/architecture.md +++ /dev/null @@ -1,19 +0,0 @@ -# Architecture: - ---- - -## YYYY-MM-DD — : - -Decision: -Reason: -Alternatives considered: -Feature: - ---- - -## YYYY-MM-DD — Cross-feature: - -Decision: -Reason: -Alternatives considered: -Affected features: , diff --git a/docs/discovery.md b/docs/discovery.md deleted file mode 100644 index 58b7151..0000000 --- a/docs/discovery.md +++ /dev/null @@ -1,39 +0,0 @@ -# Discovery: smith - ---- - -## Session: 2026-04-20 - -### Context -`smith` is a CLI tool for Python developers who want a production-grade project setup without manual scaffolding. It solves the problem of bootstrapping and maintaining a consistent project structure aligned with the nullhack/python-project-template. Users are developers starting new projects or upgrading existing ones. Success means a new project runs immediately after creation, and an existing project gains template tooling without losing any existing content. Failure means data loss in an existing project or a broken new project. Out of scope: running projects, version-tracking template updates, multi-project management, PyPI publishing, and IDE integration. 
- -### Feature List -- `smith-init` — creates a new Python project using `uv init` then layers template add-ons with interactive metadata prompts -- `smith-assimilate` — applies template structure/tooling to an existing project with per-file conflict resolution and dry-run mode - -### Domain Model -| Type | Name | Description | In Scope | -|------|------|-------------|----------| -| Noun | Project | A Python project directory being created or upgraded | Yes | -| Noun | Template | The nullhack/python-project-template add-ons (`.opencode/`, CI, folder structure, `AGENTS.md`) | Yes | -| Noun | Metadata | User-provided values (name, author, GitHub username) substituted into template placeholders | Yes | -| Noun | ConflictResolution | Per-file user decision when a template file already exists: skip, overwrite, or diff | Yes | -| Noun | DryRun | A preview mode that shows planned changes without writing any files | Yes | -| Verb | init | Create a new project via `uv init` then apply template add-ons | Yes | -| Verb | assimilate | Apply template add-ons to an existing project | Yes | -| Verb | merge | Add missing `pyproject.toml` entries without overwriting existing ones | Yes | -| Verb | prompt | Ask the user for metadata or conflict resolution decisions interactively | Yes | - ---- - -## Session: 2026-04-20 (Session 2) - -### Feature List -- `smith-new` — renamed from `smith-init`; command confirmed as `smith new [path]`; template source confirmed as uv GitHub dependency pinned by rev -- `smith-assimilate` — path argument confirmed (`smith assimilate [path]`, defaults to cwd); both features baselined - -### Domain Model -| Type | Name | Description | In Scope | -|------|------|-------------|----------| -| Noun | TemplateDependency | The nullhack/python-project-template installed as a uv GitHub dep, rev-pinned in pyproject.toml | Yes | -| Verb | new | Create a new project via `uv init` then apply template add-ons | Yes | diff --git a/docs/discovery_journal.md 
b/docs/discovery_journal.md deleted file mode 100644 index aa2edfd..0000000 --- a/docs/discovery_journal.md +++ /dev/null @@ -1,67 +0,0 @@ -# Discovery Journal: smith - ---- - -## 2026-04-20 — Session 1 -Status: IN-PROGRESS - -### General - -| ID | Question | Answer | -|----|----------|--------| -| Q1 | Who are the users? | Python developers starting or improving their Python projects | -| Q2 | What does the product do at a high level? | CLI tool with two commands: `smith init ` creates a new project using `uv init` then layers template add-ons on top; `smith assimilate` applies template structure/tooling to an existing project | -| Q3 | Why does it exist — what problem does it solve? | Solves the problem of having a proper production-grade project setup and being able to update/upgrade it later | -| Q4 | When and where is it used? | CLI tool used locally; two modes: create new project, or apply template to existing project | -| Q5 | Success — what does "done" look like? | New project runs straightaway after creation; existing project is upgraded without affecting existing content | -| Q6 | Failure — what must never happen? | Destroys anything in an existing project; spawns a project that doesn't work as intended | -| Q7 | Out-of-scope — what are we explicitly not building? | Running the project (smith only sets up), version-tracking template updates, managing multiple projects, publishing to PyPI, IDE/editor integration | - -### Cross-cutting: Commands - -| ID | Question | Answer | -|----|----------|--------| -| Q8 | What is the second command word? | `assimilate` — Matrix-branded, fits the Agent Smith aesthetic | -| Q9 | Are these two separate commands or one command with a flag? | Two separate commands: `smith init ` and `smith assimilate` | -| Q10 | What does `smith init` actually do? | Runs `uv init` then layers template add-ons on top — NOT a git clone of the template | -| Q11 | For `smith assimilate` — what does it touch? 
| `.opencode/` folder (skills, agents, prompts), `pyproject.toml` additions (merge/add missing, don't overwrite existing), CI files (`.github/workflows/`), folder structure (create `docs/`, `tests/` if missing), `AGENTS.md` | - -### Cross-cutting: Safety - -| ID | Question | Answer | -|----|----------|--------| -| Q12 | What happens if `smith init` is run in a directory that already exists? | Prompt per conflicting file: skip / overwrite / show diff | -| Q13 | What happens if `smith assimilate` is run twice on the same project? | Safe to run again — always prompts on conflicts; idempotent by design | -| Q14 | Is there a dry-run or preview mode? | Yes — show what would change before writing anything | - -### Cross-cutting: Configuration - -| ID | Question | Answer | -|----|----------|--------| -| Q15 | Does the user provide project metadata during creation? | Yes — interactive prompts during creation (name, author, GitHub username, etc.) that substitute placeholders in template files | - -### Out-of-scope - -| ID | Question | Answer | -|----|----------|--------| -| Q17 | Is smith responsible for running the project after setup? | No — smith only creates/upgrades; running is out of scope | -| Q18 | Should smith update an already-applied template to a newer version? | No — version-tracking template updates is out of scope | -| Q19 | Should smith manage multiple projects? | No — one project at a time; multi-project management is out of scope | - -Status: COMPLETE - ---- - -## 2026-04-20 — Session 2 -Status: IN-PROGRESS - -### Refinements and Baseline Approval - -| ID | Question | Answer | -|----|----------|--------| -| Q20 | What is the final command name for project creation? | `smith new` (changed from `smith init`) — command is `smith new [path]` | -| Q21 | How is the template distributed? 
| As a uv GitHub dependency pinned by commit `rev` in `pyproject.toml`; no runtime download — smith reads template files from the installed package | -| Q22 | Does `smith assimilate` accept a path argument? | Yes — `smith assimilate [path]`, defaults to cwd if no path given | -| Q23 | Has the stakeholder approved both features for baselining? | Yes — both `smith-new` and `smith-assimilate` are approved for baselining as of 2026-04-20 | - -Status: COMPLETE diff --git a/docs/features/backlog/.gitkeep b/docs/features/.gitkeep similarity index 100% rename from docs/features/backlog/.gitkeep rename to docs/features/.gitkeep diff --git a/docs/features/backlog/smith-assimilate.feature b/docs/features/backlog/smith-assimilate.feature deleted file mode 100644 index b7a9cd3..0000000 --- a/docs/features/backlog/smith-assimilate.feature +++ /dev/null @@ -1,117 +0,0 @@ -Feature: smith assimilate - - Applies the nullhack/python-project-template structure and tooling to an existing - Python project. The template is bundled as a uv GitHub dependency (pinned by commit - rev) — no runtime download occurs. Touches: `.opencode/` (skills, agents, prompts), - `pyproject.toml` (merges missing entries only, never overwrites existing), - `.github/workflows/` CI files, `docs/` and `tests/` folders (created if missing), and - `AGENTS.md`. Supports a `--dry-run` flag that shows all planned changes without writing - any files. Conflict resolution is per-file (skip / overwrite / diff). Safe to run - multiple times — always prompts on conflicts. 
- - Status: BASELINED (2026-04-20) - - Rules (Business): - - Applies template add-ons to an existing project without destroying existing content - - `pyproject.toml` entries are merged: missing entries are added, existing entries are never overwritten - - Folder structure additions (`docs/`, `tests/`) are created only if missing - - Conflict resolution is per-file: user chooses skip, overwrite, or view diff - - `--dry-run` flag shows all planned changes without writing any files - - Operation is idempotent: running `smith assimilate` again is safe and always prompts on conflicts - - Template source: nullhack/python-project-template installed as uv GitHub dependency (rev-pinned) - - Constraints: - - Entry point: `smith assimilate [path]` CLI command; defaults to cwd if no path given - - Must never delete or overwrite files without explicit user confirmation - - Template source: nullhack/python-project-template installed as uv GitHub dependency - - Rule: Template application - As a Python developer - I want to run `smith assimilate` on an existing project - So that I get the template tooling without recreating the project from scratch - - @id:a1b2c3d4 - Example: Template files are added to an existing project - Given an existing Python project at the target path that lacks `.opencode/` and `AGENTS.md` - When the developer runs `smith assimilate` and confirms all prompts - Then `.opencode/`, `AGENTS.md`, `.github/workflows/`, `docs/`, and `tests/` are present in the project - - @id:e5f6a7b8 - Example: Existing project files are not deleted - Given an existing Python project with files not part of the template - When the developer runs `smith assimilate` and confirms all prompts - Then all pre-existing project files remain present and unmodified - - Rule: Safe pyproject.toml merge - As a Python developer - I want missing pyproject.toml entries added without touching existing ones - So that my existing configuration is preserved - - @id:9c0d1e2f - Example: Missing 
pyproject.toml entries are added - Given an existing `pyproject.toml` that lacks template-required entries - When the developer runs `smith assimilate` - Then the missing entries are added to `pyproject.toml` - - @id:3f4a5b6c - Example: Existing pyproject.toml entries are never overwritten - Given an existing `pyproject.toml` with a `[project.name]` entry set to "my-existing-name" - When the developer runs `smith assimilate` - Then `[project.name]` remains "my-existing-name" after assimilation - - Rule: Dry-run preview - As a Python developer - I want to preview all planned changes before they are written - So that I can decide whether to proceed without risk of accidental overwrites - - @id:7d8e9f0a - Example: Dry-run shows planned changes without writing files - Given an existing project that would receive template add-ons - When the developer runs `smith assimilate --dry-run` - Then a list of all files that would be added or modified is displayed and no files are written - - @id:b1c2d3e4 - Example: Dry-run on an up-to-date project reports no changes - Given an existing project that already has all template add-ons applied - When the developer runs `smith assimilate --dry-run` - Then smith reports that no changes would be made - - Rule: Per-file conflict resolution - As a Python developer - I want to choose skip, overwrite, or diff for each conflicting file - So that I have full control over what gets changed in my existing project - - @id:f5a6b7c8 - Example: Conflicting file triggers a per-file prompt - Given a template file already exists in the target project with different content - When the developer runs `smith assimilate` - Then smith prompts the user for that file with options: skip, overwrite, diff - - @id:d9e0f1a2 - Example: Choosing skip leaves the existing file unchanged - Given a conflict prompt is shown for an existing file during assimilation - When the developer chooses "skip" - Then the existing file is left unchanged and smith continues to the 
next file - - @id:b3c4d5e6 - Example: Choosing overwrite replaces the existing file with the template version - Given a conflict prompt is shown for an existing file during assimilation - When the developer chooses "overwrite" - Then the existing file is replaced with the template version - - @id:f7a8b9c0 - Example: Choosing diff shows a unified diff before re-prompting - Given a conflict prompt is shown for an existing file during assimilation - When the developer chooses "diff" - Then a unified diff of the existing file vs the template version is displayed and the prompt is shown again - - Rule: Idempotent operation - As a Python developer - I want to run `smith assimilate` multiple times safely - So that re-running it never silently overwrites my work - - @id:d1e2f3a4 - Example: Re-running assimilate on an already-assimilated project prompts on conflicts - Given a project that has already had `smith assimilate` applied - When the developer runs `smith assimilate` again - Then smith prompts for any conflicting files and makes no changes without explicit confirmation diff --git a/docs/features/backlog/smith-commands.feature b/docs/features/backlog/smith-commands.feature new file mode 100644 index 0000000..e64f4fe --- /dev/null +++ b/docs/features/backlog/smith-commands.feature @@ -0,0 +1,297 @@ +Feature: smith-commands + + smith connects standardised agent configurations (AGENTS.md, .opencode/, + .templates/, .flowr/) to any project directory and disconnects cleanly when + done. Four commands — connect, disconnect, update, status — validate the + full connect/work/disconnect cycle end-to-end. 
+ + Status: BASELINED (2026-05-01) + + Rules (Business): + - Connection state is inferred from the `# smith managed` section in .gitignore, not from a metadata file + - All agentic files are written atomically: either all are written or none are + - .templates/ and .flowr/ follow the same atomic rules as AGENTS.md and .opencode/ + - Existing files gitignored by `# smith managed` are auto-updated; files NOT in that section are skipped + - Disconnect removes only gitignored managed files; user-tracked files are preserved + - `# smith managed` section is kept on disconnect (guard for future usage) + - Connect on already-connected project auto-updates; update on not-connected project auto-connects + + Constraints: + - Safety: zero silent overwrites, ever (product_definition.md #1) + - Atomicity: no partial connections, ever (product_definition.md #2) + - Clean separation: zero orphaned files after disconnect (product_definition.md #3) + - Usability: smith connect must complete in under 1 minute (product_definition.md #4) + + ## Frozen Examples Rule + + After a feature is BASELINED, all `Example:` blocks are immutable. Changes require + `@deprecated` on the old Example (preserving the original @id) and a new Example + with a new @id. This prevents scope creep and maintains traceability. 
+ + ## Pre-Mortem Findings + + | Rule | Failure Mode | Mitigation Example | + |------|-------------|-------------------| + | 1 — Connect | .gitignore does not exist → smith cannot add managed section | c928a845: creates .gitignore | + | 1 — Connect | .gitignore exists without smith section → append fails silently | 86c8e268: appends section to existing file | + | 2 — Skip | User adds .gitignore entries outside `# smith managed` for smith files → smith treats them as user-tracked | df0455a5: smith-managed file is auto-updated on reconnect | + | 3 — Disconnect | User manually deleted a managed file before disconnect | b755bfae: partial disconnect is idempotent | + | 4 — Update | Template source has changed since last connect (files added/removed) | 9a01f4e2: update reflects current template state | + + ## Questions + + | ID | Question | Status | Answer / Assumption | + |----|----------|--------|---------------------| + | Q1 | Should `smith status --json` include the template source URL in the output? | Assumed | Yes — status should report whatever connection metadata is available | + | Q2 | What happens if .gitignore is read-only? | Assumed | Exit 1 with IO error (standard filesystem error, not a smith-specific exit code) | + | Q3 | What happens if a managed file is a broken symlink? | Assumed | Treat as present (it exists on disk as a symlink); smith does not resolve symlinks | + | Q4 | What happens if the bundled template download fails on first connect? | Assumed | N/A — bundled source reads from local package data, no network required | + | Q5 | What happens if the bundled template download fails but cache exists? 
| Assumed | N/A — bundled source reads from local package data, no caching | + + ## Changes + + | Session | Q-IDs | Change | + |---------|-------|--------| + | 2026-05-01 SN | — | Created: initial BDD specification for smith-commands | + | 2026-05-01 SN | IN_20260501_temple8-dependency-resolution | Added: bundled template network failure and cache fallback examples (a1b2c3d4, e5f6g7h8); added Q4 and Q5 about network failure behavior | + | 2026-05-01 SN | IN_20260501_local-bundle-reversal | Deprecated: a1b2c3d4 and e5f6g7h8 (bundled source no longer uses network); Added: URL source download failure examples (a2b3c4d5, e4f5g6h7); Updated Q4/Q5 to reflect local bundle | + + Rule: Connect to a fresh project + As an engineer + I want to run smith connect in a fresh project directory + So that I can immediately start using standard AI agent workflows + + @id:c928a845 + Example: Connect with default template source + Given a project directory with no agentic files and no `# smith managed` section in .gitignore + When the engineer runs `smith connect` + Then all agentic files (AGENTS.md, .opencode/, .templates/, .flowr/) are written to the project directory + And a `# smith managed` section is added to .gitignore with entries for all agentic file patterns + + @id:86c8e268 + Example: Connect with a local path template source + Given a project directory with no agentic files + When the engineer runs `smith connect --from ./my-templates` + Then agentic files are written from the local path template source to the project directory + And a `# smith managed` section is added to .gitignore + + @id:577156bb + Example: Connect with a URL template source + Given a project directory with no agentic files + When the engineer runs `smith connect --from https://example.com/templates.tar.gz` + Then agentic files are downloaded from the URL and written to the project directory + And a `# smith managed` section is added to .gitignore + + @id:4fdd38a4 + Example: Connect with a remote URL 
template source + Given a project directory with no agentic files + When the engineer runs `smith connect --from https://example.com/templates/my-template.zip` + Then agentic files are downloaded from the remote URL and written to the project directory + And a `# smith managed` section is added to .gitignore + + @id:f79d40f4 + Example: Template source not found + Given a project directory with no agentic files + When the engineer runs `smith connect --from /nonexistent/path` + Then smith exits with code 1 + And an error message indicates the template source could not be found + + @id:a1b2c3d4 @deprecated(reason="bundled source no longer uses network; see a2b3c4d5 for URL failure") + Example: Bundled template source network failure + Given a project directory with no agentic files and no cached templates + When the engineer runs `smith connect` and the GitHub archive download fails + Then smith exits with code 1 + And an error message indicates the bundled template source could not be downloaded + + @id:e5f6g7h8 @deprecated(reason="bundled source no longer caches; see e4f5g6h7 for URL failure") + Example: Bundled template source uses cache when network unavailable + Given a project directory with no agentic files and cached templates from a previous connect + When the engineer runs `smith connect` and the GitHub archive download fails + Then smith uses the cached templates and connects successfully + And smith exits with code 0 + + @id:a2b3c4d5 Should + Example: URL template source download failure + Given a project directory with no agentic files + When the engineer runs `smith connect --from https://example.com/templates.tar.gz` and the download fails + Then smith exits with code 1 + And an error message indicates the URL template source could not be downloaded + + @id:e4f5g6h7 Should + Example: URL template source invalid archive + Given a project directory with no agentic files + When the engineer runs `smith connect --from https://example.com/templates.tar.gz` 
and the downloaded archive is invalid + Then smith exits with code 1 + And an error message indicates the archive could not be extracted + + @id:060390bf + Example: Connect creates .gitignore when it does not exist + Given a project directory with no agentic files and no .gitignore file + When the engineer runs `smith connect` + Then a new .gitignore file is created containing the `# smith managed` section with entries for all agentic file patterns + + @id:e8245392 + Example: Connect appends section to existing .gitignore + Given a project directory with no agentic files and an existing .gitignore without a `# smith managed` section + When the engineer runs `smith connect` + Then the `# smith managed` section is appended to the existing .gitignore + And existing .gitignore content is preserved + + @id:fc22c286 + Example: Pair-atomic write rollback on failure + Given a project directory with no agentic files + When smith fails to write .opencode/ after writing AGENTS.md + Then AGENTS.md is removed (rolled back) + And no agentic files remain in the project directory + + Rule: Auto-update on connected projects, skip user-tracked files on fresh projects + As an engineer + I want smith to auto-update managed files when the project is already connected + And to skip user-tracked files when connecting to a fresh project + So that my existing work is never silently overwritten + + @id:df0455a5 Must + Example: Existing smith-managed file is auto-updated on reconnect + Given a project directory where .opencode/ exists and is listed in the `# smith managed` section of .gitignore + When the engineer runs `smith connect` + Then .opencode/ is updated with the template version (auto-update) + And all other agentic files are written + And smith exits with code 0 + + @id:21c05bbb Must + Example: Existing user-tracked file is skipped + Given a project directory where AGENTS.md exists but is NOT in the `# smith managed` section of .gitignore (the user tracks it manually) + When the 
engineer runs `smith connect` + Then AGENTS.md is not overwritten + And the remaining agentic files (.opencode/, .templates/, .flowr/) are written + And a `# smith managed` section is added to .gitignore + + @id:2a5f83d0 Must + Example: Overwrite flag replaces all managed files + Given a project directory where .opencode/ exists and is listed in the `# smith managed` section of .gitignore + When the engineer runs `smith connect --overwrite` + Then .opencode/ is replaced with the template version + And all agentic files are written + And files not in the `# smith managed` section are not touched + + @id:3e206149 Must + Example: Connect on already-connected project auto-updates + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith connect` + Then smith behaves as `smith update` — all managed agentic files are overwritten with the template versions + And smith exits with code 0 + + @id:7d22e1d6 Should + Example: Overwrite with user-tracked files preserved + Given a project directory where AGENTS.md is NOT in `# smith managed` (user-tracked) and .opencode/ IS in `# smith managed` + When the engineer runs `smith connect --overwrite` + Then .opencode/ is replaced with the template version + And AGENTS.md is not touched (it is not in the smith-managed section) + + Rule: Disconnect from a project + As an engineer + I want to run smith disconnect so that all smith-managed files are removed from my project + So that I can cleanly separate smith from my project without leaving orphaned files + + @id:cd5ba959 Must + Example: Disconnect a fully connected project + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith disconnect` + Then all agentic files that are gitignored by `# smith managed` are removed from the project directory + And the `# smith managed` section is preserved in .gitignore + And files not 
gitignored by `# smith managed` are not removed + + @id:9411ceb4 Must + Example: Disconnect a not-connected project is a no-op + Given a project directory with no agentic files and no `# smith managed` section in .gitignore + When the engineer runs `smith disconnect` + Then smith exits with code 0 + And no files are modified + + @id:b755bfae Should + Example: Disconnect a partially connected project removes present gitignored files + Given a project directory where .opencode/ exists and is gitignored by `# smith managed` but .flowr/ is missing + When the engineer runs `smith disconnect` + Then .opencode/ is removed + And no error is raised for the missing .flowr/ + And the `# smith managed` section is preserved in .gitignore + + @id:8f2a9018 Must + Example: User-tracked agentic file is preserved on disconnect + Given a project directory where AGENTS.md is NOT gitignored by `# smith managed` (user tracks it) but .opencode/ IS gitignored by `# smith managed` + When the engineer runs `smith disconnect` + Then .opencode/ is removed + And AGENTS.md is not removed (it is not in the smith-managed section) + And the `# smith managed` section is preserved in .gitignore + + Rule: Update agentic files + As an engineer + I want to run smith update so that my connected project gets the latest template files + So that I can stay current with template changes without reconnecting + + @id:e4d06612 Must + Example: Update a connected project + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith update` + Then all agentic files that are in the `# smith managed` section are overwritten with the latest template versions + And files not managed by smith are not touched + And smith exits with code 0 + + @id:d348166e Should + Example: Update with a new template source + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith 
update --from ./new-templates` + Then all managed agentic files are overwritten with files from the new template source + And smith exits with code 0 + + @id:9a01f4e2 Must + Example: Update on a not-connected project auto-connects + Given a project directory with no agentic files and no `# smith managed` section in .gitignore + When the engineer runs `smith update` + Then smith behaves as `smith connect` — all agentic files are written and a `# smith managed` section is added to .gitignore + And smith exits with code 0 + + @id:7af2f4d1 Must + Example: Update source not found + Given a connected project directory + When the engineer runs `smith update --from /nonexistent/path` + Then smith exits with code 1 + And an error message indicates the template source could not be found + + Rule: Check connection status + As an engineer + I want to run smith status so that I know whether my project is connected and which agentic files are present + So that I can take appropriate action + + @id:447e3cbf Must + Example: Connected project status + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith status` + Then smith reports "Connected" with a list of present agentic files + + @id:3f364b1d Must + Example: Partially connected project status + Given a project directory where .opencode/ and AGENTS.md exist but .templates/ and .flowr/ are missing + When the engineer runs `smith status` + Then smith reports "Partial" with a list of present and missing agentic files + And suggests `smith connect --overwrite` or `smith disconnect` + + @id:76e27d0a Must + Example: Disconnected project status + Given a project directory with no agentic files but a `# smith managed` section in .gitignore + When the engineer runs `smith status` + Then smith reports "Disconnected" + And suggests `smith connect` to reconnect + + @id:94ebcd86 Must + Example: Not connected project status + Given a project directory with no 
agentic files and no `# smith managed` section in .gitignore + When the engineer runs `smith status` + Then smith reports "Not connected" + And suggests `smith connect` to get started + + @id:10843402 Should + Example: Status with JSON output + Given a connected project directory + When the engineer runs `smith status --json` + Then smith outputs machine-readable JSON with connection status, present files list, and template source \ No newline at end of file diff --git a/docs/features/backlog/smith-new.feature b/docs/features/backlog/smith-new.feature deleted file mode 100644 index 863ee98..0000000 --- a/docs/features/backlog/smith-new.feature +++ /dev/null @@ -1,110 +0,0 @@ -Feature: smith new - - Creates a new Python project by running `uv init ` and then layering - nullhack/python-project-template add-ons on top. The template is bundled as a uv - GitHub dependency (pinned by commit rev) — no runtime download occurs. The user is - prompted interactively for project metadata (name, author, GitHub username, email, - description), which are substituted into template placeholders. The resulting project - is immediately runnable. If the target directory already exists, conflicts are resolved - per-file via prompt (skip / overwrite / diff). 
- - Status: BASELINED (2026-04-20) - - Rules (Business): - - Project is created using `uv init` as the foundation, not by cloning the template repo - - Template add-ons are read from the installed nullhack/python-project-template package (uv GitHub dep, rev-pinned) - - User is prompted interactively for: project name, author, GitHub username, email, description - - Metadata placeholders in template files are substituted with user-provided values - - If the target directory already exists, conflicts are resolved per-file: skip / overwrite / diff - - The resulting project must be immediately runnable after creation - - Constraints: - - Entry point: `smith new [path]` CLI command - - Requires `uv` to be available on the system PATH - - Template source: nullhack/python-project-template installed as uv GitHub dependency - - Rule: Project scaffolding - As a Python developer - I want to run `smith new ` to create a new project - So that I get a production-ready project structure without manual setup - - @id:c1a2b3d4 - Example: New project directory is created with uv init structure - Given no directory named "myproject" exists at the target path - When the developer runs `smith new myproject` - Then a directory "myproject" is created containing a uv-initialized project structure - - @id:e5f6a7b8 - Example: Template add-ons are present in the new project - Given no directory named "myproject" exists at the target path - When the developer runs `smith new myproject` - Then the new project contains `.opencode/`, `AGENTS.md`, `.github/workflows/`, `docs/`, and `tests/` - - @id:9c0d1e2f - Example: Missing uv on PATH produces a clear error - Given `uv` is not available on the system PATH - When the developer runs `smith new myproject` - Then smith exits with a non-zero code and an error message indicating uv is required - - Rule: Metadata substitution - As a Python developer - I want to provide my project metadata interactively - So that template placeholders are replaced with 
my actual project details - - @id:3f4a5b6c - Example: User is prompted for all required metadata fields - Given the developer runs `smith new myproject` - When smith reaches the metadata collection step - Then smith prompts for: project name, author name, GitHub username, email, and description - - @id:7d8e9f0a - Example: Placeholders in template files are replaced with provided metadata - Given the developer provides name "myproject", author "Alice", GitHub username "alice", email "alice@example.com", description "My project" - When smith applies the template add-ons - Then all placeholder tokens in template files are replaced with the corresponding provided values - - @id:b1c2d3e4 - Example: Empty required metadata field is rejected - Given the developer leaves a required metadata field blank - When smith processes the metadata input - Then smith re-prompts for the blank field with a message indicating it is required - - Rule: Conflict resolution on existing directory - As a Python developer - I want to be prompted per-file when the target directory already exists - So that I can choose to skip, overwrite, or diff each conflicting file without losing existing work - - @id:f5a6b7c8 - Example: Existing directory triggers per-file conflict prompt - Given a directory "myproject" already exists at the target path with existing files - When the developer runs `smith new myproject` - Then smith prompts the user for each conflicting file with options: skip, overwrite, diff - - @id:d9e0f1a2 - Example: Choosing skip leaves the existing file unchanged - Given a conflict prompt is shown for an existing file - When the developer chooses "skip" - Then the existing file is left unchanged and smith continues to the next file - - @id:b3c4d5e6 - Example: Choosing overwrite replaces the existing file with the template version - Given a conflict prompt is shown for an existing file - When the developer chooses "overwrite" - Then the existing file is replaced with the template 
version - - @id:f7a8b9c0 - Example: Choosing diff shows a unified diff before re-prompting - Given a conflict prompt is shown for an existing file - When the developer chooses "diff" - Then a unified diff of the existing file vs the template version is displayed and the prompt is shown again - - Rule: Runnable result - As a Python developer - I want the created project to be immediately runnable - So that I can start working without additional setup steps - - @id:d1e2f3a4 - Example: New project passes its own test suite immediately after creation - Given the developer runs `smith new myproject` and provides all metadata - When the developer runs `uv run task test-fast` inside the new project directory - Then all tests pass with no configuration required diff --git a/docs/features/completed/display-version.feature b/docs/features/completed/display-version.feature deleted file mode 100644 index 0dfc3dd..0000000 --- a/docs/features/completed/display-version.feature +++ /dev/null @@ -1,60 +0,0 @@ -Feature: Display version - - Reads the application version from pyproject.toml at runtime and logs it at INFO - level. Log output is controlled by a verbosity parameter; the version is visible - at DEBUG and INFO but suppressed at WARNING and above. An invalid verbosity value - raises a descriptive error. 
- - Status: COMPLETED - - Rules (Business): - - Version is read from pyproject.toml at runtime using tomllib - - Log verbosity is controlled by a ValidVerbosity parameter passed to main() - - Valid verbosity levels are: DEBUG, INFO, WARNING, ERROR, CRITICAL - - An invalid verbosity value raises a ValueError with the invalid value and valid options - - The version string is logged at INFO level; visible at DEBUG and INFO, not at WARNING+ - - Constraints: - - No hardcoded __version__ constant — pyproject.toml is the single source of truth - - Entry point: app/__main__.py (main(verbosity) function) - - Version logic: app/version.py (version() function) - - Rule: Version retrieval - As a software-engineer - I want to retrieve the application version programmatically - So that I can display or log it at runtime - - @id:3f2a1b4c - Example: Version string is read from pyproject.toml - Given pyproject.toml exists with a version field - When version() is called - Then the returned string matches the version in pyproject.toml - - @id:7a8b9c0d - Example: Version call emits an INFO log message - Given pyproject.toml exists with a version field - When version() is called - Then an INFO log message in the format "Version: " is emitted - - Rule: Verbosity control - As a software-engineer - I want to control log verbosity via a parameter - So that I can tune output for different environments - - @id:a1b2c3d4 - Example: Version appears in logs at DEBUG and INFO verbosity - Given a verbosity level of DEBUG or INFO is passed to main() - When main() is called - Then the version string appears in the log output - - @id:b2c3d4e5 - Example: Version is absent from logs at WARNING and above - Given a verbosity level of WARNING, ERROR, or CRITICAL is passed to main() - When main() is called - Then the version string does not appear in the log output - - @id:e5f6a7b8 - Example: Invalid verbosity raises a descriptive error - Given an invalid verbosity string is passed to main() - When main() 
is called - Then a ValueError is raised with the invalid value and valid options listed diff --git a/docs/index.html b/docs/index.html index 741957e..6b426cf 100644 --- a/docs/index.html +++ b/docs/index.html @@ -3,66 +3,486 @@ - Project Documentation + agents-smith — Documentation -

Documentation

-

Generated project documentation

- -
Built with pdoc · pytest-cov · pytest-html
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+

AGENTS·SMITH

+

AI-assisted software delivery system with flow-based agent orchestration.

+
+
+ +
+ + +

Generated Documentation

+ + + + + +

Features

+ +
+
+ + + +
+
+ +
+
+

No feature currently in progress.

+
+
+

Backlog is empty — ready for the next feature.

+
+
+ + + +

Architecture Decisions

+ +
    +
  • + 2026-04-22 + parser-library + Accepted +
  • +
  • + 2026-04-22 + source + Accepted +
  • +
+ + + +

Research Library

+ +
+ +
Scientific Research — AI Agent Design
+
Foundations for the agent architecture, file structure, and context management decisions in this template.
+
+ +
Scientific Research — Architecture
+
Foundations for the architectural decisions and patterns used in this template.
+
+ +
Scientific Research — Cognitive Science
+
Mechanisms from cognitive and social psychology that justify workflow design decisions in this template.
+
+ +
Scientific Research — Documentation
+
Foundations for living documentation, docs-as-code, information architecture, and post-mortem practices used in this template.
+
+ +
Scientific Research — Domain Modeling
+
Foundations for bounded context identification, ubiquitous language, and feature decomposition used in this template.
+
+ +
Scientific Research — OOP Design
+
Foundations for object-oriented design principles used in this template.
+
+ +
Scientific Research — Refactoring (Empirical)
+
Empirical studies on code smells, refactoring prioritization, and OOP complexity used in this template.
+
+ +
Scientific Research — Requirements Elicitation
+
Foundations for the PO interview structure, Gherkin criteria, and feature discovery in this template.
+
+ +
Scientific Research — Software Economics
+
Foundations for the shift-left, early defect detection, and workflow ordering decisions in this template.
+
+ +
Scientific Research — Testing
+
Foundations for test design, TDD, BDD, and property-based testing used in this template.
+
+ +
Version Control &amp; Branching Strategies
+
Source: Chacon, S., &amp; Straub, B. (2014). Pro Git (2nd ed.). Apress. Free online: https://git-scm.com/book
+
+
+ + +
+ + + + + + + diff --git a/docs/interview-notes/IN_20260422_scope-discovery.md b/docs/interview-notes/IN_20260422_scope-discovery.md new file mode 100644 index 0000000..a64bd9f --- /dev/null +++ b/docs/interview-notes/IN_20260422_scope-discovery.md @@ -0,0 +1,65 @@ +# IN_20260422_scope-discovery — Initial product scope discovery + +> **Status:** COMPLETE +> **Interviewer:** PO +> **Participant(s):** Stakeholder +> **Session type:** Initial discovery + +--- + +## General + +| ID | Question | Answer | +|----|----------|--------| +| Q1 | Who are the users? | Python engineers starting a new project who want rigorous tooling without the setup cost. | +| Q2 | What does the product do at a high level? | Provides a fully configured Python project skeleton: CI, quality tooling, test infrastructure, and an AI-assisted five-step delivery workflow. | +| Q3 | Why does it exist — what problem does it solve? | Setting up a production-grade Python environment from scratch is expensive and often skipped; engineers then accrue quality debt from day one. | +| Q4 | When and where is it used? | At project inception — cloned once, then evolved as features are added via the built-in workflow. | +| Q5 | Success — what does "done" look like? | An engineer clones the template and ships a meaningful first feature within a single session, with all quality gates passing. | +| Q6 | Failure — what must never happen? | The template introduces more friction than it removes, or locks engineers into choices they cannot override. | +| Q7 | Out-of-scope — what are we explicitly not building? | Runtime infrastructure (databases, queues, cloud deployment), UI frameworks, domain-specific business logic. | + +## Runtime Behaviour + +| ID | Question | Answer | +|----|----------|--------| +| Q8 | Should the template ship with any working feature, or be purely empty? | It should ship with exactly one working demonstration feature so engineers see the full workflow end-to-end. 
| + +## Feature: cli-entrypoint + +| ID | Question | Answer | +|----|----------|--------| +| Q9 | Which behavioural areas are in scope for the template's own feature backlog? | Just one simple command in the base package — useful for any starting project, simple enough not to bloat the app, and showcasing the template's capabilities end-to-end. | +| Q10 | What kind of command would be "useful for any starting project"? Candidate options presented: version, hello/greet, info/about, config show, health. | Stakeholder asked: "if I choose version, what will it add to my app/ folder?" — confirmed interest in version-style command after seeing the footprint (one file, ~10 lines, zero new dependencies). | +| Q11 | Three options presented: (A) `--help` only, (B) `--version` only, (C) `--help` + `--version` combined. Stakeholder also asked how a help/usage command would look in code and terminal. Full code sketches and tradeoff table provided. Which option for the demonstration feature? | Option C — `--help` + `--version` combined. `python -m agents_smith --help` shows app name, tagline, and available options. `python -m agents_smith --version` shows `agents-smith ` read from package metadata. Zero new dependencies, all code in `agents_smith/__main__.py`. 
| + +--- + +## Quality Attributes + +| ID | Attribute | Scenario | Target | Priority | +|----|-----------|----------|--------|----------| +| QA1 | Usability | When an engineer clones the template, they can ship a first feature within one session | < 60 minutes to first passing CI | Must | +| QA2 | Extensibility | When the engineer wants to override a default choice, they can do so without forking | Zero fork-required overrides | Must | + +--- + +## Pain Points Identified + +- Setting up production-grade Python from scratch is expensive and often skipped +- Quality debt accrues from day one when tooling is deferred + +## Business Goals Identified + +- Engineers clone and ship within a single session with all gates passing +- Template removes friction rather than adding it + +## Terms to Define (for glossary) + +- quality debt +- five-step delivery workflow + +## Action Items + +- [ ] Define detailed behavioural specification for `cli-entrypoint` feature +- [ ] Update glossary with new terms \ No newline at end of file diff --git a/docs/interview-notes/IN_20260501_local-bundle-reversal.md b/docs/interview-notes/IN_20260501_local-bundle-reversal.md new file mode 100644 index 0000000..a31a54d --- /dev/null +++ b/docs/interview-notes/IN_20260501_local-bundle-reversal.md @@ -0,0 +1,67 @@ +# IN_20260501_local-bundle-reversal — Revert GitHub-based bundled resolution to local bundle + +> **Status:** COMPLETE +> **Interviewer:** PO +> **Participant(s):** Stakeholder +> **Session type:** Scope refinement + +--- + +## General + +| ID | Question | Answer | +|----|----------|--------| +| Q1 | Why revert from GitHub-based download to local bundle? | GitHub-based resolution adds runtime network dependency, cache staleness risk, and implementation complexity that outweighs the freshness benefit for the default template source | +| Q2 | What should BundledTemplateSource do instead? 
| Read agentic files from `smith/data/` package directory via `importlib.resources` — no network calls, no caching, no external dependency for the default source | +| Q3 | What files go in `smith/data/`? | Agentic files only: AGENTS.md, .opencode/, .templates/, .flowr/ — derived from the agents-smith v8_release branch | +| Q4 | How is `smith/data/` kept in sync with agents-smith? | Manual script (`scripts/update-bundle.sh`) that downloads the agents-smith v8_release archive and copies agentic files to `smith/data/` | +| Q5 | Should `requests` still be a dependency? | Yes — UrlTemplateSource needs it for tar.gz/zip downloads. But bundled source has no runtime network dependency | +| Q6 | Should URL sources use caching? | No — URL sources re-download every time. No persistent cache for any source type | +| Q7 | What about BDD examples a1b2c3d4 (network failure) and e5f6g7h8 (cache fallback)? | Deprecate both — bundled source no longer needs network access. Add new Should examples for URL source download failure | +| Q8 | Should `TemplateSource.kind` still include "bundled"? | Yes — `kind="bundled"` stays. `smith connect` without `--from` defaults to `bundled:agents-smith` | +| Q9 | Should the `TemplateSourceAdapter` fallback be removed? | Yes — the adapter should just dispatch on `source.kind` with no fallback. Use cases pass the source directly | + +## Feature: smith-commands + +| ID | Question | Answer | +|----|----------|--------| +| Q10 | What happens on URL source download failure? | `smith connect --from ` exits with code 1 and an error message. No fallback to bundled source — the user explicitly chose a URL source | +| Q11 | What archive formats should UrlTemplateSource support? | `.tar.gz` and `.zip` — the two formats GitHub provides for branch/tag archives | +| Q12 | Should the agentic file filter apply to URL sources? | Yes — `_is_agentic_path()` filter applies to all URL sources. 
Only AGENTS.md, .opencode/, .templates/, .flowr/ are written | + +--- + +## Quality Attributes + +| ID | Attribute | Scenario | Target | Priority | +|----|-----------|----------|--------|----------| +| QA1 | Simplicity | When `smith connect` runs without `--from`, no network call is made | 0 network calls for bundled source | Must | +| QA2 | Reliability | When `smith connect --from ` fails to download, exit code 1 with clear error | < 1 second to report failure | Must | +| QA3 | Maintainability | When agents-smith v8_release updates, a single script updates `smith/data/` | 1 command to update bundled files | Must | + +--- + +## Pain Points Identified + +- GitHub-based resolution introduced runtime network dependency for the default use case +- Cache staleness was discovered during end-to-end testing (stale cache had only 2 files) +- GitHub download + cache logic was more complex than local bundle + +## Business Goals Identified + +- `smith connect` with no arguments should "just work" — no network required +- Template freshness is maintained by the update script, not runtime downloads + +## Terms to Define (for glossary) + +- Local Bundle (update Bundled Template Resolution entry) + +## Action Items + +- [x] Revert BundledTemplateSource to `importlib.resources`-based local bundle +- [x] Implement UrlTemplateSource (tar.gz/zip via requests, agentic filter, no cache) +- [x] Remove TemplateSourceAdapter fallback parameter +- [x] Deprecate BDD examples a1b2c3d4 and e5f6g7h8 +- [x] Add new URL source failure examples +- [x] Create ADR-007 superseding ADR-006 +- [x] Update spec documents \ No newline at end of file diff --git a/docs/interview-notes/IN_20260501_smith-commands-specification.md b/docs/interview-notes/IN_20260501_smith-commands-specification.md new file mode 100644 index 0000000..b5ec965 --- /dev/null +++ b/docs/interview-notes/IN_20260501_smith-commands-specification.md @@ -0,0 +1,158 @@ +# Interview Notes: smith-commands Feature Specification + +> 
**Date:** 2026-05-01 +> **Feature:** smith-commands (connect, disconnect, update, status) +> **Interviewer:** PO +> **Stakeholder:** nullhack +> **Session type:** Feature specification (behavioral rules and edge cases) + +--- + +## General Behavioral Rules + +### Stateless Operation + +- **smith is stateless.** There is no `.smith.yaml` metadata file. No connection state is stored. +- Connection state is inferred from the presence of the `# smith managed` section in `.gitignore` and which agentic files exist on disk. +- `smith connect`, `smith update`, and `smith disconnect` are stateless operations — they write or remove files based on what's currently on disk. + +### Agentic Files + +- The agentic file set is: **AGENTS.md**, **.opencode/**, **.templates/**, **.flowr/** +- **Pair-atomic rule:** AGENTS.md and .opencode/ are a pair — either both are written or neither. This is the core atomicity invariant. +- **.templates/ and .flowr/ are independent:** if they don't exist, they are written; if they already exist (and are gitignored by the `# smith managed` section), they are refused unless `--overwrite` is used. + +### .gitignore Section + +- On connect, smith adds a `# smith managed` section to `.gitignore` with entries for each agentic file pattern. +- On disconnect, smith **keeps** the `# smith managed` section in `.gitignore`. It serves as a guard for future smith usage — it records which files are agentic and should be treated specially. +- Disconnect removes the agentic files that are gitignored (i.e., listed in the `# smith managed` section). If a file/folder is in the agentic set but the `.gitignore` section does NOT ignore it, that means the user explicitly wants to track it — so smith does NOT remove it. +- If a `.gitignore` entry for an agentic file/folder is NOT preceded by `# smith managed` (i.e., the user added it manually outside the section), smith does not modify that entry. 
+ +### Template Source + +- Default template source: **agents-smith** (bundled with the agents-smith package). +- Override with `--from `: + - Local path: `--from ./my-templates` + - URL: `--from https://example.com/templates.tar.gz` + - Git repo: `--from git+https://github.com/user/repo.git#branch` (standard URL format with ref parameter) +- If `--from` points to a non-existent path or unreachable URL: **error, exit 1**. + +--- + +## Command Behavioral Rules + +### `smith connect [--from ] [--overwrite]` + +**Default behavior (fresh project, no agentic files):** +1. Resolve template source (default: agents-smith, or `--from`). +2. Stage all agentic files in a temp directory. +3. Validate: check for conflicts (existing agentic files). +4. Write AGENTS.md + .opencode/ atomically (pair-atomic: both or neither). +5. Write .templates/ independently (if absent, write it; if present and not gitignored by smith, refuse). +6. Write .flowr/ independently (same rule as .templates/). +7. Add `# smith managed` section to `.gitignore` with entries for all agentic files. +8. Report success: list files written. + +**When agentic files already exist:** +- If any agentic file/folder exists and IS gitignored by the `# smith managed` section → **conflict, exit 2**, list conflicting files, suggest `--overwrite`. +- If an agentic file/folder exists but is NOT in the `# smith managed` section (user tracks it manually) → do not overwrite it, skip it, write the rest. The user explicitly chose to track this file. +- `--overwrite`: replace ALL agentic files that are in the `# smith managed` section, regardless of conflicts. Does NOT touch files not in the smith-managed section. + +**When already connected (`.gitignore` has `# smith managed` section):** +- `smith connect` on an already-connected project = **auto-update** (same behavior as `smith update`). +- `smith connect --from ` on an already-connected project = update from the new source. 
+ +**Exit codes:** 0 = success, 1 = error (invalid args, source not found, IO failure), 2 = conflict (files exist without `--overwrite`). + +### `smith disconnect` + +**Default behavior (connected project):** +1. Identify agentic files listed in the `# smith managed` section of `.gitignore`. +2. Remove only the agentic files that ARE gitignored by the `# smith managed` section. +3. If an agentic file/folder is NOT gitignored by `# smith managed` (user chose to track it), do NOT remove it. +4. Keep the `# smith managed` section in `.gitignore` (it serves as a guard for future usage). +5. Report success: list files removed. + +**When not connected (no `# smith managed` section):** +- **No-op, exit 0.** No error, no message needed. + +**When partially connected (some agentic files missing):** +- Remove whatever agentic files ARE present and gitignored by `# smith managed`. No error for missing files. + +**Exit codes:** 0 = success (including no-op), 1 = error (IO failure). + +### `smith update [--from ]` + +**Default behavior (connected project):** +1. Resolve template source (default: agents-smith, or `--from`). +2. Re-download all agentic files from the template source. +3. Overwrite ALL agentic files that are in the `# smith managed` section (this is an intentional overwrite — update is the "refresh" operation). +4. Do NOT touch files not managed by smith. +5. Maintain the pair-atomic rule for AGENTS.md + .opencode/ (both or neither). +6. Report success: list files updated. + +**When not connected (no `# smith managed` section in `.gitignore`):** +- **Auto-connect:** same behavior as `smith connect` with the same `--from` flag. + +**When `--from` source is not found:** +- **Error, exit 1.** + +**Exit codes:** 0 = success, 1 = error (source not found, IO failure). + +### `smith status [--json]` + +**Default behavior (human-readable):** +- Check which agentic files exist on disk. 
+- If all agentic files present → report "Connected" with file list and template source (if determinable). +- If some agentic files present → report "Partial" with which files are present/missing, suggest `smith connect --overwrite` or `smith disconnect`. +- If no agentic files present but `# smith managed` section exists in `.gitignore` → report "Disconnected" with suggestion to `smith connect` to reconnect. +- If no agentic files and no `# smith managed` section → report "Not connected" with "Run smith connect to get started." + +**With `--json` flag:** +- Machine-readable JSON output with same information, suitable for scripting. + +**Exit codes:** 0 = success, 1 = error. + +--- + +## Edge Cases and Failure Modes + +### Partial connection (some files written, then failure) + +- The pair-atomic rule for AGENTS.md + .opencode/ means: if writing .opencode/ fails, AGENTS.md must be rolled back too. Both succeed or neither. +- .templates/ and .flowr/ are independent — a failure writing .flowr/ does not roll back .templates/. +- Temp-directory staging is used for AGENTS.md + .opencode/ to ensure atomicity. + +### .gitignore section manipulation + +- If `.gitignore` doesn't exist, create it with the `# smith managed` section. +- If `.gitignore` exists but doesn't have a `# smith managed` section, append the section at the end. +- If `.gitignore` has a `# smith managed` section already, update entries within it (add missing entries, do not remove existing entries unless they are for smith-managed files being disconnected). + +### User-modified agentic files + +- `smith disconnect` removes agentic files that are gitignored by `# smith managed`, regardless of whether the user modified them. This is clean separation. +- If the user wants to keep their changes, they should NOT gitignore the file (remove it from the `# smith managed` section), and smith will not remove it on disconnect. 
+ +### Template source failure mid-write + +- If the template source fails during download/extraction, exit 1 with an error message. No partial writes for the atomic pair (AGENTS.md + .opencode/). + +--- + +## Decisions Summary + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| State management | Stateless — no .smith.yaml | Simpler model; .gitignore section is sufficient to track managed files | +| Atomicity scope | AGENTS.md + .opencode/ are pair-atomic; .templates/ and .flowr/ are independent | Core agent config must be consistent; template/flow dirs are independent concerns | +| .gitignore on disconnect | Keep `# smith managed` section | Serves as guard for future smith usage | +| File removal on disconnect | Remove only gitignored agentic files | Files the user explicitly tracks are preserved | +| Connect on already-connected | Auto-update (same as update) | No need to force disconnect first | +| Update on not-connected | Auto-connect | Convenient; same as connect | +| Disconnect on not-connected | No-op, exit 0 | Idempotent; no error for clean state | +| Template source types | Bundled (agents-smith), local path, URL, git repo | Full flexibility from the start | +| Git source format | `git+https://...#branch` | Standard URL format with ref parameter | +| Status output | Human-readable by default, `--json` for scripting | Dual audience | +| Exit codes | 0/1/2 — success/error/conflict | Simple, covers the main cases; conflict maps to safety invariant | \ No newline at end of file diff --git a/docs/interview-notes/IN_20260501_stakeholder-reinterview.md b/docs/interview-notes/IN_20260501_stakeholder-reinterview.md new file mode 100644 index 0000000..7a1bdff --- /dev/null +++ b/docs/interview-notes/IN_20260501_stakeholder-reinterview.md @@ -0,0 +1,98 @@ +# IN_20260501_stakeholder-reinterview — Corrected product scope discovery + +> **Status:** COMPLETE +> **Interviewer:** PO +> **Participant(s):** Stakeholder +> **Session type:** Scope 
refinement (replaces IN_20260422 which captured the wrong product) + +--- + +## General + +| ID | Question | Answer | +|----|----------|--------| +| Q1 | Who are the users? | Software engineers/developers who work on multiple projects and want consistent AI-assisted workflows across all of them. | +| Q2 | What does the product do at a high level? | smith is an AI pair programming platform that assimilates ordinary projects into high-performing, AI-augmented systems. It connects standardised agent configurations (AGENTS.md, .opencode/, .templates/, .flowr/) to any project, enabling consistent AI-assisted workflows. Like Agent Smith in the Matrix, smith takes control of a project — then detaches when done. | +| Q3 | Why does it exist — what problem does it solve? | AI agents need structure. Without consistent agent configurations, each project has different .opencode agents, different workflows, and different templates. Engineers waste time maintaining these across projects. smith standardises the AI agent experience — connect, work, detach — the same agents, the same flows, every project. | +| Q4 | When and where is it used? | Anytime — plug in or out as needed. Works on any project directory, even legacy ones. Not limited to project inception. | +| Q5 | Success — what does "done" look like? | A uniform experience across projects: `smith connect` in any directory and you're immediately working with standard flows and agents. `smith disconnect` and the project is clean (no agentic files left, only .gitignore entries). Customisation is per-template: use `--from ` for a different agent template. | +| Q6 | Failure — what must never happen? | Destructive overwrites without explicit `--overwrite` flag. Rigid workflows that don't adapt to different projects. Complex connect/disconnect workflows. Partial connections — smith must either connect fully or write no files at all (atomic). Never silently overwrite customizations. 
| +| Q7 | Out-of-scope — what are we explicitly not building? | AI execution engine (smith configures agents, doesn't run them). CI/CD infrastructure. Package management. Language/framework enforcement. | + +## Connect/Disconnect + +| ID | Question | Answer | +|----|----------|--------| +| Q8 | When smith connects, what happens? | `smith connect` copies the default template's agentic files (AGENTS.md, .opencode/, .templates/, .flowr/) into the project directory. `smith connect --from ` copies from a specified template source instead of the default (agents-smith). | +| Q9 | When smith disconnects, what happens? | Removes agentic files from the project. Keeps .gitignore entries (managed section). If the user wants to push agentic files, they can remove entries from .gitignore. Disconnect means "I don't want the files here anymore" — if they want to continue with the files, they wouldn't call disconnect. | +| Q10 | How does --overwrite work? | smith refuses to connect if agentic files already exist (must disconnect first), unless `--overwrite` is explicitly passed. Destructive overwrites are only possible when the stakeholder forces it. | +| Q11 | How does smith handle .gitignore? | smith manages its own section in .gitignore, marked with a comment like `# smith managed`. On connect, it adds entries for the agentic files. On disconnect, it removes those entries (unless the user has removed them manually to push the files). | +| Q12 | Which agentic files get connected? | AGENTS.md, .opencode/, .templates/, .flowr/ — these four items are the standard set that smith connects to a project. | + +## Conflict Handling + +| ID | Question | Answer | +|----|----------|--------| +| Q13 | What if AGENTS.md or .opencode/ already exist in the project? | Warn and refuse to overwrite unless `--overwrite` is passed. These are core agent configs and should not be silently replaced. | +| Q14 | What if .flowr/ or .templates/ already exist? | Needs architect decision. 
Projects may have their own .flowr and .templates specific to that project. We should not overwrite those if already existing, but the user may want to override. The exact merge/replacement strategy needs architectural input. | + +## Naming and Branding + +| ID | Question | Answer | +|----|----------|--------| +| Q15 | What are the CLI and package names? | CLI command: `smith`. PyPI package: `agents-smith`. The branding uses the Matrix/Agent Smith theme from the official remote repo (https://github.com/nullhack/agents-smith). The local branding file is stale and wrong — it still has the old agents-smith/Greek theme. Must be replaced with the remote version. | + +## Template Source + +| ID | Question | Answer | +|----|----------|--------| +| Q16 | What is the relationship between smith and agents-smith? | Agents-Smith is the default template. `smith connect` uses agents-smith's agentic files by default. `smith connect --from <source>` uses a different template source. | +| Q17 | How does multi-project support work? | Each project gets its own copy of the agentic files. `smith connect` copies files into the project directory. The same template can be connected to multiple projects independently. `smith update` refreshes a project's agentic files from the source template. | + +## Feature: smith-commands + +| ID | Question | Answer | +|----|----------|--------| +| Q18 | What CLI commands should smith support? | Four commands: `smith connect [--from <source>]`, `smith disconnect`, `smith update`, `smith status`. All four are needed for the first feature to demonstrate the full connect/work/disconnect cycle end-to-end. | +| Q19 | Which commands go in the first feature? | All four — connect, disconnect, update, and status — as the single demonstration feature. 
| + +--- + +## Quality Attributes + +| ID | Attribute | Scenario | Target | Priority | +|----|-----------|----------|--------|----------| +| QA1 | Usability | When an engineer runs `smith connect` in any project directory, they can immediately start working with standard flows and agents | < 1 minute from connect to working | Must | +| QA2 | Safety | When smith connects to a project that already has agentic files, it refuses to overwrite without explicit `--overwrite` flag | Zero silent overwrites, ever | Must | +| QA3 | Clean separation | When smith disconnects from a project, no agentic files remain (only .gitignore entries) | Zero orphaned files after disconnect | Must | +| QA4 | Atomicity | When smith connects, either all agentic files are written or none are | No partial connections, ever | Must | + +--- + +## Pain Points Identified + +- Maintaining different .opencode agents and workflows across multiple projects is wasteful and inconsistent +- AI agents lack structure — each project reinvents agent configs from scratch +- No standardised way to "plug in" AI-assisted workflows to existing/legacy projects + +## Business Goals Identified + +- Uniform AI agent experience across all projects +- Instant setup — connect and go within minutes +- Clean connect/disconnect cycle — projects should be transformable and reversible + +## Terms to Define (for glossary) + +- connect — smith command that copies agentic files into a project directory +- disconnect — smith command that removes agentic files from a project directory +- agentic files — the set of files smith manages: AGENTS.md, .opencode/, .templates/, .flowr/ +- template source — the origin of agentic files (default: agents-smith; override with --from) +- assimilate — smith's core metaphor: enter a project, configure it with standard AI agents, transform it +- managed .gitignore section — a marked block in .gitignore that smith creates and maintains + +## Action Items + +- [ ] Architect to decide merge/overwrite 
strategy for .flowr/ and .templates/ when they already exist in a project +- [ ] Replace local branding.md with the remote version (Matrix/Agent Smith theme) +- [ ] Replace local logo.svg and banner.svg with remote versions +- [ ] Retire or archive the old IN_20260422 interview notes (captured wrong product scope) \ No newline at end of file diff --git a/docs/interview-notes/IN_20260501_temple8-dependency-resolution.md b/docs/interview-notes/IN_20260501_temple8-dependency-resolution.md new file mode 100644 index 0000000..9e5b882 --- /dev/null +++ b/docs/interview-notes/IN_20260501_temple8-dependency-resolution.md @@ -0,0 +1,72 @@ +# Interview Notes: Agents-Smith Dependency Resolution + +> **Status:** COMPLETE +> **Interviewer:** SA +> **Participant(s):** nullhack +> **Session type:** Scope refinement + +--- + +## General + +| ID | Question | Answer | +|----|----------|--------| +| Q1 | How should the bundled template source resolve template files? | Download from the agents-smith GitHub repository's `v8_release` branch at runtime, not from packaged local files | +| Q2 | Should we use stdlib `urllib.request` or `requests` for HTTP? | Use `requests` — cleaner API, better error handling, worth the dependency | +| Q3 | Should downloaded templates be cached locally? | Yes — cache in `~/.cache/smith/` to avoid re-downloading on every connect/update | +| Q4 | Should the default GitHub branch/tag be configurable? | No — default to `v8_release` for now; will change in future but not configurable today | +| Q5 | Should `smith/data/` (85 stale bundled files) be removed? | Yes — delete the entire `smith/data/` directory; it contains stale copies of the project's own agentic files | + +## Architecture: Bundled Template Source + +| ID | Question | Answer | +|----|----------|--------| +| Q6 | How should BundledTemplateSource download the archive? 
| Download `https://github.com/nullhack/agents-smith/archive/refs/heads/v8_release.tar.gz` as a tarball via GitHub's archive API | +| Q7 | How should the archive be extracted and resolved into FileSpec objects? | Extract to a temp directory, walk the extracted directory, and collect files matching the agentic file set (AGENTS.md, .opencode/, .templates/, .flowr/) | +| Q8 | What should happen on network failure? | Exit with code 1 and a clear error message indicating the bundled template source could not be downloaded | +| Q9 | What is the cache structure? | `~/.cache/smith/agents-smith/` — store the extracted template files; on subsequent resolves, check if cached files exist and are fresh enough before re-downloading | +| Q10 | What is the cache invalidation strategy? | Re-download when the cache is empty or on explicit `smith update`; future enhancement could add ETag/Last-Modified checking | + +## Dependency Change + +| ID | Question | Answer | +|----|----------|--------| +| Q11 | What dependency does this add? | `requests` — the only runtime dependency beyond stdlib | +| Q12 | Does this change the "zero runtime dependencies" constraint? 
| Yes — the constraint changes from "zero runtime dependencies" to "one runtime dependency (requests)" | + +--- + +## Quality Attributes + +| ID | Attribute | Scenario | Target | Priority | +|----|-----------|----------|--------|----------| +| QA1 | Usability | When an engineer runs `smith connect` without network, they get a clear error message | Error message within 1 second | Must | +| QA2 | Performance | When cache is warm, `smith connect` resolves templates from local cache | < 100ms for cached resolution | Should | +| QA3 | Reliability | When GitHub is temporarily unavailable, `smith update` fails gracefully with exit code 1 | No partial state on failure | Must | + +--- + +## Pain Points Identified + +- `smith/data/` contains 85 stale copies of the project's own `.opencode/`, `.flowr/`, `.templates/`, and `AGENTS.md` — these will go stale and are architecturally wrong + +## Business Goals Identified + +- The bundled template source should always resolve the latest agents-smith templates without requiring a new smith release +- Network-based resolution allows template updates to propagate without smith package updates + +## Terms to Define (for glossary) + +- **Bundled template resolution**: The process by which the default `agents-smith` template source downloads and caches template files from the agents-smith GitHub repository +- **Cache directory**: `~/.cache/smith/` — local storage for downloaded template files to avoid redundant network requests + +## Action Items + +- [ ] Add `requests` to `pyproject.toml` dependencies +- [ ] Rewrite `BundledTemplateSource` to download from GitHub instead of reading `smith/data/` +- [ ] Add local caching in `~/.cache/smith/agents-smith/` +- [ ] Delete `smith/data/` directory +- [ ] Update technical design doc (stack, module structure, template source resolution section) +- [ ] Update system.md (dependency constraint change) +- [ ] Write ADR for GitHub-based bundled template resolution +- [ ] Update glossary (Agents-Smith 
entry) \ No newline at end of file diff --git a/docs/features/completed/.gitkeep b/docs/post-mortem/.gitkeep similarity index 100% rename from docs/features/completed/.gitkeep rename to docs/post-mortem/.gitkeep diff --git a/docs/post-mortem/2026-04-14-ping-pong-cli-workflow-gaps.md b/docs/post-mortem/2026-04-14-ping-pong-cli-workflow-gaps.md deleted file mode 100644 index 7f1d054..0000000 --- a/docs/post-mortem/2026-04-14-ping-pong-cli-workflow-gaps.md +++ /dev/null @@ -1,176 +0,0 @@ -# Post-Mortem: ping-pong-cli — Workflow Gaps (v3.1) - -## Release Details - -| Field | Value | -|-------|-------| -| Version | v3.1.20260414 | -| Date | April 14, 2026 | -| Feature | ping-pong-cli | -| Status | APPROVED and shipped | -| Broken | Yes — game doesn't work | - ---- - -## What Was Shipped - -`ping_pong_cli/game.py` — 240 lines: - -- 15 top-level functions, zero classes -- No keyboard input (`get_input()` always returns `""`) -- Runs a hardcoded 100-frame demo then exits -- Uses raw `int` and `tuple[int,int]` — no value objects -- `render_game` has 3 levels of nesting -- 8-parameter function signatures - -Yet it passed: lint, typecheck, 100% coverage, 31 tests, reviewer APPROVED. - ---- - -## What Failed - -The acceptance criteria said: -> Given: The game is running and waiting for input -> When: The left or right arrow key is pressed -> Then: The paddle moves - -The implementation maps this to a unit test of `update_player("W")`. That test proves the function works in isolation. No test verifies that keyboard input actually reaches `update_player`. - -The game shipped with the acceptance criterion satisfied in a narrow technical sense ("paddle moves when 'W' is passed to the function") but broken in the broad user sense ("paddle doesn't move when I press W in the running game"). 
- ---- - -## Gap 1: Acceptance Criteria Don't Require End-to-End Verification - -### Problem - -The `scope` skill defines "Then must be a single observable, measurable outcome" but doesn't define **observable by whom**. The developer interpreted this as "observable in a unit test" — test calls `update_player("W")` returns expected result. - -### Fix - -In `scope` skill, add: - -> **Observable means observable by the end user.** If the criterion says "When the user presses W", the test must verify that pressing W in the running app produces the expected result — not just that calling `update_player("W")` returns the right number. If end-to-end testing isn't feasible, the criterion must explicitly state the boundary (e.g., "When update_player receives 'W'") so the gap is visible. - -In `verify` skill, add: - -> **Acceptance Criteria vs. Reality Check** -> -> For each criterion whose Given/When/Then describes user-facing behavior: -> - Read the test that covers it -> - If the test only exercises an internal function without going through the actual user-facing entry point, flag it as **COVERED BUT NOT VERIFIED** -> - A criterion that says "When the user presses W" is NOT verified by `test_update_player("W")` — it's verified by a test or manual check that sends W to the running app -> -> Any COVERED BUT NOT VERIFIED criterion → REJECTED - ---- - -## Gap 2: Object Calisthenics Listed But Not Enforced by Reviewer - -### Problem - -The `verify` skill listed all 9 Object Calisthenics rules. 
The reviewer read them but approved code with: - -| # | Rule | Violation in shipped code | -|---|------|--------------------------| -| 3 | Wrap primitives | `PlayerPosition = int`, `BallState = tuple[int,int]` are type aliases, not value objects | -| 4 | First-class collections | No collection classes | -| 7 | Small entities | `run_game_loop` is ~40 lines | -| 8 | ≤ 2 instance vars | No classes at all, but 8-parameter function signatures | - -The skill didn't say **what to do when violations are found**. Violations were treated as observations, not blockers. - -### Fix - -In `verify` skill, replace ObjCal prose with a structured table: - -> **Object Calisthenics — ANY violation is a REJECT** -> -> | # | Rule | How to check | PASS/FAIL | -> |---|------|-------------|-----------| -> | 1 | One level of indentation | Check nest depth in source | -> | 2 | No `else` after return | Search for `else` inside functions | -> | 3 | Wrap primitives | Bare `int`, `str` as domain concepts = FAIL | -> | 4 | First-class collections | `list[Type]` not wrapped = FAIL | -> | 5 | One dot per line | `a.b.c()` = FAIL | -> | 6 | No abbreviations | `calc`, `mgr` = FAIL | -> | 7 | Small entities | Lines per function >20 or class >50 = FAIL | -> | 8 | ≤ 2 instance vars | More than 2 per class = FAIL | -> | 9 | No getters/setters | `get_x()`, `set_x()` = FAIL | - ---- - -## Gap 3: REFACTOR Step Has No Verification Gate - -### Problem - -The `implementation` skill says to apply DRY, SOLID, Object Calisthenics during REFACTOR, but when done, it only runs `task test`, `task lint`, `task static-check`. None of those tools check nesting depth, function length, or value objects. The developer skips the self-check, runs the three commands, they all pass. - -### Fix - -In `implementation` skill, add after REFACTOR section: - -> **REFACTOR Self-Check (MANDATORY before commit)** -> -> 1. Count lines per function you changed. Any >20 → extract helper -> 2. Check nesting. 
Any >2 levels → extract function -> 3. Check bare primitives as domain concepts. `int` for paddle position → value object -> 4. Check parameters per function. >4 positional → group into dataclass -> -> If you skip this step, the reviewer WILL reject your code. - ---- - -## Gap 4: `timeout 10s uv run task run` Is Not a Playability Test - -### Problem - -The `verify` skill said: "check that startup completes without error before the timeout." The demo ran for 1.6 seconds and exited cleanly — startup completed, no error. The app passed without being interactive at all. - -### Fix - -In `verify` skill, replace the timeout check with: - -> **For apps with user interaction** (games, CLIs with prompts, web servers): -> - Run the app, provide sample input via stdin/subprocess -> - Verify output changes in response to input -> - A hardcoded demo that auto-plays without input is NOT a playability test -> -> If the app doesn't respond to user input → REJECTED - ---- - -## Gap 5: Tests Verify Functions, Not Behavior - -### Problem - -The `tdd` skill produces unit tests. Every test calls an isolated function. No test sends input to the running game. No test verifies the game loop integrates these functions correctly. 31 tests pass with 100% coverage but none test the actual gameplay loop. 
- -### Fix - -In `tdd` skill, add: - -> **Integration Test Requirement** -> -> For features with multiple components (game loops, handlers, pipelines): -> - Add at least ONE `@pytest.mark.integration` test -> - Test must exercise the full path from entry point to observable outcome -> - Must NOT call internal helpers directly — use the public entry point - ---- - -## Summary - -| Gap | Skill | Problem | Fix | -|-----|-------|---------|-----| -| 1 | scope + verify | "Observable" undefined = unit test passes | Define user-observable; add COVERED BUT NOT VERIFIED | -| 2 | verify | Object Calisthenics listed = suggestions | Any rule FAIL = REJECTED (table) | -| 3 | implementation | REFACTOR has no self-check gate | Add mandatory line/nesting check | -| 4 | verify | `timeout` = "doesn't hang" not "works" | Must accept and respond to input | -| 5 | tdd | All unit, no integration | Require one integration test | - ---- - -## Root Cause - -The skills already contained the right standards. The problem is that violations were treated as observations, not blockers. Each check needs a clear **FAIL = REJECTED** consequence with a structured table to fill in — so violations can't be glossed over in prose. diff --git a/docs/post-mortem/2026-04-16-ping-pong-cli-package-and-design-review.md b/docs/post-mortem/2026-04-16-ping-pong-cli-package-and-design-review.md deleted file mode 100644 index d9b6995..0000000 --- a/docs/post-mortem/2026-04-16-ping-pong-cli-package-and-design-review.md +++ /dev/null @@ -1,108 +0,0 @@ -# Post-Mortem: ping-pong-cli — Package Directory and Design Review Gaps - -## Context - -| Field | Value | -|-------|-------| -| Date | April 16, 2026 | -| Feature | ping-pong-cli (follow-up run after v3.1 workflow fixes) | -| Branch | feat/po-workflow-redesign-v4 | - -This post-mortem was conducted after a second ping-pong-cli test run on the updated v3.1 workflow. Two systemic failures were identified that the v3.1 fixes did not address. 
- ---- - -## Failure 1: Code Created in Wrong Package Directory - -### What Happened - -The developer created production code under `python_project_template/` (the template's own package) instead of `ping_pong_cli/` (the feature's package). The correct package name was visible in `pyproject.toml` under `[tool.setuptools] packages`, but no step in the workflow required the developer to read it before writing code. - -### Why It Happened - -The `implementation` skill's Step 2 (Architecture) listed prerequisites and module structure instructions, but contained no explicit step to: -1. Read `pyproject.toml` to determine the correct package name -2. Confirm the package directory exists on disk -3. Record the package name as a hard constraint before writing any files - -Without this verification, the developer defaulted to a plausible-looking name rather than the actual configured name. - -### Impact - -All production code was placed in the wrong directory. The feature appeared to work during development (imports resolved within the wrong package) but would have failed on any fresh install or CI run. - -### Fix Applied - -Added a **Package Verification** block at the top of Step 2 in `implementation/SKILL.md` (before prerequisites): - -``` -1. Read pyproject.toml → [tool.setuptools] → record packages = [""] -2. Confirm that directory exists on disk: ls / -3. Write the correct package name at the top of working notes -4. All new source files go under / — never under a template placeholder -``` - -Added a corresponding check row to `verify/SKILL.md` section 4g: - -> `Imports use correct package name` — confirm all imports match `[tool.setuptools] packages`, not a template placeholder - ---- - -## Failure 2: Design Principle Violations Not Caught in Review - -### What Happened - -The reviewer approved code containing getters and setters (`get_x()` / `set_x()` pairs), violating Object Calisthenics Rule 9. 
The violation was visible in the code but was not caught because the review process had no structured mechanism for the developer to declare their own compliance before asking for review. - -### Why It Happened - -The per-test reviewer check asked the reviewer to verify YAGNI > KISS > DRY > SOLID > ObjCal, but provided no structured checklist or required evidence format. The reviewer was scanning for violations rather than verifying explicit claims. When a reviewer is reading unfamiliar code for the first time, getter/setter patterns can be overlooked if they are not explicitly flagged. - -Additionally, the reviewer had no "audit target" — there was nothing the developer had committed to that the reviewer could directly compare against the code. - -### Impact - -OC Rule 9 (tell-don't-ask) was violated. The design choice propagated into the committed codebase, requiring a later refactor. - -### Fix Applied - -Added a **Design Self-Declaration** step between REFACTOR and REVIEWER CHECK in `implementation/SKILL.md`: - -- Developer fills a checklist covering YAGNI, KISS, DRY, SOLID (all 5 principles), and OC Rules 1–9 -- Each item requires `file:line` evidence or an explicit "does not apply" note -- The filled checklist is sent to the reviewer as the audit target - -Updated the **REVIEWER CHECK** response template from a 3-line compact format to an 11-row structured comparison table (YAGNI, KISS, DRY, SOLID-S/O/L/I/D, OC-1-9, Design patterns, Semantic alignment): - -- Developer Claims column (what the developer declared) -- Reviewer Verdict column (independent verification) -- Evidence column (`file:line` required for every FAIL) -- Any FAIL row = rejection - -Updated the Cycle State phases to include `SELF-DECLARE` between REFACTOR and REVIEWER: - -``` -RED → GREEN → REFACTOR → SELF-DECLARE → REVIEWER(code-design) → COMMITTED -``` - -Updated `session-workflow/SKILL.md` Cycle State phase list and Rule 6 to include `SELF-DECLARE`. 
- -Updated `reviewer.md` per-test Step 4 section to reference the structured table and load `skill implementation` for the full protocol. - ---- - -## Summary - -| Failure | Root Cause | Fix | -|---------|-----------|-----| -| Code in wrong package | No package verification step before writing code | Package Verification block added to Step 2 | -| OC Rule 9 violation approved | No structured self-declaration; reviewer had no audit target | Design Self-Declaration checklist per test; 11-row verification table | - ---- - -## Systemic Pattern - -Both failures share the same root cause: **the workflow relied on agents noticing problems rather than proving compliance**. The fixes shift the burden: - -- Package verification: developer must prove the package name is correct before writing the first line -- Design self-declaration: developer must prove each principle is satisfied before asking for review; reviewer verifies claims rather than scanning from scratch diff --git a/docs/post-mortem/PM_20260501_conflict-exit-code-removal.md b/docs/post-mortem/PM_20260501_conflict-exit-code-removal.md new file mode 100644 index 0000000..4f670bf --- /dev/null +++ b/docs/post-mortem/PM_20260501_conflict-exit-code-removal.md @@ -0,0 +1,26 @@ +# PM_20260501_conflict-exit-code-removal: Planned code removed as "dead" — distinction between dead code and TDD-not-yet-reached code + +## Failed At + +Design review (7th pass) — reviewer flagged ConflictReport and EXIT_CONFLICT = 2 as "dead code" and "cross-document inconsistency." They were removed. On reflection, the interview notes (`IN_20260501_smith-commands-specification.md:58,66`) explicitly define exit code 2 for conflicts. The code was removed, then restored, then removed again. + +## Root Cause + +The reviewer operates at the feature level (current TDD examples) while the SA created typed stubs at the architecture level. 
Code that matches the domain model and technical design but hasn't been reached by the TDD cycle yet is **planned code**, not dead code. Dead code contradicts the architecture; planned code hasn't been exercised yet. + +## Missed Gate + +The review skill has no step to check whether flagged "dead code" exists in the domain model, technical design, or interview notes before recommending removal. Without this check, planned code is indistinguishable from dead code. + +## Fix + +1. **Remove planned code now (TDD perspective):** From a strict TDD perspective, code that no test exercises should not exist yet. ConflictReport and EXIT_CONFLICT = 2 are removed. When the feature requires exit code 2 scenarios (e.g., a fresh project with existing agentic files), they will be added back organically through the RED-GREEN-REFACTOR cycle. +2. **Process change for the template:** Stubs should be created per-feature during feature planning, not all at once during project-structuring. This eliminates the planned-but-not-reached gap entirely. The SA creates the package skeleton (directories, `__init__.py`, port interfaces, aggregate root signatures); feature planning creates typed stubs only for the examples defined in the `.feature` file. +3. **Review skill update:** Before flagging code as "dead," the reviewer must check the domain model / technical design / interview notes. Code that matches the architecture but lacks tests should be flagged as WARN (planned-not-reached), not REJECT (dead). 
+ +## Restart Check + +- [x] ConflictReport and EXIT_CONFLICT = 2 removed from code +- [x] Spec docs consistent: exit codes are 0 (success) and 1 (error) +- [x] Feature file has no examples asserting exit code 2 +- [ ] When a future feature requires exit code 2, it will be added via TDD (RED test first, then implementation) \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_coverage-test-in-features-folder.md b/docs/post-mortem/PM_20260501_coverage-test-in-features-folder.md new file mode 100644 index 0000000..5124c3f --- /dev/null +++ b/docs/post-mortem/PM_20260501_coverage-test-in-features-folder.md @@ -0,0 +1,28 @@ +# PM_20260501/coverage-test-in-features-folder: Coverage-boosting test placed in features folder instead of unit folder + +## Failed At + +Structure review — adding a test for the `disconnect()` empty-patterns branch to reach 100% coverage. + +## Root Cause + +The reviewer identified that `connection.py:82` (`return []` when `has_section()` is True but `get_patterns()` returns empty) was not covered by any BDD example. Instead of flagging this as a gap that requires either a new BDD example (if the behavior is user-facing) or a unit test in `tests/unit/` (if it's an implementation branch), the SE added a test function directly to `tests/features/smith_commands/disconnect_test.py`. + +The `tests/features/` folder is exclusively for BDD scenario tests that trace back to `@id` tags in the `.feature` file. The new test `test_smith_commands_disconnect_empty_patterns` has no corresponding `@id` tag in the feature file, violating the traceability contract. + +## Missed Gate + +The TDD skill and structure review skill both require that feature tests correspond to BDD examples. Coverage-boosting tests that exercise implementation branches not covered by BDD examples belong in `tests/unit/`, not `tests/features/`. + +## Fix + +1. 
Move `test_smith_commands_disconnect_empty_patterns` from `tests/features/smith_commands/disconnect_test.py` to `tests/unit/domain/test_connection.py` (or a new unit test file for Connection). +2. Remove the BDD-style docstring (Given/When/Then) since it's a unit test, not a feature test. +3. Write the test as a plain unit test with a descriptive function name. +4. Ensure the test still covers the `connection.py:82` branch for 100% coverage. + +## Restart Check + +- [ ] No tests in `tests/features/` lack a corresponding `@id` tag in the feature file +- [ ] Coverage-boosting tests are in `tests/unit/`, not `tests/features/` +- [ ] Feature tests use BDD-style docstrings with `@id` tags; unit tests use plain descriptive names \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_missing-feature-test-template.md b/docs/post-mortem/PM_20260501_missing-feature-test-template.md new file mode 100644 index 0000000..a8793a6 --- /dev/null +++ b/docs/post-mortem/PM_20260501_missing-feature-test-template.md @@ -0,0 +1,39 @@ +# PM_20260501_missing-feature-test-template: No test stub template for BDD feature scenarios + +## Failed At + +project-structuring — SA generated test stubs using `...` ellipsis bodies and carried MoSCoW tags into docstrings, instead of the established `@pytest.mark.skip(reason="not yet implemented")` pattern. + +## Root Cause + +The `.templates/` directory has no template for feature BDD test stubs. The `structure-project` skill references design artifacts (feature file, technical design, domain model) but provides no test stub format to follow. Without a template, the agent invented its own conventions: + +1. **`...` ellipsis bodies** instead of `@pytest.mark.skip(reason="not yet implemented")` — silently passing tests instead of being explicitly skipped +2. **MoSCoW tags (`Must`/`Should`) in `@id` lines** — the feature file had `@id:xxx Must` which leaked into test docstrings +3. 
**Naming convention** — `test_smith_commands_` used instead of `test__` + +## Missed Gate + +The `stubs_traceable` condition checks that all `@id` tags have corresponding test stubs, but does not validate: +- Whether stubs use the correct skip pattern +- Whether docstrings contain extraneous content (MoSCoW tags) +- Whether the naming convention matches project standards + +## Fix + +1. **Add template** `.templates/tests/features//_test.py.template` with the canonical format: +```python +import pytest + +@pytest.mark.skip(reason="not yet implemented") +def test__<@id>() -> None: + """ + <@id steps raw text including new lines> + """ +``` +2. **Update `structure-project` skill** to reference the new template when generating test stubs. +3. **Update `stubs_traceable` condition** to validate stub format (skip decorator, no MoSCoW in docstrings, naming convention). + +## Restart Check + +SA verifies that all test stubs use `@pytest.mark.skip(reason="not yet implemented")`, have no MoSCoW tags in docstrings, and follow the `test__` naming convention. \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_missing-overwrite-flag.md b/docs/post-mortem/PM_20260501_missing-overwrite-flag.md new file mode 100644 index 0000000..e38049f --- /dev/null +++ b/docs/post-mortem/PM_20260501_missing-overwrite-flag.md @@ -0,0 +1,54 @@ +# PM_20260501_missing-overwrite-flag: --overwrite CLI flag not implemented despite Must-priority BDD examples + +## Failed At + +Development — TDD cycle (green phase). The `--overwrite` flag was present in the interview notes, feature spec, domain model, and technical design, but never implemented in `cli.py` or `Connection.connect()`. + +## Root Cause + +The `--overwrite` flag was specified at **every planning stage** but dropped during implementation: + +1. **Interview notes** (IN_20260501_smith-commands-specification.md): Q6 "Destructive overwrites without explicit `--overwrite` flag" listed as a failure mode. 
Q10 explicitly describes `--overwrite` behavior. Q13 confirms "refuse to overwrite unless `--overwrite` is passed." + +2. **Feature spec** (smith-commands.feature): Two Must-priority examples reference it — `@id:2a5f83d0` and `@id:7d22e1d6`, both using `smith connect --overwrite`. + +3. **Domain model** (domain_model.md line 23): `ConnectionRequested` event includes `[--overwrite]` in the command signature. + +4. **Technical design** (technical_design.md lines 93, 101, 617, 624, 650): `--overwrite` is documented as a CLI flag, a configuration key, and part of the safety invariant. + +5. **Implementation**: **Missing entirely.** `cli.py` has no `--overwrite` argument. `Connection.connect()` has no `overwrite` parameter. `Connection._resolve_specs()` has the domain logic for skipping user-tracked files, but the flag to bypass it was never wired through. + +The failure occurred at the **TDD green phase** — when writing the minimum production code to make failing tests pass, the `--overwrite` flag was never added because no test exercised the CLI handler with the `--overwrite` argument. All tests for examples `2a5f83d0` and `7d22e1d6` tested the domain logic (`Connection._resolve_specs`) through in-memory stubs, which validated the skip/overwrite behavior in isolation but never verified the CLI-to-domain wiring. + +## Missed Gate + +**Structure review** — the review verified test coverage and BDD example pass rate, but did not trace each `@id` example from CLI invocation through to domain behavior. The gate checked "does a test function exist for each `@id`" but not "does each `@id` that references a CLI flag actually test that the flag reaches the domain layer." + +Additionally, the **definition-of-done** gate (if it was applied) should have verified that the technical design spec's CLI interface section matches the actual `cli.py` argument parser. The spec lists `smith connect [--from ] [--overwrite]` but the parser only has `--from`. 
+ +## Stage-by-Stage Trace + +| Stage | `--overwrite` Present? | Gap | +|-------|----------------------|-----| +| Interview (IN_20260501) | Yes — Q6, Q10, Q13, QA2 | None | +| Feature spec (smith-commands.feature) | Yes — `@id:2a5f83d0`, `@id:7d22e1d6` | None | +| Domain model | Yes — `ConnectionRequested` event | None | +| Technical design | Yes — CLI interface, config keys, safety invariant | None | +| TDD green phase | **No** — `cli.py` and `Connection.connect()` never received the flag | **Dropped here** | +| Structure review | Not checked — no CLI-to-domain traceability gate | **Missed here** | + +## Fix + +1. Add `--overwrite` argument to the `connect_parser` in `cli.py` +2. Wire `--overwrite` through `handle_connect` to `Connection.connect()` +3. Update `Connection.connect()` to accept an `overwrite` parameter that bypasses the skip-user-tracked-files logic +4. Write CLI-level integration tests for `smith connect --overwrite` +5. Add a review gate that verifies every `@id` example in the feature spec traces to a CLI handler test (not just a domain-layer test) +6. 
Add a definition-of-done check that compares the technical design's CLI interface section against the actual argument parser + +## Restart Check + +- Verify `smith connect --overwrite` works end-to-end from CLI through domain layer +- Verify BDD examples `2a5f83d0` and `7d22e1d6` pass as CLI integration tests +- Verify `smith status` suggests `--overwrite` in the partial connection message +- Run `task lint && task static-check && task test` and confirm 0 errors \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_moscow-gherkin-tags.md b/docs/post-mortem/PM_20260501_moscow-gherkin-tags.md new file mode 100644 index 0000000..e166c25 --- /dev/null +++ b/docs/post-mortem/PM_20260501_moscow-gherkin-tags.md @@ -0,0 +1,23 @@ +# PM_20260501_moscow-gherkin-tags: MoSCoW priority injected into Gherkin @id tags + +## Failed At + +bdd-features — stakeholder: "why should must etc [be] in the top of the examples?" + +## Root Cause + +The `moscow.md` knowledge file instructs the PO to "classify each candidate Example as Must/Should/Could" but doesn't specify where to record the classification. The `feature.feature.template` has no field for MoSCoW priority. The agent conflated classification (an internal triage step) with Gherkin output, appending MoSCoW labels to the `@id` tag line (`@id:SC-001 Must`) and later as separate tags (`@must @id:...`), neither of which belongs in the feature file. + +## Missed Gate + +The `write-bdd-features` skill loads `[[requirements/moscow]]` and `[[requirements/gherkin]]` but neither document states where MoSCoW classification should be recorded or that it should NOT appear in the .feature file. The skill instructions say "Classify each Example per [[requirements/moscow]]" without specifying the output location. The template's `@id:` format gives no hint about priority tagging. + +## Fix + +1. 
**`moscow.md`**: Add a note that MoSCoW classification is for internal triage only and must NOT appear as Gherkin tags or in the .feature file. Priority can be tracked in stories.md or a separate planning artifact. +2. **`write-bdd-features` skill (SKILL.md)**: Clarify step 4 — "Classify each Example per [[requirements/moscow]]" → add "Record classification in stories.md; do NOT add MoSCoW tags to Examples in the .feature file." +3. **`feature.feature.template`**: Add a comment in the template that `@id:` tags are for traceability only, not for priority classification. + +## Restart Check + +SA verifies that no `.feature` file contains `@must`, `@should`, `@could`, or MoSCoW labels on `@id` lines. \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_reviewer-fixing-code.md b/docs/post-mortem/PM_20260501_reviewer-fixing-code.md new file mode 100644 index 0000000..0e0aad4 --- /dev/null +++ b/docs/post-mortem/PM_20260501_reviewer-fixing-code.md @@ -0,0 +1,41 @@ +# PM_20260501_reviewer-fixing-code: Reviewer fixing code instead of rejecting and routing back to TDD + +## Failed At + +review-gate (Design/Structure/Conventions review) — stakeholder: "Why are reviewers not done properly? Why is R fixing code instead of moving the state back to TDD with a description of what needs to be changed?" + +## Root Cause + +Three process violations occurred simultaneously, all stemming from conflating the reviewer role with the implementer role: + +1. **Reviewer approved despite code smells**: The design review passed on checks 4-9 (Object Calisthenics, code smells, pattern gaps) with "minor" or "acceptable trade-off" verdicts for issues that should have been REJECTED — specifically: union return type in `_filter_conflicts`, duplicated logic between `_filter_conflicts`/`_skip_unmanaged`, dead `detect_state()` method, unreachable ConflictReport path, unused `project_dir` parameter, and two duplicated write-and-commit blocks in `connect()`. + +2. 
**Fixes applied by orchestrator instead of routing back to TDD**: When the first design review REJECTED on `.smith.yaml` inconsistency, the orchestrator fixed `technical_design.md`, `system.md`, `glossary.md`, and ADR-004 directly, then re-ran the review. This should have been: REJECT → route back to TDD cycle with findings → SE implements fixes → re-review. + +3. **Conventions review bypassed the flow**: Lint errors (ruff) and type errors (pyright) were fixed directly by the orchestrator instead of being treated as review findings. The conventions review should have REJECTED with a list of violations, then routed back for the SE to fix them in a TDD cycle. + +## Missed Gate + +The **review-gate** state in the development flow. The flow defines three review sub-gates (Design, Structure, Conventions). Each should produce either APPROVED or REJECTED. On REJECTED, the flow should transition back to the TDD cycle with specific findings — not apply fixes inline. + +The reviewer's job is to find problems and report them. The SE's job is to fix them. The orchestrator conflated these by having the reviewer/subagent report findings, then immediately fixing them itself before re-running the review. + +## Fix + +1. **Reviewer MUST NOT modify production code or tests.** The reviewer's output contract is findings only — a REJECTED report with file:line evidence or an APPROVED verdict. No edits. + +2. **On REJECTED, route back to TDD cycle** with the specific findings as input. The SE (or orchestrator acting as SE) picks up the findings, implements fixes, re-runs tests, then re-enters the review gate. + +3. **"Minor" is not a pass.** Code smells that are acknowledged but hand-waved as "acceptable trade-offs" should still be flagged. The reviewer should note them; the SE decides whether to fix or defer. Deferring requires explicit acknowledgment, not silent approval. + +4. 
**Spec doc fixes are still code changes.** When a review finds that spec docs are inconsistent with implementation, the fix is: REJECT → route back to the appropriate flow state (e.g., technical-design or adr-draft) → that state's owner applies the fix → re-review. The orchestrator should not fix docs on behalf of another state's owner. + +5. **Lint/type errors are review findings, not auto-fix opportunities.** Running `ruff --fix` or manually fixing lint errors during review is the SE's job, not the reviewer's. The conventions review should report violations; the SE fixes them in the next TDD cycle. + +## Restart Check + +SA verifies that: +- [ ] All three review sub-gates produce APPROVED/REJECTED verdicts without modifying any files +- [ ] On REJECTED, the flow transitions back to TDD with a findings document +- [ ] No code or spec doc changes are made during the review-gate state +- [ ] Code smells are explicitly listed in findings rather than dismissed as "minor" diff --git a/docs/post-mortem/PM_20260501_se-dirtying-living-docs.md b/docs/post-mortem/PM_20260501_se-dirtying-living-docs.md new file mode 100644 index 0000000..d969291 --- /dev/null +++ b/docs/post-mortem/PM_20260501_se-dirtying-living-docs.md @@ -0,0 +1,27 @@ +# PM_20260501_se-dirtying-living-docs: SE modified spec documents during TDD/review cycle without flow approval + +## Failed At + +Design review (passes 3–8) — the SE directly modified living specification documents (domain_model.md, technical_design.md, glossary.md, product_definition.md, system.md, context_map.md, feature file, ADRs) to fix inconsistencies found by the reviewer, without routing those changes through the appropriate flow states. + +## Root Cause + +The review-design skill has no instruction to distinguish between production code fixes (which the SE can make directly) and specification document fixes (which belong to different flow states and require approval). 
When the reviewer found cross-document inconsistencies, the SE treated spec docs the same as code — direct edit during the review cycle. + +This violates the flow contract: spec documents are owned by specific states (architecture-assessment owns domain_model.md, technical-design owns technical_design.md, etc.). The development flow has no state that owns spec docs, so the SE has no authority to modify them. + +## Missed Gate + +The review-design skill says: "IF a smell is found → list it in findings" and "Write results to artifacts listed in the current state's out attrs. If findings affect artifacts outside the output contract, flag them in output notes for the appropriate step." The skill already instructs the SE to **flag** out-of-contract changes, not **make** them. The orchestrator ignored this instruction. + +## Fix + +1. **Process rule:** During TDD/review, the SE may ONLY modify production code and test code. Spec document inconsistencies must be FLAGGED in review output notes, not fixed directly. +2. **Review-design skill update:** Add an explicit rule: "NEVER modify specification documents (domain_model.md, technical_design.md, glossary.md, product_definition.md, system.md, context_map.md, ADRs, feature files) during review. These are owned by other flow states. Flag inconsistencies in output notes for the appropriate step." +3. **Flow mechanism:** When the reviewer flags spec doc inconsistencies, the orchestrator should create a separate issue/task to route those fixes through the appropriate flow state, rather than fixing them inline during development. + +## Restart Check + +- [ ] Review output notes contain flagged spec doc inconsistencies instead of inline fixes +- [ ] No spec documents are modified during the TDD/review cycle +- [ ] Spec doc fixes are routed through the appropriate flow state (architecture, planning, etc.) 
\ No newline at end of file diff --git a/docs/research/computer-science/artificial-intelligence/liu_et_al_2023.md b/docs/research/computer-science/artificial-intelligence/liu_et_al_2023.md new file mode 100644 index 0000000..8ae77e7 --- /dev/null +++ b/docs/research/computer-science/artificial-intelligence/liu_et_al_2023.md @@ -0,0 +1,45 @@ +# Lost in the Middle (Positional Attention Degradation) — Liu et al., 2023 + +## Citation + +Liu, N. F., Lin, K., Hewitt, J., Paranjape, A., Bevilacqua, M., Petroni, F., & Liang, P. (2023). "Lost in the Middle: How Language Models Use Long Contexts." *Transactions of the Association for Computational Linguistics (TACL)*, arXiv preprint arXiv:2307.03172. + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Language models exhibit U-shaped attention pattern - information at beginning and end of long context receives significantly more attention than middle content. + +## Core Findings + +1. **U-Shaped Performance Pattern**: Performance highest when relevant information occurs at beginning or end of input context, degrades significantly in middle positions +2. **Multi-Document QA Results**: Even explicitly long-context models struggle to access information in middle of long contexts +3. **Key-Value Retrieval Degradation**: Performance drops substantially when target information positioned in middle sections +4. **Primacy/Recency Effects**: Beginning benefits from setting attention baseline, end benefits from proximity to output position +5. **Context Length Impact**: Performance degradation becomes more pronounced as context length increases + +## Mechanism + +Transformer attention patterns distribute weight unevenly across sequence positions. Beginning content benefits from primacy effects (first tokens establish attention baseline), end content benefits from recency effects (proximity to output). 
Middle content competes with both extremes and receives proportionally less attention weight, causing information retrieval failures. + +## Relevance + +Critical for long-context AI applications, prompt engineering strategies, context window utilization. Essential for understanding attention limitations in large language models, designing effective retrieval-augmented generation systems, optimizing document processing workflows. + +## Related Research + +Published in TACL 2023, builds on transformer attention mechanisms research. Authors include Nelson Liu, John Hewitt (Stanford), Percy Liang (Stanford). Connects to attention analysis, long-context modeling, retrieval-augmented generation literature. Foundational for understanding positional biases in modern language models. diff --git a/docs/research/design/accessibility/w3c_wcag21_2018.md b/docs/research/design/accessibility/w3c_wcag21_2018.md new file mode 100644 index 0000000..372c03e --- /dev/null +++ b/docs/research/design/accessibility/w3c_wcag21_2018.md @@ -0,0 +1,48 @@ +# Web Content Accessibility Guidelines 2.1 — W3C, 2018 + +## Citation + +W3C. (2018). *Web Content Accessibility Guidelines (WCAG) 2.1*, W3C Recommendation 5 June 2018. https://www.w3.org/TR/WCAG21/ + +## Source Type + +Industry Standard + +## Method + +Specification + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Text and images of text must have a contrast ratio of at least 4.5:1 against their background (Level AA) to ensure readability for users with visual impairments, including low vision and color blindness. + +## Core Findings + +1. **Contrast requirements**: Normal text requires 4.5:1 contrast (AA) or 7:1 (AAA); large text (18pt+ or 14pt+ bold) requires 3:1 (AA) or 4.5:1 (AAA). +2. **Four principles framework**: Web content must be perceivable, operable, understandable, and robust (POUR). +3. 
**Three conformance levels**: A (minimum), AA (standard target), AAA (enhanced) with 61 success criteria total in WCAG 2.1. +4. **Legal adoption worldwide**: WCAG 2.1 AA is legally mandated by EU Web Accessibility Directive, US Section 508, and many national laws. +5. **Calculation formula**: Contrast ratio = (L1 + 0.05) / (L2 + 0.05) where L1 is lighter color's relative luminance, L2 is darker. +6. **Exemptions**: Incidental text, logotypes, and inactive UI components are exempt from contrast requirements. +7. **New 2.1 criteria**: Added 17 new success criteria focused on mobile accessibility, low vision, and cognitive disabilities. + +## Mechanism + +Relative luminance is computed from sRGB values via gamma correction: for each RGB channel, divide by 255, then apply gamma function (≤ 0.04045 divide by 12.92; > 0.04045 use ((V + 0.055) / 1.055)^2.4), then combine as L = 0.2126×R + 0.7152×G + 0.0722×B. The contrast ratio formula produces values from 1:1 (identical colors) to 21:1 (black on white). This mathematical approach ensures consistent, measurable accessibility standards. + +## Relevance + +Essential standard for web accessibility compliance and legal requirements globally. Critical for brand palette design, interface color systems, and any digital content requiring inclusive design. Directly applicable to color contrast validation, automated accessibility testing, and design system documentation. Required for government, education, and increasingly private sector websites. 
+ +## Related Research + +- (Albers, 1963) — Color theory foundations showing why contrast relationships matter more than absolute colors +- (EN 301 549, 2014) — European standard incorporating WCAG requirements for ICT accessibility \ No newline at end of file diff --git a/docs/research/design/visual/airey_2010.md b/docs/research/design/visual/airey_2010.md new file mode 100644 index 0000000..34f349b --- /dev/null +++ b/docs/research/design/visual/airey_2010.md @@ -0,0 +1,48 @@ +# Logo Design Love — Airey, 2010 + +## Citation + +Airey, D. (2010). *Logo Design Love: A Guide to Creating Iconic Brand Identities*. New Riders. ISBN 978-0-321-66436-3. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Strong logos have one dominant feature — not two, three, or four. The design process must be monochrome-first to focus on the core idea. + +## Core Findings + +1. **Single dominant feature rule**: Effective logos have exactly one memorable element that viewers can identify and recall. +2. **Monochrome-first methodology**: Color should be added only after the black-and-white shape proves its strength and recognition. +3. **Systematic stress-testing**: Five evaluation checkpoints ensure logo robustness across contexts: 5-second test, blur test, scalability test, monochrome test, proximity test. +4. **Written brief requirement**: No design work should proceed without documented project parameters and goals. +5. **Iterative refinement process**: 20-40 rough concepts narrowed to 3-5 for vector refinement before color application. +6. **Appropriateness over novelty**: Logo style should match the brand's personality and context rather than following design trends. +7. **Scalability imperative**: Logos must work effectively from 16px favicon size to 500px+ display applications. 
+ +## Mechanism + +Airey's process follows explicit checkpoints: (1) written brief before sketching, (2) 20-40 rough concepts in black ink on paper, (3) select top 3-5 and refine in vector format, (4) add color only after monochrome shape proves strong, (5) stress-test across sizes and backgrounds. The "one thing" test eliminates designs with competing elements. Each evaluation method targets specific failure modes: blur test catches detail-dependent designs, scalability test reveals breakdown points, proximity test ensures differentiation from competitors. + +## Relevance + +Essential methodology for brand identity design, startup logo creation, and visual identity systems. The stress-testing framework applies to any visual design requiring recognition across contexts. Monochrome-first principle prevents common failures in digital interfaces where color may not be available (accessibility, printing, small sizes). + +## Related Research + +- (Rand, 1985) — Foundational principles of logo simplicity and testing methods +- (Wertheimer, 1923) — Gestalt principles underlying logo recognition and memorability \ No newline at end of file diff --git a/docs/research/design/visual/albers_1963.md b/docs/research/design/visual/albers_1963.md new file mode 100644 index 0000000..ebd30cb --- /dev/null +++ b/docs/research/design/visual/albers_1963.md @@ -0,0 +1,48 @@ +# Interaction of Color — Albers, 1963 + +## Citation + +Albers, J. (1963). *Interaction of Color*. Yale University Press. ISBN 978-0-300-01846-8. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Color is the most relative medium in art — the same physical color is perceived differently depending on its surrounding colors and the quantity in which it appears. + +## Core Findings + +1. **Color relativity principle**: "Every perception of color is an illusion... we do not see colors as they really are. 
In our perception they alter one another." +2. **Context-dependent perception**: The same physical color appears dramatically different when surrounded by different colors or used in different proportions. +3. **Experiential learning approach**: Color is best studied through hands-on experimentation and observation rather than theoretical study alone. +4. **"Making one color appear as two"**: Albers' key exercise demonstrates that color perception is determined by relationships, not absolute values. +5. **Systematic color exploration**: The "Homage to the Square" series methodically explored chromatic interactions with nested squares across hundreds of works. +6. **Pedagogical methodology**: Prioritized experience over theory - "what counts is not so-called knowledge of so-called facts, but vision — seeing." +7. **Brand design implications**: Colors must be defined as relationships (contrast ratios, visual weight proportions) rather than fixed values applied without context. + +## Mechanism + +Albers' key exercise — making one color appear as two different colors by changing its surroundings — proves that color perception is determined by relationships, not absolute values. This has direct implications for brand design: an accent color that reads clearly on white may appear muddy on dark backgrounds, not because the accent changed, but because its relationship to the background changed. The actionable rule is to define brand colors as relationships (primary is always N× the visual weight of accent; text maintains ≥4.5:1 contrast) rather than fixed hex values applied without context. + +## Relevance + +Foundational for brand palette design, interface color systems, and accessibility guidelines. Essential for understanding why colors must be tested in context rather than isolation. Critical for digital design where the same color appears across various backgrounds, screen types, and lighting conditions. 
Directly applies to contrast ratios, color accessibility standards, and responsive design systems. + +## Related Research + +- (Land, 1977) — Retinex theory of color constancy challenging some of Albers' assumptions +- (Jameson, 1985) — Defense of Albers' pigment-based approach vs. theoretical color mixing \ No newline at end of file diff --git a/docs/research/design/visual/arnheim_1954.md b/docs/research/design/visual/arnheim_1954.md new file mode 100644 index 0000000..cff6ac8 --- /dev/null +++ b/docs/research/design/visual/arnheim_1954.md @@ -0,0 +1,48 @@ +# Art and Visual Perception — Arnheim, 1954 + +## Citation + +Arnheim, R. (1954). *Art and Visual Perception: A Psychology of the Creative Eye*. University of California Press. (Revised edition 1974, ISBN 978-0-520-02623-5.) + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Visual shapes carry innate psychological meaning that is perceived instantly, before conscious thought. Circles are perceived as soft, unified, and complete; squares as stable, solid, and rational; triangles as dynamic, directional, and energetic. + +## Core Findings + +1. **Gestalt-based shape psychology**: Visual forms carry inherent meaning derived from their structural properties, not arbitrary cultural conventions. +2. **Circle associations**: Perceived as soft, unified, complete due to lack of edges and continuous form. +3. **Square associations**: Perceived as stable, solid, rational due to broad base and balanced structure. +4. **Triangle associations**: Perceived as dynamic, directional, energetic due to pointed direction and asymmetry. +5. **Visual force fields**: Off-center placement creates tension; angled placement adds dynamism by disrupting expected stability. +6. **Compositional principles**: Visual forces within compositions create perceived tension or calm based on geometric relationships. +7. 
**Simplicity rule**: The simplest geometric form expressing intended meaning is the most effective design solution. + +## Mechanism + +Arnheim demonstrated that shape perception follows Gestalt principles where "visual forces" within compositions create perceived tension or calm. A circle placed off-center creates visual tension because symmetry demands centering. A square at an angle creates dynamism by disrupting expected stability. Complex shapes combine meanings of their geometric primitives - reducing logos to geometric components reveals whether shape language is coherent or contradictory. + +## Relevance + +Foundational for logo design, visual identity systems, and interface design. Essential for understanding how geometric forms communicate brand personality and user expectations. Applied in icon design, where shape psychology determines immediate recognition and emotional response. Critical for any visual communication requiring instant psychological impact. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles underlying visual perception +- (Rand, 1985) — Practical application of simplicity principles in logo design \ No newline at end of file diff --git a/docs/research/design/visual/biederman_1987.md b/docs/research/design/visual/biederman_1987.md new file mode 100644 index 0000000..a728d37 --- /dev/null +++ b/docs/research/design/visual/biederman_1987.md @@ -0,0 +1,48 @@ +# Recognition-by-Components — Biederman, 1987 + +## Citation + +Biederman, I. (1987). "Recognition-by-Components: A Theory of Human Image Understanding." *Psychological Review*, 94(2), 115–147. https://doi.org/10.1037/0033-295X.94.2.115 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Humans recognize objects by decomposing them into simple geometric components called "geons" (geometric ions). 
Line drawings containing only essential edges are recognized as quickly as full-color photographs, proving that edge information alone is sufficient for rapid object recognition. + +## Core Findings + +1. **Geon-based recognition**: Approximately 36 basic 3-dimensional shapes (geons) can be combined to describe virtually all common objects we encounter. +2. **Edge primacy**: Line drawings with critical edges present are recognized as quickly and accurately as full-color photographs of the same objects. +3. **Viewpoint invariance**: Objects can be recognized from almost any viewing angle due to invariant edge properties of geons (curvature, parallel lines, co-termination, symmetry, co-linearity). +4. **Combinatorial power**: With just 24 geons, there are 306 billion possible combinations of 3 geons, allowing recognition of virtually unlimited objects. +5. **Speech analogy**: Just as ~44 phonemes create all English words, ~36 geons create all recognizable objects through systematic combination. +6. **Early development**: Geon recognition develops in infants as early as 4 months old, making it one of fundamental perceptual skills. +7. **Noise resistance**: Objects remain recognizable despite visual noise provided the constituent geons are visible. + +## Mechanism + +Biederman's experiments showed that recognition speed and accuracy were nearly identical for line drawings and full-color photographs of the same objects, provided critical edges were present. The visual system extracts edge-based structural descriptions (geon assemblies) as the primary recognition pathway, with color and texture serving only as secondary confirmation. For logo design, this provides perceptual science basis for monochrome-first design: if edges/silhouette carry the recognition signal, color and detail are secondary and can be added later without affecting core identifiability. 
+ +## Relevance + +Foundational theory for understanding how visual recognition works, with direct applications to logo design, icon design, and visual identity systems. Provides scientific justification for monochrome-first design approaches and edge-based recognition. Essential for creating visual marks that work across scales, contexts, and viewing conditions. Critical for understanding why simple geometric shapes are most effective for brand symbols. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles that complement geon-based recognition +- (Kare, 1984) — Practical application of edge-based recognition in icon design \ No newline at end of file diff --git a/docs/research/design/visual/hicks_2011.md b/docs/research/design/visual/hicks_2011.md new file mode 100644 index 0000000..1fafcb7 --- /dev/null +++ b/docs/research/design/visual/hicks_2011.md @@ -0,0 +1,47 @@ +# The Icon Handbook — Hicks, 2011 + +## Citation + +Hicks, J. (2011). *The Icon Handbook*. Five Simple Steps. ISBN 978-1-907828-00-3. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Icons must be designed at the smallest target size first and then scaled up, adding detail only at larger sizes. + +## Core Findings + +1. **Progressive simplification methodology**: Create separate pixel-perfect versions at each target size tier (16px, 24px, 32px, 48px, 128px, 256px, 512px) rather than scaling a single vector. +2. **Size-tier optimization**: At each tier, remove details that cannot be rendered at that resolution and exaggerate key features for clarity. +3. **Visual acuity limitations**: At 16×16 pixels, fine lines (under 2px), subtle gradients, and interior details vanish or create visual noise. +4. **Industry standard practice**: Progressive simplification is the standard methodology used by professional icon designers including major software companies. +5. 
**Firefox and Skype precedent**: Hicks' work on major brand icons demonstrates the effectiveness of this approach at scale. +6. **Downscaling failure**: Simply downscaling a 512px icon to 16px produces a muddy, unrecognizable mark that fails usability tests. + +## Mechanism + +Hicks' tier system works because human visual acuity is finite. At 16×16 pixels, fine lines (under 2px), subtle gradients, and interior details vanish or create visual noise. At 512×512, those same details add richness. Progressive simplification acknowledges this by treating each size tier as a distinct design problem: the 16px version is a hand-optimized silhouette, the 32px version may add one key interior detail, the 128px version adds secondary features, and the 512px version is the full design. + +## Relevance + +Essential methodology for any icon system, mobile app design, or interface requiring icons at multiple resolutions. Critical for responsive design systems, desktop applications, and brand identity systems that must work across various scales and contexts. Directly applicable to favicon design, app icon creation, and UI iconography. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles that inform icon recognition at small sizes +- (Miller, 1956) — Cognitive load implications of visual complexity in small-scale graphics \ No newline at end of file diff --git a/docs/research/design/visual/itten_1961.md b/docs/research/design/visual/itten_1961.md new file mode 100644 index 0000000..47cb222 --- /dev/null +++ b/docs/research/design/visual/itten_1961.md @@ -0,0 +1,48 @@ +# The Art of Color — Itten, 1961 + +## Citation + +Itten, J. (1961). *The Art of Color: The Subjective Experience and Objective Rationale of Color*. Reinhold Publishing. ISBN 978-0-442-24037-6. 
+ +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Color interaction is relational — the same hue appears different depending on its neighbors. Itten's seven contrast types provide a systematic framework for predicting how colors will read in context. + +## Core Findings + +1. **Seven color contrasts framework**: (1) contrast of hue, (2) light-dark contrast, (3) cold-warm contrast, (4) complementary contrast, (5) simultaneous contrast, (6) contrast of saturation, (7) contrast of extension (proportion). +2. **Relational color perception**: Same physical color appears different depending on surrounding colors and proportions - colors cannot be judged in isolation. +3. **Light-dark contrast supremacy**: The strongest contrast foundation - creates clarity, drama, and legibility. Forms basis of accessibility guidelines (4.5:1 contrast ratios). +4. **Complementary contrast power**: Opposite hues create maximum visual tension and vibrancy, but risk uncomfortable vibration at similar saturation levels. +5. **Simultaneous contrast effect**: Neutral colors shift toward the complement of their background (grey on red appears greenish). +6. **Proportional color weight**: Visual weight depends on area and inherent brightness - yellow is "heavier" than equal-area violet (proportion ratios: yellow:violet ≈ 1:3). +7. **Contextual color testing requirement**: Brand colors must be evaluated in actual usage contexts, not in isolation. + +## Mechanism + +Itten identified seven contrasts as foundation of color composition. The most actionable for brand design: **complementary contrast** creates maximum visual tension between opposite hues; **simultaneous contrast** makes neutrals shift toward complement of their background. Each contrast produces different emotional effects from calm (analogous, low contrast) to vibrant (complementary, high contrast). 
Colors must be defined as relationships rather than absolute values. + +## Relevance + +Foundational for brand palette design, interface color systems, and accessibility guidelines. Essential for understanding why colors must be tested in context rather than isolation. Directly applies to contrast ratios, WCAG accessibility standards, and responsive design systems where colors appear across various backgrounds and contexts. + +## Related Research + +- (Albers, 1963) — Further development of color interaction principles focusing on pigment-based relationships +- (Chevreul, 1839) — Early simultaneous contrast observations that influenced Itten's work \ No newline at end of file diff --git a/docs/research/design/visual/kare_1984.md b/docs/research/design/visual/kare_1984.md new file mode 100644 index 0000000..768e355 --- /dev/null +++ b/docs/research/design/visual/kare_1984.md @@ -0,0 +1,47 @@ +# Macintosh Icon Design — Kare, 1984 + +## Citation + +Kare, S. (1984). Original Macintosh icon designs. Museum of Modern Art (MoMA), New York. Documented in: Kindy, D. (2019). "How Susan Kare Designed User-Friendly Icons for the First Macintosh." *Smithsonian Magazine*. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Icons designed at the smallest target size first, on a pixel grid, are more recognizable and enduring than icons designed at large size and scaled down. + +## Core Findings + +1. **Pixel-perfect methodology**: Kare designed all original Macintosh icons on a 32×32 pixel grid using graph paper, working at exact output resolution before touching software. +2. **"Favicon-first" principle**: If a mark cannot hold its identity at 16×16 or 32×32 pixels, it is not strong enough for any size. +3. **Constraint-driven design**: At 32×32 (1,024 pixels, monochrome), there is no room for decoration — each pixel must contribute to recognition. +4. 
**Enduring recognition**: Her icons remain instantly recognizable 40+ years later because every pixel carried meaning. +5. **Metaphor-driven approach**: Icons were inspired by art history, Japanese logograms, hieroglyphics, and real-world objects to create instant recognition. +6. **Progressive simplification**: Detail that does not read at the smallest size should not exist in the core design. + +## Mechanism + +Kare's methodology — designing on the smallest graph paper she could find, composing each icon at its actual pixel resolution before touching software — enforces progressive simplification by constraint. Working within severe technological constraints (32×32 pixels, monochrome) made her an early pioneer of pixel art, drawing from her fine art experience in mosaics, needlepoint, and pointillism. This "peculiar sort of minimal pointillism" required solving recognition problems with only horizontal, vertical, or 45-degree lines. + +## Relevance + +Foundational methodology for icon design across all digital interfaces. Established the principle that icons must work at their smallest intended size first. Essential for favicon design, mobile app icons, and any interface requiring recognizable symbols at multiple scales. Her approach directly influences modern icon design systems and responsive iconography. + +## Related Research + +- (Hicks, 2011) — Modern application of progressive simplification principles in "The Icon Handbook" +- (Wertheimer, 1923) — Gestalt principles that inform instant icon recognition \ No newline at end of file diff --git a/docs/research/design/visual/lupton_2010.md b/docs/research/design/visual/lupton_2010.md new file mode 100644 index 0000000..7eabe8d --- /dev/null +++ b/docs/research/design/visual/lupton_2010.md @@ -0,0 +1,47 @@ +# Thinking with Type — Lupton, 2010 + +## Citation + +Lupton, E. (2010). *Thinking with Type: A Critical Guide for Designers, Writers, Editors, & Students* (2nd ed.). Princeton Architectural Press. 
ISBN 978-1-56898-969-3. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Typography is a system of relationships — between letterforms, between text and space, between type and image. For branding, three typographic choices carry the most personality signal: serif vs sans-serif (traditional/authoritative vs modern/approachable), weight (light/delicate vs bold/assertive), and spacing/tracking (tight/urgent vs open/relaxed). + +## Core Findings + +1. **Serif vs. sans-serif psychology**: Serif typefaces (Times, Garamond, Georgia) carry centuries of association with authority, tradition, and print culture, while sans-serif faces (Helvetica, Futura, Inter) signal modernity, clarity, and directness. +2. **Readability considerations**: Serifs create horizontal rhythm that guides the eye along lines, making serif text more readable at small sizes in long passages. +3. **Logo scalability**: Sans-serif faces dominate in logo marks because they survive reduction to small sizes better than serifs — fine serif details become visual noise at favicon sizes. +4. **Weight as personality lever**: A single typeface at light weight with generous tracking feels elegant and premium; the same typeface at bold weight with tight tracking feels urgent and powerful. +5. **Spacing psychology**: Tight tracking conveys urgency and intensity, while generous tracking suggests luxury and thoughtfulness. +6. **Systematic flexibility**: For brand systems, Lupton recommends sans-serif families with wide weight ranges (300-700) for maximum flexibility from single typeface choices. + +## Mechanism + +Typography functions as a visual language where formal qualities communicate before content is read. Serif typefaces leverage historical associations with authority and permanence, while their horizontal flow aids sustained reading. 
Sans-serif typefaces eliminate decorative elements to emphasize pure form and modernity. Weight and spacing act as volume controls for typographic voice — lighter weights whisper, bolder weights shout, tight spacing creates urgency, loose spacing creates calm. These relationships work pre-cognitively, making typographic choices powerful tools for brand personality expression. + +## Relevance + +Essential framework for brand identity design, web typography, and interface design. Critical for understanding how typographic choices communicate brand personality before users read content. Directly applicable to logo design, UI typography, marketing materials, and any system requiring consistent typographic voice across multiple touchpoints. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles that inform typographic hierarchy and spacing +- (Bringhurst, 2004) — Classical typography principles and historical context \ No newline at end of file diff --git a/docs/research/design/visual/muller_brockmann_1981.md b/docs/research/design/visual/muller_brockmann_1981.md new file mode 100644 index 0000000..c7b0a82 --- /dev/null +++ b/docs/research/design/visual/muller_brockmann_1981.md @@ -0,0 +1,47 @@ +# Grid Systems in Graphic Design — Müller-Brockmann, 1981 + +## Citation + +Müller-Brockmann, J. (1981). *Grid Systems in Graphic Design: A Visual Communication Manual for Graphic Designers, Typographers, and Three Dimensional Designers* (4th ed.). Arthur Niggli. ISBN 978-3-7212-0145-1. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +A grid provides the rational structure upon which creative work becomes legible, ordered, and balanced, producing visual order that the viewer perceives as clarity and competence even without consciously recognizing the grid. + +## Core Findings + +1. 
**Grid-based composition principles**: Grid systems create visual order that viewers subconsciously recognize as professional and trustworthy, even when they cannot identify the underlying structure. +2. **Rule of thirds effectiveness**: The 3×3 grid creates four natural focal points at intersections where the eye is drawn preferentially over the center. +3. **Golden ratio applications**: The golden ratio (1:1.618) produces naturally pleasing asymmetric balance with the "golden point" at roughly 62% from left, 38% from top. +4. **Dynamic vs. static positioning**: Placing primary elements at intersection points creates dynamic tension; centered compositions create calm stability but risk appearing static. +5. **Constraint liberation principle**: The grid constrains placement but liberates proportion - elements can be large/small, bold/light within the rational structure. +6. **Swiss design methodology**: Müller-Brockmann's International Typographic Style demonstrated that systematic approaches produce more effective communication than intuitive placement. + +## Mechanism + +The rule of thirds divides a canvas into 9 equal zones (3 columns × 3 rows). The four intersections are natural focal points — the eye is drawn to them preferentially over the center. Placing the primary mark at an upper-third intersection and secondary elements along the lower third creates dynamic tension. A centered composition (mark at dead center) creates calm and stability but risks feeling static. Müller-Brockmann's key rule: the grid constrains placement but liberates proportion. Within a grid, elements can be large or small, bold or light — the grid ensures they relate to each other rationally. Without a grid, elements appear arbitrary and the composition feels disorganized regardless of individual element quality. + +## Relevance + +Foundational methodology for all layout design, logo positioning, and visual composition. 
Essential for creating professional, trustworthy visual communications across print, digital, and environmental design. Critical for brand identity systems, web design, publication design, and any visual communication requiring systematic organization and hierarchy. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles that inform grid-based visual organization +- (Rand, 1985) — Application of grid principles to logo and identity design \ No newline at end of file diff --git a/docs/research/design/visual/rand_1985.md b/docs/research/design/visual/rand_1985.md new file mode 100644 index 0000000..187733f --- /dev/null +++ b/docs/research/design/visual/rand_1985.md @@ -0,0 +1,46 @@ +# A Designer's Art — Rand, 1985 + +## Citation + +Rand, P. (1985). *Paul Rand: A Designer's Art*. Yale University Press. ISBN 978-0-300-03242-6. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +A logo cannot survive unless it is designed with the utmost simplicity and restraint. + +## Core Findings + +1. Strong logos must work in monochrome first — color is added only after the shape proves its identity in black and white. +2. Logos should be tested by blurring and degrading them to verify they remain recognizable under poor conditions. +3. "Ideas do not need to be esoteric to be original or exciting" — simplicity does not mean lack of sophistication. +4. The most enduring corporate marks are those reducible to the fewest recognizable elements. +5. A logo must function across all applications: small sizes, poor printing, different backgrounds, and various media. + +## Mechanism + +Rand's design process was explicitly monochrome-first: design in black and white, test under degraded conditions, then add color only when the shape is proven. The blur test applies Gaussian blur to probe whether the global silhouette carries recognition independent of detail. 
If the blurred mark remains identifiable, the shape is strong; if not, it relies too much on detail and will fail at small sizes or poor reproduction quality. + +## Relevance + +Foundational methodology for logo design and brand identity systems. The monochrome-first approach and stress-testing principles directly apply to creating robust visual identities that work across all contexts and scales. Essential for preventing late-stage failures when implementing brand systems. + +## Related Research + +- (Airey, 2010) — Contemporary application of Rand's principles in Logo Design Love +- (Arnheim, 1954) — Psychological foundations of visual perception that inform Rand's approach \ No newline at end of file diff --git a/docs/research/design/visual/wertheimer_1923.md b/docs/research/design/visual/wertheimer_1923.md new file mode 100644 index 0000000..a821b04 --- /dev/null +++ b/docs/research/design/visual/wertheimer_1923.md @@ -0,0 +1,48 @@ +# Laws of Organization in Perceptual Forms — Wertheimer, 1923 + +## Citation + +Wertheimer, M. (1923). "Laws of Organization in Perceptual Forms." *Psychologische Forschung*, 4, 301–350. Translated in: Ellis, W.D. (ed.), *A Source Book of Gestalt Psychology*, 1938. + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The human visual system automatically organizes visual elements into coherent groups using hardwired perceptual principles that operate pre-attentively, making them the most powerful tool for creating instantly recognizable visual designs. + +## Core Findings + +1. **Proximity principle**: Elements close together are perceived as belonging to one group - the strongest grouping force. +2. **Similarity principle**: Elements sharing color, shape, or size are visually grouped together, even when scattered. +3. **Closure principle**: The mind fills in gaps to perceive complete shapes from incomplete visual information. +4. 
**Figure-ground separation**: Elements are automatically perceived as either foreground objects or background context. +5. **Continuation principle**: The eye follows the smoothest path through intersecting lines and curves. +6. **Pre-attentive processing**: These organizational principles operate before conscious thought, making them universally reliable. +7. **Simplicity preference**: Given visual elements, the brain imposes the simplest stable structure it can perceive. + +## Mechanism + +Wertheimer demonstrated that perceptual organization is hardwired, not learned. Given visual elements, the brain automatically imposes the simplest stable structure. Proximity grouping dominates (closest elements group first), followed by similarity grouping. Closure allows the brain to complete partial shapes automatically. Figure-ground separation makes elements stand out from backgrounds without conscious effort. For design, this means simplifying shapes until Gestalt grouping takes over - the viewer's brain will complete forms more reliably than added detail. + +## Relevance + +Foundational for all visual design, user interface design, and logo creation. Essential for creating layouts that group related information automatically, icons that remain recognizable at any size, and visual hierarchies that guide attention without conscious effort. Critical for any design requiring instant visual organization and recognition. 
+ +## Related Research + +- (Arnheim, 1954) — Application of Gestalt principles to art and visual perception +- (Köhler, 1920) — Gestalt psychology's figure-ground and organizational principles \ No newline at end of file diff --git a/docs/research/information-science/documentation/procida_2021.md b/docs/research/information-science/documentation/procida_2021.md new file mode 100644 index 0000000..25e5c7c --- /dev/null +++ b/docs/research/information-science/documentation/procida_2021.md @@ -0,0 +1,45 @@ +# Diátaxis Documentation Framework — Procida, 2021 + +## Citation + +Procida, D. (2021). *Diátaxis: A systematic approach to technical documentation authoring*. https://diataxis.fr/ + +## Source Type + +Specification + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Technical documentation has four distinct modes organized along two axes (action vs cognition, acquisition vs application) - mixing modes in single document produces confusion. + +## Core Findings + +1. **Four Documentation Types**: Tutorial (learning-oriented), How-to guide (task-oriented), Reference (information-oriented), Explanation (understanding-oriented) +2. **Two-Axis Framework**: Action vs Cognition crossed with Acquisition vs Application creates systematic quadrant organization +3. **Mode Separation**: Each quadrant demands distinct writing approach - combining forces mental state switching in readers +4. **Practical Application**: Framework adopted successfully in hundreds of documentation projects including Gatsby, Cloudflare, Vonage +5. **Quality Principle**: Provides active principle for maintainers to think effectively about documentation work + +## Mechanism + +Two axes create systematic quadrant: Tutorials (learning-oriented, action + acquisition), How-to guides (task-oriented, action + application), Reference (information-oriented, cognition + application), Explanation (understanding-oriented, cognition + acquisition). 
Each quadrant requires different content approach, style, and architecture. + +## Relevance + +Essential for technical writing, documentation architecture, developer experience. Applied in software documentation, API guides, educational content. Foundational for organizing complex technical information systems and improving user documentation experience. + +## Related Research + +Created by Daniele Procida. Name from Ancient Greek διάταξις (diataxis): "dia" (across) + "taxis" (arrangement). Adopted by major tech companies and open-source projects. Addresses content (what to write), style (how to write), architecture (how to organize) problems in technical documentation. diff --git a/docs/research/information-science/domain-modeling/brandolini_2012.md b/docs/research/information-science/domain-modeling/brandolini_2012.md new file mode 100644 index 0000000..a3595d0 --- /dev/null +++ b/docs/research/information-science/domain-modeling/brandolini_2012.md @@ -0,0 +1,45 @@ +# Event Storming — Brandolini, 2012 + +## Citation + +Brandolini, A. (2012–present). *Event Storming*. eventstorming.com. Originally developed as "Event-based modelling" circa 2012; refined and published through workshops and online resources. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Start with what the business cares about (events that happened) rather than data structures or process flows to discover domain boundaries collaboratively. + +## Core Findings + +1. **Event-First Approach**: Begin with domain events (past-tense, business-relevant verbs) placed on timeline +2. **Collaborative Discovery**: Visual, tactile format (sticky notes) lowers barrier for non-technical stakeholder participation +3. **Natural Boundary Detection**: Grouping events and commands surfaces bounded context boundaries where terms change meaning +4. 
**Multiple Flavors**: Improve existing business, envision startup ecosystem, explore new services, design critical software +5. **Temporal Dependencies**: Placing events on timeline reveals causal chains and business flow patterns + +## Mechanism + +EventStorming works by starting with business-relevant events rather than technical structures. Participants naturally discover temporal dependencies and causal chains by placing events chronologically. Commands reveal intent; aggregates reveal consistency boundaries. The visual format enables cross-discipline conversation between stakeholders with different backgrounds, delivering collaboration beyond silo boundaries. + +## Relevance + +Essential for Domain-Driven Design, collaborative domain modeling, microservices architecture design. Applied in startup ecosystem exploration, business process improvement, software design workshops. Fundamental for breaking down silos between business and technical teams in complex domain discovery. + +## Related Research + +Connects to (Evans, 2003) on Domain-Driven Design principles, (Vernon, 2013) on implementing DDD. Part of broader collaborative modeling approaches alongside Design Thinking and Lean Startup methodologies. Related to workshop facilitation techniques and business process modeling frameworks. \ No newline at end of file diff --git a/docs/research/information-science/domain-modeling/evans_2003.md b/docs/research/information-science/domain-modeling/evans_2003.md new file mode 100644 index 0000000..799ae51 --- /dev/null +++ b/docs/research/information-science/domain-modeling/evans_2003.md @@ -0,0 +1,45 @@ +# Domain-Driven Design — Evans, 2003 + +## Citation + +Evans, E. (2003). *Domain-Driven Design: Tackling Complexity in the Heart of Software*. Addison-Wesley. 
+ +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Complex software must be built around a shared domain model, expressed in a ubiquitous language used by both domain experts and developers in conversation, code, and documentation. + +## Core Findings + +1. **Ubiquitous Language**: Single terminology shared between domain experts and developers eliminates translation costs and catches misunderstandings early +2. **Bounded Contexts**: Define where terms have a single meaning, preventing incoherent unified models when terms mean different things in different subdomains +3. **Aggregates**: Define transactional consistency boundaries - all invariants within an aggregate must hold after each operation +4. **Context Mapping Patterns**: Upstream/Downstream, Anti-corruption Layer, Conformist, Open-host Service define how separate bounded contexts interact +5. **Strategic vs. Tactical Design**: Strategic focuses on bounded contexts and context mapping; tactical focuses on entities, value objects, services + +## Mechanism + +Ubiquitous language eliminates translation costs between domain experts and developers. When "Order" means the same thing in conversation and code, misunderstandings are caught early. Bounded contexts prevent the alternative — a single unified model spanning subdomains — from becoming incoherent. Aggregates enforce transactional consistency boundaries with operations spanning aggregates accepting eventual consistency. + +## Relevance + +Essential for complex software architecture, microservices design, team organization, domain modeling. Applied in enterprise software development, distributed systems architecture. Foundational for strategic system design aligning technical implementation with business domains and expert knowledge. + +## Related Research + +Connects to (Brandolini, 2012) on Event Storming for domain discovery, (Vernon, 2013) on DDD implementation. 
Part of broader software architecture approaches alongside microservices, CQRS, event sourcing. Related to Conway's Law and team topologies for organizational design. \ No newline at end of file diff --git a/docs/research/information-science/domain-modeling/vernon_2013.md b/docs/research/information-science/domain-modeling/vernon_2013.md new file mode 100644 index 0000000..72d60a8 --- /dev/null +++ b/docs/research/information-science/domain-modeling/vernon_2013.md @@ -0,0 +1,45 @@ +# Implementing Domain-Driven Design — Vernon, 2013 + +## Citation + +Vernon, V. (2013). *Implementing Domain-Driven Design*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Context mapping provides nine inter-context relationship patterns describing how bounded contexts relate to each other, preventing model pollution and reducing integration friction. + +## Core Findings + +1. **Nine Context Mapping Patterns**: Shared Kernel, Customer-Supplier, Conformist, Anticorruption Layer, Separate Ways, Open Host Service, Published Language, Big Ball of Mud, Partnership +2. **Relationship Trade-offs**: Each pattern carries specific coordination costs and risk implications requiring careful selection +3. **Pattern Selection Guidance**: Use ACL when downstream has limited influence; Customer-Supplier when teams can negotiate; Open Host Service for many standardized consumers +4. **Social Contract Explicit**: Context maps make team relationships, obligations, and constraints transparent +5. **Practical DDD Implementation**: Extends Evans' foundational work with concrete implementation patterns and guidance + +## Mechanism + +Context mapping makes social and technical contracts between teams explicit. Customer-Supplier demands upstream awareness; Conformist accepts upstream dominance; Anticorruption Layer isolates from model drift. 
Named relationships clarify obligations and constraints, preventing accidental coupling and model contamination. + +## Relevance + +Essential for microservices architecture, distributed systems design, team organization. Applied in bounded context definition, API design, organizational patterns. Critical for implementing DDD at scale in complex enterprise environments with multiple development teams. + +## Related Research + +Vaughn Vernon builds on (Evans, 2003) foundational DDD work. Author of "Reactive Messaging Patterns with the Actor Model" (2015), "Domain-Driven Design Distilled" (2016). Leading DDD practitioner and educator providing concrete implementation guidance for Evans' theoretical framework. diff --git a/docs/research/psychology/cognitive/craik_lockhart_1972.md b/docs/research/psychology/cognitive/craik_lockhart_1972.md new file mode 100644 index 0000000..682be6a --- /dev/null +++ b/docs/research/psychology/cognitive/craik_lockhart_1972.md @@ -0,0 +1,48 @@ +# Levels of Processing — Craik & Lockhart, 1972 + +## Citation + +Craik, F. I. M., & Lockhart, R. S. (1972). "Levels of processing: A framework for memory research." *Journal of Verbal Learning and Verbal Behavior*, 11(6), 671–684. https://doi.org/10.1016/S0022-5371(72)80001-X + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Deeper processing—explaining *why* a rule matters—leads to better retention and application than shallow processing. Memory is a byproduct of the depth of cognitive processing, not repetition. + +## Core Findings + +1. **Three levels of processing**: Structural/visual (appearance), phonemic (sound), and semantic (meaning) — with deeper levels producing stronger memory traces. +2. **Semantic processing superiority**: Information processed for meaning creates more durable memory traces than information processed for appearance or sound. +3. 
**Elaborative encoding advantage**: Adding semantic processing requirements (explaining rationale) improves both immediate compliance and long-term adherence. +4. **Contradiction of multi-store model**: Challenges Atkinson-Shiffrin model by showing that rehearsal type matters more than rehearsal amount. +5. **Self-reference effect enhancement**: Information relating to oneself receives the deepest processing and highest recall. +6. **Neural correlates**: Brain imaging shows increased left prefrontal cortex activity during semantic vs. shallow processing tasks. + +## Mechanism + +The "levels of processing" framework shows that semantic processing (meaning-based) creates stronger memory traces than phonetic (sound-based) or visual (appearance-based) processing. Forcing reviewers to explain why rules matter engages semantic processing, creating widespread activation in semantic networks through meaningful connections to existing knowledge. + +## Relevance + +Enforcement tables with "Why it matters" columns force elaborative encoding. Instead of superficial rule checking, reviewers must process the underlying rationale, leading to better internalization of design principles. Essential for any learning system requiring deep understanding rather than rote compliance. + +## Related Research + +- (Craik & Tulving, 1975) — Empirical validation showing semantic encoding superiority in recall tasks +- (Hyde & Jenkins, 1973) — Orienting tasks and incidental learning effects +- (Lockhart & Craik, 1990) — Retrospective commentary on levels of processing framework \ No newline at end of file diff --git a/docs/research/psychology/cognitive/fisher_geiselman_1987.md b/docs/research/psychology/cognitive/fisher_geiselman_1987.md new file mode 100644 index 0000000..a0eb6a9 --- /dev/null +++ b/docs/research/psychology/cognitive/fisher_geiselman_1987.md @@ -0,0 +1,47 @@ +# The Enhanced Cognitive Interview — Fisher & Geiselman, 1987 + +## Citation + +Fisher, R. P., & Geiselman, R. E. 
(1987). "Enhancing enhanced eyewitness memory: Refining the cognitive interview." *Journal of Police Science and Administration*, 15, 291-297. (Enhanced version of original 1984 cognitive interview) + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The enhanced Cognitive Interview elicits approximately 35% more correct information than standard interviews with equal accuracy rates. + +## Core Findings + +1. **Four retrieval mnemonics**: (1) Mental reinstatement of context, (2) Report everything, (3) Temporal reversal, (4) Perspective change - each opens different memory access routes. +2. **Enhanced version improvements**: 45% increase in correct information over original CI through better interview structure and social dynamics. +3. **Equal accuracy rates**: 85% accuracy for CI vs 82% for standard interviews - more information without sacrificing reliability. +4. **Encoding specificity leverage**: Context reinstatement increases memory availability by overlapping retrieval cues with original encoding conditions. +5. **Multi-component memory access**: Different retrieval routes surface information that direct questions cannot access. +6. **Field effectiveness**: Real police officers trained in CI gather significantly more accurate information from actual crime witnesses. + +## Mechanism + +Four retrieval mnemonics open different memory access routes, collectively surfacing what direct questions cannot reach. Mental reinstatement leverages encoding specificity principle - memories encoded with environmental/emotional context become accessible when similar context is recreated. "Report everything" and perspective changes tap the multi-component view of memory, accessing different aspects of the complex memory trace through alternative retrieval pathways. + +## Relevance + +Foundational technique for investigative interviewing, user research, and requirements gathering. 
Widely adopted by police departments, private investigators, and attorneys. The principles apply to any situation requiring complete information extraction: incident analysis, post-mortem reviews, and stakeholder interviews where comprehensive recall is essential. + +## Related Research + +- (Tulving & Thomson, 1973) — Encoding specificity principle underlying context reinstatement +- (Flanagan, 1954) — Critical incident technique as complementary approach to incident-based recall diff --git a/docs/research/psychology/cognitive/flanagan_1954.md b/docs/research/psychology/cognitive/flanagan_1954.md new file mode 100644 index 0000000..2ebd13f --- /dev/null +++ b/docs/research/psychology/cognitive/flanagan_1954.md @@ -0,0 +1,47 @@ +# The Critical Incident Technique — Flanagan, 1954 + +## Citation + +Flanagan, J. C. (1954). "The critical incident technique." *Psychological Bulletin*, 51(4), 327–357. https://doi.org/10.1037/h0061470 + +## Source Type + +Academic Paper + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Anchoring an interview on a specific past incident breaks schema-based recall, revealing real workarounds, edge cases, and failure modes that never surface when asked "how does this usually work?" + +## Core Findings + +1. **Direct observations of critical behavior**: CIT focuses on specific incidents where the purpose and consequences are clear, avoiding generalized schema-based responses. +2. **Schema bypass mechanism**: Direct questions elicit sanitized mental models of "how things should work," while incidents access episodic memory anchored to specific sensory and emotional details. +3. **Flexible data collection**: Method allows participants to describe experiences in their own words without forcing them into predetermined frameworks. +4. **Rare event identification**: CIT captures uncommon but significant events that routine methods miss by focusing only on everyday patterns. +5. 
**Practical problem solving**: Originally developed for Aviation Psychology Program during WWII to identify pilot errors and improve training/systems design. +6. **Wide applicability**: Successfully used in healthcare, organizational development, market research, and information-seeking behavior studies. + +## Mechanism + +Direct questions elicit the stakeholder's mental schema — a sanitized, gap-free description of how things should work. Critical incidents bypass the schema because episodic memory is anchored to specific sensory and emotional detail. The technique requires incidents to be "sufficiently complete" and "critical" (making positive or negative contribution) where purpose and consequences are clear to the observer. + +## Relevance + +Foundational technique for requirements gathering, user research, and system design. Essential for uncovering actual user behaviors, workarounds, and failure modes that structured interviews miss. Widely adopted in UX research, safety analysis, and organizational problem-solving where understanding real-world behavior patterns is crucial. + +## Related Research + +- (Bitner, Booms & Tetreault, 1990) — Service encounter satisfaction research using CIT +- (Klein, 1998) — Recognition-primed decision making and naturalistic observation methods diff --git a/docs/research/psychology/cognitive/gollwitzer_1999.md b/docs/research/psychology/cognitive/gollwitzer_1999.md new file mode 100644 index 0000000..f2f9c32 --- /dev/null +++ b/docs/research/psychology/cognitive/gollwitzer_1999.md @@ -0,0 +1,48 @@ +# Implementation Intentions — Gollwitzer, 1999 + +## Citation + +Gollwitzer, P. M. (1999). "Implementation intentions: Strong effects of simple plans." *American Psychologist*, 54(7), 493-503. 
https://doi.org/10.1037/0003-066X.54.7.493 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +"If X then Y" plans are 2–3x more likely to execute than general intentions because they create automatic cue-response links in memory. + +## Core Findings + +1. **If-then plan effectiveness**: Implementation intentions result in 2-3x higher goal achievement rates compared to general goal intentions alone. +2. **Strategic automaticity**: If-then plans automate action initiation by creating strong mental links between situational cues and desired behaviors. +3. **Cognitive load reduction**: Pre-planned responses eliminate hesitation and deliberation, freeing cognitive resources for other tasks. +4. **Goal shielding**: Implementation intentions protect ongoing goal pursuit from distracting thoughts, competing goals, and emotional interference. +5. **Broad applicability**: Effective across diverse domains including health behaviors (breast self-examination: 100% vs 53% completion), voting (4.1 percentage point increase), and emotion regulation. +6. **Planning specificity requirement**: Plans must specify when, where, and how the behavior will be performed to achieve maximum effectiveness. + +## Mechanism + +If-then plans create automatic cue-response links in memory. The brain processes "if function > 20 lines then extract helper" as an action trigger, not a suggestion to consider. The anticipated situation becomes highly activated in memory, leading to immediate, efficient action initiation without conscious intent when the cue is encountered. + +## Relevance + +Foundational for automated behavioral interventions, habit formation systems, and decision-making tools. Essential for any system requiring reliable execution of intended behaviors — from code review processes to health interventions. 
The specificity requirement directly applies to creating effective automation rules and behavioral prompts. + +## Related Research + +- (Gollwitzer & Brandstätter, 1997) — Original empirical validation of implementation intentions +- (Rogers et al., 2015) — Planning prompts and follow-through effectiveness +- (Achtziger, Gollwitzer & Sheeran, 2008) — Goal shielding mechanisms diff --git a/docs/research/psychology/cognitive/hattie_timperley_2007.md b/docs/research/psychology/cognitive/hattie_timperley_2007.md new file mode 100644 index 0000000..df053a6 --- /dev/null +++ b/docs/research/psychology/cognitive/hattie_timperley_2007.md @@ -0,0 +1,48 @@ +# The Power of Feedback — Hattie & Timperley, 2007 + +## Citation + +Hattie, J., & Timperley, H. (2007). "The power of feedback." *Review of Educational Research*, 77(1), 81–112. https://doi.org/10.3102/003465430298487 + +## Source Type + +Academic Paper + +## Method + +Meta-analysis + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Feedback is most effective when it tells the agent exactly what went wrong and what the correct action is. "FAIL: function > 20 lines at file:47" is actionable; "Apply function length rules" is not. + +## Core Findings + +1. **Feedback power**: Among the most powerful influences on learning and achievement, but impact can be positive or negative depending on type and delivery. +2. **Specific feedback superiority**: Task-specific feedback that identifies exact errors and correct actions is significantly more effective than general comments. +3. **Four feedback levels**: Task level (correctness), process level (strategies), self-regulation level (monitoring), and self level (personal praise) — with task and process being most effective. +4. **Three key questions framework**: "Where am I going?" (goals), "How am I going?" (progress), "Where to next?" (improvement strategies). +5. 
**Timing effects**: Immediate feedback works best for procedural tasks, delayed feedback for complex learning requiring reflection. +6. **Cognitive load management**: Effective feedback reduces rather than increases cognitive burden by providing clear direction. + +## Mechanism + +Specific feedback creates a direct mapping between error and correction, reducing cognitive load by eliminating interpretation steps. Vague feedback requires the recipient to infer what went wrong, which introduces interpretation errors and reduces action likelihood. The model emphasizes reducing gaps between current performance and goals through precise, actionable information. + +## Relevance + +Foundational for automated feedback systems, code review processes, and instructional design. Critical for any system providing performance feedback — from linting tools to learning management systems. The specificity principle directly applies to error messaging, validation feedback, and progress indicators in software interfaces. + +## Related Research + +- (Black & Wiliam, 1998) — Formative assessment and feedback loops in learning +- (Kulhavy & Stock, 1989) — Feedback timing and learning effectiveness +- (Kluger & DeNisi, 1996) — Feedback intervention theory diff --git a/docs/research/psychology/cognitive/kahneman_2011.md b/docs/research/psychology/cognitive/kahneman_2011.md new file mode 100644 index 0000000..616c09f --- /dev/null +++ b/docs/research/psychology/cognitive/kahneman_2011.md @@ -0,0 +1,47 @@ +# Thinking, Fast and Slow — Kahneman, 2011 + +## Citation + +Kahneman, D. (2011). *Thinking, Fast and Slow*. Farrar, Straus and Giroux. ISBN 978-0-374-27563-1. 
[Bestseller with millions of copies sold] + +## Source Type + +Practitioner Book + +## Method + +Synthesis + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Human thinking operates through two distinct systems: System 1 (fast, automatic, intuitive) and System 2 (slow, deliberate, effortful), with System 1 dominating most decisions despite being vulnerable to systematic biases. + +## Core Findings + +1. **Dual-process theory**: System 1 operates automatically and quickly with little conscious effort; System 2 requires attention and operates more slowly and deliberately. +2. **System 1 dominance**: Most of our thinking and decision-making is handled by System 1, while System 2 often endorses impressions and intuitions generated by System 1. +3. **Cognitive biases**: System 1 is susceptible to predictable errors including anchoring bias, availability heuristic, confirmation bias, and overconfidence. +4. **Cognitive ease**: When information is processed fluently by System 1, we experience cognitive ease, leading to increased belief and positive affect. +5. **Loss aversion**: People feel losses more intensely than equivalent gains, leading to irrational decision-making patterns. +6. **Prospect theory**: People evaluate outcomes relative to reference points rather than in absolute terms, and overweight small probabilities. + +## Mechanism + +System 2 must be deliberately activated before System 1's automatic judgments become anchored. This requires effortful cognitive work that most people avoid due to mental laziness. Running deliberate analytical processes (like systematic review checklists) before allowing intuitive responses prevents System 1's fast impressions from contaminating careful evaluation. + +## Relevance + +Foundational for understanding human decision-making biases in design, product management, user research, and team processes. Essential for creating systems that account for predictable human cognitive limitations. 
Critical for designing decision-support tools and review processes. + +## Related Research + +- (Tversky & Kahneman, 1974) — Original heuristics and biases research +- (Stanovich & West, 2000) — Dual-process theory development diff --git a/docs/research/psychology/cognitive/klein_1998.md b/docs/research/psychology/cognitive/klein_1998.md new file mode 100644 index 0000000..150da87 --- /dev/null +++ b/docs/research/psychology/cognitive/klein_1998.md @@ -0,0 +1,47 @@ +# Sources of Power / PreMortem — Klein, 1998/2007 + +## Citation + +Klein, G. (1998). *Sources of Power: How People Make Decisions*. MIT Press. ISBN 0-262-61146-5. PreMortem method described in Harvard Business Review (2007) and further developed as risk assessment technique. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Prospective hindsight ("imagine this failed — why?") catches 30% more potential issues than forward-looking review because the brain is better at explaining past events than predicting future ones. + +## Core Findings + +1. **Recognition-primed decision (RPD) model**: Experts don't compare multiple options; they rapidly recognize situations and implement the first viable solution that comes to mind. +2. **PreMortem technique**: By framing failure as having already occurred, teams activate explanation mode rather than prediction mode, uncovering more failure scenarios. +3. **Naturalistic decision making**: Laboratory models cannot adequately describe decision-making under time pressure and uncertainty in real-world settings. +4. **Expertise development**: Experts rely on pattern recognition and mental simulation rather than analytical comparison of alternatives. +5. **Intuition validation**: Expert intuition is based on rapid pattern recognition from extensive domain experience, not mystical insight. +6. 
**Time pressure effects**: Under pressure, people satisfice (find first adequate solution) rather than optimize (find best possible solution). + +## Mechanism + +PreMortem shifts cognitive frame from prediction (weak) to explanation (strong). By asking "imagine this already failed — why?" the technique activates the brain's superior ability to generate causal explanations for past events. This reveals failure modes that forward-looking analysis ("what could go wrong?") typically misses because prediction requires cognitive resources that explanation does not. + +## Relevance + +Essential methodology for project risk assessment, decision-making improvement, and team planning processes. Widely adopted in software development, military planning, and organizational risk management. Critical for any high-stakes decision where failure analysis is valuable. + +## Related Research + +- (Kahneman & Klein, 2009) — Conditions for intuitive expertise +- (Mitchell et al., 1989) — Prospective hindsight effectiveness studies diff --git a/docs/research/psychology/cognitive/mcdaniel_einstein_2000.md b/docs/research/psychology/cognitive/mcdaniel_einstein_2000.md new file mode 100644 index 0000000..71c3668 --- /dev/null +++ b/docs/research/psychology/cognitive/mcdaniel_einstein_2000.md @@ -0,0 +1,47 @@ +# Strategic and Automatic Processes in Prospective Memory — McDaniel & Einstein, 2000 + +## Citation + +McDaniel, M. A., & Einstein, G. O. (2000). "Strategic and automatic processes in prospective memory retrieval." *Applied Cognitive Psychology*, 14(7), S127–S144. https://doi.org/10.1002/acp.775 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Memory for intended actions is better when cues are embedded at the point of action, not in a separate appendix or reference document. + +## Core Findings + +1. 
**Embedded cue effectiveness**: Prospective memory performance significantly improves when contextual cues appear exactly where the intended action should occur. +2. **Multi-process model**: Prospective memory can operate through both strategic monitoring and automatic retrieval, depending on cue-action associations. +3. **Event-based vs. time-based differences**: Event-based prospective memory (triggered by external cues) consistently outperforms time-based (triggered by internal monitoring). +4. **Strategic automaticity**: Strong cue-action associations can trigger retrieval automatically, reducing cognitive load on working memory. +5. **Point-of-decision placement**: Inline reminders and checks are more effective than separate reference materials or appendices. +6. **Immediate-execute vs. delayed-execute**: Performance deteriorates when delays or interruptions occur between cue perception and intended action. + +## Mechanism + +Placing if-then gates inline rather than in a separate reference document increases adherence because the cue appears exactly when the developer is about to make the relevant decision. Embedded cues leverage both the encoding specificity principle (context overlap) and reduce the cognitive load of having to remember to check separate reference materials. Strong cue-action associations can trigger automatic retrieval without conscious monitoring. + +## Relevance + +Critical for interface design, process documentation, and workflow systems. Applies to code review checklists, safety procedures, quality gates, and any system requiring reliable execution of intended actions. Essential for designing effective reminders, notifications, and decision-support systems where timing and context are crucial. 
+ +## Related Research + +- (Gollwitzer, 1999) — Implementation intentions creating automatic cue-response links +- (Miller, 1956) — Working memory limitations affecting monitoring-based prospective memory diff --git a/docs/research/psychology/cognitive/miller_1956.md b/docs/research/psychology/cognitive/miller_1956.md new file mode 100644 index 0000000..53c1cad --- /dev/null +++ b/docs/research/psychology/cognitive/miller_1956.md @@ -0,0 +1,48 @@ +# The Magical Number Seven, Plus or Minus Two — Miller, 1956 + +## Citation + +Miller, G. A. (1956). "The magical number seven, plus or minus two: Some limits on our capacity for processing information." *Psychological Review*, 63(2), 81–97. https://doi.org/10.1037/h0043158 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Working memory is limited to 7±2 items. Chunking related information into structured patterns allows processing of larger information sets by treating chunks as single items. + +## Core Findings + +1. **Working memory capacity**: Humans can hold approximately 7±2 discrete items in short-term memory before performance degrades. +2. **Chunking mechanism**: Related items can be grouped into meaningful units ("chunks"), allowing more information to be retained by treating each chunk as a single item. +3. **Information channel capacity**: One-dimensional absolute judgment is limited to approximately 2-3 bits of information (4-8 alternatives). +4. **Coincidence observation**: Miller noted the correspondence between judgment limits and memory span was only coincidental, not indicative of a deeper principle. +5. **Structured presentation advantage**: Tables and organized formats reduce cognitive load compared to narrative text by enabling parallel processing. +6. **Recognition vs. capacity**: The limit applies to simultaneous retention, not recognition or learned associations. 
+ +## Mechanism + +Rather than processing each item sequentially, structured presentations allow the reviewer to scan patterns and identify exceptions. Tables with consistent column structure create predictable information architecture that reduces cognitive load. Chunking leverages existing knowledge to group related information into higher-order units, effectively multiplying working memory capacity by the complexity of each chunk. + +## Relevance + +Foundational principle for interface design, information architecture, and cognitive load management. Enforcement tables in verification tasks leverage chunking principles. Instead of prose checklists requiring sequential processing, structured tables allow reviewers to process multiple items in parallel while maintaining consistency. Essential for any system requiring human information processing. + +## Related Research + +- (Sweller, 1988) — Cognitive load theory and instructional design +- (Cowan, 2001) — Revised capacity estimate of ~4 chunks rather than 7±2 +- (Baddeley, 1992) — Working memory model with multiple components \ No newline at end of file diff --git a/docs/research/psychology/cognitive/reynolds_gutman_1988.md b/docs/research/psychology/cognitive/reynolds_gutman_1988.md new file mode 100644 index 0000000..1b85d7e --- /dev/null +++ b/docs/research/psychology/cognitive/reynolds_gutman_1988.md @@ -0,0 +1,46 @@ +# Laddering Theory / Means-End Chain — Reynolds & Gutman, 1988 + +## Citation + +Reynolds, T. J., & Gutman, J. (1988). "Laddering theory, method, analysis, and interpretation." *Journal of Advertising Research*, 28(1), 11–31. DOI: 10.1080/00218499.1988.12467766. 
[Highly cited: 3,779+ citations] + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The laddering technique reveals that the stakeholder's first answer about what they want is rarely the real constraint — repeatedly asking "Why is that important to you?" climbs from surface attributes to deeper motivational values. + +## Core Findings + +1. **Means-end chain structure**: Concrete attribute → functional consequence → psychosocial consequence → terminal value represents four levels of consumer motivation. +2. **Progressive revelation**: Stakeholders typically start with concrete attributes but the real decision drivers lie at the consequence and value levels. +3. **Value conflict identification**: Stakeholders whose surface requirements look identical often have ladders that diverge at the consequence level, revealing hidden conflicts. +4. **Interview methodology**: Systematic probing with "Why is that important?" uncovers deeper motivational structures that traditional surveys miss. +5. **Advertising applications**: Understanding means-end chains enables more effective positioning by connecting product features to personal values. + +## Mechanism + +The laddering interview technique systematically probes upward through levels of abstraction using "Why is that important to you?" prompts. Each level reveals different types of motivation: attributes (what the product has), functional consequences (what it does), psychosocial consequences (how it makes you feel/appear), and values (what life goals it serves). This climbing process reveals the complete motivational pathway. + +## Relevance + +Essential technique for requirements gathering, user research, and stakeholder analysis. Helps product managers, UX researchers, and business analysts uncover the true drivers behind stated requirements. 
Critical for avoiding surface-level solutions that miss deeper user needs and organizational goals. + +## Related Research + +- (Rokeach, 1973) — Values theory underlying the terminal value concept +- (Gutman, 1982) — Original means-end chain model foundation diff --git a/docs/research/psychology/cognitive/tversky_kahneman_1974.md b/docs/research/psychology/cognitive/tversky_kahneman_1974.md new file mode 100644 index 0000000..099369d --- /dev/null +++ b/docs/research/psychology/cognitive/tversky_kahneman_1974.md @@ -0,0 +1,46 @@ +# Judgment Under Uncertainty: Heuristics and Biases — Tversky & Kahneman, 1974 + +## Citation + +Tversky, A., & Kahneman, D. (1974). Judgment under uncertainty: Heuristics and biases. *Science*, 185(4157), 1124-1131. https://doi.org/10.1126/science.185.4157.1124 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +People use mental shortcuts (heuristics) that systematically bias judgment under uncertainty. + +## Core Findings + +1. People rely on three main heuristics when making judgments under uncertainty: representativeness, availability, and anchoring and adjustment. +2. Representativeness heuristic: People judge probability by similarity to mental prototypes, ignoring base rates and sample size. +3. Availability heuristic: People assess probability by how easily examples come to mind, biasing toward memorable or recent events. +4. Anchoring and adjustment: People adjust insufficiently from initial values, even when the anchor is irrelevant. +5. These heuristics are useful but lead to severe and systematic errors in prediction and judgment. + +## Mechanism + +Heuristics serve as cognitive shortcuts that reduce complex probability assessments to simpler judgmental operations. However, they rely on selective accessibility of information rather than comprehensive analysis. 
Anchoring activates associative networks around initial values, making adjustment insufficient. Availability conflates memorability with frequency. Representativeness ignores statistical principles in favor of similarity matching. + +## Relevance + +Foundational for understanding cognitive biases in decision-making processes. Critical for designing systems that account for human judgment limitations. Directly applicable to forecast accuracy, risk assessment, and quality control processes where human judgment is involved. + +## Related Research + +- (Kahneman, 2011) — System 1 vs System 2 thinking framework +- (Gilovich, Griffin & Kahneman, 2002) — Heuristics and biases comprehensive review diff --git a/docs/research/psychology/social/cialdini_2001.md b/docs/research/psychology/social/cialdini_2001.md new file mode 100644 index 0000000..812e9f6 --- /dev/null +++ b/docs/research/psychology/social/cialdini_2001.md @@ -0,0 +1,47 @@ +# Influence: The Psychology of Persuasion — Cialdini, 1984/2001 + +## Citation + +Cialdini, R. B. (1984). *Influence: The Psychology of Persuasion*. William Morrow and Company. Revised edition (2001). HarperBusiness. ISBN 0-688-12816-5. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Influence operates through six universal principles of persuasion that create automatic compliance responses in human behavior. + +## Core Findings + +1. **Six principles of influence**: Reciprocity, commitment and consistency, social proof, authority, liking, and scarcity create predictable persuasion outcomes. +2. **Commitment and consistency**: People feel compelled to remain consistent with prior commitments, especially when made publicly or in writing. +3. **Micro-commitments**: Small initial commitments (like checking a box or filling in a form) create resistance to reversals and increase likelihood of larger compliance. +4. 
**Automatic responses**: These principles trigger "click-whirr" automatic responses where people comply without conscious deliberation. +5. **Research methodology**: Based on three years of undercover participant observation at car dealerships, fundraising organizations, and telemarketing firms. +6. **Unity principle** (added 2016): People are more influenced by those they identify with or consider part of their in-group. + +## Mechanism + +Commitment devices work by engaging the consistency principle — once someone has made an explicit commitment (especially written or public), psychological pressure to remain consistent makes reversal psychologically costly. Structured tables with PASS/FAIL cells create micro-commitments where marking "FAIL" requires explicit justification, making silent passes feel inconsistent. + +## Relevance + +Foundational framework for understanding persuasion in business, marketing, negotiation, and social influence contexts. Essential for both applying ethical influence techniques and defending against manipulation. Widely used in UX design, sales processes, and behavioral change interventions. + +## Related Research + +- (Kiesler, 1971) — Psychology of commitment and consistency +- (Festinger, 1957) — Cognitive dissonance theory underlying consistency principle diff --git a/docs/research/psychology/social/mellers_et_al_2001.md b/docs/research/psychology/social/mellers_et_al_2001.md new file mode 100644 index 0000000..8a5c980 --- /dev/null +++ b/docs/research/psychology/social/mellers_et_al_2001.md @@ -0,0 +1,46 @@ +# Adversarial Collaboration — Kahneman & Various, Multiple Studies + +## Citation + +Various studies on adversarial collaboration including: Kahneman, D. & Klein, G. (2009). "Conditions for intuitive expertise: A failure to disagree." *American Psychologist*, 64(6), 515-526. Clark, C. J., Costello, T., Mitchell, G., & Tetlock, P. E. (2022). "Keep your enemies close: Adversarial collaborations will improve behavioral science." 
*Journal of Applied Research in Memory and Cognition*, 11(1), 1-18. + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Adversarial collaboration produces highest-quality thinking when parties with opposing hypotheses work together to find flaws in each other's reasoning and jointly design experiments. + +## Core Findings + +1. **Collaborative antagonism**: Scientists with competing hypotheses can co-design experiments that satisfy both groups regarding bias and experimental weaknesses. +2. **Quality improvement**: Adversarial collaboration reduces cognitive-motivational biases and improves scientific reasoning quality. +3. **Feasibility paradox**: "Most feasible when least needed" — works best when camps have testable theories and common methodological standards, but is hardest to implement when most needed (when communities lack falsification criteria). +4. **Neutral moderation**: Often requires neutral third-party moderators to facilitate the collaboration process. +5. **Open science framework**: Emphasizes transparency throughout research process, fitting within broader open science movement. + +## Mechanism + +Explicitly framing reviewers as "your job is to break this feature" activates adversarial collaboration mode. Reviewers seek disconfirmation rather than confirmation. Joint experimental design forces both sides to agree on methodology, reducing bias. Co-publication of results ensures accountability and prevents selective interpretation. + +## Relevance + +Powerful methodology for resolving scientific disputes, improving research quality, and reducing confirmation bias in academic work. Applicable to peer review, experimental design, hypothesis testing, and conflict resolution in research communities. Essential for advancing contentious scientific questions. 
+ +## Related Research + +- (Tetlock & Mitchell, 2009) — Implicit bias and accountability systems +- (Latham, Erez & Locke, 1988) — Early example of adversarial collaboration in goal-setting research diff --git a/docs/research/psychology/social/rogers_farson_1957.md b/docs/research/psychology/social/rogers_farson_1957.md new file mode 100644 index 0000000..2c53bd3 --- /dev/null +++ b/docs/research/psychology/social/rogers_farson_1957.md @@ -0,0 +1,46 @@ +# Active Listening — Rogers & Farson, 1957 + +## Citation + +Rogers, C. R., & Farson, R. E. (1957). "Active Listening." Industrial Relations Center, University of Chicago. Reprinted in Newman, R. G., Danziger, M. A., & Cohen, M. (1987). *Communicating in Business Today*. D.C. Heath & Company. + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Active listening is a transformative communication technique that creates non-threatening, empathic environments where people feel heard and understood, promoting personal and relational growth. + +## Core Findings + +1. **Three-step process**: Paraphrasing what was heard, asking clarifying questions, then summarizing main points and intent reduces misunderstandings and builds trust. +2. **Therapeutic foundation**: Rooted in Rogers' three facilitative conditions for effective counseling: empathy, genuineness, and unconditional positive regard. +3. **Transformative power**: "Sensitive listening is a most effective agent for individual personality change and group development." +4. **Behavioral outcomes**: People who have been listened to become "more emotionally mature, more open to their experiences, less defensive, more democratic, and less authoritarian." +5. **Active vs. passive**: Despite popular notion that listening is passive, clinical and research evidence shows it actively brings about changes in people's attitudes and values. 
+ +## Mechanism + +Paraphrasing forces the listener to reconstruct the speaker's meaning, immediately surfacing gaps in understanding. Clarifying questions address residual ambiguity. Summarizing creates a shared record that both parties can confirm or correct. This process establishes empathic connection and creates psychological safety for the speaker to be vulnerable and authentic. + +## Relevance + +Foundational communication technique applicable across therapeutic, business, educational, and interpersonal contexts. Essential skill for leaders, counselors, managers, and anyone seeking to build trust and understanding in human relationships. Forms the basis for modern conflict resolution and negotiation strategies. + +## Related Research + +- (Gottman, 1999) — Critique of active listening effectiveness in marriage therapy +- (McNaughton et al., 2008) — LAFF strategy development for educational contexts diff --git a/docs/research/psychology/social/tetlock_1985.md b/docs/research/psychology/social/tetlock_1985.md new file mode 100644 index 0000000..6b3baaa --- /dev/null +++ b/docs/research/psychology/social/tetlock_1985.md @@ -0,0 +1,45 @@ +# Accountability: A Social Check on the Fundamental Attribution Error — Tetlock, 1985 + +## Citation + +Tetlock, P. E. (1985). Accountability: A social check on the fundamental attribution error. *Social Psychology Quarterly*, 48(3), 227-236. + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Anticipating accountability to an unknown audience improves reasoning quality and reduces attribution errors. + +## Core Findings + +1. Accountability to an unknown audience with unknown views improves reasoning quality compared to no accountability conditions. +2. People anticipating being audited adjust their reasoning to be more careful and systematic. +3. 
Accountability reduces the fundamental attribution error - the tendency to overemphasize personality-based explanations while underemphasizing situational factors. +4. The effect is stronger when people don't know what position they'll be expected to defend. + +## Mechanism + +When people expect to justify their judgments to others, they engage in more effortful, systematic processing. The anticipation of having to explain one's reasoning to an unknown audience motivates more careful consideration of multiple perspectives and evidence, leading to less biased judgments. + +## Relevance + +Critical for review processes and agent design. Creating accountability structures (like APPROVED/REJECTED with evidence requirements) prompts more careful analysis. Unknown audience accountability is particularly powerful because reviewers can't game their response to please a specific viewpoint. + +## Related Research + +- (Kahneman, 2011) — System 1 vs System 2 thinking and cognitive biases +- (Lerner & Tetlock, 1999) — Comprehensive review of accountability effects on judgment diff --git a/docs/research/software-engineering/architecture/bass_et_al_2021.md b/docs/research/software-engineering/architecture/bass_et_al_2021.md new file mode 100644 index 0000000..7ebd225 --- /dev/null +++ b/docs/research/software-engineering/architecture/bass_et_al_2021.md @@ -0,0 +1,48 @@ +# Software Architecture in Practice — Bass, Clements & Kazman, 2021 + +## Citation + +Bass, L., Clements, P., & Kazman, R. (2021). *Software Architecture in Practice* (4th ed.). Addison-Wesley. ISBN 978-0-13-534613-8. First edition published 1998. + +## Source Type + +Practitioner Book + +## Method + +Synthesis + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Quality attributes — not functional requirements — drive architectural decisions. Performance, availability, security, modifiability, reliability, and usability create measurable constraints that determine system structure. 
+ +## Core Findings + +1. **Quality attribute primacy**: Six architecturally significant quality attributes (Performance, Availability, Security, Modifiability, Reliability, Usability) drive structural decisions more than functional requirements. +2. **Architectural tactics catalog**: Each quality attribute produces concrete architectural tactics — Performance tactics include resource arbitration, concurrency, caching; Modifiability tactics include encapsulation, substitution, binding time. +3. **Style-attribute alignment**: Architectural style selection must be justified against quality attribute priorities, not personal preference or technology trends. +4. **Utility tree methodology**: Systematic approach to prioritize quality attributes against business value, producing ranked constraints for architectural decision-making. +5. **Trade-off recognition**: Quality attributes often conflict — optimizing for Performance may harm Modifiability, requiring explicit trade-off decisions. +6. **ATAM integration**: Architecture Tradeoff Analysis Method provides structured evaluation framework for discovering architectural risks early. +7. **Measurable constraints**: Quality attributes work because they create concrete, testable constraints on system structure rather than abstract goals. + +## Mechanism + +Quality attributes work as architectural drivers because they create measurable constraints on system structure. Performance requires specific structural patterns (caching layers, async processing, resource pooling); Modifiability requires different patterns (abstraction layers, dependency inversion, plugin architectures). These constraints are often in tension — optimizing for Performance may harm Modifiability. The utility tree method forces stakeholders to prioritize quality attributes against business value, producing a ranked list that architects use to make trade-off decisions with explicit justification. 
+ +## Relevance + +Foundational methodology for architectural decision-making and system design. Essential for understanding how non-functional requirements translate into concrete structural choices. Critical for architectural evaluation, technology selection, and trade-off analysis. Widely adopted framework used by enterprise architects, system designers, and software engineering teams globally. + +## Related Research + +- (Kazman et al., 2000) — ATAM methodology for architectural trade-off analysis +- (Fowler, 2003) — Architect's role in making significant decisions that are hard to change later diff --git a/docs/research/software-engineering/architecture/boehm_1991.md b/docs/research/software-engineering/architecture/boehm_1991.md new file mode 100644 index 0000000..0c3d574 --- /dev/null +++ b/docs/research/software-engineering/architecture/boehm_1991.md @@ -0,0 +1,47 @@ +# Software Risk Management — Boehm, 1991 + +## Citation + +Boehm, B. W. (1991). "Software Risk Management: Principles and Practices." *IEEE Software*, 8(1), 32–41. https://doi.org/10.1109/52.62930 + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Architecture risk can be systematically assessed using Probability × Impact classification, replacing intuitive risk assessment with an explicit, auditable evaluation framework. + +## Core Findings + +1. **Risk quantification framework**: Each identified risk is rated on two dimensions: Probability (likelihood of materialization) and Impact (severity of consequence), with risks prioritized by their product. +2. **Risk leverage concept**: The ratio of risk reduction to mitigation cost enables teams to focus effort on high-leverage interventions (significant risk reduction for low cost). +3. 
**Systematic risk identification**: Boehm's 10 top software risk items provide a checklist for proactive risk identification across personnel, requirements, technology, and schedule dimensions. +4. **Risk mitigation strategies**: Three primary approaches - risk avoidance (eliminate risk source), risk monitoring (track risk indicators), and risk contingency planning (prepare response plans). +5. **COCOMO model foundation**: Boehm's cost estimation models (COCOMO/COCOMO II) provide quantitative basis for impact assessment in software projects. +6. **Spiral model integration**: Risk assessment is built into the spiral software development model at each iteration cycle. + +## Mechanism + +Probability × Impact works because it forces decision-makers to externalize and quantify what would otherwise remain gut feelings. Low-probability high-impact risks (e.g., database vendor bankruptcy) are distinguished from high-probability low-impact risks (e.g., minor performance degradation) — both may have the same exposure score but demand different mitigation strategies. The framework also introduces risk leverage: high-leverage mitigations (significant risk reduction for low cost) are prioritized over low-leverage ones (minor risk reduction for high cost). + +## Relevance + +Foundational framework for architectural decision records (ADRs) where each decision carries potential risks requiring explicit evaluation. Essential for project management, system design, and any development context requiring systematic risk assessment. Directly applicable to technology selection, architecture planning, and resource allocation decisions in software engineering. 
+ +## Related Research + +- (Kazman, Klein & Clements, 2000) — ATAM method building on Boehm's risk assessment principles +- (Fowler, 2003) — Architectural decision-making frameworks incorporating risk evaluation diff --git a/docs/research/software-engineering/architecture/brown_2018.md b/docs/research/software-engineering/architecture/brown_2018.md new file mode 100644 index 0000000..45a6e27 --- /dev/null +++ b/docs/research/software-engineering/architecture/brown_2018.md @@ -0,0 +1,48 @@ +# C4 Model — Brown, 2006–2018 + +## Citation + +Brown, S. (2018). *Software Architecture for Developers*, Volume 1. Leanpub. C4 model first described 2006–2011, official site launched 2018. Available at https://c4model.com + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Four levels of architectural abstraction — Context, Container, Component, Code — provide just enough detail at each audience level without overwhelming any single audience. + +## Core Findings + +1. **Hierarchical abstraction levels**: Context (system in environment), Container (deployable units), Component (modules within containers), Code (classes and functions). +2. **Audience-specific communication**: Each level answers different questions for different audiences — Context for stakeholders, Container for developers/operators, Component for internal structure, Code for detailed design. +3. **Progressive disclosure**: Starting from Context and drilling down prevents premature detail overload. +4. **Notation independence**: C4 works with any diagramming tool or notation — boxes and lines are sufficient. +5. **Tooling independence**: Can be implemented with simple drawing tools, specialized software, or code-based approaches. +6. **Developer-friendly approach**: Focuses on developer mental models rather than formal architectural frameworks. +7. 
**Supporting diagrams**: System landscape, dynamic, and deployment diagrams complement the core four levels. + +## Mechanism + +The C4 model works because each level answers a different question for a different audience: Context for stakeholders and non-technical team members ("what does the system interact with?"), Container for developers and operators ("what are the deployable units and their tech stacks?"), Component for developers working within a container ("how is this container structured internally?"), and Code for detailed design (rarely needed as a diagram). Starting from Context and drilling down prevents premature detail and ensures the architecture communicates effectively at every level. + +## Relevance + +Essential methodology for software architecture documentation and communication. Widely adopted for system design, technical onboarding, and stakeholder communication. Critical for teams needing to communicate architecture across different technical skill levels and organizational roles. Directly applicable to microservices documentation, system integration planning, and technical decision-making processes. + +## Related Research + +- (Kruchten, 1995) — 4+1 architectural view model that influenced hierarchical approach +- (Fowler, 2003) — "Who Needs an Architect?" discussion on architectural communication \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/cockburn_2005.md b/docs/research/software-engineering/architecture/cockburn_2005.md new file mode 100644 index 0000000..4d64f90 --- /dev/null +++ b/docs/research/software-engineering/architecture/cockburn_2005.md @@ -0,0 +1,47 @@ +# Hexagonal Architecture (Ports & Adapters) — Cockburn, 2005 + +## Citation + +Cockburn, A. (2005). "Hexagonal Architecture." *Alistair Cockburn's blog*. Originally discussed on the Portland Pattern Repository wiki in the early 2000s; formalized as "Ports and Adapters" in 2005. 
+ +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Software should be designed so that the domain core has no dependency on any external technology or delivery mechanism. The domain exposes ports (interfaces) that define what it needs; adapters implement those ports for specific technologies. + +## Core Findings + +1. **Dependency inversion principle**: Infrastructure depends on domain abstractions, not the other way around, making the domain testable in isolation and swappable in deployment. +2. **Ports and adapters pattern**: Ports are domain-defined interfaces; adapters are infrastructure implementations that connect external systems to the domain through these ports. +3. **Technology independence**: The same domain logic can be exercised through any delivery mechanism (HTTP, CLI, message queue, test harness) without modification. +4. **Symmetrical architecture**: All external dependencies (databases, UI, external services, test harnesses) are treated equally as adapters, eliminating the traditional "top" and "bottom" of layered architectures. +5. **Framework isolation**: The domain remains independent of frameworks, databases, and UI technologies, enabling easier testing and technology evolution. +6. **Business logic protection**: Core business rules are isolated from infrastructure concerns, making them more maintainable and less brittle to external changes. + +## Mechanism + +By reversing the dependency so that infrastructure depends on domain abstractions (not the other way around), the domain becomes testable in isolation and swappable in deployment. The hexagonal shape represents that there are multiple ways to interact with the application - through different ports - and each port can have multiple adapters. 
This ensures the domain remains independent of frameworks, databases, and UI, and that the same domain logic can be exercised through any delivery mechanism without modification. + +## Relevance + +Foundational pattern for clean architecture, domain-driven design, and microservices architecture. Essential for creating testable, maintainable systems that can evolve independently of infrastructure concerns. Critical for understanding how to structure applications to achieve technology independence and high testability. Directly applicable to API design, service architecture, and any system requiring multiple integration points. + +## Related Research + +- (Martin, 2017) — Clean Architecture building on Cockburn's dependency inversion principles +- (Fowler, 2003) — Architectural decision-making frameworks that support ports and adapters pattern \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/conway_1968.md b/docs/research/software-engineering/architecture/conway_1968.md new file mode 100644 index 0000000..4b0476e --- /dev/null +++ b/docs/research/software-engineering/architecture/conway_1968.md @@ -0,0 +1,47 @@ +# Conway's Law and Inverse Conway Maneuver — Conway, 1968 + +## Citation + +Conway, M. E. (1968). "How Do Committees Invent?" *Datamation*, 14(4), 28–31. https://www.melconway.com/Home/Committees_Paper.html + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Any organization that designs a system will produce a design whose structure is a copy of the organization's communication structure. This is known as "Conway's Law." + +## Core Findings + +1. **System-organization mirroring**: Organizations are constrained to produce designs that copy their communication structures. +2. 
**Communication boundaries become system boundaries**: Teams that communicate frequently create tightly coupled systems; teams with minimal communication create loosely coupled systems. +3. **Empirical validation**: MIT and Harvard Business School research found "strong evidence to support the mirroring hypothesis" - loosely-coupled organizations produce significantly more modular products. +4. **Inverse Conway Maneuver**: Deliberately restructuring teams to match desired architecture rather than fighting organizational constraints. +5. **Three strategic responses**: Organizations can ignore (creating friction), accept (aligning architecture with existing structure), or invert (restructuring teams for desired architecture). +6. **Architectural implications**: Microservices require autonomous teams, monoliths work with closely collaborating teams, API boundaries should align with team boundaries. + +## Mechanism + +System boundaries mirror communication boundaries. Teams that communicate frequently create tightly coupled systems. Teams with minimal communication create loosely coupled systems. Organizational design becomes architectural design. The Inverse Conway Maneuver deliberately alters team organization to encourage the desired software architecture—aligning Conway's Law with architectural intent rather than fighting it. + +## Relevance + +Foundational principle for organizational design in software development. Critical for microservices architecture, team topology design, and system boundary definition. Agent role design implements Inverse Conway: the system-architect → software-engineer → system-architect loop creates a closed communication path where SA designs module boundaries, SE builds within them, and SA verifies boundary respect. 
+ +## Related Research + +- (Skelton & Pais, 2019) — Team Topologies and modern application of Conway's Law +- (MacCormack, Rusnak & Baldwin, 2011) — Empirical validation of the mirroring hypothesis \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/fielding_2000.md b/docs/research/software-engineering/architecture/fielding_2000.md new file mode 100644 index 0000000..66f9d49 --- /dev/null +++ b/docs/research/software-engineering/architecture/fielding_2000.md @@ -0,0 +1,47 @@ +# Representational State Transfer (REST) — Fielding, 2000 + +## Citation + +Fielding, R. T. (2000). *Architectural Styles and the Design of Network-based Software Architectures*. Doctoral dissertation, University of California, Irvine. https://www.ics.uci.edu/~fielding/pubs/dissertation/top.htm + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +REST defines six architectural constraints for network-based software that enable scalable, reliable, and maintainable distributed systems by treating network communication as stateless operations on resources identified by URLs. + +## Core Findings + +1. **Six architectural constraints**: Client-Server (separation of concerns), Stateless (each request self-contained), Cacheable (responses declare cacheability), Uniform Interface (standardized resource operations), Layered System (transparent intermediaries), Code-on-Demand (optional client extensions). +2. **Uniform Interface supremacy**: The constraint that differentiates REST from other distributed architectures - resources identified by URIs, manipulated via standard methods (GET, POST, PUT, DELETE), with hypermedia driving application state (HATEOAS). +3. **Statelessness benefits**: Each request contains all needed information, improving reliability (any server can handle any request) and scalability (no server-side session state management). +4. 
**Cacheability advantages**: Explicit cache control reduces latency and server load while maintaining data consistency. +5. **Layered system flexibility**: Intermediaries (caches, proxies, load balancers) can be inserted without affecting client or server design. +6. **Web architecture alignment**: REST explains why the Web scales - it codifies the architectural principles that made the WWW successful. + +## Mechanism + +REST works because the Uniform Interface constraint reduces coupling between client and server to a minimum: clients only need to understand media types and standard methods, not server implementation details. Statelessness improves reliability and scalability. For API design, REST implies contracts should be expressed as resource shapes (data structure) and media types (data format), not procedure calls. The contract becomes the resource schema and allowed transitions, not method signatures. + +## Relevance + +Foundational architecture for web services, APIs, and distributed systems. Essential for understanding modern web architecture, microservices design, and HTTP-based APIs. Critical for system architects designing scalable, maintainable distributed systems. Directly applicable to API design, web service architecture, and system integration patterns. + +## Related Research + +- (Conway, 1968) — Organizational structure implications for REST service boundaries +- (Fowler, 2014) — Microservices architecture patterns building on REST principles diff --git a/docs/research/software-engineering/architecture/fowler_2003.md b/docs/research/software-engineering/architecture/fowler_2003.md new file mode 100644 index 0000000..18f4474 --- /dev/null +++ b/docs/research/software-engineering/architecture/fowler_2003.md @@ -0,0 +1,47 @@ +# Who Needs an Architect? — Fowler, 2003 + +## Citation + +Fowler, M. (2003). "Who Needs an Architect?" *IEEE Software*, 20(5), 11–13. 
https://martinfowler.com/ieeeSoftware/whoNeedsArchitect.pdf + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The architect's job is not to draw diagrams—it is to make **significant decisions** that are hard to change later. The architect is a facilitator who builds consensus around technical direction, not a dictator who issues edicts. + +## Core Findings + +1. **Four architect archetypes**: Architect as Decision-Maker (owns hard-to-change choices), Expert (provides technical depth), Facilitator (brings stakeholders to consensus), and Gatekeeper (enforces standards and reviews compliance). +2. **Programming architects superiority**: The best architects are also programmers who understand implementation constraints firsthand rather than ivory-tower theorists. +3. **Policy vs. detail separation**: The architect owns **policy** (business rules, interfaces, architectural constraints) while developers own **detail** (algorithms, data structures, implementation mechanics). +4. **Significant decisions focus**: Architecture is about making important decisions that affect the system's ability to meet its quality requirements, not about creating comprehensive documentation. +5. **Facilitation over dictation**: Effective architects build consensus and shared understanding rather than issuing top-down mandates. +6. **Hands-on involvement**: Architects must stay involved in implementation to understand real-world constraints and trade-offs. + +## Mechanism + +This separation enables independent evolution of concerns - policy can change without affecting implementation details, and vice versa. The architect focuses on decisions that are expensive to change later (technology choices, integration patterns, quality attribute strategies) while leaving implementation flexibility to developers. 
Facilitation works better than dictation because it creates buy-in and shared understanding, making architectural decisions more likely to be followed and adapted appropriately as circumstances change. + +## Relevance + +Foundational framework for defining architectural roles and responsibilities in modern software development. The system-architect role combines decision-maker and gatekeeper functions: making architectural decisions (ADRs) and enforcing them through adversarial review. Essential for understanding the balance between architectural guidance and implementation autonomy in agile development environments. + +## Related Research + +- (Martin, 2017) — Clean Architecture principles building on Fowler's policy/detail separation +- (Bass et al., 2021) — Software Architecture in Practice expanding on architectural decision-making frameworks \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/hohpe_woolf_2003.md b/docs/research/software-engineering/architecture/hohpe_woolf_2003.md new file mode 100644 index 0000000..c32ca3f --- /dev/null +++ b/docs/research/software-engineering/architecture/hohpe_woolf_2003.md @@ -0,0 +1,47 @@ +# Enterprise Integration Patterns — Hohpe & Woolf, 2003 + +## Citation + +Hohpe, G., & Woolf, B. (2003). *Enterprise Integration Patterns: Designing, Building, and Deploying Messaging Solutions*. Addison-Wesley. ISBN 978-0-321-20068-6. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Asynchronous messaging between systems follows a catalog of 65 integration patterns that solve recurring coupling, reliability, and ordering problems, providing technology-independent solutions to distributed system integration challenges. + +## Core Findings + +1. 
**Foundational messaging patterns**: Message (data packet), Message Channel (transport), Message Endpoint (producer/consumer), Message Router (content-based routing), Message Translator (schema conversion), and Publish-Subscribe Channel (one-to-many distribution) form the core vocabulary. +2. **Decoupling dimensions**: Integration patterns decouple time (asynchronous delivery), space (location independence), and schema (data model independence) between systems. +3. **Event contract specifications**: Beyond payload schema, event contracts must specify ordering guarantees, delivery semantics (at-most-once, at-least-once, exactly-once), and error handling policies. +4. **Pattern language approach**: 65 patterns organized into categories (messaging systems, channels, construction, routing, transformation, endpoints, system management) providing comprehensive integration vocabulary. +5. **Technology independence**: Patterns apply across messaging technologies (JMS, MSMQ, TIBCO, modern cloud messaging, microservices, serverless architectures). +6. **Industry adoption**: Spurred development of Enterprise Service Bus implementations including Apache Camel, Mule, WSO2, Oracle Service Bus, Open ESB, and modern integration platforms. + +## Mechanism + +Integration patterns work because they decouple time, space, and schema between systems. A Message Channel decouples space (producer and consumer don't need to know each other's location); asynchronous delivery decouples time (producer and consumer don't need to be available simultaneously); a Message Translator decouples schema (each system retains its own data model). The key insight is that event contracts must specify not just the payload schema but also ordering guarantees (per-sender FIFO, causal ordering), delivery semantics, and error handling. Without these, integration points become fragile and hard to reason about. 
+ +## Relevance + +Foundational reference for all distributed system integration, microservices architecture, event-driven systems, and API design. Essential for understanding asynchronous messaging patterns that remain relevant across technology generations from enterprise messaging to modern serverless and cloud-native architectures. Critical for designing robust, loosely-coupled distributed systems. + +## Related Research + +- (Fielding, 2000) — REST architectural style complementing messaging patterns for distributed systems +- (Conway, 1968) — Organizational structures affecting integration architecture design diff --git a/docs/research/software-engineering/architecture/kazman_klein_clements_2000.md b/docs/research/software-engineering/architecture/kazman_klein_clements_2000.md new file mode 100644 index 0000000..a5ed3c0 --- /dev/null +++ b/docs/research/software-engineering/architecture/kazman_klein_clements_2000.md @@ -0,0 +1,47 @@ +# Architecture Tradeoff Analysis Method (ATAM) — Kazman, Klein & Clements, 2000 + +## Citation + +Kazman, R., Klein, M., & Clements, P. (2000). "ATAM: Method for Architecture Evaluation" (CMU/SEI-2000-TR-004). Software Engineering Institute, Carnegie Mellon University. + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Architecture should be evaluated early through structured scenario analysis. ATAM discovers **trade-offs** and **sensitivity points** before implementation begins, when change cost is minimal. + +## Core Findings + +1. **Risk-mitigation roadmap**: ATAM produces a structured assessment of architectural risks rather than a simple pass/fail verdict. +2. **Nine-step process**: Systematic methodology covering stakeholder presentation, business driver analysis, architecture presentation, approach identification, quality attribute tree generation, analysis, scenario brainstorming, re-analysis, and results presentation. +3. 
**Sensitivity points identification**: Reveals architectural decisions that most significantly affect quality attributes. +4. **Trade-off points analysis**: Identifies decisions affecting multiple quality attributes in opposing ways, highlighting necessary compromises. +5. **Quality attribute focus**: Structures evaluation around specific quality concerns (performance, security, maintainability, etc.) rather than general architectural goodness. +6. **Stakeholder-driven scenarios**: Uses real stakeholder scenarios to test architectural decisions against actual usage patterns and concerns. + +## Mechanism + +The method reveals how architectural decisions affect quality attributes and identifies decisions that most impact system success. ATAM works by systematically walking through architectural approaches against stakeholder-prioritized quality attribute scenarios. Sensitivity points emerge when small changes in architectural decisions cause large changes in quality attribute response. Trade-off points appear when architectural decisions improve one quality attribute while degrading another, forcing explicit design trade-offs. + +## Relevance + +Foundational methodology for architectural assessment and review processes. ATAM-style analysis is applied in adversarial review during verification: testing implemented architecture against quality-attribute scenarios identified during design. Essential for system architects who need to evaluate architectural decisions before implementation when change costs are minimal. 
+ +## Related Research + +- (Bass, Clements & Kazman, 2021) — Software Architecture in Practice expanding on ATAM methodology +- (Clements, Kazman & Klein, 2002) — Evaluating Software Architectures comprehensive guide \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/kruchten_1995.md b/docs/research/software-engineering/architecture/kruchten_1995.md new file mode 100644 index 0000000..60074e0 --- /dev/null +++ b/docs/research/software-engineering/architecture/kruchten_1995.md @@ -0,0 +1,47 @@ +# The 4+1 View Model of Architecture — Kruchten, 1995 + +## Citation + +Kruchten, P. B. (1995). "The 4+1 View Model of Architecture." *IEEE Software*, 12(6), 42–50. https://doi.org/10.1109/52.469759 + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Software architecture cannot be adequately captured in a single model or view. The 4+1 model provides multiple, complementary perspectives that together form a complete architectural description. + +## Core Findings + +1. **Five complementary views**: Logical view (object model, functional requirements), Process view (concurrency, distribution, performance), Physical view (deployment, hardware topology), Development view (static organization, modules, subsystems), and Scenarios (+1) that tie views together through use cases. +2. **Stakeholder-specific concerns**: Each view addresses different stakeholder concerns and quality attributes - developers need Development view, system integrators need Physical view, performance engineers need Process view. +3. **Scenario validation**: The scenarios (+1) validate that the architecture works as an integrated whole by showing how the views collaborate to support key use cases. +4. 
**Quality attribute mapping**: Each view specifically addresses non-functional requirements - Performance (Process), Availability (Physical), Modifiability (Development), Functionality (Logical). +5. **Multi-perspective necessity**: Architecture is not just structure - it must address non-functional requirements through specific design decisions in each view. +6. **IEEE 1471 influence**: Kruchten's work heavily influenced IEEE 1471-2000 standard for architectural description. + +## Mechanism + +The model emphasizes that architecture is not just structure—it must address non-functional requirements (performance, availability, modifiability) through specific design decisions in each view. Each view uses different notation and focuses on different architectural elements, but scenarios weave through all views to demonstrate end-to-end system behavior. This multi-perspective approach ensures no critical architectural concern is overlooked while avoiding the complexity of a single, monolithic architectural model. + +## Relevance + +Foundational framework for architectural documentation and communication. C4 diagrams and modern architectural documentation templates follow this multi-view principle. Context, Container, Component, and Code diagrams provide complementary perspectives that together describe complete architecture. Essential for enterprise architecture, system design documentation, and architectural review processes. 
+ +## Related Research + +- (Brown, 2018) — C4 model applying multi-view principles to contemporary software architecture +- (Bass et al., 2021) — Software Architecture in Practice building on Kruchten's view-based approach \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/martin_2012_clean.md b/docs/research/software-engineering/architecture/martin_2012_clean.md new file mode 100644 index 0000000..bd22e81 --- /dev/null +++ b/docs/research/software-engineering/architecture/martin_2012_clean.md @@ -0,0 +1,48 @@ +# Clean Architecture — Martin, 2012 + +## Citation + +Martin, R. C. (2012). "The Clean Architecture." *8th Light Blog*. Later expanded in *Clean Architecture: A Craftsman's Guide to Software Structure and Design* (2017), Prentice Hall. ISBN 978-0-13-449416-6. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The dependency arrow always points inward: from infrastructure toward application toward domain. The domain knows nothing about frameworks, databases, or external services. + +## Core Findings + +1. **Dependency Rule**: Source code dependencies must point inward only — outer layers can depend on inner layers, but inner layers must never depend on outer layers. +2. **Concentric layer structure**: Four layers from outside to inside: Frameworks/Drivers → Interface Adapters → Application Business Rules (Use Cases) → Enterprise Business Rules (Entities). +3. **Framework independence**: The architecture doesn't depend on frameworks; frameworks are tools to be used, not architectures to be conformed to. +4. **Testable in isolation**: Business rules can be tested without UI, database, web server, or any external element because dependencies point inward. +5. 
**Database independence**: Business rules are not bound to the database — you can swap Oracle for SQL Server, MongoDB, CouchDB, or something else without affecting business rules. +6. **UI independence**: The UI can change without changing the rest of the system — Web UI could be replaced with console UI without changing business rules. +7. **Building on previous architectures**: Clean Architecture synthesizes Hexagonal Architecture (Cockburn, 2005), Onion Architecture, Screaming Architecture, and DCI into a unified approach. + +## Mechanism + +Clean Architecture builds on Hexagonal Architecture and layer-based approaches by making the dependency rule explicit: source code dependencies must point inward only. The outermost layers (frameworks, drivers, UI, database) are details that can be changed without affecting inner layers. The innermost layer (entities, use cases) contains business rules that have no knowledge of the outside world. This ensures that the domain is both testable in isolation and insulated from infrastructure churn. Dependency Inversion Principle enables this by having high-level modules define interfaces that low-level modules must implement. + +## Relevance + +Foundational architecture pattern for creating maintainable, testable, framework-independent systems. Essential for microservices design, domain-driven design implementation, and any system requiring long-term maintainability. Critical for applications where business logic must evolve independently of technical infrastructure choices. 
+ +## Related Research + +- (Parnas, 1972) — Information hiding principles underlying Clean Architecture's dependency rule +- (Cockburn, 2005) — Hexagonal Architecture that Clean Architecture builds upon and generalizes \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/nygard_2011.md b/docs/research/software-engineering/architecture/nygard_2011.md new file mode 100644 index 0000000..f12a991 --- /dev/null +++ b/docs/research/software-engineering/architecture/nygard_2011.md @@ -0,0 +1,48 @@ +# Architecture Decision Records — Nygard, 2011 + +## Citation + +Nygard, M. (2011). "Documenting Architecture Decisions." *Cognitect Blog*. November 15, 2011. Later adopted by ThoughtWorks Technology Radar (2016). https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Architecturally significant decisions should be documented as short, immutable records capturing the context, decision, rationale, alternatives, and consequences. Each record is written once and never edited — if understanding changes, a new record supersedes the old one. + +## Core Findings + +1. **Five-part structure**: Title, Context (forces at play), Decision (response to forces), Status (proposed/accepted/superseded), Consequences (resulting context after decision). +2. **Immutability principle**: ADRs are never edited after acceptance — superseded decisions remain as historical record with references to replacements. +3. **Lightweight format**: One to two pages maximum, written in Markdown, stored in version control with code. +4. **Architecturally significant scope**: Decisions affecting structure, non-functional characteristics, dependencies, interfaces, or construction techniques. +5. **Sequential numbering**: ADRs numbered monotonically and sequentially (never reused) for easy reference. +6. 
**Conversation with future developers**: Written in full sentences with active voice to communicate reasoning to new team members. +7. **ThoughtWorks adoption**: Added to Technology Radar in 2016, driving widespread industry adoption. + +## Mechanism + +ADRs work because they externalise architectural reasoning that would otherwise remain tacit, tribal knowledge. By forcing the decision-maker to articulate the context (what forces are at play), the decision (what was chosen), the reason (why this choice over alternatives), and the consequences (what becomes easier or harder), ADRs create a decision trail that new team members can read. Immutability prevents retroactive justification: you cannot rewrite history, only supersede it. The consequences of one ADR often become the context for subsequent ADRs, creating a decision pattern language. + +## Relevance + +Essential practice for software architecture documentation and knowledge management. Critical for distributed teams, high-turnover environments, and complex systems requiring architectural decision tracking. Widely adopted across the software industry for maintaining architectural reasoning, onboarding new developers, and preventing repeated architectural mistakes. Directly applicable to any project requiring transparent decision-making processes. + +## Related Research + +- (Kruchten, 2004) — Importance of architecture decisions in software development +- (Brown, 2018) — C4 model complementing ADR documentation with visual architecture communication \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/parnas_1972.md b/docs/research/software-engineering/architecture/parnas_1972.md new file mode 100644 index 0000000..bb2efea --- /dev/null +++ b/docs/research/software-engineering/architecture/parnas_1972.md @@ -0,0 +1,47 @@ +# Information Hiding — Parnas, 1972 + +## Citation + +Parnas, D. L. (1972). "On the criteria to be used in decomposing systems into modules." 
*Communications of the ACM*, 15(12), 1053–1058. https://doi.org/10.1145/361598.361623 + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The correct criterion for decomposing a system into modules is **information hiding**: each module hides a design decision that is likely to change. Modules should reveal only what callers need while hiding implementation details. + +## Core Findings + +1. **Information hiding principle**: Each module should hide one specific design decision that is likely to change, creating a stable interface while allowing implementation flexibility. +2. **Decomposition by change-prone decisions**: Rather than decomposing by execution steps (procedure-based), decompose by decisions most likely to change (data structures, algorithms, I/O formats, external protocols). +3. **Module interface stability**: The module's public interface should be change-stable while the implementation remains change-free from the caller's perspective. +4. **Coupling reduction**: Information hiding prevents tight coupling by making modules depend only on abstract interfaces, not concrete implementations. +5. **Foundation for modern principles**: This 1972 paper established the theoretical foundation for SOLID principles (especially Dependency Inversion), Hexagonal Architecture, and Domain-Driven Design bounded contexts. +6. **Engineering professionalization**: Parnas was among the first to apply traditional engineering principles to software design, earning professional engineering licenses and advocating for software engineering as a legitimate engineering discipline. + +## Mechanism + +Decomposing by execution steps (procedure-based) creates tight coupling to implementation order. Decomposing by change-prone decisions (information-hiding) allows each decision to be changed independently without affecting other modules. 
The mechanism works by identifying decisions most likely to change (data structures, algorithms, I/O formats, external service protocols), then making each such decision a module boundary. The module's public interface exposes only what callers need; all implementation details remain hidden and changeable. + +## Relevance + +Foundational principle for all modern software architecture. Essential for creating maintainable, evolvable systems where changes to implementation details don't cascade through the entire codebase. Critical for microservices design, API development, library design, and any system requiring long-term maintainability. Directly applicable to bounded context identification, dependency injection, and modular system design. + +## Related Research + +- (Martin, 2000) — SOLID principles building on Parnas's information hiding foundation +- (Cockburn, 2005) — Hexagonal Architecture applying information hiding to external dependencies \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/skelton_pais_2019.md b/docs/research/software-engineering/architecture/skelton_pais_2019.md new file mode 100644 index 0000000..9d1fcf3 --- /dev/null +++ b/docs/research/software-engineering/architecture/skelton_pais_2019.md @@ -0,0 +1,48 @@ +# Team Topologies — Skelton & Pais, 2019 + +## Citation + +Skelton, M., & Pais, M. (2019). *Team Topologies: Organizing Business and Technology Teams for Fast Flow*. IT Revolution Press. ISBN 978-1942788812. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Team cognitive load and communication patterns directly impact software architecture quality. Teams should be organized around the architecture you want, not the architecture you have. + +## Core Findings + +1. 
**Four fundamental team types**: Stream-aligned (continuous flow aligned to business capability), Enabling (helps other teams overcome obstacles), Complicated-subsystem (specialized knowledge for complex subsystems), Platform (provides internal services to reduce cognitive load). +2. **Three team interaction modes**: Collaboration (working together for discovery and rapid learning), X-as-a-Service (consuming services with minimal collaboration), Facilitating (helping another team learn or adopt new approaches). +3. **Cognitive load management**: Teams have limited cognitive capacity — exceeding this limit through too many responsibilities, technologies, or domains reduces effectiveness. +4. **Conway's Law application**: Team boundaries become system boundaries, so design team structures that mirror your desired architecture using the Inverse Conway Maneuver. +5. **Fast flow optimization**: Team topologies should minimize cognitive load while maximizing flow of value to customers. +6. **Evolutionary design**: Team structures and communication pathways must be able to evolve with technological and organizational maturity. +7. **Second edition emphasis**: Cognitive load as a design principle, organizations as "flourishing ecosystems" rather than "efficient machines." + +## Mechanism + +Teams are the fundamental means of delivery, where team structures determine software architecture through Conway's Law. By deliberately designing team topologies to match desired system architecture, organizations can influence both technical and organizational outcomes. Cognitive load acts as a constraint — teams exceeding their cognitive capacity produce lower quality software with slower delivery. The four team types and three interaction modes provide a vocabulary for designing sustainable organizational structures. + +## Relevance + +Essential framework for organizational design in technology companies. 
Critical for DevOps transformation, microservices architecture, platform engineering, and any organization seeking to improve software delivery performance. Directly applicable to team formation, organizational restructuring, and aligning team boundaries with system boundaries. + +## Related Research + +- (Conway, 1968) — Conway's Law as the theoretical foundation for team-system mirroring +- (Brooks, 1975) — The Mythical Man-Month on team size and communication overhead limits \ No newline at end of file diff --git a/docs/research/software-engineering/process/beck_1999_yagni.md b/docs/research/software-engineering/process/beck_1999_yagni.md new file mode 100644 index 0000000..675ee12 --- /dev/null +++ b/docs/research/software-engineering/process/beck_1999_yagni.md @@ -0,0 +1,45 @@ +# YAGNI ("You Aren't Gonna Need It") — Beck & Jeffries, 1999 + +## Citation + +Beck, K., & Jeffries, R. (1999). Extreme Programming principle, originated on the Ward Cunningham Wiki and in comp.software.extreme-programming discussions. Later articulated in Beck, K. (2000). *Extreme Programming Explained*, Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Never add functionality until it is required by a failing test or current requirement - speculative code adds complexity without delivering value. + +## Core Findings + +1. **Principle Definition**: "Always implement things when you actually need them, never when you just foresee that you need them" (Ron Jeffries) +2. **Cognitive Bias Protection**: YAGNI counteracts planning fallacy (overestimating likelihood of predicted needs) and sunk cost bias +3. **Design Priority**: YAGNI operates as highest-priority design rule (YAGNI > KISS > DRY > OC > SOLID > patterns) +4. **XP Integration**: Used with continuous refactoring, automated unit testing, and continuous integration +5. 
**Expert Validation**: John Carmack observed that architecting for future requirements rarely turns out to be a net positive + +## Mechanism + +YAGNI protects against two cognitive biases: planning fallacy (overestimating the likelihood that predicted future needs will materialize) and sunk cost (reluctance to remove expensive-to-write code). By deferring all implementation until demanded by tests or requirements, YAGNI keeps the codebase minimal and focused. It must be used with supporting practices like continuous refactoring to avoid technical debt. + +## Relevance + +Essential for lean software development, preventing over-engineering and feature creep. Applied in TDD workflows, API design, architecture decisions. Fundamental principle in Extreme Programming and Agile methodologies for maintaining code simplicity and reducing maintenance burden. + +## Related Research + +Connects to (Beck, 2002) on TDD practices, (Fowler, 1999) on refactoring support, (Gamma et al., 1994) on design patterns as lower priority. Part of the broader XP methodology alongside the KISS principle and DRY principle. Related to Lean principles of waste elimination.
+ +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Blameless post-mortems focus on process and system failures (not individual mistakes) to produce more actionable improvements than blame-oriented reviews. + +## Core Findings + +1. **Fundamental Principle**: Every failure is a process failure, not a people failure +2. **Psychological Safety Effect**: Blameless approach enables honest disclosure of full context including mistakes and near-misses +3. **Process-Level Framing**: "What process gap allowed this to happen?" vs "Who made the mistake?" shifts improvement target from individual behavior to systemic reliability +4. **Complete Root Cause Analysis**: Participants share more information when not threatened with punishment +5. **Google SRE Origin**: Developed as core practice in Site Reliability Engineering at Google, founded by Benjamin Treynor Sloss in 2003 + +## Mechanism + +Blameless post-mortems work through psychological safety enabling honest disclosure. When participants know they will not be punished, they share complete context including their own mistakes. This produces more comprehensive root cause analysis than blame-oriented reviews where participants hide information for self-protection. Process-level framing shifts focus to systemic improvements. + +## Relevance + +Essential for incident response, organizational learning, reliability engineering. Applied in SRE practices, DevOps culture, continuous improvement. Fundamental for building high-reliability organizations and preventing repeat failures through systemic fixes rather than individual blame. + +## Related Research + +Connects to (Amy Edmondson) on psychological safety, (Sidney Dekker) on Just Culture, (John Allspaw) on post-mortem practices. Part of broader SRE methodology alongside error budgets, monitoring, automation. 
Related to learning organization principles and continuous improvement frameworks. \ No newline at end of file diff --git a/docs/research/software-engineering/process/calver_2020.md b/docs/research/software-engineering/process/calver_2020.md new file mode 100644 index 0000000..f07a8f7 --- /dev/null +++ b/docs/research/software-engineering/process/calver_2020.md @@ -0,0 +1,45 @@ +# Calendar Versioning — CalVer, 2020 + +## Citation + +CalVer (2020). Calendar Versioning. https://calver.org + +## Source Type + +Specification + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Date-based version segments communicate release timing directly, suitable for projects with large/constantly-changing scope or time-sensitive releases. + +## Core Findings + +1. **Family of Schemes**: Not single scheme but flexible framework - YY.MINOR.MICRO (pip), YYYY.MM.DD (certifi), YY.0M (OpenSCAD) +2. **Standard Terminology**: YYYY/YY/0Y (year), MM/0M (month), WW/0W (week), DD/0D (day) segments +3. **Wide Adoption**: Ubuntu, Twisted, youtube-dl, pip, PyCharm, Unity, LibreOffice, OpenSCAD, Stripe API +4. **Compatibility Limitation**: CalVer alone doesn't signal breaking changes - some projects use hybrid SemVer+CalVer approach +5. **Three Key Use Cases**: Large/changing scope systems, time-sensitive releases, external-change-driven projects + +## Mechanism + +CalVer replaces arbitrary version increments with calendar-derived segments using Gregorian calendar and UTC convention. Date segments are 1-based (unlike traditional 0-based incremented versions) with short/zero-padded years relative to year 2000. Projects choose appropriate scheme based on release patterns and communication needs. + +## Relevance + +Essential for projects with time-based releases, security updates, business support schedules, large system coordination. Applied in operating systems, frameworks, security libraries, API versioning. 
Alternative to SemVer when semantic meaning is less relevant than temporal context. + +## Related Research + +Connects to (Preston-Werner, 2013) on Semantic Versioning as alternative approach. Part of broader software versioning strategies including hybrid approaches. Related to release management, dependency management, and software lifecycle practices. \ No newline at end of file diff --git a/docs/research/software-engineering/process/clegg_barker_1994.md b/docs/research/software-engineering/process/clegg_barker_1994.md new file mode 100644 index 0000000..03fd7b8 --- /dev/null +++ b/docs/research/software-engineering/process/clegg_barker_1994.md @@ -0,0 +1,45 @@ +# MoSCoW Prioritization — Clegg & Barker, 1994 + +## Citation + +Clegg, D., & Barker, R. (1994). *Case Method Fast-Track: A RAD Approach*. Addison-Wesley. (DSDM origin.) + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Classifying requirements as Must/Should/Could/Won't forces explicit negotiation about what is essential vs. desired, preventing gold-plating. + +## Core Findings + +1. **Four Priority Categories**: Must have (critical), Should have (important but not necessary), Could have (desirable), Won't have (lowest priority/out of scope) +2. **DSDM Integration**: Originally developed by Dai Clegg in 1994 for Rapid Application Development, extensively used in Dynamic Systems Development Method from 2002 +3. **Effort Constraints**: DSDM mandates that Must requirements cannot exceed 60% of total effort +4. **Plain English Value**: Categories more meaningful than High/Medium/Low, helping customers understand priority impact +5. **Story-Level Application**: When applied within single story, reveals bloated stories that should be split + +## Mechanism + +MoSCoW forces explicit negotiation by using plain English categories that clarify business impact. 
The 60% constraint on Must requirements prevents scope creep. At story level, if only 3 of 12 examples are Must, the remaining 9 can be deferred, keeping stories focused and deliverable within timeboxes. + +## Relevance + +Essential for Agile development, requirements prioritization, scope management, minimum viable product definition. Applied in Scrum, RAD, DSDM methodologies. Fundamental for timeboxed delivery and preventing feature creep in iterative development approaches. + +## Related Research + +Connects to (Kano et al., 1984) on alternative prioritization methods. Part of broader Agile methodology alongside user stories and timeboxing. Related to scope management, minimum viable product concepts, and iterative development frameworks. diff --git a/docs/research/software-engineering/process/fagan_1976.md b/docs/research/software-engineering/process/fagan_1976.md new file mode 100644 index 0000000..ff3e2aa --- /dev/null +++ b/docs/research/software-engineering/process/fagan_1976.md @@ -0,0 +1,45 @@ +# Design and Code Inspections — Fagan, 1976 + +## Citation + +Fagan, M. E. (1976). "Design and Code Inspections to Reduce Errors in Program Development." *IBM Systems Journal*, 15(3), 182–211. + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Structured inspections using checklists detect 60-90% of defects before testing, far more than unstructured walkthroughs by forcing reviewers to articulate specific failures. + +## Core Findings + +1. **Defect Detection Rate**: Structured inspections detect 60-90% of defects before testing phase +2. **Cost Effectiveness**: Fixing defects in early phases costs 10-100x less than fixing in maintenance phase +3. **Six-Phase Process**: Planning, Overview, Preparation, Inspection meeting, Rework, Follow-up +4. **Role-Based Review**: Author, Reader, Reviewers, Moderator, Recorder each have specific responsibilities +5. 
**Checklist-Driven**: Systematic checking against specific quality attributes prevents confirmation bias + +## Mechanism + +Fagan inspections constrain reviewer attention to overcome confirmation bias. Unstructured reviews allow skimming and overlooking defects through expectation confirmation. Structured inspection requires checking each quality attribute individually, forcing System 2 thinking. Self-declaration checklists (AGREE/DISAGREE criteria) prevent vague "looks good" approvals that hide defects. + +## Relevance + +Essential for code quality assurance, defect prevention, software inspection processes. Applied in formal review procedures, quality gates, peer review systems. Foundational for static analysis, code review practices, and quality assurance in software development lifecycle. + +## Related Research + +Connects to (Tversky & Kahneman, 1974) on confirmation bias, (Kahneman, 2011) on System 1/2 thinking. Part of broader software quality methodologies alongside testing, static analysis. Related to inspection techniques, peer review processes, and formal verification approaches. \ No newline at end of file diff --git a/docs/research/software-engineering/process/preston-werner_2013.md b/docs/research/software-engineering/process/preston-werner_2013.md new file mode 100644 index 0000000..a0218f8 --- /dev/null +++ b/docs/research/software-engineering/process/preston-werner_2013.md @@ -0,0 +1,45 @@ +# Semantic Versioning 2.0.0 — Preston-Werner, 2013 + +## Citation + +Preston-Werner, T. (2013). Semantic Versioning 2.0.0. https://semver.org + +## Source Type + +Specification + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Version numbers follow MAJOR.MINOR.PATCH format encoding compatibility intent to enable automated dependency resolution and prevent dependency hell. + +## Core Findings + +1. 
**Three-Part Versioning**: MAJOR.MINOR.PATCH, where MAJOR is incremented for incompatible API changes, MINOR for backward-compatible additions, and PATCH for backward-compatible bug fixes +2. **Build Metadata Independence**: Build metadata (after `+`) provides arbitrary information without affecting version precedence - `1.0.0+20260430` and `1.0.0` have the same precedence +3. **Pre-release Ordering**: Pre-release versions (after `-`) have lower precedence than normal versions: `1.0.0-alpha < 1.0.0` +4. **Dependency Resolution**: Enables automated package management with range specifications like `>=3.1.0 <4.0.0` preventing dependency hell +5. **Public API Declaration**: Requires clear, precise public API definition as the foundation for meaningful version communication + +## Mechanism + +SemVer encodes compatibility intent in the version number itself. MAJOR increments signal breaking changes requiring consumer updates; MINOR increments signal safe additions; PATCH increments signal safe fixes. Build metadata suffix (§10) allows arbitrary data (dates, commit hashes) without affecting dependency solver precedence calculations. + +## Relevance + +De facto standard for software versioning, essential for package management systems (npm, pip, Maven), continuous integration, API evolution communication. Foundational for dependency resolution algorithms, semantic release automation, software distribution strategies. + +## Related Research + +Created by Tom Preston-Werner (GitHub co-founder, Gravatar inventor) in 2013. Based on widespread existing practices in open/closed-source software. Influences modern package managers, CI/CD systems, release automation tools. Licensed under Creative Commons CC BY 3.0, maintained as an open specification.
\ No newline at end of file diff --git a/docs/research/software-engineering/process/reinertsen_2009.md b/docs/research/software-engineering/process/reinertsen_2009.md new file mode 100644 index 0000000..604bb53 --- /dev/null +++ b/docs/research/software-engineering/process/reinertsen_2009.md @@ -0,0 +1,45 @@ +# The Principles of Product Development Flow (WSJF) — Reinertsen, 2009 + +## Citation + +Reinertsen, D. G. (2009). *The Principles of Product Development Flow: Second Generation Lean Product Development*. Celeritas Publishing. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Weighted Shortest Job First (WSJF), calculated as Cost of Delay divided by Duration, optimizes product development work sequencing by prioritizing items where delay is most costly relative to implementation time. + +## Core Findings + +1. **WSJF Formula**: Cost of Delay ÷ Duration provides quantitative ranking replacing subjective prioritization +2. **Cost of Delay Quantification**: "One thing" to quantify - partial derivative of total expected value with respect to time ($/time units) +3. **CD3 Algorithm**: "Cost of Delay Divided by Duration" maximizes total value delivered by scarce development capacity +4. **Manager Knowledge Gap**: ~85% of product managers don't know their Cost of Delay +5. **Intuition Failure**: Intuitive Cost of Delay estimates differ by 50:1 ratio, making quantification essential + +## Mechanism + +WSJF transforms subjective prioritization into quantitative ranking. Cost of Delay captures economic impact of not doing work now. Duration normalizes for effort. The ratio identifies work delivering most value per unit time invested. "CD3" scheduling algorithm maximizes value in any given time period. + +## Relevance + +Essential for product management, agile prioritization, lean development. Applied in feature prioritization, backlog management, resource allocation. 
Foundational for SAFe (Scaled Agile Framework) prioritization and quantitative product development flow optimization. + +## Related Research + +Created by Donald G. Reinertsen, author of "Managing the Design Factory" (1997). Cost of Delay concept described as "golden key that unlocks many doors" with "astonishing power to transform development organization mindset." Adopted in SAFe methodology and lean-agile practices worldwide. diff --git a/docs/research/software-engineering/quality/bay_2008.md b/docs/research/software-engineering/quality/bay_2008.md new file mode 100644 index 0000000..b2ba63d --- /dev/null +++ b/docs/research/software-engineering/quality/bay_2008.md @@ -0,0 +1,46 @@ +# Object Calisthenics — Bay, 2008 + +## Citation + +Bay, J. (2008). "Object Calisthenics." In *The ThoughtWorks Anthology*, pp. 65–78. Pragmatic Bookshelf. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +Moderate + +## Key Insight + +Nine syntactic constraints force developers to distribute behavior rather than centralize data, producing significantly better object-oriented designs through structural pressure. + +## Core Findings + +1. **Extreme constraints methodology**: Nine rules including "no more than two instance variables per class" and "no getters/setters/properties" create structural pressure toward better designs. +2. **Anti-pattern prevention**: "No more than two instance variables" prevents god-object anti-pattern by making it impossible to hold all related data in one class, forcing decomposition into collaborating objects. +3. **Training wheels philosophy**: Constraints are intentionally extreme - they are temporary training tools, not permanent rules - but design habits they produce persist after constraints are relaxed. +4. **Behavior distribution**: Forces small, focused classes with clear responsibilities and behavior-rich objects rather than data containers. +5. 
**ThoughtWorks methodology**: Published as part of ThoughtWorks Anthology, reflecting company's agile software development practices and expertise. + +## Mechanism + +Constraints create structural pressure toward small, focused classes with clear responsibilities. Extreme limitations make poor design choices impossible, forcing developers to find alternative approaches that result in better object-oriented structure. Design habits developed under constraints (small classes, behavior-rich objects, encapsulated data) persist after constraints are relaxed. + +## Relevance + +Valuable training methodology for developing better object-oriented design skills. Useful for teams struggling with large classes, anemic domain models, or excessive coupling. Educational tool for understanding principles behind good OOP design through extreme application. + +## Related Research + +- (Martin, 2000) — SOLID principles providing theoretical foundation for good OOP design +- (Fowler, 1999) — Refactoring techniques for improving object-oriented design incrementally \ No newline at end of file diff --git a/docs/research/software-engineering/quality/beck_2002.md b/docs/research/software-engineering/quality/beck_2002.md new file mode 100644 index 0000000..04e3f50 --- /dev/null +++ b/docs/research/software-engineering/quality/beck_2002.md @@ -0,0 +1,48 @@ +# Test-Driven Development — Beck, 2002 + +## Citation + +Beck, K. (2002). *Test-Driven Development: By Example*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The RED-GREEN-REFACTOR cycle — write a failing test, write the minimum code to pass it, then improve the structure while keeping all tests green — drives better design by forcing each unit of behaviour to be specified before it is implemented. + +## Core Findings + +1. 
Tests written first act as specifications, not verification, preventing over-engineering and ensuring every piece of code has a reason to exist +2. The RED phase forces developers to articulate what they want before building it +3. The GREEN phase constrains implementation to the minimum that satisfies the specification, preventing speculative generalisation +4. The REFACTOR phase is safe because all existing tests remain green, providing a regression net +5. The cycle produces small, focused, well-tested units of code because each unit must be both specifiable (testable) and minimal (just enough to pass) +6. Two basic rules: (1) Never write a single line of code unless you have a failing automated test, (2) Eliminate duplication + +## Mechanism + +TDD works by inverting the traditional development flow. Instead of writing code then testing it, developers first write failing tests that specify desired behavior. This forces clear thinking about requirements and interfaces before implementation. The requirement to make tests pass with minimal code prevents over-engineering, while the refactoring phase improves design quality under the safety net of comprehensive test coverage. + +## Relevance + +Foundational methodology for software quality assurance and design. TDD has become a cornerstone practice in agile development, influencing modern software engineering through improved code quality, better design, and increased developer confidence in changes. Essential for understanding test-first approaches and the relationship between testing and design. 
+ +## Related Research + +- (Beck, 1999) - Extreme Programming Explained, which introduced TDD as part of XP practices +- (Fowler et al., 1999) - Refactoring book co-authored by Beck, providing systematic approach to code improvement +- (Beck & Gamma, 2004) - JUnit framework implementation demonstrating TDD principles in practice \ No newline at end of file diff --git a/docs/research/software-engineering/quality/demillo_lipton_sayward_1978.md b/docs/research/software-engineering/quality/demillo_lipton_sayward_1978.md new file mode 100644 index 0000000..4371426 --- /dev/null +++ b/docs/research/software-engineering/quality/demillo_lipton_sayward_1978.md @@ -0,0 +1,45 @@ +# Mutation Testing — DeMillo, Lipton & Sayward, 1978 + +## Citation + +DeMillo, R. A., Lipton, R. J., & Sayward, F. G. (1978). "Hints on test data selection: Help for the practicing programmer." *Computer*, 11(4), 34–41. + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +A meaningful test fails when a mutation (small deliberate code change) is introduced - if test survives every mutation without failing, it tests nothing useful. + +## Core Findings + +1. **Competent Programmer Hypothesis**: Competent programmers write programs close to being correct behaviorally +2. **Coupling Effect**: Simple faults cascade to form other emergent faults, so detecting simple mutations catches complex bugs +3. **RIP Model**: Tests must Reach mutated statement, Infect program state, and Propagate incorrect state to output +4. **Equivalent Mutants Problem**: Some mutants produce behaviorally equivalent programs, creating analysis challenges +5. 
**Quality Measurement**: Mutation score (mutants killed / total mutants) provides objective test quality metric + +## Mechanism + +Mutation testing systematically introduces small bugs into code using mutation operators (arithmetic/relational/logical changes) and checks whether tests detect them. Tests failing to catch artificial bugs indicate weak test quality or missing edge cases. Strong mutation requires full RIP model satisfaction; weak mutation only requires reach/infect. + +## Relevance + +Essential for test quality assessment, TDD validation, regression testing. Applied in modern tools (PITest, Stryker, mutmut, cosmic-ray). Fundamental for measuring test effectiveness beyond code coverage, ensuring tests constrain actual behavior rather than implementation details. + +## Related Research + +Originally proposed by Richard Lipton (1971), developed by DeMillo, Lipton & Sayward (1978). First implementation by Timothy Budd (1980). Connects to (Jia & Harman, 2011) comprehensive survey. Modern applications in security testing, object-oriented mutation operators, higher-order mutants research. \ No newline at end of file diff --git a/docs/research/software-engineering/quality/feathers_2004.md b/docs/research/software-engineering/quality/feathers_2004.md new file mode 100644 index 0000000..ae4dda3 --- /dev/null +++ b/docs/research/software-engineering/quality/feathers_2004.md @@ -0,0 +1,46 @@ +# Working Effectively with Legacy Code — Feathers, 2004 + +## Citation + +Feathers, M. (2004). *Working Effectively with Legacy Code*. Prentice Hall. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Legacy code is code without tests - the safest way to modify it is to first write characterization tests that capture current behavior, then refactor under the safety net of those tests. + +## Core Findings + +1. 
**Legacy code definition**: Code without automated tests, making it dangerous to modify and prone to introducing bugs during changes. +2. **Characterization tests methodology**: Tests that document what code currently does (not what it should do) - essential when modifying untested code to create regression safety net. +3. **Seam-based approach**: Seams (parameter seams, link seams, preprocessing seams, object seams) are points where behavior can be varied without editing code - primary mechanism for getting legacy code under test. +4. **Test-first modification**: Process is identify seam, get code under test by writing characterization test at that seam, then refactor safely. +5. **"Edit and pray" elimination**: Replaces dangerous "modify and hope nothing breaks" approach with disciplined "test, then modify, then verify" cycle. + +## Mechanism + +Characterization tests differ from specification tests: they document what code currently does, not what it should do. This creates regression protection before any changes are made. Seams allow injecting test doubles at specific points without modifying production code. Process: identify seam, get code under test, write characterization test, then refactor. Avoids dangerous untested modifications. + +## Relevance + +Essential methodology for working with legacy codebases safely. Critical for organizations maintaining large existing systems without comprehensive test coverage. Foundational approach for incremental improvement of legacy systems and technical debt reduction. 
+ ## Related Research + - (Beck, 2002) — Test-driven development providing methodology for new code development with tests + - (Fowler, 1999) — Refactoring techniques that require test safety net provided by characterization tests \ No newline at end of file diff --git a/docs/research/software-engineering/quality/fowler_1999.md b/docs/research/software-engineering/quality/fowler_1999.md new file mode 100644 index 0000000..62d13de --- /dev/null +++ b/docs/research/software-engineering/quality/fowler_1999.md @@ -0,0 +1,46 @@ +# Refactoring: Improving the Design of Existing Code — Fowler, 1999 + +## Citation + +Fowler, M. (1999). *Refactoring: Improving the Design of Existing Code*. Addison-Wesley. First edition with contributions by K. Beck, J. Brant, W. Opdyke, and D. Roberts; second edition (Fowler, sole author) published 2018. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Refactoring is disciplined technique for restructuring existing code without changing external behavior, done in small steps each verified by tests. + +## Core Findings + +1. **Catalog methodology**: 66 named transformations (Extract Method, Move Field, Replace Conditional with Polymorphism, etc.) each with known pre-condition, mechanic, and effect on code structure. +2. **Code smell diagnostics**: Diagnostic indicators (Long Method, Feature Envy, Switch Statements, etc.) signal when refactoring is needed and point to specific techniques. +3. **Test-driven safety**: Small, test-verified steps ensure restructuring doesn't introduce bugs while improving design quality. +4. **Behavior preservation**: External functionality remains unchanged while internal structure improves through systematic transformations. +5. **Design emergence**: Better design emerges through incremental improvements rather than upfront architectural decisions.
+ +## Mechanism + +Each refactoring has known pre-condition (when safe to apply), step-by-step mechanic (the transformation), and guaranteed post-condition (what improves). By applying refactorings in small, test-verified steps, developer can restructure code safely without introducing bugs. Code smells serve as diagnostic indicators pointing to specific refactoring technique most likely to improve structure. The smell identifies problem; refactoring provides solution. + +## Relevance + +Foundational methodology for systematic code improvement and design evolution. Essential practice for maintaining code quality, reducing technical debt, and enabling sustainable software development. Widely adopted as core agile development practice. + +## Related Research + +- (Beck, 2002) — Test-driven development methodology supporting refactoring safety +- (Shvets, 2014) — Comprehensive online refactoring catalog building on Fowler's work \ No newline at end of file diff --git a/docs/research/software-engineering/quality/freeman_pryce_2009.md b/docs/research/software-engineering/quality/freeman_pryce_2009.md new file mode 100644 index 0000000..56694a9 --- /dev/null +++ b/docs/research/software-engineering/quality/freeman_pryce_2009.md @@ -0,0 +1,45 @@ +# GOOS — Growing Object-Oriented Software, Guided by Tests — Freeman & Pryce, 2009 + +## Citation + +Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Acceptance tests and unit tests operate at two separate nested timescales - outer loop writes failing acceptance tests before implementation; inner loop drives implementation with unit-level Red-Green-Refactor cycles. + +## Core Findings + +1. 
**Double Loop TDD**: Outer loop (acceptance tests) provides direction (what to build); inner loop (unit tests) provides momentum (how to build it) +2. **Nested Timescales**: Acceptance test stays red throughout all inner cycles and goes green only when feature is complete +3. **Direction vs Momentum**: Acceptance tests prevent over-engineering by defining "done"; unit tests drive good internal design +4. **Integration Safety**: Acceptance tests catch integration issues early while unit tests provide rapid feedback +5. **Mock Objects**: Use test doubles to maintain fast, isolated unit tests while preserving design feedback + +## Mechanism + +Outer loop begins with failing acceptance test for next feature, then enters inner loop of Red-Green-Refactor unit test cycles. Inner loop repeats (write failing unit test, make it pass with minimal code, refactor) until acceptance test passes. This structure provides safety nets at both levels for refactoring and ensures comprehensive test coverage. + +## Relevance + +Essential for advanced TDD practices, BDD implementation, acceptance test-driven development. Applied in enterprise software development, continuous integration, behavior-driven development. Foundational for understanding relationship between unit and acceptance testing in agile methodologies. + +## Related Research + +Connects to (Beck, 2002) on TDD fundamentals, (North, 2006) on BDD practices. Part of broader testing methodologies alongside ATDD, specification by example. Related to mock object patterns and test double strategies for maintainable test suites. 
\ No newline at end of file diff --git a/docs/research/software-engineering/quality/gamma_et_al_1994.md b/docs/research/software-engineering/quality/gamma_et_al_1994.md new file mode 100644 index 0000000..7e3ce29 --- /dev/null +++ b/docs/research/software-engineering/quality/gamma_et_al_1994.md @@ -0,0 +1,46 @@ +# Design Patterns: Elements of Reusable Object-Oriented Software — Gamma, Helm, Johnson, Vlissides, 1994 + +## Citation + +Gamma, E., Helm, R., Johnson, R., & Vlissides, J. (1994). *Design Patterns: Elements of Reusable Object-Oriented Software*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Design patterns provide reusable solutions to recurring design problems by naming proven structural approaches that teams can communicate at higher abstraction level. + +## Core Findings + +1. **Pattern catalog**: 23 design patterns divided into three categories by intent: Creational (abstracting object creation), Structural (composing classes/objects into larger structures), Behavioral (allocating responsibility between objects). +2. **Communication abstraction**: Patterns name recurring design structures enabling teams to communicate at higher level - saying "Strategy pattern" conveys entire structural solution. +3. **Problem-solution mapping**: Each pattern captures proven solution to specific class of design problem - Strategy eliminates type-switching, Observer decouples event sources from handlers, State replaces conditional state machines. +4. **Foundational principles**: "Program to interface, not implementation" and "Favor object composition over class inheritance" guide pattern application. +5. **Massive influence**: Over 500,000 copies sold in 14 languages, ACM SIGPLAN Programming Languages Achievement Award 2005, foundational for object-oriented design. 
+ +## Mechanism + +Patterns work by naming recurring design structures so teams can communicate at higher level of abstraction. Each pattern captures proven solution to specific class of design problem. Patterns should be applied only when code smell triggers them, never speculatively. The smell identifies the gap; the pattern provides structural solution. + +## Relevance + +Foundational reference for object-oriented design and software architecture. Essential vocabulary for software development teams and architectural decision-making. Widely adopted across programming languages and frameworks for systematic design improvement. + +## Related Research + +- (Fowler, 1999) — Refactoring methodology that prepares code for pattern application +- (Shvets, 2014) — Modern catalog connecting code smells to appropriate patterns \ No newline at end of file diff --git a/docs/research/software-engineering/quality/google_testing_2013.md b/docs/research/software-engineering/quality/google_testing_2013.md new file mode 100644 index 0000000..f478b9a --- /dev/null +++ b/docs/research/software-engineering/quality/google_testing_2013.md @@ -0,0 +1,45 @@ +# Test-Behavior Alignment — Google Testing Blog, 2013 + +## Citation + +Google Testing Blog. (2013). "Testing on the Toilet: Test Behavior, Not Implementation." By Andrew Trenk. *Google Testing Blog*. + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Test setup may need to change if implementation changes, but the actual test assertion should not need to change if the code's user-facing behavior doesn't change. + +## Core Findings + +1. **Implementation Independence**: Tests should focus on testing code's public API, not internal implementation details +2. **Maintenance Benefits**: Tests independent of implementation details are easier to maintain since they don't need changes with each implementation change +3. 
**Documentation Value**: Behavior-focused tests act as code samples showing different ways class methods can be used +4. **Setup vs Assertion**: Test setup may change with implementation (e.g., new constructor dependencies) but assertions should remain stable +5. **Brittleness Prevention**: Tests tightly coupled to implementation details break during refactoring and become drag on design improvement + +## Mechanism + +Implementation-focused tests verify internal structure (method calls, object creation, internal state) creating brittleness. Behavior-focused tests verify observable outcomes that users can witness, providing stability. Former creates maintenance overhead; latter provides lasting value through internal rewrites. + +## Relevance + +Essential for maintainable test suites, refactoring safety, TDD practices. Applied in contract testing, behavior-driven development, test design principles. Foundational for writing tests from caller's perspective without knowledge of internal implementation mechanics. + +## Related Research + +Connects to (Freeman & Pryce, 2009) on GOOS principles, (Fowler, 2018) on Test Pyramid. Part of broader testing methodologies alongside TDD, BDD, contract testing. Related to mock object patterns and test double strategies for behavior verification. \ No newline at end of file diff --git a/docs/research/software-engineering/quality/maciver_2016.md b/docs/research/software-engineering/quality/maciver_2016.md new file mode 100644 index 0000000..4db4887 --- /dev/null +++ b/docs/research/software-engineering/quality/maciver_2016.md @@ -0,0 +1,47 @@ +# Property-Based Testing — MacIver, 2016 + +## Citation + +MacIver, D. R. (2016). "What is Property Based Testing?" *Hypothesis*. 
https://hypothesis.works/articles/what-is-property-based-testing/ + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Property-based testing constructs tests such that when these tests are fuzzed with generated inputs, failures reveal problems that could not have been revealed by direct fuzzing alone. + +## Core Findings + +1. Meaningful property tests assert invariants—things that must always be true about the contract +2. Tautological tests assert reconstruction patterns that merely verify the implementation without constraining behavior +3. Property tests generate diverse inputs to verify that certain properties hold across the entire input space +4. This approach discovers edge cases that example-based tests typically miss and provides stronger confidence in correctness +5. Property-based testing complements example-based BDD scenarios by providing broader coverage of the input space + +## Mechanism + +Property tests work by generating diverse inputs to verify that certain properties (invariants) hold across the entire input space. Unlike example-based tests that check specific scenarios, property tests explore the full domain of possible inputs, automatically discovering edge cases that developers typically miss. The key is focusing on behavioral contracts rather than implementation details. + +## Relevance + +Essential for comprehensive test coverage in software quality assurance. Property-based testing complements traditional BDD scenarios by providing mathematical rigor to test validation. Particularly valuable for testing complex algorithms, data transformations, and API contracts where exhaustive example-based testing is impractical. 
+ +## Related Research + +- (Claessen & Hughes, 2000) - Original QuickCheck paper establishing property-based testing foundations +- (Fink & Bishop, 1997) - Early work on property-based testing for software assurance +- (MacIver et al., 2019) - Hypothesis library implementation extending QuickCheck concepts to Python \ No newline at end of file diff --git a/docs/research/software-engineering/quality/martin_2000_solid.md b/docs/research/software-engineering/quality/martin_2000_solid.md new file mode 100644 index 0000000..d1a13ca --- /dev/null +++ b/docs/research/software-engineering/quality/martin_2000_solid.md @@ -0,0 +1,47 @@ +# SOLID Principles — Martin, 2000 + +## Citation + +Martin, R. C. (2000). Design Principles and Design Patterns. Object Mentor. [PDF archived at Internet Archive] + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Five object-oriented design principles that reduce coupling and increase maintainability when applied together. + +## Core Findings + +1. **Single Responsibility Principle (SRP)**: A class should have only one reason to change — each class should have only one responsibility. +2. **Open-Closed Principle (OCP)**: Software entities should be open for extension but closed for modification. +3. **Liskov Substitution Principle (LSP)**: Derived classes must be substitutable for their base classes without altering program correctness. +4. **Interface Segregation Principle (ISP)**: Clients should not be forced to depend on interface methods they don't use. +5. **Dependency Inversion Principle (DIP)**: Depend on abstractions, not concrete implementations. +6. The SOLID acronym was coined around 2004 by Michael Feathers to make these principles memorable. 
+ +## Mechanism + +Each principle addresses specific coupling pathologies: SRP prevents god-objects by enforcing single responsibility; OCP prevents modification cascades by enabling extension over modification; LSP prevents behavioral contract violations in inheritance hierarchies; ISP prevents fat interfaces that force unnecessary dependencies; DIP enables loose coupling by inverting dependencies toward abstractions. Together they reduce change propagation and make systems more testable. + +## Relevance + +Foundational for modern software architecture and clean code practices. Directly applicable to module design, interface definitions, and refactoring strategies. Essential for creating maintainable codebases that can evolve without breaking existing functionality. + +## Related Research + +- (Fowler, 1999) — Refactoring patterns that support SOLID principles +- (Beck, 2002) — Test-driven development practices that reinforce these design principles \ No newline at end of file diff --git a/docs/research/software-engineering/quality/martin_2017_first_class_tests.md b/docs/research/software-engineering/quality/martin_2017_first_class_tests.md new file mode 100644 index 0000000..4423ff7 --- /dev/null +++ b/docs/research/software-engineering/quality/martin_2017_first_class_tests.md @@ -0,0 +1,45 @@ +# Test Contra-variance (First-Class Tests) — Martin, 2017 + +## Citation + +Martin, R. C. (2017). "Test Contra-variance." *Clean Coder Blog*, October 3, 2017. + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Tests should be treated as first-class citizens with independent structural design - not coupled mirror images of production code structure. + +## Core Findings + +1. **Structural Contra-variance**: Test structure should not mirror production code structure (one test class per production class creates fragile coupling) +2. 
**Behavioral Contra-variance**: As tests become more specific, production code becomes more generic, moving in opposite directions along generality axis +3. **Fragile Test Problem**: Covariant test structure causes large test changes from small production changes, breaking refactoring workflows +4. **Decoupling Through Generalization**: Generalizing production code to satisfy test specifications creates behavioral decoupling while maintaining correctness +5. **Independent Test Design**: Tests need their own architectural design to minimize coupling while maintaining behavioral verification + +## Mechanism + +Covariant test structure (mirroring production classes) creates tight coupling preventing safe refactoring. Contra-variant approach: tests maintain stable public API focus while production code extracts classes/methods behind interface. Tests become increasingly specific behavioral specifications; production code generalizes to satisfy broader spectrum of behaviors than tests specify. + +## Relevance + +Essential for sustainable TDD practices, refactoring safety, test maintenance. Applied in contract testing, API design, behavior-driven development. Fundamental for writing tests that enable rather than obstruct design improvements and architectural evolution. + +## Related Research + +Part of Robert C. Martin's Clean Code philosophy. Connects to (Beck, 2002) TDD principles, (Freeman & Pryce, 2009) GOOS methodology. Related to test design patterns, mock object strategies, behavioral specification approaches. Foundational for understanding test-production code relationships. \ No newline at end of file diff --git a/docs/research/software-engineering/quality/meszaros_2007.md b/docs/research/software-engineering/quality/meszaros_2007.md new file mode 100644 index 0000000..89cddf2 --- /dev/null +++ b/docs/research/software-engineering/quality/meszaros_2007.md @@ -0,0 +1,46 @@ +# xUnit Test Patterns — Meszaros, 2007 + +## Citation + +Meszaros, G. (2007). 
*xUnit Test Patterns: Refactoring Test Code*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Tests should specify observable behavior, not verify implementation - coupling to internal details creates brittle tests that break during refactoring even when behavior is preserved. + +## Core Findings + +1. **Test coupling spectrum**: Four levels from most resilient to most brittle: end-to-end tests (highest), unit contract tests, property-based tests, and white-box tests (lowest, avoid). +2. **Implementation coupling danger**: Tests coupled to implementation details break when code is refactored even when behavior is preserved, producing false negatives that erode trust in test suite. +3. **Semantic alignment rule**: Tests must operate at same abstraction level as acceptance criterion they verify - if criterion says "user presses W," test sends W through actual input mechanism, not internal method call. +4. **Observable behavior focus**: Decoupled tests use public interfaces and assert on observable outcomes, remaining green through refactoring because they verify what system does, not how it does it. +5. **Test pattern catalog**: Comprehensive patterns for test organization, fixture management, result verification, and test code maintainability in xUnit frameworks. + +## Mechanism + +Test coupling arises when test depends on how system works internally rather than what it does externally. Coupled tests use private methods, internal state, or implementation-specific assertions. When implementation changes — even if behavior is identical — coupled tests fail, creating noise that trains developers to ignore test failures. Decoupled tests use public interfaces and assert on observable outcomes, remaining green through refactoring. + +## Relevance + +Essential reference for writing maintainable test code in xUnit frameworks (JUnit, NUnit, etc.). 
Foundational for test-driven development practices and ensuring tests support rather than hinder refactoring. Widely used for improving test suite quality and reducing test maintenance burden. + +## Related Research + +- (Beck, 2002) — Test-driven development methodology using xUnit frameworks +- (Fowler, 1999) — Refactoring techniques that tests must support without breaking \ No newline at end of file diff --git a/docs/research/software-engineering/quality/north_2006.md b/docs/research/software-engineering/quality/north_2006.md new file mode 100644 index 0000000..965e20b --- /dev/null +++ b/docs/research/software-engineering/quality/north_2006.md @@ -0,0 +1,45 @@ +# Behaviour-Driven Development — North, 2006 + +## Citation + +North, D. (2006). "Introducing BDD." *Better Software Magazine*, March 2006. Originally published at dannorth.net. + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +BDD refines TDD by writing tests in domain language of business (Given/When/Then), making them specifications rather than verification tools. + +## Core Findings + +1. **Test Method Names as Sentences**: Using "should" template (The class *should* do something) keeps tests focused and creates readable documentation +2. **Behaviour vs Test Vocabulary**: Word "behaviour" eliminates TDD coaching confusion - what to test, when to delete tests, test naming becomes clear +3. **Given/When/Then Template**: Structured format captures acceptance criteria as executable specifications that business stakeholders can validate +4. **Ubiquitous Language for Analysis**: BDD provides consistent vocabulary bridging technical and business domains throughout entire analysis process +5. **Business Value Focus**: "What's the next most important thing the system doesn't do?" 
drives feature prioritization and development direction + +## Mechanism + +By requiring tests written in domain vocabulary (not implementation language), BDD forces shared understanding between domain experts and developers. "Given a registered user / When user logs in / Then user sees welcome message" specifies observable behaviour stakeholders care about, not technical implementation steps. Eliminates implementation coupling from specifications. + +## Relevance + +Foundational for behavior-driven development practices, acceptance test-driven development, specification by example. Essential for bridging business-technical communication gaps, creating living documentation, automated acceptance testing frameworks like Cucumber. + +## Related Research + +Created by Dan North, influenced by Eric Evans' Domain-Driven Design ubiquitous language concept, Chris Matts' business value focus. Led to development of JBehave framework, Ruby RSpec project, Cucumber framework. Foundational for modern BDD tools and practices, specification by example methodologies. \ No newline at end of file diff --git a/docs/research/software-engineering/quality/shvets_2014.md b/docs/research/software-engineering/quality/shvets_2014.md new file mode 100644 index 0000000..78780ee --- /dev/null +++ b/docs/research/software-engineering/quality/shvets_2014.md @@ -0,0 +1,46 @@ +# Refactoring.Guru — Shvets, 2014 + +## Citation + +Shvets, A. (2014–present). *Refactoring.Guru*. https://refactoring.guru/ + +## Source Type + +Blog/Article + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Refactoring follows systematic catalog of 66 techniques triggered by 21 code smells, creating diagnostic chain from problem identification to pattern application. + +## Core Findings + +1. **Smell-first methodology**: 21 code smells organized into 5 categories (Bloaters, OO Abusers, Change Preventers, Dispensables, Couplers) drive refactoring decisions. +2. 
**Systematic technique catalog**: 66 refactoring techniques organized into 6 categories (Composing Methods, Moving Features between Objects, Organizing Data, Simplifying Conditional Expressions, Simplifying Method Calls, Dealing with Generalization). +3. **Pattern-smell connection**: Each of 22 GoF design patterns triggered by specific code smell, creating motivation for pattern application. +4. **Diagnostic methodology**: Smell → refactoring technique → design pattern progression prevents speculative application. +5. **Visual learning approach**: Comprehensive illustrations and examples make complex concepts accessible to practitioners. + +## Mechanism + +The catalog provides smell-first approach: identify code smell, then apply corresponding refactoring technique or design pattern. Five smell categories group related pathologies: Bloaters (structures grown too large), OO Abusers (misapplied OOP), Change Preventers (changes that ripple), Dispensables (dead weight), Couplers (excessive inter-object dependency). Each smell entry links to refactoring techniques that resolve it, and each pattern entry explains which smell triggers it. Creates diagnostic chain where each step is motivated by previous one rather than applied speculatively. + +## Relevance + +Essential reference for code quality improvement, refactoring practice, and design pattern application. Widely used by developers for systematic code improvement and architectural decision-making. Provides practical methodology for identifying and resolving code quality issues. 
+ +## Related Research + +- (Fowler, 1999) — Foundational refactoring catalog and methodology +- (Gamma et al., 1995) — Original Gang of Four design patterns catalog \ No newline at end of file diff --git a/docs/research/software-engineering/requirements/christel_kang_1992.md b/docs/research/software-engineering/requirements/christel_kang_1992.md new file mode 100644 index 0000000..ab69d75 --- /dev/null +++ b/docs/research/software-engineering/requirements/christel_kang_1992.md @@ -0,0 +1,45 @@ +# Issues in Requirements Elicitation — Christel & Kang, 1992 + +## Citation + +Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. Software Engineering Institute, Carnegie Mellon University. https://www.sei.cmu.edu/library/abstracts/reports/92tr012.cfm + +## Source Type + +Academic Paper + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Stakeholders have three structural problems making direct questioning insufficient: omitting "obvious" information, trouble articulating unarticulated needs, not knowing what they want until seeing what they don't want. + +## Core Findings + +1. **Three Stakeholder Problems**: Omit obvious information, difficulty articulating tacit knowledge, discover needs reactively +2. **Expert Knowledge Nature**: Largely procedural and tacit - experts describe beliefs about what happens, not actual reality +3. **Elicitation Activities**: Fact-finding, requirements gathering, evaluation and rationalization, prioritization, integration +4. **Gap-Finding Necessity**: Required techniques that bypass expert's mental schema to uncover actual vs. perceived processes +5. **Specification vs. Elicitation**: Most tools focus on representation (specification) rather than discovery (elicitation) + +## Mechanism + +Expert knowledge is largely procedural and tacit. 
When asked "how does the system work?", experts describe their beliefs about what happens rather than observing actual processes. Gap-finding techniques are required because they bypass the expert's mental schema and reveal discrepancies between perceived and actual workflows. + +## Relevance + +Essential for requirements engineering, systems analysis, stakeholder interviews, business analysis. Applied in software development, system design, process improvement. Foundational for understanding why traditional interviewing fails and why observational techniques are necessary. + +## Related Research + +Connects to (Flanagan, 1954) on Critical Incident Technique for elicitation, (Fisher & Geiselman, 1987) on Enhanced Cognitive Interview methods. Part of broader requirements engineering methodology alongside prototyping and use case analysis. Related to tacit knowledge research and cognitive interview techniques. diff --git a/docs/research/software-engineering/requirements/kano_et_al_1984.md b/docs/research/software-engineering/requirements/kano_et_al_1984.md new file mode 100644 index 0000000..11a977f --- /dev/null +++ b/docs/research/software-engineering/requirements/kano_et_al_1984.md @@ -0,0 +1,45 @@ +# Attractive Quality and Must-Be Quality (Kano Model) — Kano et al., 1984 + +## Citation + +Kano, N., Seraku, N., Takahashi, F., & Tsuji, S. (1984). "Attractive quality and must-be quality." *Journal of the Japanese Society for Quality Control*, 14(2), 39–48. + +## Source Type + +Academic Paper + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Customer satisfaction is not unidimensional - features fall into asymmetric categories enabling differentiated prioritization strategies. + +## Core Findings + +1. **Five Feature Categories**: Must-be (expected baseline), One-dimensional (more is better), Attractive (delighters), Indifferent (no impact), Reverse (negative impact) +2. 
**Asymmetric Satisfaction**: Must-be features don't increase satisfaction when present but cause dissatisfaction when absent; Attractive features delight when present but don't dissatisfy when absent +3. **Strategic Prioritization**: First ensure Must-be features exist, then maximize One-dimensional features, then selectively invest in Attractive features +4. **Temporal Evolution**: Features migrate from Attractive → One-dimensional → Must-be over time as customer expectations evolve +5. **Empirical Measurement**: Standardized functional/dysfunctional questionnaire pairs enable systematic feature classification + +## Mechanism + +Must-be features form satisfaction baseline - presence doesn't increase satisfaction but absence causes dissatisfaction. Attractive features provide asymmetric delight - presence surprises positively but absence goes unnoticed. This asymmetry enables strategic resource allocation focusing first on preventing dissatisfaction, then creating satisfaction. + +## Relevance + +Essential for product management, requirements prioritization, customer satisfaction strategy. Applied in Quality Function Deployment (QFD), feature roadmapping, competitive analysis. Foundational for understanding satisfaction drivers beyond linear assumptions in product development. + +## Related Research + +Developed by Noriaki Kano building on Herzberg's two-factor theory. Connects to (Herzberg, 1959) on hygiene vs motivator factors. Applied in Quality Function Deployment methodologies and modern product management frameworks. Related to Jobs-to-be-Done theory and customer outcome prioritization. diff --git a/docs/research/software-engineering/requirements/wake_2003.md b/docs/research/software-engineering/requirements/wake_2003.md new file mode 100644 index 0000000..a4616dd --- /dev/null +++ b/docs/research/software-engineering/requirements/wake_2003.md @@ -0,0 +1,45 @@ +# INVEST in Good Stories — Wake, 2003 + +## Citation + +Wake, B. (2003). 
*INVEST in Good Stories, and SMART Tasks*. XP123.com. + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Stories that are Independent, Negotiable, Valuable, Estimable, Small, and Testable produce fewer downstream defects and smoother development cycles. + +## Core Findings + +1. **INVEST Acronym**: Independent (no overlap, any order), Negotiable (details co-created), Valuable (to customer), Estimable (sized appropriately), Small (few person-weeks), Testable (clear acceptance criteria) +2. **Vertical Slicing**: Split stories vertically through all layers (network/persistence/logic/presentation) rather than horizontally by layer +3. **Testable Forces Clarity**: "Testable" forces Product Owner to express observable outcomes, directly enabling Given/When/Then format +4. **Size Impact**: Smaller stories get more accurate estimates; above few person-weeks becomes too hard to scope +5. **SMART Tasks Extension**: Specific, Measurable, Achievable, Relevant, Time-boxed tasks complement INVEST stories + +## Mechanism + +"Testable" forces PO to express observable outcomes directly enabling Given/When/Then. "Small" forces decomposition preventing scope creep. "Independent" prevents hidden ordering dependencies. "Valuable" ensures customer-centric vertical slicing through technical layers. + +## Relevance + +Foundational for agile user story writing, product backlog management, acceptance criteria definition. Applied in Scrum, XP, SAFe methodologies. Essential for Product Owners, Business Analysts, development teams writing effective requirements. + +## Related Research + +Created by Bill Wake (XP123.com) in 2003. Consciously developed by clustering story attributes and finding memorable acronym. Widely adopted in agile methodologies. Complements (Cohn, 2004) user story practices and (North, 2006) BDD Given/When/Then format. 
diff --git a/docs/research/software-engineering/requirements/wynne_2015.md b/docs/research/software-engineering/requirements/wynne_2015.md new file mode 100644 index 0000000..1945ec9 --- /dev/null +++ b/docs/research/software-engineering/requirements/wynne_2015.md @@ -0,0 +1,45 @@ +# Example Mapping — Wynne, 2015 + +## Citation + +Wynne, M. (2015). "Introducing Example Mapping." *Cucumber Blog*. https://cucumber.io/blog/bdd/example-mapping-introduction/ + +## Source Type + +Blog/Article + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Inserting a "rules" layer between stories and examples prevents redundant or contradictory acceptance criteria - visual arrangement reveals story complexity and knowledge gaps before development begins. + +## Core Findings + +1. **Four Card Types**: Story (yellow), Rules (blue), Examples (green), Questions (red) using colored index cards in visual mapping +2. **Quality Signals**: Many rules → story needs splitting; many examples per rule → rule too complex; many red cards → story not ready; no red cards → conversation may be insufficient +3. **Time-boxed Process**: Well-understood, well-sized story should map in ~25 minutes with thumb-vote to determine development readiness +4. **Rules Layer Value**: Groups related examples under business rules they illustrate, preventing duplicated logic and making business constraints explicit +5. **"Friends Episode" Naming**: Rough examples using informal names ("The one where customer forgot receipt") instead of formal Gherkin during mapping + +## Mechanism + +Collaborative session involves stakeholders placing colored cards on table/wall. Visual arrangement provides instant feedback on story complexity. Rules layer acts as intermediary between high-level stories and concrete examples, preventing redundancy and revealing natural slicing points. 
+ +## Relevance + +Essential for BDD story refinement, three amigos sessions, backlog grooming. Applied in agile requirements discovery, acceptance criteria definition, story sizing. Foundational technique for preventing oversized stories entering sprints and discovering unknown unknowns systematically. + +## Related Research + +Created by Matt Wynne (Cucumber Project Lead) in 2015. Builds on (North, 2006) BDD practices and three amigos concept. Complements (Wake, 2003) INVEST criteria by providing structured discovery technique. Part of broader BDD ecosystem alongside Gherkin, specification workshops, deliberate discovery practices. \ No newline at end of file diff --git a/docs/scientific-research/README.md b/docs/scientific-research/README.md deleted file mode 100644 index 3338996..0000000 --- a/docs/scientific-research/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Scientific Research — Index - -Theoretical and empirical foundations for the decisions made in this template, organized by domain. 
- -| File | Entries | Domain | -|---|---|---| -| `cognitive-science.md` | 1–10 | Pre-mortem, implementation intentions, commitment devices, System 2, adversarial collaboration, accountability, chunking, elaborative encoding, error feedback, prospective memory | -| `testing.md` | 11–15, 51–54 | Observable behavior testing, test-behavior alignment, first-class tests, property-based testing, mutation testing, Canon TDD, GOOS outer/inner loop, Is TDD Dead, BDD origin | -| `software-economics.md` | 16 | Cost of change curve (shift left) | -| `requirements-elicitation.md` | 17–20, 28–30, 43–50 | INVEST, Example Mapping, declarative Gherkin, MoSCoW, active listening, Kipling 5Ws, BA framework, FDD, affinity mapping, Event Storming, CIT, cognitive interview, laddering, funnel technique, RE issues | -| `domain-modeling.md` | 31, 63–68 | DDD bounded contexts, ubiquitous language, feature identification, DDD Reference, Fowler UL/BC bliki, Vernon IDDD, Verraes UL-not-glossary, Whirlpool | -| `oop-design.md` | 32–35 | Object Calisthenics, Refactoring (Fowler), GoF Design Patterns, SOLID | -| `refactoring-empirical.md` | 36–41 | QDIR smell prioritization, smells + architectural refactoring, SPIRIT tool, bad OOP engineering properties, CWC complexity metric, metric threshold unreliability | -| `architecture.md` | 42, 55–58 | Hexagonal Architecture, ADRs, 4+1 View Model, C4 model, information hiding | -| `ai-agents.md` | 21–27 | Minimal-scope agent design, context isolation, on-demand skills, instruction conflict resolution failure, positional attention degradation, modular prompt de-duplication, three-file separation | -| `documentation.md` | 59–62 | Developer information needs, docs-as-code, Diátaxis documentation framework, blameless post-mortems | diff --git a/docs/scientific-research/ai-agents.md b/docs/scientific-research/ai-agents.md deleted file mode 100644 index aaae407..0000000 --- a/docs/scientific-research/ai-agents.md +++ /dev/null @@ -1,118 +0,0 @@ -# Scientific 
Research — AI Agent Design - -Foundations for the agent architecture, file structure, and context management decisions in this template. - ---- - -### 21. Minimal-Scope Agent Design - -| | | -|---|---| -| **Source** | OpenAI. (2024). *Agent definitions*. OpenAI Agents SDK Documentation. https://platform.openai.com/docs/guides/agents/define-agents | -| **Date** | 2024 | -| **Alternative** | Anthropic. (2024). *Building effective agents*. Anthropic Engineering Blog. https://www.anthropic.com/engineering/building-effective-agents | -| **Status** | Confirmed — corrects the belief that subagents should be "lean routing agents" | -| **Core finding** | "Define the smallest agent that can own a clear task. Add more agents only when you need separate ownership, different instructions, different tool surfaces, or different approval policies." The split criterion is ownership boundary, not instruction volume. | -| **Mechanism** | Multiple agents competing to own the same concern create authority conflicts and inconsistent tool access. The right unit is the smallest coherent domain that requires exclusive responsibility. | -| **Where used** | Agent design in `.opencode/agents/*.md` — 4 agents, each owning a distinct domain (PO, software-engineer, reviewer, setup). | - ---- - -### 22. Context Isolation via Subagents - -| | | -|---|---| -| **Source** | Anthropic. (2025). *Best practices for Claude Code*. Anthropic Documentation. https://www.anthropic.com/engineering/claude-code-best-practices | -| **Date** | 2025 | -| **Status** | Confirmed — the primary reason subagents exist is context isolation, not routing | -| **Core finding** | Subagents run in their own context windows and report back summaries, keeping the main conversation clean for implementation. Every file read in a subagent burns tokens in a child window, not the primary window. | -| **Mechanism** | Context window is the primary performance constraint for LLM agents. 
Investigation tasks rapidly exhaust context if done inline. Delegating to a subagent quarantines that cost; the primary agent receives only the distilled result. A fresh context in the subagent also prevents anchoring bias from prior conversation state. | -| **Where used** | OpenCode `task` tool usage in all agents; `explore` and `general` built-in subagents. | - ---- - -### 23. On-Demand Skill Loading (Context Budget) - -| | | -|---|---| -| **Source** | Anthropic. (2025). *Best practices for Claude Code*. Anthropic Documentation. https://www.anthropic.com/engineering/claude-code-best-practices | -| **Date** | 2025 | -| **Alternative** | OpenCode. (2026). *Agent Skills*. OpenCode Documentation. https://opencode.ai/docs/skills/ | -| **Status** | Confirmed (vendor guidance) — benefit on task completion quality extrapolated from RAG retrieval literature | -| **Core finding** | "CLAUDE.md is loaded every session, so only include things that apply broadly. For domain knowledge or workflows only relevant sometimes, use skills instead. Claude loads them on demand without bloating every conversation." Bloated always-loaded files cause Claude to ignore critical instructions. | -| **Mechanism** | Every token in an unconditionally-loaded file competes for attention against the task prompt. Long always-loaded files push important instructions beyond effective attention range, causing silent non-compliance. Skills are injected only when the task calls for them, preserving the primary context budget. | -| **Where used** | `AGENTS.md` carries only shared project conventions and commands; all step-specific workflows live in `.opencode/skills/*.md` and are loaded via the `skill` tool only when the relevant step begins. | - ---- - -### 24. Instruction Conflict Resolution Failure in LLMs - -| | | -|---|---| -| **Source** | Geng et al. (2025). Control Illusion: The Failure of Instruction Hierarchies in Large Language Models. AAAI-26. arXiv:2502.15851. 
https://arxiv.org/abs/2502.15851 | -| **Date** | 2025 | -| **Alternative** | Wallace et al. (2024). The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions. arXiv:2404.13208. | -| **Status** | Confirmed — peer-reviewed (AAAI-26), replicated across 6 models | -| **Core finding** | LLMs do not reliably prioritize system-prompt instructions over conflicting instructions from other sources. Resolution is inconsistent and biased by pretraining-derived priors, not by prompt structure or position. | -| **Mechanism** | No structural separation between instruction sources enforces reliable priority at inference time. When the same directive appears in two locations with divergent content, the model selects between them based on statistical priors from pretraining. | -| **Where used** | Justifies single source of truth in `AGENTS.md`: workflow details duplicated across agent files and skills that drift out of sync produce conflicting instructions the model cannot resolve reliably. | - ---- - -### 25. Positional Attention Degradation in Long Contexts - -| | | -|---|---| -| **Source** | Liu et al. (2023). Lost in the Middle: How Language Models Use Long Contexts. *Transactions of the Association for Computational Linguistics*. arXiv:2307.03172. https://arxiv.org/abs/2307.03172 | -| **Date** | 2023 | -| **Alternative** | McKinnon (2025). arXiv:2511.05850 — effect attenuated for simple retrieval in Gemini 2.5+; persists for multi-hop reasoning. | -| **Status** | Confirmed with caveat — robust for multi-hop reasoning; attenuated for simple retrieval in frontier models (2025–2026) | -| **Core finding** | Performance on tasks requiring retrieval from long contexts follows a U-shaped curve: highest when relevant content is at the beginning or end of the context, degraded when content falls in the middle. | -| **Mechanism** | Transformer attention is not uniform across token positions. 
Content placed in the middle of a long context receives less attention weight regardless of its relevance. | -| **Where used** | Supports keeping always-loaded files lean. Duplicated workflow detail in always-loaded files increases total context length, pushing other content into lower-attention positions. | - ---- - -### 26. Modular Prompt De-duplication Reduces Interference - -| | | -|---|---| -| **Source** | Sharma & Henley (2026). Modular Prompt Optimization. arXiv:2601.04055. https://arxiv.org/abs/2601.04055 | -| **Date** | 2026 | -| **Status** | Partially confirmed — single-agent reasoning benchmarks only; not tested on multi-file agent architectures | -| **Core finding** | Structured prompts with explicit section de-duplication outperform both monolithic prompts and unstructured modular prompts. The mechanism cited is "reducing redundancy and interference between components." | -| **Mechanism** | Redundant content across prompt sections creates competing attention targets. De-duplication concentrates relevant signal in one canonical location per concern. | -| **Where used** | Supports the rule that skills and agent routing files contain no duplication of `AGENTS.md` content or of each other. | - ---- - -### 27. Agent File Architecture — Three-File Separation - -| | | -|---|---| -| **Source** | Convergence of entries 23, 24, 25, 26. | -| **Date** | — | -| **Status** | Inferred — no direct A/B test of this architecture exists; supported by convergence of confirmed and partially confirmed findings above | -| **Core finding** | Three distinct failure modes (instruction conflict on drift, positional attention degradation, redundancy interference) converge to produce a three-file split with defined content rules for each. | -| **Mechanism** | Each file runs at a different time and serves a different purpose. Mixing concerns across files reintroduces the failure modes the split is designed to prevent. 
| -| **Where used** | Structural rule for `AGENTS.md`, `.opencode/agents/*.md`, and `.opencode/skills/*.md`. | - -| File | Runs when | Contains | Does NOT contain | -|---|---|---|---| -| `AGENTS.md` | Every session, always loaded | Project conventions, shared commands, formats, standards | Step procedures, role-specific rules, path specs | -| `.opencode/agents/*.md` | When that role is invoked | Role identity, step ownership, skill load instructions, tool permissions, escalation paths | Workflow details, principle lists, path specs, commit formats | -| `.opencode/skills/*.md` | On demand, when that step begins | Full procedural instructions for that step, self-contained | Duplication of `AGENTS.md` content or other skills | - ---- - -## Bibliography - -1. Anthropic. (2024). Building effective agents. https://www.anthropic.com/engineering/building-effective-agents -2. Anthropic. (2025). Best practices for Claude Code. https://www.anthropic.com/engineering/claude-code-best-practices -3. Geng et al. (2025). Control Illusion. AAAI-26. arXiv:2502.15851. https://arxiv.org/abs/2502.15851 -4. Liu, N. F. et al. (2023). Lost in the Middle. *TACL*. arXiv:2307.03172. https://arxiv.org/abs/2307.03172 -5. McKinnon, R. (2025). arXiv:2511.05850. https://arxiv.org/abs/2511.05850 -6. OpenAI. (2024). Agent definitions. https://platform.openai.com/docs/guides/agents/define-agents -7. OpenCode. (2026). Agent Skills. https://opencode.ai/docs/skills/ -8. Sharma, A., & Henley, A. (2026). Modular Prompt Optimization. arXiv:2601.04055. https://arxiv.org/abs/2601.04055 -9. Wallace, E. et al. (2024). The Instruction Hierarchy. arXiv:2404.13208. diff --git a/docs/scientific-research/architecture.md b/docs/scientific-research/architecture.md deleted file mode 100644 index 8cf3a9d..0000000 --- a/docs/scientific-research/architecture.md +++ /dev/null @@ -1,86 +0,0 @@ -# Scientific Research — Architecture - -Foundations for the architectural decisions and patterns used in this template. 
- ---- - -### 42. Hexagonal Architecture — Ports and Adapters - -| | | -|---|---| -| **Source** | Cockburn, A. (2005). "Hexagonal Architecture." *alistair.cockburn.us*. https://alistair.cockburn.us/hexagonal-architecture/ | -| **Date** | 2005 | -| **Alternative** | Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. (Chapter 7: "Ports and Adapters") | -| **Status** | Confirmed — foundational; widely adopted as Clean Architecture, Onion Architecture | -| **Core finding** | The application domain should have no knowledge of external systems (databases, filesystems, network, UI). All contact between the domain and the outside world passes through a **port** (an interface / Protocol) and an **adapter** (a concrete implementation of that port). The domain is independently testable without any infrastructure. The key structural rule: dependency arrows point inward — domain code never imports from adapters; adapters import from domain. | -| **Mechanism** | Two distinct sides of any application: the "driving side" (actors who initiate action — tests, UI, CLI) and the "driven side" (actors the application drives — databases, filesystems, external services). Each driven-side dependency is hidden behind a port. Tests supply a test adapter; production supplies a real adapter. Substituting adapters requires no domain code changes. This is SOLID-D at the architectural layer. | -| **Where used** | Step 2 (Architecture): if an external dependency is identified during domain analysis, assign it a Protocol. `ports/` and `adapters/` folders emerge when a concrete dependency is confirmed — do not pre-create them. The dependency-inversion principle (SOLID-D) is the goal; the folder names are convention, not law. | - ---- - -### 55. Architecture Decision Records (ADRs) - -| | | -|---|---| -| **Source** | Nygard, M. T. (2011). "Documenting Architecture Decisions." *cognitect.com*. 
https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions | -| **Date** | 2011 | -| **Alternative** | Keeling, M. (2017). *Design It!: From Programmer to Software Architect*. Pragmatic Bookshelf. (Chapter 6: "Architectural Decisions") | -| **Status** | Confirmed — widely adopted industry standard; tooled by adr-tools, ADR Manager, Log4Brains | -| **Core finding** | Architectural decisions should be recorded as short, immutable documents capturing: what was decided, why, and what alternatives were rejected. Without this record, decisions get re-litigated by every new developer (or AI agent) who encounters the codebase, producing rework and re-divergence. | -| **Mechanism** | An ADR is written at decision time, never edited afterward. If the decision changes, a new ADR is written that supersedes the old one. The append-only record becomes a reliable audit trail. The constraint "one sentence per field" forces clarity — if you can't state the reason in one sentence, the decision is not yet understood. | -| **Where used** | `docs/architecture/architecture.md` (ADR template). SE appends one block per non-obvious decision after Step 2. The `living-docs` skill reads ADRs as input for C4 diagram annotations. | - ---- - -### 56. The 4+1 View Model of Architecture - -| | | -|---|---| -| **Source** | Kruchten, P. B. (1995). "The 4+1 View Model of Architecture." *IEEE Software*, 12(6), 42–50. https://doi.org/10.1109/52.469759 | -| **Date** | 1995 | -| **Alternative** | Bass, L., Clements, P., & Kazman, R. (2021). *Software Architecture in Practice* (4th ed.). Addison-Wesley. | -| **Status** | Confirmed — 3,000+ citations; foundational IEEE reference for architectural documentation | -| **Core finding** | A single architectural diagram cannot communicate all relevant aspects of a system. 
Four distinct views are required: **Logical** (domain objects and relationships), **Process** (runtime behavior and concurrency), **Development** (module organisation and dependencies), **Physical** (deployment topology). A fifth **Scenarios** view (use cases) ties the four together by showing how each scenario exercises each view. | -| **Mechanism** | Different stakeholders need different views: a developer needs the Development view; an operator needs the Physical view; a domain expert needs the Logical view. Conflating views into one diagram produces a cluttered diagram that satisfies nobody. The 4+1 model assigns each concern to its appropriate view and cross-validates them through scenarios. | -| **Where used** | Theoretical foundation for the C4 model (entry 57). The `living-docs` skill generates C4 diagrams that map to: Context diagram (Scenarios view), Container diagram (Physical + Development views), Component diagram (Logical + Development views). | - ---- - -### 57. The C4 Model for Software Architecture - -| | | -|---|---| -| **Source** | Brown, S. (2018). *The C4 Model for Software Architecture*. Leanpub. https://c4model.com | -| **Date** | 2018 (ongoing) | -| **Alternative** | Brown, S. (2023). "The C4 model for visualising software architecture." *InfoQ*. | -| **Status** | Confirmed — widely adopted; tooled by Structurizr, PlantUML C4, Mermaid C4 | -| **Core finding** | Software architecture can be communicated at four zoom levels: **Level 1 — System Context** (who uses the system and what external systems it talks to), **Level 2 — Container** (major runnable/deployable units), **Level 3 — Component** (major structural building blocks within a container), **Level 4 — Code** (classes, interfaces; usually auto-generated). Each level answers a specific question; mixing levels in one diagram creates confusion. 
| -| **Mechanism** | C4 operationalises the 4+1 View Model (entry 56) into a lightweight notation that can be expressed in text (PlantUML, Mermaid) and version-controlled alongside code. The notation is deliberately constrained: boxes (people, systems, containers, components) and unidirectional arrows with labels. No UML formalism required. Context + Container diagrams cover >90% of communication needs for most teams. | -| **Where used** | The `living-docs` skill generates and updates C4 diagrams in `docs/c4/`. Context diagram (L1) always generated; Container (L2) generated when multiple containers are identified; Component (L3) generated on demand. Source files are Mermaid so they render in GitHub and are version-controlled. | - ---- - -### 58. Information Hiding — Module Decomposition Criterion - -| | | -|---|---| -| **Source** | Parnas, D. L. (1972). "On the criteria to be used in decomposing systems into modules." *Communications of the ACM*, 15(12), 1053–1058. https://doi.org/10.1145/361598.361623 | -| **Date** | 1972 | -| **Alternative** | Parnas, D. L. (1974). "On a 'buzzword': Hierarchical structure." *Proc. IFIP Congress 74*, 336–339. | -| **Status** | Confirmed — 4,000+ citations; foundational criterion for all modular decomposition in software engineering | -| **Core finding** | The correct criterion for decomposing a system into modules is **information hiding**: each module hides a design decision that is likely to change. A module's interface reveals only what callers need; its implementation hides how. Decomposing by execution steps (procedure-based) creates tight coupling to implementation order; decomposing by change-prone decisions (information-hiding) allows each decision to be changed independently. | -| **Mechanism** | Identify which decisions are most likely to change (data structures, algorithms, I/O formats, external service protocols). Each such decision becomes a module boundary. 
The module's public interface is defined to be change-stable; the implementation is change-free from the caller's perspective. This is the theoretical basis for SOLID-D (depend on abstractions), Hexagonal Architecture (hide external decisions behind ports), and DDD bounded contexts (hide language decisions behind context boundaries). | -| **Where used** | Step 2 Architecture: bounded context check ("same word, different meaning across features? → module boundary") and external dep Protocol assignment both apply the information-hiding criterion. The `living-docs` skill uses module boundaries as container/component boundaries in `docs/c4/` diagrams. | - ---- - -## Bibliography - -1. Bass, L., Clements, P., & Kazman, R. (2021). *Software Architecture in Practice* (4th ed.). Addison-Wesley. -2. Brown, S. (2018). *The C4 Model for Software Architecture*. Leanpub. https://c4model.com -3. Cockburn, A. (2005). Hexagonal Architecture. *alistair.cockburn.us*. https://alistair.cockburn.us/hexagonal-architecture/ -4. Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. -5. Keeling, M. (2017). *Design It!: From Programmer to Software Architect*. Pragmatic Bookshelf. -6. Kruchten, P. B. (1995). The 4+1 View Model of Architecture. *IEEE Software*, 12(6), 42–50. https://doi.org/10.1109/52.469759 -7. Nygard, M. T. (2011). Documenting Architecture Decisions. *cognitect.com*. https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions -8. Parnas, D. L. (1972). On the criteria to be used in decomposing systems into modules. *CACM*, 15(12), 1053–1058. 
https://doi.org/10.1145/361598.361623 diff --git a/docs/scientific-research/cognitive-science.md b/docs/scientific-research/cognitive-science.md deleted file mode 100644 index dad8e2b..0000000 --- a/docs/scientific-research/cognitive-science.md +++ /dev/null @@ -1,150 +0,0 @@ -# Scientific Research — Cognitive Science - -Mechanisms from cognitive and social psychology that justify workflow design decisions in this template. - ---- - -### 1. Pre-mortem (Prospective Hindsight) - -| | | -|---|---| -| **Source** | Klein, G. (1998). *Sources of Power: How People Make Decisions*. MIT Press. | -| **Date** | 1998 | -| **Status** | Confirmed | -| **Core finding** | Asking "imagine this failed — why?" catches 30% more issues than forward-looking review. | -| **Mechanism** | Prospective hindsight shifts from prediction (weak) to explanation (strong). The brain is better at explaining past events than predicting future ones. By framing as "it already failed," you activate explanation mode. | -| **Where used** | PO pre-mortem at scope, software-engineer pre-mortem before handoff. | - ---- - -### 2. Implementation Intentions - -| | | -|---|---| -| **Source** | Gollwitzer, P. M. (1999). Implementation intentions: Strong effects of simple plans. *American Psychologist*, 54(7), 493–503. | -| **Date** | 1999 | -| **Status** | Confirmed | -| **Core finding** | "If X then Y" plans are 2–3x more likely to execute than general intentions. | -| **Mechanism** | If-then plans create automatic cue-response links in memory. The brain processes "if function > 20 lines then extract helper" as an action trigger, not a suggestion to consider. | -| **Where used** | Refactor Self-Check Gates in `implementation/SKILL.md`, Code Quality checks in `verify/SKILL.md`. | - ---- - -### 3. Commitment Devices - -| | | -|---|---| -| **Source** | Cialdini, R. B. (2001). *Influence: The Psychology of Persuasion* (rev. ed.). HarperBusiness. 
| -| **Date** | 2001 | -| **Status** | Confirmed | -| **Core finding** | Forcing an explicit micro-commitment (filling in a PASS/FAIL cell) creates resistance to reversals. A checkbox checked is harder to uncheck than a todo noted. | -| **Mechanism** | Structured tables with PASS/FAIL cells create commitment-device effects. The act of marking "FAIL" requires justification, making silent passes psychologically costly. | -| **Where used** | SOLID enforcement table, ObjCal enforcement table, Design Patterns table — all require explicit PASS/FAIL with evidence. | - ---- - -### 4. System 2 Before System 1 - -| | | -|---|---| -| **Source** | Kahneman, D. (2011). *Thinking, Fast and Slow*. Farrar, Straus and Giroux. | -| **Date** | 2011 | -| **Status** | Confirmed | -| **Core finding** | System 1 (fast, automatic) is vulnerable to anchoring and confirmation bias. System 2 (slow, deliberate) must be activated before System 1's judgments anchor. | -| **Mechanism** | Running semantic review *before* automated commands prevents the "all green" dopamine hit from anchoring the reviewer's judgment. Doing hard cognitive work first protects against System 1 shortcuts. | -| **Where used** | Verification order in `verify/SKILL.md`: semantic alignment check before commands. | - ---- - -### 5. Adversarial Collaboration - -| | | -|---|---| -| **Source** | Mellers, B. A., Hertwig, R., & Kahneman, D. (2001). Do frequency representations eliminate conflict? An exercise in adversarial collaboration. *Psychological Science*, 12(4), 269–275. | -| **Date** | 2001 | -| **Status** | Confirmed | -| **Core finding** | Highest-quality thinking emerges when parties hold different hypotheses and are charged with finding flaws in each other's reasoning. | -| **Mechanism** | Explicitly framing the reviewer as "your job is to break this feature" activates the adversarial collaboration mode. The reviewer seeks disconfirmation rather than confirmation. | -| **Where used** | Adversarial mandate in `reviewer.md` and `verify/SKILL.md`. 
| - ---- - -### 6. Accountability to Unknown Audience - -| | | -|---|---| -| **Source** | Tetlock, P. E. (1983). Accountability: A social determinant of judgment. In *Psychology of Learning and Motivation* (Vol. 17, pp. 295–332). Academic Press. | -| **Date** | 1983 | -| **Status** | Confirmed | -| **Core finding** | Accountability to an unknown audience with unknown views improves reasoning quality. The agent anticipates being audited and adjusts reasoning. | -| **Mechanism** | The explicit report format (APPROVED/REJECTED with evidence) creates an accountability structure — the reviewer's reasoning will be read by the PO. | -| **Where used** | Report format in `verify/SKILL.md`, structured evidence columns in all enforcement tables. | - ---- - -### 7. Chunking and Cognitive Load Reduction - -| | | -|---|---| -| **Source** | Miller, G. A. (1956). The magical number seven, plus or minus two. *Psychological Review*, 63(2), 81–97. | -| **Date** | 1956 | -| **Alternative** | Sweller, J. (1988). Cognitive load during problem solving. *Cognitive Science*, 12(2), 257–285. | -| **Status** | Confirmed | -| **Core finding** | Structured tables reduce working memory load vs. narrative text. Chunking related items into table rows enables parallel processing. | -| **Mechanism** | Replacing prose checklists with structured tables (rows × columns) allows the reviewer to process all items in a single pass. | -| **Where used** | All enforcement tables in `verify/SKILL.md` and `reviewer.md`. | - ---- - -### 8. Elaborative Encoding - -| | | -|---|---| -| **Source** | Craik, F. I. M., & Lockhart, R. S. (1972). Levels of processing: A framework for memory research. *Journal of Verbal Learning and Verbal Behavior*, 11(6), 671–684. | -| **Date** | 1972 | -| **Status** | Confirmed | -| **Core finding** | Deeper processing — explaining *why* a rule matters — leads to better retention and application than shallow processing. 
| -| **Mechanism** | Adding a "Why it matters" column to enforcement tables forces the reviewer to process the rationale, not just scan the rule name. | -| **Where used** | SOLID table, ObjCal table, Design Patterns table — all have "Why it matters" column. | - ---- - -### 9. Error-Specific Feedback - -| | | -|---|---| -| **Source** | Hattie, J., & Timperley, H. (2007). The power of feedback. *Review of Educational Research*, 77(1), 81–112. | -| **Date** | 2007 | -| **Status** | Confirmed | -| **Core finding** | Feedback is most effective when it tells the agent exactly what went wrong and what the correct action is. "FAIL: function > 20 lines at file:47" is actionable; "Apply function length rules" is not. | -| **Mechanism** | The evidence column in enforcement tables requires specific file:line references, turning vague rules into actionable directives. | -| **Where used** | Evidence column in all enforcement tables. | - ---- - -### 10. Prospective Memory Cues - -| | | -|---|---| -| **Source** | McDaniel, M. A., & Einstein, G. O. (2000). Strategic and automatic processes in prospective memory retrieval. *Applied Cognitive Psychology*, 14(7), S127–S144. | -| **Date** | 2000 | -| **Status** | Confirmed | -| **Core finding** | Memory for intended actions is better when cues are embedded at the point of action, not in a separate appendix. | -| **Mechanism** | Placing if-then gates inline (in the REFACTOR section) rather than in a separate "reference" document increases adherence. The cue appears exactly when the developer is about to make the relevant decision. | -| **Where used** | Refactor Self-Check Gates embedded inline in `refactor/SKILL.md`. | - ---- - -## Bibliography - -1. Cialdini, R. B. (2001). *Influence: The Psychology of Persuasion* (rev. ed.). HarperBusiness. -2. Craik, F. I. M., & Lockhart, R. S. (1972). Levels of processing: A framework for memory research. *Journal of Verbal Learning and Verbal Behavior*, 11(6), 671–684. -3. Gollwitzer, P. M. (1999). 
Implementation intentions: Strong effects of simple plans. *American Psychologist*, 54(7), 493–503. -4. Hattie, J., & Timperley, H. (2007). The power of feedback. *Review of Educational Research*, 77(1), 81–112. -5. Kahneman, D. (2011). *Thinking, Fast and Slow*. Farrar, Straus and Giroux. -6. Klein, G. (1998). *Sources of Power: How People Make Decisions*. MIT Press. -7. McDaniel, M. A., & Einstein, G. O. (2000). Strategic and automatic processes in prospective memory retrieval. *Applied Cognitive Psychology*, 14(7), S127–S144. -8. Mellers, B. A., Hertwig, R., & Kahneman, D. (2001). Do frequency representations eliminate conflict? An exercise in adversarial collaboration. *Psychological Science*, 12(4), 269–275. -9. Miller, G. A. (1956). The magical number seven, plus or minus two. *Psychological Review*, 63(2), 81–97. -10. Sweller, J. (1988). Cognitive load during problem solving. *Cognitive Science*, 12(2), 257–285. -11. Tetlock, P. E. (1983). Accountability: A social determinant of judgment. In *Psychology of Learning and Motivation* (Vol. 17). Academic Press. diff --git a/docs/scientific-research/documentation.md b/docs/scientific-research/documentation.md deleted file mode 100644 index 9c77a00..0000000 --- a/docs/scientific-research/documentation.md +++ /dev/null @@ -1,69 +0,0 @@ -# Scientific Research — Documentation - -Foundations for living documentation, docs-as-code, information architecture, and post-mortem practices used in this template. - ---- - -### 59. Information Needs in Collocated Software Development Teams - -| | | -|---|---| -| **Source** | Ko, A. J., DeLine, R., & Venolia, G. (2007). "Information Needs in Collocated Software Development Teams." *Proc. 29th International Conference on Software Engineering (ICSE 2007)*, pp. 344–353. IEEE. https://doi.org/10.1109/ICSE.2007.45 | -| **Date** | 2007 | -| **Alternative** | Dagenais, B., & Robillard, M. P. (2010). "Creating and evolving developer documentation." *Proc. FSE 2010*, pp. 127–136. ACM. 
| -| **Status** | Confirmed — empirical study; 600+ citations | -| **Core finding** | Developers spend 35–50% of their working time not writing code but searching for information — navigating code, reading past decisions, and understanding relationships between components. The most frequently sought information is: who wrote this, why was it written this way, and what does this module depend on. Direct questioning of teammates is the most common fallback when documentation is absent, creating serial bottlenecks. | -| **Mechanism** | Information seeking is triggered by a task, not by curiosity. A developer encountering an unfamiliar component has a specific decision to make. When documentation is absent, the seek-ask-wait loop (find the right person, ask, wait for a response) dominates time. Persistent documentation (ADRs, architecture diagrams, glossary) short-circuits this loop by making the answer findable without a human intermediary. | -| **Where used** | Justifies the full `living-docs` skill: C4 diagrams answer "what does this module depend on?"; the ADR record answers "why was it written this way?"; the living glossary answers "what does this term mean in this context?". Collectively these eliminate the three most frequent information needs identified by Ko et al. | - ---- - -### 60. Software Engineering at Google — Documentation Chapter - -| | | -|---|---| -| **Source** | Winters, T., Manshreck, T., & Wright, H. (2020). *Software Engineering at Google: Lessons Learned from Programming Over Time*. O'Reilly. Chapter 10: "Documentation." https://abseil.io/resources/swe-book/html/ch10.html | -| **Date** | 2020 | -| **Alternative** | Fitzpatrick, B., & Collins-Sussman, B. (2012). *Team Geek*. O'Reilly. 
| -| **Status** | Confirmed — large-scale industry evidence from a codebase with ~2 billion lines of code | -| **Core finding** | Documentation that lives outside the code repository decays at a rate proportional to how often the code changes — because there is no mechanism that forces the doc to be updated when the code changes. Docs-as-code (documentation in the same repo, reviewed in the same PRs, tested in the same CI pipeline) dramatically reduces divergence because the cost of updating the doc is incurred at the same moment as the cost of the code change. | -| **Mechanism** | Google's g3doc system co-locates docs with the code they describe. When a PR changes `payments/service.py`, the reviewer also sees `payments/README.md` in the diff and can flag staleness immediately. At scale, Google found that docs with no co-located tests or CI checks become stale within 3–6 months regardless of team discipline. | -| **Where used** | Justifies co-locating `docs/` within the project repository. Living docs (`docs/architecture/c4/`, `docs/glossary.md`) are updated in the same commits as the code they describe. The `living-docs` skill is the mechanism that enforces this — it runs after Step 5 to regenerate diagrams from the current state of the codebase and discovery docs. | - ---- - -### 61. Diátaxis — A Systematic Framework for Technical Documentation - -| | | -|---|---| -| **Source** | Procida, D. (2021). "Diátaxis — A systematic approach to technical documentation." *diataxis.fr*. https://diataxis.fr | -| **Date** | 2021 | -| **Status** | Confirmed — adopted by Django, NumPy, Gatsby, Cloudflare, and the Python Software Foundation | -| **Core finding** | Technical documentation fails because it conflates four fundamentally different needs into a single undifferentiated text. 
The four types are: **Tutorials** (learning-oriented; guides a beginner through a complete task), **How-to guides** (task-oriented; solves a specific problem for a practitioner), **Reference** (information-oriented; describes the system accurately and completely), **Explanation** (understanding-oriented; discusses concepts and decisions). Each type has a different audience mental state and requires a different writing mode. Mixing them degrades all four. | -| **Mechanism** | The two axes of Diátaxis are: **practical ↔ theoretical** (tutorials and how-to guides are practical; reference and explanation are theoretical) and **acquiring ↔ applying** (tutorials and explanation are for acquiring knowledge; how-to guides and reference are for applying it). A document that tries to be both a tutorial and a reference simultaneously will be a poor tutorial (too much information) and a poor reference (not structured for lookup). | -| **Where used** | Documentation structure in this template maps to Diátaxis: `README.md` = tutorial (getting started), `AGENTS.md` = reference (complete description of roles, skills, commands) and explanation (why the workflow exists), `docs/c4/` = reference (system structure), post-mortems = explanation (why decisions were made). The `living-docs` skill produces reference-type documentation (C4 diagrams, glossary) — not tutorials. | - ---- - -### 62. Blameless Post-Mortems and a Just Culture - -| | | -|---|---| -| **Source** | Allspaw, J. (2012). "Blameless PostMortems and a Just Culture." *code.etsy.com* (archived). https://www.etsy.com/codeascraft/blameless-postmortems/ | -| **Date** | 2012 | -| **Alternative** | Dekker, S. (2006). *The Field Guide to Understanding Human Error*. Ashgate. | -| **Status** | Confirmed — foundational DevOps/SRE practice; referenced in Google SRE Book (2016) | -| **Core finding** | Post-mortems that assign blame produce less information and lower long-term system reliability than blameless post-mortems. 
When individuals believe they will be blamed, they withhold information about contributing factors, preventing the systemic causes from being identified and fixed. A blameless post-mortem treats the incident as a system failure, not an individual failure — asking "what conditions allowed this to happen?" not "who caused this?" | -| **Mechanism** | Allspaw's model separates two questions: (1) what happened? (factual, blameless) and (2) what changes would prevent recurrence? (systemic). The post-mortem document records both. The output is not an individual's performance review but a list of system changes — process improvements, documentation gaps, tooling additions. Etsy's incident rate fell after adopting blameless post-mortems because engineers began reporting near-misses that they previously concealed. | -| **Where used** | `docs/post-mortem/` directory. Post-mortems in this template follow the blameless model: they report workflow gaps found, not who made the mistake. The output of each post-mortem is a list of improvements to skills, agents, or workflow documentation. The `living-docs` skill is one such improvement — it emerged from the discovery that architecture and glossary documentation were falling behind the codebase. | - ---- - -## Bibliography - -1. Allspaw, J. (2012). Blameless PostMortems and a Just Culture. *code.etsy.com*. https://www.etsy.com/codeascraft/blameless-postmortems/ -2. Dagenais, B., & Robillard, M. P. (2010). Creating and evolving developer documentation. *Proc. FSE 2010*, pp. 127–136. ACM. -3. Dekker, S. (2006). *The Field Guide to Understanding Human Error*. Ashgate. -4. Ko, A. J., DeLine, R., & Venolia, G. (2007). Information Needs in Collocated Software Development Teams. *Proc. ICSE 2007*, pp. 344–353. https://doi.org/10.1109/ICSE.2007.45 -5. Procida, D. (2021). Diátaxis — A systematic approach to technical documentation. *diataxis.fr*. https://diataxis.fr -6. Winters, T., Manshreck, T., & Wright, H. (2020). 
*Software Engineering at Google*. O'Reilly. Chapter 10. https://abseil.io/resources/swe-book/html/ch10.html diff --git a/docs/scientific-research/domain-modeling.md b/docs/scientific-research/domain-modeling.md deleted file mode 100644 index eb9143e..0000000 --- a/docs/scientific-research/domain-modeling.md +++ /dev/null @@ -1,115 +0,0 @@ -# Scientific Research — Domain Modeling - -Foundations for bounded context identification, ubiquitous language, and feature decomposition used in this template. - ---- - -### 31. Domain-Driven Design — Bounded Contexts and Feature Identification - -| | | -|---|---| -| **Source** | Evans, E. (2003). *Domain-Driven Design: Tackling Complexity in the Heart of Software*. Addison-Wesley. | -| **Date** | 2003 | -| **Alternative** | Context Mapper (2025). Rapid Object-Oriented Analysis and Design. https://contextmapper.org/docs/rapid-ooad | -| **Status** | Confirmed — foundational DDD literature | -| **Core finding** | A Bounded Context is a boundary within which a particular ubiquitous language is consistent. Features are identified by grouping related user stories that share the same language. The decomposition criterion is "single responsibility per context" + "consistency of language." | -| **Mechanism** | In DDD: (1) Extract ubiquitous language from requirements → (2) Group by language consistency → (3) Each group is a candidate bounded context → (4) Each bounded context maps to a feature. Context Mapper automates this: User Stories → Subdomains (via noun/verb extraction) → Bounded Contexts of type FEATURE. | -| **Where used** | Stage 1 Discovery: after session synthesis, verify each feature has consistent language. Noun/verb extraction from discovery answers builds the Domain Model in `docs/discovery.md`. The `Rules (Business):` section in `.feature` files captures the ubiquitous language rules that govern each feature. | - ---- - -### 63. DDD Reference — Pattern Summaries (CC-BY) - -| | | -|---|---| -| **Source** | Evans, E. 
(2015). *DDD Reference: Definitions and Pattern Summaries*. domainlanguage.com. https://www.domainlanguage.com/ddd/reference/ | -| **Date** | 2015 | -| **Alternative** | Evans, E. (2003). *Domain-Driven Design*. Addison-Wesley. (full book; entry #31) | -| **Status** | Confirmed — freely available CC-BY canonical summary; maintained by Evans personally | -| **Core finding** | The open-access pattern summary of all DDD patterns from the 2003 book. More precisely citable than the book for specific pattern definitions. Key patterns: Ubiquitous Language ("Use the model as the backbone of a language. Commit the team to exercising that language relentlessly in all communication within the team and in the code."), Bounded Context, Context Map, Domain Events, Aggregates, Repositories. | -| **Mechanism** | Each pattern is described with: intent, prescription, and "therefore" consequences. The Ubiquitous Language pattern prescribes: use the same terms in diagrams, writing, and especially speech. Refactor the code when the language changes. Resolve confusion over terms in conversation, the way confusion over ordinary words is resolved — by agreement and precision. | -| **Where used** | Primary reference for `docs/discovery.md` Domain Model structure and the ubiquitous language practice. `living-docs` skill glossary entries derive from this: terms must match code identifiers (Evans' "use the same language in code" prescription). `docs/scientific-research/domain-modeling.md`. | -| **Note** | Supersedes entry #31 as the citable source for specific pattern quotes. Entry #31 remains as the book reference. Use this entry when citing a specific Evans pattern definition. | - ---- - -### 64. UbiquitousLanguage — Fowler Bliki - -| | | -|---|---| -| **Source** | Fowler, M. (2006). "UbiquitousLanguage." *martinfowler.com*. 
https://martinfowler.com/bliki/UbiquitousLanguage.html | -| **Date** | 2006 | -| **Alternative** | Evans (2015) DDD Reference (entry #63) — the primary source Fowler summarises | -| **Status** | Confirmed — widely cited secondary source; Fowler wrote the DDD foreword and is considered the authoritative secondary interpreter of Evans | -| **Core finding** | The ubiquitous language is a practice, not a document. The glossary is a secondary artifact — a snapshot of the current state of the language. The language itself lives in conversation, in the code, and in all written communication. "By using the model-based language pervasively and not being satisfied until it flows, we approach a model that is complete and comprehensible." Domain experts must object to inadequate terms; developers must flag ambiguity. | -| **Mechanism** | The key test of a ubiquitous language: can a domain expert read the domain layer code and recognize their domain? If the code uses different names than the glossary, the code must be refactored — not the glossary relaxed. The language evolves through experimentation with alternative expressions, followed by code refactoring to match the new model. | -| **Where used** | `living-docs` skill — grounds the rule "verify each term matches the identifier used in the code's domain layer." `docs/glossary.md` — the glossary is explicitly secondary to the code. `docs/scientific-research/domain-modeling.md`. | - ---- - -### 65. BoundedContext — Fowler Bliki - -| | | -|---|---| -| **Source** | Fowler, M. (2014). "BoundedContext." *martinfowler.com*. 
https://martinfowler.com/bliki/BoundedContext.html | -| **Date** | 2014 | -| **Alternative** | Evans (2015) DDD Reference (entry #63) — Fowler cites Evans directly | -| **Status** | Confirmed — includes a direct Evans quote; the canonical accessible reference for Bounded Context as a design pattern | -| **Core finding** | "Total unification of the domain model for a large system will not be feasible or cost-effective" (Evans, quoted directly). The same word can mean different things in different Bounded Contexts — this is not a defect but a reflection of domain reality. "You need a different model when the language changes." A Bounded Context is the boundary within which a particular ubiquitous language is internally consistent. Terms must be qualified by their context when a project has more than one bounded context. | -| **Mechanism** | Fowler's electricity utility example: the word "meter" meant different things in billing, grid management, and customer service. Attempting to unify these into one definition created confusion. Each bounded context maintains its own model and its own language. Context Maps document the relationships and translation rules between bounded contexts. | -| **Where used** | `living-docs` skill — `**Bounded context:**` field in `docs/glossary.md` entries is mandatory when the project has more than one bounded context (this is the Evans/Fowler requirement). `docs/scientific-research/domain-modeling.md`. | - ---- - -### 66. Implementing Domain-Driven Design - -| | | -|---|---| -| **Source** | Vernon, V. (2013). *Implementing Domain-Driven Design*. Addison-Wesley. 
| -| **Date** | 2013 | -| **Alternative** | Evans (2003) DDD (entry #31) — Vernon explicitly builds on Evans | -| **Status** | Confirmed — second most cited DDD book; ~5,000 citations | -| **Core finding** | Three additions to Evans: (1) **Domain Events as first-class vocabulary** — past-tense verb phrases ("OrderPlaced," "VersionDisplayed") are part of the ubiquitous language and belong in the glossary as a distinct type. (2) **Context Maps as the organizing principle** for multi-context glossaries — each bounded context has its own language documentation; the Context Map shows translation rules between contexts. (3) **Documentation co-located with the code** — docs in the same repository decay at the same rate as the code, dramatically reducing divergence. | -| **Mechanism** | Vernon's IDDD samples (github.com/VaughnVernon/IDDD_Samples) demonstrate all three in practice. The Product Owner / Business Analyst plays the domain-expert-representative role in glossary maintenance — validating semantic correctness — while developers own structural precision. Neither writes the glossary unilaterally. | -| **Where used** | `living-docs` skill — `Domain Event` added as a distinct Type value in `docs/glossary.md` entries. Grounds the PO-owned glossary with SE input via `docs/architecture.md` Reason: fields. `docs/scientific-research/domain-modeling.md`. | - ---- - -### 67. Ubiquitous Language Is Not a Glossary — Verraes - -| | | -|---|---| -| **Source** | Verraes, M. (2013). "Ubiquitous Language Is Not a Glossary." *verraes.net*. 
https://web.archive.org/web/20131004/https://verraes.net/2013/04/ubiquitous-language-is-not-a-glossary/ | -| **Date** | 2013 | -| **Alternative** | Fowler (2006) UbiquitousLanguage (entry #64) — the same secondary-artifact point, less pointed | -| **Status** | Confirmed — original URL is 404; widely documented through community discussion and practitioner secondary accounts; thesis is uncontested in the DDD community | -| **Core finding** | A glossary is not a ubiquitous language. Teams that maintain a glossary but do not reflect its terms in the code have the *appearance* of a ubiquitous language without the substance. The glossary is a secondary artifact derived from the code and domain-expert conversations — not the reverse. The canonical source of truth is the domain layer code, not the glossary document. A glossary that diverges from the code is lying. | -| **Mechanism** | The test: can a domain expert read the domain layer code and recognize their domain without a translator? If yes, the ubiquitous language exists. If the only evidence of the language is the glossary document, it does not exist. Consequence: every term added to the glossary must be verified against the corresponding code identifier. | -| **Where used** | `living-docs` skill — grounds the checklist item "Verify each term matches the identifier used in the code's domain layer." Prevents the common failure mode of glossary-as-theatre. `docs/scientific-research/domain-modeling.md`. | - ---- - -### 68. Whirlpool Process of Model Exploration — Evans - -| | | -|---|---| -| **Source** | Evans, E. (2011). *Whirlpool Process of Model Exploration*. domainlanguage.com. https://www.domainlanguage.com/ddd/whirlpool/ | -| **Date** | 2011 | -| **Alternative** | Brandolini, A. (2013). *Introducing EventStorming*. Leanpub. 
— a later, more structured alternative to Whirlpool | -| **Status** | Confirmed — freely available; Evans' own post-2003 process guidance | -| **Core finding** | Model exploration is a cycle: Scenario Exploring → Harvesting Abstractions → Probing the Model → Challenging the Model → back to Scenario Exploring. New vocabulary crystallizes at the Harvesting Abstractions step — concrete scenarios surface candidate terms, which are then named, defined, and reflected in the code. The glossary grows at each Harvesting Abstractions step. | -| **Mechanism** | The Whirlpool is not a development process — it fits within most iterative processes. It is a model-exploration subprocess triggered whenever the team encounters a poorly understood domain concept. The output of each cycle is a refined model expressed in clearer language, with updated code identifiers and glossary entries. | -| **Where used** | `living-docs` skill — grounds the timing of glossary updates: after each completed feature (Step 5) corresponds to the Harvesting Abstractions step in the Whirlpool. Discovery sessions (Stage 1) correspond to Scenario Exploring. `docs/scientific-research/domain-modeling.md`. | - ---- - -## Bibliography - -1. Context Mapper. (2025). Rapid Object-Oriented Analysis and Design. https://contextmapper.org/docs/rapid-ooad -2. Evans, E. (2003). *Domain-Driven Design: Tackling Complexity in the Heart of Software*. Addison-Wesley. -3. Evans, E. (2011). *Whirlpool Process of Model Exploration*. domainlanguage.com. https://www.domainlanguage.com/ddd/whirlpool/ -4. Evans, E. (2015). *DDD Reference: Definitions and Pattern Summaries* (CC-BY). domainlanguage.com. https://www.domainlanguage.com/ddd/reference/ -5. Fowler, M. (2006). UbiquitousLanguage. martinfowler.com. https://martinfowler.com/bliki/UbiquitousLanguage.html -6. Fowler, M. (2014). BoundedContext. martinfowler.com. https://martinfowler.com/bliki/BoundedContext.html -7. Vernon, V. (2013). *Implementing Domain-Driven Design*. 
Addison-Wesley. -8. Verraes, M. (2013). Ubiquitous Language Is Not a Glossary. verraes.net (archived). https://web.archive.org/web/20131004/https://verraes.net/2013/04/ubiquitous-language-is-not-a-glossary/ diff --git a/docs/scientific-research/oop-design.md b/docs/scientific-research/oop-design.md deleted file mode 100644 index 4b0637d..0000000 --- a/docs/scientific-research/oop-design.md +++ /dev/null @@ -1,64 +0,0 @@ -# Scientific Research — OOP Design - -Foundations for object-oriented design principles used in this template. - ---- - -### 32. Object Calisthenics — Nine Rules - -| | | -|---|---| -| **Source** | Bay, J. "Object Calisthenics." *The Thoughtworks Anthology* (PragProg, 2008). Original in IEEE Software/DevX, ~2005. https://www.bennadel.com/resources/uploads/2012/objectcalisthenics.pdf | -| **Date** | ~2005 | -| **Status** | Practitioner synthesis | -| **Core finding** | 9 rules to internalize OOP: (1) One level indentation per method, (2) No ELSE, (3) Wrap primitives/Strings, (4) First class collections, (5) One dot per line, (6) No abbreviations, (7) Classes ≤50 lines, (8) ≤2 instance variables, (9) No getters/setters. 7 of 9 enforce data encapsulation; 1 drives polymorphism; 1 drives naming. | -| **Mechanism** | Restrictions force decomposition. When you cannot use getters, behavior must move into the object. When you cannot use ELSE, you use polymorphism. When classes must be ≤2 ivars, you discover missing abstractions. | -| **Where used** | Refactor self-declaration checklist in `refactor/SKILL.md`. | - ---- - -### 33. Refactoring - -| | | -|---|---| -| **Source** | Fowler, M. (1999/2018). *Refactoring: Improving the Design of Existing Code* (2nd ed.). Addison-Wesley. https://martinfowler.com/books/refactoring.html | -| **Date** | 1999, 2018 | -| **Status** | Confirmed — foundational | -| **Core finding** | Refactoring = behavior-preserving transformations. 
68 catalogued refactorings, each small enough to do safely but cumulative effect significant. Code smells (duplicate code, long methods, feature envy) indicate refactoring opportunities. | -| **Mechanism** | Small steps reduce risk. Each refactoring is reversible. Test suite validates behavior unchanged. | -| **Where used** | `refactor/SKILL.md`: smell detection triggers refactoring; full protocol and catalogue entries. | - ---- - -### 34. Design Patterns - -| | | -|---|---| -| **Source** | Gamma, E., Helm, R., Johnson, R., Vlissides, J. (1995). *Design Patterns: Elements of Reusable Object-Oriented Software*. Addison-Wesley. | -| **Date** | 1995 | -| **Status** | Confirmed — foundational | -| **Core finding** | 23 patterns catalogued in 3 categories: Creational (5), Structural (7), Behavioral (11). Key principles: "Favor composition over inheritance," "Program to an interface, not an implementation." | -| **Mechanism** | Patterns are recurring solutions to common problems. Named and catalogued so developers don't rediscover them. | -| **Where used** | `design-patterns/SKILL.md`: full GoF catalogue with smell-triggered Python before/after examples. | - ---- - -### 35. SOLID Principles - -| | | -|---|---| -| **Source** | Martin, R. C. (2000). "Principles of OOD." *ButUncleBob.com*. Acronym coined by Michael Feathers (2004). https://blog.interface-solv.com/wp-content/uploads/2020/07/Principles-Of-OOD.pdf | -| **Date** | 2000 | -| **Status** | Confirmed | -| **Core finding** | S: One reason to change. O: Open extension, closed modification. L: Subtypes substitutable. I: No forced stub methods. D: Depend on abstractions, not concretes. | -| **Mechanism** | Each principle targets a specific coupling failure mode. Together they produce low coupling, high cohesion. | -| **Where used** | Refactor self-declaration checklist in `refactor/SKILL.md`: 5-row SOLID table with Python before/after examples. | - ---- - -## Bibliography - -1. Bay, J. (~2005). "Object Calisthenics." 
*IEEE Software/DevX*. https://www.bennadel.com/resources/uploads/2012/objectcalisthenics.pdf -2. Fowler, M. (1999/2018). *Refactoring: Improving the Design of Existing Code* (2nd ed.). Addison-Wesley. https://martinfowler.com/books/refactoring.html -3. Gamma, E., Helm, R., Johnson, R., & Vlissides, J. (1995). *Design Patterns: Elements of Reusable Object-Oriented Software*. Addison-Wesley. -4. Martin, R. C. (2000). "Principles of OOD." *ButUncleBob.com*. https://blog.interface-solv.com/wp-content/uploads/2020/07/Principles-Of-OOD.pdf diff --git a/docs/scientific-research/refactoring-empirical.md b/docs/scientific-research/refactoring-empirical.md deleted file mode 100644 index 61d666c..0000000 --- a/docs/scientific-research/refactoring-empirical.md +++ /dev/null @@ -1,100 +0,0 @@ -# Scientific Research — Refactoring (Empirical) - -Empirical studies on code smells, refactoring prioritization, and OOP complexity used in this template. - ---- - -### 36. QDIR — Bad-Smells + OO Metrics Prioritization - -| | | -|---|---| -| **Source** | Malhotra, R., Singh, P. (2020). "Exploiting bad-smells and object-oriented characteristics to prioritize classes for refactoring." *Int. J. Syst. Assur. Eng. Manag.* 11(Suppl 2), 133–144. Springer. | -| **Date** | 2020 | -| **URL** | https://doi.org/10.1007/s13198-020-01001-x | -| **Status** | Confirmed — empirical | -| **Core finding** | QDIR (Quality Depreciation Index Rule) combines bad-smell severity with OO metrics (LOC, WMC, CBO, RFC, DIT) to prioritize classes for refactoring. Validated on 8 open-source Java systems. | -| **Mechanism** | Classes with high smell severity AND high OO metrics are prioritized. QDIR = weighted sum. | -| **Where used** | Refactor prioritization: when smell detected, check OO metrics to prioritize. | - ---- - -### 37. Smells + Architectural Refactoring - -| | | -|---|---| -| **Source** | Silva, C. et al. (2020). "When Are Smells Indicators of Architectural Refactoring Opportunities." *Proc. 28th Int. 
Conf. on Program Comprehension*. ACM. | -| **Date** | 2020 | -| **URL** | https://doi.org/10.1145/3387904.3389276 | -| **Status** | Confirmed — empirical | -| **Core finding** | Study of 50 projects, 52,667 refactored elements. 67.53% of smells co-occur. Smells that co-occur are indicators of architectural refactoring in 88.53% of cases. | -| **Mechanism** | Single smells are often code-level; co-occurring smells indicate architectural problems. Pattern catalog for smells → specific architectural refactorings. | -| **Where used** | Smell detection triggers architectural analysis when co-occurrence patterns detected. | - ---- - -### 38. SPIRIT Tool — Code Smell Prioritization - -| | | -|---|---| -| **Source** | Vidal, S. A., Marcos, C., Díaz-Pace, J. A. (2014). "An Approach to Prioritize Code Smells for Refactoring." *Automated Software Engineering*, 23(3), 501–532. | -| **Date** | 2014 | -| **URL** | https://doi.org/10.1007/s10515-014-0175-x | -| **Status** | Confirmed — tool | -| **Core finding** | SPIRIT (Smart Identification of Refactoring opportunITies) prioritizes smells by 3 criteria: (1) component stability, (2) impact on modifiability scenarios, (3) smell relevance. Top-ranked smells correlate with expert developer judgment. | -| **Mechanism** | Semi-automated ranking. Combines version history (stable vs. unstable), impact analysis, and smell type. | -| **Where used** | Refactor prioritization: stability = has the class changed recently? Unstable + smelly = prioritize. | - ---- - -### 39. Bad Engineering Properties of OOP - -| | | -|---|---| -| **Source** | Cardelli, L. (1996). "Bad Engineering Properties of Object-Oriented Languages." *ACM Computing Surveys*, 28(4), 150. 
| -| **Date** | 1996 | -| **URL** | https://www.microsoft.com/en-us/research/publication/bad-engineering-properties-of-object-oriented-languages/ | -| **Status** | Confirmed — foundational critique | -| **Core finding** | OOP has 5 "economy" problems: (1) Execution (virtual methods prevent inlining), (2) Compilation (no code/interface separation), (3) Small-scale dev (expressive type systems missing), (4) Large-scale dev (poor class extension/modification), (5) Language features (baroque complexity). | -| **Mechanism** | OOP is not universally superior. Trade-offs exist. Knowing these helps avoid over-engineering. | -| **Where used** | Anti-pre-pattern: know when OOP adds complexity vs. value. | - ---- - -### 40. Code Complexity Model of OOP - -| | | -|---|---| -| **Source** | Aluthwaththage, J. H., Thathsarani, H. A. N. N. (2024). "A Novel OO-Based Code Complexity Metric." *Proc. Future Technologies Conference (FTC)*, 616–628. Springer/IEEE. | -| **Date** | 2024 | -| **URL** | https://link.springer.com/chapter/10.1007/978-3-031-73125-9_39 | -| **Alternative** | Misra et al. (2024). "A Suite of Object Oriented Cognitive Complexity Metrics." IEEE. | -| **Status** | Partially confirmed — recent | -| **Core finding** | CWC (Combined Weighted Complexity) measures OOP complexity at statement level, considering 8 factors: nesting depth, control types, compound conditions, try-catch, threads, pointers, references, dynamic memory. Addresses gap in existing metrics ignoring cognitive load. | -| **Mechanism** | Granular complexity scoring. Higher scores indicate more cognitively demanding code. | -| **Where used** | Complexity measurement: when function > 20 lines, consider CWC-style granular scoring. | - ---- - -### 41. Metric Thresholds for Smell Detection - -| | | -|---|---| -| **Source** | Bigonha, M. A. S., et al. (2019). "The usefulness of software metric thresholds for detection of bad smells and fault prediction." *Information and Software Technology*, 115, 79–92. 
| -| **Date** | 2019 | -| **URL** | https://doi.org/10.1016/j.infsof.2019.08.005 | -| **Alternative** | Catal et al. (2018). "Software metrics thresholds calculation techniques." *Info. Softw. Technol.* | -| **Status** | Confirmed | -| **Core finding** | Metric thresholds (e.g., LOC > 600) used for smell detection are unreliable. Study on 92 open-source systems found precision too low for practical use. Neither heuristic-based nor ML approaches achieve acceptable accuracy. | -| **Mechanism** | Fixed thresholds are context-dependent. Thresholds should be project-specific, not universal. | -| **Where used** | Anti-pre-pattern: do not rely on fixed thresholds. Use co-occurrence patterns (entry 37) instead. | - ---- - -## Bibliography - -1. Aluthwaththage, J. H., & Thathsarani, H. A. N. N. (2024). A Novel OO-Based Code Complexity Metric. *Proc. Future Technologies Conference (FTC)*, 616–628. https://link.springer.com/chapter/10.1007/978-3-031-73125-9_39 -2. Bigonha, M. A. S., et al. (2019). The usefulness of software metric thresholds. *Information and Software Technology*, 115, 79–92. https://doi.org/10.1016/j.infsof.2019.08.005 -3. Cardelli, L. (1996). Bad Engineering Properties of Object-Oriented Languages. *ACM Computing Surveys*, 28(4), 150. https://www.microsoft.com/en-us/research/publication/bad-engineering-properties-of-object-oriented-languages/ -4. Malhotra, R., & Singh, P. (2020). Exploiting bad-smells and OO characteristics. *Int. J. Syst. Assur. Eng. Manag.*, 11(Suppl 2), 133–144. https://doi.org/10.1007/s13198-020-01001-x -5. Silva, C. et al. (2020). When Are Smells Indicators of Architectural Refactoring Opportunities. *Proc. 28th ICPC*. https://doi.org/10.1145/3387904.3389276 -6. Vidal, S. A., Marcos, C., & Díaz-Pace, J. A. (2014). An Approach to Prioritize Code Smells. *Automated Software Engineering*, 23(3), 501–532. 
https://doi.org/10.1007/s10515-014-0175-x diff --git a/docs/scientific-research/requirements-elicitation.md b/docs/scientific-research/requirements-elicitation.md deleted file mode 100644 index b272727..0000000 --- a/docs/scientific-research/requirements-elicitation.md +++ /dev/null @@ -1,246 +0,0 @@ -# Scientific Research — Requirements Elicitation - -Foundations for the PO interview structure, Gherkin criteria, and feature discovery in this template. - ---- - -### 17. INVEST Criteria for User Stories - -| | | -|---|---| -| **Source** | Wake, B. (2003). *INVEST in Good Stories, and SMART Tasks*. XP123.com. | -| **Date** | 2003 | -| **Alternative** | Cohn, M. (2004). *User Stories Applied: For Agile Software Development*. Addison-Wesley. | -| **Status** | Confirmed | -| **Core finding** | Stories that are Independent, Negotiable, Valuable, Estimable, Small, and Testable produce fewer downstream defects and smoother development cycles. | -| **Mechanism** | INVEST serves as a quality gate before stories enter development. "Testable" forces the PO to express observable outcomes (directly enabling Given/When/Then). "Small" forces decomposition. "Independent" prevents hidden ordering dependencies. | -| **Where used** | INVEST gate in Phase 3 of `scope/SKILL.md`. | - ---- - -### 18. Example Mapping (Rules Layer) - -| | | -|---|---| -| **Source** | Wynne, M. (2015). *Introducing Example Mapping*. Cucumber Blog. https://cucumber.io/blog/bdd/example-mapping-introduction/ | -| **Date** | 2015 | -| **Status** | Confirmed | -| **Core finding** | Inserting a "rules" layer between stories and examples prevents redundant or contradictory acceptance criteria. A story with many rules needs splitting; a story with many open questions is not ready for development. | -| **Mechanism** | Four card types: Story (yellow), Rules (blue), Examples (green), Questions (red). The rules layer groups related examples under the business rule they illustrate. 
Red cards (unanswered questions) are a first-class signal to stop and investigate. | -| **Where used** | `Rules (Business):` section in each `.feature` file. PO identifies business rules before writing Examples in Stage 2 Step B. | - ---- - -### 19. Declarative Gherkin - -| | | -|---|---| -| **Source** | Cucumber Team. (2024). *Better Gherkin*. Cucumber Documentation. https://cucumber.io/docs/bdd/better-gherkin/ | -| **Date** | 2024 | -| **Status** | Confirmed | -| **Core finding** | Declarative Gherkin ("When Bob logs in") produces specifications that survive UI changes. Imperative Gherkin ("When I click the Login button") couples specs to implementation details and breaks on every UI redesign. | -| **Mechanism** | Declarative steps describe *what happens* at the business level. Imperative steps describe *how the user interacts with a specific UI*. AI agents are especially prone to writing imperative Gherkin because they mirror literal steps. | -| **Where used** | Declarative vs. imperative table in Stage 2 Step B (Criteria) of `scope/SKILL.md`. | - ---- - -### 20. MoSCoW Prioritization (Within-Story Triage) - -| | | -|---|---| -| **Source** | Clegg, D., & Barker, R. (1994). *Case Method Fast-Track: A RAD Approach*. Addison-Wesley (DSDM origin). | -| **Date** | 1994 | -| **Status** | Confirmed | -| **Core finding** | Classifying requirements as Must/Should/Could/Won't forces explicit negotiation about what is essential vs. desired. When applied *within* a single story, it reveals bloated stories that should be split. | -| **Mechanism** | DSDM mandates that Musts cannot exceed 60% of total effort. At the story level: if a story has 12 Examples and only 3 are Musts, the remaining 9 can be deferred. This prevents gold-plating and keeps stories small. | -| **Where used** | MoSCoW triage in Stage 2 Step B (Criteria) of `scope/SKILL.md`. | - ---- - -### 28. Active Listening — Paraphrase-Clarify-Summarize - -| | | -|---|---| -| **Source** | Rogers, C. R., & Farson, R. E. 
(1957). *Active Listening*. Industrial Relations Center, University of Chicago. | -| **Date** | 1957 | -| **Alternative** | McNaughton, D. et al. (2008). Learning to Listen. *Topics in Early Childhood Special Education*, 27(4), 223–231. | -| **Status** | Confirmed — foundational clinical research; widely replicated | -| **Core finding** | Active listening — paraphrasing what was heard in the listener's own words, asking clarifying questions, then summarizing the main points and intent — reduces misunderstanding, builds trust, and confirms mutual understanding before proceeding. | -| **Mechanism** | Paraphrasing forces the listener to reconstruct the speaker's meaning, surfacing gaps immediately. Clarifying questions address residual ambiguity. Summarizing creates a shared record that both parties can confirm or correct. | -| **Where used** | PO summarization protocol in `scope/SKILL.md`: after each interview round, PO produces a "Here is what I understood" block before proceeding. | - ---- - -### 28a. Active Listening — Three-Level Structure - -| | | -|---|---| -| **Source** | Synthesis of: Nielsen (2010); Farrell (2017); Ambler (2002); Wynne (2015). | -| **Date** | 2010–2015 | -| **Status** | Synthesized rule of thumb — each component individually confirmed | -| **Core finding** | Active listening in requirements interviews operates at three granularities: **Level 1** (per answer) — immediate paraphrase; **Level 2** (per topic cluster) — transition summary; **Level 3** (end of interview) — full synthesis serving four downstream purposes. | -| **Level 3 — four uses** | 1. Accuracy gate (NN/G). 2. Scope crystallization (Ambler/FDD). 3. Input to domain modeling (Ambler/FDD). 4. Baseline trigger (Wynne/Cucumber). | -| **Where used** | Stage 1 Discovery sessions in `scope/SKILL.md`. | - ---- - -### 29. The Kipling Method — Five Ws and One H - -| | | -|---|---| -| **Source** | Kipling, R. (1902). *Just So Stories*. Macmillan. 
| -| **Date** | 1902 | -| **Alternative** | Hermagoras of Temnos (2nd century BCE) — seven circumstances of rhetoric. | -| **Status** | Practitioner synthesis — journalism, business analysis, investigative methodology | -| **Core finding** | The six interrogative questions (Who, What, When, Where, Why, How) form a complete framework for gathering all essential facts about any situation. Together they ensure completeness and prevent gaps. | -| **Where used** | Stage 1 Discovery, General questions (first session): the initial seven questions are an adaptation of the 5W1H framework. | - ---- - -### 30. BA Requirements Question Framework - -| | | -|---|---| -| **Source** | Brandenburg, L. (2025). *Requirements Discovery Checklist Pack*. TechCanvass. | -| **Date** | 2025 | -| **Status** | Practitioner synthesis — consolidated BA methodology, not peer-reviewed | -| **Core finding** | Ten questions consistently make the most difference in requirements elicitation: (1) What problem are we solving? (2) What happens if we do nothing? (3) Who uses this? (4) What does success look like? (5) Walk me through how this works today. (6) Where does this usually break? (7) What decisions will this help? (8) What should definitely not happen? (9) What happens if input is wrong? (10) What assumptions are we making? | -| **Where used** | Stage 1 Discovery, General questions: the "Success", "Failure", and "Out-of-scope" questions map to this framework. | - ---- - -### 43. Feature-Driven Development — Domain Modeling to Feature List - -| | | -|---|---| -| **Source** | Ambler, S. W. (2002). *Agile Modeling*. Wiley. https://www.agilemodeling.com/essays/fdd.htm | -| **Date** | 2002 | -| **Alternative** | Palmer, S. R., & Felsing, J. M. (2002). *A Practical Guide to Feature-Driven Development*. Prentice Hall. | -| **Status** | Confirmed | -| **Core finding** | FDD requires domain modeling *before* feature naming. Features are expressed as "Action result object" triples. 
Features group into Feature Sets (shared domain object), which group into Subject Areas. | -| **Mechanism** | Domain modeling extracts the vocabulary (nouns = candidate classes, verbs = candidate methods). Feature identification then asks: "what verbs act on each noun?" | -| **Where used** | Stage 1 Discovery in `scope/SKILL.md`: after session synthesis, PO performs domain analysis (nouns/verbs → subject areas → FDD "Action object" feature names) for first session. | - ---- - -### 44. Affinity Mapping / KJ Method — Bottom-Up Feature Identification - -| | | -|---|---| -| **Source** | Krause, R., & Pernice, K. (2024). Affinity Diagramming. *Nielsen Norman Group*. https://www.nngroup.com/articles/affinity-diagram/ | -| **Date** | 2024 (method origin: Kawakita, J., 1960s) | -| **Alternative** | Kawakita, J. (1967). *Abduction*. Chuokoronsha. | -| **Status** | Confirmed | -| **Core finding** | Affinity diagramming groups raw observations/requirements into clusters by bottom-up similarity — no categories are named until grouping is complete. This prevents confirmation bias from top-down pre-labelling. | -| **Where used** | Stage 1 Discovery in `scope/SKILL.md` (alternative to FDD domain modeling): PO uses affinity mapping on interview answers to derive feature clusters. Best suited when working from interview transcripts solo. | - ---- - -### 45. Event Storming — Domain Events to Functional Areas - -| | | -|---|---| -| **Source** | Brandolini, A. (2013–present). *EventStorming*. Leanpub / eventstorming.com. https://eventstorming.com | -| **Date** | 2013 | -| **Status** | Confirmed | -| **Core finding** | Event Storming is a collaborative workshop where domain experts place past-tense domain events on a timeline. Sorting the events creates natural Functional Area clusters — these are candidate feature groups. The workshop also produces Ubiquitous Language, a Problem Inventory, and Actor roles. 
| -| **Mechanism** | Temporal sequencing of domain events forces resolution of conflicting mental models across organisational silos. Clusters emerge from shared vocabulary and causal proximity. | -| **Where used** | Optional alternative in Stage 1 Discovery in `scope/SKILL.md` for cross-silo discovery. | - ---- - -### 46. Critical Incident Technique — Gap-Finding via Past Events - -| | | -|---|---| -| **Source** | Flanagan, J. C. (1954). "The critical incident technique." *Psychological Bulletin*, 51(4), 327–357. https://doi.org/10.1037/h0061470 | -| **Date** | 1954 | -| **Alternative** | Rosala, M. (2020). The Critical Incident Technique in UX. *Nielsen Norman Group*. https://www.nngroup.com/articles/critical-incident-technique/ | -| **Status** | Confirmed — foundational; ~200 follow-on empirical studies | -| **Core finding** | Anchoring an interview on a specific past incident ("Tell me about a time when X broke down") breaks schema-based recall. Stakeholders describing actual past events report real workarounds, edge cases, and failure modes that never surface when asked "how does this usually work?" | -| **Mechanism** | Direct questions elicit the stakeholder's mental schema — a sanitized, gap-free description of how things *should* work. Incidents bypass the schema because episodic memory is anchored to specific sensory and emotional detail. | -| **Where used** | Cross-cutting and per-feature questions (gap-finding) in Stage 1 Discovery in `scope/SKILL.md`. | - ---- - -### 47. Cognitive Interview — Memory-Enhancing Elicitation Technique - -| | | -|---|---| -| **Source** | Fisher, R. P., & Geiselman, R. E. (1992). *Memory-Enhancing Techniques for Investigative Interviewing: The Cognitive Interview*. Charles C. Thomas. | -| **Date** | 1984 (original); 1987 (enhanced CI); 1992 (manual) | -| **Alternative** | Moody, W., Will, R. P., & Blanton, J. E. (1996). Enhancing knowledge elicitation using the cognitive interview. 
*Expert Systems with Applications*, 10(1), 127–133. | -| **Status** | Confirmed — meta-analysis: Köhnken et al. (1999), *Psychology, Crime & Law*, 5(1-2), 3–27. | -| **Core finding** | The enhanced CI elicits ~35% more correct information than standard interviews with equal accuracy rates. | -| **Mechanism** | Four retrieval mnemonics: (1) mental reinstatement of context; (2) report everything; (3) temporal reversal; (4) perspective change. Each mnemonic opens a different memory access route, collectively surfacing what direct questions cannot. | -| **Where used** | Cross-cutting and per-feature questions (gap-finding) in Stage 1 Discovery in `scope/SKILL.md`. | - ---- - -### 48. Laddering / Means-End Chain — Surfacing Unstated Motivations - -| | | -|---|---| -| **Source** | Reynolds, T. J., & Gutman, J. (1988). "Laddering theory, method, analysis, and interpretation." *Journal of Advertising Research*, 28(1), 11–31. | -| **Date** | 1988 | -| **Status** | Confirmed — operationalised in IS research (Hunter & Beck 2000) | -| **Core finding** | Repeatedly asking "Why is that important to you?" climbs a means-end chain from concrete attribute → functional consequence → psychosocial consequence → terminal value. The stakeholder's first answer is rarely the real constraint. | -| **Mechanism** | The Gherkin "So that [benefit]" clause is structurally a single-rung means-end ladder. Full laddering reveals value conflicts between stakeholders whose surface requirements look identical but whose ladders diverge at the consequence level. | -| **Where used** | Cross-cutting and per-feature questions (gap-finding) in Stage 1 Discovery in `scope/SKILL.md`. | - ---- - -### 49. Funnel Technique — Question Ordering to Prevent Priming - -| | | -|---|---| -| **Source** | Rosala, M., & Moran, K. (2022). The Funnel Technique in Qualitative User Research. *Nielsen Norman Group*. 
https://www.nngroup.com/articles/the-funnel-technique-in-qualitative-user-research/ | -| **Date** | 2022 | -| **Alternative** | Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. | -| **Status** | Confirmed — standard NNG qualitative research protocol | -| **Core finding** | Starting with broad open-ended questions before narrowing to specifics prevents the interviewer from priming the interviewee's responses. | -| **Mechanism** | Priming bias is structural: any category name the interviewer introduces activates a schema that filters what the interviewee considers worth reporting. The funnel sequences questions so the interviewee's own categories emerge first. | -| **Where used** | Within each Stage 1 Discovery session in `scope/SKILL.md`. | - ---- - -### 50. Issues in Requirements Elicitation — Why Direct Questions Fail - -| | | -|---|---| -| **Source** | Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. Software Engineering Institute, Carnegie Mellon University. https://www.sei.cmu.edu/library/abstracts/reports/92tr012.cfm | -| **Date** | 1992 | -| **Alternative** | Sommerville, I., & Sawyer, P. (1997). *Requirements Engineering: A Good Practice Guide*. Wiley. | -| **Status** | Confirmed — foundational SEI technical report | -| **Core finding** | Stakeholders have three structural problems that make direct questioning insufficient: (1) they omit information that is "obvious" to them; (2) they have trouble communicating needs they have never had to articulate; (3) they may not know what they want until they see what they don't want. | -| **Mechanism** | Expert knowledge is largely procedural and tacit. When asked "how does the system work?", experts describe what they believe happens, not what actually happens. Gap-finding techniques are required because they bypass the expert's mental schema. 
| -| **Where used** | Theoretical justification for the 3-session interview structure and use of CIT, CI, and Laddering in `scope/SKILL.md`. | - ---- - -## Bibliography - -1. Ambler, S. W. (2002). *Agile Modeling*. Wiley. https://www.agilemodeling.com/essays/fdd.htm -2. Brandenburg, L. (2025). *Requirements Discovery Checklist Pack*. TechCanvass. -3. Brandolini, A. (2013–present). *EventStorming*. https://eventstorming.com -4. Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. https://www.sei.cmu.edu/library/abstracts/reports/92tr012.cfm -5. Clegg, D., & Barker, R. (1994). *Case Method Fast-Track: A RAD Approach*. Addison-Wesley. -6. Cohn, M. (2004). *User Stories Applied*. Addison-Wesley. -7. Cucumber Team. (2024). Better Gherkin. https://cucumber.io/docs/bdd/better-gherkin/ -8. Farrell, S. (2017). UX Research Cheat Sheet. *Nielsen Norman Group*. https://www.nngroup.com/articles/ux-research-cheat-sheet/ -9. Fisher, R. P., & Geiselman, R. E. (1992). *Memory-Enhancing Techniques for Investigative Interviewing*. Charles C. Thomas. -10. Flanagan, J. C. (1954). The critical incident technique. *Psychological Bulletin*, 51(4), 327–357. https://doi.org/10.1037/h0061470 -11. Kawakita, J. (1967). *Abduction*. Chuokoronsha. -12. Kipling, R. (1902). *Just So Stories*. Macmillan. -13. Köhnken, G., Milne, R., Memon, A., & Bull, R. (1999). The cognitive interview: A meta-analysis. *Psychology, Crime & Law*, 5(1-2), 3–27. -14. Krause, R., & Pernice, K. (2024). Affinity Diagramming. *Nielsen Norman Group*. https://www.nngroup.com/articles/affinity-diagram/ -15. McNaughton, D. et al. (2008). Learning to Listen. *Topics in Early Childhood Special Education*, 27(4), 223–231. -16. Moody, W., Will, R. P., & Blanton, J. E. (1996). Enhancing knowledge elicitation using the cognitive interview. *Expert Systems with Applications*, 10(1), 127–133. -17. Nielsen, J. (2010). *Interviewing Users*. Nielsen Norman Group. 
https://www.nngroup.com/articles/interviewing-users/ -18. Palmer, S. R., & Felsing, J. M. (2002). *A Practical Guide to Feature-Driven Development*. Prentice Hall. -19. Reynolds, T. J., & Gutman, J. (1988). Laddering theory, method, analysis, and interpretation. *Journal of Advertising Research*, 28(1), 11–31. -20. Rogers, C. R., & Farson, R. E. (1957). *Active Listening*. Industrial Relations Center, University of Chicago. -21. Rosala, M. (2020). The Critical Incident Technique in UX. *Nielsen Norman Group*. https://www.nngroup.com/articles/critical-incident-technique/ -22. Rosala, M., & Moran, K. (2022). The Funnel Technique. *Nielsen Norman Group*. https://www.nngroup.com/articles/the-funnel-technique-in-qualitative-user-research/ -23. Wake, B. (2003). INVEST in Good Stories, and SMART Tasks. *XP123.com*. -24. Wynne, M. (2015). Introducing Example Mapping. *Cucumber Blog*. https://cucumber.io/blog/bdd/example-mapping-introduction/ diff --git a/docs/scientific-research/software-economics.md b/docs/scientific-research/software-economics.md deleted file mode 100644 index becd695..0000000 --- a/docs/scientific-research/software-economics.md +++ /dev/null @@ -1,24 +0,0 @@ -# Scientific Research — Software Economics - -Foundations for the shift-left, early defect detection, and workflow ordering decisions in this template. - ---- - -### 16. Cost of Change Curve (Shift Left) - -| | | -|---|---| -| **Source** | Boehm, B. W. (1981). *Software Engineering Economics*. Prentice-Hall. | -| **Date** | 1981 | -| **Alternative** | Boehm, B., & Papaccio, P. N. (1988). Understanding and controlling software costs. *IEEE Transactions on Software Engineering*, 14(10), 1462–1477. | -| **Status** | Confirmed | -| **Core finding** | The cost to fix a defect multiplies by roughly 10x per SDLC phase: requirements (1x) → design (5x) → coding (10x) → testing (20x) → production (200x). A defect caught during requirements costs 200x less than the same defect found after release. 
| -| **Mechanism** | Defects compound downstream: a wrong requirement becomes a wrong design, which becomes wrong code, which becomes wrong tests, all of which must be unwound. Catching errors at the source eliminates the entire cascade. This is the empirical foundation for "shift left" — investing earlier in quality always dominates fixing later. | -| **Where used** | Justifies the multi-session PO elicitation model: every acceptance criterion clarified at scope prevents 10–200x rework downstream. Also justifies the adversarial pre-mortem at the end of each elicitation cycle, and the adversarial mandate in `verify/SKILL.md`. The entire 5-step pipeline is ordered to surface defects at the earliest (cheapest) phase. | - ---- - -## Bibliography - -1. Boehm, B. W. (1981). *Software Engineering Economics*. Prentice-Hall. -2. Boehm, B., & Papaccio, P. N. (1988). Understanding and controlling software costs. *IEEE Transactions on Software Engineering*, 14(10), 1462–1477. diff --git a/docs/scientific-research/testing.md b/docs/scientific-research/testing.md deleted file mode 100644 index 2c7f7d7..0000000 --- a/docs/scientific-research/testing.md +++ /dev/null @@ -1,137 +0,0 @@ -# Scientific Research — Testing - -Foundations for test design, TDD, BDD, and property-based testing used in this template. - ---- - -### 11. Observable Behavior Testing - -| | | -|---|---| -| **Source** | Fowler, M. (2018). *The Practical Test Pyramid*. Thoughtworks. https://martinfowler.com/articles/practical-test-pyramid.html | -| **Date** | 2018 | -| **Status** | Confirmed | -| **Core finding** | Tests should answer "if I enter X and Y, will the result be Z?" — not "will method A call class B first?" | -| **Mechanism** | A test is behavioral if its assertion describes something a caller/user can observe without knowing the implementation. The test should still pass if you completely rewrite the internals. 
| -| **Where used** | Contract test rule in `implementation/SKILL.md`: "Write every test as if you cannot see the production code." | - ---- - -### 12. Test-Behavior Alignment - -| | | -|---|---| -| **Source** | Google Testing Blog (2013). *Testing on the Toilet: Test Behavior, Not Implementation*. | -| **Date** | 2013 | -| **Status** | Confirmed | -| **Core finding** | Test setup may need to change if implementation changes, but the actual test shouldn't need to change if the code's user-facing behavior doesn't change. | -| **Mechanism** | Tests that are tightly coupled to implementation break on refactoring and become a drag on design improvement. Behavioral tests survive internal rewrites. | -| **Where used** | Contract test rule in `implementation/SKILL.md`, reviewer verification check in `reviewer.md`. | - ---- - -### 13. Tests as First-Class Citizens - -| | | -|---|---| -| **Source** | Martin, R. C. (2017). *First-Class Tests*. Clean Coder Blog. | -| **Date** | 2017 | -| **Status** | Confirmed | -| **Core finding** | Tests should be treated as first-class citizens of the system — not coupled to implementation. Bad tests are worse than no tests because they give false confidence. | -| **Mechanism** | Tests written as "contract tests" — describing what the caller observes — remain stable through refactoring. Tests that verify implementation details are fragile and create maintenance burden. | -| **Where used** | Contract test rule in `implementation/SKILL.md`, verification check in `reviewer.md`. | - ---- - -### 14. Property-Based Testing (Invariant Discovery) - -| | | -|---|---| -| **Source** | MacIver, D. R. (2016). *What is Property Based Testing?* Hypothesis. 
https://hypothesis.works/articles/what-is-property-based-testing/ | -| **Date** | 2016 | -| **Status** | Confirmed | -| **Core finding** | Property-based testing is "the construction of tests such that, when these tests are fuzzed, failures reveal problems that could not have been revealed by direct fuzzing." Property tests test *invariants* — things that must always be true about the contract. | -| **Mechanism** | Meaningful property tests assert invariants: `assert Score(x).value >= 0` tests the contract. Tautological tests assert reconstruction: `assert Score(x).value == x` tests the implementation. | -| **Where used** | Meaningful vs. Tautological table in `implementation/SKILL.md`. | - ---- - -### 15. Mutation Testing (Test Quality Verification) - -| | | -|---|---| -| **Source** | King, K. N., & Offutt, A. J. (1991). "A Fortran Language System for Mutation-Based Software Testing." *Software: Practice and Experience*, 21(7), 685–718. | -| **Date** | 1991 | -| **Alternative** | Mutation testing tools: Cosmic Ray, mutmut (Python) | -| **Status** | Confirmed | -| **Core finding** | A meaningful test fails when a mutation (small deliberate code change) is introduced. A tautological test passes even with mutations because it doesn't constrain the behavior. | -| **Mechanism** | If a test survives every mutation of the production code without failing, it tests nothing. Only tests that fail on purposeful "damage" to the code are worth keeping. | -| **Where used** | Implicitly encouraged: tests must describe contracts, not implementation, which is the theoretical complement to mutation testing. | - ---- - -### 51. Canon TDD — Authoritative Red-Green-Refactor Definition - -| | | -|---|---| -| **Source** | Beck, K. (2023). "Canon TDD." *tidyfirst.substack.com*. December 11, 2023. https://tidyfirst.substack.com/p/canon-tdd | -| **Date** | 2023 | -| **Alternative** | Fowler, M. (2023). "Test Driven Development." *martinfowler.com*. 
https://martinfowler.com/bliki/TestDrivenDevelopment.html | -| **Status** | Confirmed — canonical source; explicitly authored to stop strawman critiques | -| **Core finding** | The canonical TDD loop is: (1) write a list of test scenarios; (2) convert exactly one item into a runnable test; (3) make it pass; (4) optionally refactor; (5) repeat. Writing all test code before any implementation is an explicit anti-pattern. | -| **Mechanism** | The interleaving of test-writing and implementation is not cosmetic — each test drives interface decisions at the moment they are cheapest to make. | -| **Where used** | Justifies one-@id-at-a-time interleaved TDD in Step 3 of `implementation/SKILL.md`. | - ---- - -### 52. GOOS — Outer/Inner TDD Loop - -| | | -|---|---| -| **Source** | Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. | -| **Date** | 2009 | -| **Status** | Confirmed — canonical ATDD/BDD integration model | -| **Core finding** | Acceptance tests and unit tests operate at two separate, nested timescales. The outer loop: write one failing acceptance test before any implementation. The inner loop: drive implementation with unit-level Red-Green-Refactor cycles until the acceptance test passes. | -| **Mechanism** | The outer loop provides direction (what to build); the inner loop provides momentum (how to build it). The acceptance test stays red throughout all inner cycles and goes green only when the feature is complete. | -| **Where used** | Justifies the two-level structure in Step 3: outer loop per `@id` acceptance test, inner loop per unit. | - ---- - -### 53. Is TDD Dead? — Anti-Bureaucracy Evidence - -| | | -|---|---| -| **Source** | Beck, K., Fowler, M., & Hansson, D. H. (2014). "Is TDD Dead?" Video series. *martinfowler.com*. 
https://martinfowler.com/articles/is-tdd-dead/ | -| **Date** | 2014 | -| **Status** | Confirmed — primary evidence for what TDD practitioners reject as overhead | -| **Core finding** | Per-cycle human reviewer gates, per-cycle checklists, and tests with zero delta coverage are all explicitly identified as harmful overhead. The green bar is the quality gate — not a checklist. | -| **Mechanism** | Administrative overhead added to TDD workflows increases the cost per cycle without increasing coverage or catching defects. The optimal TDD loop is as lean as productive. | -| **Where used** | Justifies removing per-test reviewer gates. Self-declaration moves to end-of-feature (once), preserving accountability at feature granularity without interrupting cycle momentum. | - ---- - -### 54. Introducing BDD — Behavioural-Driven Development Origin - -| | | -|---|---| -| **Source** | North, D. (2006). "Introducing BDD." *Better Software Magazine*. https://dannorth.net/introducing-bdd/ | -| **Date** | 2006 | -| **Alternative** | Fowler, M. (2013). "Given When Then." *martinfowler.com*. https://martinfowler.com/bliki/GivenWhenThen.html | -| **Status** | Confirmed — primary BDD source | -| **Core finding** | BDD evolved directly from TDD to address persistent practitioner confusion. BDD reframes TDD vocabulary around observable behavior: scenarios instead of tests, Given-When-Then instead of Arrange-Act-Assert. | -| **Mechanism** | "Given" captures preconditions (Arrange), "When" captures the triggering event (Act), "Then" captures the observable outcome (Assert). Translating to G/W/T shifts focus from implementation mechanics to user-observable behavior. | -| **Where used** | Theoretical link between Gherkin `@id` Examples (Step 1 output) and the TDD inner loop (Step 3). | - ---- - -## Bibliography - -1. Beck, K. (2023). "Canon TDD." *tidyfirst.substack.com*. https://tidyfirst.substack.com/p/canon-tdd -2. Beck, K., Fowler, M., & Hansson, D. H. (2014). "Is TDD Dead?" 
*martinfowler.com*. https://martinfowler.com/articles/is-tdd-dead/ -3. Fowler, M. (2018). *The Practical Test Pyramid*. https://martinfowler.com/articles/practical-test-pyramid.html -4. Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. -5. Google Testing Blog. (2013). Testing on the Toilet: Test Behavior, Not Implementation. -6. King, K. N. (1991). *The Gamma (formerly mutants)*. -7. MacIver, D. R. (2016). What is Property Based Testing? *Hypothesis*. https://hypothesis.works/articles/what-is-property-based-testing/ -8. Martin, R. C. (2017). First-Class Tests. *Clean Coder Blog*. -9. North, D. (2006). Introducing BDD. *Better Software Magazine*. https://dannorth.net/introducing-bdd/ diff --git a/docs/features/in-progress/.gitkeep b/docs/spec/.gitkeep similarity index 100% rename from docs/features/in-progress/.gitkeep rename to docs/spec/.gitkeep diff --git a/docs/spec/context_map.md b/docs/spec/context_map.md new file mode 100644 index 0000000..b5f258d --- /dev/null +++ b/docs/spec/context_map.md @@ -0,0 +1,84 @@ +# Context Map: smith + +> DDD context map showing relationships between bounded contexts. +> Updated by the Software Architect when contexts or relationships change. +> Follows the DDD strategic design patterns for inter-context relationships. + +--- + +## Context Relationships + +| Upstream Context | Downstream Context | Relationship Pattern | Translation / Anti-Corruption Layer | +|-----------------|-------------------|---------------------|-------------------------------------| +| Template Source (External) | Connection | Customer-Supplier | Connection reads files from the template source; no translation needed — files are copied as-is | + +> smith has one bounded context (Connection). The Template Source is an external dependency, not a separate bounded context within smith. It provides files but has no domain logic or invariants that smith owns. 
The relationship is Customer-Supplier: smith (downstream) depends on the template source (upstream) for file content, but does not control it. If template versioning or validation becomes a domain concern, Template Source may be promoted to its own bounded context. + +--- + +## Context Map Diagram + +```mermaid +graph LR + Connection[Connection Context
connect · disconnect · update · status] + TS[Template Source
External Dependency] + + TS -->|provides files| Connection +``` + +> The Connection context is the sole bounded context within smith. It owns the Connection aggregate and all four CLI commands. The Template Source is an external dependency (default: agents-smith; override: `--from <source>`) that provides the agentic files to be written. There is no anti-corruption layer because the files are copied as-is — no domain translation is needed. + +--- + +## Integration Points + +| Integration | From | To | Mechanism | Contract | +|-------------|------|----|-----------|----------| +| File Provisioning | Template Source | Connection | importlib.resources from package data (bundled), filesystem read (local), HTTP download (URL) | Template source must provide a valid directory structure containing AGENTS.md, .opencode/, .templates/, and .flowr/ | + +> The only integration point is file provisioning: the Connection context reads agentic files from the template source. For the default (agents-smith), files are read from the `smith/data/` package directory via `importlib.resources` — no network access required. For `--from <path>`, files are read from the local filesystem. For `--from <url>`, files are downloaded via HTTP (`.tar.gz` or `.zip`) and extracted to a temporary directory. No domain events cross this boundary — it is a simple data dependency. + +--- + +## Anti-Corruption Layers + +| ACL | Protects Context | From Context | Translation Rules | +|-----|-----------------|--------------|-------------------| +| TemplateSourceAdapter | Connection | Template Source (External) | Normalises different source types (bundled package data, local path, remote URL) into a uniform file-provider interface that the Connection aggregate can consume without knowing the source type | + +> The TemplateSourceAdapter protects the Connection context from variations in how template files are obtained.
It translates between three source types (bundled package data via importlib.resources, local filesystem paths, remote URLs via HTTP download) and presents a uniform interface: "given a template source, provide the set of files to write." This keeps the Connection aggregate focused on its invariants (atomicity, safety, clean separation) without coupling to file resolution details. + +--- + +## Bounded Context Details + +### Connection Context + +**Responsibility:** Manage the full lifecycle of connecting agentic files to a project directory — connect, disconnect, update, and status. + +**Aggregate Root:** Connection + +**Key Invariants:** +- Atomicity: either all agentic files are written or none are +- Safety: existing files are never overwritten without explicit `--overwrite` flag +- Clean separation: on disconnect, no agentic files remain (only .gitignore entries) +- Consistency: .gitignore section and agentic file set are always in sync + +**CLI Commands (delivery mechanism):** +- `smith connect [--from <source>] [--overwrite]` +- `smith disconnect` +- `smith update` +- `smith status` + +**Entities:** Connection (aggregate root) + +**Value Objects:** TemplateSource, GitignoreSection, ConnectionStatus + +--- + +## Changes + +| Date | Source | Change | Reason | +|------|--------|--------|--------| +| 2026-05-01 | architecture-assessment | Complete rewrite for corrected product scope | Previous context map described the wrong product (Python project template with single Template context). smith is an AI pair programming platform with a Connection context and external Template Source dependency.
| +| 2026-05-01 | IN_20260501_local-bundle-reversal | Updated integration point and ACL description: bundled template resolution is now local package data via importlib.resources, not GitHub-based download; URL sources download via requests with no persistent cache | Local bundle provides instant offline default; GitHub-based resolution introduced runtime network dependency and cache staleness | \ No newline at end of file diff --git a/docs/spec/domain_model.md b/docs/spec/domain_model.md new file mode 100644 index 0000000..42eda0e --- /dev/null +++ b/docs/spec/domain_model.md @@ -0,0 +1,123 @@ +# Domain Model: smith + +> Current understanding of the business domain. +> Updated by the Domain Expert when domain understanding evolves. +> This document captures what code cannot express: WHY entities exist, HOW aggregates are bounded, and WHAT business capabilities each context serves. +> +> **Evolving document:** Event Storming fills the Event Map, Aggregate Candidates, and Context Candidates sections (workshop draft). Domain Modeling then formalizes them into Entities, Relationships, and Aggregate Boundaries. + +--- + +## Summary + +smith is an AI pair programming platform that connects standardised agent configurations (AGENTS.md, .opencode/, .templates/, .flowr/) to any project directory — and disconnects cleanly when done. Its domain is centred on the **Connection lifecycle**: connect, disconnect, update, and status. The domain has one bounded context — the **Connection context** — which owns the Connection aggregate and supporting value objects. The initial delivery (smith-commands) validates the full connect/work/disconnect cycle end-to-end with four CLI commands. 
+ +--- + +## Event Map + +### Domain Events + +| Event | Description | Trigger | Bounded Context | +|-------|-------------|---------|-----------------| +| `ConnectionRequested` | User invoked `smith connect` in a project directory | User runs `smith connect [--from <source>] [--overwrite]` | Connection | +| `ConnectionEstablished` | All agentic files written to the project directory atomically | All files written successfully | Connection | +| `ConnectionRolledBack` | Partial write detected; all written files removed to restore clean state | Write failure during connect | Connection | +| `DisconnectionRequested` | User invoked `smith disconnect` in a connected project directory | User runs `smith disconnect` | Connection | +| `DisconnectionCompleted` | All agentic files removed; managed .gitignore section preserved | All files removed successfully | Connection | +| `UpdateRequested` | User invoked `smith update` in a connected project directory | User runs `smith update` | Connection | +| `UpdateCompleted` | Agentic files updated to latest from template source | All files updated successfully | Connection | +| `StatusRequested` | User invoked `smith status` in a project directory | User runs `smith status` | Connection | +| `StatusReported` | Connection status displayed to the user | Status check completed | Connection | + +### Commands + +| Command | Description | Produces Event | Actor | +|---------|-------------|----------------|-------| +| `Connect` | Write all agentic files to a project directory from a template source | `ConnectionEstablished` or `ConnectionRolledBack` | Engineer | +| `Disconnect` | Remove all agentic files and managed .gitignore entries from a project directory | `DisconnectionCompleted` | Engineer | +| `Update` | Refresh agentic files from the template source in an already-connected project | `UpdateCompleted` | Engineer | +| `ReportStatus` | Display whether the project directory is connected and which agentic files are present | `StatusReported`
| Engineer | + +### Read Models + +| Read Model | Description | Consumes Event | Used By | +|------------|-------------|----------------|---------| +| `ConnectionStatus` | Whether the project is connected, which files are present, and the template source | `StatusRequested` | CLI output | +| `RollbackLog` | Files that were written before rollback was triggered | `ConnectionRolledBack` | CLI error output | + +--- + +## Context Candidates + +> Filled during Event Storming. Formalized in Bounded Contexts section below by Domain Modeling. + +| Candidate | Responsibility | Grouped Aggregates | Notes | +|-----------|---------------|--------------------|-------| +| Connection | Owns the full lifecycle of connecting/disconnecting agentic files to a project directory | Connection | Single context — the Connection lifecycle is the domain | + +--- + +## Aggregate Candidates + +> Filled during Event Storming. Formalized in Aggregate Boundaries section below by Domain Modeling. + +| Candidate | Events Grouped | Tentative Root Entity | Notes | +|-----------|---------------|-----------------------|-------| +| Connection | `ConnectionRequested`, `ConnectionEstablished`, `ConnectionRolledBack`, `DisconnectionRequested`, `DisconnectionCompleted`, `UpdateRequested`, `UpdateCompleted`, `StatusRequested`, `StatusReported` | Connection | Single aggregate — the Connection is the sole entry point for all four commands | + +--- + +## Bounded Contexts + +| Context | Responsibility | Key Entities | Integration Points | +|---------|----------------|--------------|-------------------| +| Connection | Manage the full lifecycle of connecting agentic files to a project directory: connect, disconnect, update, and status | Connection, TemplateSource, GitignoreSection | Template Source (external dependency for file resolution) | + +> smith has one bounded context (Connection). 
The Template Source is an infrastructure dependency, not a separate bounded context — it provides files but has no independent domain logic or invariants. If template versioning or validation becomes a domain concern in future, it may be extracted into its own context. + +--- + +## Entities + +| Name | Type | Description | Bounded Context | Aggregate Root? | +|------|------|-------------|-----------------|-----------------| +| Connection | Entity | The aggregate root representing a project directory's connection to smith's agentic configuration. Tracks connection state, template source, and the set of managed files. | Connection | Yes | + +--- + +## Value Objects + +| Name | Type | Description | Bounded Context | +|------|------|-------------|-----------------| +| TemplateSource | Value Object | Where agentic files come from: default (agents-smith), local path, or URL. Immutable once resolved. | Connection | +| GitignoreSection | Value Object | The `# smith managed` section in .gitignore. Contains entries for all agentic file patterns. Managed as a unit — added on connect, removed on disconnect. | Connection | +| ConnectionStatus | Value Object | The current state of a project's connection: connected, disconnected, or partial (some but not all agentic files present). | Connection | +| FileSpec | Value Object | A single file or directory to be written during connect or update, with a source path (from the template) and a destination path (in the project directory).
| Connection | + +--- + +## Relationships + +| Subject | Relation | Object | Cardinality | Notes | +|---------|----------|--------|-------------|-------| +| Connection | resolves | TemplateSource | 1:1 | Each connection resolves one template source | +| Connection | maintains | GitignoreSection | 1:1 | Each connection manages one .gitignore section | +| Connection | manages | FileSpec | 1:many | Each connection manages multiple file specifications | + +--- + +## Aggregate Boundaries + +| Aggregate | Root Entity | Invariants | Bounded Context | +|-----------|-------------|------------|-----------------| +| Connection | Connection | **Atomicity:** either all agentic files are written or none are — no partial connections, ever. **Safety:** user-tracked files (not managed by smith) are never overwritten; smith-managed files are auto-updated — zero silent overwrites of user-tracked files, ever. **Clean separation:** on disconnect, no agentic files remain (only .gitignore entries) — zero orphaned files after disconnect. **Consistency:** the .gitignore section and the agentic file set must always be in sync — connected means files present AND .gitignore section present; disconnected means no agentic files present but the .gitignore section is preserved as a guard. | Connection | + +--- + +## Changes + +| Date | Source | Change | Reason | +|------|--------|--------|--------| +| 2026-05-01 | architecture-assessment | Complete rewrite for corrected product scope | Previous domain model described the wrong product (Python project template). smith is an AI pair programming platform with connect/disconnect/update/status commands. | \ No newline at end of file diff --git a/docs/spec/glossary.md b/docs/spec/glossary.md new file mode 100644 index 0000000..ea32a65 --- /dev/null +++ b/docs/spec/glossary.md @@ -0,0 +1,255 @@ +# Glossary: smith + +> Living glossary of domain terms used in this project. +> Written and maintained by the Domain Expert during Discovery. 
+> Append-only: never edit or remove past entries. If a term changes, mark it retired in favor of the new entry and write a new entry. +> Code and tests take precedence over this glossary — if they diverge, refactor the code, not this file. + +--- + +## Entry Format + +``` +## + +**Definition:** + +**Aliases:** + +**Example:** + +**Source:** +``` + +Entries are sorted alphabetically. + +--- + +## Agentic File + +**Definition:** A file or directory that smith manages in a connected project, drawn from a template source and written to the project directory. + +**Aliases:** managed file, smith file + +**Example:** AGENTS.md, .opencode/, .templates/, and .flowr/ are the agentic files that `smith connect` writes to a project directory. + +**Source:** smith-commands + +--- + +## Agentic File Set + +**Definition:** The complete set of agentic files (AGENTS.md, .opencode/, .templates/, .flowr/) that smith writes as a unit during connection. In code, represented as `list[FileSpec]` rather than a separate entity. + +**Aliases:** file set, managed set + +**Example:** The Agentic File Set is written atomically — either all four items are present or none are. + +**Source:** smith-commands + +--- + +## Atomic Connection + +**Definition:** A connection guarantee that either all agentic files are written to the project directory or none are, ensuring no partial state exists. + +**Aliases:** none + +**Example:** When `smith connect` encounters a write failure, it rolls back all previously written files to maintain an atomic connection. + +**Source:** smith-commands + +--- + +## Clean Separation + +**Definition:** A disconnection guarantee that no agentic files remain in the project directory after `smith disconnect`, leaving only .gitignore entries as a trace. + +**Aliases:** none + +**Example:** After running `smith disconnect`, the project directory contains no .opencode/ directory, no .templates/ directory, no .flowr/ directory, and no AGENTS.md file. 
+ +**Source:** smith-commands + +--- + +## Connect + +**Definition:** The CLI command `smith connect [--from <source>] [--overwrite]` that writes all agentic files from a template source to the current project directory, adds a managed .gitignore section, and establishes a connection. + +**Aliases:** connect command, smith connect + +**Example:** Running `smith connect` in a project directory writes AGENTS.md, .opencode/, .templates/, and .flowr/ and adds their patterns to .gitignore under `# smith managed`. + +**Source:** smith-commands + +--- + +## Connection + +**Definition:** The aggregate root representing the state of a project directory's relationship to smith's agentic configuration, tracking whether the project is connected, the template source, and the set of managed files. + +**Aliases:** none + +**Example:** A Connection is established by `smith connect` and removed by `smith disconnect`. + +**Source:** smith-commands + +--- + +## Disconnect + +**Definition:** The CLI command `smith disconnect` that removes all agentic files managed by smith from the current project directory while preserving the `# smith managed` section in .gitignore as a guard for future usage. + +**Aliases:** disconnect command, smith disconnect + +**Example:** Running `smith disconnect` removes AGENTS.md, .opencode/, .templates/, .flowr/ (only those tracked in the `# smith managed` section), but preserves the section header in .gitignore. + +**Source:** smith-commands + +--- + +## Managed .gitignore Section + +**Definition:** A delimited section in .gitignore marked with `# smith managed` that contains entries for all agentic file patterns, added on connect and preserved on disconnect. The section's presence indicates an existing or previous connection. + +**Aliases:** gitignore section, managed section, GitignoreSection (code) + +**Example:** After `smith connect`, .gitignore contains a `# smith managed` section with entries like `.opencode/` and `.flowr/sessions/`.
+ +**Source:** smith-commands + +--- + +## Safety + +**Definition:** A connection guarantee that user-tracked files (not managed by smith) are never overwritten; smith-managed files are auto-updated, ensuring zero silent overwrites. + +**Aliases:** overwrite protection + +**Example:** When `smith connect` finds an existing AGENTS.md that is user-tracked (not in `# smith managed`), it skips user-tracked files and proceeds with the remaining files. + +**Source:** smith-commands + +--- + +## Smith + +**Definition:** An AI pair programming platform that connects standardised agent configurations to any project directory, enabling engineers to immediately work with consistent AI agent workflows. + +**Aliases:** agents-smith (PyPI package name), smith (Python module name) + +**Example:** `smith connect` in any project directory sets up AGENTS.md, .opencode/, .templates/, and .flowr/ so engineers can start using AI-assisted workflows immediately. + +**Source:** smith-commands + +--- + +## Status + +**Definition:** The CLI command `smith status` that reports whether the current project directory is connected, which agentic files are present, and which template source was used. + +**Aliases:** status command, smith status + +**Example:** Running `smith status` in a connected project shows "Connected" with a list of present agentic files and the template source. + +**Source:** smith-commands + +--- + +## Template Source + +**Definition:** The origin of agentic files to be written during connection: the default agents-smith templates, a local directory path, or a remote URL specified via `--from`. + +**Aliases:** source, template source + +**Example:** `smith connect --from ./my-templates` uses a local directory as the template source instead of the default agents-smith templates. 
+ +**Source:** smith-commands + +--- + +## Update + +**Definition:** The CLI command `smith update` that refreshes agentic files in a connected project directory from the original template source, applying any changes from the source to the project. + +**Aliases:** update command, smith update + +**Example:** Running `smith update` after the default agents-smith templates have been updated writes the latest versions of agentic files to the project directory. + +**Source:** smith-commands + +--- + +## ConnectionStatus + +**Definition:** A value object representing the current state of a project directory's connection to smith's agentic configuration: connected, disconnected, or partial (some but not all agentic files present). + +**Aliases:** status, connection state + +**Example:** Running `smith status` returns a ConnectionStatus of "connected" when all agentic files are present, or "partial" when some are missing. + +**Source:** smith-commands + +--- + +## FileSpec + +**Definition:** A value object representing a single file or directory to be written during a connect or update operation, with a source path (from the template) and a destination path (in the project directory). + +**Aliases:** file specification, agentic file (informal) + +**Example:** A FileSpec for AGENTS.md has source `templates/AGENTS.md` and destination `./AGENTS.md` in the project directory. + +**Source:** smith-commands + +--- + +## Managed Section Header (smith metadata) + +**Definition:** Source metadata stored within the `# smith managed` section header of `.gitignore`, using the format `# smith managed source:<source>`. Connection state is inferred from the presence of the managed section — no separate metadata file is created (stateless design). + +**Aliases:** section header metadata, stateless metadata + +**Example:** After `smith connect --from ./my-templates`, the `.gitignore` section header reads `# smith managed source:./my-templates`.
+ +**Source:** smith-commands + +--- + +## Agents-Smith + +**Definition:** The default bundled template source for smith, providing the standard agentic files (AGENTS.md, .opencode/, .templates/, .flowr/) packaged in the `smith/data/` directory and read via `importlib.resources`. + +**Aliases:** agents-smith, default template, bundled template + +**Example:** Running `smith connect` without `--from` reads the agents-smith templates from the packaged `smith/data/` directory — no network call required. + +**Source:** smith-commands, IN_20260501_local-bundle-reversal + +--- + +## Bundled Template Resolution + +**Definition:** The process by which the default `agents-smith` template source reads template files from the `smith/data/` package directory via `importlib.resources`, rather than downloading them at runtime. + +**Aliases:** local bundle resolution, packaged template resolution + +**Example:** When an engineer runs `smith connect`, BundledTemplateSource reads agentic files from `smith/data/AGENTS.md`, `smith/data/.opencode/`, etc. via `importlib.resources` — no network access or caching required. 
+ +**Source:** IN_20260501_local-bundle-reversal + +--- + +## Retired Terms + +| Term | Retired In Favor Of | Reason | Date | +|------|---------------------|--------|------| +| Cache Directory | — | Bundled source no longer uses caching; URL sources re-download each time | 2026-05-01 | +| GitHub-based Resolution | Local Bundle Resolution | Bundled source now reads from packaged files, not GitHub downloads | 2026-05-01 | +| CLI Application | Connection | Product scope changed from project template to AI pair programming platform; the CLI is now one delivery mechanism for the Connection aggregate | 2026-05-01 | +| CLI Entrypoint | Connection | Product scope changed; the entry point is now the `smith` command, not a generic CLI application | 2026-05-01 | +| Package Metadata | TemplateSource | Package metadata is an infrastructure concern; the domain concept is the template source that provides files | 2026-05-01 | +| Project Template | smith | Product scope changed from project template to AI pair programming platform | 2026-05-01 | +| Quality Gate | (kept, but not a domain term) | Quality Gate is a process concept, not a smith domain term | 2026-05-01 | +| Workflow Engine | (removed) | The workflow engine concept belongs to agents-smith (the template source), not to smith's domain | 2026-05-01 | \ No newline at end of file diff --git a/docs/spec/product_definition.md b/docs/spec/product_definition.md new file mode 100644 index 0000000..c3d03d3 --- /dev/null +++ b/docs/spec/product_definition.md @@ -0,0 +1,156 @@ +# Product Definition: smith + +> **Status:** DRAFT (2026-05-01) +> This document is the single source of truth for project scope and conventions. +> Supersedes IN_20260422 — the original product definition captured the wrong product scope. 
+ +--- + +## What smith IS + +- An AI pair programming platform that assimilates ordinary projects into high-performing, AI-augmented systems +- A CLI tool (`smith`) that connects standardised agent configurations (AGENTS.md, .opencode/, .templates/, .flowr/) to any project directory — and disconnects cleanly when done +- A standardisation engine: the same agents, the same flows, every project, by connecting once and working immediately +- A demonstration vehicle that ships with four working commands (`connect`, `disconnect`, `update`, `status`) so engineers see the full connect/work/disconnect cycle end-to-end + +## What smith IS NOT + +- Does NOT execute AI agents — smith configures projects to use AI agents, it doesn't run them +- Does NOT provide CI/CD infrastructure — it doesn't replace your pipelines or deployment setup +- Does NOT manage package dependencies or versions +- Does NOT enforce a specific programming language or framework — smith works with any project +- Does NOT silently overwrite project customizations — user-tracked files are skipped; smith-managed files are auto-updated +- Does NOT leave partial state — connects are atomic: all files or none + +## Why does this exist + +AI agents need structure. Without consistent agent configurations, each project has different .opencode agents, different workflows, and different templates. Engineers waste time maintaining these across projects. Existing solutions are either bare skeletons or opinionated frameworks. smith fills this gap by providing a standardised, reversible way to connect AI agent configurations to any project — new or legacy — so engineers can focus on building, not configuring. Like Agent Smith in the Matrix, smith enters a project, copies its patterns, and returns something more capable than what it found. 
+ +## Users + +- **Software Engineer** — runs `smith connect` in any project directory to immediately start working with standard AI agent workflows; runs `smith disconnect` when done +- **Tech Lead** — standardises AI agent configurations across the team's projects by connecting the same template to each one + +## Quality Attributes + +| Attribute | Scenario | Target | Priority | +|-----------|----------|--------|----------| +| Safety | When smith connects to a project that already has user-tracked agentic files (not managed by smith), it skips user-tracked files; smith-managed files are auto-updated without `--overwrite`. | Zero silent overwrites of user-tracked files, ever | Must (#1) | +| Atomicity | When smith connects, either all agentic files are written or none are | No partial connections, ever | Must (#2) | +| Clean separation | When smith disconnects from a project, no agentic files remain (only .gitignore entries) | Zero orphaned files after disconnect | Must (#3) | +| Usability | When an engineer runs `smith connect` in any project directory, they can immediately start working with standard flows and agents | < 1 minute from connect to working | Must (#4) | +| Modifiability | When a new template source type is needed, it can be added as an infrastructure adapter without changing domain logic | Zero domain changes for new source types | Should (#5) | +| Testability | When unit tests run, domain logic can be tested via port mocks without filesystem or network access | 100% domain test coverage without infrastructure | Should (#6) | + +--- + +## Out of Scope + +- AI execution engine (smith configures agents, doesn't run them) +- CI/CD infrastructure +- Package management +- Language/framework enforcement +- IDE-specific configuration + +## Delivery Order + +1 → **smith-commands** — `smith connect [--from ]`, `smith disconnect`, `smith update`, `smith status`. Four commands that demonstrate the full connect/work/disconnect cycle end-to-end. 
This feature validates the entire workflow and serves as the reference implementation for future features. + +--- + +## Project Conventions + +### Definition of Done + +All criteria must be met before a feature is considered done. + +**Development:** + +- [ ] All BDD scenarios from `features/smith-commands.feature` pass +- [ ] Quality Gate passes all three tiers (Design → Structure → Conventions) +- [ ] Test coverage meets project threshold (≥ 80%) +- [ ] No test coupling — tests verify behavior, not structure +- [ ] Production code follows priority order: YAGNI > DRY > KISS > OC > SOLID > Design Patterns +- [ ] Code uses ubiquitous language from glossary.md (Connection, FileSpec, TemplateSource, GitignoreSection, ConnectionStatus) +- [ ] Safety invariant verified: no silent overwrites of user-tracked files in any code path (all Must Examples in Rule 2 pass) +- [ ] Atomicity invariant verified: pair-atomic write (AGENTS.md + .opencode/) tested with rollback (SC-008) +- [ ] Clean separation invariant verified: disconnect removes all managed files, preserves user-tracked files (SC-014, SC-017) +- [ ] Exit codes verified: 0 (success), 1 (error) — all Examples assert correct exit code + +**Review — Tier 1: Design Correctness (does it do the right thing?)** + +- [ ] Domain invariants enforced: Safety (user-tracked files are never overwritten; smith-managed files are auto-updated), Atomicity (pair-atomic writes), Clean separation (no orphaned files) +- [ ] All ports are Protocol interfaces in the domain layer; no infrastructure imports in domain or application +- [ ] Connection aggregate is the sole entry point for all four commands +- [ ] CLI is a thin delivery adapter that delegates to application use cases + +**Review — Tier 2: Test Structure (are tests good enough?)** + +- [ ] Each Must Example has a passing test with observable outcome +- [ ] Tests mock ports (FileSystemPort, GitignorePort, TemplateSourcePort) — no filesystem/network in unit tests +- [ ] SC-008 
(pair-atomic rollback) has a test that simulates mid-write failure +- [ ] User-tracked file skipping tested for each managed file type +- [ ] No test couples to implementation details (private methods, file paths, internal state) + +**Review — Tier 3: Conventions (does it follow project standards?)** + +- [ ] CI pipeline passes all checks +- [ ] Code Review approved by R (independent reviewer, not the SE who wrote the code) +- [ ] Acceptance Testing passed — PO verifies BDD scenarios behave as expected +- [ ] `smith` CLI command works (`python -m smith` entry point) +- [ ] `--help` and `--version` flags work + +**Deployment:** + +- [ ] Release Verification checklist completed +- [ ] CHANGELOG.md updated with delivered scenarios + +### Deployment + +**Deployment type:** Library (installable Python package) + +**CLI command:** `smith` (entry point: `python -m smith`) +**PyPI package:** `agents-smith` + +#### Common (all deployment types) + +- [ ] Version bumped in pyproject.toml +- [ ] CHANGELOG.md updated with version and delivered scenarios +- [ ] Git tag created (format: `v<version>`) + +#### Library + +- [ ] Package builds without errors (`python -m build`) +- [ ] Package published to PyPI (`twine upload dist/*`) +- [ ] Installable from PyPI in clean environment + +#### Rollback Plan + +- Revert the git tag and re-publish the previous version to PyPI +- PyPI does not support deleting versions; yank the release instead (via the project's PyPI management page or API — twine itself has no yank command) + +### Branch Strategy + +- **Convention:** Trunk-based (short-lived feature branches from trunk, PR before merge) +- **Branch naming:** `<type>/<short-description>` (e.g., `feature/add-smith-commands`) +- **Merge policy:** Squash merge to trunk after approval + +### Naming + +- **CLI command:** `smith` +- **PyPI package:** `agents-smith` +- **Python module:** `smith` +- **Tagline:** Pair program with AI, the right way. 
+- **Branding:** Matrix/Agent Smith theme (see `docs/branding.md`) + +### .gitignore Convention + +smith manages its own section in .gitignore, marked with `# smith managed`. On connect, entries for agentic files are added to this section. On disconnect, the agentic files are removed but the `# smith managed` section is preserved (it serves as a guard for future usage). Files listed in `# smith managed` are treated as managed by smith; files outside this section are user-tracked and never touched by smith. + +--- + +## Scope Changes + +| Date | Session | Change | Reason | +|------|---------|--------|--------| +| 2026-05-01 | IN_20260501_stakeholder-reinterview | Complete product redefinition: smith is an AI pair programming platform, not just a Python template. Delivery order changed from cli-entrypoint to smith-commands. | Stakeholder clarified product scope during reinterview | \ No newline at end of file diff --git a/docs/spec/system.md b/docs/spec/system.md new file mode 100644 index 0000000..44e3bac --- /dev/null +++ b/docs/spec/system.md @@ -0,0 +1,157 @@ +# System Overview: smith + +> Current-state description of the production system. +> Updated by the Software Architect when domain understanding changes (rare). +> Contains only completed features — nothing from backlog or in-progress. +> This document captures what code cannot express: WHY contexts exist, HOW they relate, WHAT the aggregate boundaries are and why. + +--- + +## Summary + +smith is an AI pair programming platform that connects standardised agent configurations (AGENTS.md, .opencode/, .templates/, .flowr/) to any project directory — and disconnects cleanly when done. Its sole bounded context is the **Connection lifecycle**: connect, disconnect, update, and status. 
The system is delivered as a CLI tool (`smith`) with one external runtime dependency (`requests`, for URL template sources), using hexagonal architecture to keep domain logic independent of filesystem operations and template resolution. The primary users are software engineers and tech leads who need consistent AI agent configurations across projects. + +--- + +## Delivery + +**Mechanism:** CLI (command-line interface) + +The `smith` command is the sole delivery mechanism. Users interact with four subcommands: `connect`, `disconnect`, `update`, `status`. The CLI is a thin adapter that parses arguments and delegates to application use cases. The domain has no knowledge of argparse or terminal output — it enforces invariants and produces domain objects; the delivery layer translates these into human-readable output and exit codes. + +--- + +## Context (C4 Level 1) + +### Actors + +| Actor | Description | +|-------|-------------| +| Software Engineer | Runs `smith connect` in any project directory to immediately start working with standard AI agent workflows; runs `smith disconnect` when done | +| Tech Lead | Standardises AI agent configurations across the team's projects by connecting the same template to each one | + +### Systems + +| System | Kind | Description | +|--------|------|-------------| +| smith | Internal | CLI tool that manages the Connection lifecycle: connect, disconnect, update, status | +| Template Source | External | Provides agentic files for provisioning. 
Three variants: bundled (agents-smith, packaged in smith/data/), local path (filesystem), remote URL (HTTP/HTTPS) | +| Project Directory | External | The target project directory where agentic files are written/removed | + +### Interactions + +| Interaction | Behaviour | Technology | +|-------------|-----------|------------| +| Engineer → smith | Runs CLI commands (connect, disconnect, update, status) | Shell / terminal | +| smith → Template Source | Reads template files for provisioning | importlib.resources (bundled), requests (URL), pathlib (local) | +| smith → Project Directory | Writes/removes agentic files atomically; manages .gitignore section with source metadata in header; stateless — no metadata file | pathlib, shutil, tempfile | + +--- + +## Container (C4 Level 2) + +### Boundary: smith + +| Container | Technology | Responsibility | +|-----------|------------|----------------| +| CLI Delivery Layer | argparse (stdlib) | Parse CLI arguments, dispatch to use cases, format output, set exit codes | +| Application Services | Python (pure) | Orchestrate use cases: connect, disconnect, update, status. Enforce invariants via domain layer | +| Domain Layer | Python (pure) | Enforce invariants (atomicity, safety, clean separation, consistency). Define ports (Protocols) that infrastructure must implement | +| Infrastructure Adapters | Python + requests | Implement domain ports: BundledTemplateSource (importlib.resources from smith/data), LocalTemplateSource, UrlTemplateSource (requests + tarfile/zipfile), AtomicFileSystem, GitignoreManager, SectionMetadata | + +### Interactions + +| Interaction | Behaviour | +|-------------|-----------| +| CLI → Application Services | Dispatches parsed CLI arguments to the appropriate use case (ConnectUseCase, DisconnectUseCase, etc.) 
| +| Application Services → Domain | Delegates invariant enforcement to the Connection aggregate; uses ports for side effects | +| Infrastructure → Domain | Implements domain port Protocols; dependency arrow points inward (infrastructure depends on domain, not vice versa) | +| Infrastructure → Template Source | Reads template files: importlib.resources from packaged data (bundled), filesystem read (local), HTTP download (URL) | +| Infrastructure → Project Directory | Writes/removes agentic files atomically via temp-directory staging; manages .gitignore section with source metadata; stateless — no metadata file | + +--- + +## Module Structure + +| Module | Responsibility | Bounded Context | +|--------|----------------|-----------------| +| `smith.domain.connection` | Connection aggregate root — enforces atomicity, safety, clean separation, consistency invariants | Connection | +| `smith.domain.value_objects` | TemplateSource, GitignoreSection, ConnectionStatus, FileSpec — immutable value objects | Connection | +| `smith.domain.ports` | TemplateSourcePort, FileSystemPort, GitignorePort, MetadataPort — Protocol interfaces defining what the domain needs | Connection | +| `smith.application.connect` | ConnectUseCase — orchestrates conflict check, template resolution, atomic write, .gitignore update, metadata save | Connection | +| `smith.application.disconnect` | DisconnectUseCase — orchestrates file removal, preserving the .gitignore section as a guard | Connection | +| `smith.application.update` | UpdateUseCase — orchestrates connection check, template resolution, atomic overwrite, .gitignore update, metadata update | Connection | +| `smith.application.status` | StatusUseCase — orchestrates connection check, file presence check, status report | Connection | +| `smith.infrastructure.template_source` | BundledTemplateSource (importlib.resources from smith/data/), LocalTemplateSource, UrlTemplateSource (requests + tarfile/zipfile, no cache) — implement TemplateSourcePort 
| Connection | +| `smith.infrastructure.filesystem` | AtomicFileSystem — implements FileSystemPort with temp-directory staging | Connection | +| `smith.infrastructure.gitignore` | GitignoreManager — implements GitignorePort with delimited section management | Connection | +| `smith.infrastructure.metadata` | SectionMetadata — delegates to GitignoreManager for source metadata in gitignore section header (stateless — no .smith.yaml file) | Connection | +| `smith.delivery.cli` | build_parser(), main(), command handlers — argparse setup and dispatch | Connection | + +--- + +## Domain Model Documentation + +### Why Each Context Exists + +| Bounded Context | Business Capability | Why It's Separate | +|-----------------|---------------------|-------------------| +| Connection | Manage the full lifecycle of connecting agentic files to a project directory | The Connection lifecycle is the core domain — connect, disconnect, update, status. It encapsulates all invariants (atomicity, safety, clean separation, consistency) and is the sole entry point for all four commands. No other context is needed because the domain is small and cohesive. | + +### Aggregate Boundary Rationale + +| Aggregate | Why These Entities Are Grouped | Transactional Invariant | +|-----------|-------------------------------|------------------------| +| Connection | The Connection aggregate root owns the TemplateSource, GitignoreSection, and the list of FileSpecs. All operations (connect, disconnect, update, status) go through the Connection root. The file set cannot exist independently — it is always part of a Connection. | **Atomicity:** either all agentic files are written or none are. **Safety:** user-tracked files are skipped; smith-managed files are auto-updated. **Clean separation:** on disconnect, no agentic files remain. **Consistency:** .gitignore section and agentic file set are always in sync. 
| + +--- + +## Active Constraints + +- **Minimal runtime dependencies:** The package has one external runtime dependency (`requests`), used only for URL template source resolution. The bundled `agents-smith` source reads from packaged files via `importlib.resources` — no network call required. All other functionality uses Python stdlib. See ADR-007. +- **Atomicity via temp-directory staging:** All file writes must be staged to a temporary directory before being committed to the project directory. No partial connections are allowed. +- **Safety via pre-write conflict check:** Before any write, the project directory must be scanned for existing agentic files. User-tracked files are skipped; smith-managed files are auto-updated. +- **Clean separation via managed .gitignore section:** The `# smith managed` / `# end smith managed` delimiters must be used to mark the section. On disconnect, agentic files are removed but the section is preserved as a guard for future usage. +- **Hexagonal architecture:** Domain logic must not import from infrastructure, application, or delivery layers. The dependency arrow always points inward. +- **Usability:** `smith connect` must complete (files written and .gitignore updated) in under 1 minute in any project directory. + +--- + +## Key Decisions + +- **argparse as CLI framework** — Sufficient for four subcommands; maintains minimal runtime dependencies. See ADR-001. +- **Atomic file writes via temp-directory staging** — All files written to a temp directory first, then moved atomically. On failure, the temp directory is discarded. See ADR-002. +- **Hexagonal architecture (Ports & Adapters)** — Domain logic is independent of filesystem, network, and CLI. Ports are Protocol interfaces defined in the domain layer; infrastructure adapters implement them. +- **Stateless design — no .smith.yaml** — Connection state is inferred from the `# smith managed` section in `.gitignore`. 
Source metadata is stored in the section header (e.g., `# smith managed source:agents-smith`). No separate metadata file is created. ADR-004 (originally defining .smith.yaml) is superseded by this stateless decision. +- **No smart merge** — For `.flowr/` and `.templates/` that already exist, `--overwrite` replaces entirely. No partial merge logic. This is a deliberate simplicity trade-off (YAGNI > DRY). +- **Local bundled template resolution** — The default `agents-smith` template source reads agentic files from `smith/data/` via `importlib.resources`. No network call is required. A manual script (`scripts/update-bundle.sh`) syncs the bundle from the agents-smith `v8_release` branch when a new release is prepared. See ADR-007. + +--- + +## ADRs + +See `docs/adr/` for the full decision record. + +- ADR-001: Use argparse as CLI framework (minimal runtime dependencies) — `docs/adr/ADR_20260501_argparse-cli-framework.md` +- ADR-002: Atomic file writes via temp-directory staging — `docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md` +- ADR-003: Hexagonal architecture (Ports & Adapters) — `docs/adr/ADR_20260501_hexagonal-architecture.md` +- ADR-004: .smith.yaml metadata file — **Superseded** by the stateless design (source metadata in the `.gitignore` section header; see Key Decisions) — `docs/adr/ADR_20260501_smith-yaml-metadata.md` +- ADR-005: No smart merge for .flowr/ and .templates/ — `docs/adr/ADR_20260501_no-smart-merge.md` +- ADR-006: GitHub-based bundled template resolution — **Superseded** by ADR-007 +- ADR-007: Local bundled template resolution (importlib.resources) — `docs/adr/ADR_20260501_local-bundled-template-resolution.md` + +--- + +## Completed Features + +See `docs/features/` for accepted features. 
+ +--- + +## Changes + +| Date | Source | Change | Reason | +|------|--------|--------|--------| +| 2026-05-01 | architecture-assessment | Initial system overview | New feature: smith-commands (connect, disconnect, update, status) | +| 2026-05-01 | IN_20260501_agents-smith-dependency-resolution | Changed bundled template resolution from importlib.resources/smith/data/ to GitHub-based download + local cache; updated dependency constraint from zero runtime deps to one (requests) | Bundled template files in smith/data/ were stale copies that would go out of sync; GitHub-based resolution ensures templates are always current | +| 2026-05-01 | IN_20260501_local-bundle-reversal | Reverted bundled template resolution to local package (importlib.resources + smith/data/); fully implemented UrlTemplateSource; removed caching; deprecated BDD examples a1b2c3d4 and e5f6g7h8; superseded ADR-006 with ADR-007 | GitHub-based resolution introduced runtime network dependency and cache staleness issues; local bundle provides instant offline default | \ No newline at end of file diff --git a/docs/spec/technical_design.md b/docs/spec/technical_design.md new file mode 100644 index 0000000..77b6549 --- /dev/null +++ b/docs/spec/technical_design.md @@ -0,0 +1,722 @@ +# Technical Design: smith + +> Technical design document for the smith-commands feature. +> Updated by the Software Architect when stack, contracts, or interfaces change. +> Contract-first design: API and event contracts are defined here before implementation begins. + +--- + +## Feature + +`docs/features/smith-commands/` — the connect/disconnect/update/status CLI commands. + +--- + +## Architectural Style + +**Style:** Hexagonal (Ports & Adapters) + +**Rationale:** smith's core domain — the Connection lifecycle — must be testable in isolation from filesystem operations, network requests, and CLI argument parsing. 
The quality attribute priority (Safety > Atomicity > Clean Separation > Usability) demands that domain invariants are enforced without coupling to infrastructure. Hexagonal architecture achieves this by defining ports (Protocol interfaces) in the domain layer that infrastructure adapters implement. The CLI is a delivery mechanism — a thin adapter that translates argparse results into domain commands. This allows the Connection aggregate to enforce atomicity and safety invariants without knowing whether files are written to a real filesystem or an in-memory test double. The dependency arrow always points inward: infrastructure → application → domain (Cockburn, 2005; Evans, 2003). + +Note: The Safety invariant protects user-tracked files (not managed by smith) from silent overwrite. Smith-managed files (in the `# smith managed` section) may be updated by `smith connect` (auto-update) and `smith update` without `--overwrite`. + +--- + +## Quality Attributes + +| Attribute | Priority | Architectural Decision | ADR Ref | +|-----------|----------|----------------------|---------| +| Safety | 1 (Must) | Conflict detection before any write; `--overwrite` gate enforced for user-tracked files; no silent overwrites of user-tracked files ever; smith-managed files may be updated without `--overwrite` | — | +| Atomicity | 2 (Must) | Temp-directory staging with atomic rename; all files written to staging area first, then moved to final locations; on failure, staging area is discarded | ADR-002 | +| Clean Separation | 3 (Must) | Managed `.gitignore` section with clear delimiters; disconnect removes all agentic files while preserving the section as a guard; connection state inferred from the managed section in `.gitignore` (stateless — no metadata file) | — | +| Usability | 4 (Must) | Four subcommands with clear output; argparse provides help text; exit codes distinguish success/error | ADR-001 | +| Modifiability | 5 (Should) | Hexagonal architecture allows adding new template 
sources (URL types, git repos) without changing domain logic; new CLI flags are thin delivery-layer additions | — | +| Testability | 6 (Should) | Domain logic tested via port mocks; no filesystem or network in unit tests; integration tests use temp directories | — | + +--- + +## Stack + +| Layer | Technology | Version | Rationale | +|-------|-----------|---------|-----------| +| Language | Python | 3.13 | Project requirement (pyproject.toml: `requires-python = ">=3.13"`) | +| CLI Framework | argparse | stdlib | Sufficient for four subcommands with options; maintains minimal runtime dependencies (ADR-001) | +| Package metadata | importlib.metadata | stdlib | Already used for version/description; no new dependency | +| HTTP client | requests | PyPI | URL template source resolution (tar.gz/zip download); cleaner API and error handling than urllib.request (ADR-007) | +| Archive extraction | tarfile / zipfile | stdlib | Extract downloaded template archives for URL sources; no new dependency | +| Package resources | importlib.resources | stdlib | Read bundled template files from `smith.data` package; no new dependency | +| File operations | pathlib / shutil / tempfile | stdlib | Atomic writes, directory operations; no new dependency | +| Metadata storage | — | — | Connection state inferred from `# smith managed` section in `.gitignore`; source metadata stored in section header (e.g., `# smith managed source:agents-smith`); no separate metadata file — stateless design | + +**Minimal runtime dependencies** is a deliberate constraint. The only external dependency is `requests` (used for URL template source resolution). The bundled `agents-smith` source reads from packaged files via `importlib.resources` — no network call needed. See ADR-007 for the rationale. 
+ +--- + +## Module Structure + +``` +smith/ + __init__.py # Package marker + __main__.py # Entry point: python -m smith + domain/ + __init__.py + connection.py # Connection aggregate root + value_objects.py # TemplateSource, GitignoreSection, ConnectionStatus, FileSpec + ports.py # TemplateSourcePort, FileSystemPort, GitignorePort, MetadataPort (Protocols) + application/ + __init__.py + connect.py # ConnectUseCase + disconnect.py # DisconnectUseCase + update.py # UpdateUseCase + status.py # StatusUseCase + infrastructure/ + __init__.py + template_source.py # BundledTemplateSource (importlib.resources), LocalTemplateSource, UrlTemplateSource + filesystem.py # AtomicFileSystem + gitignore.py # GitignoreManager + metadata.py # SectionMetadata + delivery/ + __init__.py + cli.py # build_parser(), main(), command handlers +``` + +**Dependency direction:** `delivery` → `application` → `domain` ← `infrastructure` + +The domain layer has **zero** imports from application, infrastructure, or delivery. The application layer imports from domain only. Infrastructure implements domain ports. Delivery calls application use cases. + +**Rationale:** This structure enforces the hexagonal boundary. The Connection aggregate enforces invariants (atomicity, safety, clean separation) without knowing whether files are written to a real filesystem or a test double. New template source types (git repos, archives) are added as infrastructure adapters without touching domain or application code. + +--- + +## API Contracts + +### `smith connect [--from ] [--overwrite]` + +**Behaviour:** Write all agentic files from the template source to the current project directory. Add a managed `.gitignore` section with source metadata in the section header. 
+ +**Request:** +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `--from` | string | No | `agents-smith` | Template source: `agents-smith` (bundled), local path, or URL | +| `--overwrite` | flag | No | False | Replace existing agentic files without prompting | + +**Response (stdout):** +| Condition | Output | Exit Code | +|-----------|--------|-----------| +| Success | `Connected from <source>.` + list of files written | 0 | +| Error | `Error: <message>` | 1 | + +**Preconditions:** +- Current directory is a project directory (writable) +- If already connected (`# smith managed` section exists), auto-update managed files +- User-tracked files (not in `# smith managed` section) are skipped, not reported as conflicts +- `--overwrite` replaces all managed files; user-tracked files are always preserved + +**Postconditions:** +- All agentic files present in project directory (atomicity) +- `# smith managed` section added to `.gitignore` with source metadata in header +- No partial state on failure (atomicity) + +--- + +### `smith disconnect` + +**Behaviour:** Remove all smith-managed agentic files from the current project directory. Preserve the `# smith managed` section in `.gitignore` (serves as guard for future usage). Smith is stateless — no metadata file to remove. + +**Request:** No parameters. 
+ +**Response (stdout):** +| Condition | Output | Exit Code | +|-----------|--------|-----------| +| Success | `Disconnected.` + list of files removed | 0 | +| Not connected | `Not connected — nothing to disconnect.` | 0 | +| Error | `Error: ` | 1 | + +**Preconditions:** +- Current directory is a project directory (writable) + +**Postconditions:** +- No smith-managed agentic files remain in project directory (clean separation) +- `# smith managed` section preserved in `.gitignore` (guard for future usage) +- If `.gitignore` is empty after removal, it is left as an empty file (not deleted) + +--- + +### `smith update` + +**Behaviour:** Refresh agentic files in a connected project directory from the original or specified template source. Overwrite all managed agentic files with latest versions. If the project is not connected, auto-connect (same as `smith connect`). + +**Request:** Optional `--from ` to use a different template source. + +**Response (stdout):** +| Condition | Output | Exit Code | +|-----------|--------|-----------| +| Success | `Updated from .` + list of files updated | 0 | +| Not connected (auto-connect) | Same as `smith connect` | 0 | +| Error | `Error: ` | 1 | + +**Preconditions:** +- Template source must be reachable + +**Postconditions:** +- All agentic files updated to latest from template source +- Source metadata in `.gitignore` section header updated +- `.gitignore` managed section patterns updated if changed + +--- + +### `smith status` + +**Behaviour:** Report whether the current project directory is connected, which agentic files are present, and which template source was used. + +**Request:** No parameters. 
+ +**Response (stdout):** +| Condition | Output | Exit Code | +|-----------|--------|-----------| +| Connected | `Connected from .` + list of files with status | 0 | +| Disconnected | `Not connected.` | 1 | +| Partial | `Partial connection — some files missing:` + list with status | 1 | + +**Preconditions:** None (always succeeds in reporting). + +**Postconditions:** None (read-only, no side effects). + +--- + +## Event Contracts + +smith is a synchronous CLI tool with no event-driven communication. All operations are request-response within a single process. No event contracts are needed for the current architecture. + +If smith evolves to support background operations or daemon mode, event contracts will be defined at that time (YAGNI). + +--- + +## Interface Definitions + +### TemplateSourcePort + +```python +from pathlib import Path +from typing import Protocol + + +class TemplateSourcePort(Protocol): + """Port for resolving template files from a source. + + Implementations: BundledTemplateSource, LocalTemplateSource, UrlTemplateSource. + The domain defines this interface; infrastructure adapters implement it. + """ + + def resolve(self) -> list[FileSpec]: + """Resolve the template source into a list of file specifications. + + Returns: + List of FileSpec objects, each containing a relative path and content. + + Raises: + TemplateSourceError: If the source cannot be resolved + (not found, network error, invalid archive). + """ + ... + + def gitignore_patterns(self) -> list[str]: + """Return gitignore patterns for the managed section. + + Returns: + List of gitignore patterns (e.g., ['.opencode/', '.templates/', + '.flowr/sessions/']). + """ + ... +``` + +### FileSystemPort + +```python +class FileSystemPort(Protocol): + """Port for atomic file system operations. + + Implementations: AtomicFileSystem (production), InMemoryFileSystem (tests). + The domain defines this interface; infrastructure adapters implement it. 
+ """ + + def check_conflicts(self, paths: list[Path]) -> list[Path]: + """Check which paths already exist in the project directory. + + Args: + paths: List of relative paths to check. + + Returns: + List of paths that already exist in the project directory. + """ + ... + + def write_atomic(self, specs: list[FileSpec]) -> None: + """Write all file specifications atomically to the project directory. + + Either all files are written or none are. On failure, any partially + written files are rolled back. + + Args: + specs: List of FileSpec objects with relative paths and content. + + Raises: + FileSystemError: If any write fails (rolled back to clean state). + """ + ... + + def remove(self, paths: list[Path]) -> None: + """Remove files and directories from the project directory. + + Args: + paths: List of relative paths to remove. + + Raises: + FileSystemError: If any removal fails. + """ + ... + + def exists(self, paths: list[Path]) -> dict[Path, bool]: + """Check which paths exist in the project directory. + + Args: + paths: List of relative paths to check. + + Returns: + Dictionary mapping each path to whether it exists. + """ + ... +``` + +### GitignorePort + +```python +class GitignorePort(Protocol): + """Port for managing the smith-managed section in .gitignore. + + Implementations: GitignoreManager (production), InMemoryGitignore (tests). + The domain defines this interface; infrastructure adapters implement it. + + Connection state is inferred from the managed section in .gitignore, + not from a separate metadata file. This port is the primary state mechanism. + """ + + def add_section(self, patterns: list[str]) -> None: + """Add a managed section to .gitignore with the given patterns. + + Creates .gitignore if it does not exist. The section is delimited by + '# smith managed' and '# end smith managed' markers. + + Args: + patterns: List of gitignore patterns to include. + """ + ... 
+ + def has_section(self) -> bool: + """Check whether .gitignore contains a smith-managed section. + + Returns: + True if the managed section exists, False otherwise. + """ + ... + + def get_patterns(self) -> list[str]: + """Return the gitignore patterns from the managed section. + + Returns: + List of gitignore patterns currently in the managed section. + Returns an empty list if the section does not exist. + """ + ... +``` + +### MetadataPort + +```python +class MetadataPort(Protocol): + """Port for reading and writing connection metadata. + + Connection state is inferred from the '# smith managed' section in .gitignore, + not from a separate metadata file. This port handles source metadata stored + within the gitignore section header (e.g., '# smith managed source:agents-smith'). + + Implementations: GitignoreManager (production, dual-implements GitignorePort + and MetadataPort), InMemoryGitignore (tests). + The domain defines this interface; infrastructure adapters implement it. + """ + + def save_source(self, source: TemplateSource) -> None: + """Write template source metadata to the gitignore section header. + + Args: + source: The template source used for the connection. + """ + ... + + def load_source(self) -> TemplateSource | None: + """Read template source metadata from the gitignore section header. + + Returns: + The stored TemplateSource, or None if not connected. + """ + ... +``` + +--- + +## Value Objects + +### FileSpec + +```python +from dataclasses import dataclass +from pathlib import Path + + +@dataclass(frozen=True) +class FileSpec: + """A file to be written from a template source to a project directory. + + Attributes: + relative_path: Path relative to the project root + (e.g., 'AGENTS.md', '.opencode/agents/po.md'). + content: File content as bytes. 
+ """ + + relative_path: Path + content: bytes +``` + +### TemplateSource + +```python +from dataclasses import dataclass +from typing import Literal + + +@dataclass(frozen=True) +class TemplateSource: + """The origin of agentic files. + + Attributes: + kind: 'bundled', 'local', or 'url'. + location: 'agents-smith' for bundled, absolute path for local, URL for url. + """ + + kind: Literal["bundled", "local", "url"] + location: str +``` + +### ConnectionStatus + +```python +from dataclasses import dataclass +from enum import Enum + + +class ConnectionState(Enum): + """Possible states of a project's connection.""" + + CONNECTED = "connected" + DISCONNECTED = "disconnected" + PARTIAL = "partial" + + +@dataclass(frozen=True) +class ConnectionStatus: + """The current state of a project's connection. + + Attributes: + state: Whether connected, disconnected, or partial. + source: The template source (None if disconnected). + present_files: List of agentic file paths that exist. + missing_files: List of agentic file paths that are missing. + """ + + state: ConnectionState + source: TemplateSource | None + present_files: list[Path] + missing_files: list[Path] +``` + +### GitignoreSection + +```python +from dataclasses import dataclass + + +@dataclass(frozen=True) +class GitignoreSection: + """The managed section in .gitignore. + + Attributes: + patterns: List of gitignore patterns (e.g., ['.opencode/', '.templates/']). + start_marker: Section start delimiter (default: '# smith managed'). + end_marker: Section end delimiter (default: '# end smith managed'). 
+ """ + + patterns: list[str] + start_marker: str = "# smith managed" + end_marker: str = "# end smith managed" +``` + +--- + +## C4 Diagrams + +### Context (C4 Level 1) + +```mermaid +graph TB + Engineer[Software Engineer / Tech Lead] + smith[smith CLI] + TS[Template Source] + PD[Project Directory] + + Engineer -->|runs commands| smith + smith -->|reads templates| TS + smith -->|writes/removes agentic files| PD + smith -->|manages .gitignore section + source metadata| PD +``` + +**Actors:** + +| Actor | Description | +|-------|-------------| +| Software Engineer | Runs `smith connect` in any project directory to start working with standard AI agent workflows; runs `smith disconnect` when done | +| Tech Lead | Standardises AI agent configurations across the team's projects by connecting the same template to each one | + +**Systems:** + +| System | Kind | Description | +|--------|------|-------------| +| smith | Internal | CLI tool that connects/disconnects standardised agent configurations to project directories | +| Template Source | External | Provides agentic files: bundled (agents-smith), local path, or remote URL | +| Project Directory | External | The target project where agentic files are written/removed | + +**Interactions:** + +| Interaction | Behaviour | Technology | +|-------------|-----------|------------| +| Engineer → smith | Runs CLI commands (connect, disconnect, update, status) | Shell / terminal | +| smith → Template Source | Reads template files for provisioning | requests (bundled/URL), pathlib (local) | +| smith → Project Directory | Writes/removes agentic files, manages .gitignore section with source metadata in header | pathlib, shutil, tempfile | + +### Container (C4 Level 2) + +```mermaid +graph TB + CLI[CLI Delivery Layer
argparse] + App[Application Services
Use Case Orchestration] + Domain[Domain Layer
Connection Aggregate, Value Objects, Ports] + Infra[Infrastructure Adapters
TemplateSource, FileSystem, Gitignore, Metadata] + TS[Template Source
External] + PD[Project Directory
Filesystem] + + CLI -->|dispatches commands| App + App -->|enforces invariants| Domain + Infra -.->|implements ports| Domain + Infra -->|reads templates| TS + Infra -->|writes/removes files| PD +``` + +**Boundary: smith** + +| Container | Technology | Responsibility | +|-----------|------------|----------------| +| CLI Delivery Layer | argparse (stdlib) | Parse CLI arguments, dispatch to use cases, format output | +| Application Services | Python (pure) | Orchestrate use cases: connect, disconnect, update, status | +| Domain Layer | Python (pure) | Enforce invariants (atomicity, safety, clean separation, consistency); define ports | +| Infrastructure Adapters | Python + requests | Implement domain ports: BundledTemplateSource (importlib.resources from smith/data), LocalTemplateSource, UrlTemplateSource (requests + tarfile/zipfile), AtomicFileSystem, GitignoreManager, SectionMetadata | + +**Interactions:** + +| Interaction | Behaviour | +|-------------|-----------| +| CLI → Application Services | Dispatches parsed CLI arguments to the appropriate use case | +| Application Services → Domain | Delegates invariant enforcement to the Connection aggregate | +| Infrastructure → Domain | Implements domain port Protocols; dependency arrow points inward | +| Infrastructure → Template Source | Reads template files: importlib.resources from package data (bundled), filesystem read (local), HTTP download (URL) | +| Infrastructure → Project Directory | Writes/removes agentic files atomically; manages .gitignore section with source metadata; stateless — no metadata file | + +### Component (C4 Level 3) — Domain Layer + +```mermaid +graph TB + Connection[Connection
Aggregate Root] + TS[TemplateSource
Value Object] + GS[GitignoreSection
Value Object] + CS[ConnectionStatus
Value Object] + FS[FileSpec
Value Object] + + Connection -->|resolves| TS + Connection -->|maintains| GS + Connection -->|reports| CS + Connection -->|manages| FS +``` + +### Component (C4 Level 3) — Application Services + +```mermaid +graph TB + Connect[ConnectUseCase] + Disconnect[DisconnectUseCase] + Update[UpdateUseCase] + Status[StatusUseCase] + + Connect -->|creates Connection| Connection[Connection Aggregate] + Disconnect -->|removes Connection| Connection + Update -->|refreshes Connection| Connection + Status -->|queries Connection| Connection +``` + +### Component (C4 Level 3) — Infrastructure Adapters + +```mermaid +graph TB + Bundled[BundledTemplateSource
implements TemplateSourcePort] + Local[LocalTemplateSource
implements TemplateSourcePort] + Url[UrlTemplateSource
implements TemplateSourcePort] + AFS[AtomicFileSystem
implements FileSystemPort] + GM[GitignoreManager
implements GitignorePort] + YM[SectionMetadata
implements MetadataPort] + + Bundled -->|importlib.resources| DataDir[smith/data/
packaged templates] + Local -->|pathlib| LocalFS[Local filesystem
path] + Url -->|requests| Remote[Remote URL
HTTP/HTTPS] + AFS -->|tempfile + shutil| ProjectFS[Project directory
filesystem] + GM -->|pathlib| GitignoreFile[.gitignore
file + source metadata] + YM -->|delegates to| GM +``` + +--- + +## Dependencies + +| Dependency | What it provides | Why not replaced | +|------------|------------------|-----------------| +| `argparse` | CLI argument parsing | Stdlib; sufficient for four subcommands; minimal runtime dependency (ADR-001) | +| `importlib.metadata` | Package version and description | Stdlib; already used in `__main__.py` | +| `requests` | HTTP downloads for URL template sources | External; cleaner API and error handling than urllib.request; used for tar.gz/zip archive download from remote URLs (ADR-007) | +| `importlib.resources` | Read bundled template files from `smith/data` package | Stdlib; no network call needed for the default template source | +| `pathlib` | Path manipulation | Stdlib; modern Python path handling | +| `shutil` | File/directory operations (copy, rmtree) | Stdlib; needed for atomic writes and directory removal | +| `tempfile` | Temporary directory creation for atomic writes | Stdlib; core of the atomicity mechanism (ADR-002) | +| `tarfile` | Archive extraction for URL template sources | Stdlib; needed for .tar.gz archives | +| `zipfile` | Archive extraction for URL template sources | Stdlib; needed for .zip archives | +| `dataclasses` | Value object definitions | Stdlib; frozen dataclasses for immutable value objects | +| `typing` | Protocol definitions, type hints | Stdlib; `Protocol` for port definitions | + +**One runtime dependency beyond Python stdlib:** `requests` is the only external package. This is a deliberate trade-off — `requests` provides significantly better HTTP handling than `urllib.request` for URL template source downloads. The bundled `agents-smith` source reads from packaged files via `importlib.resources` and requires no network call. See ADR-007. 
+ +--- + +## Configuration Keys + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `--from` | string | `agents-smith` | Template source: `agents-smith` (bundled), local path, or URL | +| `--overwrite` | flag | `False` | Replace existing agentic files without prompting | +| `smith.managed_section_start` | string | `# smith managed` | Delimiter marking the start of the managed .gitignore section | +| `smith.managed_section_end` | string | `# end smith managed` | Delimiter marking the end of the managed .gitignore section | +| `smith.default_template` | string | `agents-smith` | Default template source when `--from` is not specified | + +**Note:** Configuration keys with the `smith.` prefix are internal constants, not user-facing configuration. They are defined as module-level constants in the domain layer and are not configurable via environment variables or config files (YAGNI). The only user-facing configuration is the `--from` and `--overwrite` CLI flags. + +--- + +## Atomicity Implementation + +The atomicity invariant (all files or none) is implemented using a **temp-directory staging pattern**: + +1. **Stage:** Write all agentic files to a temporary directory (via `tempfile.mkdtemp`). +2. **Validate:** After all writes succeed, check that all expected files exist in the staging area. +3. **Commit:** Move staged files to their final locations in the project directory. Each file is moved atomically using `os.replace` (atomic on the same filesystem). +4. **Rollback:** If any step fails, remove the entire staging directory. If some commits have already succeeded, remove the committed files (best-effort rollback). + +The `.gitignore` section (with source metadata in the header) is written **after** all agentic files are committed. This ensures that a partial connection never leaves the `.gitignore` section pointing to missing files. There is no separate metadata file — smith is stateless. 
+ +**Rollback on disconnect:** The `disconnect` command removes smith-managed agentic files, preserving user-tracked files and the `.gitignore` section. If any removal fails, the command reports the error but continues removing remaining files (best-effort cleanup). The `.gitignore` section is preserved as a guard for future connections. + +--- + +## Safety Implementation + +The safety invariant (no silent overwrites of user-tracked files) is implemented as a **pre-write conflict check**: + +1. **Auto-update:** If the project is already connected (has `# smith managed` section and all managed files exist), `smith connect` auto-updates smith-managed files without requiring `--overwrite`. This is intentional — smith manages these files. +2. **Scan:** Before any write to an unconnected or partially-connected project, scan for existing files that conflict with the template. +3. **Skip:** If user-tracked files are found (files not in the `# smith managed` section), skip them — write only the files that don't conflict. The operation succeeds (exit 0) with the user-tracked files left untouched. +4. **Overwrite:** If `--overwrite` is set, overwrite all managed files (user-tracked files are still preserved via `_is_path_managed` within `_resolve_specs`). + +The conflict check is performed by the `FileSystemPort.check_conflicts()` method, which is called by the `ConnectUseCase` before staging any writes. This keeps the safety check in the application layer (orchestration) while the domain invariant (no silent overwrites) is enforced by the Connection aggregate. + +--- + +## .gitignore Management + +The managed section in `.gitignore` uses clear delimiters: + +```gitignore +# smith managed source:agents-smith +.opencode/ +.templates/ +.flowr/sessions/ +# end smith managed +``` + +**On connect:** +- If `.gitignore` does not exist, create it with the managed section. +- If `.gitignore` exists but has no managed section, append the managed section. 
+- If `.gitignore` exists and has a managed section, auto-update (overwrite managed files, skip user-tracked files). + +**On disconnect:** +- Remove all agentic files tracked in the managed section (AGENTS.md, .opencode/, .templates/, .flowr/). +- Preserve the `# smith managed` section in .gitignore — its presence serves as a guard for future `smith connect` or `smith update` commands. +- If an agentic file is NOT tracked in the managed section (user tracks it manually), do NOT remove it. + +**On update:** +- If the gitignore patterns have changed (e.g., template source provides different patterns), replace the managed section with the new patterns. + +--- + +## Template Source Resolution + +The `TemplateSourceAdapter` is a facade within `smith/infrastructure/template_source.py` that normalises three source types into a uniform `TemplateSourcePort` interface: + +| Source Type | Detection | Resolution | +|-------------|-----------|------------| +| Bundled (`agents-smith`) | Default (no `--from` flag) | `importlib.resources` reads agentic files from the `smith.data` package directory; no network call required | +| Local path | `--from` starts with `/`, `./`, `../`, or is an absolute path | `pathlib.Path` reads files from the local filesystem | +| Remote URL | `--from` starts with `http://` or `https://` | `requests` downloads the archive; `tarfile` or `zipfile` extracts to a temp directory; agentic file filter applied; temp directory cleaned up after resolution | + +**Bundled template resolution (local package):** The default `agents-smith` template source is resolved by reading agentic files directly from the `smith.data` package directory via `importlib.resources`. No network call is required — the files are packaged with smith. The agentic file filter (`_is_agentic_path`) selects only the essential subdirectories: `AGENTS.md`, `.opencode/agents/`, `.opencode/knowledge/`, `.opencode/skills/`, `.opencode/tools/`, `.templates/`, `.flowr/`. 
This excludes non-essential content like `node_modules/`, `package.json`, and other development artifacts that may exist in the source repository. A manual script (`scripts/update-bundle.sh`) syncs `smith/data/` from the agents-smith `v8_release` branch when a new release is prepared. See ADR-007 for the rationale. + +**Local path validation:** The `LocalTemplateSource` adapter validates that the path exists and contains the expected agentic file structure (at minimum, an `AGENTS.md` file). If the path is invalid, it raises `TemplateSourceError`. + +**URL download and extraction:** The `UrlTemplateSource` adapter downloads the archive to a temporary directory, extracts it (`.tar.gz` or `.zip`), applies the agentic file filter (only `AGENTS.md`, `.opencode/agents/`, `.opencode/knowledge/`, `.opencode/skills/`, `.opencode/tools/`, `.templates/`, `.flowr/` are included — non-essential content like `node_modules/` is excluded), and returns `FileSpec` objects. The temp directory is cleaned up after the `TemplateSourcePort.resolve()` call completes. No persistent cache is maintained for URL sources — each `resolve()` call re-downloads the archive. + +**Network failure handling:** If the URL download fails (network unreachable, HTTP error, timeout), `UrlTemplateSource.resolve()` raises `TemplateSourceError` with a clear message. The bundled source does not require network access — it reads from packaged files and always works offline. + +--- + +## Entry Point Configuration + +The `smith` CLI command is configured via `pyproject.toml` console scripts: + +```toml +[project.scripts] +smith = "smith.delivery.cli:main" +``` + +This allows users to run `smith connect` after installing the package, while `python -m smith` continues to work for development. 
+ +--- + +## Changes + +| Date | Source | Change | Reason | +|------|--------|--------|--------| +| 2026-05-01 | architecture-assessment | Initial technical design | New feature: smith-commands (connect, disconnect, update, status) | +| 2026-05-01 | IN_20260501_agents-smith-dependency-resolution | Replaced bundled template source (importlib.resources + smith/data/) with GitHub-based download + local cache; added requests dependency; added cache_dir and bundled_archive_url config keys | Bundled template files in smith/data/ were stale copies that would go out of sync; GitHub-based resolution ensures templates are always current | +| 2026-05-01 | IN_20260501_local-bundle-reversal | Reverted bundled template source to local package (importlib.resources + smith/data/); fully implemented UrlTemplateSource (tar.gz/zip with agentic filter); removed caching; removed cache_dir and bundled_archive_url config keys; deprecated BDD examples a1b2c3d4 and e5f6g7h8; superseded ADR-006 with ADR-007 | GitHub-based resolution introduced runtime network dependency and cache staleness issues; local bundle provides instant offline default experience; UrlTemplateSource handles URL sources independently | \ No newline at end of file diff --git a/docs/spec/workflow-design.md b/docs/spec/workflow-design.md new file mode 100644 index 0000000..db2893d --- /dev/null +++ b/docs/spec/workflow-design.md @@ -0,0 +1,565 @@ +# Development Lifecycle Workflow — Non-Deterministic State Machine Design + +## Model Rules + +- Each stage is a node or a link to another diagram (sub-flow) +- Diagrams can contain cycles, but cycles only point to the same level (not parent/child) +- Each diagram has finite, flat exit points +- Sub-flows are linked from states that require them + +## Design Principles + +### Priority Order (conflict resolution) + +When two principles conflict, the earlier one wins: + +**YAGNI > DRY > KISS > OC > SOLID > Design Patterns** + +1. **YAGNI** — Don't build what you don't need yet. 
If a feature isn't required by a .feature file example, it doesn't exist. +2. **DRY** — Don't repeat yourself, but only after YAGNI passes. Duplication is better than the wrong abstraction. +3. **KISS** — Keep it simple, but only after eliminating duplication. The simplest design that passes all .feature examples wins. +4. **OC** — Object Calisthenics, but only after KISS passes. Structure serves simplicity, not the other way around. +5. **SOLID** — Apply SOLID principles, but only after OC passes. SOLID is a tool, not a goal. +6. **Design Patterns** — Use patterns only when simpler approaches don't work. A pattern is justified only when YAGNI, KISS, and OC all point to it. + +### Philosophical Principles (from the Zen of Python) + +These guide all design decisions. When in doubt, refer to these: + +- Beautiful is better than ugly. +- Explicit is better than implicit. +- Simple is better than complex. +- Complex is better than complicated. +- Flat is better than nested. +- Sparse is better than dense. +- Readability counts. +- Special cases aren't special enough to break the rules. +- Although practicality beats purity. +- Errors should never pass silently unless explicitly silenced. +- In the face of ambiguity, refuse the temptation to guess. +- There should be one — and preferably only one — obvious way to do it. +- Now is better than never. Although never is often better than *right now*. +- If the implementation is hard to explain, it's a bad idea. +- If the implementation is easy to explain, it may be a good idea. + +### Core Workflow Principles + +1. **Fail-fast, shift-left** — issues caught early cost 10x less than issues caught late. The Review sub-step is tiered so the most expensive issues (design) are caught before cheaper issues (conventions) are invested in. +2. **Never invest in Tier 3 work on code that hasn't passed Tier 1** — docstrings, formatting, and conventions are waste on code that may need complete restructuring. +3. 
**BDD features are the single thread of truth** — written in Planning, used as test spec in Development, validated in Acceptance.
+4. **Each artifact is a translation of the previous one** — never skip an artifact. Each one is a checkpoint where you can validate alignment with the domain before investing in the next level of detail. If scope is wrong, .features will be wrong. If .features are wrong, tests will test the wrong things. If signatures don't match the domain model, test bodies will couple to the wrong structure.
+5. **Architecture must be reviewed before implementation begins** — catching design errors after SE has built everything is 100x more expensive than catching them during architecture. SA's work is reviewed by a separate R hat during Architecture Review & Sign-off, not after Development.
+6. **Technical review happens in Development, not after** — R reviews all three tiers (design, structure, conventions) during Development's Review sub-step. Acceptance (PO) is purely business validation. This eliminates redundancy and catches issues where they're cheapest to fix.
+ +--- + +## Actors + +| Actor | Abbreviation | Responsibility | Documents they own | +|---|---|---|---| +| **Product Owner** | PO | Business requirements, scope validation, acceptance sign-off | interview-notes/*.md, product_definition.md, features/.feature | +| **Domain Expert** | DE | Domain knowledge, ubiquitous language | domain_model.md, glossary.md, event_map | +| **Software Architect** | SA | Architecture decisions, context mapping, interface contracts | context_map.md, adr/*.md, technical_design.md, system.md, py_stubs, test_stubs | +| **Software Engineer** | SE | Implementation, test design, code quality | test_bodies, function_bodies, commits | +| **Reviewer** | R | Independent verification, cannot review own work | Review evidence (categorized by tier), approval records | + +**Key principle: You cannot review your own work.** R is a separate hat (not necessarily a separate person). If SA designed the architecture, someone else must wear the R hat for Architecture Review & Sign-off. If SE wrote the code, someone else must wear the R hat for Development's Review sub-step. + +In small teams, PO+DE may be one person, and SA+SE may be one person. But the **responsibilities are distinct** — the same person wears different hats. The R hat ensures independent verification at critical gates. + +--- + +## Process Support + +The `.opencode/` directory provides the meta-process infrastructure that guides how the flows are executed. It is separate from project artifacts — it is the procedural and reference system, not the work product. + +**Entry point**: `AGENTS.md` (project root) is loaded every session. It provides navigation, wikilink resolution, and discovery commands. See `agent-design/principles` for design rationale. + +**Discover, don't enumerate**: The number and names of agents, skills, and knowledge files change. 
AGENTS.md provides discovery commands rather than inventories: + +```bash +ls .opencode/agents/ # agent identity definitions +ls .opencode/skills/ # skill directories (each has SKILL.md) +find .opencode/knowledge -name '*.md' # knowledge files +``` + +### Agent-Role Mapping + +| Agent | Abbreviation | Decides | +|---|---|---| +| Product Owner | PO | Scope, priority, acceptance | +| Domain Expert | DE | Domain model, ubiquitous language | +| System Architect | SA | Architecture, ADRs, project structure | +| Reviewer | R | Pass/fail (cannot review own work) | + +Each flow state specifies its owner (PO, DE, SA, SE, or R). The owner maps to the agent file in `.opencode/agents/`. Agent files contain identity only (who I am, what I decide) — no skill lists, no routing, no knowledge content. + +### Skill Loading + +Each flow state loads skills on demand. The flow YAML `skills` field specifies which skill to invoke. Skills are procedural (step-by-step instructions) and are the only files that load knowledge. See `skill-design/principles` for skill structure. + +### Knowledge Resolution + +Skills reference knowledge via `[[domain/concept]]` wikilinks, resolved to `.opencode/knowledge/{domain}/{concept}.md`. Knowledge files use 4-section progressive disclosure: + +| Fragment | Loads | Token Savings | +|---|---|---| +| `[[domain/concept#key-takeaways]]` | Frontmatter + Key Takeaways | ~80% | +| `[[domain/concept#concepts]]` | Frontmatter + Key Takeaways + Concepts | ~65% | +| `[[domain/concept]]` | Entire file | 0% | + +Knowledge domains: `architecture`, `domain-modeling`, `requirements`, `software-craft`, `workflow`, `agent-design`, `skill-design`, `knowledge-design`. + +--- + +## Main Flow: Development Lifecycle + +The main flow separates project-level work (done once) from feature-level work (looped per feature). 
+ +``` +Discovery → Architecture → Feature Development ←┐ + ↑ ↑ │ │ + │ └ needs_architecture┘ │ + └ needs_discovery next-feature ─────┘ + completed ──► [Completed] + cancelled ──► [Cancelled] +``` + +Terminal exits: **completed** | **cancelled** + +| State | Purpose | Sub-flow | Transitions | +|---|---|---|---| +| **Discovery** | Domain understanding & scope | → Discovery Flow | `complete` → Architecture | +| **Architecture** | Architecture & context mapping | → Architecture Flow | `complete` → Feature Development, `needs_discovery` → Discovery | +| **Feature Development** | Feature-level loop: Planning → Dev → Acceptance → PR | → Feature Development Flow | `next-feature` → Feature Development (loop), `needs_architecture` → Architecture, `cancelled` → Cancelled, `completed` → Completed | + +**Why separate project-level and feature-level?** Discovery and Architecture establish the domain model and technical foundation once for the entire project. Feature Development then loops: each feature goes through Planning → Development → Acceptance → PR Creation. When all features are delivered (or none remain), the project completes. 
+ +--- + +## Hotfix Process + +Hotfixes use the same Main Flow (Discovery → Architecture → Feature Development) but with constrained scope: + +**Scope constraints:** +- **Discovery**: Focused on root cause analysis of the specific issue +- **Architecture**: Minimal change that fixes the issue without breaking existing contracts +- **Planning**: PO decides the specification approach: + - *Add new example* to existing .feature file (missing edge case) + - *Create new .feature file* (completely new behavior required) + - *Fix existing examples* (current specification is wrong) +- **Development**: Same TDD cycle and Review sub-flow - no shortcuts +- **Acceptance**: Same acceptance process - PO verifies business behavior +- **PR Creation**: Same PR process + +**Key principle: Quality gates remain the same.** Speed comes from smaller scope and focused specification changes, not skipped steps. A hotfix that breaks architecture or introduces technical debt creates bigger problems than the original issue. 
+ +--- + +## Test Body Design Pattern (cross-cutting) + +Every test body across all levels follows **Given/When/Then maps to Arrange/Act/Assert** — but the scope of what's under test differs: + +| Test Level | Scope | "Given" sets up | "When" triggers | "Then" asserts | +|---|---|---|---|---| +| **Unit** | Single domain object | Value objects, primitives | A method/command on one object | State changes, return values, exceptions | +| **Integration** | Aggregate + persistence | Aggregate via repository, test DB | A command through the aggregate root | Events emitted, state persisted, invariants held | +| **Acceptance (BDD)** | Full bounded context | Application service, test doubles | A use case through the API/entry point | End-to-end behavior matching BDD examples | + +--- + +## Discovery Flow — DDD Strategic Phase + +``` +Stakeholder Interview ──► Event Storming ──► Language Definition ──► Domain Modeling ──► Scope Boundary + │ ↑ ↑ │ │ ↑ + │ │ └ needs_restorming─┘ │ │ + ├── needs_full_discovery┘ │ │ + ├── needs_scope_only ──────────────────────────────────────────────┘ │ + ├── already_known ──► [complete] │ + │ │ + └── needs_reinterview ◄─────────────── Domain Modeling ─────────────────────┘ +``` + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Stakeholder Interview** | PO | — | — | interview-notes/*.md#pain_points, interview-notes/*.md#business_goals, interview-notes/*.md#terms_to_define, interview-notes/*.md#quality_attributes | +| **Event Storming** | DE | interview-notes/*.md#pain_points, interview-notes/*.md#business_goals, interview-notes/*.md#terms_to_define | — | domain_model.md#event_map, domain_model.md#context_candidates, domain_model.md#aggregate_candidates | +| **Language Definition** | DE | interview-notes/*.md#terms_to_define, domain_model.md#event_map | — | glossary.md | +| **Domain Modeling** | DE | glossary.md, domain_model.md#event_map, domain_model.md#aggregate_candidates, 
domain_model.md#context_candidates | domain_model.md#bounded_contexts, domain_model.md#entities, domain_model.md#relationships, domain_model.md#aggregate_boundaries, domain_model.md#summary | — | +| **Scope Boundary** | PO | domain_model.md#bounded_contexts, domain_model.md#aggregate_boundaries, domain_model.md#context_candidates, domain_model.md#summary, glossary.md | — | product_definition.md#what_is, product_definition.md#what_is_not, product_definition.md#why, product_definition.md#users, product_definition.md#out_of_scope, product_definition.md#delivery_order, product_definition.md#quality_attributes, product_definition.md#deployment | + +**Routing from Stakeholder Interview:** +- `needs_full_discovery` → Event Storming (new domain/concept) +- `needs_scope_only` → Scope Boundary (domain understood, scope new work) +- `already_known` → complete (no discovery needed) + +**Iteration loops:** +- Event Storming → `needs_reinterview` → Stakeholder Interview (workshop reveals gaps) +- Language Definition → `needs_restorming` → Event Storming (language contradicts event map) +- Domain Modeling → `contradiction_found` → Language Definition (model contradicts language) +- Domain Modeling → `needs_reinterview` → Stakeholder Interview (model reveals missing domain knowledge) +- Scope Boundary → `needs_reinterview` → Stakeholder Interview (scope questions reveal missing requirements) + +**Why this order?** Event Storming (Brandolini) is an exploratory technique that surfaces domain events, commands, and aggregate *candidates* — it comes before formal modeling. Language Definition formalizes the ubiquitous language from interviews + event storming terms — it comes before the domain model because the model is *expressed in* the ubiquitous language. Domain Modeling then formalizes the candidates into proper entities, invariants, and aggregate boundaries using glossary terms. 
+ +**domain_model.md is an evolving document:** Event Storming fills the Event Map, Aggregate Candidates, and Context Candidates sections (workshop draft). Domain Modeling then formalizes these into the Bounded Contexts, Entities, Relationships, and Aggregate Boundaries sections. Both steps edit the same document — no separate event storming artifact needed. + +**Carried forward to Architecture Flow:** glossary.md, domain_model.md, product_definition.md + +--- + +## Architecture Flow — DDD Tactical + Technical Design + +``` +Architecture Assessment ──► no_architecture_needed ──► [complete] (when architecture_exists) + │ + ├── needs_context_update ──► Context Mapping ──► Technical Design ──┐ + │ │ │ + │ └── needs_discovery │ + ├── needs_technical_design ──────────────────────────────────────► │ + │ │ needs_decisions │ + │ └──► ADR Draft ─────────►│ + │ ▼ + └── needs_discovery ──► [needs_discovery] Review & Sign-off + │ + └── inconsistent ──► Architecture Assessment +``` + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Architecture Assessment** | SA | product_definition.md#what_is, product_definition.md#delivery_order, product_definition.md#deployment, product_definition.md#quality_attributes, domain_model.md#bounded_contexts, domain_model.md#summary, system.md, technical_design.md, context_map.md | product_definition.md#deployment* | — | +| **Context Mapping** | SA | domain_model.md#bounded_contexts, domain_model.md#context_candidates, product_definition.md#what_is, product_definition.md#what_is_not, product_definition.md#out_of_scope, glossary.md | — | context_map.md#context_relationships, context_map.md#context_map_diagram, context_map.md#integration_points, context_map.md#anti_corruption_layers | +| **Technical Design** | SA | context_map.md#context_relationships, context_map.md#integration_points, context_map.md#anti_corruption_layers, domain_model.md#entities, domain_model.md#relationships, 
domain_model.md#aggregate_boundaries, glossary.md, system.md, product_definition.md#what_is, product_definition.md#what_is_not, product_definition.md#out_of_scope, product_definition.md#deployment, product_definition.md#quality_attributes | technical_design.md#architectural_style, technical_design.md#quality_attributes, technical_design.md#stack, technical_design.md#module_structure, technical_design.md#api_contracts, technical_design.md#event_contracts, technical_design.md#interface_definitions, technical_design.md#c4_diagrams, technical_design.md#dependencies, technical_design.md#configuration_keys, system.md#context, system.md#container, system.md#module_structure, system.md#delivery | — | +| **ADR Draft** | SA | technical_design.md#architectural_style, technical_design.md#quality_attributes, technical_design.md#stack, technical_design.md#module_structure, context_map.md#context_relationships, domain_model.md#bounded_contexts, domain_model.md#aggregate_boundaries, product_definition.md#what_is, product_definition.md#quality_attributes, glossary.md, system.md | system.md#key_decisions, system.md#active_constraints | adr/*.md | +| **Review & Sign-off** | R | context_map.md, technical_design.md, system.md, adr/*.md†, product_definition.md#what_is, product_definition.md#what_is_not, product_definition.md#quality_attributes, domain_model.md#bounded_contexts, domain_model.md#aggregate_boundaries, glossary.md | — | — | + +**Routing from Architecture Assessment:** +- `no_architecture_needed` → complete (when `architecture_exists`: system_md, technical_design_md, context_map_md all exist — feature fits existing architecture) +- `needs_technical_design` → Technical Design (new API contracts, modules, or interfaces) +- `needs_context_update` → Context Mapping (when `architecture_exists` — bounded context boundaries change, but base architecture exists) +- `needs_discovery` → needs_discovery exit (domain model insufficient) + +**Routing from Context Mapping:** +- `done` → 
Technical Design (context boundaries updated, contracts must be verified) +- `needs_discovery` → needs_discovery exit (bounded contexts in domain_model.md don't hold up under mapping) + +**Routing from Technical Design:** +- `done` → Review & Sign-off (no significant decisions needed) +- `needs_decisions` → ADR Draft (architecturally significant choice required) + +**Why assessment first?** Most features fit the existing architecture. Forcing context mapping and technical design for every feature is wasteful. SA assesses the feature against existing architecture and only does the work that's needed. This also gives SA a chance to interview the stakeholder about technical constraints (deployment target, infrastructure preferences) before making architectural decisions. + +**First-run safety (architecture_exists guard):** The `no_architecture_needed` and `needs_context_update` routes are guarded by the `architecture_exists` condition, which checks that system.md, technical_design.md, and context_map.md all exist. This prevents accidentally skipping architecture on a project's first feature where these artifacts don't yet exist. + +**Why is ADR conditional?** ADRs record architecturally significant decisions — most features don't involve such decisions. Forcing an ADR per feature creates noise. When SA discovers a decision is needed during technical design, they route to ADR Draft. Otherwise they skip it. + +**Why does ADR Draft edit system.md?** system.md is the living reference for the current system state. ADR summaries (key decisions) and risk constraints (active constraints) belong there so that R can verify implementation against them during Review Gate, and so that future SA assessments have a concise summary of architectural decisions without reading every ADR. + +**Dual ownership of product_definition.md#deployment\***: PO sets an initial deployment preference during Discovery (Scope Boundary). 
SA may override it during Architecture Assessment when technical constraints demand a different mechanism. SA has final say — deployment mechanism is an architectural decision, not a business preference. + +**Routing from Review & Sign-off:** +- `approved` → complete (all documents consistent and aligned) +- `inconsistent` → Architecture Assessment (documents contradict each other — SA must re-examine) +- `needs_discovery` → needs_discovery exit (domain model insufficient) + +**Reconciliation (explicit in review-signoff):** R verifies cross-document consistency before approving: +- technical_design.md ↔ domain_model.md (module structure matches bounded contexts; API contracts match entities) +- technical_design.md ↔ product_definition.md (out-of-scope items not in design; quality attributes addressed) +- technical_design.md ↔ glossary.md (terms in contracts match ubiquitous language) +- context_map.md ↔ domain_model.md (integration points match context boundaries) +- adr/*.md ↔ technical_design.md (ADRs consistent with actual design) + +**Conditional input †**: Review & Sign-off lists `adr/*.md` as input, but ADRs may not exist when Technical Design routes directly to Review & Sign-off (no `needs_decisions`). R reads whatever ADRs exist — zero ADRs is valid. + +**needs_discovery from different sources**: Both Assessment and Context Mapping can exit with `needs_discovery`. Assessment triggers it when the domain model is insufficient to make architectural decisions. Context Mapping triggers it when bounded contexts don't hold up under relationship analysis. Review & Sign-off can also trigger `needs_discovery` when R finds architectural problems that stem from flawed discovery. All three route back to the full Discovery cycle — this is deliberate over-correction: partial discovery rework risks reintroducing the same gaps. 
+ +**inconsistent from Review & Sign-off**: When R finds that the architecture documents contradict each other (e.g., technical design uses terms not in the glossary, or context map doesn't align with domain model boundaries), the flow routes back to Architecture Assessment rather than Discovery. The domain model may be fine — the problem is that the architecture doesn't consistently translate it. + +**Carried forward to Feature Development Flow:** context_map.md, adr/*.md, technical_design.md, system.md + +--- + +## Planning Flow — BDD Story Definition + +``` +Feature Selection → Feature Specification → Feature Breakdown → BDD Features → Definition of Done → Ready + │ │ ↑ ↑ + │ └ needs_architecture───┘ │ + │ └ needs_respecification─┘ + │ + └ no_features → [completed] +``` + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Feature Selection** | PO | product_definition.md#what_is, product_definition.md#why, product_definition.md#delivery_order, technical_design.md#feature, technical_design.md#module_structure | — | — | +| **Feature Specification** | PO | product_definition.md#what_is, product_definition.md#users, product_definition.md#quality_attributes, product_definition.md#out_of_scope, domain_model.md#bounded_contexts, domain_model.md#entities, domain_model.md#aggregate_boundaries, glossary.md, technical_design.md#api_contracts, technical_design.md#feature | — | interview-notes/*.md | +| **Feature Breakdown** | PO | product_definition.md#what_is, product_definition.md#why, product_definition.md#users, product_definition.md#delivery_order, technical_design.md#feature, technical_design.md#module_structure, interview-notes/*.md | — | feature_list | +| **BDD Features** | PO | feature_list, product_definition.md#what_is, product_definition.md#users, product_definition.md#quality_attributes, domain_model.md#entities, domain_model.md#aggregate_boundaries, glossary.md | — | features/.feature | +| **Definition of 
Done** | PO | features/.feature, product_definition.md#quality_attributes | product_definition.md#definition_of_done | — | +| **Ready** | PO | features/.feature, product_definition.md#definition_of_done | — | — | + +**Exits:** `complete` → Development, `needs_architecture` → Architecture, `no_features` → Completed (no more features to develop) + +**Why feature selection first?** Before planning a feature, PO must verify that the architecture covers it. If technical_design.md doesn't address the feature, Planning routes back to Architecture rather than proceeding with incomplete design. + +**Why feature specification?** The initial stakeholder interview in Discovery covers domain understanding at scope level, not feature-level behavioral detail. Feature Specification is a targeted conversation about one feature's concrete behavior — behavioral rules, scenarios, and acceptance criteria — informed by domain constraints (domain_model.md, glossary.md) and technical contracts (technical_design.md#api_contracts). + +**Key design principle:** BDD features are the **contract between Planning and Development**. Each example becomes: +1. A test specification (test body design) +2. The acceptance criteria (Acceptance validates against them) + +Feature Specification fills the gap between Discovery's scope-level interview and feature-level behavioral detail. Feature Breakdown then decomposes the specified feature into stories. BDD features are written using both the breakdown and the specification interview notes. + +**Feature file convention:** Flows work on one feature at a time. Artifact references use `features/.feature` (singular placeholder), not `features/*.feature` (glob). The flow engine processes a single feature per cycle through the Feature Development loop. + +**Iteration loops:** Feature Breakdown and BDD Features can route back to Feature Specification via `needs_respecification` when decomposition reveals that the specification was incomplete or inconsistent. 
+ +**Carried forward to Development Flow:** features/.feature, product_definition.md (with DoD), feature_list, interview-notes/*.md + +**Project convention (not per-feature):** Branch naming convention, PR template, merge policy are established once at project start and referenced from product_definition.md, not repeated each planning cycle. Trunk-based: short-lived feature branches from trunk, PR before merge. + +--- + +## Development Flow — TDD Implementation + +``` +Project Structuring → [TDD Cycle Flow] → [Review Gate Flow] → Commit + ↑ │ │ + └── blocked ─────────┘ │ + │ │ + └── needs_planning │ + │ + fail ────────────────┘ +``` + +### Project Structuring (owned by SA) + +| Step | What gets created | Source artifact | Output Artifacts | +|---|---|---|---| +| Package/module directories | Folder structure matching bounded context design | technical_design.md#module_structure | git_branch | +| `.py` stubs/signatures | Class names, typed attributes, method signatures, interfaces — **NO behavior** | domain_model.md#entities + domain_model.md#relationships + domain_model.md#bounded_contexts + glossary.md + technical_design.md#api_contracts + technical_design.md#interface_definitions + technical_design.md#dependencies + technical_design.md#configuration_keys + context_map.md#context_relationships + context_map.md#integration_points + context_map.md#anti_corruption_layers + adr/*.md + product_definition.md#quality_attributes | py_stubs | +| Test class stubs | One test file per `.feature` file, example function names as placeholders — **no fixtures/assertions** | features/.feature | test_stubs | + +**Why signatures before tests?** The `.py` stubs consolidate domain ideas into code structure. Test stubs then map `.feature` examples onto that structure. If signatures are wrong (don't match the domain), test bodies will couple to wrong abstractions — and refactoring both tests and implementation is expensive. Signatures are cheap to change; coupled tests are not. 
+ +### TDD Cycle Flow (separate flow, owned by SE) + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **RED** | SE | test_stubs, py_stubs | — | test_bodies | +| **GREEN** | SE | test_bodies, py_stubs | — | function_bodies | +| **REFACTOR** | SE | function_bodies, test_bodies | function_bodies | refactored_code | + +**Exits:** `all_green` → Review Gate, `blocked` → Project Structuring + +### Review Gate Flow (separate flow, owned by R) + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Design Review** | R | domain_model.md#bounded_contexts, domain_model.md#entities, domain_model.md#aggregate_boundaries, glossary.md, technical_design.md#module_structure, technical_design.md#api_contracts, technical_design.md#event_contracts, context_map.md#context_relationships, system.md, product_definition.md#quality_attributes, adr/*.md, refactored_code | — | design_review_evidence | +| **Structure Review** | R | coverage_reports, test_output, refactored_code, features/.feature, domain_model.md#entities, domain_model.md#aggregate_boundaries, glossary.md | — | structure_review_evidence | +| **Conventions Review** | R | linter_output, refactored_code, product_definition.md#project_conventions, glossary.md | — | conventions_review_evidence | + +**Exits:** `pass` → Commit, `fail` → TDD Cycle + +| Tier | Name | What R checks | Evidence sources | Fail routes SE to | +|---|---|---|---|---| +| **1** | **Design** | Domain alignment, DDD patterns, ubiquitous language, architecture compliance, priority order (YAGNI → DRY → KISS → OC → SOLID → Design Patterns) | R's judgment, domain_model.md#bounded_contexts, domain_model.md#entities, glossary.md, technical_design.md#module_structure, technical_design.md#api_contracts, context_map.md#context_relationships, system.md (key decisions + active constraints) | → REFACTOR (design is wrong — do not polish) | +| **2** | **Structure** | Test 
coverage, test coupling, BDD examples pass, missing test cases, behavior vs structure testing | Coverage reports, test runner output, R's judgment | → TDD Cycle (tests need work) | +| **3** | **Conventions** | Formatting, docstrings, type hints, import ordering, lint rules unrelated to design | Linter/formatter output, R's judgment | → quick surface fix | + +**Why this order?** If Tier 1 fails, the design is wrong and will be restructured. Writing docstrings (Tier 3) for code that will be rewritten is pure waste. If Tier 2 fails, behavior is broken — no point formatting broken code. Tier 3 is cheap to fix but only worth it when design and behavior are stable. + +**Key principle:** R uses automated tools as **evidence**, not as a replacement for judgment. A linter passing doesn't mean R approves the structure. R might say "tests pass but they're testing implementation details, not behavior" — that's a Tier 2 judgment automation can't make. + +### Commit (owned by SE) + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Commit** | SE | test_bodies, function_bodies, review_gate_evidence, features/.feature | — | commits | + +**Carried forward to Acceptance:** Feature branch (with all commits), test results, coverage report, example traceability + +--- + +## Feature Development Flow — Feature-Level Loop + +After Architecture completes, the project enters the feature development loop. Each feature goes through Planning → Development → Acceptance → PR Creation. After a feature is merged, the loop starts again for the next feature. Post-mortem routes back to Planning (most common root cause: specification issues), with an escalation path to Architecture when needed. 
+ +``` +Planning ──► Development ──► Acceptance ──► PR Creation ──► [next-feature] + │ │ │ │ + │ │ └ rejected─┤ + │ │ │ + └ needs_architecture └ rejected Post-mortem ──► Planning (replan) + └ no_features ──► [completed] ├──► [needs_architecture] + └──► [cancelled] +``` + +**Exits:** `next-feature` → loop again, `needs_architecture` → Architecture (parent), `cancelled` → Cancelled (parent), `completed` → Completed (parent) + +### Acceptance (owned by PO) + +Technical review (design, structure, conventions) already happened in Development Flow's Review sub-step (owned by R). Acceptance is purely business validation by PO. + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Acceptance** | PO | features/.feature, product_definition.md#quality_attributes, product_definition.md#definition_of_done | — | acceptance_evidence, approval_record | + +**Transitions:** `approved` → PR Creation, `rejected` → Post-mortem + +**Why no technical review here?** R already reviewed all three tiers (design, structure, conventions) during Development's Review sub-step. Acceptance is PO's domain: did we build the *right thing*, not did we build the *thing right*. 
+ +### PR Creation (owned by SE) + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **PR Creation** | SE | commits, approval_record, features/.feature | — | pull_request | + +**Transitions:** `merged` → next-feature (when `ci_passes=true` + `no_changes_requested=true`), `rejected` → Post-mortem + +--- + +## Post-mortem Flow — Failure Analysis + +``` +Root Cause Analysis ──► Document Findings ──► Extract Lessons ──► Action Items ──► Complete (→ Planning) + │ │ + └── no_issues_found ──► No Action ├──► needs_architecture (→ Architecture) + └──► No Action (→ Cancelled) +``` + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Root Cause Analysis** | R | — | — | root_cause | +| **Document Findings** | R | root_cause | — | post-mortem/PM_YYYYMMDD_.md#failed_at, post-mortem/PM_YYYYMMDD_.md#root_cause, post-mortem/PM_YYYYMMDD_.md#missed_gate | +| **Extract Lessons** | R | post-mortem/PM_YYYYMMDD_.md#root_cause, post-mortem/PM_YYYYMMDD_.md#missed_gate | post-mortem/PM_YYYYMMDD_.md#fix | — | +| **Action Items** | R | post-mortem/PM_YYYYMMDD_.md#fix | post-mortem/PM_YYYYMMDD_.md#restart_check | — | + +**Exits:** `complete` → Planning (replan), `needs_architecture` → Architecture (architectural root cause), `no_action` → Cancelled + +**Why route to Planning, not Architecture?** Most PR rejections are specification problems — the feature didn't match what was intended, or scenarios were incomplete. Routing through Architecture every time wastes a cycle for the common case. When the root cause is architectural (wrong bounded context boundaries, wrong technical design), the `needs_architecture` exit escalates to the parent flow. 
+ +--- + +## Document Registry — Complete Artifact List + +### Living Documents (maintained throughout project) + +| Document | Path | Owner | When Changed | Purpose | +|---|---|---|---|---| +| **interview-notes/*.md** | `docs/interview-notes/IN_YYYYMMDD_.md` | PO | Append-only per session (Discovery + Feature Specification) | Raw stakeholder Q&A, reconstruction source | +| **product_definition.md** | `docs/product_definition.md` | PO (SA overrides #deployment) | When scope changes | IS/IS NOT boundaries, out of scope, users, project conventions | +| **glossary.md** | `docs/glossary.md` | DE | When domain terms emerge or change | Ubiquitous language dictionary | +| **domain_model.md** | `docs/domain_model.md` | DE | When domain understanding evolves | Event map, aggregate/context candidates, bounded contexts, entities, relationships, aggregate boundaries. Evolving: Event Storming fills candidates, Domain Modeling formalizes them | +| **context_map.md** | `docs/context_map.md` | SA | When contexts or relationships change | DDD relationships: upstream/downstream, anti-corruption layers | +| **system.md** | `docs/system.md` | SA | When domain understanding changes (rare) | C4 context/container diagrams, module structure, domain model documentation | +| **technical_design.md** | `docs/technical_design.md` | SA | When stack/contracts change | Stack choices, API/event contracts, interface definitions | +| **adr/*.md** | `docs/adr/ADR_YYYYMMDD_.md` | SA | New decisions or status changes | Architecture decisions with risk assessment | +| **features/*.feature** | `docs/features/feature-name/feature-name.feature` | PO | When requirements change | BDD features in Gherkin format — the single thread of truth (flows process one `.feature` at a time) | + +### Intermediate Documents (produced, consumed, then archived) + +| Document | Path | Owner | Purpose | +|---|---|---|---| +| **Feature list** | Directory structure of `docs/features/` | PO | Decomposed into .feature files, then 
| **post-mortem/*.md** | `docs/post-mortem/PM_YYYYMMDD_{slug}.md` | R | When PR is rejected | Root cause analysis, lessons, action items |
[Assessment: SA interviews stakeholder + decides routing; the architecture_exists guard prevents skipping architecture on the first run]
glossary + technical_design#api_contracts) + │ ──► feature_list (from Feature Breakdown, using product_definition.md#what_is + #why + #users + #delivery_order + interview-notes/*.md) + │ ──► features/.feature (from BDD Features, using feature_list + product_definition.md#what_is + #users + product_definition.md#quality_attributes + domain_model.md#entities + #aggregate_boundaries + glossary.md) + │ ──► product_definition.md#definition_of_done (edited by Definition of Done) + │ ──► no_features → Completed (project done) + │ │ + │ ▼ (BDD features → test specifications AND acceptance criteria) + ├── Development ──► py_stubs + test_stubs + git_branch (from Project Structuring) + │ ──► [TDD Cycle Flow]: test_bodies (RED) → function_bodies (GREEN) → refactored_code (REFACTOR) + │ ──► [Review Gate Flow]: design_review_evidence → structure_review_evidence → conventions_review_evidence + │ ──► commits (from Commit) + │ │ + │ ▼ + ├── Acceptance ──► acceptance_evidence + approval_record (PO validates against BDD scenarios + quality attributes) + │ │ + │ ▼ + ├── PR Creation ──► merged (when ci_passes + no_changes_requested) → next-feature (loop) + │ ──► rejected → Post-mortem + │ │ + │ ▼ (if rejected) + └── Post-mortem ──► root_cause → post-mortem#failed_at + #root_cause + #missed_gate → #fix → #restart_check + ──► complete → Planning (replan — most common: specification issues) + ──► needs_architecture → Architecture (architectural root cause) + ──► no_action → Cancelled +``` + +The **BDD feature is the single thread of truth** — written in Planning (PO), used as test spec in TDD Cycle Flow (RED), validated in Acceptance (PO), and traced in PR Creation (release notes reference which examples were delivered). + +The **Review Gate Flow ensures design issues are caught before conventions investment** — fail-fast, shift-left, tier by tier (Design → Structure → Conventions). Never invest in Tier 3 work on code that hasn't passed Tier 1. 
R reviews ALL three tiers and reports categorized findings to SE. + +The **artifact chain ensures each translation is validated before the next level of detail is invested** — scope → features → signatures → test stubs → test bodies → function bodies. The Review Gate Flow checks these artifacts, not creates new ones. + +The **"cannot review own work" principle prevents conflicts of interest** — Architecture Review & Sign-off (R verifies SA's architecture before implementation), Review Gate Flow (R verifies SE's implementation across all three tiers, cannot be same person as SE). Acceptance (PO) is purely business validation, not technical review. diff --git a/pyproject.toml b/pyproject.toml index 75614c3..09a03aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "agents-smith" -version = "0.1.20260421" -description = "A project for people to pair program with AI, the right way." +version = "0.1.0" +description = "AI-assisted software delivery system with flow-based agent orchestration" readme = "README.md" requires-python = ">=3.13" license = { file = "LICENSE" } @@ -11,13 +11,13 @@ authors = [ maintainers = [ { name = "eol", email = "nullhack@users.noreply.github.com" } ] -classifiers = [ - "Development Status :: 4 - Beta", -] dependencies = [ - "fire>=0.7.1", + "requests>=2.32", ] +[project.scripts] +smith = "smith.__main__:main" + [project.urls] Repository = "https://github.com/nullhack/agents-smith" Documentation = "https://github.com/nullhack/agents-smith/tree/main/docs/api/" @@ -26,7 +26,6 @@ Documentation = "https://github.com/nullhack/agents-smith/tree/main/docs/api/" dev = [ "pdoc>=14.0", "pytest>=9.0.3", - "pytest-beehave[html]>=3.3,<4", "pytest-cov>=6.1.1", "pytest-mock>=3.14.0", "ruff>=0.11.5", @@ -34,13 +33,14 @@ dev = [ "hypothesis>=6.148.4", "pyright>=1.1.407", "ghp-import>=2.1.0", + "flowr>=0.3", + "gherkin-official>=39.0.0", + "safety>=3.7.0", ] -[tool.uv] -package = true - [tool.setuptools] packages = ["smith"] 
+package-data = { "smith.data" = ["**/*"] } [tool.ruff.lint] ignore = [] @@ -79,7 +79,8 @@ mccabe.max-complexity = 10 pydocstyle.convention = "google" [tool.ruff.lint.per-file-ignores] -"tests/**" = ["S101", "ANN", "D205", "D212", "D415", "D100", "D103"] +"tests/**" = ["S101", "S404", "S108", "ANN", "D102", "D107", "D205", "D212", "D415", "D100", "D103", "D101"] +"scripts/*.py" = ["T20"] [tool.pytest.ini_options] minversion = "6.0" @@ -97,17 +98,18 @@ addopts = """ testpaths = ["tests"] python_files = ["*_test.py"] python_functions = ["test_*"] -render_collapsed = "all" [tool.coverage.report] +fail_under = 100 exclude_lines = [ "pragma: no cover", "def __repr__", "if self.debug:", - "if settings.DEBUG", + "if settings.DEBUG:", "raise AssertionError", "if 0:", "if __name__ == .__main__.:", + "...", ] [tool.taskipy.tasks] @@ -116,18 +118,15 @@ test-coverage = """\ pytest \ --cov-config=pyproject.toml \ --cov=smith \ - --cov-fail-under=100 \ --tb=no """ test-build = """\ pytest \ - -p no:beehave \ --doctest-modules \ --cov-config=pyproject.toml \ --cov-report html:docs/coverage \ --cov-report term:skip-covered \ --cov=smith \ - --cov-fail-under=100 \ --hypothesis-show-statistics \ --html=docs/tests/report.html \ --self-contained-html \ @@ -151,13 +150,9 @@ pytest \ -q \ """ doc-publish = "task doc-build && ghp-import -n -p -f docs" -static-check = "pyright" +static-check = "pyright smith tests" +validate-flows = "bash scripts/flowr-utils.sh validate" +regenerate-flowviz = "python scripts/generate-flowviz-data.py" +release-check = "task lint && task static-check && task test && task doc-build" -[dependency-groups] -dev = [ - "gherkin-official>=39.0.0", - "safety>=3.7.0", -] -[tool.beehave] -features_path = "docs/features" diff --git a/scripts/flowr-utils.sh b/scripts/flowr-utils.sh new file mode 100755 index 0000000..d5f4eb2 --- /dev/null +++ b/scripts/flowr-utils.sh @@ -0,0 +1,121 @@ +#!/bin/bash + +# Utility script for working with flowr flows +# Usage: 
generate_graph() { + echo "Generating interactive D3.js visualization..." + python scripts/generate-flowviz-data.py +}
+ for yaml_file in "${FLOWS_DIR}"/*.yaml; do + if [[ -f "$yaml_file" ]]; then + flow_name=$(basename "$yaml_file" .yaml) + echo "" + validate_flow "$flow_name" + fi + done +} + +# Main logic +case "${1:-}" in + "list") + list_flows + ;; + "validate") + if [[ -n "${2:-}" ]]; then + validate_flow "$2" + else + validate_all + fi + ;; + "view") + if [[ -z "${2:-}" ]]; then + echo "Error: Flow name required for view command" + show_usage + exit 1 + fi + view_flow "$2" + ;; + "graph") + generate_graph + ;; + "-h"|"--help"|"") + show_usage + ;; + *) + echo "Error: Unknown command '$1'" + show_usage + exit 1 + ;; +esac \ No newline at end of file diff --git a/scripts/generate-flowviz-data.py b/scripts/generate-flowviz-data.py new file mode 100755 index 0000000..c06f017 --- /dev/null +++ b/scripts/generate-flowviz-data.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +"""Generate FlowViz data bundle from flowr YAML. + +Outputs a single JS file (`flowviz/data.js`) that defines `window.FLOWVIZ_DATA`. +This is intentionally `file://` friendly: the HTML can be opened directly +without needing a local web server (no fetch/XHR). 
+""" + +from __future__ import annotations + +import json +from pathlib import Path + +import yaml + +ROOT = Path(__file__).resolve().parents[1] +FLOWS_DIR = ROOT / ".flowr" / "flows" +OUT_DIR = ROOT / "flowviz" +OUT_FILE = OUT_DIR / "data.js" + + +def _title_case(s: str) -> str: + return " ".join([p.capitalize() for p in s.replace("_", "-").split("-") if p]) + + +def load_flow_yaml(path: Path) -> dict: + """Load and validate a flowr YAML file.""" + with path.open("r", encoding="utf-8") as f: + data = yaml.safe_load(f) + if not isinstance(data, dict) or "flow" not in data: + raise ValueError(f"Invalid flow YAML: {path}") + return data + + +def _flatten_artifacts(artifacts: list | None) -> list[str]: + if not artifacts: + return [] + result: list[str] = [] + for item in artifacts: + if isinstance(item, str): + result.append(item) + elif isinstance(item, dict): + for doc, sections in item.items(): + if isinstance(sections, list) and sections: + for sec in sections: + result.append(f"{doc}#{sec}") + else: + result.append(doc) + return result + + +def _flatten_attrs(attrs: dict | None) -> dict | None: + if not attrs: + return attrs + flat = {} + for key, val in attrs.items(): + if key in ("input_artifacts", "edited_artifacts", "output_artifacts"): + flat[key] = _flatten_artifacts(val) + else: + flat[key] = val + return flat + + +def _resolve_when( + when_clause: dict | list | str, + state_conditions: dict | None, + state_id: str, +) -> dict[str, str] | None: + """Resolve a when clause into a flat dict of conditions. + + Mirrors the flowr loader's resolve_when_clause logic but returns + a plain dict suitable for JSON serialization instead of GuardCondition. 
+ """ + if isinstance(when_clause, dict): + return dict(when_clause) + + items = [when_clause] if isinstance(when_clause, str) else list(when_clause) + resolved: dict[str, str] = {} + + for item in items: + if isinstance(item, dict): + resolved.update(item) + elif isinstance(item, str): + # Named reference to a condition group on this state + if not state_conditions or item not in state_conditions: + raise ValueError( + f"Unknown condition reference '{item}' in state '{state_id}'" + ) + resolved.update(state_conditions[item]) + + return resolved or None + + +def _add_exit_node( + nodes: list[dict], node_ids: set[str], target: str, exits: list[str] +) -> None: + """Add an exit node to the graph if not already present.""" + if target not in node_ids: + nodes.append({"id": target, "type": "exit", "label": _title_case(target)}) + node_ids.add(target) + + +def _process_dict_transitions( + nxt: dict, + st_id: str, + exits: list[str], + state_conditions: dict | None, + nodes: list[dict], + node_ids: set[str], + edges: list[dict], +) -> None: + """Process dict-format transitions (flowr normal form).""" + for trigger, tgt in nxt.items(): + target: str | None = None + when: dict[str, str] | None = None + + if isinstance(tgt, str): + target = tgt + elif isinstance(tgt, dict): + target = tgt.get("to") + raw_when = tgt.get("when") + if raw_when is not None: + when = _resolve_when(raw_when, state_conditions, st_id) + + if target is None: + continue + + edge = { + "source": st_id, + "target": target, + "label": "" if trigger == "default" else str(trigger), + "kind": "exit" if target in exits else "transition", + } + if when: + edge["when"] = when + edges.append(edge) + _add_exit_node(nodes, node_ids, target, exits) + + +def _process_list_transitions( + nxt: list, + st_id: str, + exits: list[str], + nodes: list[dict], + node_ids: set[str], + edges: list[dict], +) -> None: + """Process list-format transitions (older/alternate format).""" + for t in nxt: + target = t["target"] + 
cond = t.get("when", "default") + edges.append( + { + "source": st_id, + "target": target, + "label": "" if cond == "default" else str(cond), + "kind": "exit" if target in exits else "transition", + } + ) + _add_exit_node(nodes, node_ids, target, exits) + + +def build_graph(flow_data: dict) -> dict: + """Build a visualization graph from flowr YAML data.""" + exits = list(flow_data.get("exits", []) or []) + states = list(flow_data.get("states", []) or []) + + nodes: list[dict] = [] + edges: list[dict] = [] + + for st in states: + st_id = st["id"] + is_subflow = "flow" in st + node_type = "subflow" if is_subflow else "state" + + nodes.append( + { + "id": st_id, + "type": node_type, + "label": _title_case(st_id), + "subflow": st.get("flow"), + "subflowVersion": st.get("flow-version"), + "attrs": _flatten_attrs(st.get("attrs")) or None, + } + ) + + for ex in exits: + nodes.append( + { + "id": ex, + "type": "exit", + "label": _title_case(ex), + } + ) + + node_ids = {n["id"] for n in nodes} + + for st in states: + st_id = st["id"] + state_conditions = st.get("conditions") + nxt = st.get("next") + if not nxt: + continue + + if isinstance(nxt, dict): + _process_dict_transitions( + nxt, st_id, exits, state_conditions, nodes, node_ids, edges + ) + elif isinstance(nxt, list): + _process_list_transitions(nxt, st_id, exits, nodes, node_ids, edges) + + return { + "flow": flow_data["flow"], + "version": flow_data.get("version", "0.0.0"), + "exits": exits, + "nodes": nodes, + "edges": edges, + } + + +def main() -> int: + """Generate the flowviz data bundle from all flowr YAML files.""" + if not FLOWS_DIR.exists(): + raise SystemExit(f"Missing flows directory: {FLOWS_DIR}") + + OUT_DIR.mkdir(parents=True, exist_ok=True) + + flows: dict[str, dict] = {} + for p in sorted(FLOWS_DIR.glob("*.yaml")): + data = load_flow_yaml(p) + flows[data["flow"]] = build_graph(data) + + bundle = { + "schema": 1, + "defaultFlow": "main-flow" if "main-flow" in flows else min(flows), + "flows": flows, + 
} + + js = "window.FLOWVIZ_DATA = " + json.dumps(bundle, indent=2, sort_keys=True) + ";\n" + OUT_FILE.write_text(js, encoding="utf-8") + + print(f"Wrote {OUT_FILE}") + print(f"Flows: {', '.join(sorted(flows.keys()))}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/generate-svg.sh b/scripts/generate-svg.sh new file mode 100755 index 0000000..544289e --- /dev/null +++ b/scripts/generate-svg.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +# Generate SVG diagrams from flowr flow definitions +# Usage: ./scripts/generate-svg.sh [FLOW_NAME] +# If FLOW_NAME not provided, generates all flows that work with mermaid-cli + +set -e + +FLOWS_DIR=".flowr/flows" +OUTPUT_DIR="flows" + +# Flows that actually work with mermaid-cli (very limited due to exit state syntax issues) +WORKING_FLOWS=( + "tdd-cycle-flow" +) + +# Check if python3 is available +if ! command -v python3 &> /dev/null; then + echo "Error: python3 is required" + exit 1 +fi + +# Check if npx is available (for mermaid-cli) +if ! command -v npx &> /dev/null; then + echo "Error: npx is required for mermaid-cli" + exit 1 +fi + +# Check if we're in venv (look for flowr) +if ! source .venv/bin/activate 2>/dev/null || ! python -c "import flowr" 2>/dev/null; then + echo "Error: flowr not available. Make sure you're in the project directory and have activated the venv" + echo "Run: source .venv/bin/activate" + exit 1 +fi + +# Activate venv +source .venv/bin/activate + +generate_svg() { + local flow_name="$1" + local yaml_file="${FLOWS_DIR}/${flow_name}.yaml" + local svg_file="${OUTPUT_DIR}/${flow_name}.svg" + local temp_mermaid="/tmp/${flow_name}.mmd" + + if [[ ! -f "$yaml_file" ]]; then + echo "Warning: $yaml_file not found, skipping..." + return + fi + + echo "Generating $svg_file from flowr definition..." 
+ + # Step 1: Convert flowr to mermaid (filter out problematic syntax) + python -m flowr mermaid "$yaml_file" | grep -v "note right of" > "$temp_mermaid" + + # Step 2: Convert mermaid to SVG + if npx @mermaid-js/mermaid-cli@11.12.0 -i "$temp_mermaid" -o "$svg_file" -t neutral 2>/dev/null; then + echo "✓ Generated $svg_file" + else + echo "⚠️ Failed to generate $svg_file (mermaid-cli compatibility issue)" + echo " Flow definition is valid, but SVG generation failed" + fi + + # Clean up temp file + rm -f "$temp_mermaid" +} + +# Create output directory if it doesn't exist +mkdir -p "$OUTPUT_DIR" + +if [[ $# -eq 1 ]]; then + # Generate specific flow + generate_svg "$1" +else + # Generate working flows + echo "Generating SVG diagrams from flowr flows..." + echo "Note: Only generating flows known to work with mermaid-cli" + + for flow_name in "${WORKING_FLOWS[@]}"; do + generate_svg "$flow_name" + done + + echo "" + echo "SVG generation limitations:" + echo "- Only tdd-cycle-flow works with mermaid-cli (exit state syntax issues)" + echo "- All other flows have mermaid-cli compatibility problems" + echo "" + echo "All flows are still valid flowr definitions and can be:" + echo "- Validated: python -m flowr validate .flowr/flows/.yaml" + echo "- Viewed as mermaid: python -m flowr mermaid .flowr/flows/.yaml" + echo "- Used programmatically with flowr APIs" + echo "✓ All working flows converted to SVG" +fi \ No newline at end of file diff --git a/scripts/update-bundle.sh b/scripts/update-bundle.sh new file mode 100755 index 0000000..b4fa5c4 --- /dev/null +++ b/scripts/update-bundle.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Update smith/data/ with agentic files from the agents-smith v8_release branch. +# Usage: ./scripts/update-bundle.sh +# +# Downloads the agents-smith v8_release archive from GitHub, +# extracts it, and copies only the agentic files to smith/data/. 
+# Agentic files: AGENTS.md, .opencode/agents/, .opencode/skills/, +# .opencode/knowledge/, .opencode/tools/, .templates/, .flowr/ + +set -euo pipefail + +TEMP_DIR=$(mktemp -d) +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +DATA_DIR="$PROJECT_ROOT/smith/data" + +echo "Downloading agents-smith v8_release archive..." +curl -sL "https://github.com/nullhack/agents-smith/archive/refs/heads/v8_release.tar.gz" \ + -o "$TEMP_DIR/agents-smith.tar.gz" + +echo "Extracting archive..." +tar -xzf "$TEMP_DIR/agents-smith.tar.gz" -C "$TEMP_DIR" + +SRC_DIR="$TEMP_DIR/agents-smith-v8_release" + +if [ ! -d "$SRC_DIR" ]; then + echo "ERROR: Expected directory agents-smith-v8_release not found in archive" + rm -rf "$TEMP_DIR" + exit 1 +fi + +echo "Removing old agentic files from smith/data/..." +rm -rf "$DATA_DIR/AGENTS.md" "$DATA_DIR/.opencode" "$DATA_DIR/.templates" "$DATA_DIR/.flowr" + +echo "Copying AGENTS.md..." +cp "$SRC_DIR/AGENTS.md" "$DATA_DIR/AGENTS.md" + +echo "Copying .opencode/ subdirectories (agents, skills, knowledge, tools)..." +mkdir -p "$DATA_DIR/.opencode" +for subdir in agents skills knowledge tools; do + if [ -d "$SRC_DIR/.opencode/$subdir" ]; then + cp -r "$SRC_DIR/.opencode/$subdir" "$DATA_DIR/.opencode/$subdir" + echo " Copied .opencode/$subdir/" + else + echo " Skipped .opencode/$subdir/ (not found in source)" + fi +done + +echo "Copying .templates/..." +cp -r "$SRC_DIR/.templates" "$DATA_DIR/.templates" + +echo "Copying .flowr/..." +cp -r "$SRC_DIR/.flowr" "$DATA_DIR/.flowr" + +echo "Cleaning up..." +rm -rf "$TEMP_DIR" + +echo "Done. smith/data/ updated from agents-smith v8_release." +echo "Files in smith/data/:" +find "$DATA_DIR" -not -name '__init__.py' -not -path "$DATA_DIR/__init__.py" -type f | head -20 +echo "..." 
+TOTAL=$(find "$DATA_DIR" -type f | wc -l) +echo "Total files: $TOTAL" +SIZE=$(du -sh "$DATA_DIR" | cut -f1) +echo "Total size: $SIZE" \ No newline at end of file diff --git a/smith/__init__.py b/smith/__init__.py index 18b665e..2deafdf 100644 --- a/smith/__init__.py +++ b/smith/__init__.py @@ -1 +1 @@ -"""Application package.""" +"""Smith — connect AI agent configurations to any project.""" diff --git a/smith/__main__.py b/smith/__main__.py index a200610..9456bc3 100644 --- a/smith/__main__.py +++ b/smith/__main__.py @@ -1,24 +1,6 @@ -"""Entry point for running the application as a module.""" - -import logging - -import fire - -logger = logging.getLogger(__name__) - - -def main(verbosity: str = "INFO") -> None: - """Run the application. - - Args: - verbosity: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). - """ - logging.basicConfig( - level=getattr(logging, verbosity.upper(), logging.INFO), - format="%(levelname)s - %(name)s: %(message)s", - ) - logger.info("Ready.") +"""Entry point for running smith as ``python -m smith``.""" +from smith.delivery.cli import main if __name__ == "__main__": - fire.Fire(main) + raise SystemExit(main()) diff --git a/smith/application/__init__.py b/smith/application/__init__.py new file mode 100644 index 0000000..97d47de --- /dev/null +++ b/smith/application/__init__.py @@ -0,0 +1 @@ +"""Application layer — use-case orchestration.""" diff --git a/smith/application/connect.py b/smith/application/connect.py new file mode 100644 index 0000000..a99edb0 --- /dev/null +++ b/smith/application/connect.py @@ -0,0 +1,30 @@ +"""Connect use-case — wire a project to a template source.""" + +from __future__ import annotations + +from pathlib import Path + +from smith.domain.connection import Connection +from smith.domain.value_objects import TemplateSource +from smith.infrastructure.filesystem import AtomicFileSystem +from smith.infrastructure.gitignore import GitignoreManager +from smith.infrastructure.metadata import SectionMetadata +from 
smith.infrastructure.template_source import TemplateSourceAdapter + + +class ConnectUseCase: + """Orchestrate the connection of a project to a template source.""" + + def __init__(self, project_dir: Path) -> None: + """Initialise with the target project directory.""" + self._project_dir = project_dir + + def execute(self, source: TemplateSource, overwrite: bool = False) -> None: + """Connect the project to the given template source.""" + connection = Connection( + template_source_port=TemplateSourceAdapter(source), + filesystem_port=AtomicFileSystem(self._project_dir), + gitignore_port=GitignoreManager(self._project_dir), + metadata_port=SectionMetadata(self._project_dir), + ) + connection.connect(source=source, overwrite=overwrite) diff --git a/smith/application/disconnect.py b/smith/application/disconnect.py new file mode 100644 index 0000000..29a4efd --- /dev/null +++ b/smith/application/disconnect.py @@ -0,0 +1,32 @@ +"""Disconnect use-case — remove agentic files from a project.""" + +from __future__ import annotations + +from pathlib import Path + +from smith.domain.connection import Connection +from smith.domain.value_objects import TemplateSource +from smith.infrastructure.filesystem import AtomicFileSystem +from smith.infrastructure.gitignore import GitignoreManager +from smith.infrastructure.metadata import SectionMetadata +from smith.infrastructure.template_source import TemplateSourceAdapter + + +class DisconnectUseCase: + """Orchestrate the disconnection of a project from its template source.""" + + def __init__(self, project_dir: Path) -> None: + """Initialise with the target project directory.""" + self._project_dir = project_dir + + def execute(self) -> list[Path]: + """Disconnect the project and return paths that were removed.""" + connection = Connection( + template_source_port=TemplateSourceAdapter( + TemplateSource(kind="bundled", location="agents-smith"), + ), + filesystem_port=AtomicFileSystem(self._project_dir), + 
gitignore_port=GitignoreManager(self._project_dir), + metadata_port=SectionMetadata(self._project_dir), + ) + return connection.disconnect() diff --git a/smith/application/status.py b/smith/application/status.py new file mode 100644 index 0000000..1892121 --- /dev/null +++ b/smith/application/status.py @@ -0,0 +1,32 @@ +"""Status use-case — report the current connection state of a project.""" + +from __future__ import annotations + +from pathlib import Path + +from smith.domain.connection import Connection +from smith.domain.value_objects import ConnectionStatus, TemplateSource +from smith.infrastructure.filesystem import AtomicFileSystem +from smith.infrastructure.gitignore import GitignoreManager +from smith.infrastructure.metadata import SectionMetadata +from smith.infrastructure.template_source import TemplateSourceAdapter + + +class StatusUseCase: + """Orchestrate querying the connection status of a project.""" + + def __init__(self, project_dir: Path) -> None: + """Initialise with the target project directory.""" + self._project_dir = project_dir + + def execute(self) -> ConnectionStatus: + """Return the current connection status of the project.""" + connection = Connection( + template_source_port=TemplateSourceAdapter( + TemplateSource(kind="bundled", location="agents-smith"), + ), + filesystem_port=AtomicFileSystem(self._project_dir), + gitignore_port=GitignoreManager(self._project_dir), + metadata_port=SectionMetadata(self._project_dir), + ) + return connection.status() diff --git a/smith/application/update.py b/smith/application/update.py new file mode 100644 index 0000000..e627ae5 --- /dev/null +++ b/smith/application/update.py @@ -0,0 +1,32 @@ +"""Update use-case — refresh agentic files in a connected project.""" + +from __future__ import annotations + +from pathlib import Path + +from smith.domain.connection import Connection +from smith.domain.value_objects import TemplateSource +from smith.infrastructure.filesystem import AtomicFileSystem +from 
smith.infrastructure.gitignore import GitignoreManager +from smith.infrastructure.metadata import SectionMetadata +from smith.infrastructure.template_source import TemplateSourceAdapter + + +class UpdateUseCase: + """Orchestrate updating agentic files in an already-connected project.""" + + def __init__(self, project_dir: Path) -> None: + """Initialise with the target project directory.""" + self._project_dir = project_dir + + def execute(self, source: TemplateSource | None = None) -> None: + """Update the project's agentic files, optionally from a new source.""" + connection = Connection( + template_source_port=TemplateSourceAdapter( + source or TemplateSource(kind="bundled", location="agents-smith"), + ), + filesystem_port=AtomicFileSystem(self._project_dir), + gitignore_port=GitignoreManager(self._project_dir), + metadata_port=SectionMetadata(self._project_dir), + ) + connection.update(source=source) diff --git a/smith/data/.flowr/.gitignore b/smith/data/.flowr/.gitignore new file mode 100644 index 0000000..7275442 --- /dev/null +++ b/smith/data/.flowr/.gitignore @@ -0,0 +1,2 @@ +# Ignore SVG files (regeneratable from mermaid) +*.svg \ No newline at end of file diff --git a/smith/data/.flowr/flows/architecture-flow.yaml b/smith/data/.flowr/flows/architecture-flow.yaml new file mode 100644 index 0000000..d0b69eb --- /dev/null +++ b/smith/data/.flowr/flows/architecture-flow.yaml @@ -0,0 +1,145 @@ +flow: architecture-flow +version: 4.0.0 +exits: + - complete + - needs_discovery + +states: + - id: architecture-assessment + attrs: + description: "SA evaluates whether the feature requires new architecture or fits the existing system, potentially overriding deployment decisions" + owner: SA + skills: + - assess-architecture + in: + - product_definition.md + - domain_model.md + - system.md + - technical_design.md + - context_map.md + out: + - product_definition.md: + - deployment + - quality_attributes + conditions: + architecture_complete: + system_md: ==true + 
technical_design_md: ==true + context_map_md: ==true + deployment_matches_codebase: ==true + architecture_exists: + system_md: ==true + technical_design_md: ==true + context_map_md: ==true + no_architecture_exists: + technical_design_md: ==false + context_map_md: ==false + next: + no_architecture_needed: + to: complete + when: architecture_complete + needs_context_update: + to: context-mapping + when: architecture_exists + needs_technical_design: + to: technical-design + when: architecture_exists + greenfield: + to: context-mapping + when: no_architecture_exists + delivery_mismatch_unresolvable: needs_discovery + needs_discovery: needs_discovery + + - id: context-mapping + attrs: + description: "SA maps bounded context relationships, integration points, and anti-corruption layers" + owner: SA + skills: + - map-contexts + in: + - domain_model.md + - product_definition.md + - glossary.md + out: + - context_map.md: + - context_relationships + - context_map_diagram + - integration_points + - anti_corruption_layers + next: + done: technical-design + needs_discovery: needs_discovery + + - id: technical-design + attrs: + description: "SA designs the technical solution — architectural style, stack, module structure, API/event contracts, interface definitions — and updates the system overview" + owner: SA + skills: + - design-technical-solution + in: + - context_map.md + - domain_model.md + - glossary.md + - system.md + - product_definition.md + out: + - technical_design.md: + - architectural_style + - quality_attributes + - stack + - module_structure + - api_contracts + - event_contracts + - interface_definitions + - c4_diagrams + - dependencies + - configuration_keys + - system.md: + - context + - container + - module_structure + - delivery + next: + done: review-signoff + needs_decisions: adr-draft + + - id: adr-draft + attrs: + description: "SA documents architecturally significant decisions as ADRs and records key decisions and active constraints in system.md" + owner: 
SA + skills: + - draft-adr + in: + - technical_design.md + - context_map.md + - domain_model.md + - product_definition.md + - glossary.md + - system.md + out: + - system.md: + - key_decisions + - active_constraints + - adr/.md + next: + done: review-signoff + + - id: review-signoff + attrs: + description: "R independently verifies architecture alignment with domain model and requirements, and cross-document consistency, before implementation begins" + owner: R + skills: + - review-architecture + in: + - context_map.md + - technical_design.md + - system.md + - adr/*.md + - product_definition.md + - domain_model.md + - glossary.md + out: [] + next: + approved: complete + inconsistent: architecture-assessment + needs_discovery: needs_discovery \ No newline at end of file diff --git a/smith/data/.flowr/flows/branding-flow.yaml b/smith/data/.flowr/flows/branding-flow.yaml new file mode 100644 index 0000000..33956cd --- /dev/null +++ b/smith/data/.flowr/flows/branding-flow.yaml @@ -0,0 +1,61 @@ +flow: branding-flow +version: 2.0.0 +exits: + - branded + - cancelled + +states: + - id: setup-branding + attrs: + description: "Interview stakeholder to establish brand identity: personality, visual metaphor, wording, and release naming" + owner: Design Agent + skills: + - setup-branding + in: [] + out: + - branding.md: + - identity + - release_naming + - wording + next: + confirmed: design-colors + cancelled: cancelled + + - id: design-colors + attrs: + description: "Select and validate a colour palette with WCAG contrast, dark-mode counterparts, and hue semantics" + owner: Design Agent + skills: + - design-colors + in: + - branding.md + out: + - branding.md: + - visual + next: + approved: design-assets + revise: design-colors + cancelled: cancelled + + - id: design-assets + attrs: + description: "Create logo and banner using favicon-first, monochrome-first, progressive-simplification process" + owner: Design Agent + skills: + - design-assets + in: + - branding.md + out: + - 
docs/assets/logo.svg + - docs/assets/banner.svg + conditions: + monochrome_passed: + logo_monochrome: ==true + scalability_passed: + logo_scalability: ==true + blur_passed: + logo_blur_test: ==true + next: + approved: branded + revise: design-assets + cancelled: cancelled \ No newline at end of file diff --git a/smith/data/.flowr/flows/delivery-flow.yaml b/smith/data/.flowr/flows/delivery-flow.yaml new file mode 100644 index 0000000..bd58922 --- /dev/null +++ b/smith/data/.flowr/flows/delivery-flow.yaml @@ -0,0 +1,80 @@ +flow: delivery-flow +version: 4.0.0 + +exits: + - next-feature + - rejected + - needs_development + - cancelled + +states: + - id: acceptance + attrs: + description: "PO validates business behavior against BDD scenarios and quality attributes" + owner: PO + skills: + - accept-feature + in: + - features/.feature + - product_definition.md + out: + - acceptance_evidence + - approval_record + conditions: + feature_accepted: + feature_status: ==ACCEPTED + next: + approved: + to: local-merge + when: feature_accepted + rejected: rejected + + - id: local-merge + attrs: + description: "SE squash-merges feature commits into local main and resolves any conflicts" + owner: SE + skills: + - merge-local + in: + - feature_commits + - approval_record + - features/.feature + out: + - merged_commits + next: + merged: publish-decision + conflict: needs_development + + - id: publish-decision + attrs: + description: "PO decides whether to publish the accumulated batch as a PR or continue accumulating features on local main" + owner: PO + skills: + - decide-batch-action + in: + - merged_commits + out: [] + next: + accumulate: next-feature + publish: pr-creation + + - id: pr-creation + attrs: + description: "SE creates an administrative PR for changes already on local main" + owner: SE + skills: + - create-pr + in: + - merged_commits + - features/.feature + out: [] + conditions: + merged: + ci_passes: ==true + no_changes_requested: ==true + next: + approved: + to: 
next-feature + when: merged + changes_requested: needs_development + cancelled: cancelled \ No newline at end of file diff --git a/smith/data/.flowr/flows/development-flow.yaml b/smith/data/.flowr/flows/development-flow.yaml new file mode 100644 index 0000000..ae9c1ab --- /dev/null +++ b/smith/data/.flowr/flows/development-flow.yaml @@ -0,0 +1,72 @@ +flow: development-flow +version: 4.0.0 +exits: + - done + - needs_planning + +states: + - id: project-structuring + attrs: + description: "SA creates the project skeleton — branch, package structure, port interfaces, aggregate root signatures — before any feature-specific stubs" + owner: SA + skills: + - structure-project + in: + - features/.feature + - technical_design.md + - domain_model.md + - glossary.md + - context_map.md + - adr/*.md + - product_definition.md + out: + - git_branch + next: + ready: tdd-cycle + needs_planning: needs_planning + + - id: tdd-cycle + attrs: + description: "SE implements the feature through repeated RED-GREEN-REFACTOR cycles until all BDD examples pass" + flow: tdd-cycle-flow + flow-version: "^2" + conditions: + design_declared: + yagni: ==true + kiss: ==true + dry: ==true + oc: ==true + solid: ==true + patterns: ==true + next: + all_green: + to: review-gate + when: design_declared + blocked: project-structuring + + - id: review-gate + attrs: + description: "R independently verifies implementation across three tiers — design, structure, and conventions — before commit" + flow: review-gate-flow + flow-version: "^2" + next: + pass: commit + fail: tdd-cycle + + - id: commit + attrs: + description: "SE commits the reviewed, passing implementation with traceability to feature files" + owner: SE + skills: + - commit-implementation + in: + - test_implementations + - source_implementations + - design_review_evidence + - structure_review_evidence + - conventions_review_evidence + - features/.feature + out: + - feature_commits + next: + done: done \ No newline at end of file diff --git 
a/smith/data/.flowr/flows/discovery-flow.yaml b/smith/data/.flowr/flows/discovery-flow.yaml new file mode 100644 index 0000000..d7aaecb --- /dev/null +++ b/smith/data/.flowr/flows/discovery-flow.yaml @@ -0,0 +1,101 @@ +flow: discovery-flow +version: 3.0.0 +exits: + - complete + +states: + - id: stakeholder-interview + attrs: + description: "PO interviews stakeholders to understand pain points, business goals, and domain terms, then decides how much discovery is needed" + owner: PO + skills: + - conduct-interview + in: + - interview-notes/*.md + out: + - interview-notes/.md: + - pain_points + - business_goals + - terms_to_define + - quality_attributes + next: + needs_full_discovery: event-storming + needs_scope_only: scope-boundary + already_known: complete + + - id: event-storming + attrs: + description: "DE facilitates an event storming workshop to surface domain events, commands, and aggregate candidates" + owner: DE + skills: + - facilitate-event-storming + in: + - interview-notes/*.md + out: + - domain_model.md: + - event_map + - context_candidates + - aggregate_candidates + next: + done: language-definition + needs_reinterview: stakeholder-interview + + - id: language-definition + attrs: + description: "DE formalizes the ubiquitous language by defining domain terms into a glossary" + owner: DE + skills: + - define-ubiquitous-language + in: + - interview-notes/*.md + - domain_model.md + out: + - glossary.md + next: + done: domain-modeling + needs_restorming: event-storming + + - id: domain-modeling + attrs: + description: "DE formalizes candidates into proper bounded contexts, entities, relationships, and aggregate boundaries" + owner: DE + skills: + - model-domain + in: + - glossary.md + - domain_model.md + out: + - domain_model.md: + - bounded_contexts + - entities + - relationships + - aggregate_boundaries + - summary + - glossary.md + next: + done: scope-boundary + contradiction_found: language-definition + needs_reinterview: stakeholder-interview + + - id: 
scope-boundary + attrs: + description: "PO defines what the product IS and IS NOT, who the users are, and the delivery order" + owner: PO + skills: + - define-product-scope + in: + - domain_model.md + - glossary.md + out: + - product_definition.md: + - what_is + - what_is_not + - why + - users + - out_of_scope + - delivery_order + - quality_attributes + - deployment + next: + done: complete + needs_reinterview: stakeholder-interview \ No newline at end of file diff --git a/smith/data/.flowr/flows/feature-development-flow.yaml b/smith/data/.flowr/flows/feature-development-flow.yaml new file mode 100644 index 0000000..50119dc --- /dev/null +++ b/smith/data/.flowr/flows/feature-development-flow.yaml @@ -0,0 +1,40 @@ +flow: feature-development-flow +version: 6.0.0 + +exits: + - needs_architecture + - cancelled + - completed + +states: + - id: planning + flow: planning-flow + flow-version: "^4" + next: + complete: development + needs_architecture: needs_architecture + no_features: completed + + - id: development + flow: development-flow + flow-version: "^4" + next: + done: delivery + needs_planning: planning + + - id: delivery + flow: delivery-flow + flow-version: "^4" + next: + next-feature: planning + rejected: post-mortem + needs_development: development + cancelled: cancelled + + - id: post-mortem + flow: post-mortem-flow + flow-version: "^2" + next: + complete: planning + needs_architecture: needs_architecture + no_action: cancelled \ No newline at end of file diff --git a/smith/data/.flowr/flows/main-flow.yaml b/smith/data/.flowr/flows/main-flow.yaml new file mode 100644 index 0000000..43bfd59 --- /dev/null +++ b/smith/data/.flowr/flows/main-flow.yaml @@ -0,0 +1,31 @@ +flow: main-flow +version: 7.0.0 +exits: [completed, cancelled] + +states: + - id: discovery + attrs: + description: "Understand the domain, define scope, and establish ubiquitous language through stakeholder interviews and domain modeling" + flow: discovery-flow + flow-version: "^3" + next: + 
complete: architecture + + - id: architecture + attrs: + description: "Design technical architecture, context boundaries, and API contracts for the entire project" + flow: architecture-flow + flow-version: "^4" + next: + complete: feature-development + needs_discovery: discovery + + - id: feature-development + attrs: + description: "Feature-level loop: Planning → Development → Acceptance → Delivery per feature" + flow: feature-development-flow + flow-version: "^6" + next: + needs_architecture: architecture + cancelled: cancelled + completed: completed \ No newline at end of file diff --git a/smith/data/.flowr/flows/planning-flow.yaml b/smith/data/.flowr/flows/planning-flow.yaml new file mode 100644 index 0000000..7bb966d --- /dev/null +++ b/smith/data/.flowr/flows/planning-flow.yaml @@ -0,0 +1,156 @@ +flow: planning-flow +version: 4.0.0 +exits: + - complete + - needs_architecture + - no_features + +states: + - id: feature-selection + attrs: + description: "PO picks the next feature to develop based on business priority and delivery order, verifying that architecture covers it" + owner: PO + skills: + - select-feature + in: + - product_definition.md + - technical_design.md + out: [] + next: + selected: feature-specification + needs_architecture: needs_architecture + no_features: no_features + + - id: feature-specification + attrs: + description: "PO conducts a targeted conversation with stakeholders to capture feature-specific behavioral rules, scenarios, and acceptance criteria" + owner: PO + skills: + - specify-feature + in: + - product_definition.md + - domain_model.md + - glossary.md + - technical_design.md + out: + - interview-notes/.md + next: + done: feature-breakdown + needs_architecture: needs_architecture + + - id: feature-breakdown + attrs: + description: "PO decomposes the selected feature into Rule blocks (user stories) within the feature file based on specification interview and domain constraints" + owner: PO + skills: + - break-down-feature + in: + - 
features/.feature + - product_definition.md + - technical_design.md + - interview-notes/*.md + out: + - features/.feature: + - rules + conditions: + invest_passed: + independent: ==true + negotiable: ==true + valuable: ==true + estimable: ==true + small: ==true + testable: ==true + next: + done: + to: bdd-features + when: invest_passed + needs_respecification: feature-specification + + - id: bdd-features + attrs: + description: "PO writes concrete Given/When/Then Example blocks for each Rule in the feature file using ubiquitous language from the glossary" + owner: PO + skills: + - write-bdd-features + in: + - features/.feature + - product_definition.md + - domain_model.md + - glossary.md + out: + - features/.feature: + - examples + conditions: + examples_have_ids: + all_examples_have_ids: ==true + examples_have_gherkin: + all_examples_have_gherkin: ==true + premortem_done: + premortem_done: ==true + decomposition_valid: + concerns: <=2 + must_examples: <=8 + examples_complete: + all_examples_have_ids: ==true + all_examples_have_gherkin: ==true + premortem_done: ==true + concerns: <=2 + must_examples: <=8 + next: + done: + to: create-py-stubs + when: examples_complete + needs_respecification: feature-specification + + - id: create-py-stubs + attrs: + description: "SA creates minimum typed stubs and test stubs as domain model breadcrumbs for the current feature" + owner: SA + skills: + - create-py-stubs + in: + - features/.feature + - technical_design.md + - domain_model.md + - glossary.md + out: + - typed_source_stubs + - test_skeletons + conditions: + stubs_traceable: + all_ids_have_stubs: ==true + next: + done: definition-of-done + + - id: definition-of-done + attrs: + description: "PO tailors the definition of done criteria based on the specific feature's requirements" + owner: PO + skills: + - define-done + in: + - features/.feature + - product_definition.md + out: + - product_definition.md: + - definition_of_done + next: + done: ready + + - id: ready + attrs: + 
description: "PO confirms all planning artifacts are complete and the feature is ready for development" + owner: PO + skills: + - confirm-baseline + in: + - features/.feature + - product_definition.md + out: [] + conditions: + feature_baselined: + feature_status: ==BASELINED + next: + done: + to: complete + when: feature_baselined \ No newline at end of file diff --git a/smith/data/.flowr/flows/post-mortem-flow.yaml b/smith/data/.flowr/flows/post-mortem-flow.yaml new file mode 100644 index 0000000..065ae75 --- /dev/null +++ b/smith/data/.flowr/flows/post-mortem-flow.yaml @@ -0,0 +1,66 @@ +flow: post-mortem-flow +version: 2.0.0 +exits: + - complete + - needs_architecture + - no_action + +states: + - id: root-cause-analysis + attrs: + description: "R investigates why the PR was rejected, identifying the failure point and missed gate" + owner: R + skills: + - analyze-root-cause + in: [] + out: + - root_cause_analysis + next: + issues_found: document-findings + no_issues_found: no_action + + - id: document-findings + attrs: + description: "R records what failed, why, and which quality gate was missed" + owner: R + skills: + - document-post-mortem + in: + - root_cause_analysis + out: + - post-mortem/PM_YYYYMMDD_.md: + - failed_at + - root_cause + - missed_gate + next: + done: extract-lessons + + - id: extract-lessons + attrs: + description: "R determines the corrective fix and updates the post-mortem with remediation steps" + owner: R + skills: + - extract-lessons + in: + - post-mortem/PM_YYYYMMDD_.md + out: + - post-mortem/PM_YYYYMMDD_.md: + - fix + next: + done: action-items + + - id: action-items + attrs: + description: "R determines whether the feature needs replanning, architecture changes, or should be abandoned" + owner: R + skills: + - determine-action-items + in: + - post-mortem/PM_YYYYMMDD_.md + out: + - post-mortem/PM_YYYYMMDD_.md: + - restart_check + next: + replan: complete + architecture_issue: needs_architecture + abandon: no_action \ No newline at end of 
file diff --git a/smith/data/.flowr/flows/review-gate-flow.yaml b/smith/data/.flowr/flows/review-gate-flow.yaml new file mode 100644 index 0000000..6271383 --- /dev/null +++ b/smith/data/.flowr/flows/review-gate-flow.yaml @@ -0,0 +1,63 @@ +flow: review-gate-flow +version: 2.0.0 +exits: + - pass + - fail + +states: + - id: design-review + attrs: + description: "R verifies implementation aligns with domain model, follows DDD patterns, and respects architectural decisions" + owner: R + skills: + - review-design + in: + - domain_model.md + - glossary.md + - technical_design.md + - context_map.md + - system.md + - product_definition.md + - adr/*.md + - refactored_source + out: + - design_review_evidence + next: + pass: structure-review + fail: fail + + - id: structure-review + attrs: + description: "R verifies test coverage, BDD example pass rate, test coupling, and behavior-vs-structure testing" + owner: R + skills: + - review-structure + in: + - coverage_reports + - test_output + - refactored_source + - features/.feature + - domain_model.md + - glossary.md + out: + - structure_review_evidence + next: + pass: conventions-review + fail: fail + + - id: conventions-review + attrs: + description: "R verifies formatting, docstrings, type hints, import ordering, and lint rules unrelated to design" + owner: R + skills: + - review-conventions + in: + - linter_output + - refactored_source + - product_definition.md + - glossary.md + out: + - conventions_review_evidence + next: + pass: pass + fail: fail \ No newline at end of file diff --git a/smith/data/.flowr/flows/setup-project-flow.yaml b/smith/data/.flowr/flows/setup-project-flow.yaml new file mode 100644 index 0000000..8411373 --- /dev/null +++ b/smith/data/.flowr/flows/setup-project-flow.yaml @@ -0,0 +1,89 @@ +flow: setup-project-flow +version: 2.0.0 +exits: [initialized, cancelled] + +states: + - id: assess-requirements + attrs: + description: "Interview user to understand project needs and assess parameters" + owner: 
Setup Agent + skills: + - setup-assess + in: [] + out: + - requirements_assessment + next: + assessed: configure-parameters + cancelled: cancelled + + - id: configure-parameters + attrs: + description: "Gather and confirm project parameters based on assessment" + owner: Setup Agent + skills: + - setup-configure + in: + - requirements_assessment + out: + - template-config.yaml + conditions: + template_files_exist: + pyproject_toml: ==true + readme_md: ==true + github_workflows_ci_yml: ==true + license: ==true + tests_unit_main_test_py: ==true + app_directory: ==true + next: + confirmed: + to: apply-substitutions + when: template_files_exist + missing_files: cancelled + + - id: apply-substitutions + attrs: + description: "Apply all text substitutions, rename package, reset version" + owner: Setup Agent + skills: + - setup-apply + in: + - template-config.yaml + out: + - pyproject.toml + - README.md + - tests/unit/main_test.py + - .github/workflows/ci.yml + - LICENSE + - template-config.yaml + - package_directory + conditions: + substitutions_successful: + no_stale_app_imports: ==true + package_renamed: ==true + version_reset: ==true + next: + applied: + to: verify-and-finalize + when: substitutions_successful + failed: cancelled + + - id: verify-and-finalize + attrs: + description: "Verify transformations, clean template artifacts, finalize project" + owner: Setup Agent + skills: + - setup-verify + in: + - package_directory + out: + - git_remote + conditions: + verification_passed: + tests_pass: ==true + imports_valid: ==true + artifacts_cleaned: ==true + next: + initialized: + to: initialized + when: verification_passed + failed: cancelled \ No newline at end of file diff --git a/smith/data/.flowr/flows/tdd-cycle-flow.yaml b/smith/data/.flowr/flows/tdd-cycle-flow.yaml new file mode 100644 index 0000000..297ce8c --- /dev/null +++ b/smith/data/.flowr/flows/tdd-cycle-flow.yaml @@ -0,0 +1,51 @@ +flow: tdd-cycle-flow +version: 2.0.0 +exits: + - all_green + - blocked + 
+states: + - id: red + attrs: + description: "SE writes a failing test body for one BDD example, specifying expected behavior before implementation exists" + owner: SE + skills: + - write-test + in: + - test_skeletons + - typed_source_stubs + out: + - test_implementations + next: + test_written: green + blocked: blocked + + - id: green + attrs: + description: "SE writes the minimum production code needed to make the failing test pass" + owner: SE + skills: + - implement-minimum + in: + - test_implementations + - typed_source_stubs + out: + - source_implementations + next: + test_passes: refactor + + - id: refactor + attrs: + description: "SE improves code structure while keeping all tests passing, then cycles to the next example or exits when all pass" + owner: SE + skills: + - refactor + in: + - source_implementations + - test_implementations + out: + - source_implementations + - refactored_source + next: + next_example: red + all_examples_pass: all_green \ No newline at end of file diff --git a/smith/data/.flowr/sessions/current.yaml b/smith/data/.flowr/sessions/current.yaml new file mode 100644 index 0000000..4fc6167 --- /dev/null +++ b/smith/data/.flowr/sessions/current.yaml @@ -0,0 +1,10 @@ +flow: feature-development-flow +state: post-mortem +stack: + - flow: main-flow + state: feature-development + - flow: feature-development-flow + state: delivery +params: + selected_feature: smith-commands + post_mortem: PM_20260501_missing-overwrite-flag \ No newline at end of file diff --git a/smith/data/.opencode/agents/design-agent.md b/smith/data/.opencode/agents/design-agent.md new file mode 100644 index 0000000..d680cd9 --- /dev/null +++ b/smith/data/.opencode/agents/design-agent.md @@ -0,0 +1,10 @@ +--- +description: "Design Agent — creates and maintains brand identity, visual assets, and colour systems" +mode: subagent +temperature: 0.4 +--- + +# Design Agent + +You are the Design Agent. 
You create and maintain the project's brand identity, visual assets, and colour systems. +You follow monochrome-first, favicon-first design principles and validate all colours against WCAG contrast requirements. \ No newline at end of file diff --git a/smith/data/.opencode/agents/domain-expert.md b/smith/data/.opencode/agents/domain-expert.md new file mode 100644 index 0000000..2dcddc4 --- /dev/null +++ b/smith/data/.opencode/agents/domain-expert.md @@ -0,0 +1,10 @@ +--- +description: "Domain Expert — facilitates discovery and models the domain" +mode: subagent +temperature: 0.3 +--- + +# Domain Expert + +You are the Domain Expert. You facilitate discovery and model the domain. +You are responsible for event storming, ubiquitous language, and domain modeling. \ No newline at end of file diff --git a/smith/data/.opencode/agents/product-owner.md b/smith/data/.opencode/agents/product-owner.md new file mode 100644 index 0000000..03cea68 --- /dev/null +++ b/smith/data/.opencode/agents/product-owner.md @@ -0,0 +1,10 @@ +--- +description: "Product Owner — owns scope, requirements, and acceptance" +mode: subagent +temperature: 0.4 +--- + +# Product Owner + +You are the Product Owner. You own what gets built and when. +You are the sole decision-maker on feature priority, scope, and acceptance. \ No newline at end of file diff --git a/smith/data/.opencode/agents/reviewer.md b/smith/data/.opencode/agents/reviewer.md new file mode 100644 index 0000000..f58475d --- /dev/null +++ b/smith/data/.opencode/agents/reviewer.md @@ -0,0 +1,10 @@ +--- +description: "Reviewer — independently verifies architecture and implementation" +mode: subagent +temperature: 0.3 +--- + +# Reviewer + +You are the Reviewer. You independently verify architecture and implementation. +You are never the same agent who designed or built the work under review. 
\ No newline at end of file diff --git a/smith/data/.opencode/agents/setup-agent.md b/smith/data/.opencode/agents/setup-agent.md new file mode 100644 index 0000000..170c921 --- /dev/null +++ b/smith/data/.opencode/agents/setup-agent.md @@ -0,0 +1,10 @@ +--- +description: "Setup Agent — transforms templates into new projects" +mode: subagent +temperature: 0.3 +--- + +# Setup Agent + +You are the Setup Agent. You transform a template into a new project by assessing requirements and applying configured substitutions. +You decide when parameters are appropriate and when the project transformation is complete. \ No newline at end of file diff --git a/smith/data/.opencode/agents/software-engineer.md b/smith/data/.opencode/agents/software-engineer.md new file mode 100644 index 0000000..99c4972 --- /dev/null +++ b/smith/data/.opencode/agents/software-engineer.md @@ -0,0 +1,10 @@ +--- +description: "Software Engineer — implements, tests, and ships production code" +mode: subagent +temperature: 0.3 +--- + +# Software Engineer + +You are the Software Engineer. You write production code that passes tests and meets acceptance criteria. +You are responsible for implementation, TDD cycles, commits, and merge operations. \ No newline at end of file diff --git a/smith/data/.opencode/agents/system-architect.md b/smith/data/.opencode/agents/system-architect.md new file mode 100644 index 0000000..97b6ccc --- /dev/null +++ b/smith/data/.opencode/agents/system-architect.md @@ -0,0 +1,10 @@ +--- +description: "System Architect — designs technical architecture and reviews implementation" +mode: subagent +temperature: 0.3 +--- + +# System Architect + +You are the System Architect. You design the technical architecture and review implementation. +You are responsible for technical decisions, ADRs, and project structuring. 
\ No newline at end of file diff --git a/smith/data/.opencode/knowledge/agent-design/principles.md b/smith/data/.opencode/knowledge/agent-design/principles.md new file mode 100644 index 0000000..b608395 --- /dev/null +++ b/smith/data/.opencode/knowledge/agent-design/principles.md @@ -0,0 +1,100 @@ +--- +domain: agent-design +tags: [agents, identity, subagents, separation-of-concerns] +last-updated: 2026-04-29 +--- + +# Agent Design Principles + +## Key Takeaways + +- Agents contain identity only (who I am, what I decide); the flow YAML is the source of truth for routing, skills, and artifacts. +- Use subagents for investigation tasks that rapidly exhaust context; they quarantine token cost and prevent anchoring bias (Tversky & Kahneman, 1974). +- Maintain a three-file separation (AGENTS.md, agents, skills) to prevent conflicting instructions from competing sources, positional attention degradation (Liu et al., 2023), and redundant content creating competing attention targets. +- Agents are minimal — the flow determines which skill to load, the skill determines how to do the work, the knowledge provides the reference material. +- AGENTS.md must discover, not enumerate — provide discovery commands and naming conventions, never file inventories that go stale. + +## Concepts + +**Agent = Identity Only**: The agent file defines who the agent is and what it decides. It does NOT contain skill lists, ownership tables, routing logic, artifact paths, or knowledge references. The flow YAML is the single source of truth for routing (owner, skills, transitions, artifacts). Duplicating any of these in the agent creates a second source of truth that will drift. 
+ +**Three-File Separation**: Three failure modes observed in LLM context windows produce a three-file split: +- **Conflicting instructions** from multiple sources — each concern has one file +- **Positional attention degradation** (Liu et al., 2023 — middle content receives less attention) — keep files short +- **Redundant content** creating competing attention targets — each fact in one location + +| Concern | File | Purpose | Loaded When | +|---|---|---|---| +| Navigation | `AGENTS.md` | Where files live, how to resolve wikilinks | Every session | +| Identity | `.opencode/agents/*.md` | Who I am, what I decide | When role invoked | +| Procedure | `.opencode/skills/*/SKILL.md` | Step-by-step instructions | On demand | +| Reference | `.opencode/knowledge/*/` | What and why | On demand, via wikilinks | + +**Subagents for Investigation**: When a task requires extensive reading (auditing code, researching decisions), use a subagent with read-only or restricted permissions. Subagents quarantine token cost and prevent anchoring bias from the main conversation context. + +**Effective Instruction Writing**: Specific IF-THEN triggers at decision points are 2-3x more likely to execute than general intentions (Gollwitzer, 1999). But these triggers belong in the skill steps at the decision point, NOT in the agent file. The agent file is too far from the work context for triggers to be effective. + +**Discover, Don't Enumerate**: AGENTS.md must never enumerate files that can go stale. Instead, it provides discovery commands (`ls`, `find`) and file naming conventions so agents discover what exists at runtime. This prevents drift between documentation and reality — an inventory that lists 30 skills will be wrong the moment a skill is added or removed, but a discovery command is always correct. + +**Naming Distinction**: `AGENTS.md` (project root) is the navigation file loaded every session. `.opencode/agents/*.md` are agent identity files loaded on demand. 
Despite the similar names, they serve different purposes: AGENTS.md tells you where things are; agent files tell you who you are. + +**Research Notes Are Consultable, Not Session-Loaded**: Research notes in `docs/research/` are source material cited by knowledge files. They are not loaded every session. An agent consults them only when a knowledge file references them and more detail is needed. + +## Content + +### Agent File Format + +```markdown +--- +description: "<Role Name> — <one-line responsibility>" +mode: subagent +temperature: <0.3-0.7> +--- + +# <Role Name> + +You are the <Role Name>. <One sentence stating what this agent owns>. +<One sentence stating what this agent decides>. +``` + +That is the entire agent. No skill lists, no ownership tables, no IF-THEN triggers, no knowledge references, no routing. + +### What NOT to Put in an Agent File + +- **Skill lists** — the flow `skills` field determines which skill to load +- **Ownership tables** — the flow `input/edited/output_artifacts` defines what each state reads and writes +- **Routing logic** — the flow `next` field defines transitions +- **Knowledge references** — the skill's `## Load` section handles knowledge loading +- **Step procedures** — skills contain procedure, agents contain identity +- **Quality gates** — the flow `conditions` field defines gate conditions + +### AGENTS.md Is Navigation Only + +AGENTS.md is loaded every session. It should contain ONLY: +- Where files live (project structure) +- How to resolve wikilinks +- Session protocol (use `flowr status`, `flowr advance`) +- File naming conventions +- Discovery commands (not file inventories) + +It must NOT contain quality gates, priority orders, step procedures, knowledge content, or file enumerations.
+ +### Naming Conventions + +| Path | Purpose | Loaded When | +|---|---|---| +| `/AGENTS.md` | Root navigation (where things are, how to discover them) | Every session | +| `.opencode/agents/{role}.md` | Agent identity (who I am, what I decide) | When role invoked | +| `.opencode/skills/{skill}/SKILL.md` | Skill procedure (step-by-step instructions) | On demand | +| `.opencode/knowledge/{domain}/{concept}.md` | Knowledge reference (progressive disclosure) | On demand, via wikilinks | +| `.templates/{path}.template` | Artifact templates | When creating artifacts | +| `docs/research/{domain}/{concept}.md` | Research source notes (cited by knowledge files) | When knowledge file references them | +| `docs/adr/ADR_YYYYMMDD_{slug}.md` | Architecture decision records | When referenced | + +Note: Despite similar names, `AGENTS.md` (root navigation) and `.opencode/agents/` (identity files) serve different purposes. + +## Related + +- [[skill-design/principles]] +- [[knowledge-design/principles]] +- [[workflow/flowr-spec]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/adr.md b/smith/data/.opencode/knowledge/architecture/adr.md new file mode 100644 index 0000000..2bda578 --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/adr.md @@ -0,0 +1,79 @@ +--- +domain: architecture +tags: [adr, architecture-decision-records, decision-making] +last-updated: 2026-04-29 +--- + +# Architecture Decision Records + +## Key Takeaways + +- ADRs document architecturally significant decisions — decisions that are hard to change and affect multiple components (Nygard, 2011). +- Each ADR follows a fixed structure: Status, Context, Decision, Reason, Alternatives, Consequences (Nygard, 2011). +- ADRs are append-only — once written, they are never edited. Superseded ADRs get a new "Superseded by" reference, not a revision. +- ADRs must be consistent with feature requirements — every ADR should reference the `@id` criteria it addresses. 
+- ADR risk assessment uses Probability × Impact classification (Boehm, 1991) to prioritise mitigation effort on the highest-exposure risks. + +## Concepts + +**Architecturally Significant** — A decision is architecturally significant if it affects multiple components, is hard to reverse, or constrains future choices (Nygard, 2011; Fowler, 2003). Choosing a database is architecturally significant. Choosing a variable name is not. When in doubt, write the ADR. + +**ADR Structure** — Every ADR contains (Nygard, 2011): Status (Proposed, Accepted, Deprecated, Superseded), Context (the forces at play, the problem being solved), Decision (the choice made), Reason (why this choice over alternatives), Alternatives (other options considered and why they were rejected), Consequences (what changes because of this decision, both positive and negative). + +**Append-Only Discipline** — ADRs capture the decision as it was made at the time. If understanding changes, write a new ADR that supersedes the old one. This preserves the history of architectural reasoning and prevents retroactive justification. + +**ADR Consistency** — Every ADR must be consistent with the feature requirements it addresses. During review, check that each ADR aligns with the `@id` criteria in the feature file. An ADR that contradicts a requirement is a signal that either the ADR or the requirement needs updating. Architecture review is adversarial — the reviewer actively seeks inconsistencies and gaps, leveraging accountability to an unknown audience (Tetlock, 1985) to produce more rigorous decisions. + +**Risk Assessment** (Boehm, 1991) — Each ADR's Risk Assessment table uses Probability × Impact to classify and prioritise risks. Probability (Low/Medium/High) estimates how likely the risk is to materialise. Impact (Low/Medium/High) estimates how severe the consequence would be. Risks with High Probability and High Impact demand explicit mitigations or rejection of the decision. 
Risks with Low Probability and Low Impact may be accepted without mitigation. Risk leverage — the ratio of risk reduction to mitigation cost — helps prioritise which mitigations to invest in first. + +## Content + +### ADR Template Fields + +| Field | Content | +|---|---| +| Status | Proposed, Accepted, Deprecated, or Superseded | +| Context | What is the issue that we're seeing that is motivating this decision? | +| Decision | What is the change that we're proposing/making? | +| Reason | Why is this the best choice given the alternatives? | +| Alternatives | What other choices were considered and why were they rejected? | +| Consequences | What becomes easier or harder to do because of this change? | + +### When to Write an ADR + +- Choosing a framework, library, or database +- Choosing an architectural style (monolith, microservices, event-driven) +- Choosing a communication pattern (sync HTTP, async events, gRPC) +- Choosing a data storage strategy (SQL, NoSQL, event sourcing) +- Choosing a deployment strategy (container, serverless, bare metal) +- Introducing a new bounded context boundary +- Changing a cross-cutting concern (authentication, logging, error handling) + +### Risk Assessment Classification + +| Probability \ Impact | Low | Medium | High | +|---|---|---|---| +| **High** | Monitor | Mitigate | Mitigate or Reject | +| **Medium** | Accept | Monitor | Mitigate | +| **Low** | Accept | Accept | Monitor | + +- **Mitigate**: Explicit mitigation strategy required before accepting the ADR +- **Monitor**: Flag for future review; no immediate action needed +- **Accept**: Risk is acceptable; document in ADR +- **Reject**: Risk is too high; reconsider the decision or choose an alternative + +Risk leverage prioritises mitigations with the highest ratio of risk reduction to mitigation cost (Boehm, 1991). 
+ +### When NOT to Write an ADR + +- Choosing a variable name +- Choosing a code style (use project conventions) +- Choosing a test framework (use project conventions) +- Any decision that is easily reversible and affects only one component + +## Related + +- [[architecture/assessment]] +- [[architecture/technical-design]] +- [[architecture/reconciliation]] +- [[architecture/quality-attributes]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/assessment.md b/smith/data/.opencode/knowledge/architecture/assessment.md new file mode 100644 index 0000000..d2716d8 --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/assessment.md @@ -0,0 +1,69 @@ +--- +domain: architecture +tags: [architecture, delivery-mechanism, bounded-contexts, hexagonal-architecture] +last-updated: 2026-04-29 +--- + +# Architecture Assessment + +## Key Takeaways + +- Delivery mechanism is the boundary between the domain and the outside world (Cockburn, 2005) — HTTP, CLI, message queue, etc. — it must be verified against the product definition before designing anything. +- Architecture exists when system.md, technical_design.md, and context_map.md all contain meaningful content aligned with the current domain. +- If architecture exists but delivery mechanism mismatches, record it as an ADR before proceeding. +- Hexagonal architecture (Ports & Adapters — Cockburn, 2005) keeps the domain independent of delivery mechanism — verify this is followed. +- SA conducts an assessment interview to verify and correct quality attributes, deployment constraints, and hidden requirements before routing. + +## Concepts + +**Delivery Mechanism Verification** — Before designing a feature, the architect must verify that the delivery mechanism stated in the product definition (e.g., "web application", "CLI tool", "API service") matches the actual codebase implementation. 
A mismatch (e.g., product says "web" but codebase is CLI) must be recorded as an ADR and resolved before proceeding. This checkpoint prevents building on a foundation that doesn't match the product's intent. + +**Architecture Existence Check** — Architecture is considered to exist when three documents contain meaningful, aligned content: system.md (current state snapshot), technical_design.md (technical decisions), and context_map.md (bounded context relationships). Empty or placeholder content does not count. If all three exist and are coherent, the architect evaluates whether the existing architecture covers the new feature or needs updating. + +**Hexagonal Architecture (Ports & Adapters — Cockburn, 2005)** — The domain core must not depend on infrastructure. Ports define what the domain needs; adapters provide concrete implementations. When reviewing architecture, verify that external dependencies (databases, frameworks, APIs) are behind Protocol interfaces, not directly referenced in domain code. + +**Assessment Interview** — The SA interviews the stakeholder to surface information not captured in the artifacts. Topics: quality attribute priorities (are the documented priorities accurate and complete?), deployment constraints (does the deployment section match reality?), hidden requirements (constraints not captured in the artifacts), and architecture gaps (does the current system fail to cover anything needed?). Apply gap-finding techniques from [[requirements/interview-techniques]]: use CIT to probe for specific past failures, use Laddering to climb from surface preferences to real constraints. Apply a pre-mortem from [[requirements/pre-mortem]]: "Imagine this architecture is built exactly as designed, all tests pass, but it fails in production — what would be missing?" Corrections are written into existing artifacts (product_definition.md), not into separate interview notes. 
+ +## Content + +### Delivery Mechanism Checkpoint + +The delivery mechanism is the outermost layer — how users or systems interact with the product. Common delivery mechanisms: + +- Web application (HTTP server, browser-based) +- CLI tool (terminal interface) +- API service (REST, GraphQL, gRPC) +- Desktop application (GUI) +- Library/SDK (programmatic interface) + +When assessing architecture for a new feature: + +1. Read the product definition's deployment section +2. Verify the codebase's actual entry points match +3. If mismatched, create an ADR documenting the discrepancy and the resolution path +4. Only proceed with technical design after the delivery mechanism is verified + +### Architecture Existence Decision Tree + +| Condition | Routing | +|---|---| +| No architecture documents exist | Needs full architecture (technical-design) | +| Architecture exists, covers the feature | No architecture needed (proceed to planning) | +| Architecture exists, needs updating for this feature | Needs context update (context-map then technical-design) | +| Architecture exists, fundamental gap discovered | Needs discovery (back to discovery flow) | + +### Hexagonal Architecture Verification + +When reviewing existing architecture: + +- Every external dependency must have a Protocol (interface) in the domain layer +- The domain layer must have zero imports from infrastructure packages +- Adapters must implement domain-defined Ports, not the other way around +- If the domain references a concrete technology, it's a violation + +## Related + +- [[architecture/quality-attributes]] +- [[architecture/reconciliation]] +- [[requirements/pre-mortem]] +- [[workflow/flowr-spec]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/contract-design.md b/smith/data/.opencode/knowledge/architecture/contract-design.md new file mode 100644 index 0000000..20e3c69 --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/contract-design.md @@ -0,0 +1,72 
@@ +--- +domain: architecture +tags: [api-contracts, event-contracts, interfaces, rest, hexagonal-architecture] +last-updated: 2026-04-29 +--- + +# Contract Design + +## Key Takeaways + +- API contracts, event contracts, and interface definitions are the boundaries between modules — design them before implementation (contract-first design). +- REST constraints (Fielding, 2000) define API contracts as resource shapes and media types, not procedure calls — the contract is what data a resource contains and how it can transition, not a method signature. +- Event contracts must specify not just payload schema but also ordering guarantees, delivery semantics, and error handling (Hohpe & Woolf, 2003). +- Interface definitions (Protocol/abstract classes) in the domain layer define what the domain needs; infrastructure implements them — the dependency arrow always points inward (Cockburn, 2005; Evans, 2003). + +## Concepts + +**Contract-First Design** — Define the boundaries between modules before implementing them. API contracts specify request/response shapes, error codes, authentication, and versioning. Event contracts specify event names, payload schemas, ordering guarantees, and delivery semantics. Interface definitions specify the operations the domain requires without specifying how they are implemented. All three contract types are living documents that evolve with the system but must be versioned to maintain backward compatibility. + +**REST and API Contracts** (Fielding, 2000) — REST defines API contracts through resources (identified by URIs), representations (media types like JSON Schema), and standard methods (GET, POST, PUT, DELETE). The Uniform Interface constraint means the client only needs to understand media types and standard methods, not server implementation details. API contracts should specify: resource paths, request/response schemas, error response formats, authentication requirements, and rate limits. 
+ +**Event Contracts** (Hohpe & Woolf, 2003) — Asynchronous messaging between systems requires explicit contracts covering: payload schema (event type, aggregate ID, timestamp, data fields), ordering guarantees (per-sender FIFO, causal ordering, or none), delivery semantics (at-most-once, at-least-once, exactly-once), and error handling (dead letter channels, retry policies, circuit breakers). Event contracts decouple time (producer and consumer don't need to be available simultaneously) and schema (each system retains its own model through translation layers). + +**Interface Definitions** — In hexagonal architecture (Cockburn, 2005), the domain layer defines Protocol interfaces (ports) that specify what operations the domain needs. Infrastructure adapters implement these ports. The domain never imports from infrastructure — the dependency arrow always points inward (infrastructure → application → domain). Interface definitions must specify: method signatures, parameter types, return types, error types, and preconditions. 
+ +## Content + +### API Contract Specification + +Every API endpoint must document: + +| Element | Content | +|---|---| +| Path | `/{resource}/{id}` with path parameters | +| Method | GET, POST, PUT, DELETE | +| Request schema | Fields, types, required/optional, validation rules | +| Response schema | Fields, types, status codes | +| Error responses | Error code, message, retry guidance | +| Authentication | Required auth mechanism | +| Versioning | Header or URL-based version strategy | + +### Event Contract Specification + +Every event must document: + +| Element | Content | +|---|---| +| Event type | Past-tense domain event name (e.g., `OrderPlaced`) | +| Payload schema | Fields, types, required/optional | +| Ordering | None, per-sender FIFO, or causal | +| Delivery | At-most-once, at-least-once, or exactly-once | +| Error handling | Dead letter channel, retry policy, circuit breaker | +| Produced by | Module/context that emits the event | +| Consumed by | Module/context that handles the event | + +### Interface Definition Specification + +Every domain port must document: + +| Element | Content | +|---|---| +| Port name | Domain operation name (e.g., `PaymentGateway`) | +| Methods | Method signatures with parameter and return types | +| Errors | Domain error types the method can raise | +| Preconditions | Conditions that must hold before calling | +| Implementations | Which infrastructure adapter implements this port | + +## Related + +- [[architecture/technical-design]] +- [[architecture/quality-attributes]] +- [[domain-modeling/context-mapping]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/quality-attributes.md b/smith/data/.opencode/knowledge/architecture/quality-attributes.md new file mode 100644 index 0000000..79e62ed --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/quality-attributes.md @@ -0,0 +1,60 @@ +--- +domain: architecture +tags: [quality-attributes, architectural-styles, trade-offs, ATAM] +last-updated: 
2026-04-29 +--- + +# Quality Attributes + +## Key Takeaways + +- Quality attributes — not functional requirements — drive architectural decisions (Bass, Clements & Kazman, 2021). +- Six architecturally significant quality attribute categories: Performance, Availability, Security, Modifiability, Reliability, and Usability (Bass et al., 2021). +- Architectural style selection must be justified against quality attribute priorities, not personal preference — each style optimises for different attributes. +- Quality attributes often conflict — optimising for Performance may harm Modifiability; the utility tree method (ATAM) forces explicit prioritisation with business value justification. + +## Concepts + +**Quality Attributes as Architectural Drivers** (Bass et al., 2021) — Quality attributes are measurable properties of a system's architecture, distinct from functional requirements (what the system does). Performance, Availability, and Security constrain the architecture; Modifiability, Reliability, and Usability shape its flexibility. Each quality attribute produces concrete architectural tactics that directly affect module structure, dependency direction, and communication patterns. + +**Quality Attribute Conflicts** — Performance (fast response, low latency) often conflicts with Modifiability (abstraction layers, indirection). Security (encryption, validation) often conflicts with Performance (overhead). Availability (redundancy, failover) often conflicts with cost constraints. The architect must prioritise which attributes matter most for the business and make trade-offs explicitly, documented as ADRs. + +**ATAM Utility Tree** — The Architecture Tradeoff Analysis Method provides a structured way to prioritise quality attributes: stakeholders rank attribute scenarios by business value (High/Medium/Low) and by architectural difficulty (High/Medium/Low). The intersection produces a prioritised set of scenarios that the architecture must address first. 
This prevents architects from over-engineering for low-value attributes or under-engineering for high-value ones. + +**Architectural Tactics** — Each quality attribute has a set of design tactics that directly address it: Performance uses resource arbitration, concurrency, and caching; Availability uses redundancy, fault detection, and recovery; Modifiability uses encapsulation, substitution, and binding time. Tactics are the building blocks that architects combine into architectural styles. + +## Content + +### Quality Attribute Taxonomy + +| Category | Definition | Key Tactics | +|---|---|---| +| Performance | Response time and throughput under load | Caching, concurrency, resource pooling, load balancing | +| Availability | System uptime and fault tolerance | Redundancy, failover, circuit breaker, health checks | +| Security | Protection against unauthorised access and data breaches | Authentication, authorisation, encryption, audit logging | +| Modifiability | Ease of changing the system without side effects | Encapsulation, substitution, dependency inversion, binding time | +| Reliability | Correct operation over time under stated conditions | Input validation, checksums, transaction boundaries, retry | +| Usability | Ease of use for end users | Consistent UI patterns, clear error messages, progressive disclosure | + +### Architectural Styles and Quality Attributes + +| Style | Optimises For | Trades Off | +|---|---|---| +| Monolith | Simplicity, fast time-to-market, low-latency intra-module calls | Independent deployment, team autonomy | +| Microservices | Independent deployment, team autonomy, fault isolation | Operational complexity, inter-service latency | +| Event-driven | Loose coupling, async processing, scalability | Eventual consistency, debugging complexity | +| Serverless | Cost optimisation (pay-per-use), auto-scaling | Cold starts, vendor lock-in, debugging difficulty | +| Hexagonal | Testability, domain isolation, delivery-mechanism independence 
| Indirection overhead, architectural discipline required | + +### Quality Attributes in Architecture Documents + +When documenting quality attributes in `technical_design.md`: +- Each attribute must link to an architectural decision that addresses it +- Each architectural decision must link to an ADR +- Priority order must be explicit (which attribute wins when they conflict) + +## Related + +- [[architecture/technical-design]] +- [[architecture/adr]] +- [[architecture/assessment]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/reconciliation.md b/smith/data/.opencode/knowledge/architecture/reconciliation.md new file mode 100644 index 0000000..e25865a --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/reconciliation.md @@ -0,0 +1,57 @@ +--- +domain: architecture +tags: [reconciliation, cross-document-consistency, adversarial-review, verification] +last-updated: 2026-04-29 +--- + +# Reconciliation + +## Key Takeaways + +- Reconciliation is an adversarial cross-document consistency check — the reviewer actively seeks inconsistencies, not confirms consistency (Tetlock, 1985). +- Five cross-document consistency checks verify alignment: system↔glossary, system↔feature, ADRs↔feature, glossary↔feature, product_definition↔scope. +- Every inconsistency is a signal that either the architecture or the requirements need updating — the reviewer does not decide which side changes, only that a mismatch exists. +- Reconciliation gates prevent forward progress until all five checks pass; this is the last chance to catch misalignment before implementation begins. + +## Concepts + +**Adversarial Reconciliation** — The reviewer's default hypothesis is that inconsistencies exist. Leveraging accountability to an unknown audience (Tetlock, 1985), the reviewer actively searches for mismatches rather than confirming alignment. 
This adversarial stance produces more rigorous verification than cooperative review because it prevents confirmation bias — the tendency to see what we expect rather than what is actually there. + +**Five Cross-Document Consistency Checks** — Each check compares two documents and verifies that their models, terms, and requirements align. A mismatch in any check is a hard blocker: the architecture must be corrected, or the requirements must be revised, before implementation can proceed. + +**Reconciliation Gate** — The reconciliation gate sits between architecture review and implementation. It is the last point where misalignment can be caught cheaply. After this gate, code is written against the architecture, and fixing misalignment becomes exponentially more expensive. + +## Content + +### The Five Checks + +| # | Check | Verify | Mismatch Signal | +|---|---|---|---| +| 1 | system ↔ glossary | Every glossary term matches how it is used in system.md Domain Model | A term defined in the glossary is used with a different meaning in the domain model | +| 2 | system ↔ feature | Every entity, action, and relationship in system.md Domain Model matches feature requirements | An entity appears in the domain model but not in any feature, or vice versa | +| 3 | ADRs ↔ feature | Every ADR aligns with feature requirements; each ADR references specific `@id` criteria | An ADR contradicts a feature requirement, or a feature requirement has no ADR addressing it | +| 4 | glossary ↔ feature | Every domain term in the feature file matches its glossary definition | A term used in the feature has no glossary entry, or the glossary definition contradicts the feature's usage | +| 5 | product_definition ↔ scope | Scope in the product definition stays within the stated boundaries (what_is, what_is_not, out_of_scope) | A feature requirement exceeds the product definition's stated scope | + +### Mismatch Resolution + +When a mismatch is found: + +1. 
**Record the mismatch**: Which two documents, which specific items, and how they disagree. +2. **Determine which side changes** (this decision belongs to the document owner, not the reviewer — the reviewer only flags the mismatch): If the architecture is wrong, update system.md, technical_design.md, or the ADR. If the requirements are wrong, update the feature file or product definition. +3. **Update both documents**: Ensure the correction is reflected in all affected documents. +4. **Re-run the affected check**: Verify the mismatch is resolved. + +### Reviewer Stance Declaration + +Before performing reconciliation, the reviewer declares: + +- Adversarial stance: "I will actively search for inconsistencies, not confirm consistency." +- Boundary check: "I will verify every cross-document relationship, not just the ones that seem obvious." +- Semantic read: "I will read for meaning, not just surface-level keyword matching." + +## Related + +- [[architecture/adr]] +- [[architecture/assessment]] +- [[requirements/pre-mortem]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/technical-design.md b/smith/data/.opencode/knowledge/architecture/technical-design.md new file mode 100644 index 0000000..eb4869c --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/technical-design.md @@ -0,0 +1,61 @@ +--- +domain: architecture +tags: [technical-design, architectural-styles, c4-diagrams, hexagonal-architecture, module-structure] +last-updated: 2026-04-29 +--- + +# Technical Design + +## Key Takeaways + +- Architectural style must be selected based on quality attributes and deployment constraints — not personal preference. +- C4 diagrams provide four levels of abstraction (Brown, 2018): Context (system in environment), Container (deployable units), Component (modules within containers), Code (classes and functions). +- Module structure follows separation of concerns — domain logic must not depend on infrastructure (Cockburn, 2005; Evans, 2003). 
+- API contracts, event contracts, and interface definitions are the boundaries between modules — design them before implementation. + +## Concepts + +**Architectural Styles** — Common styles and when to choose them: Monolith (single deployment, simple ops, low latency between modules), Microservices (independent deployment, team autonomy, high operational complexity), Event-driven (loose coupling, eventual consistency, async workflows), Serverless (pay-per-use, auto-scaling, cold starts), Hexagonal/Ports & Adapters (testability, domain isolation, delivery-mechanism independence). + +**C4 Diagrams** (Brown, 2018) — Four levels of architectural visualization: Context (actors and external systems), Container (deployable units and their tech stacks), Component (internal modules and their responsibilities), Code (individual classes — rarely needed). Always start with Context, then Container. Component diagrams are optional. Code diagrams are rarely necessary. + +**Module Structure** — Organize by bounded context first (Evans, 2003), then by layer (domain, application, infrastructure). Domain layer has zero infrastructure imports. Application layer orchestrates use cases. Infrastructure layer implements external concerns. The dependency arrow always points inward: infrastructure → application → domain (Clean Architecture — Martin, 2012; Hexagonal Architecture — Cockburn, 2005). + +**Contract-First Design** — Define the boundaries before the implementation: API contracts (request/response shapes, error codes, authentication), Event contracts (event names, payload schemas, ordering guarantees), Interface definitions (Protocol/abstract classes that the domain defines and infrastructure implements). 
+ +## Content + +### Architectural Style Selection + +| Quality Attribute Priority | Recommended Style | +|---|---| +| Simplicity, fast time-to-market | Monolith | +| Team autonomy, independent scaling | Microservices | +| Loose coupling, async workflows | Event-driven | +| Cost optimization, variable load | Serverless | +| Testability, domain isolation | Hexagonal | + +Hybrid approaches are valid: a monolith with hexagonal internals, or microservices with event-driven communication between them. + +### C4 Diagram Guidelines + +- Context diagram: always include — shows the system boundary and external actors +- Container diagram: always include — shows deployable units and tech choices +- Component diagram: include when module structure is non-trivial +- Code diagram: only for complex algorithms or critical paths + +### Module Structure Template + +``` +feature_name/ + domain/ # Business logic, zero infrastructure imports + application/ # Use case orchestration + infrastructure/ # External concerns (DB, HTTP, queues) + api/ # Delivery mechanism (routes, serializers) +``` + +## Related + +- [[architecture/assessment]] +- [[architecture/quality-attributes]] +- [[architecture/contract-design]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/design/color-systems.md b/smith/data/.opencode/knowledge/design/color-systems.md new file mode 100644 index 0000000..7c0ecd7 --- /dev/null +++ b/smith/data/.opencode/knowledge/design/color-systems.md @@ -0,0 +1,126 @@ +--- +domain: design +tags: [color, palette, wcag, contrast, accessibility, hue-semantics, saturation, value, colour-harmony] +last-updated: 2026-04-30 +--- + +# Color Systems for Project Branding + +## Key Takeaways + +- Design monochrome-first; add colour only after the shape holds identity in pure black on white (Rand, 1985; Kare, 1984). +- Use 1–2 colours maximum in a logo mark; three or more create reproduction problems and visual noise at small sizes. 
+- Hue is one axis; saturation (vivid vs muted) and value (light vs dark) are independent levers that carry as much meaning. A desaturated blue signals "corporate"; a saturated blue signals "tech/digital." +- Every text–background pair must meet WCAG 2.1 SC 1.4.3 minimum contrast: 4.5:1 for normal text, 3:1 for large text (W3C, 2018). +- Verify colours on actual backgrounds, not in isolation — simultaneous contrast shifts perceived hue (Itten, 1961; Albers, 1963). +- Define each brand colour with: hex value, RGB, dark-mode counterpart, and WCAG contrast ratio against primary backgrounds. +- Choose colour harmony type based on emotional effect: analogous for calm, complementary for vibrancy, split-complementary for balanced contrast, triadic for energy. + +## Concepts + +**Monochrome-First Process**: Design the entire mark in black on white, then white on black. If it does not work in monochrome, colour will not save it. Only after the shape holds identity in one colour should a second colour be introduced — and only as an accent, never carrying meaning that must be read. + +**Hue, Saturation, and Value as Independent Axes**: Hue (which colour) is one dimension. Saturation (how vivid vs muted) and value (how light vs dark) carry as much meaning as hue and are independent levers. A single hue can express different personalities by varying saturation and value: +- High saturation + medium value: "digital, energetic, modern" (e.g., #3B82F6) +- Low saturation + medium value: "corporate, professional, subdued" (e.g., #6B7280) +- High saturation + dark value: "premium, deep, authoritative" (e.g., #1E3A5F) +- Low saturation + light value: "calm, approachable, subtle" (e.g., #DBEAFE) + +Choose personality → hue → saturation/value, not hue first then wonder why it doesn't feel right. + +**Hue Semantics**: Colours carry cultural associations. Blue signals trust and stability (most common in tech). Green signals growth and nature. Red signals energy and urgency. 
Orange signals creativity and enthusiasm. Purple signals innovation and premium quality. Yellow signals optimism and warmth. Choose a primary hue that reinforces the project's personality adjectives, not one that clashes with them. + +**Saturation–Value Personality Map**: The same hue communicates different personalities depending on saturation and value: + +| Personality | Hue | Saturation | Value | Example hex | +|-------------|-----|-----------|-------|------------| +| Precise, technical | Blue | Medium | Dark | #1E3A5F | +| Trustworthy, stable | Blue | Medium | Medium | #3B82F6 | +| Bold, disruptive | Red | High | Medium | #DC2626 | +| Warm, creative | Orange | High | Medium | #F97316 | +| Calm, approachable | Green | Low–Medium | Light–Medium | #86EFAC | +| Premium, innovative | Purple | Medium | Dark | #7C3AED | + +**Colour Harmony Types**: Choose the harmony type based on the emotional effect you want: + +| Harmony type | Wheel relationship | Angle | Effect | Use when | +|-------------|-------------------|-------|--------|----------| +| Complementary | Opposite on wheel | 180° | Maximum contrast, vibrant tension | Accent needs to pop against primary | +| Split-complementary | Complement ± 30° | 150° + 30° | Balanced contrast, less harsh than direct complement | Most versatile for branding | +| Analogous | Adjacent on wheel | ± 30° | Calm, harmonious, unified | Personality is "calm, cohesive, subtle" | +| Triadic | Evenly spaced 3 | 120° | Energetic, diverse, playful | Need 3 distinct brand colours | +| Tetradic (square) | Evenly spaced 4 | 90° | Complex, rich, hard to balance | Only with experienced colour sense | + +For 2-colour brand marks, use complementary or split-complementary. Analogous palettes lack enough contrast for accent visibility. Triadic requires 3 colours, violating the 2-colour maximum rule. + +**Itten's Seven Contrast Types**: Each contrast type is a design tool that produces a different emotional effect (Itten, 1961). 
The three most useful for branding: + +1. **Light-Dark contrast**: Black on white. Maximum clarity. Foundation of WCAG accessibility. Use for text–background pairs where legibility is paramount. +2. **Complementary contrast**: Opposite hues placed together (red–green, blue–orange). Maximum visual tension and vibrancy. Risk: at similar saturation, complements vibrate uncomfortably. Mitigation: vary the value (one lighter/darker) or desaturate one. +3. **Saturation contrast**: A vivid colour next to a muted one. The vivid colour appears to glow. This is the primary tool for accent hierarchy — a saturated accent on a desaturated primary draws the eye precisely without requiring hue contrast. + +The other four (cold-warm, simultaneous, hue, extension) are documented in the research (see `docs/research/design/visual/itten_1961.md`) and are useful for advanced palette refinement. + +**Complementary Palette Construction**: A brand palette has 5 roles: (1) primary — the dominant colour; (2) accent — a contrasting highlight; (3) background — the surface colour; (4) text-primary — the main text colour; (5) text-secondary — muted text. Primary and accent are typically complementary or split-complementary. Background and text colours must achieve ≥4.5:1 contrast (WCAG AA). + +**WCAG Contrast Calculation**: Relative luminance L = 0.2126R + 0.7152G + 0.0722B (after gamma linearisation). Contrast ratio = (L_lighter + 0.05) / (L_darker + 0.05). Ratio ranges from 1:1 to 21:1. Normal text requires ≥4.5:1 (AA) or ≥7:1 (AAA). Large text (≥18pt or ≥14pt bold) requires ≥3:1 (AA) or ≥4.5:1 (AAA). + +**Dark-Mode Counterparts**: For each light-theme colour, define a dark-theme counterpart that maintains the same relative visual weight and contrast ratio. Do not simply invert (white-on-black is harsh). Use off-white (#e0e0e0) on dark backgrounds instead of pure white, and adjust accent saturation for dark contexts. 
+ +**Simultaneous Contrast**: A neutral grey on a red background appears greenish; on a green background it appears reddish (Itten, 1961). Always test brand colours against both light and dark backgrounds before finalising. Adjust hex values to compensate for the perceptual shift, not the theoretical value. + +## Content + +### Hue Semantics Table + +| Hue | Signal | Common In | Avoid If | +|-----|--------|-----------|----------| +| Blue | Trust, stability, professionalism | Tech, finance, enterprise | Project personality is "bold, disruptive" | +| Green | Growth, nature, health | Environment, fintech, health | Project personality is "precise, minimal" | +| Red | Energy, urgency, danger | News, entertainment, alerts | Project personality is "calm, reliable" | +| Orange | Creativity, enthusiasm, warmth | Creative tools, education | Project personality is "serious, formal" | +| Purple | Innovation, premium, luxury | Design tools, premium SaaS | Project personality is "accessible, simple" | +| Yellow | Optimism, warmth, caution | Children's products, warnings | Used as small accent only (low contrast on white) | +| Teal | Balance, sophistication | Health tech, lifestyle | Combined with similar-value greens | +| Grey | Neutrality, professionalism | Enterprise, documentation | Used as primary (no personality signal) | + +### Complementary Pair Examples + +| Primary | Accent | Relationship | Contrast on White | +|---------|--------|-------------|-------------------| +| #1a1a2e (dark navy) | #e94560 (warm red) | Split-complementary | Navy 15.7:1, Red 4.7:1 | +| #2d5016 (forest green) | #c9a84c (antique gold) | Split-complementary | Green 7.2:1, Gold 3.1:1 (large text only) | +| #0f3460 (mid blue) | #e94560 (warm red) | Triadic | Blue 11.4:1, Red 4.7:1 | +| #3b2410 (deep brown) | #7baabf (steel blue) | Complementary | Brown 14.2:1, Blue 3.7:1 | + +### Visual Weight Proportions (Itten's Contrast of Extension) + +When balancing colour areas in a composition, visual weight 
depends on inherent brightness: + +| Colour pair | Visual weight ratio (area for equal perceived weight) | +|------------|------------------------------------------------------| +| Yellow : Violet | 1 : 3 | +| Orange : Blue | 1 : 2 | +| Red : Green | 1 : 1 | +| Yellow : Orange | 1 : 1.5 | +| Light grey : Dark navy | 1 : 2 | + +A thin gold line on a navy field reads as "balanced" because the yellow's visual weight per unit area is 3× the violet's. + +### WCAG Contrast Verification Checklist + +For each colour in the palette, verify contrast ratio against: + +1. **Primary on background** — must be ≥4.5:1 for normal text +2. **Accent on background** — must be ≥3:1 for large text or ≥4.5:1 if carrying meaning +3. **Secondary on background** — must be ≥3:1 minimum +4. **Dark-mode primary on dark background** — must be ≥4.5:1 +5. **Dark-mode accent on dark background** — must be ≥3:1 +6. **Logo mark on white** — must be clearly legible (no numeric threshold, but test by squinting) +7. **Logo mark on dark** — must be clearly legible in dark mode variant + +## Related + +- [[design/project-assets]] +- [[design/identity-design]] +- [[design/visual-harmony]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/design/identity-design.md b/smith/data/.opencode/knowledge/design/identity-design.md new file mode 100644 index 0000000..be68c49 --- /dev/null +++ b/smith/data/.opencode/knowledge/design/identity-design.md @@ -0,0 +1,36 @@ +--- +domain: design +tags: [brand, identity, naming, interview, personality, logo-type] +last-updated: 2026-04-30 +--- + +# Identity Design + +## Key Takeaways + +- Brand identity has five components: name, tagline, personality (3 adjectives), visual mark, and wording rules. All are captured in `docs/branding.md`. +- Choose logo type based on project recognition and visual metaphor: combination mark for new brands, abstract mark for established names, pictogram for strong metaphors, letterform for compact marks. 
+- Release naming convention lives in `docs/branding.md` under the Release Naming section — it is part of the brand identity, not separate from it. +- Wording rules (words to avoid, words to prefer) are brand identity constraints, not style preferences — they prevent the brand voice from drifting. +- The interview for brand identity uses [[requirements/interview-techniques#key-takeaways]] but focuses on personality, visual metaphor, and wording rather than requirements. + +## Concepts + +**Brand Identity Components**: A project's brand identity is the combination of (1) name — the project identifier; (2) tagline — one sentence describing what the project does; (3) personality — 3 adjectives that define tone and visual style; (4) visual mark — logo, banner, colour palette; (5) wording rules — words to avoid and prefer. These are captured in `docs/branding.md` per the template at `.templates/docs/branding/branding.md.template`. + +**Logo Type Selection**: Four logo types are appropriate for open-source projects. (1) Combination mark (symbol + wordmark) — best for new brands where the name is not yet widely known. (2) Abstract mark — best for established names that need a unique symbol. (3) Pictogram — best when the project name suggests a strong visual metaphor (e.g., Docker = whale). (4) Letterform/monogram — best for projects with long names needing a compact avatar mark. Choose based on: is the name well-known? (no → combination mark). Does the name suggest a metaphor? (yes → pictogram). Is the primary context small? (yes → letterform). + +**Personality Adjectives**: Three adjectives define the brand personality. They drive every design and writing decision: colour choices (warm vs cool, saturated vs muted), logo style (geometric vs organic, bold vs delicate), and wording (direct vs friendly, technical vs approachable). Examples: "precise, calm, reliable" → cool blues, geometric shapes, direct wording. 
"Bold, fast, disruptive" → warm reds, angular shapes, punchy wording. + +**Release Naming Convention**: Stored in `docs/branding.md` under the Release Naming section. Convention format (e.g., `adjective-greek-figure`), theme (e.g., Greek antiquity), rationale, and excluded words. This is part of brand identity because release names are public-facing communications that reinforce (or contradict) the project's personality. See [[software-craft/versioning#key-takeaways]] for the versioning scheme. + +**Wording Rules**: Two lists: words to avoid and words to prefer. These prevent brand voice drift across releases, documentation, and README. Example: avoid "easy, simple, just" (these are subjective and often false); prefer "minimal, precise, production-ready" (these are verifiable). Wording rules are identity constraints, not stylistic preferences — they define what the project sounds like. + +**Brand Interview Structure**: The interview for brand identity is structured in three phases: (1) personality — what 3 adjectives describe the project? what must it NOT convey? where will the logo appear most? (2) visual metaphor — does the project name suggest a visual? what are 5 peer/competitor logos? how should yours differ? (3) wording — what words should the project avoid? what words should it prefer? what is the tagline? See [[requirements/interview-techniques#concepts]] for interview techniques (CIT for specific examples, Laddering for climbing from surface preferences to real constraints). 
+ +## Related + +- [[design/color-systems]] +- [[design/project-assets]] +- [[design/visual-harmony]] +- [[requirements/interview-techniques]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/design/project-assets.md b/smith/data/.opencode/knowledge/design/project-assets.md new file mode 100644 index 0000000..3cf0dfb --- /dev/null +++ b/smith/data/.opencode/knowledge/design/project-assets.md @@ -0,0 +1,76 @@ +--- +domain: design +tags: [logo, banner, favicon, svg, dark-mode, assets, delivery, progressive-simplification] +last-updated: 2026-04-30 +--- + +# Project Asset Design + +## Key Takeaways + +- Design favicon-first: if the mark cannot hold identity at 16×16, it is not strong enough (Kare, 1984). +- Design monochrome-first: if it does not work in pure black on white, colour will not save it (Rand, 1985). +- Progressive simplification: each size tier gets its own optimised version, not a scaled-down copy (Hicks, 2011). +- Pass the 5-second test, blur test, monochrome test, scalability test, and proximity test before finalising (Airey, 2010). +- SVG assets must use presentation attributes (not CSS classes), be self-contained (no external references), and be optimised with SVGO. +- Provide dark-mode variants: embedded `@media (prefers-color-scheme: dark)` in SVGs, or separate files. +- Minimum favicon delivery set: favicon.ico, icon.svg, apple-touch-icon.png, icon-192.png, icon-512.png. +- Social preview image: 1280×640px minimum, critical content centred in 60–70% of frame. + +## Concepts + +**Favicon-First Design**: Design at the smallest target size (16×16 or 32×32) first, then scale up and add detail. Kare designed all original Macintosh icons on a 32×32 grid because "every pixel must carry meaning." If a mark cannot be recognised at favicon size, it relies on detail that will vanish in real usage. The favicon version is not a simplification of a larger design — it is the core identity, and the larger versions are elaborations of it. 
+ +**Monochrome-First Process**: The mark must work in a single colour on a single background before any colour is applied. Rand tested his logos by blurring them (Gaussian blur 3–5px) to verify the silhouette held. If the blurred mark is still identifiable, the shape is strong. If not, it relies on detail that will fail at small sizes, in print, or on dark backgrounds. + +**Progressive Simplification**: Each size tier gets its own optimised version: Master (512px, full detail), Standard (180px, remove thin strokes, simplify curves), Small (32px, only core silhouette, strokes→fills), Tiny (16px, single bold shape, often hand-redrawn). Do not simply scale a 512px icon to 16px — it produces a muddy, unrecognisable mark. + +**Evaluation Checklist**: (1) 5-second test — show for 5 seconds, remove, ask "what did you see?"; (2) blur test — Gaussian blur 3–5px, silhouette must remain identifiable; (3) monochrome test — pure black on white, pure white on black; (4) scalability test — legible at 16px and 500px; (5) proximity test — distinguishable from 5 competitor logos; (6) "one thing" test — there should be one dominant feature. + +**SVG Construction Rules**: Use presentation attributes (`fill="#1a1a2e"`) not CSS classes (GitHub strips inline `