From fddc77dcb87611551019f8978863887990236ac0 Mon Sep 17 00:00:00 2001 From: nullhack Date: Fri, 1 May 2026 12:13:04 -0400 Subject: [PATCH] feat(smith): implement connect/disconnect/update/status CLI commands with local bundled templates --- .dockerignore | 212 ----- .github/workflows/ci.yml | 32 +- .github/workflows/dependency-review.yml | 6 +- .github/workflows/pypi-publish.yml | 61 +- .github/workflows/tag-release.yml | 33 +- .gitignore | 15 +- AGENTS.md | 256 ------- CHANGELOG.md | 367 +-------- README.md | 179 ++--- TODO.md | 4 - docs/{c4 => adr}/.gitkeep | 0 .../ADR_20260501_argparse-cli-framework.md | 55 ++ ...1_atomic-file-writes-via-temp-directory.md | 57 ++ ...0501_github-bundled-template-resolution.md | 64 ++ .../ADR_20260501_hexagonal-architecture.md | 51 ++ ...60501_local-bundled-template-resolution.md | 66 ++ docs/adr/ADR_20260501_no-smart-merge.md | 53 ++ docs/adr/ADR_20260501_smith-yaml-metadata.md | 55 ++ docs/architecture.md | 19 - docs/discovery.md | 39 - docs/discovery_journal.md | 67 -- docs/features/{backlog => }/.gitkeep | 0 .../features/backlog/smith-assimilate.feature | 117 --- docs/features/backlog/smith-commands.feature | 297 +++++++ docs/features/backlog/smith-new.feature | 110 --- .../completed/display-version.feature | 60 -- docs/index.html | 510 +++++++++++-- .../IN_20260422_scope-discovery.md | 65 ++ .../IN_20260501_local-bundle-reversal.md | 67 ++ ...N_20260501_smith-commands-specification.md | 158 ++++ .../IN_20260501_stakeholder-reinterview.md | 98 +++ ..._20260501_temple8-dependency-resolution.md | 72 ++ .../completed => post-mortem}/.gitkeep | 0 .../2026-04-14-ping-pong-cli-workflow-gaps.md | 176 ----- ...ping-pong-cli-package-and-design-review.md | 108 --- .../PM_20260501_conflict-exit-code-removal.md | 26 + ...260501_coverage-test-in-features-folder.md | 28 + ..._20260501_missing-feature-test-template.md | 39 + .../PM_20260501_missing-overwrite-flag.md | 54 ++ .../PM_20260501_moscow-gherkin-tags.md | 23 + 
.../PM_20260501_reviewer-fixing-code.md | 41 + .../PM_20260501_se-dirtying-living-docs.md | 27 + .../artificial-intelligence/liu_et_al_2023.md | 45 ++ .../design/accessibility/w3c_wcag21_2018.md | 48 ++ docs/research/design/visual/airey_2010.md | 48 ++ docs/research/design/visual/albers_1963.md | 48 ++ docs/research/design/visual/arnheim_1954.md | 48 ++ docs/research/design/visual/biederman_1987.md | 48 ++ docs/research/design/visual/hicks_2011.md | 47 ++ docs/research/design/visual/itten_1961.md | 48 ++ docs/research/design/visual/kare_1984.md | 47 ++ docs/research/design/visual/lupton_2010.md | 47 ++ .../design/visual/muller_brockmann_1981.md | 47 ++ docs/research/design/visual/rand_1985.md | 46 ++ .../research/design/visual/wertheimer_1923.md | 48 ++ .../documentation/procida_2021.md | 45 ++ .../domain-modeling/brandolini_2012.md | 45 ++ .../domain-modeling/evans_2003.md | 45 ++ .../domain-modeling/vernon_2013.md | 45 ++ .../cognitive/craik_lockhart_1972.md | 48 ++ .../cognitive/fisher_geiselman_1987.md | 47 ++ .../psychology/cognitive/flanagan_1954.md | 47 ++ .../psychology/cognitive/gollwitzer_1999.md | 48 ++ .../cognitive/hattie_timperley_2007.md | 48 ++ .../psychology/cognitive/kahneman_2011.md | 47 ++ .../psychology/cognitive/klein_1998.md | 47 ++ .../cognitive/mcdaniel_einstein_2000.md | 47 ++ .../psychology/cognitive/miller_1956.md | 48 ++ .../cognitive/reynolds_gutman_1988.md | 46 ++ .../cognitive/tversky_kahneman_1974.md | 46 ++ .../psychology/social/cialdini_2001.md | 47 ++ .../psychology/social/mellers_et_al_2001.md | 46 ++ .../psychology/social/rogers_farson_1957.md | 46 ++ .../psychology/social/tetlock_1985.md | 45 ++ .../architecture/bass_et_al_2021.md | 48 ++ .../architecture/boehm_1991.md | 47 ++ .../architecture/brown_2018.md | 48 ++ .../architecture/cockburn_2005.md | 47 ++ .../architecture/conway_1968.md | 47 ++ .../architecture/fielding_2000.md | 47 ++ .../architecture/fowler_2003.md | 47 ++ .../architecture/hohpe_woolf_2003.md | 47 ++ 
.../kazman_klein_clements_2000.md | 47 ++ .../architecture/kruchten_1995.md | 47 ++ .../architecture/martin_2012_clean.md | 48 ++ .../architecture/nygard_2011.md | 48 ++ .../architecture/parnas_1972.md | 47 ++ .../architecture/skelton_pais_2019.md | 48 ++ .../process/beck_1999_yagni.md | 45 ++ .../process/beyer_et_al_2016.md | 45 ++ .../process/calver_2020.md | 45 ++ .../process/clegg_barker_1994.md | 45 ++ .../process/fagan_1976.md | 45 ++ .../process/preston-werner_2013.md | 45 ++ .../process/reinertsen_2009.md | 45 ++ .../software-engineering/quality/bay_2008.md | 46 ++ .../software-engineering/quality/beck_2002.md | 48 ++ .../quality/demillo_lipton_sayward_1978.md | 45 ++ .../quality/feathers_2004.md | 46 ++ .../quality/fowler_1999.md | 46 ++ .../quality/freeman_pryce_2009.md | 45 ++ .../quality/gamma_et_al_1994.md | 46 ++ .../quality/google_testing_2013.md | 45 ++ .../quality/maciver_2016.md | 47 ++ .../quality/martin_2000_solid.md | 47 ++ .../quality/martin_2017_first_class_tests.md | 45 ++ .../quality/meszaros_2007.md | 46 ++ .../quality/north_2006.md | 45 ++ .../quality/shvets_2014.md | 46 ++ .../requirements/christel_kang_1992.md | 45 ++ .../requirements/kano_et_al_1984.md | 45 ++ .../requirements/wake_2003.md | 45 ++ .../requirements/wynne_2015.md | 45 ++ docs/scientific-research/README.md | 16 - docs/scientific-research/ai-agents.md | 118 --- docs/scientific-research/architecture.md | 86 --- docs/scientific-research/cognitive-science.md | 150 ---- docs/scientific-research/documentation.md | 69 -- docs/scientific-research/domain-modeling.md | 115 --- docs/scientific-research/oop-design.md | 64 -- .../refactoring-empirical.md | 100 --- .../requirements-elicitation.md | 246 ------ .../scientific-research/software-economics.md | 24 - docs/scientific-research/testing.md | 137 ---- docs/{features/in-progress => spec}/.gitkeep | 0 docs/spec/context_map.md | 84 ++ docs/spec/domain_model.md | 123 +++ docs/spec/glossary.md | 255 +++++++ 
docs/spec/product_definition.md | 156 ++++ docs/spec/system.md | 157 ++++ docs/spec/technical_design.md | 722 ++++++++++++++++++ docs/spec/workflow-design.md | 565 ++++++++++++++ pyproject.toml | 43 +- scripts/flowr-utils.sh | 121 +++ scripts/generate-flowviz-data.py | 248 ++++++ scripts/generate-svg.sh | 92 +++ scripts/update-bundle.sh | 65 ++ smith/__init__.py | 2 +- smith/__main__.py | 24 +- smith/application/__init__.py | 1 + smith/application/connect.py | 30 + smith/application/disconnect.py | 32 + smith/application/status.py | 32 + smith/application/update.py | 32 + smith/data/.flowr/.gitignore | 2 + .../data/.flowr/flows/architecture-flow.yaml | 145 ++++ smith/data/.flowr/flows/branding-flow.yaml | 61 ++ smith/data/.flowr/flows/delivery-flow.yaml | 80 ++ smith/data/.flowr/flows/development-flow.yaml | 72 ++ smith/data/.flowr/flows/discovery-flow.yaml | 101 +++ .../flows/feature-development-flow.yaml | 40 + smith/data/.flowr/flows/main-flow.yaml | 31 + smith/data/.flowr/flows/planning-flow.yaml | 156 ++++ smith/data/.flowr/flows/post-mortem-flow.yaml | 66 ++ smith/data/.flowr/flows/review-gate-flow.yaml | 63 ++ .../data/.flowr/flows/setup-project-flow.yaml | 89 +++ smith/data/.flowr/flows/tdd-cycle-flow.yaml | 51 ++ smith/data/.flowr/sessions/current.yaml | 10 + smith/data/.opencode/agents/design-agent.md | 10 + smith/data/.opencode/agents/domain-expert.md | 10 + smith/data/.opencode/agents/product-owner.md | 10 + smith/data/.opencode/agents/reviewer.md | 10 + smith/data/.opencode/agents/setup-agent.md | 10 + .../.opencode/agents/software-engineer.md | 10 + .../data/.opencode/agents/system-architect.md | 10 + .../knowledge/agent-design/principles.md | 100 +++ .../.opencode/knowledge/architecture/adr.md | 79 ++ .../knowledge/architecture/assessment.md | 69 ++ .../knowledge/architecture/contract-design.md | 72 ++ .../architecture/quality-attributes.md | 60 ++ .../knowledge/architecture/reconciliation.md | 57 ++ .../architecture/technical-design.md | 61 ++ 
.../knowledge/design/color-systems.md | 126 +++ .../knowledge/design/identity-design.md | 36 + .../knowledge/design/project-assets.md | 76 ++ .../knowledge/design/visual-harmony.md | 69 ++ .../domain-modeling/context-mapping.md | 64 ++ .../domain-modeling/event-storming.md | 58 ++ .../knowledge/knowledge-design/principles.md | 123 +++ .../knowledge/requirements/decomposition.md | 31 + .../knowledge/requirements/gherkin.md | 106 +++ .../requirements/interview-techniques.md | 86 +++ .../knowledge/requirements/invest.md | 77 ++ .../knowledge/requirements/moscow.md | 55 ++ .../knowledge/requirements/post-mortem.md | 58 ++ .../knowledge/requirements/pre-mortem.md | 67 ++ .../requirements/ubiquitous-language.md | 61 ++ .../.opencode/knowledge/requirements/wsjf.md | 86 +++ .../knowledge/skill-design/principles.md | 107 +++ .../knowledge/software-craft/code-review.md | 80 ++ .../software-craft/design-patterns.md | 106 +++ .../software-craft/git-conventions.md | 90 +++ .../software-craft/object-calisthenics.md | 50 ++ .../software-craft/refactoring-techniques.md | 137 ++++ .../knowledge/software-craft/refactoring.md | 73 ++ .../software-craft/smell-catalogue.md | 87 +++ .../knowledge/software-craft/solid.md | 57 ++ .../knowledge/software-craft/stub-design.md | 45 ++ .../.opencode/knowledge/software-craft/tdd.md | 101 +++ .../knowledge/software-craft/test-design.md | 58 ++ .../knowledge/software-craft/versioning.md | 34 + .../knowledge/workflow/flowr-spec.md | 144 ++++ .../.opencode/skills/accept-feature/SKILL.md | 15 + .../skills/analyze-root-cause/SKILL.md | 13 + .../skills/assess-architecture/SKILL.md | 16 + .../skills/break-down-feature/SKILL.md | 16 + .../skills/commit-implementation/SKILL.md | 14 + .../skills/conduct-interview/SKILL.md | 18 + .../skills/confirm-baseline/SKILL.md | 13 + .../data/.opencode/skills/create-pr/SKILL.md | 15 + .../.opencode/skills/create-py-stubs/SKILL.md | 15 + .../skills/decide-batch-action/SKILL.md | 10 + 
.../.opencode/skills/define-done/SKILL.md | 13 + .../skills/define-product-scope/SKILL.md | 14 + .../define-ubiquitous-language/SKILL.md | 14 + .../.opencode/skills/design-assets/SKILL.md | 32 + .../.opencode/skills/design-colors/SKILL.md | 21 + .../skills/design-technical-solution/SKILL.md | 23 + .../skills/determine-action-items/SKILL.md | 13 + .../skills/document-post-mortem/SKILL.md | 12 + .../data/.opencode/skills/draft-adr/SKILL.md | 15 + .../.opencode/skills/extract-lessons/SKILL.md | 13 + .../skills/facilitate-event-storming/SKILL.md | 16 + .../skills/implement-minimum/SKILL.md | 14 + .../.opencode/skills/map-contexts/SKILL.md | 18 + .../.opencode/skills/merge-local/SKILL.md | 20 + .../.opencode/skills/model-domain/SKILL.md | 16 + smith/data/.opencode/skills/refactor/SKILL.md | 27 + .../skills/review-architecture/SKILL.md | 17 + .../skills/review-conventions/SKILL.md | 16 + .../.opencode/skills/review-design/SKILL.md | 22 + .../skills/review-structure/SKILL.md | 17 + .../.opencode/skills/select-feature/SKILL.md | 17 + .../.opencode/skills/setup-apply/SKILL.md | 24 + .../.opencode/skills/setup-assess/SKILL.md | 31 + .../.opencode/skills/setup-branding/SKILL.md | 19 + .../.opencode/skills/setup-configure/SKILL.md | 42 + .../.opencode/skills/setup-verify/SKILL.md | 34 + .../.opencode/skills/specify-feature/SKILL.md | 19 + .../skills/structure-project/SKILL.md | 13 + .../skills/write-bdd-features/SKILL.md | 18 + .../data/.opencode/skills/write-test/SKILL.md | 15 + smith/data/.templates/CHANGELOG.md.template | 9 + .../docs/adr/ADR_YYYYMMDD_.md.template | 41 + .../docs/assets/banner.svg.template | 4 + .../.templates/docs/assets/logo.svg.template | 4 + .../docs/branding/branding.md.template | 52 ++ .../.templates/docs/context_map.md.template | 47 ++ .../.templates/docs/domain_model.md.template | 96 +++ .../docs/features/feature.feature.template | 50 ++ .../data/.templates/docs/glossary.md.template | 36 + .../IN_YYYYMMDD_.md.template | 59 ++ 
.../PM_YYYYMMDD_.md.template | 21 + .../docs/product_definition.md.template | 131 ++++ .../docs/research/TEMPLATE.md.template | 49 ++ smith/data/.templates/docs/system.md.template | 114 +++ .../docs/technical_design.md.template | 139 ++++ .../features/_test.py.template | 11 + smith/data/AGENTS.md | 136 ++++ smith/data/__init__.py | 1 + smith/delivery/__init__.py | 1 + smith/delivery/cli.py | 170 +++++ smith/domain/__init__.py | 1 + smith/domain/connection.py | 153 ++++ smith/domain/ports.py | 73 ++ smith/domain/value_objects.py | 62 ++ smith/infrastructure/__init__.py | 1 + smith/infrastructure/filesystem.py | 54 ++ smith/infrastructure/gitignore.py | 94 +++ smith/infrastructure/metadata.py | 40 + smith/infrastructure/template_source.py | 215 ++++++ template-config.yaml | 113 +++ tests/conftest.py | 25 +- .../cli_entrypoint/help_output_test.py | 38 + .../unrecognised_arguments_test.py | 32 + .../cli_entrypoint/version_output_test.py | 39 + tests/features/smith_commands/__init__.py | 5 + tests/features/smith_commands/conftest.py | 134 ++++ .../connect_fresh_project_test.py | 417 ++++++++++ .../smith_commands/disconnect_test.py | 153 ++++ .../smith_commands/skip_user_tracked_test.py | 220 ++++++ tests/features/smith_commands/status_test.py | 177 +++++ tests/features/smith_commands/update_test.py | 161 ++++ tests/unit/app_test.py | 18 - tests/unit/application/__init__.py | 1 + tests/unit/delivery/__init__.py | 1 + tests/unit/domain/__init__.py | 1 + tests/unit/domain/test_connection.py | 26 + tests/unit/infrastructure/__init__.py | 1 + tests/unit/infrastructure/filesystem_test.py | 84 ++ tests/unit/infrastructure/gitignore_test.py | 54 ++ tests/unit/infrastructure/metadata_test.py | 38 + .../infrastructure/template_source_test.py | 246 ++++++ tests/unit/main_test.py | 42 + uv.lock | 191 ++--- 295 files changed, 16606 insertions(+), 3063 deletions(-) delete mode 100644 .dockerignore delete mode 100644 AGENTS.md delete mode 100644 TODO.md rename docs/{c4 => 
adr}/.gitkeep (100%) create mode 100644 docs/adr/ADR_20260501_argparse-cli-framework.md create mode 100644 docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md create mode 100644 docs/adr/ADR_20260501_github-bundled-template-resolution.md create mode 100644 docs/adr/ADR_20260501_hexagonal-architecture.md create mode 100644 docs/adr/ADR_20260501_local-bundled-template-resolution.md create mode 100644 docs/adr/ADR_20260501_no-smart-merge.md create mode 100644 docs/adr/ADR_20260501_smith-yaml-metadata.md delete mode 100644 docs/architecture.md delete mode 100644 docs/discovery.md delete mode 100644 docs/discovery_journal.md rename docs/features/{backlog => }/.gitkeep (100%) delete mode 100644 docs/features/backlog/smith-assimilate.feature create mode 100644 docs/features/backlog/smith-commands.feature delete mode 100644 docs/features/backlog/smith-new.feature delete mode 100644 docs/features/completed/display-version.feature create mode 100644 docs/interview-notes/IN_20260422_scope-discovery.md create mode 100644 docs/interview-notes/IN_20260501_local-bundle-reversal.md create mode 100644 docs/interview-notes/IN_20260501_smith-commands-specification.md create mode 100644 docs/interview-notes/IN_20260501_stakeholder-reinterview.md create mode 100644 docs/interview-notes/IN_20260501_temple8-dependency-resolution.md rename docs/{features/completed => post-mortem}/.gitkeep (100%) delete mode 100644 docs/post-mortem/2026-04-14-ping-pong-cli-workflow-gaps.md delete mode 100644 docs/post-mortem/2026-04-16-ping-pong-cli-package-and-design-review.md create mode 100644 docs/post-mortem/PM_20260501_conflict-exit-code-removal.md create mode 100644 docs/post-mortem/PM_20260501_coverage-test-in-features-folder.md create mode 100644 docs/post-mortem/PM_20260501_missing-feature-test-template.md create mode 100644 docs/post-mortem/PM_20260501_missing-overwrite-flag.md create mode 100644 docs/post-mortem/PM_20260501_moscow-gherkin-tags.md create mode 100644 
docs/post-mortem/PM_20260501_reviewer-fixing-code.md create mode 100644 docs/post-mortem/PM_20260501_se-dirtying-living-docs.md create mode 100644 docs/research/computer-science/artificial-intelligence/liu_et_al_2023.md create mode 100644 docs/research/design/accessibility/w3c_wcag21_2018.md create mode 100644 docs/research/design/visual/airey_2010.md create mode 100644 docs/research/design/visual/albers_1963.md create mode 100644 docs/research/design/visual/arnheim_1954.md create mode 100644 docs/research/design/visual/biederman_1987.md create mode 100644 docs/research/design/visual/hicks_2011.md create mode 100644 docs/research/design/visual/itten_1961.md create mode 100644 docs/research/design/visual/kare_1984.md create mode 100644 docs/research/design/visual/lupton_2010.md create mode 100644 docs/research/design/visual/muller_brockmann_1981.md create mode 100644 docs/research/design/visual/rand_1985.md create mode 100644 docs/research/design/visual/wertheimer_1923.md create mode 100644 docs/research/information-science/documentation/procida_2021.md create mode 100644 docs/research/information-science/domain-modeling/brandolini_2012.md create mode 100644 docs/research/information-science/domain-modeling/evans_2003.md create mode 100644 docs/research/information-science/domain-modeling/vernon_2013.md create mode 100644 docs/research/psychology/cognitive/craik_lockhart_1972.md create mode 100644 docs/research/psychology/cognitive/fisher_geiselman_1987.md create mode 100644 docs/research/psychology/cognitive/flanagan_1954.md create mode 100644 docs/research/psychology/cognitive/gollwitzer_1999.md create mode 100644 docs/research/psychology/cognitive/hattie_timperley_2007.md create mode 100644 docs/research/psychology/cognitive/kahneman_2011.md create mode 100644 docs/research/psychology/cognitive/klein_1998.md create mode 100644 docs/research/psychology/cognitive/mcdaniel_einstein_2000.md create mode 100644 docs/research/psychology/cognitive/miller_1956.md create 
mode 100644 docs/research/psychology/cognitive/reynolds_gutman_1988.md create mode 100644 docs/research/psychology/cognitive/tversky_kahneman_1974.md create mode 100644 docs/research/psychology/social/cialdini_2001.md create mode 100644 docs/research/psychology/social/mellers_et_al_2001.md create mode 100644 docs/research/psychology/social/rogers_farson_1957.md create mode 100644 docs/research/psychology/social/tetlock_1985.md create mode 100644 docs/research/software-engineering/architecture/bass_et_al_2021.md create mode 100644 docs/research/software-engineering/architecture/boehm_1991.md create mode 100644 docs/research/software-engineering/architecture/brown_2018.md create mode 100644 docs/research/software-engineering/architecture/cockburn_2005.md create mode 100644 docs/research/software-engineering/architecture/conway_1968.md create mode 100644 docs/research/software-engineering/architecture/fielding_2000.md create mode 100644 docs/research/software-engineering/architecture/fowler_2003.md create mode 100644 docs/research/software-engineering/architecture/hohpe_woolf_2003.md create mode 100644 docs/research/software-engineering/architecture/kazman_klein_clements_2000.md create mode 100644 docs/research/software-engineering/architecture/kruchten_1995.md create mode 100644 docs/research/software-engineering/architecture/martin_2012_clean.md create mode 100644 docs/research/software-engineering/architecture/nygard_2011.md create mode 100644 docs/research/software-engineering/architecture/parnas_1972.md create mode 100644 docs/research/software-engineering/architecture/skelton_pais_2019.md create mode 100644 docs/research/software-engineering/process/beck_1999_yagni.md create mode 100644 docs/research/software-engineering/process/beyer_et_al_2016.md create mode 100644 docs/research/software-engineering/process/calver_2020.md create mode 100644 docs/research/software-engineering/process/clegg_barker_1994.md create mode 100644 
docs/research/software-engineering/process/fagan_1976.md create mode 100644 docs/research/software-engineering/process/preston-werner_2013.md create mode 100644 docs/research/software-engineering/process/reinertsen_2009.md create mode 100644 docs/research/software-engineering/quality/bay_2008.md create mode 100644 docs/research/software-engineering/quality/beck_2002.md create mode 100644 docs/research/software-engineering/quality/demillo_lipton_sayward_1978.md create mode 100644 docs/research/software-engineering/quality/feathers_2004.md create mode 100644 docs/research/software-engineering/quality/fowler_1999.md create mode 100644 docs/research/software-engineering/quality/freeman_pryce_2009.md create mode 100644 docs/research/software-engineering/quality/gamma_et_al_1994.md create mode 100644 docs/research/software-engineering/quality/google_testing_2013.md create mode 100644 docs/research/software-engineering/quality/maciver_2016.md create mode 100644 docs/research/software-engineering/quality/martin_2000_solid.md create mode 100644 docs/research/software-engineering/quality/martin_2017_first_class_tests.md create mode 100644 docs/research/software-engineering/quality/meszaros_2007.md create mode 100644 docs/research/software-engineering/quality/north_2006.md create mode 100644 docs/research/software-engineering/quality/shvets_2014.md create mode 100644 docs/research/software-engineering/requirements/christel_kang_1992.md create mode 100644 docs/research/software-engineering/requirements/kano_et_al_1984.md create mode 100644 docs/research/software-engineering/requirements/wake_2003.md create mode 100644 docs/research/software-engineering/requirements/wynne_2015.md delete mode 100644 docs/scientific-research/README.md delete mode 100644 docs/scientific-research/ai-agents.md delete mode 100644 docs/scientific-research/architecture.md delete mode 100644 docs/scientific-research/cognitive-science.md delete mode 100644 docs/scientific-research/documentation.md delete 
mode 100644 docs/scientific-research/domain-modeling.md delete mode 100644 docs/scientific-research/oop-design.md delete mode 100644 docs/scientific-research/refactoring-empirical.md delete mode 100644 docs/scientific-research/requirements-elicitation.md delete mode 100644 docs/scientific-research/software-economics.md delete mode 100644 docs/scientific-research/testing.md rename docs/{features/in-progress => spec}/.gitkeep (100%) create mode 100644 docs/spec/context_map.md create mode 100644 docs/spec/domain_model.md create mode 100644 docs/spec/glossary.md create mode 100644 docs/spec/product_definition.md create mode 100644 docs/spec/system.md create mode 100644 docs/spec/technical_design.md create mode 100644 docs/spec/workflow-design.md create mode 100755 scripts/flowr-utils.sh create mode 100755 scripts/generate-flowviz-data.py create mode 100755 scripts/generate-svg.sh create mode 100755 scripts/update-bundle.sh create mode 100644 smith/application/__init__.py create mode 100644 smith/application/connect.py create mode 100644 smith/application/disconnect.py create mode 100644 smith/application/status.py create mode 100644 smith/application/update.py create mode 100644 smith/data/.flowr/.gitignore create mode 100644 smith/data/.flowr/flows/architecture-flow.yaml create mode 100644 smith/data/.flowr/flows/branding-flow.yaml create mode 100644 smith/data/.flowr/flows/delivery-flow.yaml create mode 100644 smith/data/.flowr/flows/development-flow.yaml create mode 100644 smith/data/.flowr/flows/discovery-flow.yaml create mode 100644 smith/data/.flowr/flows/feature-development-flow.yaml create mode 100644 smith/data/.flowr/flows/main-flow.yaml create mode 100644 smith/data/.flowr/flows/planning-flow.yaml create mode 100644 smith/data/.flowr/flows/post-mortem-flow.yaml create mode 100644 smith/data/.flowr/flows/review-gate-flow.yaml create mode 100644 smith/data/.flowr/flows/setup-project-flow.yaml create mode 100644 smith/data/.flowr/flows/tdd-cycle-flow.yaml 
create mode 100644 smith/data/.flowr/sessions/current.yaml create mode 100644 smith/data/.opencode/agents/design-agent.md create mode 100644 smith/data/.opencode/agents/domain-expert.md create mode 100644 smith/data/.opencode/agents/product-owner.md create mode 100644 smith/data/.opencode/agents/reviewer.md create mode 100644 smith/data/.opencode/agents/setup-agent.md create mode 100644 smith/data/.opencode/agents/software-engineer.md create mode 100644 smith/data/.opencode/agents/system-architect.md create mode 100644 smith/data/.opencode/knowledge/agent-design/principles.md create mode 100644 smith/data/.opencode/knowledge/architecture/adr.md create mode 100644 smith/data/.opencode/knowledge/architecture/assessment.md create mode 100644 smith/data/.opencode/knowledge/architecture/contract-design.md create mode 100644 smith/data/.opencode/knowledge/architecture/quality-attributes.md create mode 100644 smith/data/.opencode/knowledge/architecture/reconciliation.md create mode 100644 smith/data/.opencode/knowledge/architecture/technical-design.md create mode 100644 smith/data/.opencode/knowledge/design/color-systems.md create mode 100644 smith/data/.opencode/knowledge/design/identity-design.md create mode 100644 smith/data/.opencode/knowledge/design/project-assets.md create mode 100644 smith/data/.opencode/knowledge/design/visual-harmony.md create mode 100644 smith/data/.opencode/knowledge/domain-modeling/context-mapping.md create mode 100644 smith/data/.opencode/knowledge/domain-modeling/event-storming.md create mode 100644 smith/data/.opencode/knowledge/knowledge-design/principles.md create mode 100644 smith/data/.opencode/knowledge/requirements/decomposition.md create mode 100644 smith/data/.opencode/knowledge/requirements/gherkin.md create mode 100644 smith/data/.opencode/knowledge/requirements/interview-techniques.md create mode 100644 smith/data/.opencode/knowledge/requirements/invest.md create mode 100644 smith/data/.opencode/knowledge/requirements/moscow.md 
create mode 100644 smith/data/.opencode/knowledge/requirements/post-mortem.md create mode 100644 smith/data/.opencode/knowledge/requirements/pre-mortem.md create mode 100644 smith/data/.opencode/knowledge/requirements/ubiquitous-language.md create mode 100644 smith/data/.opencode/knowledge/requirements/wsjf.md create mode 100644 smith/data/.opencode/knowledge/skill-design/principles.md create mode 100644 smith/data/.opencode/knowledge/software-craft/code-review.md create mode 100644 smith/data/.opencode/knowledge/software-craft/design-patterns.md create mode 100644 smith/data/.opencode/knowledge/software-craft/git-conventions.md create mode 100644 smith/data/.opencode/knowledge/software-craft/object-calisthenics.md create mode 100644 smith/data/.opencode/knowledge/software-craft/refactoring-techniques.md create mode 100644 smith/data/.opencode/knowledge/software-craft/refactoring.md create mode 100644 smith/data/.opencode/knowledge/software-craft/smell-catalogue.md create mode 100644 smith/data/.opencode/knowledge/software-craft/solid.md create mode 100644 smith/data/.opencode/knowledge/software-craft/stub-design.md create mode 100644 smith/data/.opencode/knowledge/software-craft/tdd.md create mode 100644 smith/data/.opencode/knowledge/software-craft/test-design.md create mode 100644 smith/data/.opencode/knowledge/software-craft/versioning.md create mode 100644 smith/data/.opencode/knowledge/workflow/flowr-spec.md create mode 100644 smith/data/.opencode/skills/accept-feature/SKILL.md create mode 100644 smith/data/.opencode/skills/analyze-root-cause/SKILL.md create mode 100644 smith/data/.opencode/skills/assess-architecture/SKILL.md create mode 100644 smith/data/.opencode/skills/break-down-feature/SKILL.md create mode 100644 smith/data/.opencode/skills/commit-implementation/SKILL.md create mode 100644 smith/data/.opencode/skills/conduct-interview/SKILL.md create mode 100644 smith/data/.opencode/skills/confirm-baseline/SKILL.md create mode 100644 
smith/data/.opencode/skills/create-pr/SKILL.md create mode 100644 smith/data/.opencode/skills/create-py-stubs/SKILL.md create mode 100644 smith/data/.opencode/skills/decide-batch-action/SKILL.md create mode 100644 smith/data/.opencode/skills/define-done/SKILL.md create mode 100644 smith/data/.opencode/skills/define-product-scope/SKILL.md create mode 100644 smith/data/.opencode/skills/define-ubiquitous-language/SKILL.md create mode 100644 smith/data/.opencode/skills/design-assets/SKILL.md create mode 100644 smith/data/.opencode/skills/design-colors/SKILL.md create mode 100644 smith/data/.opencode/skills/design-technical-solution/SKILL.md create mode 100644 smith/data/.opencode/skills/determine-action-items/SKILL.md create mode 100644 smith/data/.opencode/skills/document-post-mortem/SKILL.md create mode 100644 smith/data/.opencode/skills/draft-adr/SKILL.md create mode 100644 smith/data/.opencode/skills/extract-lessons/SKILL.md create mode 100644 smith/data/.opencode/skills/facilitate-event-storming/SKILL.md create mode 100644 smith/data/.opencode/skills/implement-minimum/SKILL.md create mode 100644 smith/data/.opencode/skills/map-contexts/SKILL.md create mode 100644 smith/data/.opencode/skills/merge-local/SKILL.md create mode 100644 smith/data/.opencode/skills/model-domain/SKILL.md create mode 100644 smith/data/.opencode/skills/refactor/SKILL.md create mode 100644 smith/data/.opencode/skills/review-architecture/SKILL.md create mode 100644 smith/data/.opencode/skills/review-conventions/SKILL.md create mode 100644 smith/data/.opencode/skills/review-design/SKILL.md create mode 100644 smith/data/.opencode/skills/review-structure/SKILL.md create mode 100644 smith/data/.opencode/skills/select-feature/SKILL.md create mode 100644 smith/data/.opencode/skills/setup-apply/SKILL.md create mode 100644 smith/data/.opencode/skills/setup-assess/SKILL.md create mode 100644 smith/data/.opencode/skills/setup-branding/SKILL.md create mode 100644 
smith/data/.opencode/skills/setup-configure/SKILL.md create mode 100644 smith/data/.opencode/skills/setup-verify/SKILL.md create mode 100644 smith/data/.opencode/skills/specify-feature/SKILL.md create mode 100644 smith/data/.opencode/skills/structure-project/SKILL.md create mode 100644 smith/data/.opencode/skills/write-bdd-features/SKILL.md create mode 100644 smith/data/.opencode/skills/write-test/SKILL.md create mode 100644 smith/data/.templates/CHANGELOG.md.template create mode 100644 smith/data/.templates/docs/adr/ADR_YYYYMMDD_.md.template create mode 100644 smith/data/.templates/docs/assets/banner.svg.template create mode 100644 smith/data/.templates/docs/assets/logo.svg.template create mode 100644 smith/data/.templates/docs/branding/branding.md.template create mode 100644 smith/data/.templates/docs/context_map.md.template create mode 100644 smith/data/.templates/docs/domain_model.md.template create mode 100644 smith/data/.templates/docs/features/feature.feature.template create mode 100644 smith/data/.templates/docs/glossary.md.template create mode 100644 smith/data/.templates/docs/interview-notes/IN_YYYYMMDD_.md.template create mode 100644 smith/data/.templates/docs/post-mortem/PM_YYYYMMDD_.md.template create mode 100644 smith/data/.templates/docs/product_definition.md.template create mode 100644 smith/data/.templates/docs/research/TEMPLATE.md.template create mode 100644 smith/data/.templates/docs/system.md.template create mode 100644 smith/data/.templates/docs/technical_design.md.template create mode 100644 smith/data/.templates/tests/features/_test.py.template create mode 100644 smith/data/AGENTS.md create mode 100644 smith/data/__init__.py create mode 100644 smith/delivery/__init__.py create mode 100644 smith/delivery/cli.py create mode 100644 smith/domain/__init__.py create mode 100644 smith/domain/connection.py create mode 100644 smith/domain/ports.py create mode 100644 smith/domain/value_objects.py create mode 100644 smith/infrastructure/__init__.py 
create mode 100644 smith/infrastructure/filesystem.py create mode 100644 smith/infrastructure/gitignore.py create mode 100644 smith/infrastructure/metadata.py create mode 100644 smith/infrastructure/template_source.py create mode 100644 template-config.yaml create mode 100644 tests/features/cli_entrypoint/help_output_test.py create mode 100644 tests/features/cli_entrypoint/unrecognised_arguments_test.py create mode 100644 tests/features/cli_entrypoint/version_output_test.py create mode 100644 tests/features/smith_commands/__init__.py create mode 100644 tests/features/smith_commands/conftest.py create mode 100644 tests/features/smith_commands/connect_fresh_project_test.py create mode 100644 tests/features/smith_commands/disconnect_test.py create mode 100644 tests/features/smith_commands/skip_user_tracked_test.py create mode 100644 tests/features/smith_commands/status_test.py create mode 100644 tests/features/smith_commands/update_test.py delete mode 100644 tests/unit/app_test.py create mode 100644 tests/unit/application/__init__.py create mode 100644 tests/unit/delivery/__init__.py create mode 100644 tests/unit/domain/__init__.py create mode 100644 tests/unit/domain/test_connection.py create mode 100644 tests/unit/infrastructure/__init__.py create mode 100644 tests/unit/infrastructure/filesystem_test.py create mode 100644 tests/unit/infrastructure/gitignore_test.py create mode 100644 tests/unit/infrastructure/metadata_test.py create mode 100644 tests/unit/infrastructure/template_source_test.py create mode 100644 tests/unit/main_test.py diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 268dbdf..0000000 --- a/.dockerignore +++ /dev/null @@ -1,212 +0,0 @@ -# Docker ignore file for pairai -# Optimized for minimal context and security - -# Version control -.git/ -.gitignore -.gitattributes - -# Development files -.vscode/ -.idea/ -*.swp -*.swo -*~ - -# OS generated files -.DS_Store -.DS_Store? 
-._* -.Spotlight-V100 -.Trashes -ehthumbs.db -Thumbs.db - -# Python -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ -docs/tests/ -docs/coverage/ -docs/mutation/ - -# Translations -*.mo -*.pot - -# Django stuff -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff -instance/ -.webassets-cache - -# Scrapy stuff -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -Pipfile.lock - -# poetry -poetry.lock - -# pdm -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582 -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -.idea/ - -# Project specific -docs/api/ -docs/tests/ -docs/coverage/ -docs/mutation/ -.mutmut-cache/ -mutants/ -*.db -*.sqlite -*.sqlite3 - -# Docker -.dockerignore -Dockerfile* -docker-compose*.yml - -# CI/CD -.github/ -.gitlab-ci.yml -.travis.yml -.circleci/ - -# Package managers -node_modules/ -package-lock.json -yarn.lock - -# Logs -*.log -logs/ - -# Temporary files -tmp/ -temp/ -.tmp/ - -# Security 
-.secrets -credentials.json -*.pem -*.key -*.crt - -# Backup files -*.bak -*.backup -*.old \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ba51540..6045076 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,10 +33,10 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -67,21 +67,23 @@ jobs: fail-fast: false matrix: python-version: ["3.13"] + env: + UV_SYSTEM_PYTHON: "false" steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" python-version: ${{ matrix.python-version }} - name: Install dependencies - run: uv sync --locked --all-extras --dev - + run: uv sync --locked --all-extras --dev && uv pip install -e . 
+ - name: Run fast tests run: uv run task test-fast @@ -115,13 +117,15 @@ jobs: permissions: contents: read actions: read - + env: + UV_SYSTEM_PYTHON: "false" + steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -130,7 +134,7 @@ jobs: run: uv python install 3.13 - name: Install dependencies - run: uv sync --locked --all-extras --dev + run: uv sync --locked --all-extras --dev && uv pip install -e . - name: Build documentation run: uv run task doc-build @@ -168,13 +172,15 @@ jobs: if: github.event_name == 'push' && github.ref == 'refs/heads/main' permissions: contents: write + env: + UV_SYSTEM_PYTHON: "false" steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -183,7 +189,7 @@ jobs: run: uv python install 3.13 - name: Install dependencies - run: uv sync --locked --all-extras --dev + run: uv sync --locked --all-extras --dev && uv pip install -e . 
- name: Build and publish documentation run: uv run task doc-publish diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index e4712b6..185d0ca 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Dependency Review uses: actions/dependency-review-action@2031cfc080254a8a887f58cffee85186f0e49e48 # v4.9.0 @@ -36,10 +36,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 - name: Set up Python run: uv python install 3.13 diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 26d06f3..08011c1 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -4,6 +4,12 @@ on: push: tags: - "v*" + workflow_dispatch: + inputs: + tag: + description: "Tag to publish (e.g. 
v0.1.0+20260501)" + required: true + type: string permissions: contents: read @@ -17,10 +23,12 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 + with: + ref: ${{ github.event_name == 'workflow_dispatch' && inputs.tag || github.ref }} - name: Install uv - uses: astral-sh/setup-uv@cdfb2ee6dde255817c739680168ad81e184c4bfb # v4.0.0 + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -28,14 +36,34 @@ jobs: - name: Set up Python 3.13 run: uv python install 3.13 + - name: Install dependencies + run: uv sync --locked --all-extras --dev + + - name: Install package in editable mode + run: uv pip install -e . + + - name: Run quality gate + run: | + uv run task lint + uv run task static-check + uv run task test + - name: Clean dist run: rm -rf dist/ - name: Build wheel and sdist run: uv build + - name: Verify package installation (wheel) + run: | + uv run --isolated --no-project --with dist/*.whl python -c "import smith; print('Wheel install successful')" + + - name: Verify package installation (sdist) + run: | + uv run --isolated --no-project --with dist/*.tar.gz python -c "import smith; print('Source dist install successful')" + - name: Upload dist artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v4.6.2 with: name: dist path: dist/ @@ -50,7 +78,7 @@ jobs: steps: - name: Download dist artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: dist path: dist/ @@ -59,27 +87,34 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1 release: - name: Create GitHub Release + name: Update GitHub Release runs-on: ubuntu-latest needs: publish permissions: - contents: write # required to create a release + contents: 
write # required to create/edit a release steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 + with: + ref: ${{ github.event_name == 'workflow_dispatch' && inputs.tag || github.ref }} - name: Download dist artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: dist path: dist/ - - name: Create GitHub Release + - name: Create or update GitHub Release env: GH_TOKEN: ${{ github.token }} + TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.tag || github.ref_name }} run: | - gh release create "${{ github.ref_name }}" \ - --title "${{ github.ref_name }}" \ - --generate-notes \ - dist/* + if gh release view "$TAG" > /dev/null 2>&1; then + gh release upload "$TAG" dist/* --clobber + else + gh release create "$TAG" \ + --title "$TAG" \ + --generate-notes \ + dist/* + fi diff --git a/.github/workflows/tag-release.yml b/.github/workflows/tag-release.yml index 4fbc251..b639b09 100644 --- a/.github/workflows/tag-release.yml +++ b/.github/workflows/tag-release.yml @@ -14,16 +14,20 @@ jobs: runs-on: ubuntu-latest permissions: contents: write + env: + UV_SYSTEM_PYTHON: "false" steps: - name: Checkout code - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4.1.1 - name: Extract version from pyproject.toml id: version run: | + # Extract semver core (major.minor.patch) from pyproject.toml VERSION=$(grep '^version' pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/') echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + # Build metadata (+YYYYMMDD) is appended at tag time, not stored in pyproject.toml echo "tag=v${VERSION}" >> "$GITHUB_OUTPUT" - name: Check if tag already exists @@ -35,6 +39,25 @@ jobs: echo "exists=false" >> "$GITHUB_OUTPUT" fi + - name: Install uv + if: 
steps.check.outputs.exists == 'false' + uses: astral-sh/setup-uv@c0c76fcf76c37099e6a452584d04b015240faefc # v4.0.0 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Set up Python 3.13 + if: steps.check.outputs.exists == 'false' + run: uv python install 3.13 + + - name: Install dependencies + if: steps.check.outputs.exists == 'false' + run: uv sync --locked --all-extras --dev && uv pip install -e . + + - name: Run release-check + if: steps.check.outputs.exists == 'false' + run: uv run task release-check + - name: Create and push tag if: steps.check.outputs.exists == 'false' env: @@ -43,9 +66,11 @@ jobs: GIT_COMMITTER_NAME: github-actions[bot] GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com run: | - git tag "${{ steps.version.outputs.tag }}" - git push origin "${{ steps.version.outputs.tag }}" - echo "Created tag ${{ steps.version.outputs.tag }} at $(git rev-parse HEAD)" + DATE=$(date +%Y%m%d) + TAG="v${{ steps.version.outputs.version }}+${DATE}" + git tag "$TAG" + git push origin "$TAG" + echo "Created tag $TAG at $(git rev-parse HEAD)" - name: Skip (tag already exists) if: steps.check.outputs.exists == 'true' diff --git a/.gitignore b/.gitignore index 8131edd..33cbce9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ .DS_Store -.opencode/ +*.swp .coverage -.vscode/\n.idea/ +.vscode/ +.idea/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -169,4 +170,12 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ .mutmut-cache -# Trigger CI run to verify linting fixes + +# Agentic files (managed by smith, not versioned) +.opencode/ +AGENTS.md +.templates/ +.flowr/ + +# Generated flow visualization data (regenerate with: task regenerate-flowviz) +flowviz/ diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index e62ef6f..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,256 +0,0 @@ -# Python Project Template - -A Python template to quickstart any project with a production-ready workflow, quality tooling, and AI-assisted development. - -## Workflow Overview - -Features flow through 5 steps with a WIP limit of 1 feature at a time. The filesystem enforces WIP: -- `docs/features/backlog/.feature` — features waiting to be worked on -- `docs/features/in-progress/.feature` — exactly one feature being built right now -- `docs/features/completed/.feature` — accepted and shipped features - -``` -STEP 1: SCOPE (product-owner) → discovery + Gherkin stories + criteria -STEP 2: ARCH (software-engineer) → read all features + existing package files, write domain stubs (signatures only, no bodies); decisions appended to docs/architecture.md -STEP 3: TDD LOOP (software-engineer) → RED → GREEN → REFACTOR, one @id at a time -STEP 4: VERIFY (reviewer) → run all commands, review code -STEP 5: ACCEPT (product-owner) → demo, validate, move .feature to completed/ (PO only) -``` - -**PO picks the next feature from backlog. Software-engineer never self-selects.** - -**Verification is adversarial.** The reviewer's job is to try to break the feature, not to confirm it works. The default hypothesis is "it might be broken despite green checks; prove otherwise." - -## Roles - -- **Product Owner (PO)** — AI agent. Interviews the stakeholder, writes discovery docs, Gherkin features, and acceptance criteria. Accepts or rejects deliveries. **Sole owner of all `.feature` file moves** (backlog → in-progress before Step 2; in-progress → completed after Step 5 acceptance). -- **Stakeholder** — Human. 
Answers PO's questions, provides domain knowledge, approves PO syntheses to confirm discovery is complete. -- **Software Engineer** — AI agent. Architecture, test bodies, implementation, git. Never creates, edits, or moves `.feature` files. Escalates spec gaps to PO. If no `.feature` file is in `in-progress/`, stops and escalates to PO. -- **Reviewer** — AI agent. Adversarial verification. Reports spec gaps to PO. Never creates, edits, or moves `.feature` files. After APPROVED report, stops and escalates to PO for Step 5. - -## Feature File Chain of Responsibility - -`.feature` files are owned exclusively by the PO. **No other agent ever creates, edits, or moves them.** - -| Transition | Who | When | -|---|---|---| -| `backlog/` → `in-progress/` | PO only | Before Step 2 begins; only if `Status: BASELINED` | -| `in-progress/` → `completed/` | PO only | After Step 5 acceptance | - -**If an agent (SE or reviewer) finds no `.feature` in `in-progress/`**: update TODO.md with the correct `Next:` escalation line and stop. Never self-select a backlog feature. 
- -## Agents - -- **product-owner** — defines scope (Stage 1 Discovery + Stage 2 Specification), picks features, accepts deliveries -- **software-engineer** — architecture, tests, code, git, releases (Steps 2-3 + release) -- **reviewer** — runs commands and reviews code at Step 4, produces APPROVED/REJECTED report -- **designer** — creates and updates visual assets (SVG banners, logos) and maintains `docs/branding.md` -- **setup-project** — one-time setup to initialize a new project from this template - -## Skills - -| Skill | Used By | Step | -|---|---|---| -| `run-session` | all agents | every session | -| `select-feature` | product-owner | between features (idle state) | -| `define-scope` | product-owner | 1 | -| `implement` | software-engineer | 2, 3 | -| `apply-patterns` | software-engineer | 2, 3 (on-demand, when GoF pattern needed) | -| `refactor` | software-engineer | 3 (REFACTOR phase + preparatory refactoring) | -| `verify` | reviewer | 4 | -| `check-quality` | software-engineer | pre-handoff (redirects to `verify`) | -| `create-pr` | software-engineer | 5 | -| `git-release` | software-engineer | 5 (after acceptance) | -| `update-docs` | product-owner | 5 (after acceptance) + on stakeholder demand | -| `design-colors` | designer | branding, color, WCAG compliance | -| `design-assets` | designer | SVG asset creation and updates | -| `create-skill` | software-engineer | meta | -| `create-agent` | human-user | meta | - -**Branding**: Agents that generate docs, diagrams, release names, or visual assets read `docs/branding.md` if present. Absent or blank fields fall back to defaults (adjective-animal release names, Mermaid default colors, no wording constraints). `docs/branding.md` and `docs/assets/` are owned by the designer agent. - -**Session protocol**: Every agent loads `skill run-session` at session start. Load additional skills as needed for the current step. 
- -## Step 1 — SCOPE - -Step 1 has two stages: - -### Stage 1 — Discovery (PO + stakeholder, iterative) - -Discovery is a continuous process. Sessions happen whenever scope needs to be established or refined — for a new project, new features, or new information. Every session follows the same structure: - -**Session question order:** -1. **General** (5Ws + Success + Failure + Out-of-scope) — first session only, if the journal doesn't exist yet -2. **Cross-cutting** — behavior groups, bounded contexts, integration points, lifecycle events -3. **Per-feature** — one feature at a time; extract entities from `docs/discovery.md` Domain Model; gap-finding with CIT, Laddering, CI Perspective Change - -**Real-time split rule**: if the PO detects >2 concerns or >8 candidate Examples for a feature during per-feature questions, split immediately — record the split in the journal, create stub `.feature` files, continue questions for both in the same session. - -**After questions (PO alone, in order):** -1. Append answered Q&A (in groups) to `docs/discovery_journal.md` — only answered questions -2. Rewrite `.feature` description for each feature touched — others stay unchanged -3. Append session synthesis block to `docs/discovery.md` — LAST, after all `.feature` updates - -**Session status**: the journal session header begins with `Status: IN-PROGRESS` (written before questions). Updated to `Status: COMPLETE` after all writes. If a session is interrupted, the next agent detects `IN-PROGRESS` and resumes the pending writes before starting a new session. - -**Baselining**: PO writes `Status: BASELINED (YYYY-MM-DD)` in the `.feature` file when the stakeholder approves that feature's discovery and the decomposition check passes. - -Commit per session: `feat(discovery): ` - -### Stage 2 — Specification (PO alone, per feature) - -Only runs on features with `Status: BASELINED`. No stakeholder involvement. If a gap requires stakeholder input, open a new Stage 1 session first. 
- -**Step A — Stories**: derive one `Rule:` block per user story from the baselined feature description. INVEST gate: all 6 letters must pass. -Commit: `feat(stories): write user stories for ` - -**Step B — Criteria**: PO writes `Example:` blocks with `@id` tags under each `Rule:`. Pre-mortem per Rule before writing any Examples. MoSCoW triage per Example. Examples are frozen after commit. -Commit: `feat(criteria): write acceptance criteria for ` - -**Criteria are frozen**: no `Example:` changes after commit. Adding a new Example with a new `@id` replaces old. - -### Bug Handling - -When a defect is reported: -1. **PO** adds a `@bug` Example to the relevant `Rule:` in the `.feature` file and moves (or keeps) the feature in `backlog/` for normal scheduling. -2. **SE** handles the bug when the feature is selected for development (standard Step 2–3 flow): implements the specific `@bug`-tagged test in `tests/features//` and also writes a `@given` Hypothesis property test in `tests/unit/` covering the whole class of inputs. -3. Both tests are required. SE follows the normal TDD loop (Step 3). 
- -## Filesystem Structure - -``` -docs/ - discovery_journal.md ← raw Q&A, PO appends after every session - discovery.md ← synthesis changelog, PO appends after every session - architecture.md ← all architectural decisions, SE appends after Step 2 - glossary.md ← living glossary, PO updates via update-docs skill - branding.md ← project identity, colors, release naming, wording (designer owns) - assets/ ← logo.svg, banner.svg, and other visual assets (designer owns) - c4/ - context.md ← C4 Level 1 diagram, PO updates via update-docs skill - container.md ← C4 Level 2 diagram, PO updates via update-docs skill - features/ - backlog/.feature ← narrative + Rules + Examples - in-progress/.feature - completed/.feature - -tests/ - features// - _test.py ← one per Rule: block, software-engineer-written - unit/ - _test.py ← software-engineer-authored extras (no @id traceability) -``` - -Tests in `tests/unit/` are software-engineer-authored extras not covered by any `@id` criterion. Any test style is valid — plain `assert` or Hypothesis `@given`. Use Hypothesis when the test covers a **property** that holds across many inputs (mathematical invariants, parsing contracts, value object constraints). Use plain pytest for specific behaviors or single edge cases discovered during refactoring. - -- `@pytest.mark.slow` is mandatory on every `@given`-decorated test (Hypothesis is genuinely slow) -- `@example(...)` is optional but encouraged when using `@given` to document known corner cases -- No `@id` tags — tests with `@id` belong in `tests/features/`, written by software-engineer - -## Test File Layout - -``` -tests/features//_test.py -``` - -### Stub Format - -Stubs are auto-generated by pytest-beehave. The SE triggers generation at Step 2 end by running `uv run task test-fast`. 
pytest-beehave reads the in-progress `.feature` file and creates one skipped function per `@id`: - -```python -@pytest.mark.skip(reason="not yet implemented") -def test__<@id>() -> None: - """ - <@id steps raw text including new lines> - """ -``` - -### Markers -- `@pytest.mark.slow` — takes > 50ms; applied to Hypothesis tests and any test with I/O, network, or DB -- `@pytest.mark.deprecated` — auto-skipped by pytest-beehave; used for superseded Examples - -## Development Commands - -```bash -# Install dependencies -uv sync --all-extras - -# Run the application (for humans) -uv run task run - -# Run the application with timeout (for agents — prevents hanging) -timeout 10s uv run task run - -# Run tests (fast, no coverage) -uv run task test-fast - -# Run full test suite with coverage -uv run task test - -# Run tests with coverage report generation -uv run task test-build - -# Lint and format -uv run task lint - -# Type checking -uv run task static-check - -# Build documentation -uv run task doc-build -``` - -## Code Quality Standards - -- **Principles (in priority order)**: YAGNI > KISS > DRY > SOLID > Object Calisthenics > appropriate design patterns > complex code > complicate code > failing code > no code -- **Linting**: ruff format, ruff check, Google docstring convention, `noqa` forbidden -- **Type checking**: pyright, 0 errors required -- **Coverage**: 100% (measured against your actual package) -- **Function length**: ≤ 20 lines (code lines only, excluding docstrings) -- **Class length**: ≤ 50 lines (code lines only, excluding docstrings) -- **Max nesting**: 2 levels -- **Instance variables**: ≤ 2 per class *(exception: dataclasses, Pydantic models, value objects, and TypedDicts are exempt — they may carry as many fields as the domain requires)* -- **Semantic alignment**: tests must operate at the same abstraction level as the acceptance criteria they cover - -### Software-Engineer Quality Gate Priority Order - -During Step 3 (TDD Loop), correctness 
priorities are: - -1. **Design correctness** — YAGNI > KISS > DRY > SOLID > Object Calisthenics > appropriated design patterns > complex code > complicated code > failing code > no code -2. **One test green** — the specific test under work passes, plus `test-fast` still passes -3. **Reviewer code-design check** — reviewer verifies design + semantic alignment (no lint/pyright/coverage yet) -5. **Quality tooling** — `lint`, `static-check`, full `test` with coverage run only at software-engineer handoff (before Step 4) - -Design correctness is far more important than lint/pyright/coverage compliance. A well-designed codebase with minor lint issues is better than a lint-clean codebase with poor design. - -## Verification Philosophy - -- **Automated checks** (lint, typecheck, coverage) verify **syntax-level** correctness — the code is well-formed. -- **Human review** (semantic alignment, code review, manual testing) verifies **semantic-level** correctness — the code does what the user needs. -- Both are required. All-green automated checks are necessary but not sufficient for APPROVED. -- Reviewer defaults to REJECTED unless correctness is proven. - -## Release Management - -Version format: `v{major}.{minor}.{YYYYMMDD}` - -- Minor bump for new features; major bump for breaking changes -- Same-day second release: increment minor, keep same date -- Release name: defined by `docs/branding.md > Release Naming > Convention`; absent or blank defaults to version string only (no name) - -Use `@software-engineer /skill git-release` for the full release process. When requested by the stakeholder - -## Session Management - -Every session: load `skill run-session`. Read `TODO.md` first, update it at the end. - -`TODO.md` is a session bookmark — not a project journal. See `.opencode/skills/run-session/SKILL.md` for the full structure including the Cycle State block used during Step 3. 
- -## Setup - -To initialize a new project from this template: -```bash -@setup-project -``` - -The setup agent will ask for your project name, GitHub username, author info, and configure all template placeholders. diff --git a/CHANGELOG.md b/CHANGELOG.md index e81b0bd..bdc3a85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,370 +1,9 @@ # Changelog -All notable changes to this template will be documented in this file. +All notable changes to agents-smith will be documented in this file. -## [v6.4.20260420] - Minimal Prometheus - 2026-04-20 +## [0.1.0] - 20260501 ### Added -- **Branding system**: `docs/branding.md` — project identity, colour palette, release naming convention, and wording guidelines; agents read this file to personalise release names, C4 diagram colours, and docs without touching `.opencode/` (#89) -- **Designer agent** (`designer.md`): owns `docs/branding.md` and `docs/assets/`; uses `design-colors` and `design-assets` skills (#89) -- **`design-colors` skill**: step-by-step colour palette selection with WCAG 2.1 AA 4.5:1 contrast validation; Itten/Albers colour theory embedded inline (#89) -- **`design-assets` skill**: SVG banner and logo creation workflow; W3C SVG 2 spec and WCAG 1.1.1 `aria-label` requirements embedded (#89) -- **`setup-project` Step 6 Branding**: collects tagline, mission, vision, tone, theme, and colours; suggests WCAG-validated palettes when user provides a theme but no colours; writes `docs/branding.md` (#89) -- **Output Style + Rule #8** in `run-session` skill: minimalist output discipline — signal only, no tool narration, session ends with `Next:` line (#89) -### Changed -- **Skill renames to verb-noun convention**: `session-workflow → run-session`, `scope → define-scope`, `implementation → implement`, `feature-selection → select-feature`, `living-docs → update-docs`, `pr-management → create-pr`, `design-patterns → apply-patterns`, `code-quality → check-quality` — all references updated across agents, skills, and 
`AGENTS.md` (#89) -- **`docs/images/` → `docs/assets/`**: asset directory renamed; `README.md` path updated (#89) -- **`git-release` v1.1**: reads `docs/branding.md` for optional release naming and theme; release name omitted from commit/release title if convention is absent (#89) -- **`update-docs` skill**: reads `docs/branding.md` primary/accent colours to apply `%%{init:...}%%` theming to Mermaid C4 diagrams (#89) - -## [v6.2.20260419] - Autonomous Stenella - 2026-04-19 - -### Added -- **pytest-beehave integration**: `@id` tags now auto-assigned to untagged `Example:` blocks on every `pytest` run; test stubs auto-generated from `.feature` files at Step 2 end — no manual ID generation or stub writing required (#78) -- **Self-declaration defense in depth**: all 25 items numbered 1–25 in `implementation/SKILL.md`; `verify/SKILL.md` now hard-gates on completeness (count must equal 25, sequence must be gapless) before item audit begins (#78) - -### Changed -- **Naming convention**: `.feature` file paths now use `` (kebab); test directories use `` (underscore) — applied consistently across all skills, `AGENTS.md`, and docs (#78) -- **`conftest.py`**: removed manual `deprecated` marker skip hook — now owned entirely by pytest-beehave (#78) -- **`scope/SKILL.md`**: removed all manual `@id` generation instructions and `@id` uniqueness checklist items — assignment is automatic (#78) -- **`product-owner.md`**: removed `@id` from bug handling and gap-resolution table — PO writes `Example:` blocks only (#78) -- **README**: added "Why this template?" 
section; added `pytest-beehave` to tooling table; replaced static stub example with a two-part Gherkin-in → stub-out illustration (#78) -- **`verify/SKILL.md` report table**: expanded Self-Declaration Audit from 21 collapsed rows to 25 numbered rows matching the implementation template exactly (#78) - -## [v6.1.20260419] - Contextual Ambystoma - 2026-04-19 (hotfix) - -### Added -- **living-docs skill**: new PO skill for generating C4 architecture diagrams (`docs/c4/context.md`, `docs/c4/container.md`) and maintaining the living glossary (`docs/glossary.md`) after each feature acceptance (Step 5) or on stakeholder demand -- **docs/c4/**: new directory for C4 Level 1 (Context) and Level 2 (Container) Mermaid diagrams; placeholder `.gitkeep` added -- **docs/glossary.md**: new living glossary file owned by `living-docs` skill (PO); terms sourced from completed feature files, `docs/discovery.md` Domain Model, and `docs/architecture.md` decisions -- **Scientific research — documentation.md**: new file with 4 entries (#59–62): Ko et al. 2007 (information needs), Winters et al. 
2020 (docs-as-code), Procida 2021 (Diátaxis framework), Allspaw 2012 (blameless post-mortems) -- **Scientific research — domain-modeling.md**: 6 new DDD entries (#63–68): Evans DDD Reference CC-BY, Fowler UbiquitousLanguage bliki, Fowler BoundedContext bliki, Vernon IDDD, Verraes "UL is not a glossary", Evans Whirlpool process -- **Scientific research — architecture.md**: 4 new entries (#55–58): Nygard ADRs, Kruchten 4+1 View Model, Brown C4 Model, Parnas information hiding - -### Changed -- **discovery.md template**: `### Scope` section renamed to `### Context` — the section is a session-level general-context synthesis, not a complete project scope definition -- **scope/SKILL.md**: updated `### Scope` references to `### Context` in Step C instructions and template block -- **living-docs/SKILL.md**: glossary entry format updated — `**Context:**` renamed to `**Bounded context:**` (mandatory for multi-context projects); `Domain Event` added as a distinct Type value; secondary-artifact note added to preamble; source-traceability rule replaces "do not invent" rule; checklist updated accordingly -- **implementation/SKILL.md**: Step 2 Read Phase now includes `docs/glossary.md` as item 2 — SE reads existing domain terms before naming classes, methods, and modules to avoid inventing synonyms -- **create-skill/SKILL.md**: `living-docs` added to available skills table -- **AGENTS.md**: skills table updated with `living-docs`; filesystem structure section updated (`docs/c4/`, `docs/glossary.md` added; `docs/architecture/` subtree removed; TODO.md reference updated) - -### Removed -- **docs/architecture/**: folder deleted; the ADR log lives at `docs/architecture.md` (SE-owned); the old `adr-template.md` inside the folder was redundant -- **docs/workflow.md**: deleted; canonical workflow reference is `AGENTS.md` and the skills under `.opencode/skills/` -- **Dockerfile / docker-compose.yml**: removed as unused template artifacts - -## [v6.0.20260419] - Declarative Nautilus - 
2026-04-19 - -### Added -- **PO Self-Declaration**: mandatory 11-claim checklist (INVEST I/V/S/T, observable Then, no impl details, entity coverage, distinct examples, unique IDs, pre-mortem, scope boundary) written into TODO.md at end of Stage 2 Step B before criteria commit; every DISAGREE is a hard blocker (#71) -- **Reviewer Stance Declaration**: 5-claim block (adversarial mindset, manual trace, boundary check, semantic read, independence) added to verify/SKILL.md report template before APPROVED/REJECTED verdict; DISAGREE allowed with explanation, unexplained DISAGREE = REJECTED (#71) -- **session-workflow**: Step 1 Stage 2 Criteria TODO format section with full Self-Declaration template and Rule 9 enforcing the declaration before criteria commit (#71) -- **Three append-only project docs**: `docs/discovery_journal.md` (raw Q&A), `docs/discovery.md` (synthesis changelog), `docs/architecture.md` (architectural decisions) replace the old flat `docs/features/discovery.md` (#70) - -### Changed -- **Discovery model** (breaking): Phase 1 / Phase 2 / Phase 3 / Phase 4 replaced by 2-stage model — Stage 1 Discovery (unified iterative sessions, PO + stakeholder) and Stage 2 Specification (PO alone, per BASELINED feature) (#70) -- **Feature file moves** (breaking): PO is now the sole owner of all `.feature` file moves (backlog → in-progress and in-progress → completed); SE and reviewer explicitly prohibited from moving files with clear escalation protocol (#70) -- **Session protocol**: discovery journal sessions use `Status: IN-PROGRESS` / `Status: COMPLETE` markers; real-time split rule (>2 concerns or >8 candidate Examples splits within the same session); journal writes only answered Q&A in groups (#70) -- **Bug handling**: explicit protocol — PO adds `@bug @id` Example, SE writes both the `@id` test in `tests/features/` and a `@given` Hypothesis property test in `tests/unit/`; both required (#70) -- **scope/SKILL.md**: full rewrite to 2-stage model with session start 
checklist, question order (general → cross-cutting → per-feature), after-questions steps, baselining section, and bug handling section (#70) -- **feature-selection/SKILL.md**: updated "Phase 4 (Criteria)" reference to "Stage 2 Step B (Criteria)" (#70) -- **All agent files and skills**: updated to reflect new document model, terminology, and chain of responsibility (#70, #71) - -## [v5.2.20260418] - Emergent Colugo - 2026-04-18 (hotfix) - -### Fixed -- **Role naming**: Replaced stale `developer` agent-role references with `software-engineer` in `implementation/SKILL.md`, `docs/scientific-research/ai-agents.md`, `docs/scientific-research/cognitive-science.md`, and `docs/features/completed/display-version.feature` -- **session-workflow**: Replaced hardcoded agent names in `## Next` line examples with `@` placeholders; added note pointing to `AGENTS.md` as source of truth; added missing Step 2 (Architecture) example - -## [v5.1.20260418] - Emergent Colugo - 2026-04-18 - -### Added -- **refactor skill**: Standalone skill with Fowler's full catalogue, green-bar rule, two-hats rule, SOLID/OC self-declaration table, and preparatory refactoring protocol — loaded on demand at REFACTOR phase -- **feature-selection skill**: WSJF-based backlog prioritisation (Reinertsen 2009) with Kano value scoring and dependency gate — PO loads this when `TODO.md` is idle -- **ADR template**: `docs/architecture/adr-template.md` for Step 2 architectural decisions -- **Logo and banner**: visual identity added to README (SVG assets in `docs/images/`) - -### Changed -- **Architecture stubs**: Step 2 now writes stubs directly into `/` instead of an Architecture section in the feature file; stubs have no docstrings (add after GREEN when lint enforces them); folder structure is suggested, not prescribed — `ports/` and `adapters/` only created when a concrete external dependency is confirmed -- **design-patterns skill**: Narrowed to pure GoF catalogue (23 patterns, smell-triggered before/after 
examples); SOLID, OC, LoD, CQS, Python Zen moved to refactor skill self-declaration checklist -- **session-workflow**: `Next` line in TODO.md now requires `Run @` prefix so the human always knows which agent to invoke; idle state loads `skill feature-selection` instead of a vague prompt -- **verify skill**: Added orphaned-stub check (skip-marked tests that were never implemented); report template now includes structured `Next Steps` block directing the human to the correct agent -- **Scientific research**: `docs/academic_research.md` split into 9 domain files under `docs/scientific-research/` (cognitive-science, testing, architecture, oop-design, refactoring-empirical, requirements-elicitation, domain-modeling, software-economics, ai-agents) - -### Fixed -- Stale `docs/architecture/STEP2-ARCH.md` reference removed from workflow diagram and skill -- Protocol smell-check gate now marked N/A when no external dependencies are identified in scope - -## [v5.0.20260418] - Structured Phascolarctos - 2026-04-18 - -### Added -- **design-patterns skill**: Full GoF pattern catalogue with smell-triggered patterns, SOLID, Object Calisthenics, Python Zen, Law of Demeter, CQS, Tell Don't Ask — loaded on demand at Steps 2-3 -- **create-agent skill**: Research-backed agent creation guide with OpenAI/Anthropic best practices, ownership boundaries, tool surface design, and escalation rules -- **software-engineer agent**: Dedicated agent file replacing `developer.md`; owns Steps 2-3 and release -- **3-session discovery structure**: Phase 1 and Phase 2 now each use a 3-session template with template gates (§1/§2/§3 must be confirmed before proceeding); active listening protocol (3 levels) codified in scope skill - -### Changed -- **5-step workflow** (breaking): Steps restructured — TDD loop merged into Step 3, Verify is Step 4, Accept is Step 5; all agents, skills, and docs updated to match -- **Behavior groups terminology**: "Cluster" renamed to "behavior group" throughout scope skill, 
AGENTS.md, workflow.md, and templates for clearer AI focus -- **Story candidates terminology**: Phase 3 now derives "story candidates" → `Rule:` blocks, removing ambiguity from the cluster-to-story mapping -- **Test stub format** (breaking): Stubs now use `@pytest.mark.skip(reason="not yet implemented")` instead of `raise NotImplementedError`; skip marker is removed when implementing in RED phase -- **Dropped `@pytest.mark.unit` and `@pytest.mark.integration`**: Only `@pytest.mark.slow` and `@pytest.mark.deprecated` remain; folder structure (`tests/features/` vs `tests/unit/`) encodes test type -- **BASELINED gate enforced**: PO may not move a feature to `in-progress/` unless its discovery section has `Status: BASELINED`; enforced in product-owner.md and session-workflow -- **tdd skill removed**: Replaced by implementation skill with inline TDD guidance -- **gen_test_stubs.py removed**: Script deleted along with tdd skill - -### Fixed -- **pyproject.toml**: Removed broken `gen-tests` task; removed `raise NotImplementedError` from coverage exclusions; removed `unit`/`integration` marker definitions -- **Role naming**: `developer` → `software-engineer` across all files -- **Step count**: All references to "6 steps" updated to "5 steps" - -## [v4.1.20260416] - Recursive Acinonyx - 2026-04-16 - -### Added -- **Single `.feature` file per feature**: Each feature is now one `.feature` file with `Rule:` blocks for user stories and `Example:` blocks for ACs — discovery content embedded in the feature description free text; replaces the folder-per-feature structure -- **Rule-scoped test files**: `gen_test_stubs.py` rewritten to parse `Rule:` blocks; each Rule maps to one test file (`<rule>_test.py`); function naming is now `test_<rule>_<id>()` -- **Hypothesis-only `tests/unit/`**: Every test in `tests/unit/` must use `@given`; `@pytest.mark.slow` is mandatory on all Hypothesis tests; plain `assert` tests without `@given` are forbidden -- **Mandatory `## Self-Declaration` in TODO.md**: 
Developer writes the 21-item checklist into a `## Self-Declaration (@id:<uuid>)` block in `TODO.md` at `SELF-DECLARE` phase before requesting reviewer check (Rule 8 in session-workflow) - -### Changed -- **`gen_test_stubs.py`**: Scans `docs/features/{backlog,in-progress,completed}/*.feature` directly (not subfolders); generates one test file per `Rule:` block -- **`gen_todo.py`**: `find_in_progress_feature()` now finds `.feature` files directly in `in-progress/`; source path is `docs/features/in-progress/<feature>.feature` -- **`skills/tdd/SKILL.md`**: Test Tool Decision table updated to separate `tests/features/` (plain pytest, generated) from `tests/unit/` (Hypothesis only); `tests/unit/` rules section added -- **`skills/implementation/SKILL.md`**: Unit test rule tightened — `@given` required, `@pytest.mark.slow` mandatory, plain tests forbidden -- **`skills/verify/SKILL.md`**: Two new rows in section 4f: `@given` check and `@slow` check; two new rows in Standards Summary -- **`skills/scope/SKILL.md`**: All four phases rewritten for file-based workflow; `discovery-template.md` converted to `.feature` file template -- **`skills/session-workflow/SKILL.md`**: Step 4 TODO format updated with mandatory `## Self-Declaration` block template; Rule 8 added -- **Completed feature migrated**: `docs/features/completed/display-version/` (three files) merged into `docs/features/completed/display-version.feature` (single file with two `Rule:` blocks) - -### Fixed -- **OC-8 clarification**: The only valid fix for > 2 `self.x` is a new named class (Rule 3 or Rule 4); hardcoded constants, class-level variables, inlined literals, and parent-class moves are all invalid workarounds and remain FAIL - -## [v4.0.20260416] - Precise Tarsius - 2026-04-16 - -### Added -- **Per-test Design Self-Declaration**: After REFACTOR, developer fills a 20-item checklist (YAGNI → KISS → DRY → SOLID-S/O/L/I/D → OC rules 1–9) with `file:line` evidence before requesting reviewer check; reviewer independently audits 
claims using an 11-row comparison table (#58) -- **Package Verification step**: Mandatory before writing any code — read `pyproject.toml → [tool.setuptools] packages`, confirm directory exists on disk; hard stop if missing (#58) -- **SELF-DECLARE phase**: New phase added to the Red-Green-Refactor cycle between REFACTOR and REVIEWER; Cycle State now `RED | GREEN | REFACTOR | SELF-DECLARE | REVIEWER(code-design) | COMMITTED` (#58) -- **template-config.yaml**: Declarative single source of truth for all setup-project substitutions — `defaults:` block with 6 parameters, `substitutions:` map with literal `old:` strings, `{variable}` `new:` patterns, and expected `count:` per file (#58) -- **Post-mortem docs**: Two ping-pong-cli post-mortems documenting the systemic failures that drove this release (#58) - -### Changed -- **verify/SKILL.md Scope Guard**: Reviewer receives completed Design Self-Declaration and independently verifies each claim; responds using structured 11-row comparison table (#58) -- **verify/SKILL.md section 4g**: New row — `Imports use correct package name` (check imports match `[tool.setuptools] packages`); existing rows made more precise with `pyproject.toml` references (#58) -- **reviewer.md per-test Step 4 section**: Rewritten to reference `skill implementation` verification table; clarifies no commands run during Step 4 reviews (#58) -- **reviewer.md Zero-Tolerance Rule 1**: Scoped to `(Step 5 only — per-test Step 4 checks are code-design only, no commands)` (#58) -- **setup-project.md**: Reads `template-config.yaml`; each apply step delegates to the config map rather than carrying implicit pattern knowledge (#58) -- **Template app simplified**: `app/version.py` deleted; `app/__main__.py` reduced from 41 to 23 lines (#58) - -### Fixed -- **gen_todo.py path**: `parents[5]` → `parents[4]` — was resolving one directory above the project root (#58) -- **session-workflow Cycle State**: `SELF-DECLARE` phase added to documented phase list and Rule 6 
(#58) -- **code-quality/SKILL.md**: Removed "has been absorbed" migration language (#58) -- **Dockerfile stale references**: `python_package_template.python_module_template` → `app` in HEALTHCHECK and CMD (#58) -- **docker-compose.yml stale references**: `python_package_template` → `app` in volume mounts and command (#58) - -### Breaking Changes -- `project_defaults.json` deleted — replaced by `template-config.yaml` (#58) -- `app/version.py` and `tests/version_test.py` deleted — template app simplified to minimal `__main__.py` + one Hypothesis unit test (#58) - -## [v3.2.20260415] - Vigilant Mantis - 2026-04-15 - -### Added -- **Adversarial verification mandate**: Reviewer's default hypothesis is now "the code is broken despite green checks" — job is to find the failure mode, not confirm it works (#54) -- **Production-grade gate**: New step 3 in verification — app must exit cleanly AND output must change when input changes; static output regardless of input = REJECTED (#54) -- **UUID Drift bash check**: One-liner detects duplicate UUIDs across test functions; any duplicate = REJECTED with fix instructions (#54) -- **docs/academic_research.md**: 15 cognitive and social science mechanisms with full citations grounding every workflow design decision (pre-mortem, implementation intentions, adversarial collaboration, elaborative encoding, and 11 more) (#54) -- **Design pattern decision table**: Added to `developer.md` and `implementation/SKILL.md`; any detected anti-pattern = REJECTED (#54) -- **Architecture contradiction check**: Developer must cross-check ADRs against ACs before writing production code (#54) -- **PO pre-mortem**: Added at scope step and acceptance step (#54) -- **Semantic alignment rule**: Tests must operate at same abstraction level as AC (#54) -- **Integration test requirement**: Multi-component features require at least one integration test through the public entry point (#54) -- **Verification Philosophy section**: Added to AGENTS.md — automated 
checks verify syntax-level correctness; human review verifies semantic-level correctness; both required (#54) - -### Changed -- **Verification order**: Code review before automated commands; run app first as production-grade gate (#54) -- **All review sections converted to tables**: Correctness, KISS, SOLID, ObjCal, Design Patterns, Tests, Versions/Build all have PASS/FAIL/Fix columns (#54) -- **UUID Uniqueness rule**: If only Given varies it is a property — use Hypothesis `@given` + `@example`, not multiple test functions; if When/Then differs use `extend-criteria` (#54) -- **Production-grade self-check in implementation**: Developer must verify output changes with input before handoff (#54) - -## [v3.1.20260414] - Tidal Capybara - 2026-04-14 - -### Added -- **extend-criteria skill**: New skill for any agent to add acceptance criteria discovered mid-flight or post-merge, with decision rule (gap within scope vs. new feature), per-role procedures, and commit protocol -- **Source: field on acceptance criteria**: Mandatory traceability field on every criterion (`stakeholder | po | developer | reviewer | bug`) — records who originated the requirement - -### Changed -- **Test function naming**: `test_<feature>_<id>` replaces `test_<feature>_should_<behavior>` -- **Test docstring first line**: UUID only (no trailing description) — `"""<uuid>\n\nGiven: ...` -- **development commands**: All skill and agent files now use `uv run task` consistently (not bare `task`) -- **tests/ layout**: Documented as flat (no unit/ or integration/ subdirectories) -- **pytest.skip prohibition**: Aligned across files — allowed with written justification in the docstring -- **Marker decision table**: Moved to tdd/SKILL.md only (developer's decision, not PO's) -- **mv to in-progress**: Ownership reassigned to developer Step 2 (not PO scope step) -- **TODO.md status markers**: Added `[~]` (in progress) and `[-]` (cancelled) to documented legend -- **--doctest-modules**: Documented in implementation/SKILL.md (task test runs doctest 
modules) -- **verify/SKILL.md**: Report template uses flat `tests/:` path format -- **exit code wording**: `exit non-124` (was ambiguous `exit 0 or 124`) in developer.md -- **README.md**: `uv sync --all-extras` and `uv run task` commands throughout - -### Fixed -- Removed stale `docs/features/in-progress/auto-publish-docs.md` -- Split compound acceptance criterion (two outcomes in one Then) into two single-outcome criteria -- Added `@pytest.mark.slow` to Hypothesis tests in reference implementation -- Added `# Given / # When / # Then` body comments to all reference tests -- Removed duplicate assertion from `test_version_logs_correct_message` -- Moved `StringIO` import from test body to module-level imports - -## [v3.0.20260414] - Drifting Axolotl - 2026-04-14 - -### Breaking Changes -- **Workflow redesigned**: 8-phase/6-role system replaced with 6-step/3-role (Product Owner, Developer, Reviewer) -- **Roles removed**: architect, manager, repo-manager, requirements-gatherer, overseer agents deleted -- **Feature directories restructured**: `docs/features/{business,architecture}/` replaced with flat `docs/features/{backlog,in-progress,completed}/` - -### Added -- **product-owner agent**: Defines scope, acceptance criteria, picks features, accepts deliveries (Steps 1 + 6) -- **reviewer agent**: Read+bash only, runs all commands, produces APPROVED/REJECTED report (Step 5) -- **scope skill**: PO guide for writing user stories + UUID acceptance criteria -- **verify skill**: Reviewer guide for running commands and code review checklist -- **Unified docs site**: `docs/index.html` landing page linking to API Reference, Coverage, Test Results -- **ghp-import**: One-liner `task doc-publish` replaces complex inline Python - -### Changed -- **developer agent**: Owns all of Steps 2-4+6 including architecture, tests, code, and release -- **9 skills rewritten**: session-workflow, tdd, implementation, code-quality, pr-management, git-release, create-skill (lean, <150 lines each) -- 
**Test markers reduced**: from 11 (with duplicate) to 3: `unit`, `integration`, `slow` -- **doc-build**: Now generates all three outputs (pdoc API + pytest-cov HTML + pytest-html) -- **CI workflow**: Cleaned up to use `uv run task <task>` consistently -- **setup-project agent**: No longer uses setup_project.py; agent applies changes directly - -### Removed -- 11 skills deleted (architectural-analysis, delegation-coordination, epic-workflow, feature-definition, qa-enforcement, requirements-management, signature-design, workflow-coordination, prototype-script, create-agent, reference/) -- `setup_project.py` script and `.opencode/templates/` directory -- Wrong `dotenv` dependency (replaced nothing — was unused) -- `mutmut` dev dependency (YAGNI) - -## [v2.2.20260413] - Luminous Kestrel - 2026-04-13 - -### Added -- **Architecture-First Feature System** - New directory structure separating business and architecture features -- **Architectural Analysis Skill** - Systematic architecture documentation for each feature -- **8-Phase Development Cycle** - Expanded from 7-phase with dedicated Architecture Analysis phase - -### Changed -- **BDD → Acceptance Criteria** - Renamed gherkin-validation to acceptance-criteria-validation for accurate terminology -- **Consistency Updates** - Fixed phase numbering, cross-references, and documentation across all agents and skills -- **Epic-Workflow Refactor** - Converted from epic-based to feature-selection with architecture-first priority -- **Manager Agent** - Enhanced with test signature creation capabilities - -### Migration Notes -- No breaking changes in this release -- Projects can continue using existing workflow - -## [v2.1.20260413] - Polished Gecko - 2026-04-13 - -### Added -- Docker simplification and cleanup -- V2 Development Workflow with CI/CD fixes -- Template refactoring for generic app package -- Enhanced QA enforcement skills - -### Changed -- Complexity fixes for CI compliance -- CodeQL config conflict resolved - -## 
[v2.0.20260411] - Armored Pangolin - 2026-04-11 - -### 🚀 MAJOR RELEASE - V1 → V2 Architecture Transition - -This represents a fundamental architectural shift from V1 (template validation workflows) to V2 (project development workflows). - -### Breaking Changes -- **Workflow Architecture**: Complete transition from template validation (V1) to project development (V2) -- **CI/CD Pipeline**: New comprehensive GitHub Actions workflow replacing template-specific workflows -- **Branch Structure**: V2/init becomes the new development foundation -- **Agent Configuration**: Updated agent roles and capabilities for project development - -### Security Improvements -- Enhanced GitHub Actions workflow security with proper permissions blocks -- Removed risky PIP_USER environment variable from CI/CD pipeline -- Added secure error handling to shell scripts with 'set -euo pipefail' -- Implemented job-level permissions for all CI workflow operations - -### Infrastructure & DevOps -- Modernized Docker setup with security-first containerization approach -- Comprehensive CI/CD pipeline with GitHub Actions integration -- Improved workflow security following GitHub Advanced Security recommendations -- Full project development workflow implementation - -### Development Experience -- Complete project-focused development environment -- Better error handling and security practices in automation -- Enhanced development workflow with secure defaults -- Improved CI/CD reliability and security posture - -### Migration Notes -- **BREAKING**: This is a major version requiring migration from V1 template workflows -- V1 template validation workflows are replaced by V2 project development workflows -- Projects using V1 should plan migration to V2 architecture -- All security improvements follow GitHub security best practices - -## [v1.7.20260410] - Vivid Cardinal - 2026-04-10 - -### Added -- **QA-gated Epic Workflow** - Complete epic-based development with mandatory quality checkpoints at each phase 
-- **Epic-workflow Skill** - Manages epic-based development with automatic feature progression -- **EPICS.md Template** - Epic tracking and management file for generated projects - -### Changed -- Updated all agent descriptions to use industry-standard roles (Development Lead, Software Architect, QA Specialist, Business Analyst, Release Engineer) -- Removed model specifications from all agents to make template model-agnostic -- Updated AGENTS.md to properly document all 5 generated project agents and all skills -- Updated README.md with new workflow and agent roles - -### Fixed -- Documentation now accurately reflects what exists in template - -## [v1.6.20260409] - Guardian Owl - 2026-04-09 - -### Added -- **Overseer Agent** - Quality assurance agent that reviews work after each test implementation and requests changes if needed -- **Requirements Gatherer Agent** - Agent that asks questions to understand project needs, updates documentation, creates detailed analysis for architect - -### Changed -- Updated developer workflow to include `@overseer` calls after Phase 3 (TDD tests) and Phase 7 (Quality Assurance) -- Updated AGENTS.md with new agents and updated workflow examples - -## [v1.0.0] - 2026-03-12 - -### Added -- **AI-Enhanced Development Workflow** - Complete OpenCode integration for AI-powered development -- **Developer Agent** - Main development agent with 8-phase TDD workflow -- **Architect Agent** - Design review agent for SOLID principles and object calisthenics compliance -- **Repository Manager Agent** - Git operations, PRs, and themed releases management -- **Development Skills** - feature-definition, prototype-script, tdd, signature-design, implementation, code-quality -- **Repository Skills** - git-release (hybrid calver versioning with themed releases), pr-management -- **Meta Skills** - create-skill, create-agent for extending OpenCode -- **Template Management** - template-manager agent, template-test, template-release skills -- **Comprehensive 
CI Workflow** - Template validation, generated project tests, Docker builds -- **Validation Scripts** - cookiecutter.json, pyproject.toml, YAML frontmatter validation - -### Changed -- Updated README.md with modern AI-focused branding -- Updated generated project README template with AI development workflow - -### Features -- **7-Phase Development Cycle**: Feature Definition → Prototype → TDD → Signature Design → Architecture Review → Implementation → Quality Assurance -- **SOLID Principles Enforcement** - Single responsibility, dependency inversion, interface segregation -- **Object Calisthenics** - No primitives, small classes, behavior-rich objects -- **Hybrid Calver Versioning**: v1.2.20260302 format with themed releases -- **Themed Release Names**: "Swift Cheetah", "Vigilant Owl", "Creative Fox" based on PR sentiment -- **Property-Based Testing**: Hypothesis integration for robust test coverage - -### Migration Notes -- This is the first semantic version release -- No breaking changes to cookiecutter.json structure -- Generated projects now include OpenCode agents and skills -- Existing projects can regenerate to get new features +- Initial release. \ No newline at end of file diff --git a/README.md b/README.md index 3744d34..4884775 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
-Python Project Template +agents-smith

@@ -9,156 +9,69 @@ [![Stargazers][stars-shield]][stars-url] [![Issues][issues-shield]][issues-url] [![MIT License][license-shield]][license-url] -[![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?style=for-the-badge)](https://nullhack.github.io/smith/coverage/) -[![CI](https://img.shields.io/github/actions/workflow/status/nullhack/smith/ci.yml?style=for-the-badge&label=CI)](https://github.com/nullhack/smith/actions/workflows/ci.yml) +[![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?style=for-the-badge)](https://nullhack.github.io/agents-smith/coverage/) +[![CI](https://img.shields.io/github/actions/workflow/status/nullhack/agents-smith/ci.yml?style=for-the-badge&label=CI)](https://github.com/nullhack/agents-smith/actions/workflows/ci.yml) [![Python](https://img.shields.io/badge/python-3.13-blue?style=for-the-badge)](https://www.python.org/downloads/) -**From zero to hero — production-ready Python, without the ceremony.** +**AI-assisted software delivery system with flow-based agent orchestration.**
--- -## Quick Start +A delivery system that treats documentation as a first-class artifact and enforces production rigor through an AI-assisted workflow. Your team ships features, not broken promises. -```bash -git clone https://github.com/nullhack/smith -cd smith -curl -LsSf https://astral.sh/uv/install.sh | sh # skip if uv installed -uv sync --all-extras -opencode && @setup-project # personalise for your project -uv run task test && uv run task lint && uv run task static-check -``` - ---- - -## Why this template? - -Most Python templates give you a folder structure and a `Makefile`. This one gives you a **complete delivery system**: five AI agents, a structured five-step workflow, and quality gates that cannot be silenced by convention. - -The goal is to give every project — from its first commit — the same rigour that mature teams take years to establish. - -- **No feature starts without written acceptance criteria** — Gherkin `Example:` blocks traced to tests -- **No feature ships without adversarial review** — the reviewer's default hypothesis is "broken" -- **No guesswork on test stubs** — generated automatically from `.feature` files -- **No manual `@id` tags** — assigned automatically when you run tests -- **AI agents for every role** — each agent has scoped instructions and cannot exceed its authority +Developers get TDD by default with traceability from requirement to test. Product Owners get living documentation that never drifts from code. Architects get adversarial review that catches what automated checks miss. 
--- -## How it works - -### The delivery cycle - -``` -SCOPE → ARCH → TDD LOOP → VERIFY → ACCEPT -``` - -| Step | Role | Output | -|------|------|--------| -| **1 · SCOPE** | Product Owner | Discovery interviews + Gherkin stories + acceptance criteria | -| **2 · ARCH** | Software Engineer | Module stubs, ADRs, auto-generated test stubs | -| **3 · TDD LOOP** | Software Engineer | RED → GREEN → REFACTOR, one criterion at a time | -| **4 · VERIFY** | Reviewer | Adversarial check — lint, types, coverage, semantic review | -| **5 · ACCEPT** | Product Owner | Demo, validate, ship | - -**WIP limit: 1 feature at a time.** Features are `.feature` files that move through folders: +## Quick start +```bash +git clone https://github.com/nullhack/agents-smith +cd agents-smith +curl -LsSf https://astral.sh/uv/install.sh | sh # skip if uv is already installed +uv sync --all-extras +opencode && @setup-project # personalise for your project +uv run task test && uv run task lint && uv run task static-check ``` -docs/features/backlog/ ← waiting -docs/features/in-progress/ ← building (max 1) -docs/features/completed/ ← shipped -``` - -### AI agents included - -| Agent | Responsibility | -|-------|---------------| -| `@product-owner` | Scope, stories, acceptance criteria, delivery acceptance | -| `@software-engineer` | Architecture, TDD loop, git, releases | -| `@reviewer` | Adversarial verification — default position: broken | -| `@designer` | Visual identity, colour palette, SVG assets | -| `@setup-project` | One-time project initialisation | - -### Quality tooling, pre-configured - -| Tool | Role | -|------|------| -| `uv` | Package & environment management | -| `ruff` | Lint + format (Google docstrings) | -| `pyright` | Static type checking — 0 errors | -| `pytest` + `hypothesis` | Tests + property-based testing | -| `pytest-beehave` | Auto-generates test stubs from `.feature` files | -| `pytest-cov` | Coverage — 100% required | -| `pdoc` | API docs → GitHub Pages | -| `taskipy` | 
Task runner | --- ## Commands +### Development + ```bash -uv run task test # Full suite + coverage -uv run task test-fast # Fast, no coverage (use during TDD loop) -uv run task lint # ruff check + format -uv run task static-check # pyright -uv run task run # Run the app +uv run task test # full suite + coverage +uv run task test-fast # fast, no coverage (use during TDD loop) +uv run task lint # ruff format + check +uv run task static-check # pyright type checking +uv run task run # run the app +uv run task doc-build # build API docs + coverage report ``` ---- - -## Code standards - -| | | -|---|---| -| Coverage | 100% | -| Type errors | 0 | -| Function length | ≤ 20 lines | -| Class length | ≤ 50 lines | -| Max nesting | 2 levels | -| Principles | YAGNI › KISS › DRY › SOLID › Object Calisthenics | - ---- - -## Test convention +### Smith CLI -Write acceptance criteria in Gherkin: +`smith` connects your project to the agents-smith agentic workflow files. It manages the agentic file lifecycle — connect, update, and disconnect — so your project stays in sync without manual file copying. 
-```gherkin -@id:a3f2b1c4 -Example: User sees version on startup - Given the application starts - When no arguments are passed - Then the version string is printed to stdout -``` - -Run tests once — a traced, skipped stub appears automatically: - -```python -@pytest.mark.skip(reason="not yet implemented") -def test_display_version_a3f2b1c4() -> None: - """ - Given the application starts - When no arguments are passed - Then the version string is printed to stdout - """ +```bash +smith connect # write agentic files from the default template source +smith connect --from PATH # write agentic files from a local path +smith connect --from URL # write agentic files from a remote tarball +smith connect --overwrite # overwrite existing agentic files +smith update # re-write agentic files from the connected source +smith disconnect # remove all agentic files and gitignore entries +smith status # show connection state and source ``` -Each test traces to exactly one acceptance criterion. No orphan tests. No untested criteria. - ---- - -## Branding - -When you run `@setup-project`, the agent collects your project's identity — name, tagline, mission, colour palette, and release naming convention — and writes `docs/branding.md`. All agents read this file. Release names, C4 diagram colours, and generated copy all reflect your project's identity without you touching `.opencode/`. - -Absent or blank fields fall back to defaults: adjective-animal release names, Mermaid default colours, no wording constraints. - --- -## Versioning +## Documentation -`v{major}.{minor}.{YYYYMMDD}` — each release gets a unique name derived from your branding convention. By default: an adjective paired with an animal (scientific name). Configure your own theme in `docs/branding.md`. 
+- **[Product Definition](docs/product-definition.md)** — product boundaries, users, and scope +- **[System Overview](docs/system.md)** — architecture, domain model, module structure, and constraints +- **[Glossary](docs/glossary.md)** — living domain glossary --- @@ -166,16 +79,16 @@ Absent or blank fields fall back to defaults: adjective-animal release names, Me MIT — see [LICENSE](LICENSE). -**Author:** [@nullhack](https://github.com/nullhack) · [Documentation](https://nullhack.github.io/smith) +**Author:** [@nullhack](https://github.com/nullhack) · [Documentation](https://nullhack.github.io/agents-smith) -[contributors-shield]: https://img.shields.io/github/contributors/nullhack/smith.svg?style=for-the-badge -[contributors-url]: https://github.com/nullhack/smith/graphs/contributors -[forks-shield]: https://img.shields.io/github/forks/nullhack/smith.svg?style=for-the-badge -[forks-url]: https://github.com/nullhack/smith/network/members -[stars-shield]: https://img.shields.io/github/stars/nullhack/smith.svg?style=for-the-badge -[stars-url]: https://github.com/nullhack/smith/stargazers -[issues-shield]: https://img.shields.io/github/issues/nullhack/smith.svg?style=for-the-badge -[issues-url]: https://github.com/nullhack/smith/issues +[contributors-shield]: https://img.shields.io/github/contributors/nullhack/agents-smith.svg?style=for-the-badge +[contributors-url]: https://github.com/nullhack/agents-smith/graphs/contributors +[forks-shield]: https://img.shields.io/github/forks/nullhack/agents-smith.svg?style=for-the-badge +[forks-url]: https://github.com/nullhack/agents-smith/network/members +[stars-shield]: https://img.shields.io/github/stars/nullhack/agents-smith.svg?style=for-the-badge +[stars-url]: https://github.com/nullhack/agents-smith/stargazers +[issues-shield]: https://img.shields.io/github/issues/nullhack/agents-smith.svg?style=for-the-badge +[issues-url]: https://github.com/nullhack/agents-smith/issues [license-shield]: 
https://img.shields.io/badge/license-MIT-green?style=for-the-badge -[license-url]: https://github.com/nullhack/smith/blob/main/LICENSE +[license-url]: https://github.com/nullhack/agents-smith/blob/main/LICENSE \ No newline at end of file diff --git a/TODO.md b/TODO.md deleted file mode 100644 index f8f8910..0000000 --- a/TODO.md +++ /dev/null @@ -1,4 +0,0 @@ -# Current Work - -No feature in progress. -Next: Run @product-owner — select next BASELINED feature from backlog and move to in-progress for Step 2 (Architecture). diff --git a/docs/c4/.gitkeep b/docs/adr/.gitkeep similarity index 100% rename from docs/c4/.gitkeep rename to docs/adr/.gitkeep diff --git a/docs/adr/ADR_20260501_argparse-cli-framework.md b/docs/adr/ADR_20260501_argparse-cli-framework.md new file mode 100644 index 0000000..f83b83d --- /dev/null +++ b/docs/adr/ADR_20260501_argparse-cli-framework.md @@ -0,0 +1,55 @@ +# ADR_20260501_argparse-cli-framework + +## Status + +Accepted + +## Context + +smith needs a CLI framework to support four subcommands (`connect`, `disconnect`, `update`, `status`) with options (`--from <source>`, `--overwrite`). The project has a hard constraint of zero runtime dependencies. The current codebase already uses argparse (stdlib) for `--help` and `--version`. The CLI framework choice is architecturally significant because it constrains the entire command dispatch structure and is hard to change later without rewriting all command handlers. + +**Feature:** smith-commands (all four CLI commands) + +Forces: +- Zero runtime dependency constraint from `product_definition.md` and `system.md` +- Four subcommands with options — argparse supports subparsers natively +- The quality attribute ranking places Usability below Safety, Atomicity, and Clean Separation — a simpler CLI is acceptable if it meets the four-command requirement +- The delivery mechanism is CLI-only — no HTTP, no TUI, no GUI + +## Interview + +| Question | Answer | +|---|---| +| Which CLI framework should smith use? 
| argparse (stdlib) | + +## Decision + +Use argparse as the CLI framework for all four subcommands. + +## Reason + +argparse is part of the Python stdlib, satisfies the zero-runtime-dependency constraint, and supports subparsers for multi-command CLIs. The four subcommands are well within argparse's capability — no complex nested commands, no shell completion, no rich terminal output required. + +## Alternatives Considered + +- **Click**: Mature, excellent for complex CLIs, but introduces a runtime dependency (`click`). Rejected because it violates the zero-dependency constraint. +- **Typer**: Built on Click with type annotations, but also introduces a runtime dependency (`typer` + `click`). Rejected for the same reason. +- **Docopt**: Declarative CLI from docstrings, but introduces a runtime dependency and has weaker subparser support. Rejected. +- **Cleo**: Full-featured CLI framework used by Poetry, but introduces a runtime dependency and is over-engineered for four commands. Rejected. + +## Consequences + +- (+) Zero runtime dependencies maintained — `pip install agents-smith` works with no additional packages +- (+) Consistent with existing codebase (`__main__.py` already uses argparse) +- (+) Stdlib guarantee — argparse will always be available on any Python 3.13 installation +- (-) argparse subparser API is more verbose than Click/Typer — more boilerplate per command +- (-) No built-in shell completion, rich formatting, or progress bars — mitigated by keeping CLI output simple (text + exit codes) +- (-) Switching to Click/Typer later would require rewriting all command handlers — mitigated by hexagonal architecture (command handlers are thin adapters; domain logic is independent) + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| argparse subparser ergonomics lead to verbose command dispatch code | Medium | Low | Keep command handlers thin (dispatch to application use cases) | Yes | +| Future CLI complexity exceeds argparse capability | Low | Medium | Hexagonal architecture isolates CLI framework — can swap without domain changes | Yes | +| argparse `required` subparser behavior differs across Python versions | Low | Low | Python 3.13 is the only target version | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md b/docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md new file mode 100644 index 0000000..bee5219 --- /dev/null +++ b/docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md @@ -0,0 +1,57 @@ +# ADR_20260501_atomic-file-writes-via-temp-directory + +## Status + +Accepted + +## Context + +smith must guarantee atomicity for the `connect` and `update` commands: either all agentic files are written to the project directory or none are. Partial connections are explicitly forbidden by the Safety and Atomicity quality attributes. The file set includes AGENTS.md (single file), .opencode/ (directory tree), .templates/ (directory tree), and .flowr/ (directory tree). The mechanism for achieving atomicity is architecturally significant because it affects the entire write path, rollback strategy, and failure recovery. 
+ +**Feature:** smith-commands (connect, update) + +Forces: +- Atomicity quality attribute: "When smith connects, either all agentic files are written or none are" — Must priority +- Safety quality attribute: "When smith connects to a project that already has agentic files, it refuses to overwrite without explicit `--overwrite` flag" — Must priority +- Clean Separation quality attribute: "When smith disconnects, no agentic files remain" — Must priority +- Zero runtime dependency constraint — no external transaction managers +- The write set includes both single files and directory trees — the mechanism must handle both + +## Interview + +| Question | Answer | +|---|---| +| How should smith guarantee atomicity for file writes? | Temp-directory staging with atomic rename | + +## Decision + +Use temp-directory staging for all file writes: write all files to a temporary directory first, validate the complete set exists, then move files to their final locations. On any failure during the write phase, discard the temporary directory — no cleanup of partial writes needed because nothing was moved to the final location yet. + +## Reason + +Temp-directory staging is the simplest mechanism that satisfies the atomicity invariant without runtime dependencies. It uses only `tempfile.mkdtemp()` and `os.replace()` from the stdlib. The two-phase approach (stage → commit) means that failures during staging leave zero trace in the project directory, eliminating the need for complex rollback logic. + +## Alternatives Considered + +- **Transaction log with rollback**: Write a log of all operations before executing them; on failure, reverse the log. Rejected because it requires tracking individual file operations, makes rollback complex (reverse order, handle partial failures during rollback), and risks leaving the project in an inconsistent state if rollback itself fails. 
+- **Shadow directory (write to `.smith-staging/`, then rename)**: Similar to temp-directory staging but uses a fixed directory name in the project root. Rejected because it pollutes the project directory with a staging directory that must be cleaned up even on success, and could conflict with an existing `.smith-staging/` directory. +- **In-place writes with backup-and-restore**: Write files directly, keeping backups of any overwritten files. On failure, restore from backups. Rejected because it's the most complex approach — requires backup management, handles partial writes, and risks data loss if the restore fails. + +## Consequences + +- (+) Atomicity guarantee: either all files are written or none are — no partial connections possible +- (+) Simple rollback: on failure, just discard the temp directory — no cleanup of partial writes +- (+) Zero runtime dependencies — uses only stdlib (`tempfile`, `os.replace`, `shutil.move`) +- (+) Clear failure mode: if staging fails, the project directory is untouched +- (-) Disk space: staging requires temporary disk space for the full file set — mitigated by the fact that agentic files are typically small (a few KB for AGENTS.md, a few MB for .opencode/) +- (-) Two-phase write adds latency: all files must be staged before any are committed — mitigated by the small file set size +- (-) `os.replace()` is atomic on POSIX but not on Windows for cross-device moves — mitigated by ensuring temp directory is on the same filesystem as the project directory (use `dir=` parameter of `mkdtemp`) + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| Cross-device temp directory causes non-atomic rename on Windows | Low | Medium | Use `dir=` parameter of `mkdtemp` to create temp directory in the project directory's filesystem | Yes | +| Staging directory leaked if process is killed between stage and commit | Low | Low | Add a `.smith-staging` cleanup check at the start of `smith connect` and `smith status` | Yes | +| Disk full during staging causes write failure | Low | Low | Pre-check available disk space before staging; report clear error message | Yes | +| File permissions differ between temp and project directories | Low | Low | Explicitly set permissions after `os.replace()` using `os.chmod()` | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_github-bundled-template-resolution.md b/docs/adr/ADR_20260501_github-bundled-template-resolution.md new file mode 100644 index 0000000..1bda96b --- /dev/null +++ b/docs/adr/ADR_20260501_github-bundled-template-resolution.md @@ -0,0 +1,64 @@ +# ADR_20260501_github-bundled-template-resolution + +## Status + +Superseded by ADR-007 (local bundled template resolution) + +## Context + +The `BundledTemplateSource` currently resolves template files from `smith/data/` — a directory of 85 hardcoded copies of the project's own `.opencode/`, `.flowr/`, `.templates/`, and `AGENTS.md` files. This approach has several problems: + +1. **Staleness:** The bundled files are copies that must be manually updated. They will drift from the actual agents-smith templates over time. +2. **Coupling:** Every template update requires a new smith package release, even though smith is the consumer, not the owner, of these templates. +3. **Size:** 85 files add unnecessary bulk to the package distribution. +4. **Wrong ownership:** The agents-smith repository (specifically its `v8_release` branch) is the authoritative source for these files, not the smith package. 
+ +The agents-smith PyPI package (v7.2.20260423) only contains `app/__init__.py` and `app/__main__.py` — it does not expose template data as package resources, so `importlib.resources` cannot be used to read templates from an agents-smith package. + +Forces: +- Templates should always be current without requiring a new smith release +- The default template source must work reliably (network or cache) +- Network failure should not prevent `smith connect` if the cache is populated +- The solution should be simple and maintainable + +## Interview + +| Question | Answer | +|---|---| +| Should we use stdlib `urllib.request` or `requests` for HTTP? | Use `requests` — cleaner API, better error handling, worth the dependency | +| Should downloaded templates be cached locally? | Yes — cache in `~/.cache/smith/` to avoid re-downloading on every connect/update | +| Should the default GitHub branch/tag be configurable? | No — default to `v8_release` for now; will change in future but not configurable today | +| Should `smith/data/` be removed? | Yes — delete the entire directory; it contains stale copies | + +## Decision + +Resolve the bundled `agents-smith` template source by downloading the GitHub archive at runtime from `https://github.com/nullhack/agents-smith/archive/refs/heads/v8_release.tar.gz`, extracting it, and caching the files locally in `~/.cache/smith/agents-smith/`. Delete `smith/data/` entirely. Add `requests` as the only external runtime dependency. + +## Reason + +GitHub-based resolution ensures templates are always current without requiring a new smith package release. Local caching avoids redundant network requests. The `requests` library provides significantly better HTTP handling than `urllib.request` for this use case. 
+ +## Alternatives Considered + +- **importlib.resources with packaged templates (status quo):** Templates in `smith/data/` will go stale and require manual updates. The agents-smith PyPI package does not expose template data as resources. Rejected because of staleness and coupling. +- **urllib.request for HTTP downloads:** The stdlib HTTP client lacks connection pooling, timeout defaults, and clean error handling that `requests` provides. Rejected because the API is harder to use correctly and test. +- **Git submodule for agents-smith:** Adds complexity to the build process and still requires packaging template files. Rejected because it doesn't solve the staleness problem. +- **No caching (re-download every time):** Wasteful network requests on every `smith connect`/`smith update`. Rejected because of performance and usability impact on repeated commands. + +## Consequences + +- (+) Templates are always current — no need for a new smith release when templates change +- (+) `smith/data/` is removed — no stale copies, smaller package distribution +- (+) Local cache enables offline use after first download +- (-) `requests` is added as a runtime dependency — breaks the previous "zero runtime dependencies" constraint; mitigated by `requests` being the only external dependency +- (-) First `smith connect` requires network access — mitigated by clear error message on failure and cache fallback for subsequent use +- (-) Cache directory management adds implementation complexity — mitigated by using standard OS cache directories (`~/.cache/smith/`) + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| GitHub is unavailable on first `smith connect` | Low | Medium | Clear error message with exit code 1; suggest retrying or using `--from <source>` | Yes | +| Cache corruption | Low | Low | Delete cache directory and re-download; smith does not rely on cache integrity for safety | Yes | +| `v8_release` branch is renamed or deleted | Low | High | Default URL is a module-level constant that can be updated in a patch release; future enhancement could make it configurable | Yes | +| `requests` security vulnerability | Low | Medium | Pin minimum version in pyproject.toml; dependabot alerts for known CVEs | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_hexagonal-architecture.md b/docs/adr/ADR_20260501_hexagonal-architecture.md new file mode 100644 index 0000000..57df821 --- /dev/null +++ b/docs/adr/ADR_20260501_hexagonal-architecture.md @@ -0,0 +1,51 @@ +# ADR_20260501_hexagonal-architecture + +## Status + +Accepted + +## Context + +smith's domain logic enforces four critical invariants (atomicity, safety, clean separation, consistency) that must not be coupled to infrastructure details like filesystem operations, network access, or CLI argument parsing. The quality attributes rank Safety, Atomicity, and Clean Separation above Usability — the domain invariants are the core value, and the delivery mechanism (CLI) is a thin adapter. The project also needs to support multiple template source types (bundled, local path, remote URL) without changing domain logic. 
+ +Forces: +- Safety, Atomicity, and Clean Separation are Must-quality attributes that must be enforced in the domain layer +- Multiple template source types (bundled, local path, URL) require different infrastructure implementations +- Zero runtime dependency constraint means no framework can provide dependency injection +- Testability is a Should-quality attribute — domain logic must be testable without filesystem or network access +- The domain is small and cohesive (single bounded context, single aggregate) + +## Interview + +| Question | Answer | +|---|---| +| Which architectural style should smith use? | Hexagonal (Ports & Adapters) | + +## Decision + +Use Hexagonal Architecture (Ports & Adapters) with four layers: domain, application, infrastructure, delivery. Domain defines Protocol interfaces (ports); infrastructure implements them as adapters. The dependency arrow always points inward. + +## Reason + +Hexagonal architecture keeps the domain invariant enforcement independent of filesystem, network, and CLI concerns. The four quality attributes (Safety, Atomicity, Clean Separation, Consistency) are all enforced in the pure domain layer — no filesystem or network imports in domain code. Template source variations are handled by infrastructure adapters implementing a TemplateSourcePort interface, satisfying Modifiability without domain changes. + +## Alternatives Considered + +- **Layered architecture (traditional 3-tier):** Would work but doesn't enforce the strict dependency inversion needed. Domain could accidentally import infrastructure through shared layers. Rejected because it doesn't make the port/adapter boundary explicit. +- **Microservices architecture:** Over-engineered for a single-bounded-context CLI tool. Rejected because there's no inter-service communication need. +- **Event-driven architecture:** No asynchronous processing or event sourcing requirements. Rejected because smith's commands are synchronous request-response. 
+ +## Consequences + +- (+) Domain invariants are testable in isolation via port mocks — no filesystem or network in unit tests +- (+) New template source types added as infrastructure adapters without domain changes +- (+) CLI is a thin delivery adapter — can be replaced without touching domain logic +- (-) More files and indirection than a simple script — mitigated by the domain being small (single aggregate) +- (-) Protocol interfaces must be maintained alongside implementations — mitigated by keeping ports minimal (4 ports) + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? | +|------|------------|--------|------------|-----------| +| Over-engineering for a small CLI tool | Medium | Low | Domain is a single aggregate; the overhead is 4 port interfaces and 6 adapter classes — proportional to the problem | Yes | +| Port interfaces drift from actual needs | Low | Medium | Write tests against ports first (TDD); ports evolve with domain needs | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_local-bundled-template-resolution.md b/docs/adr/ADR_20260501_local-bundled-template-resolution.md new file mode 100644 index 0000000..8b13426 --- /dev/null +++ b/docs/adr/ADR_20260501_local-bundled-template-resolution.md @@ -0,0 +1,66 @@ +# ADR_20260501_local-bundled-template-resolution + +## Status + +Accepted — supersedes ADR-006 (GitHub-based bundled template resolution) + +## Context + +ADR-006 replaced the local `smith/data/` bundle with GitHub-based runtime download + local cache. This introduced problems discovered during end-to-end testing: + +1. **Runtime network dependency:** `smith connect` without `--from` requires network access on first run, violating the principle that the default source should "just work" +2. **Cache staleness:** A stale cache directory with incomplete content was served instead of re-downloading, producing incorrect results +3. 
**Complexity:** Download, extraction, caching, and cache invalidation logic added significant implementation overhead for the default use case +4. **Wrong default behavior:** The default template source should be the most reliable path, not one that depends on external infrastructure + +The agents-smith v8_release branch is the source of truth for agentic files, but smith should carry a local copy as part of its distribution rather than downloading at runtime. + +Forces: +- The default `smith connect` experience should be instant and offline-capable +- Template freshness is a release-time concern, not a runtime concern +- `requests` is still needed for UrlTemplateSource (non-default source types) +- The `smith/data/` directory must be kept in sync with agents-smith v8_release via a manual script + +## Interview + +| Question | Answer | +|---|---| +| Should bundled templates be packaged locally or downloaded at runtime? | Packaged locally — `smith connect` without `--from` must work offline | +| Should `requests` still be a dependency? | Yes — UrlTemplateSource needs it for tar.gz/zip downloads | +| How is `smith/data/` kept in sync with agents-smith? | Manual script (`scripts/update-bundle.sh`) that downloads and copies agentic files | +| Should URL sources cache downloads? | No — re-download every time; no persistent cache for any source type | +| What about the deprecated BDD examples a1b2c3d4 and e5f6g7h8? | Deprecate — they test network failure and cache fallback for bundled source, which no longer applies | + +## Decision + +Package agentic files in `smith/data/` and resolve them at runtime via `importlib.resources`. BundledTemplateSource reads from the package directory — no network calls, no caching. UrlTemplateSource (fully implemented) downloads tar.gz/zip archives via `requests`, extracts to a temp directory, applies the agentic filter, and returns FileSpec objects — no persistent cache. 
Delete the GitHub download and caching code from BundledTemplateSource. Add `scripts/update-bundle.sh` for manual sync from agents-smith v8_release. + +## Reason + +Local packaging ensures `smith connect` works offline and instantly for the default case. Runtime download complexity is unnecessary for the default source. Template freshness is maintained through release-time updates, not runtime downloads. + +## Alternatives Considered + +- **GitHub-based download + local cache (ADR-006, superseded):** Adds runtime network dependency, cache staleness risk, and implementation complexity. Rejected because the default experience should be instant and offline. +- **No `requests` dependency at all:** Would prevent UrlTemplateSource from working. Rejected because URL source support is a required feature. +- **Git submodule for agents-smith:** Adds build complexity and still requires packaging files. Rejected because it doesn't simplify the distribution. +- **Persistent cache for URL sources:** Adds cache invalidation complexity with minimal benefit since URL sources are used infrequently. Rejected for simplicity. + +## Consequences + +- (+) `smith connect` without `--from` works instantly and offline +- (+) No cache staleness or invalidation issues for the default source +- (+) Simpler implementation — no download, extraction, or caching for BundledTemplateSource +- (+) `requests` dependency is only used for UrlTemplateSource, not the default path +- (-) `smith/data/` must be kept in sync with agents-smith v8_release via manual script +- (-) Template updates require a new smith release (same as pre-ADR-006 behavior) +- (-) `smith/data/` adds ~85 files to the package distribution + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| `smith/data/` drifts from agents-smith v8_release | Medium | Low | `scripts/update-bundle.sh` syncs agentic files; CI could automate this in future | Yes | +| Package size increases by ~85 files | Low | Low | Agentic files are small text files; total size is negligible | Yes | +| UrlTemplateSource download fails | Medium | Medium | Clear error message with exit code 1; user can retry or use `--from <source>` | Yes | +| `importlib.resources` path resolution differs across Python versions | Low | Low | Use `importlib.resources.files()` which is stable in Python 3.9+ | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_no-smart-merge.md b/docs/adr/ADR_20260501_no-smart-merge.md new file mode 100644 index 0000000..b4fcbf5 --- /dev/null +++ b/docs/adr/ADR_20260501_no-smart-merge.md @@ -0,0 +1,53 @@ +# ADR_20260501_no-smart-merge + +## Status + +Accepted + +## Context + +When `smith connect` encounters an existing `.flowr/` or `.templates/` directory in the target project, a decision must be made about how to handle the conflict. These directories may contain project-specific data (flows, templates) that the engineer has customised. The stakeholder deferred this decision to the architect. + +Forces: +- Safety quality attribute: "Zero silent overwrites, ever" +- Atomicity quality attribute: "No partial connections, ever" +- Clean Separation quality attribute: "Zero orphaned files after disconnect" +- `.flowr/` and `.templates/` may contain project-specific data that the engineer wants to preserve +- Smart merge logic (comparing files, choosing which to keep) adds complexity and failure modes +- smith disconnect must be able to cleanly remove everything smith wrote — merge makes this ambiguous + +## Interview + +| Question | Answer | +|---|---| +| How should smith handle existing .flowr/ and .templates/ when connecting? 
| Refuse without --overwrite; replace entirely with --overwrite (no merge) | + +## Decision + +Treat `.flowr/` and `.templates/` identically to all other agentic files: skip user-tracked files and auto-update smith-managed files. When `--overwrite` is passed, replace managed files entirely. No smart merge logic. + +## Reason + +This decision applies the YAGNI principle over DRY. Smart merge logic would violate Atomicity (partial connections where some files are merged and others are skipped), Safety (silent modification of existing content), and Clean Separation (disconnect wouldn't know which files were smith's vs pre-existing). The simple mental model — "smith writes its files; if they exist, use `--overwrite`" — is more usable than complex merge rules. + +## Alternatives Considered + +- **Smart merge (file-by-file comparison):** Compare each file and only write files that don't exist. Rejected because it violates Atomicity (partial state: some files merged, some skipped), Safety (silently modifying existing directory content), and Clean Separation (disconnect can't determine which files were smith's). +- **Selective skip:** Skip `.flowr/` and `.templates/` if they exist, but write `AGENTS.md` and `.opencode/`. Rejected because it violates Atomicity (partial connection) and creates an inconsistent state where some agentic files are present but others are not. +- **Interactive prompt:** Ask the user what to do for each conflicting directory. Rejected because it breaks the non-interactive CLI workflow and adds complexity for a marginal benefit. 
+ +## Consequences + +- (+) Atomicity is preserved: all files or nothing +- (+) Safety is preserved: no silent overwrites without `--overwrite` +- (+) Clean Separation is preserved: disconnect removes everything smith wrote, unambiguously +- (+) Simple mental model: "smith writes its files; if they exist, use `--overwrite`" +- (+) YAGNI: no merge logic to maintain, test, or debug +- (-) Engineers who want to combine project-specific flows/templates with smith's templates must manually manage that outside of smith — mitigated by the fact that `smith update --overwrite` replaces all files, making the workflow explicit + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? | +|------|------------|--------|------------|-----------| +| Engineers lose project-specific .flowr/ or .templates/ data when using --overwrite | Medium | High | Warn before overwrite; suggest backing up the directory first. Future feature could add `--backup` flag | Yes | +| Engineers want selective merge in future | Low | Low | Can be added as a future feature without architectural changes — the TemplateSourcePort and FileSystemPort interfaces support this | Yes | \ No newline at end of file diff --git a/docs/adr/ADR_20260501_smith-yaml-metadata.md b/docs/adr/ADR_20260501_smith-yaml-metadata.md new file mode 100644 index 0000000..1247c3b --- /dev/null +++ b/docs/adr/ADR_20260501_smith-yaml-metadata.md @@ -0,0 +1,55 @@ +# ADR_20260501_smith-yaml-metadata + +## Status + +Superseded — The stakeholder decided smith should be stateless. Connection state is now inferred from the `# smith managed` section in `.gitignore`, with source metadata stored in the section header (e.g., `# smith managed source:agents-smith`). No separate `.smith.yaml` file is created. This decision supersedes ADR-004's original recommendation of a dedicated metadata file. + +## Context + +smith needs to persist connection state between commands. 
`smith status` must report which template source was used and when the connection was established. `smith update` must know which template source to refresh from. `smith disconnect` must know what to remove. This state must survive process termination — it cannot be in-memory only. + +Forces: +- The connection state must be queryable by `smith status` without re-deriving it +- `smith update` must know the original template source (default agents-smith, or `--from `) +- The project directory is the only reliable persistence location (smith has no config directory) +- Zero runtime dependency constraint means no PyYAML or other parsing libraries +- The metadata file is created on connect and removed on disconnect, following the same lifecycle as the agentic files + +## Interview + +| Question | Answer | +|---|---| +| How should smith persist connection state? | Simple YAML file in the project root (.smith.yaml) | + +## Decision + +Use a `.smith.yaml` file in the project root to persist connection state. The file contains `template_source` and `connected_at` fields in simple `key: value` format. The file is created on `smith connect`, read by `smith status` and `smith update`, and removed on `smith disconnect`. + +## Reason + +A simple key-value YAML file in the project root is the most discoverable and debuggable persistence mechanism. It's human-readable, version-controllable, and can be parsed without PyYAML by using a simple line-splitting approach. The file follows the same lifecycle as the agentic files (created on connect, removed on disconnect). + +## Alternatives Considered + +- **SQLite database in .smith/:** Over-engineered for two fields. Rejected because it adds binary state and requires a runtime dependency for proper SQLite handling. +- **JSON file (.smith.json):** Valid alternative, but JSON is less human-readable for simple key-value data and doesn't support comments. Rejected in favor of YAML's comment support for explaining fields. 
+- **No persistence (derive state from file presence):** Fragile — cannot distinguish between "connected with default agents-smith" and "files happened to be there." Also cannot determine the original `--from` source for `smith update`. Rejected because it violates the consistency invariant. +- **Git configuration (git config):** Only works in git repositories. smith must work in non-git directories. Rejected. + +## Consequences + +- (+) Connection state is human-readable and debuggable +- (+) `.smith.yaml` follows the same lifecycle as the agentic files — created on connect, removed on disconnect +- (+) Simple format parseable without PyYAML — maintains zero runtime dependency constraint +- (+) `smith status` can report template source and connection time without re-deriving +- (-) `.smith.yaml` is visible in the project directory — mitigated by adding it to the managed .gitignore section +- (-) If a user manually edits `.smith.yaml`, state could become inconsistent — mitigated by documenting that `.smith.yaml` is managed by smith and should not be edited manually +- (-) Simple YAML format cannot represent complex nested structures — mitigated by YAGNI; only two fields are needed + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | Accepted? 
| +|------|------------|--------|------------|-----------| +| User manually edits .smith.yaml causing inconsistent state | Low | Medium | Document in .smith.yaml comments that it is managed by smith; detect corruption on status/update commands | Yes | +| .smith.yaml conflicts with other tools using same filename | Low | Low | The `.smith.` prefix is specific to this tool; collision is unlikely | Yes | +| Simple YAML parser cannot handle edge cases | Low | Low | Only two fields with string values; no complex types needed | Yes | \ No newline at end of file diff --git a/docs/architecture.md b/docs/architecture.md deleted file mode 100644 index 2edabcd..0000000 --- a/docs/architecture.md +++ /dev/null @@ -1,19 +0,0 @@ -# Architecture: - ---- - -## YYYY-MM-DD — : - -Decision: -Reason: -Alternatives considered: -Feature: - ---- - -## YYYY-MM-DD — Cross-feature: - -Decision: -Reason: -Alternatives considered: -Affected features: , diff --git a/docs/discovery.md b/docs/discovery.md deleted file mode 100644 index 58b7151..0000000 --- a/docs/discovery.md +++ /dev/null @@ -1,39 +0,0 @@ -# Discovery: smith - ---- - -## Session: 2026-04-20 - -### Context -`smith` is a CLI tool for Python developers who want a production-grade project setup without manual scaffolding. It solves the problem of bootstrapping and maintaining a consistent project structure aligned with the nullhack/python-project-template. Users are developers starting new projects or upgrading existing ones. Success means a new project runs immediately after creation, and an existing project gains template tooling without losing any existing content. Failure means data loss in an existing project or a broken new project. Out of scope: running projects, version-tracking template updates, multi-project management, PyPI publishing, and IDE integration. 
- -### Feature List -- `smith-init` — creates a new Python project using `uv init` then layers template add-ons with interactive metadata prompts -- `smith-assimilate` — applies template structure/tooling to an existing project with per-file conflict resolution and dry-run mode - -### Domain Model -| Type | Name | Description | In Scope | -|------|------|-------------|----------| -| Noun | Project | A Python project directory being created or upgraded | Yes | -| Noun | Template | The nullhack/python-project-template add-ons (`.opencode/`, CI, folder structure, `AGENTS.md`) | Yes | -| Noun | Metadata | User-provided values (name, author, GitHub username) substituted into template placeholders | Yes | -| Noun | ConflictResolution | Per-file user decision when a template file already exists: skip, overwrite, or diff | Yes | -| Noun | DryRun | A preview mode that shows planned changes without writing any files | Yes | -| Verb | init | Create a new project via `uv init` then apply template add-ons | Yes | -| Verb | assimilate | Apply template add-ons to an existing project | Yes | -| Verb | merge | Add missing `pyproject.toml` entries without overwriting existing ones | Yes | -| Verb | prompt | Ask the user for metadata or conflict resolution decisions interactively | Yes | - ---- - -## Session: 2026-04-20 (Session 2) - -### Feature List -- `smith-new` — renamed from `smith-init`; command confirmed as `smith new [path]`; template source confirmed as uv GitHub dependency pinned by rev -- `smith-assimilate` — path argument confirmed (`smith assimilate [path]`, defaults to cwd); both features baselined - -### Domain Model -| Type | Name | Description | In Scope | -|------|------|-------------|----------| -| Noun | TemplateDependency | The nullhack/python-project-template installed as a uv GitHub dep, rev-pinned in pyproject.toml | Yes | -| Verb | new | Create a new project via `uv init` then apply template add-ons | Yes | diff --git a/docs/discovery_journal.md 
b/docs/discovery_journal.md deleted file mode 100644 index aa2edfd..0000000 --- a/docs/discovery_journal.md +++ /dev/null @@ -1,67 +0,0 @@ -# Discovery Journal: smith - ---- - -## 2026-04-20 — Session 1 -Status: IN-PROGRESS - -### General - -| ID | Question | Answer | -|----|----------|--------| -| Q1 | Who are the users? | Python developers starting or improving their Python projects | -| Q2 | What does the product do at a high level? | CLI tool with two commands: `smith init ` creates a new project using `uv init` then layers template add-ons on top; `smith assimilate` applies template structure/tooling to an existing project | -| Q3 | Why does it exist — what problem does it solve? | Solves the problem of having a proper production-grade project setup and being able to update/upgrade it later | -| Q4 | When and where is it used? | CLI tool used locally; two modes: create new project, or apply template to existing project | -| Q5 | Success — what does "done" look like? | New project runs straightaway after creation; existing project is upgraded without affecting existing content | -| Q6 | Failure — what must never happen? | Destroys anything in an existing project; spawns a project that doesn't work as intended | -| Q7 | Out-of-scope — what are we explicitly not building? | Running the project (smith only sets up), version-tracking template updates, managing multiple projects, publishing to PyPI, IDE/editor integration | - -### Cross-cutting: Commands - -| ID | Question | Answer | -|----|----------|--------| -| Q8 | What is the second command word? | `assimilate` — Matrix-branded, fits the Agent Smith aesthetic | -| Q9 | Are these two separate commands or one command with a flag? | Two separate commands: `smith init ` and `smith assimilate` | -| Q10 | What does `smith init` actually do? | Runs `uv init` then layers template add-ons on top — NOT a git clone of the template | -| Q11 | For `smith assimilate` — what does it touch? 
| `.opencode/` folder (skills, agents, prompts), `pyproject.toml` additions (merge/add missing, don't overwrite existing), CI files (`.github/workflows/`), folder structure (create `docs/`, `tests/` if missing), `AGENTS.md` | - -### Cross-cutting: Safety - -| ID | Question | Answer | -|----|----------|--------| -| Q12 | What happens if `smith init` is run in a directory that already exists? | Prompt per conflicting file: skip / overwrite / show diff | -| Q13 | What happens if `smith assimilate` is run twice on the same project? | Safe to run again — always prompts on conflicts; idempotent by design | -| Q14 | Is there a dry-run or preview mode? | Yes — show what would change before writing anything | - -### Cross-cutting: Configuration - -| ID | Question | Answer | -|----|----------|--------| -| Q15 | Does the user provide project metadata during creation? | Yes — interactive prompts during creation (name, author, GitHub username, etc.) that substitute placeholders in template files | - -### Out-of-scope - -| ID | Question | Answer | -|----|----------|--------| -| Q17 | Is smith responsible for running the project after setup? | No — smith only creates/upgrades; running is out of scope | -| Q18 | Should smith update an already-applied template to a newer version? | No — version-tracking template updates is out of scope | -| Q19 | Should smith manage multiple projects? | No — one project at a time; multi-project management is out of scope | - -Status: COMPLETE - ---- - -## 2026-04-20 — Session 2 -Status: IN-PROGRESS - -### Refinements and Baseline Approval - -| ID | Question | Answer | -|----|----------|--------| -| Q20 | What is the final command name for project creation? | `smith new` (changed from `smith init`) — command is `smith new [path]` | -| Q21 | How is the template distributed? 
| As a uv GitHub dependency pinned by commit `rev` in `pyproject.toml`; no runtime download — smith reads template files from the installed package | -| Q22 | Does `smith assimilate` accept a path argument? | Yes — `smith assimilate [path]`, defaults to cwd if no path given | -| Q23 | Has the stakeholder approved both features for baselining? | Yes — both `smith-new` and `smith-assimilate` are approved for baselining as of 2026-04-20 | - -Status: COMPLETE diff --git a/docs/features/backlog/.gitkeep b/docs/features/.gitkeep similarity index 100% rename from docs/features/backlog/.gitkeep rename to docs/features/.gitkeep diff --git a/docs/features/backlog/smith-assimilate.feature b/docs/features/backlog/smith-assimilate.feature deleted file mode 100644 index b7a9cd3..0000000 --- a/docs/features/backlog/smith-assimilate.feature +++ /dev/null @@ -1,117 +0,0 @@ -Feature: smith assimilate - - Applies the nullhack/python-project-template structure and tooling to an existing - Python project. The template is bundled as a uv GitHub dependency (pinned by commit - rev) — no runtime download occurs. Touches: `.opencode/` (skills, agents, prompts), - `pyproject.toml` (merges missing entries only, never overwrites existing), - `.github/workflows/` CI files, `docs/` and `tests/` folders (created if missing), and - `AGENTS.md`. Supports a `--dry-run` flag that shows all planned changes without writing - any files. Conflict resolution is per-file (skip / overwrite / diff). Safe to run - multiple times — always prompts on conflicts. 
- - Status: BASELINED (2026-04-20) - - Rules (Business): - - Applies template add-ons to an existing project without destroying existing content - - `pyproject.toml` entries are merged: missing entries are added, existing entries are never overwritten - - Folder structure additions (`docs/`, `tests/`) are created only if missing - - Conflict resolution is per-file: user chooses skip, overwrite, or view diff - - `--dry-run` flag shows all planned changes without writing any files - - Operation is idempotent: running `smith assimilate` again is safe and always prompts on conflicts - - Template source: nullhack/python-project-template installed as uv GitHub dependency (rev-pinned) - - Constraints: - - Entry point: `smith assimilate [path]` CLI command; defaults to cwd if no path given - - Must never delete or overwrite files without explicit user confirmation - - Template source: nullhack/python-project-template installed as uv GitHub dependency - - Rule: Template application - As a Python developer - I want to run `smith assimilate` on an existing project - So that I get the template tooling without recreating the project from scratch - - @id:a1b2c3d4 - Example: Template files are added to an existing project - Given an existing Python project at the target path that lacks `.opencode/` and `AGENTS.md` - When the developer runs `smith assimilate` and confirms all prompts - Then `.opencode/`, `AGENTS.md`, `.github/workflows/`, `docs/`, and `tests/` are present in the project - - @id:e5f6a7b8 - Example: Existing project files are not deleted - Given an existing Python project with files not part of the template - When the developer runs `smith assimilate` and confirms all prompts - Then all pre-existing project files remain present and unmodified - - Rule: Safe pyproject.toml merge - As a Python developer - I want missing pyproject.toml entries added without touching existing ones - So that my existing configuration is preserved - - @id:9c0d1e2f - Example: Missing 
pyproject.toml entries are added - Given an existing `pyproject.toml` that lacks template-required entries - When the developer runs `smith assimilate` - Then the missing entries are added to `pyproject.toml` - - @id:3f4a5b6c - Example: Existing pyproject.toml entries are never overwritten - Given an existing `pyproject.toml` with a `[project.name]` entry set to "my-existing-name" - When the developer runs `smith assimilate` - Then `[project.name]` remains "my-existing-name" after assimilation - - Rule: Dry-run preview - As a Python developer - I want to preview all planned changes before they are written - So that I can decide whether to proceed without risk of accidental overwrites - - @id:7d8e9f0a - Example: Dry-run shows planned changes without writing files - Given an existing project that would receive template add-ons - When the developer runs `smith assimilate --dry-run` - Then a list of all files that would be added or modified is displayed and no files are written - - @id:b1c2d3e4 - Example: Dry-run on an up-to-date project reports no changes - Given an existing project that already has all template add-ons applied - When the developer runs `smith assimilate --dry-run` - Then smith reports that no changes would be made - - Rule: Per-file conflict resolution - As a Python developer - I want to choose skip, overwrite, or diff for each conflicting file - So that I have full control over what gets changed in my existing project - - @id:f5a6b7c8 - Example: Conflicting file triggers a per-file prompt - Given a template file already exists in the target project with different content - When the developer runs `smith assimilate` - Then smith prompts the user for that file with options: skip, overwrite, diff - - @id:d9e0f1a2 - Example: Choosing skip leaves the existing file unchanged - Given a conflict prompt is shown for an existing file during assimilation - When the developer chooses "skip" - Then the existing file is left unchanged and smith continues to the 
next file - - @id:b3c4d5e6 - Example: Choosing overwrite replaces the existing file with the template version - Given a conflict prompt is shown for an existing file during assimilation - When the developer chooses "overwrite" - Then the existing file is replaced with the template version - - @id:f7a8b9c0 - Example: Choosing diff shows a unified diff before re-prompting - Given a conflict prompt is shown for an existing file during assimilation - When the developer chooses "diff" - Then a unified diff of the existing file vs the template version is displayed and the prompt is shown again - - Rule: Idempotent operation - As a Python developer - I want to run `smith assimilate` multiple times safely - So that re-running it never silently overwrites my work - - @id:d1e2f3a4 - Example: Re-running assimilate on an already-assimilated project prompts on conflicts - Given a project that has already had `smith assimilate` applied - When the developer runs `smith assimilate` again - Then smith prompts for any conflicting files and makes no changes without explicit confirmation diff --git a/docs/features/backlog/smith-commands.feature b/docs/features/backlog/smith-commands.feature new file mode 100644 index 0000000..e64f4fe --- /dev/null +++ b/docs/features/backlog/smith-commands.feature @@ -0,0 +1,297 @@ +Feature: smith-commands + + smith connects standardised agent configurations (AGENTS.md, .opencode/, + .templates/, .flowr/) to any project directory and disconnects cleanly when + done. Four commands — connect, disconnect, update, status — validate the + full connect/work/disconnect cycle end-to-end. 
+ + Status: BASELINED (2026-05-01) + + Rules (Business): + - Connection state is inferred from the `# smith managed` section in .gitignore, not from a metadata file + - All agentic files are written atomically: either all are written or none are + - .templates/ and .flowr/ follow the same atomic rules as AGENTS.md and .opencode/ + - Existing files gitignored by `# smith managed` are auto-updated; files NOT in that section are skipped + - Disconnect removes only gitignored managed files; user-tracked files are preserved + - `# smith managed` section is kept on disconnect (guard for future usage) + - Connect on already-connected project auto-updates; update on not-connected project auto-connects + + Constraints: + - Safety: zero silent overwrites, ever (product_definition.md #1) + - Atomicity: no partial connections, ever (product_definition.md #2) + - Clean separation: zero orphaned files after disconnect (product_definition.md #3) + - Usability: smith connect must complete in under 1 minute (product_definition.md #4) + + ## Frozen Examples Rule + + After a feature is BASELINED, all `Example:` blocks are immutable. Changes require + `@deprecated` on the old Example (preserving the original @id) and a new Example + with a new @id. This prevents scope creep and maintains traceability. 
+ + ## Pre-Mortem Findings + + | Rule | Failure Mode | Mitigation Example | + |------|-------------|-------------------| + | 1 — Connect | .gitignore does not exist → smith cannot add managed section | c928a845: creates .gitignore | + | 1 — Connect | .gitignore exists without smith section → append fails silently | 86c8e268: appends section to existing file | + | 2 — Skip | User adds .gitignore entries outside `# smith managed` for smith files → smith treats them as user-tracked | df0455a5: smith-managed file is auto-updated on reconnect | + | 3 — Disconnect | User manually deleted a managed file before disconnect | b755bfae: partial disconnect is idempotent | + | 4 — Update | Template source has changed since last connect (files added/removed) | 9a01f4e2: update reflects current template state | + + ## Questions + + | ID | Question | Status | Answer / Assumption | + |----|----------|--------|---------------------| + | Q1 | Should `smith status --json` include the template source URL in the output? | Assumed | Yes — status should report whatever connection metadata is available | + | Q2 | What happens if .gitignore is read-only? | Assumed | Exit 1 with IO error (standard filesystem error, not a smith-specific exit code) | + | Q3 | What happens if a managed file is a broken symlink? | Assumed | Treat as present (it exists on disk as a symlink); smith does not resolve symlinks | + | Q4 | What happens if the bundled template download fails on first connect? | Assumed | N/A — bundled source reads from local package data, no network required | + | Q5 | What happens if the bundled template download fails but cache exists? 
| Assumed | N/A — bundled source reads from local package data, no caching | + + ## Changes + + | Session | Q-IDs | Change | + |---------|-------|--------| + | 2026-05-01 SN | — | Created: initial BDD specification for smith-commands | + | 2026-05-01 SN | IN_20260501_temple8-dependency-resolution | Added: bundled template network failure and cache fallback examples (a1b2c3d4, e5f6g7h8); added Q4 and Q5 about network failure behavior | + | 2026-05-01 SN | IN_20260501_local-bundle-reversal | Deprecated: a1b2c3d4 and e5f6g7h8 (bundled source no longer uses network); Added: URL source download failure examples (a2b3c4d5, e4f5g6h7); Updated Q4/Q5 to reflect local bundle | + + Rule: Connect to a fresh project + As an engineer + I want to run smith connect in a fresh project directory + So that I can immediately start using standard AI agent workflows + + @id:c928a845 + Example: Connect with default template source + Given a project directory with no agentic files and no `# smith managed` section in .gitignore + When the engineer runs `smith connect` + Then all agentic files (AGENTS.md, .opencode/, .templates/, .flowr/) are written to the project directory + And a `# smith managed` section is added to .gitignore with entries for all agentic file patterns + + @id:86c8e268 + Example: Connect with a local path template source + Given a project directory with no agentic files + When the engineer runs `smith connect --from ./my-templates` + Then agentic files are written from the local path template source to the project directory + And a `# smith managed` section is added to .gitignore + + @id:577156bb + Example: Connect with a URL template source + Given a project directory with no agentic files + When the engineer runs `smith connect --from https://example.com/templates.tar.gz` + Then agentic files are downloaded from the URL and written to the project directory + And a `# smith managed` section is added to .gitignore + + @id:4fdd38a4 + Example: Connect with a remote URL 
template source + Given a project directory with no agentic files + When the engineer runs `smith connect --from https://example.com/templates/my-template.zip` + Then agentic files are downloaded from the remote URL and written to the project directory + And a `# smith managed` section is added to .gitignore + + @id:f79d40f4 + Example: Template source not found + Given a project directory with no agentic files + When the engineer runs `smith connect --from /nonexistent/path` + Then smith exits with code 1 + And an error message indicates the template source could not be found + + @id:a1b2c3d4 @deprecated(reason="bundled source no longer uses network; see a2b3c4d5 for URL failure") + Example: Bundled template source network failure + Given a project directory with no agentic files and no cached templates + When the engineer runs `smith connect` and the GitHub archive download fails + Then smith exits with code 1 + And an error message indicates the bundled template source could not be downloaded + + @id:e5f6g7h8 @deprecated(reason="bundled source no longer caches; see e4f5g6h7 for URL failure") + Example: Bundled template source uses cache when network unavailable + Given a project directory with no agentic files and cached templates from a previous connect + When the engineer runs `smith connect` and the GitHub archive download fails + Then smith uses the cached templates and connects successfully + And smith exits with code 0 + + @id:a2b3c4d5 Should + Example: URL template source download failure + Given a project directory with no agentic files + When the engineer runs `smith connect --from https://example.com/templates.tar.gz` and the download fails + Then smith exits with code 1 + And an error message indicates the URL template source could not be downloaded + + @id:e4f5g6h7 Should + Example: URL template source invalid archive + Given a project directory with no agentic files + When the engineer runs `smith connect --from https://example.com/templates.tar.gz` 
and the downloaded archive is invalid + Then smith exits with code 1 + And an error message indicates the archive could not be extracted + + @id:060390bf + Example: Connect creates .gitignore when it does not exist + Given a project directory with no agentic files and no .gitignore file + When the engineer runs `smith connect` + Then a new .gitignore file is created containing the `# smith managed` section with entries for all agentic file patterns + + @id:e8245392 + Example: Connect appends section to existing .gitignore + Given a project directory with no agentic files and an existing .gitignore without a `# smith managed` section + When the engineer runs `smith connect` + Then the `# smith managed` section is appended to the existing .gitignore + And existing .gitignore content is preserved + + @id:fc22c286 + Example: Pair-atomic write rollback on failure + Given a project directory with no agentic files + When smith fails to write .opencode/ after writing AGENTS.md + Then AGENTS.md is removed (rolled back) + And no agentic files remain in the project directory + + Rule: Auto-update on connected projects, skip user-tracked files on fresh projects + As an engineer + I want smith to auto-update managed files when the project is already connected + And to skip user-tracked files when connecting to a fresh project + So that my existing work is never silently overwritten + + @id:df0455a5 Must + Example: Existing smith-managed file is auto-updated on reconnect + Given a project directory where .opencode/ exists and is listed in the `# smith managed` section of .gitignore + When the engineer runs `smith connect` + Then .opencode/ is updated with the template version (auto-update) + And all other agentic files are written + And smith exits with code 0 + + @id:21c05bbb Must + Example: Existing user-tracked file is skipped + Given a project directory where AGENTS.md exists but is NOT in the `# smith managed` section of .gitignore (the user tracks it manually) + When the 
engineer runs `smith connect` + Then AGENTS.md is not overwritten + And the remaining agentic files (.opencode/, .templates/, .flowr/) are written + And a `# smith managed` section is added to .gitignore + + @id:2a5f83d0 Must + Example: Overwrite flag replaces all managed files + Given a project directory where .opencode/ exists and is listed in the `# smith managed` section of .gitignore + When the engineer runs `smith connect --overwrite` + Then .opencode/ is replaced with the template version + And all agentic files are written + And files not in the `# smith managed` section are not touched + + @id:3e206149 Must + Example: Connect on already-connected project auto-updates + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith connect` + Then smith behaves as `smith update` — all managed agentic files are overwritten with the template versions + And smith exits with code 0 + + @id:7d22e1d6 Should + Example: Overwrite with user-tracked files preserved + Given a project directory where AGENTS.md is NOT in `# smith managed` (user-tracked) and .opencode/ IS in `# smith managed` + When the engineer runs `smith connect --overwrite` + Then .opencode/ is replaced with the template version + And AGENTS.md is not touched (it is not in the smith-managed section) + + Rule: Disconnect from a project + As an engineer + I want to run smith disconnect so that all smith-managed files are removed from my project + So that I can cleanly separate smith from my project without leaving orphaned files + + @id:cd5ba959 Must + Example: Disconnect a fully connected project + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith disconnect` + Then all agentic files that are gitignored by `# smith managed` are removed from the project directory + And the `# smith managed` section is preserved in .gitignore + And files not 
gitignored by `# smith managed` are not removed + + @id:9411ceb4 Must + Example: Disconnect a not-connected project is a no-op + Given a project directory with no agentic files and no `# smith managed` section in .gitignore + When the engineer runs `smith disconnect` + Then smith exits with code 0 + And no files are modified + + @id:b755bfae Should + Example: Disconnect a partially connected project removes present gitignored files + Given a project directory where .opencode/ exists and is gitignored by `# smith managed` but .flowr/ is missing + When the engineer runs `smith disconnect` + Then .opencode/ is removed + And no error is raised for the missing .flowr/ + And the `# smith managed` section is preserved in .gitignore + + @id:8f2a9018 Must + Example: User-tracked agentic file is preserved on disconnect + Given a project directory where AGENTS.md is NOT gitignored by `# smith managed` (user tracks it) but .opencode/ IS gitignored by `# smith managed` + When the engineer runs `smith disconnect` + Then .opencode/ is removed + And AGENTS.md is not removed (it is not in the smith-managed section) + And the `# smith managed` section is preserved in .gitignore + + Rule: Update agentic files + As an engineer + I want to run smith update so that my connected project gets the latest template files + So that I can stay current with template changes without reconnecting + + @id:e4d06612 Must + Example: Update a connected project + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith update` + Then all agentic files that are in the `# smith managed` section are overwritten with the latest template versions + And files not managed by smith are not touched + And smith exits with code 0 + + @id:d348166e Should + Example: Update with a new template source + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith 
update --from ./new-templates` + Then all managed agentic files are overwritten with files from the new template source + And smith exits with code 0 + + @id:9a01f4e2 Must + Example: Update on a not-connected project auto-connects + Given a project directory with no agentic files and no `# smith managed` section in .gitignore + When the engineer runs `smith update` + Then smith behaves as `smith connect` — all agentic files are written and a `# smith managed` section is added to .gitignore + And smith exits with code 0 + + @id:7af2f4d1 Must + Example: Update source not found + Given a connected project directory + When the engineer runs `smith update --from /nonexistent/path` + Then smith exits with code 1 + And an error message indicates the template source could not be found + + Rule: Check connection status + As an engineer + I want to run smith status so that I know whether my project is connected and which agentic files are present + So that I can take appropriate action + + @id:447e3cbf Must + Example: Connected project status + Given a project directory with all agentic files present and a `# smith managed` section in .gitignore + When the engineer runs `smith status` + Then smith reports "Connected" with a list of present agentic files + + @id:3f364b1d Must + Example: Partially connected project status + Given a project directory where .opencode/ and AGENTS.md exist but .templates/ and .flowr/ are missing + When the engineer runs `smith status` + Then smith reports "Partial" with a list of present and missing agentic files + And suggests `smith connect --overwrite` or `smith disconnect` + + @id:76e27d0a Must + Example: Disconnected project status + Given a project directory with no agentic files but a `# smith managed` section in .gitignore + When the engineer runs `smith status` + Then smith reports "Disconnected" + And suggests `smith connect` to reconnect + + @id:94ebcd86 Must + Example: Not connected project status + Given a project directory with no 
agentic files and no `# smith managed` section in .gitignore + When the engineer runs `smith status` + Then smith reports "Not connected" + And suggests `smith connect` to get started + + @id:10843402 Should + Example: Status with JSON output + Given a connected project directory + When the engineer runs `smith status --json` + Then smith outputs machine-readable JSON with connection status, present files list, and template source \ No newline at end of file diff --git a/docs/features/backlog/smith-new.feature b/docs/features/backlog/smith-new.feature deleted file mode 100644 index 863ee98..0000000 --- a/docs/features/backlog/smith-new.feature +++ /dev/null @@ -1,110 +0,0 @@ -Feature: smith new - - Creates a new Python project by running `uv init ` and then layering - nullhack/python-project-template add-ons on top. The template is bundled as a uv - GitHub dependency (pinned by commit rev) — no runtime download occurs. The user is - prompted interactively for project metadata (name, author, GitHub username, email, - description), which are substituted into template placeholders. The resulting project - is immediately runnable. If the target directory already exists, conflicts are resolved - per-file via prompt (skip / overwrite / diff). 
- - Status: BASELINED (2026-04-20) - - Rules (Business): - - Project is created using `uv init` as the foundation, not by cloning the template repo - - Template add-ons are read from the installed nullhack/python-project-template package (uv GitHub dep, rev-pinned) - - User is prompted interactively for: project name, author, GitHub username, email, description - - Metadata placeholders in template files are substituted with user-provided values - - If the target directory already exists, conflicts are resolved per-file: skip / overwrite / diff - - The resulting project must be immediately runnable after creation - - Constraints: - - Entry point: `smith new [path]` CLI command - - Requires `uv` to be available on the system PATH - - Template source: nullhack/python-project-template installed as uv GitHub dependency - - Rule: Project scaffolding - As a Python developer - I want to run `smith new ` to create a new project - So that I get a production-ready project structure without manual setup - - @id:c1a2b3d4 - Example: New project directory is created with uv init structure - Given no directory named "myproject" exists at the target path - When the developer runs `smith new myproject` - Then a directory "myproject" is created containing a uv-initialized project structure - - @id:e5f6a7b8 - Example: Template add-ons are present in the new project - Given no directory named "myproject" exists at the target path - When the developer runs `smith new myproject` - Then the new project contains `.opencode/`, `AGENTS.md`, `.github/workflows/`, `docs/`, and `tests/` - - @id:9c0d1e2f - Example: Missing uv on PATH produces a clear error - Given `uv` is not available on the system PATH - When the developer runs `smith new myproject` - Then smith exits with a non-zero code and an error message indicating uv is required - - Rule: Metadata substitution - As a Python developer - I want to provide my project metadata interactively - So that template placeholders are replaced with 
my actual project details - - @id:3f4a5b6c - Example: User is prompted for all required metadata fields - Given the developer runs `smith new myproject` - When smith reaches the metadata collection step - Then smith prompts for: project name, author name, GitHub username, email, and description - - @id:7d8e9f0a - Example: Placeholders in template files are replaced with provided metadata - Given the developer provides name "myproject", author "Alice", GitHub username "alice", email "alice@example.com", description "My project" - When smith applies the template add-ons - Then all placeholder tokens in template files are replaced with the corresponding provided values - - @id:b1c2d3e4 - Example: Empty required metadata field is rejected - Given the developer leaves a required metadata field blank - When smith processes the metadata input - Then smith re-prompts for the blank field with a message indicating it is required - - Rule: Conflict resolution on existing directory - As a Python developer - I want to be prompted per-file when the target directory already exists - So that I can choose to skip, overwrite, or diff each conflicting file without losing existing work - - @id:f5a6b7c8 - Example: Existing directory triggers per-file conflict prompt - Given a directory "myproject" already exists at the target path with existing files - When the developer runs `smith new myproject` - Then smith prompts the user for each conflicting file with options: skip, overwrite, diff - - @id:d9e0f1a2 - Example: Choosing skip leaves the existing file unchanged - Given a conflict prompt is shown for an existing file - When the developer chooses "skip" - Then the existing file is left unchanged and smith continues to the next file - - @id:b3c4d5e6 - Example: Choosing overwrite replaces the existing file with the template version - Given a conflict prompt is shown for an existing file - When the developer chooses "overwrite" - Then the existing file is replaced with the template 
version - - @id:f7a8b9c0 - Example: Choosing diff shows a unified diff before re-prompting - Given a conflict prompt is shown for an existing file - When the developer chooses "diff" - Then a unified diff of the existing file vs the template version is displayed and the prompt is shown again - - Rule: Runnable result - As a Python developer - I want the created project to be immediately runnable - So that I can start working without additional setup steps - - @id:d1e2f3a4 - Example: New project passes its own test suite immediately after creation - Given the developer runs `smith new myproject` and provides all metadata - When the developer runs `uv run task test-fast` inside the new project directory - Then all tests pass with no configuration required diff --git a/docs/features/completed/display-version.feature b/docs/features/completed/display-version.feature deleted file mode 100644 index 0dfc3dd..0000000 --- a/docs/features/completed/display-version.feature +++ /dev/null @@ -1,60 +0,0 @@ -Feature: Display version - - Reads the application version from pyproject.toml at runtime and logs it at INFO - level. Log output is controlled by a verbosity parameter; the version is visible - at DEBUG and INFO but suppressed at WARNING and above. An invalid verbosity value - raises a descriptive error. 
- - Status: COMPLETED - - Rules (Business): - - Version is read from pyproject.toml at runtime using tomllib - - Log verbosity is controlled by a ValidVerbosity parameter passed to main() - - Valid verbosity levels are: DEBUG, INFO, WARNING, ERROR, CRITICAL - - An invalid verbosity value raises a ValueError with the invalid value and valid options - - The version string is logged at INFO level; visible at DEBUG and INFO, not at WARNING+ - - Constraints: - - No hardcoded __version__ constant — pyproject.toml is the single source of truth - - Entry point: app/__main__.py (main(verbosity) function) - - Version logic: app/version.py (version() function) - - Rule: Version retrieval - As a software-engineer - I want to retrieve the application version programmatically - So that I can display or log it at runtime - - @id:3f2a1b4c - Example: Version string is read from pyproject.toml - Given pyproject.toml exists with a version field - When version() is called - Then the returned string matches the version in pyproject.toml - - @id:7a8b9c0d - Example: Version call emits an INFO log message - Given pyproject.toml exists with a version field - When version() is called - Then an INFO log message in the format "Version: " is emitted - - Rule: Verbosity control - As a software-engineer - I want to control log verbosity via a parameter - So that I can tune output for different environments - - @id:a1b2c3d4 - Example: Version appears in logs at DEBUG and INFO verbosity - Given a verbosity level of DEBUG or INFO is passed to main() - When main() is called - Then the version string appears in the log output - - @id:b2c3d4e5 - Example: Version is absent from logs at WARNING and above - Given a verbosity level of WARNING, ERROR, or CRITICAL is passed to main() - When main() is called - Then the version string does not appear in the log output - - @id:e5f6a7b8 - Example: Invalid verbosity raises a descriptive error - Given an invalid verbosity string is passed to main() - When main() 
is called - Then a ValueError is raised with the invalid value and valid options listed diff --git a/docs/index.html b/docs/index.html index 741957e..6b426cf 100644 --- a/docs/index.html +++ b/docs/index.html @@ -3,66 +3,486 @@ - Project Documentation + agents-smith — Documentation -

Documentation

-

Generated project documentation

- -
Built with pdoc · pytest-cov · pytest-html
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+

AGENTS·SMITH

+

AI-assisted software delivery system with flow-based agent orchestration.

+
+
+ +
+ + +

Generated Documentation

+ + + + + +

Features

+ +
+
+ + + +
+
+ +
+
+

No feature currently in progress.

+
+
+

Backlog is empty — ready for the next feature.

+
+
+ + + +

Architecture Decisions

+ +
    +
  • + 2026-04-22 + parser-library + Accepted +
  • +
  • + 2026-04-22 + source + Accepted +
  • +
+ + + +

Research Library

+ +
+ +
Scientific Research — AI Agent Design
+
Foundations for the agent architecture, file structure, and context management decisions in this template.
+
+ +
Scientific Research — Architecture
+
Foundations for the architectural decisions and patterns used in this template.
+
+ +
Scientific Research — Cognitive Science
+
Mechanisms from cognitive and social psychology that justify workflow design decisions in this template.
+
+ +
Scientific Research — Documentation
+
Foundations for living documentation, docs-as-code, information architecture, and post-mortem practices used in this template.
+
+ +
Scientific Research — Domain Modeling
+
Foundations for bounded context identification, ubiquitous language, and feature decomposition used in this template.
+
+ +
Scientific Research — OOP Design
+
Foundations for object-oriented design principles used in this template.
+
+ +
Scientific Research — Refactoring (Empirical)
+
Empirical studies on code smells, refactoring prioritization, and OOP complexity used in this template.
+
+ +
Scientific Research — Requirements Elicitation
+
Foundations for the PO interview structure, Gherkin criteria, and feature discovery in this template.
+
+ +
Scientific Research — Software Economics
+
Foundations for the shift-left, early defect detection, and workflow ordering decisions in this template.
+
+ +
Scientific Research — Testing
+
Foundations for test design, TDD, BDD, and property-based testing used in this template.
+
+ +
Version Control &amp; Branching Strategies
+
Source: Chacon, S., &amp; Straub, B. (2014). Pro Git (2nd ed.). Apress. Free online: https://git-scm.com/book
+
+
+ + +
+ + + + + + + diff --git a/docs/interview-notes/IN_20260422_scope-discovery.md b/docs/interview-notes/IN_20260422_scope-discovery.md new file mode 100644 index 0000000..a64bd9f --- /dev/null +++ b/docs/interview-notes/IN_20260422_scope-discovery.md @@ -0,0 +1,65 @@ +# IN_20260422_scope-discovery — Initial product scope discovery + +> **Status:** COMPLETE +> **Interviewer:** PO +> **Participant(s):** Stakeholder +> **Session type:** Initial discovery + +--- + +## General + +| ID | Question | Answer | +|----|----------|--------| +| Q1 | Who are the users? | Python engineers starting a new project who want rigorous tooling without the setup cost. | +| Q2 | What does the product do at a high level? | Provides a fully configured Python project skeleton: CI, quality tooling, test infrastructure, and an AI-assisted five-step delivery workflow. | +| Q3 | Why does it exist — what problem does it solve? | Setting up a production-grade Python environment from scratch is expensive and often skipped; engineers then accrue quality debt from day one. | +| Q4 | When and where is it used? | At project inception — cloned once, then evolved as features are added via the built-in workflow. | +| Q5 | Success — what does "done" look like? | An engineer clones the template and ships a meaningful first feature within a single session, with all quality gates passing. | +| Q6 | Failure — what must never happen? | The template introduces more friction than it removes, or locks engineers into choices they cannot override. | +| Q7 | Out-of-scope — what are we explicitly not building? | Runtime infrastructure (databases, queues, cloud deployment), UI frameworks, domain-specific business logic. | + +## Runtime Behaviour + +| ID | Question | Answer | +|----|----------|--------| +| Q8 | Should the template ship with any working feature, or be purely empty? | It should ship with exactly one working demonstration feature so engineers see the full workflow end-to-end. 
| + +## Feature: cli-entrypoint + +| ID | Question | Answer | +|----|----------|--------| +| Q9 | Which behavioural areas are in scope for the template's own feature backlog? | Just one simple command in the base package — useful for any starting project, simple enough not to bloat the app, and showcasing the template's capabilities end-to-end. | +| Q10 | What kind of command would be "useful for any starting project"? Candidate options presented: version, hello/greet, info/about, config show, health. | Stakeholder asked: "if I choose version, what will it add to my app/ folder?" — confirmed interest in version-style command after seeing the footprint (one file, ~10 lines, zero new dependencies). | +| Q11 | Three options presented: (A) `--help` only, (B) `--version` only, (C) `--help` + `--version` combined. Stakeholder also asked how a help/usage command would look in code and terminal. Full code sketches and tradeoff table provided. Which option for the demonstration feature? | Option C — `--help` + `--version` combined. `python -m agents_smith --help` shows app name, tagline, and available options. `python -m agents_smith --version` shows `agents-smith ` read from package metadata. Zero new dependencies, all code in `agents_smith/__main__.py`. 
| + +--- + +## Quality Attributes + +| ID | Attribute | Scenario | Target | Priority | +|----|-----------|----------|--------|----------| +| QA1 | Usability | When an engineer clones the template, they can ship a first feature within one session | < 60 minutes to first passing CI | Must | +| QA2 | Extensibility | When the engineer wants to override a default choice, they can do so without forking | Zero fork-required overrides | Must | + +--- + +## Pain Points Identified + +- Setting up production-grade Python from scratch is expensive and often skipped +- Quality debt accrues from day one when tooling is deferred + +## Business Goals Identified + +- Engineers clone and ship within a single session with all gates passing +- Template removes friction rather than adding it + +## Terms to Define (for glossary) + +- quality debt +- five-step delivery workflow + +## Action Items + +- [ ] Define detailed behavioural specification for `cli-entrypoint` feature +- [ ] Update glossary with new terms \ No newline at end of file diff --git a/docs/interview-notes/IN_20260501_local-bundle-reversal.md b/docs/interview-notes/IN_20260501_local-bundle-reversal.md new file mode 100644 index 0000000..a31a54d --- /dev/null +++ b/docs/interview-notes/IN_20260501_local-bundle-reversal.md @@ -0,0 +1,67 @@ +# IN_20260501_local-bundle-reversal — Revert GitHub-based bundled resolution to local bundle + +> **Status:** COMPLETE +> **Interviewer:** PO +> **Participant(s):** Stakeholder +> **Session type:** Scope refinement + +--- + +## General + +| ID | Question | Answer | +|----|----------|--------| +| Q1 | Why revert from GitHub-based download to local bundle? | GitHub-based resolution adds runtime network dependency, cache staleness risk, and implementation complexity that outweighs the freshness benefit for the default template source | +| Q2 | What should BundledTemplateSource do instead? 
| Read agentic files from `smith/data/` package directory via `importlib.resources` — no network calls, no caching, no external dependency for the default source | +| Q3 | What files go in `smith/data/`? | Agentic files only: AGENTS.md, .opencode/, .templates/, .flowr/ — derived from the agents-smith v8_release branch | +| Q4 | How is `smith/data/` kept in sync with agents-smith? | Manual script (`scripts/update-bundle.sh`) that downloads the agents-smith v8_release archive and copies agentic files to `smith/data/` | +| Q5 | Should `requests` still be a dependency? | Yes — UrlTemplateSource needs it for tar.gz/zip downloads. But bundled source has no runtime network dependency | +| Q6 | Should URL sources use caching? | No — URL sources re-download every time. No persistent cache for any source type | +| Q7 | What about BDD examples a1b2c3d4 (network failure) and e5f6g7h8 (cache fallback)? | Deprecate both — bundled source no longer needs network access. Add new Should examples for URL source download failure | +| Q8 | Should `TemplateSource.kind` still include "bundled"? | Yes — `kind="bundled"` stays. `smith connect` without `--from` defaults to `bundled:agents-smith` | +| Q9 | Should the `TemplateSourceAdapter` fallback be removed? | Yes — the adapter should just dispatch on `source.kind` with no fallback. Use cases pass the source directly | + +## Feature: smith-commands + +| ID | Question | Answer | +|----|----------|--------| +| Q10 | What happens on URL source download failure? | `smith connect --from ` exits with code 1 and an error message. No fallback to bundled source — the user explicitly chose a URL source | +| Q11 | What archive formats should UrlTemplateSource support? | `.tar.gz` and `.zip` — the two formats GitHub provides for branch/tag archives | +| Q12 | Should the agentic file filter apply to URL sources? | Yes — `_is_agentic_path()` filter applies to all URL sources. 
Only AGENTS.md, .opencode/, .templates/, .flowr/ are written | + +--- + +## Quality Attributes + +| ID | Attribute | Scenario | Target | Priority | +|----|-----------|----------|--------|----------| +| QA1 | Simplicity | When `smith connect` runs without `--from`, no network call is made | 0 network calls for bundled source | Must | +| QA2 | Reliability | When `smith connect --from ` fails to download, exit code 1 with clear error | < 1 second to report failure | Must | +| QA3 | Maintainability | When agents-smith v8_release updates, a single script updates `smith/data/` | 1 command to update bundled files | Must | + +--- + +## Pain Points Identified + +- GitHub-based resolution introduced runtime network dependency for the default use case +- Cache staleness was discovered during end-to-end testing (stale cache had only 2 files) +- GitHub download + cache logic was more complex than local bundle + +## Business Goals Identified + +- `smith connect` with no arguments should "just work" — no network required +- Template freshness is maintained by the update script, not runtime downloads + +## Terms to Define (for glossary) + +- Local Bundle (update Bundled Template Resolution entry) + +## Action Items + +- [x] Revert BundledTemplateSource to `importlib.resources`-based local bundle +- [x] Implement UrlTemplateSource (tar.gz/zip via requests, agentic filter, no cache) +- [x] Remove TemplateSourceAdapter fallback parameter +- [x] Deprecate BDD examples a1b2c3d4 and e5f6g7h8 +- [x] Add new URL source failure examples +- [x] Create ADR-007 superseding ADR-006 +- [x] Update spec documents \ No newline at end of file diff --git a/docs/interview-notes/IN_20260501_smith-commands-specification.md b/docs/interview-notes/IN_20260501_smith-commands-specification.md new file mode 100644 index 0000000..b5ec965 --- /dev/null +++ b/docs/interview-notes/IN_20260501_smith-commands-specification.md @@ -0,0 +1,158 @@ +# Interview Notes: smith-commands Feature Specification + +> 
**Date:** 2026-05-01 +> **Feature:** smith-commands (connect, disconnect, update, status) +> **Interviewer:** PO +> **Stakeholder:** nullhack +> **Session type:** Feature specification (behavioral rules and edge cases) + +--- + +## General Behavioral Rules + +### Stateless Operation + +- **smith is stateless.** There is no `.smith.yaml` metadata file. No connection state is stored. +- Connection state is inferred from the presence of the `# smith managed` section in `.gitignore` and which agentic files exist on disk. +- `smith connect`, `smith update`, and `smith disconnect` are stateless operations — they write or remove files based on what's currently on disk. + +### Agentic Files + +- The agentic file set is: **AGENTS.md**, **.opencode/**, **.templates/**, **.flowr/** +- **Pair-atomic rule:** AGENTS.md and .opencode/ are a pair — either both are written or neither. This is the core atomicity invariant. +- **.templates/ and .flowr/ are independent:** if they don't exist, they are written; if they already exist (and are gitignored by the `# smith managed` section), they are refused unless `--overwrite` is used. + +### .gitignore Section + +- On connect, smith adds a `# smith managed` section to `.gitignore` with entries for each agentic file pattern. +- On disconnect, smith **keeps** the `# smith managed` section in `.gitignore`. It serves as a guard for future smith usage — it records which files are agentic and should be treated specially. +- Disconnect removes the agentic files that are gitignored (i.e., listed in the `# smith managed` section). If a file/folder is in the agentic set but the `.gitignore` section does NOT ignore it, that means the user explicitly wants to track it — so smith does NOT remove it. +- If a `.gitignore` entry for an agentic file/folder is NOT preceded by `# smith managed` (i.e., the user added it manually outside the section), smith does not modify that entry. 
+ +### Template Source + +- Default template source: **agents-smith** (bundled with the agents-smith package). +- Override with `--from `: + - Local path: `--from ./my-templates` + - URL: `--from https://example.com/templates.tar.gz` + - Git repo: `--from git+https://github.com/user/repo.git#branch` (standard URL format with ref parameter) +- If `--from` points to a non-existent path or unreachable URL: **error, exit 1**. + +--- + +## Command Behavioral Rules + +### `smith connect [--from ] [--overwrite]` + +**Default behavior (fresh project, no agentic files):** +1. Resolve template source (default: agents-smith, or `--from`). +2. Stage all agentic files in a temp directory. +3. Validate: check for conflicts (existing agentic files). +4. Write AGENTS.md + .opencode/ atomically (pair-atomic: both or neither). +5. Write .templates/ independently (if absent, write it; if present and not gitignored by smith, refuse). +6. Write .flowr/ independently (same rule as .templates/). +7. Add `# smith managed` section to `.gitignore` with entries for all agentic files. +8. Report success: list files written. + +**When agentic files already exist:** +- If any agentic file/folder exists and IS gitignored by the `# smith managed` section → **conflict, exit 2**, list conflicting files, suggest `--overwrite`. +- If an agentic file/folder exists but is NOT in the `# smith managed` section (user tracks it manually) → do not overwrite it, skip it, write the rest. The user explicitly chose to track this file. +- `--overwrite`: replace ALL agentic files that are in the `# smith managed` section, regardless of conflicts. Does NOT touch files not in the smith-managed section. + +**When already connected (`.gitignore` has `# smith managed` section):** +- `smith connect` on an already-connected project = **auto-update** (same behavior as `smith update`). +- `smith connect --from ` on an already-connected project = update from the new source. 
+ +**Exit codes:** 0 = success, 1 = error (invalid args, source not found, IO failure), 2 = conflict (files exist without `--overwrite`). + +### `smith disconnect` + +**Default behavior (connected project):** +1. Identify agentic files listed in the `# smith managed` section of `.gitignore`. +2. Remove only the agentic files that ARE gitignored by the `# smith managed` section. +3. If an agentic file/folder is NOT gitignored by `# smith managed` (user chose to track it), do NOT remove it. +4. Keep the `# smith managed` section in `.gitignore` (it serves as a guard for future usage). +5. Report success: list files removed. + +**When not connected (no `# smith managed` section):** +- **No-op, exit 0.** No error, no message needed. + +**When partially connected (some agentic files missing):** +- Remove whatever agentic files ARE present and gitignored by `# smith managed`. No error for missing files. + +**Exit codes:** 0 = success (including no-op), 1 = error (IO failure). + +### `smith update [--from ]` + +**Default behavior (connected project):** +1. Resolve template source (default: agents-smith, or `--from`). +2. Re-download all agentic files from the template source. +3. Overwrite ALL agentic files that are in the `# smith managed` section (this is an intentional overwrite — update is the "refresh" operation). +4. Do NOT touch files not managed by smith. +5. Maintain the pair-atomic rule for AGENTS.md + .opencode/ (both or neither). +6. Report success: list files updated. + +**When not connected (no `# smith managed` section in `.gitignore`):** +- **Auto-connect:** same behavior as `smith connect` with the same `--from` flag. + +**When `--from` source is not found:** +- **Error, exit 1.** + +**Exit codes:** 0 = success, 1 = error (source not found, IO failure). + +### `smith status [--json]` + +**Default behavior (human-readable):** +- Check which agentic files exist on disk. 
+- If all agentic files present → report "Connected" with file list and template source (if determinable). +- If some agentic files present → report "Partial" with which files are present/missing, suggest `smith connect --overwrite` or `smith disconnect`. +- If no agentic files present but `# smith managed` section exists in `.gitignore` → report "Disconnected" with suggestion to `smith connect` to reconnect. +- If no agentic files and no `# smith managed` section → report "Not connected" with "Run smith connect to get started." + +**With `--json` flag:** +- Machine-readable JSON output with same information, suitable for scripting. + +**Exit codes:** 0 = success, 1 = error. + +--- + +## Edge Cases and Failure Modes + +### Partial connection (some files written, then failure) + +- The pair-atomic rule for AGENTS.md + .opencode/ means: if writing .opencode/ fails, AGENTS.md must be rolled back too. Both succeed or neither. +- .templates/ and .flowr/ are independent — a failure writing .flowr/ does not roll back .templates/. +- Temp-directory staging is used for AGENTS.md + .opencode/ to ensure atomicity. + +### .gitignore section manipulation + +- If `.gitignore` doesn't exist, create it with the `# smith managed` section. +- If `.gitignore` exists but doesn't have a `# smith managed` section, append the section at the end. +- If `.gitignore` has a `# smith managed` section already, update entries within it (add missing entries, do not remove existing entries unless they are for smith-managed files being disconnected). + +### User-modified agentic files + +- `smith disconnect` removes agentic files that are gitignored by `# smith managed`, regardless of whether the user modified them. This is clean separation. +- If the user wants to keep their changes, they should NOT gitignore the file (remove it from the `# smith managed` section), and smith will not remove it on disconnect. 
+ +### Template source failure mid-write + +- If the template source fails during download/extraction, exit 1 with an error message. No partial writes for the atomic pair (AGENTS.md + .opencode/). + +--- + +## Decisions Summary + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| State management | Stateless — no .smith.yaml | Simpler model; .gitignore section is sufficient to track managed files | +| Atomicity scope | AGENTS.md + .opencode/ are pair-atomic; .templates/ and .flowr/ are independent | Core agent config must be consistent; template/flow dirs are independent concerns | +| .gitignore on disconnect | Keep `# smith managed` section | Serves as guard for future smith usage | +| File removal on disconnect | Remove only gitignored agentic files | Files the user explicitly tracks are preserved | +| Connect on already-connected | Auto-update (same as update) | No need to force disconnect first | +| Update on not-connected | Auto-connect | Convenient; same as connect | +| Disconnect on not-connected | No-op, exit 0 | Idempotent; no error for clean state | +| Template source types | Bundled (agents-smith), local path, URL, git repo | Full flexibility from the start | +| Git source format | `git+https://...#branch` | Standard URL format with ref parameter | +| Status output | Human-readable by default, `--json` for scripting | Dual audience | +| Exit codes | 0/1/2 — success/error/conflict | Simple, covers the main cases; conflict maps to safety invariant | \ No newline at end of file diff --git a/docs/interview-notes/IN_20260501_stakeholder-reinterview.md b/docs/interview-notes/IN_20260501_stakeholder-reinterview.md new file mode 100644 index 0000000..7a1bdff --- /dev/null +++ b/docs/interview-notes/IN_20260501_stakeholder-reinterview.md @@ -0,0 +1,98 @@ +# IN_20260501_stakeholder-reinterview — Corrected product scope discovery + +> **Status:** COMPLETE +> **Interviewer:** PO +> **Participant(s):** Stakeholder +> **Session type:** Scope 
refinement (replaces IN_20260422 which captured the wrong product) + +--- + +## General + +| ID | Question | Answer | +|----|----------|--------| +| Q1 | Who are the users? | Software engineers/developers who work on multiple projects and want consistent AI-assisted workflows across all of them. | +| Q2 | What does the product do at a high level? | smith is an AI pair programming platform that assimilates ordinary projects into high-performing, AI-augmented systems. It connects standardised agent configurations (AGENTS.md, .opencode/, .templates/, .flowr/) to any project, enabling consistent AI-assisted workflows. Like Agent Smith in the Matrix, smith takes control of a project — then detaches when done. | +| Q3 | Why does it exist — what problem does it solve? | AI agents need structure. Without consistent agent configurations, each project has different .opencode agents, different workflows, and different templates. Engineers waste time maintaining these across projects. smith standardises the AI agent experience — connect, work, detach — the same agents, the same flows, every project. | +| Q4 | When and where is it used? | Anytime — plug in or out as needed. Works on any project directory, even legacy ones. Not limited to project inception. | +| Q5 | Success — what does "done" look like? | A uniform experience across projects: `smith connect` in any directory and you're immediately working with standard flows and agents. `smith disconnect` and the project is clean (no agentic files left, only .gitignore entries). Customisation is per-template: use `--from ` for a different agent template. | +| Q6 | Failure — what must never happen? | Destructive overwrites without explicit `--overwrite` flag. Rigid workflows that don't adapt to different projects. Complex connect/disconnect workflows. Partial connections — smith must either connect fully or write no files at all (atomic). Never silently overwrite customizations. 
| +| Q7 | Out-of-scope — what are we explicitly not building? | AI execution engine (smith configures agents, doesn't run them). CI/CD infrastructure. Package management. Language/framework enforcement. | + +## Connect/Disconnect + +| ID | Question | Answer | +|----|----------|--------| +| Q8 | When smith connects, what happens? | `smith connect` copies the default template's agentic files (AGENTS.md, .opencode/, .templates/, .flowr/) into the project directory. `smith connect --from ` copies from a specified template source instead of the default (agents-smith). | +| Q9 | When smith disconnects, what happens? | Removes agentic files from the project. Keeps .gitignore entries (managed section). If the user wants to push agentic files, they can remove entries from .gitignore. Disconnect means "I don't want the files here anymore" — if they want to continue with the files, they wouldn't call disconnect. | +| Q10 | How does --overwrite work? | smith refuses to connect if agentic files already exist (must disconnect first), unless `--overwrite` is explicitly passed. Destructive overwrites are only possible when the stakeholder forces it. | +| Q11 | How does smith handle .gitignore? | smith manages its own section in .gitignore, marked with a comment like `# smith managed`. On connect, it adds entries for the agentic files. On disconnect, it removes those entries (unless the user has removed them manually to push the files). | +| Q12 | Which agentic files get connected? | AGENTS.md, .opencode/, .templates/, .flowr/ — these four items are the standard set that smith connects to a project. | + +## Conflict Handling + +| ID | Question | Answer | +|----|----------|--------| +| Q13 | What if AGENTS.md or .opencode/ already exist in the project? | Warn and refuse to overwrite unless `--overwrite` is passed. These are core agent configs and should not be silently replaced. | +| Q14 | What if .flowr/ or .templates/ already exist? | Needs architect decision. 
Projects may have their own .flowr and .templates specific to that project. We should not overwrite those if already existing, but the user may want to override. The exact merge/replacement strategy needs architectural input. | + +## Naming and Branding + +| ID | Question | Answer | +|----|----------|--------| +| Q15 | What are the CLI and package names? | CLI command: `smith`. PyPI package: `agents-smith`. The branding uses the Matrix/Agent Smith theme from the official remote repo (https://github.com/nullhack/agents-smith). The local branding file is stale and wrong — it still has the old agents-smith/Greek theme. Must be replaced with the remote version. | + +## Template Source + +| ID | Question | Answer | +|----|----------|--------| +| Q16 | What is the relationship between smith and agents-smith? | Agents-Smith is the default template. `smith connect` uses agents-smith's agentic files by default. `smith connect --from <source>` uses a different template source. | +| Q17 | How does multi-project support work? | Each project gets its own copy of the agentic files. `smith connect` copies files into the project directory. The same template can be connected to multiple projects independently. `smith update` refreshes a project's agentic files from the source template. | + +## Feature: smith-commands + +| ID | Question | Answer | +|----|----------|--------| +| Q18 | What CLI commands should smith support? | Four commands: `smith connect [--from <source>]`, `smith disconnect`, `smith update`, `smith status`. All four are needed for the first feature to demonstrate the full connect/work/disconnect cycle end-to-end. | +| Q19 | Which commands go in the first feature? | All four — connect, disconnect, update, and status — as the single demonstration feature. 
| + +--- + +## Quality Attributes + +| ID | Attribute | Scenario | Target | Priority | +|----|-----------|----------|--------|----------| +| QA1 | Usability | When an engineer runs `smith connect` in any project directory, they can immediately start working with standard flows and agents | < 1 minute from connect to working | Must | +| QA2 | Safety | When smith connects to a project that already has agentic files, it refuses to overwrite without explicit `--overwrite` flag | Zero silent overwrites, ever | Must | +| QA3 | Clean separation | When smith disconnects from a project, no agentic files remain (only .gitignore entries) | Zero orphaned files after disconnect | Must | +| QA4 | Atomicity | When smith connects, either all agentic files are written or none are | No partial connections, ever | Must | + +--- + +## Pain Points Identified + +- Maintaining different .opencode agents and workflows across multiple projects is wasteful and inconsistent +- AI agents lack structure — each project reinvents agent configs from scratch +- No standardised way to "plug in" AI-assisted workflows to existing/legacy projects + +## Business Goals Identified + +- Uniform AI agent experience across all projects +- Instant setup — connect and go within minutes +- Clean connect/disconnect cycle — projects should be transformable and reversible + +## Terms to Define (for glossary) + +- connect — smith command that copies agentic files into a project directory +- disconnect — smith command that removes agentic files from a project directory +- agentic files — the set of files smith manages: AGENTS.md, .opencode/, .templates/, .flowr/ +- template source — the origin of agentic files (default: agents-smith; override with --from) +- assimilate — smith's core metaphor: enter a project, configure it with standard AI agents, transform it +- managed .gitignore section — a marked block in .gitignore that smith creates and maintains + +## Action Items + +- [ ] Architect to decide merge/overwrite 
strategy for .flowr/ and .templates/ when they already exist in a project +- [ ] Replace local branding.md with the remote version (Matrix/Agent Smith theme) +- [ ] Replace local logo.svg and banner.svg with remote versions +- [ ] Retire or archive the old IN_20260422 interview notes (captured wrong product scope) \ No newline at end of file diff --git a/docs/interview-notes/IN_20260501_temple8-dependency-resolution.md b/docs/interview-notes/IN_20260501_temple8-dependency-resolution.md new file mode 100644 index 0000000..9e5b882 --- /dev/null +++ b/docs/interview-notes/IN_20260501_temple8-dependency-resolution.md @@ -0,0 +1,72 @@ +# Interview Notes: Agents-Smith Dependency Resolution + +> **Status:** COMPLETE +> **Interviewer:** SA +> **Participant(s):** nullhack +> **Session type:** Scope refinement + +--- + +## General + +| ID | Question | Answer | +|----|----------|--------| +| Q1 | How should the bundled template source resolve template files? | Download from the agents-smith GitHub repository's `v8_release` branch at runtime, not from packaged local files | +| Q2 | Should we use stdlib `urllib.request` or `requests` for HTTP? | Use `requests` — cleaner API, better error handling, worth the dependency | +| Q3 | Should downloaded templates be cached locally? | Yes — cache in `~/.cache/smith/` to avoid re-downloading on every connect/update | +| Q4 | Should the default GitHub branch/tag be configurable? | No — default to `v8_release` for now; will change in future but not configurable today | +| Q5 | Should `smith/data/` (85 stale bundled files) be removed? | Yes — delete the entire `smith/data/` directory; it contains stale copies of the project's own agentic files | + +## Architecture: Bundled Template Source + +| ID | Question | Answer | +|----|----------|--------| +| Q6 | How should BundledTemplateSource download the archive? 
| Download `https://github.com/nullhack/agents-smith/archive/refs/heads/v8_release.tar.gz` as a tarball via GitHub's archive API | +| Q7 | How should the archive be extracted and resolved into FileSpec objects? | Extract to a temp directory, walk the extracted directory, and collect files matching the agentic file set (AGENTS.md, .opencode/, .templates/, .flowr/) | +| Q8 | What should happen on network failure? | Exit with code 1 and a clear error message indicating the bundled template source could not be downloaded | +| Q9 | What is the cache structure? | `~/.cache/smith/agents-smith/` — store the extracted template files; on subsequent resolves, check if cached files exist and are fresh enough before re-downloading | +| Q10 | What is the cache invalidation strategy? | Re-download when the cache is empty or on explicit `smith update`; future enhancement could add ETag/Last-Modified checking | + +## Dependency Change + +| ID | Question | Answer | +|----|----------|--------| +| Q11 | What dependency does this add? | `requests` — the only runtime dependency beyond stdlib | +| Q12 | Does this change the "zero runtime dependencies" constraint? 
| Yes — the constraint changes from "zero runtime dependencies" to "one runtime dependency (requests)" | + +--- + +## Quality Attributes + +| ID | Attribute | Scenario | Target | Priority | +|----|-----------|----------|--------|----------| +| QA1 | Usability | When an engineer runs `smith connect` without network, they get a clear error message | Error message within 1 second | Must | +| QA2 | Performance | When cache is warm, `smith connect` resolves templates from local cache | < 100ms for cached resolution | Should | +| QA3 | Reliability | When GitHub is temporarily unavailable, `smith update` fails gracefully with exit code 1 | No partial state on failure | Must | + +--- + +## Pain Points Identified + +- `smith/data/` contains 85 stale copies of the project's own `.opencode/`, `.flowr/`, `.templates/`, and `AGENTS.md` — these will go stale and are architecturally wrong + +## Business Goals Identified + +- The bundled template source should always resolve the latest agents-smith templates without requiring a new smith release +- Network-based resolution allows template updates to propagate without smith package updates + +## Terms to Define (for glossary) + +- **Bundled template resolution**: The process by which the default `agents-smith` template source downloads and caches template files from the agents-smith GitHub repository +- **Cache directory**: `~/.cache/smith/` — local storage for downloaded template files to avoid redundant network requests + +## Action Items + +- [ ] Add `requests` to `pyproject.toml` dependencies +- [ ] Rewrite `BundledTemplateSource` to download from GitHub instead of reading `smith/data/` +- [ ] Add local caching in `~/.cache/smith/agents-smith/` +- [ ] Delete `smith/data/` directory +- [ ] Update technical design doc (stack, module structure, template source resolution section) +- [ ] Update system.md (dependency constraint change) +- [ ] Write ADR for GitHub-based bundled template resolution +- [ ] Update glossary (Agents-Smith 
entry) \ No newline at end of file diff --git a/docs/features/completed/.gitkeep b/docs/post-mortem/.gitkeep similarity index 100% rename from docs/features/completed/.gitkeep rename to docs/post-mortem/.gitkeep diff --git a/docs/post-mortem/2026-04-14-ping-pong-cli-workflow-gaps.md b/docs/post-mortem/2026-04-14-ping-pong-cli-workflow-gaps.md deleted file mode 100644 index 7f1d054..0000000 --- a/docs/post-mortem/2026-04-14-ping-pong-cli-workflow-gaps.md +++ /dev/null @@ -1,176 +0,0 @@ -# Post-Mortem: ping-pong-cli — Workflow Gaps (v3.1) - -## Release Details - -| Field | Value | -|-------|-------| -| Version | v3.1.20260414 | -| Date | April 14, 2026 | -| Feature | ping-pong-cli | -| Status | APPROVED and shipped | -| Broken | Yes — game doesn't work | - ---- - -## What Was Shipped - -`ping_pong_cli/game.py` — 240 lines: - -- 15 top-level functions, zero classes -- No keyboard input (`get_input()` always returns `""`) -- Runs a hardcoded 100-frame demo then exits -- Uses raw `int` and `tuple[int,int]` — no value objects -- `render_game` has 3 levels of nesting -- 8-parameter function signatures - -Yet it passed: lint, typecheck, 100% coverage, 31 tests, reviewer APPROVED. - ---- - -## What Failed - -The acceptance criteria said: -> Given: The game is running and waiting for input -> When: The left or right arrow key is pressed -> Then: The paddle moves - -The implementation maps this to a unit test of `update_player("W")`. That test proves the function works in isolation. No test verifies that keyboard input actually reaches `update_player`. - -The game shipped with the acceptance criterion satisfied in a narrow technical sense ("paddle moves when 'W' is passed to the function") but broken in the broad user sense ("paddle doesn't move when I press W in the running game"). 
- ---- - -## Gap 1: Acceptance Criteria Don't Require End-to-End Verification - -### Problem - -The `scope` skill defines "Then must be a single observable, measurable outcome" but doesn't define **observable by whom**. The developer interpreted this as "observable in a unit test" — test calls `update_player("W")` returns expected result. - -### Fix - -In `scope` skill, add: - -> **Observable means observable by the end user.** If the criterion says "When the user presses W", the test must verify that pressing W in the running app produces the expected result — not just that calling `update_player("W")` returns the right number. If end-to-end testing isn't feasible, the criterion must explicitly state the boundary (e.g., "When update_player receives 'W'") so the gap is visible. - -In `verify` skill, add: - -> **Acceptance Criteria vs. Reality Check** -> -> For each criterion whose Given/When/Then describes user-facing behavior: -> - Read the test that covers it -> - If the test only exercises an internal function without going through the actual user-facing entry point, flag it as **COVERED BUT NOT VERIFIED** -> - A criterion that says "When the user presses W" is NOT verified by `test_update_player("W")` — it's verified by a test or manual check that sends W to the running app -> -> Any COVERED BUT NOT VERIFIED criterion → REJECTED - ---- - -## Gap 2: Object Calisthenics Listed But Not Enforced by Reviewer - -### Problem - -The `verify` skill listed all 9 Object Calisthenics rules. 
The reviewer read them but approved code with: - -| # | Rule | Violation in shipped code | -|---|------|--------------------------| -| 3 | Wrap primitives | `PlayerPosition = int`, `BallState = tuple[int,int]` are type aliases, not value objects | -| 4 | First-class collections | No collection classes | -| 7 | Small entities | `run_game_loop` is ~40 lines | -| 8 | ≤ 2 instance vars | No classes at all, but 8-parameter function signatures | - -The skill didn't say **what to do when violations are found**. Violations were treated as observations, not blockers. - -### Fix - -In `verify` skill, replace ObjCal prose with a structured table: - -> **Object Calisthenics — ANY violation is a REJECT** -> -> | # | Rule | How to check | PASS/FAIL | -> |---|------|-------------|-----------| -> | 1 | One level of indentation | Check nest depth in source | -> | 2 | No `else` after return | Search for `else` inside functions | -> | 3 | Wrap primitives | Bare `int`, `str` as domain concepts = FAIL | -> | 4 | First-class collections | `list[Type]` not wrapped = FAIL | -> | 5 | One dot per line | `a.b.c()` = FAIL | -> | 6 | No abbreviations | `calc`, `mgr` = FAIL | -> | 7 | Small entities | Lines per function >20 or class >50 = FAIL | -> | 8 | ≤ 2 instance vars | More than 2 per class = FAIL | -> | 9 | No getters/setters | `get_x()`, `set_x()` = FAIL | - ---- - -## Gap 3: REFACTOR Step Has No Verification Gate - -### Problem - -The `implementation` skill says to apply DRY, SOLID, Object Calisthenics during REFACTOR, but when done, it only runs `task test`, `task lint`, `task static-check`. None of those tools check nesting depth, function length, or value objects. The developer skips the self-check, runs the three commands, they all pass. - -### Fix - -In `implementation` skill, add after REFACTOR section: - -> **REFACTOR Self-Check (MANDATORY before commit)** -> -> 1. Count lines per function you changed. Any >20 → extract helper -> 2. Check nesting. 
Any >2 levels → extract function -> 3. Check bare primitives as domain concepts. `int` for paddle position → value object -> 4. Check parameters per function. >4 positional → group into dataclass -> -> If you skip this step, the reviewer WILL reject your code. - ---- - -## Gap 4: `timeout 10s uv run task run` Is Not a Playability Test - -### Problem - -The `verify` skill said: "check that startup completes without error before the timeout." The demo ran for 1.6 seconds and exited cleanly — startup completed, no error. The app passed without being interactive at all. - -### Fix - -In `verify` skill, replace the timeout check with: - -> **For apps with user interaction** (games, CLIs with prompts, web servers): -> - Run the app, provide sample input via stdin/subprocess -> - Verify output changes in response to input -> - A hardcoded demo that auto-plays without input is NOT a playability test -> -> If the app doesn't respond to user input → REJECTED - ---- - -## Gap 5: Tests Verify Functions, Not Behavior - -### Problem - -The `tdd` skill produces unit tests. Every test calls an isolated function. No test sends input to the running game. No test verifies the game loop integrates these functions correctly. 31 tests pass with 100% coverage but none test the actual gameplay loop. 
- -### Fix - -In `tdd` skill, add: - -> **Integration Test Requirement** -> -> For features with multiple components (game loops, handlers, pipelines): -> - Add at least ONE `@pytest.mark.integration` test -> - Test must exercise the full path from entry point to observable outcome -> - Must NOT call internal helpers directly — use the public entry point - ---- - -## Summary - -| Gap | Skill | Problem | Fix | -|-----|-------|---------|-----| -| 1 | scope + verify | "Observable" undefined = unit test passes | Define user-observable; add COVERED BUT NOT VERIFIED | -| 2 | verify | Object Calisthenics listed = suggestions | Any rule FAIL = REJECTED (table) | -| 3 | implementation | REFACTOR has no self-check gate | Add mandatory line/nesting check | -| 4 | verify | `timeout` = "doesn't hang" not "works" | Must accept and respond to input | -| 5 | tdd | All unit, no integration | Require one integration test | - ---- - -## Root Cause - -The skills already contained the right standards. The problem is that violations were treated as observations, not blockers. Each check needs a clear **FAIL = REJECTED** consequence with a structured table to fill in — so violations can't be glossed over in prose. diff --git a/docs/post-mortem/2026-04-16-ping-pong-cli-package-and-design-review.md b/docs/post-mortem/2026-04-16-ping-pong-cli-package-and-design-review.md deleted file mode 100644 index d9b6995..0000000 --- a/docs/post-mortem/2026-04-16-ping-pong-cli-package-and-design-review.md +++ /dev/null @@ -1,108 +0,0 @@ -# Post-Mortem: ping-pong-cli — Package Directory and Design Review Gaps - -## Context - -| Field | Value | -|-------|-------| -| Date | April 16, 2026 | -| Feature | ping-pong-cli (follow-up run after v3.1 workflow fixes) | -| Branch | feat/po-workflow-redesign-v4 | - -This post-mortem was conducted after a second ping-pong-cli test run on the updated v3.1 workflow. Two systemic failures were identified that the v3.1 fixes did not address. 
- ---- - -## Failure 1: Code Created in Wrong Package Directory - -### What Happened - -The developer created production code under `python_project_template/` (the template's own package) instead of `ping_pong_cli/` (the feature's package). The correct package name was visible in `pyproject.toml` under `[tool.setuptools] packages`, but no step in the workflow required the developer to read it before writing code. - -### Why It Happened - -The `implementation` skill's Step 2 (Architecture) listed prerequisites and module structure instructions, but contained no explicit step to: -1. Read `pyproject.toml` to determine the correct package name -2. Confirm the package directory exists on disk -3. Record the package name as a hard constraint before writing any files - -Without this verification, the developer defaulted to a plausible-looking name rather than the actual configured name. - -### Impact - -All production code was placed in the wrong directory. The feature appeared to work during development (imports resolved within the wrong package) but would have failed on any fresh install or CI run. - -### Fix Applied - -Added a **Package Verification** block at the top of Step 2 in `implementation/SKILL.md` (before prerequisites): - -``` -1. Read pyproject.toml → [tool.setuptools] → record packages = [""] -2. Confirm that directory exists on disk: ls / -3. Write the correct package name at the top of working notes -4. All new source files go under / — never under a template placeholder -``` - -Added a corresponding check row to `verify/SKILL.md` section 4g: - -> `Imports use correct package name` — confirm all imports match `[tool.setuptools] packages`, not a template placeholder - ---- - -## Failure 2: Design Principle Violations Not Caught in Review - -### What Happened - -The reviewer approved code containing getters and setters (`get_x()` / `set_x()` pairs), violating Object Calisthenics Rule 9. 
The violation was visible in the code but was not caught because the review process had no structured mechanism for the developer to declare their own compliance before asking for review. - -### Why It Happened - -The per-test reviewer check asked the reviewer to verify YAGNI > KISS > DRY > SOLID > ObjCal, but provided no structured checklist or required evidence format. The reviewer was scanning for violations rather than verifying explicit claims. When a reviewer is reading unfamiliar code for the first time, getter/setter patterns can be overlooked if they are not explicitly flagged. - -Additionally, the reviewer had no "audit target" — there was nothing the developer had committed to that the reviewer could directly compare against the code. - -### Impact - -OC Rule 9 (tell-don't-ask) was violated. The design choice propagated into the committed codebase, requiring a later refactor. - -### Fix Applied - -Added a **Design Self-Declaration** step between REFACTOR and REVIEWER CHECK in `implementation/SKILL.md`: - -- Developer fills a checklist covering YAGNI, KISS, DRY, SOLID (all 5 principles), and OC Rules 1–9 -- Each item requires `file:line` evidence or an explicit "does not apply" note -- The filled checklist is sent to the reviewer as the audit target - -Updated the **REVIEWER CHECK** response template from a 3-line compact format to an 11-row structured comparison table (YAGNI, KISS, DRY, SOLID-S/O/L/I/D, OC-1-9, Design patterns, Semantic alignment): - -- Developer Claims column (what the developer declared) -- Reviewer Verdict column (independent verification) -- Evidence column (`file:line` required for every FAIL) -- Any FAIL row = rejection - -Updated the Cycle State phases to include `SELF-DECLARE` between REFACTOR and REVIEWER: - -``` -RED → GREEN → REFACTOR → SELF-DECLARE → REVIEWER(code-design) → COMMITTED -``` - -Updated `session-workflow/SKILL.md` Cycle State phase list and Rule 6 to include `SELF-DECLARE`. 
- -Updated `reviewer.md` per-test Step 4 section to reference the structured table and load `skill implementation` for the full protocol. - ---- - -## Summary - -| Failure | Root Cause | Fix | -|---------|-----------|-----| -| Code in wrong package | No package verification step before writing code | Package Verification block added to Step 2 | -| OC Rule 9 violation approved | No structured self-declaration; reviewer had no audit target | Design Self-Declaration checklist per test; 11-row verification table | - ---- - -## Systemic Pattern - -Both failures share the same root cause: **the workflow relied on agents noticing problems rather than proving compliance**. The fixes shift the burden: - -- Package verification: developer must prove the package name is correct before writing the first line -- Design self-declaration: developer must prove each principle is satisfied before asking for review; reviewer verifies claims rather than scanning from scratch diff --git a/docs/post-mortem/PM_20260501_conflict-exit-code-removal.md b/docs/post-mortem/PM_20260501_conflict-exit-code-removal.md new file mode 100644 index 0000000..4f670bf --- /dev/null +++ b/docs/post-mortem/PM_20260501_conflict-exit-code-removal.md @@ -0,0 +1,26 @@ +# PM_20260501_conflict-exit-code-removal: Planned code removed as "dead" — distinction between dead code and TDD-not-yet-reached code + +## Failed At + +Design review (7th pass) — reviewer flagged ConflictReport and EXIT_CONFLICT = 2 as "dead code" and "cross-document inconsistency." They were removed. On reflection, the interview notes (`IN_20260501_smith-commands-specification.md:58,66`) explicitly define exit code 2 for conflicts. The code was removed, then restored, then removed again. + +## Root Cause + +The reviewer operates at the feature level (current TDD examples) while the SA created typed stubs at the architecture level. 
Code that matches the domain model and technical design but hasn't been reached by the TDD cycle yet is **planned code**, not dead code. Dead code contradicts the architecture; planned code hasn't been exercised yet. + +## Missed Gate + +The review skill has no step to check whether flagged "dead code" exists in the domain model, technical design, or interview notes before recommending removal. Without this check, planned code is indistinguishable from dead code. + +## Fix + +1. **Remove planned code now (TDD perspective):** From a strict TDD perspective, code that no test exercises should not exist yet. ConflictReport and EXIT_CONFLICT = 2 are removed. When the feature requires exit code 2 scenarios (e.g., a fresh project with existing agentic files), they will be added back organically through the RED-GREEN-REFACTOR cycle. +2. **Process change for the template:** Stubs should be created per-feature during feature planning, not all at once during project-structuring. This eliminates the planned-but-not-reached gap entirely. The SA creates the package skeleton (directories, `__init__.py`, port interfaces, aggregate root signatures); feature planning creates typed stubs only for the examples defined in the `.feature` file. +3. **Review skill update:** Before flagging code as "dead," the reviewer must check the domain model / technical design / interview notes. Code that matches the architecture but lacks tests should be flagged as WARN (planned-not-reached), not REJECT (dead). 
+ +## Restart Check + +- [x] ConflictReport and EXIT_CONFLICT = 2 removed from code +- [x] Spec docs consistent: exit codes are 0 (success) and 1 (error) +- [x] Feature file has no examples asserting exit code 2 +- [ ] When a future feature requires exit code 2, it will be added via TDD (RED test first, then implementation) \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_coverage-test-in-features-folder.md b/docs/post-mortem/PM_20260501_coverage-test-in-features-folder.md new file mode 100644 index 0000000..5124c3f --- /dev/null +++ b/docs/post-mortem/PM_20260501_coverage-test-in-features-folder.md @@ -0,0 +1,28 @@ +# PM_20260501/coverage-test-in-features-folder: Coverage-boosting test placed in features folder instead of unit folder + +## Failed At + +Structure review — adding a test for the `disconnect()` empty-patterns branch to reach 100% coverage. + +## Root Cause + +The reviewer identified that `connection.py:82` (`return []` when `has_section()` is True but `get_patterns()` returns empty) was not covered by any BDD example. Instead of flagging this as a gap that requires either a new BDD example (if the behavior is user-facing) or a unit test in `tests/unit/` (if it's an implementation branch), the SE added a test function directly to `tests/features/smith_commands/disconnect_test.py`. + +The `tests/features/` folder is exclusively for BDD scenario tests that trace back to `@id` tags in the `.feature` file. The new test `test_smith_commands_disconnect_empty_patterns` has no corresponding `@id` tag in the feature file, violating the traceability contract. + +## Missed Gate + +The TDD skill and structure review skill both require that feature tests correspond to BDD examples. Coverage-boosting tests that exercise implementation branches not covered by BDD examples belong in `tests/unit/`, not `tests/features/`. + +## Fix + +1. 
Move `test_smith_commands_disconnect_empty_patterns` from `tests/features/smith_commands/disconnect_test.py` to `tests/unit/domain/test_connection.py` (or a new unit test file for Connection). +2. Remove the BDD-style docstring (Given/When/Then) since it's a unit test, not a feature test. +3. Write the test as a plain unit test with a descriptive function name. +4. Ensure the test still covers the `connection.py:82` branch for 100% coverage. + +## Restart Check + +- [ ] No tests in `tests/features/` lack a corresponding `@id` tag in the feature file +- [ ] Coverage-boosting tests are in `tests/unit/`, not `tests/features/` +- [ ] Feature tests use BDD-style docstrings with `@id` tags; unit tests use plain descriptive names \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_missing-feature-test-template.md b/docs/post-mortem/PM_20260501_missing-feature-test-template.md new file mode 100644 index 0000000..a8793a6 --- /dev/null +++ b/docs/post-mortem/PM_20260501_missing-feature-test-template.md @@ -0,0 +1,39 @@ +# PM_20260501_missing-feature-test-template: No test stub template for BDD feature scenarios + +## Failed At + +project-structuring — SA generated test stubs using `...` ellipsis bodies and carried MoSCoW tags into docstrings, instead of the established `@pytest.mark.skip(reason="not yet implemented")` pattern. + +## Root Cause + +The `.templates/` directory has no template for feature BDD test stubs. The `structure-project` skill references design artifacts (feature file, technical design, domain model) but provides no test stub format to follow. Without a template, the agent invented its own conventions: + +1. **`...` ellipsis bodies** instead of `@pytest.mark.skip(reason="not yet implemented")` — silently passing tests instead of being explicitly skipped +2. **MoSCoW tags (`Must`/`Should`) in `@id` lines** — the feature file had `@id:xxx Must` which leaked into test docstrings +3. 
**Naming convention** — `test_smith_commands_` used instead of `test__` + +## Missed Gate + +The `stubs_traceable` condition checks that all `@id` tags have corresponding test stubs, but does not validate: +- Whether stubs use the correct skip pattern +- Whether docstrings contain extraneous content (MoSCoW tags) +- Whether the naming convention matches project standards + +## Fix + +1. **Add template** `.templates/tests/features//_test.py.template` with the canonical format: +```python +import pytest + +@pytest.mark.skip(reason="not yet implemented") +def test__<@id>() -> None: + """ + <@id steps raw text including new lines> + """ +``` +2. **Update `structure-project` skill** to reference the new template when generating test stubs. +3. **Update `stubs_traceable` condition** to validate stub format (skip decorator, no MoSCoW in docstrings, naming convention). + +## Restart Check + +SA verifies that all test stubs use `@pytest.mark.skip(reason="not yet implemented")`, have no MoSCoW tags in docstrings, and follow the `test__` naming convention. \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_missing-overwrite-flag.md b/docs/post-mortem/PM_20260501_missing-overwrite-flag.md new file mode 100644 index 0000000..e38049f --- /dev/null +++ b/docs/post-mortem/PM_20260501_missing-overwrite-flag.md @@ -0,0 +1,54 @@ +# PM_20260501_missing-overwrite-flag: --overwrite CLI flag not implemented despite Must-priority BDD examples + +## Failed At + +Development — TDD cycle (green phase). The `--overwrite` flag was present in the interview notes, feature spec, domain model, and technical design, but never implemented in `cli.py` or `Connection.connect()`. + +## Root Cause + +The `--overwrite` flag was specified at **every planning stage** but dropped during implementation: + +1. **Interview notes** (IN_20260501_smith-commands-specification.md): Q6 "Destructive overwrites without explicit `--overwrite` flag" listed as a failure mode. 
Q10 explicitly describes `--overwrite` behavior. Q13 confirms "refuse to overwrite unless `--overwrite` is passed." + +2. **Feature spec** (smith-commands.feature): Two Must-priority examples reference it — `@id:2a5f83d0` and `@id:7d22e1d6`, both using `smith connect --overwrite`. + +3. **Domain model** (domain_model.md line 23): `ConnectionRequested` event includes `[--overwrite]` in the command signature. + +4. **Technical design** (technical_design.md lines 93, 101, 617, 624, 650): `--overwrite` is documented as a CLI flag, a configuration key, and part of the safety invariant. + +5. **Implementation**: **Missing entirely.** `cli.py` has no `--overwrite` argument. `Connection.connect()` has no `overwrite` parameter. `Connection._resolve_specs()` has the domain logic for skipping user-tracked files, but the flag to bypass it was never wired through. + +The failure occurred at the **TDD green phase** — when writing the minimum production code to make failing tests pass, the `--overwrite` flag was never added because no test exercised the CLI handler with the `--overwrite` argument. All tests for examples `2a5f83d0` and `7d22e1d6` tested the domain logic (`Connection._resolve_specs`) through in-memory stubs, which validated the skip/overwrite behavior in isolation but never verified the CLI-to-domain wiring. + +## Missed Gate + +**Structure review** — the review verified test coverage and BDD example pass rate, but did not trace each `@id` example from CLI invocation through to domain behavior. The gate checked "does a test function exist for each `@id`" but not "does each `@id` that references a CLI flag actually test that the flag reaches the domain layer." + +Additionally, the **definition-of-done** gate (if it was applied) should have verified that the technical design spec's CLI interface section matches the actual `cli.py` argument parser. The spec lists `smith connect [--from ] [--overwrite]` but the parser only has `--from`. 
+ +## Stage-by-Stage Trace + +| Stage | `--overwrite` Present? | Gap | +|-------|----------------------|-----| +| Interview (IN_20260501) | Yes — Q6, Q10, Q13, QA2 | None | +| Feature spec (smith-commands.feature) | Yes — `@id:2a5f83d0`, `@id:7d22e1d6` | None | +| Domain model | Yes — `ConnectionRequested` event | None | +| Technical design | Yes — CLI interface, config keys, safety invariant | None | +| TDD green phase | **No** — `cli.py` and `Connection.connect()` never received the flag | **Dropped here** | +| Structure review | Not checked — no CLI-to-domain traceability gate | **Missed here** | + +## Fix + +1. Add `--overwrite` argument to the `connect_parser` in `cli.py` +2. Wire `--overwrite` through `handle_connect` to `Connection.connect()` +3. Update `Connection.connect()` to accept an `overwrite` parameter that bypasses the skip-user-tracked-files logic +4. Write CLI-level integration tests for `smith connect --overwrite` +5. Add a review gate that verifies every `@id` example in the feature spec traces to a CLI handler test (not just a domain-layer test) +6. 
Add a definition-of-done check that compares the technical design's CLI interface section against the actual argument parser + +## Restart Check + +- Verify `smith connect --overwrite` works end-to-end from CLI through domain layer +- Verify BDD examples `2a5f83d0` and `7d22e1d6` pass as CLI integration tests +- Verify `smith status` suggests `--overwrite` in the partial connection message +- Run `task lint && task static-check && task test` and confirm 0 errors \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_moscow-gherkin-tags.md b/docs/post-mortem/PM_20260501_moscow-gherkin-tags.md new file mode 100644 index 0000000..e166c25 --- /dev/null +++ b/docs/post-mortem/PM_20260501_moscow-gherkin-tags.md @@ -0,0 +1,23 @@ +# PM_20260501_moscow-gherkin-tags: MoSCoW priority injected into Gherkin @id tags + +## Failed At + +bdd-features — stakeholder: "why should must etc [be] in the top of the examples?" + +## Root Cause + +The `moscow.md` knowledge file instructs the PO to "classify each candidate Example as Must/Should/Could" but doesn't specify where to record the classification. The `feature.feature.template` has no field for MoSCoW priority. The agent conflated classification (an internal triage step) with Gherkin output, appending MoSCoW labels to the `@id` tag line (`@id:SC-001 Must`) and later as separate tags (`@must @id:...`), neither of which belongs in the feature file. + +## Missed Gate + +The `write-bdd-features` skill loads `[[requirements/moscow]]` and `[[requirements/gherkin]]` but neither document states where MoSCoW classification should be recorded or that it should NOT appear in the .feature file. The skill instructions say "Classify each Example per [[requirements/moscow]]" without specifying the output location. The template's `@id:` format gives no hint about priority tagging. + +## Fix + +1. 
**`moscow.md`**: Add a note that MoSCoW classification is for internal triage only and must NOT appear as Gherkin tags or in the .feature file. Priority can be tracked in stories.md or a separate planning artifact. +2. **`write-bdd-features` skill (SKILL.md)**: Clarify step 4 — "Classify each Example per [[requirements/moscow]]" → add "Record classification in stories.md; do NOT add MoSCoW tags to Examples in the .feature file." +3. **`feature.feature.template`**: Add a comment in the template that `@id:` tags are for traceability only, not for priority classification. + +## Restart Check + +SA verifies that no `.feature` file contains `@must`, `@should`, `@could`, or MoSCoW labels on `@id` lines. \ No newline at end of file diff --git a/docs/post-mortem/PM_20260501_reviewer-fixing-code.md b/docs/post-mortem/PM_20260501_reviewer-fixing-code.md new file mode 100644 index 0000000..0e0aad4 --- /dev/null +++ b/docs/post-mortem/PM_20260501_reviewer-fixing-code.md @@ -0,0 +1,41 @@ +# PM_20260501_reviewer-fixing-code: Reviewer fixing code instead of rejecting and routing back to TDD + +## Failed At + +review-gate (Design/Structure/Conventions review) — stakeholder: "Why are reviewers not done properly? Why is R fixing code instead of moving the state back to TDD with a description of what needs to be changed?" + +## Root Cause + +Three process violations occurred simultaneously, all stemming from conflating the reviewer role with the implementer role: + +1. **Reviewer approved despite code smells**: The design review passed on checks 4-9 (Object Calisthenics, code smells, pattern gaps) with "minor" or "acceptable trade-off" verdicts for issues that should have been REJECTED — specifically: union return type in `_filter_conflicts`, duplicated logic between `_filter_conflicts`/`_skip_unmanaged`, dead `detect_state()` method, unreachable ConflictReport path, unused `project_dir` parameter, and two duplicated write-and-commit blocks in `connect()`. + +2. 
**Fixes applied by orchestrator instead of routing back to TDD**: When the first design review REJECTED on `.smith.yaml` inconsistency, the orchestrator fixed `technical_design.md`, `system.md`, `glossary.md`, and ADR-004 directly, then re-ran the review. This should have been: REJECT → route back to TDD cycle with findings → SE implements fixes → re-review. + +3. **Conventions review bypassed the flow**: Lint errors (ruff) and type errors (pyright) were fixed directly by the orchestrator instead of being treated as review findings. The conventions review should have REJECTED with a list of violations, then routed back for the SE to fix them in a TDD cycle. + +## Missed Gate + +The **review-gate** state in the development flow. The flow defines three review sub-gates (Design, Structure, Conventions). Each should produce either APPROVED or REJECTED. On REJECTED, the flow should transition back to the TDD cycle with specific findings — not apply fixes inline. + +The reviewer's job is to find problems and report them. The SE's job is to fix them. The orchestrator conflated these by having the reviewer/subagent report findings, then immediately fixing them itself before re-running the review. + +## Fix + +1. **Reviewer MUST NOT modify production code or tests.** The reviewer's output contract is findings only — a REJECTED report with file:line evidence or an APPROVED verdict. No edits. + +2. **On REJECTED, route back to TDD cycle** with the specific findings as input. The SE (or orchestrator acting as SE) picks up the findings, implements fixes, re-runs tests, then re-enters the review gate. + +3. **"Minor" is not a pass.** Code smells that are acknowledged but hand-waved as "acceptable trade-offs" should still be flagged. The reviewer should note them; the SE decides whether to fix or defer. Deferring requires explicit acknowledgment, not silent approval. + +4. 
**Spec doc fixes are still code changes.** When a review finds that spec docs are inconsistent with implementation, the fix is: REJECT → route back to the appropriate flow state (e.g., technical-design or adr-draft) → that state's owner applies the fix → re-review. The orchestrator should not fix docs on behalf of another state's owner. + +5. **Lint/type errors are review findings, not auto-fix opportunities.** Running `ruff --fix` or manually fixing lint errors during review is the SE's job, not the reviewer's. The conventions review should report violations; the SE fixes them in the next TDD cycle. + +## Restart Check + +SA verifies that: +- [ ] All three review sub-gates produce APPROVED/REJECTED verdicts without modifying any files +- [ ] On REJECTED, the flow transitions back to TDD with a findings document +- [ ] No code or spec doc changes are made during the review-gate state +- [ ] Code smells are explicitly listed in findings rather than dismissed as "minor" diff --git a/docs/post-mortem/PM_20260501_se-dirtying-living-docs.md b/docs/post-mortem/PM_20260501_se-dirtying-living-docs.md new file mode 100644 index 0000000..d969291 --- /dev/null +++ b/docs/post-mortem/PM_20260501_se-dirtying-living-docs.md @@ -0,0 +1,27 @@ +# PM_20260501_se-dirtying-living-docs: SE modified spec documents during TDD/review cycle without flow approval + +## Failed At + +Design review (passes 3–8) — the SE directly modified living specification documents (domain_model.md, technical_design.md, glossary.md, product_definition.md, system.md, context_map.md, feature file, ADRs) to fix inconsistencies found by the reviewer, without routing those changes through the appropriate flow states. + +## Root Cause + +The review-design skill has no instruction to distinguish between production code fixes (which the SE can make directly) and specification document fixes (which belong to different flow states and require approval). 
When the reviewer found cross-document inconsistencies, the SE treated spec docs the same as code — direct edit during the review cycle. + +This violates the flow contract: spec documents are owned by specific states (architecture-assessment owns domain_model.md, technical-design owns technical_design.md, etc.). The development flow has no state that owns spec docs, so the SE has no authority to modify them. + +## Missed Gate + +The review-design skill says: "IF a smell is found → list it in findings" and "Write results to artifacts listed in the current state's out attrs. If findings affect artifacts outside the output contract, flag them in output notes for the appropriate step." The skill already instructs the SE to **flag** out-of-contract changes, not **make** them. The orchestrator ignored this instruction. + +## Fix + +1. **Process rule:** During TDD/review, the SE may ONLY modify production code and test code. Spec document inconsistencies must be FLAGGED in review output notes, not fixed directly. +2. **Review-design skill update:** Add an explicit rule: "NEVER modify specification documents (domain_model.md, technical_design.md, glossary.md, product_definition.md, system.md, context_map.md, ADRs, feature files) during review. These are owned by other flow states. Flag inconsistencies in output notes for the appropriate step." +3. **Flow mechanism:** When the reviewer flags spec doc inconsistencies, the orchestrator should create a separate issue/task to route those fixes through the appropriate flow state, rather than fixing them inline during development. + +## Restart Check + +- [ ] Review output notes contain flagged spec doc inconsistencies instead of inline fixes +- [ ] No spec documents are modified during the TDD/review cycle +- [ ] Spec doc fixes are routed through the appropriate flow state (architecture, planning, etc.) 
\ No newline at end of file diff --git a/docs/research/computer-science/artificial-intelligence/liu_et_al_2023.md b/docs/research/computer-science/artificial-intelligence/liu_et_al_2023.md new file mode 100644 index 0000000..8ae77e7 --- /dev/null +++ b/docs/research/computer-science/artificial-intelligence/liu_et_al_2023.md @@ -0,0 +1,45 @@ +# Lost in the Middle (Positional Attention Degradation) — Liu et al., 2023 + +## Citation + +Liu, N. F., Lin, K., Hewitt, J., Paranjape, A., Bevilacqua, M., Petroni, F., & Liang, P. (2023). "Lost in the Middle: How Language Models Use Long Contexts." *Transactions of the Association for Computational Linguistics (TACL)*, arXiv preprint arXiv:2307.03172. + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Language models exhibit U-shaped attention pattern - information at beginning and end of long context receives significantly more attention than middle content. + +## Core Findings + +1. **U-Shaped Performance Pattern**: Performance highest when relevant information occurs at beginning or end of input context, degrades significantly in middle positions +2. **Multi-Document QA Results**: Even explicitly long-context models struggle to access information in middle of long contexts +3. **Key-Value Retrieval Degradation**: Performance drops substantially when target information positioned in middle sections +4. **Primacy/Recency Effects**: Beginning benefits from setting attention baseline, end benefits from proximity to output position +5. **Context Length Impact**: Performance degradation becomes more pronounced as context length increases + +## Mechanism + +Transformer attention patterns distribute weight unevenly across sequence positions. Beginning content benefits from primacy effects (first tokens establish attention baseline), end content benefits from recency effects (proximity to output). 
Middle content competes with both extremes and receives proportionally less attention weight, causing information retrieval failures. + +## Relevance + +Critical for long-context AI applications, prompt engineering strategies, context window utilization. Essential for understanding attention limitations in large language models, designing effective retrieval-augmented generation systems, optimizing document processing workflows. + +## Related Research + +Published in TACL 2023, builds on transformer attention mechanisms research. Authors include Nelson Liu, John Hewitt (Stanford), Percy Liang (Stanford). Connects to attention analysis, long-context modeling, retrieval-augmented generation literature. Foundational for understanding positional biases in modern language models. diff --git a/docs/research/design/accessibility/w3c_wcag21_2018.md b/docs/research/design/accessibility/w3c_wcag21_2018.md new file mode 100644 index 0000000..372c03e --- /dev/null +++ b/docs/research/design/accessibility/w3c_wcag21_2018.md @@ -0,0 +1,48 @@ +# Web Content Accessibility Guidelines 2.1 — W3C, 2018 + +## Citation + +W3C. (2018). *Web Content Accessibility Guidelines (WCAG) 2.1*, W3C Recommendation 5 June 2018. https://www.w3.org/TR/WCAG21/ + +## Source Type + +Industry Standard + +## Method + +Specification + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Text and images of text must have a contrast ratio of at least 4.5:1 against their background (Level AA) to ensure readability for users with visual impairments, including low vision and color blindness. + +## Core Findings + +1. **Contrast requirements**: Normal text requires 4.5:1 contrast (AA) or 7:1 (AAA); large text (18pt+ or 14pt+ bold) requires 3:1 (AA) or 4.5:1 (AAA). +2. **Four principles framework**: Web content must be perceivable, operable, understandable, and robust (POUR). +3. 
**Three conformance levels**: A (minimum), AA (standard target), AAA (enhanced) with 61 success criteria total in WCAG 2.1. +4. **Legal adoption worldwide**: WCAG 2.1 AA is legally mandated by EU Web Accessibility Directive, US Section 508, and many national laws. +5. **Calculation formula**: Contrast ratio = (L1 + 0.05) / (L2 + 0.05) where L1 is lighter color's relative luminance, L2 is darker. +6. **Exemptions**: Incidental text, logotypes, and inactive UI components are exempt from contrast requirements. +7. **New 2.1 criteria**: Added 17 new success criteria focused on mobile accessibility, low vision, and cognitive disabilities. + +## Mechanism + +Relative luminance is computed from sRGB values via gamma correction: for each RGB channel, divide by 255, then apply gamma function (≤ 0.04045 divide by 12.92; > 0.04045 use ((V + 0.055) / 1.055)^2.4), then combine as L = 0.2126×R + 0.7152×G + 0.0722×B. The contrast ratio formula produces values from 1:1 (identical colors) to 21:1 (black on white). This mathematical approach ensures consistent, measurable accessibility standards. + +## Relevance + +Essential standard for web accessibility compliance and legal requirements globally. Critical for brand palette design, interface color systems, and any digital content requiring inclusive design. Directly applicable to color contrast validation, automated accessibility testing, and design system documentation. Required for government, education, and increasingly private sector websites. 
+ +## Related Research + +- (Albers, 1963) — Color theory foundations showing why contrast relationships matter more than absolute colors +- (EN 301 549, 2014) — European standard incorporating WCAG requirements for ICT accessibility \ No newline at end of file diff --git a/docs/research/design/visual/airey_2010.md b/docs/research/design/visual/airey_2010.md new file mode 100644 index 0000000..34f349b --- /dev/null +++ b/docs/research/design/visual/airey_2010.md @@ -0,0 +1,48 @@ +# Logo Design Love — Airey, 2010 + +## Citation + +Airey, D. (2010). *Logo Design Love: A Guide to Creating Iconic Brand Identities*. New Riders. ISBN 978-0-321-66436-3. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Strong logos have one dominant feature — not two, three, or four. The design process must be monochrome-first to focus on the core idea. + +## Core Findings + +1. **Single dominant feature rule**: Effective logos have exactly one memorable element that viewers can identify and recall. +2. **Monochrome-first methodology**: Color should be added only after the black-and-white shape proves its strength and recognition. +3. **Systematic stress-testing**: Five evaluation checkpoints ensure logo robustness across contexts: 5-second test, blur test, scalability test, monochrome test, proximity test. +4. **Written brief requirement**: No design work should proceed without documented project parameters and goals. +5. **Iterative refinement process**: 20-40 rough concepts narrowed to 3-5 for vector refinement before color application. +6. **Appropriateness over novelty**: Logo style should match the brand's personality and context rather than following design trends. +7. **Scalability imperative**: Logos must work effectively from 16px favicon size to 500px+ display applications. 
+ +## Mechanism + +Airey's process follows explicit checkpoints: (1) written brief before sketching, (2) 20-40 rough concepts in black ink on paper, (3) select top 3-5 and refine in vector format, (4) add color only after monochrome shape proves strong, (5) stress-test across sizes and backgrounds. The "one thing" test eliminates designs with competing elements. Each evaluation method targets specific failure modes: blur test catches detail-dependent designs, scalability test reveals breakdown points, proximity test ensures differentiation from competitors. + +## Relevance + +Essential methodology for brand identity design, startup logo creation, and visual identity systems. The stress-testing framework applies to any visual design requiring recognition across contexts. Monochrome-first principle prevents common failures in digital interfaces where color may not be available (accessibility, printing, small sizes). + +## Related Research + +- (Rand, 1985) — Foundational principles of logo simplicity and testing methods +- (Wertheimer, 1923) — Gestalt principles underlying logo recognition and memorability \ No newline at end of file diff --git a/docs/research/design/visual/albers_1963.md b/docs/research/design/visual/albers_1963.md new file mode 100644 index 0000000..ebd30cb --- /dev/null +++ b/docs/research/design/visual/albers_1963.md @@ -0,0 +1,48 @@ +# Interaction of Color — Albers, 1963 + +## Citation + +Albers, J. (1963). *Interaction of Color*. Yale University Press. ISBN 978-0-300-01846-8. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Color is the most relative medium in art — the same physical color is perceived differently depending on its surrounding colors and the quantity in which it appears. + +## Core Findings + +1. **Color relativity principle**: "Every perception of color is an illusion... we do not see colors as they really are. 
In our perception they alter one another." +2. **Context-dependent perception**: The same physical color appears dramatically different when surrounded by different colors or used in different proportions. +3. **Experiential learning approach**: Color is best studied through hands-on experimentation and observation rather than theoretical study alone. +4. **"Making one color appear as two"**: Albers' key exercise demonstrates that color perception is determined by relationships, not absolute values. +5. **Systematic color exploration**: The "Homage to the Square" series methodically explored chromatic interactions with nested squares across hundreds of works. +6. **Pedagogical methodology**: Prioritized experience over theory - "what counts is not so-called knowledge of so-called facts, but vision — seeing." +7. **Brand design implications**: Colors must be defined as relationships (contrast ratios, visual weight proportions) rather than fixed values applied without context. + +## Mechanism + +Albers' key exercise — making one color appear as two different colors by changing its surroundings — proves that color perception is determined by relationships, not absolute values. This has direct implications for brand design: an accent color that reads clearly on white may appear muddy on dark backgrounds, not because the accent changed, but because its relationship to the background changed. The actionable rule is to define brand colors as relationships (primary is always N× the visual weight of accent; text maintains ≥4.5:1 contrast) rather than fixed hex values applied without context. + +## Relevance + +Foundational for brand palette design, interface color systems, and accessibility guidelines. Essential for understanding why colors must be tested in context rather than isolation. Critical for digital design where the same color appears across various backgrounds, screen types, and lighting conditions. 
Directly applies to contrast ratios, color accessibility standards, and responsive design systems. + +## Related Research + +- (Land, 1977) — Retinex theory of color constancy challenging some of Albers' assumptions +- (Jameson, 1985) — Defense of Albers' pigment-based approach vs. theoretical color mixing \ No newline at end of file diff --git a/docs/research/design/visual/arnheim_1954.md b/docs/research/design/visual/arnheim_1954.md new file mode 100644 index 0000000..cff6ac8 --- /dev/null +++ b/docs/research/design/visual/arnheim_1954.md @@ -0,0 +1,48 @@ +# Art and Visual Perception — Arnheim, 1954 + +## Citation + +Arnheim, R. (1954). *Art and Visual Perception: A Psychology of the Creative Eye*. University of California Press. (Revised edition 1974, ISBN 978-0-520-02623-5.) + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Visual shapes carry innate psychological meaning that is perceived instantly, before conscious thought. Circles are perceived as soft, unified, and complete; squares as stable, solid, and rational; triangles as dynamic, directional, and energetic. + +## Core Findings + +1. **Gestalt-based shape psychology**: Visual forms carry inherent meaning derived from their structural properties, not arbitrary cultural conventions. +2. **Circle associations**: Perceived as soft, unified, complete due to lack of edges and continuous form. +3. **Square associations**: Perceived as stable, solid, rational due to broad base and balanced structure. +4. **Triangle associations**: Perceived as dynamic, directional, energetic due to pointed direction and asymmetry. +5. **Visual force fields**: Off-center placement creates tension; angled placement adds dynamism by disrupting expected stability. +6. **Compositional principles**: Visual forces within compositions create perceived tension or calm based on geometric relationships. +7. 
**Simplicity rule**: The simplest geometric form expressing intended meaning is the most effective design solution. + +## Mechanism + +Arnheim demonstrated that shape perception follows Gestalt principles where "visual forces" within compositions create perceived tension or calm. A circle placed off-center creates visual tension because symmetry demands centering. A square at an angle creates dynamism by disrupting expected stability. Complex shapes combine meanings of their geometric primitives - reducing logos to geometric components reveals whether shape language is coherent or contradictory. + +## Relevance + +Foundational for logo design, visual identity systems, and interface design. Essential for understanding how geometric forms communicate brand personality and user expectations. Applied in icon design, where shape psychology determines immediate recognition and emotional response. Critical for any visual communication requiring instant psychological impact. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles underlying visual perception +- (Rand, 1985) — Practical application of simplicity principles in logo design \ No newline at end of file diff --git a/docs/research/design/visual/biederman_1987.md b/docs/research/design/visual/biederman_1987.md new file mode 100644 index 0000000..a728d37 --- /dev/null +++ b/docs/research/design/visual/biederman_1987.md @@ -0,0 +1,48 @@ +# Recognition-by-Components — Biederman, 1987 + +## Citation + +Biederman, I. (1987). "Recognition-by-Components: A Theory of Human Image Understanding." *Psychological Review*, 94(2), 115–147. https://doi.org/10.1037/0033-295X.94.2.115 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Humans recognize objects by decomposing them into simple geometric components called "geons" (geometric ions). 
Line drawings containing only essential edges are recognized as quickly as full-color photographs, proving that edge information alone is sufficient for rapid object recognition. + +## Core Findings + +1. **Geon-based recognition**: Approximately 36 basic 3-dimensional shapes (geons) can be combined to describe virtually all common objects we encounter. +2. **Edge primacy**: Line drawings with critical edges present are recognized as quickly and accurately as full-color photographs of the same objects. +3. **Viewpoint invariance**: Objects can be recognized from almost any viewing angle due to invariant edge properties of geons (curvature, parallel lines, co-termination, symmetry, co-linearity). +4. **Combinatorial power**: With just 24 geons, there are 306 billion possible combinations of 3 geons, allowing recognition of virtually unlimited objects. +5. **Speech analogy**: Just as ~44 phonemes create all English words, ~36 geons create all recognizable objects through systematic combination. +6. **Early development**: Geon recognition develops in infants as early as 4 months old, making it one of fundamental perceptual skills. +7. **Noise resistance**: Objects remain recognizable despite visual noise provided the constituent geons are visible. + +## Mechanism + +Biederman's experiments showed that recognition speed and accuracy were nearly identical for line drawings and full-color photographs of the same objects, provided critical edges were present. The visual system extracts edge-based structural descriptions (geon assemblies) as the primary recognition pathway, with color and texture serving only as secondary confirmation. For logo design, this provides perceptual science basis for monochrome-first design: if edges/silhouette carry the recognition signal, color and detail are secondary and can be added later without affecting core identifiability. 
+ +## Relevance + +Foundational theory for understanding how visual recognition works, with direct applications to logo design, icon design, and visual identity systems. Provides scientific justification for monochrome-first design approaches and edge-based recognition. Essential for creating visual marks that work across scales, contexts, and viewing conditions. Critical for understanding why simple geometric shapes are most effective for brand symbols. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles that complement geon-based recognition +- (Kare, 1984) — Practical application of edge-based recognition in icon design \ No newline at end of file diff --git a/docs/research/design/visual/hicks_2011.md b/docs/research/design/visual/hicks_2011.md new file mode 100644 index 0000000..1fafcb7 --- /dev/null +++ b/docs/research/design/visual/hicks_2011.md @@ -0,0 +1,47 @@ +# The Icon Handbook — Hicks, 2011 + +## Citation + +Hicks, J. (2011). *The Icon Handbook*. Five Simple Steps. ISBN 978-1-907828-00-3. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Icons must be designed at the smallest target size first and then scaled up, adding detail only at larger sizes. + +## Core Findings + +1. **Progressive simplification methodology**: Create separate pixel-perfect versions at each target size tier (16px, 24px, 32px, 48px, 128px, 256px, 512px) rather than scaling a single vector. +2. **Size-tier optimization**: At each tier, remove details that cannot be rendered at that resolution and exaggerate key features for clarity. +3. **Visual acuity limitations**: At 16×16 pixels, fine lines (under 2px), subtle gradients, and interior details vanish or create visual noise. +4. **Industry standard practice**: Progressive simplification is the standard methodology used by professional icon designers including major software companies. +5. 
**Firefox and Skype precedent**: Hicks' work on major brand icons demonstrates the effectiveness of this approach at scale. +6. **Downscaling failure**: Simply downscaling a 512px icon to 16px produces a muddy, unrecognizable mark that fails usability tests. + +## Mechanism + +Hicks' tier system works because human visual acuity is finite. At 16×16 pixels, fine lines (under 2px), subtle gradients, and interior details vanish or create visual noise. At 512×512, those same details add richness. Progressive simplification acknowledges this by treating each size tier as a distinct design problem: the 16px version is a hand-optimized silhouette, the 32px version may add one key interior detail, the 128px version adds secondary features, and the 512px version is the full design. + +## Relevance + +Essential methodology for any icon system, mobile app design, or interface requiring icons at multiple resolutions. Critical for responsive design systems, desktop applications, and brand identity systems that must work across various scales and contexts. Directly applicable to favicon design, app icon creation, and UI iconography. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles that inform icon recognition at small sizes +- (Miller, 1956) — Cognitive load implications of visual complexity in small-scale graphics \ No newline at end of file diff --git a/docs/research/design/visual/itten_1961.md b/docs/research/design/visual/itten_1961.md new file mode 100644 index 0000000..47cb222 --- /dev/null +++ b/docs/research/design/visual/itten_1961.md @@ -0,0 +1,48 @@ +# The Art of Color — Itten, 1961 + +## Citation + +Itten, J. (1961). *The Art of Color: The Subjective Experience and Objective Rationale of Color*. Reinhold Publishing. ISBN 978-0-442-24037-6. 
+ +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Color interaction is relational — the same hue appears different depending on its neighbors. Itten's seven contrast types provide a systematic framework for predicting how colors will read in context. + +## Core Findings + +1. **Seven color contrasts framework**: (1) contrast of hue, (2) light-dark contrast, (3) cold-warm contrast, (4) complementary contrast, (5) simultaneous contrast, (6) contrast of saturation, (7) contrast of extension (proportion). +2. **Relational color perception**: Same physical color appears different depending on surrounding colors and proportions - colors cannot be judged in isolation. +3. **Light-dark contrast supremacy**: The strongest contrast foundation - creates clarity, drama, and legibility. Forms basis of accessibility guidelines (4.5:1 contrast ratios). +4. **Complementary contrast power**: Opposite hues create maximum visual tension and vibrancy, but risk uncomfortable vibration at similar saturation levels. +5. **Simultaneous contrast effect**: Neutral colors shift toward the complement of their background (grey on red appears greenish). +6. **Proportional color weight**: Visual weight depends on area and inherent brightness - yellow is "heavier" than equal-area violet (proportion ratios: yellow:violet ≈ 1:3). +7. **Contextual color testing requirement**: Brand colors must be evaluated in actual usage contexts, not in isolation. + +## Mechanism + +Itten identified seven contrasts as foundation of color composition. The most actionable for brand design: **complementary contrast** creates maximum visual tension between opposite hues; **simultaneous contrast** makes neutrals shift toward complement of their background. Each contrast produces different emotional effects from calm (analogous, low contrast) to vibrant (complementary, high contrast). 
Colors must be defined as relationships rather than absolute values. + +## Relevance + +Foundational for brand palette design, interface color systems, and accessibility guidelines. Essential for understanding why colors must be tested in context rather than isolation. Directly applies to contrast ratios, WCAG accessibility standards, and responsive design systems where colors appear across various backgrounds and contexts. + +## Related Research + +- (Albers, 1963) — Further development of color interaction principles focusing on pigment-based relationships +- (Chevreul, 1839) — Early simultaneous contrast observations that influenced Itten's work \ No newline at end of file diff --git a/docs/research/design/visual/kare_1984.md b/docs/research/design/visual/kare_1984.md new file mode 100644 index 0000000..768e355 --- /dev/null +++ b/docs/research/design/visual/kare_1984.md @@ -0,0 +1,47 @@ +# Macintosh Icon Design — Kare, 1984 + +## Citation + +Kare, S. (1984). Original Macintosh icon designs. Museum of Modern Art (MoMA), New York. Documented in: Kindy, D. (2019). "How Susan Kare Designed User-Friendly Icons for the First Macintosh." *Smithsonian Magazine*. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Icons designed at the smallest target size first, on a pixel grid, are more recognizable and enduring than icons designed at large size and scaled down. + +## Core Findings + +1. **Pixel-perfect methodology**: Kare designed all original Macintosh icons on a 32×32 pixel grid using graph paper, working at exact output resolution before touching software. +2. **"Favicon-first" principle**: If a mark cannot hold its identity at 16×16 or 32×32 pixels, it is not strong enough for any size. +3. **Constraint-driven design**: At 32×32 (1,024 pixels, monochrome), there is no room for decoration — each pixel must contribute to recognition. +4. 
**Enduring recognition**: Her icons remain instantly recognizable 40+ years later because every pixel carried meaning. +5. **Metaphor-driven approach**: Icons were inspired by art history, Japanese logograms, hieroglyphics, and real-world objects to create instant recognition. +6. **Progressive simplification**: Detail that does not read at the smallest size should not exist in the core design. + +## Mechanism + +Kare's methodology — designing on the smallest graph paper she could find, composing each icon at its actual pixel resolution before touching software — enforces progressive simplification by constraint. Working within severe technological constraints (32×32 pixels, monochrome) made her an early pioneer of pixel art, drawing from her fine art experience in mosaics, needlepoint, and pointillism. This "peculiar sort of minimal pointillism" required solving recognition problems with only horizontal, vertical, or 45-degree lines. + +## Relevance + +Foundational methodology for icon design across all digital interfaces. Established the principle that icons must work at their smallest intended size first. Essential for favicon design, mobile app icons, and any interface requiring recognizable symbols at multiple scales. Her approach directly influences modern icon design systems and responsive iconography. + +## Related Research + +- (Hicks, 2011) — Modern application of progressive simplification principles in "The Icon Handbook" +- (Wertheimer, 1923) — Gestalt principles that inform instant icon recognition \ No newline at end of file diff --git a/docs/research/design/visual/lupton_2010.md b/docs/research/design/visual/lupton_2010.md new file mode 100644 index 0000000..7eabe8d --- /dev/null +++ b/docs/research/design/visual/lupton_2010.md @@ -0,0 +1,47 @@ +# Thinking with Type — Lupton, 2010 + +## Citation + +Lupton, E. (2010). *Thinking with Type: A Critical Guide for Designers, Writers, Editors, & Students* (2nd ed.). Princeton Architectural Press. 
ISBN 978-1-56898-969-3. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Typography is a system of relationships — between letterforms, between text and space, between type and image. For branding, three typographic choices carry the most personality signal: serif vs sans-serif (traditional/authoritative vs modern/approachable), weight (light/delicate vs bold/assertive), and spacing/tracking (tight/urgent vs open/relaxed). + +## Core Findings + +1. **Serif vs. sans-serif psychology**: Serif typefaces (Times, Garamond, Georgia) carry centuries of association with authority, tradition, and print culture, while sans-serif faces (Helvetica, Futura, Inter) signal modernity, clarity, and directness. +2. **Readability considerations**: Serifs create horizontal rhythm that guides the eye along lines, making serif text more readable at small sizes in long passages. +3. **Logo scalability**: Sans-serif faces dominate in logo marks because they survive reduction to small sizes better than serifs — fine serif details become visual noise at favicon sizes. +4. **Weight as personality lever**: A single typeface at light weight with generous tracking feels elegant and premium; the same typeface at bold weight with tight tracking feels urgent and powerful. +5. **Spacing psychology**: Tight tracking conveys urgency and intensity, while generous tracking suggests luxury and thoughtfulness. +6. **Systematic flexibility**: For brand systems, Lupton recommends sans-serif families with wide weight ranges (300-700) for maximum flexibility from single typeface choices. + +## Mechanism + +Typography functions as a visual language where formal qualities communicate before content is read. Serif typefaces leverage historical associations with authority and permanence, while their horizontal flow aids sustained reading. 
Sans-serif typefaces eliminate decorative elements to emphasize pure form and modernity. Weight and spacing act as volume controls for typographic voice — lighter weights whisper, bolder weights shout, tight spacing creates urgency, loose spacing creates calm. These relationships work pre-cognitively, making typographic choices powerful tools for brand personality expression. + +## Relevance + +Essential framework for brand identity design, web typography, and interface design. Critical for understanding how typographic choices communicate brand personality before users read content. Directly applicable to logo design, UI typography, marketing materials, and any system requiring consistent typographic voice across multiple touchpoints. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles that inform typographic hierarchy and spacing +- (Bringhurst, 2004) — Classical typography principles and historical context \ No newline at end of file diff --git a/docs/research/design/visual/muller_brockmann_1981.md b/docs/research/design/visual/muller_brockmann_1981.md new file mode 100644 index 0000000..c7b0a82 --- /dev/null +++ b/docs/research/design/visual/muller_brockmann_1981.md @@ -0,0 +1,47 @@ +# Grid Systems in Graphic Design — Müller-Brockmann, 1981 + +## Citation + +Müller-Brockmann, J. (1981). *Grid Systems in Graphic Design: A Visual Communication Manual for Graphic Designers, Typographers, and Three Dimensional Designers* (4th ed.). Arthur Niggli. ISBN 978-3-7212-0145-1. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +A grid provides the rational structure upon which creative work becomes legible, ordered, and balanced, producing visual order that the viewer perceives as clarity and competence even without consciously recognizing the grid. + +## Core Findings + +1. 
**Grid-based composition principles**: Grid systems create visual order that viewers subconsciously recognize as professional and trustworthy, even when they cannot identify the underlying structure. +2. **Rule of thirds effectiveness**: The 3×3 grid creates four natural focal points at intersections where the eye is drawn preferentially over the center. +3. **Golden ratio applications**: The golden ratio (1:1.618) produces naturally pleasing asymmetric balance with the "golden point" at roughly 62% from left, 38% from top. +4. **Dynamic vs. static positioning**: Placing primary elements at intersection points creates dynamic tension; centered compositions create calm stability but risk appearing static. +5. **Constraint liberation principle**: The grid constrains placement but liberates proportion - elements can be large/small, bold/light within the rational structure. +6. **Swiss design methodology**: Müller-Brockmann's International Typographic Style demonstrated that systematic approaches produce more effective communication than intuitive placement. + +## Mechanism + +The rule of thirds divides a canvas into 9 equal zones (3 columns × 3 rows). The four intersections are natural focal points — the eye is drawn to them preferentially over the center. Placing the primary mark at an upper-third intersection and secondary elements along the lower third creates dynamic tension. A centered composition (mark at dead center) creates calm and stability but risks feeling static. Müller-Brockmann's key rule: the grid constrains placement but liberates proportion. Within a grid, elements can be large or small, bold or light — the grid ensures they relate to each other rationally. Without a grid, elements appear arbitrary and the composition feels disorganized regardless of individual element quality. + +## Relevance + +Foundational methodology for all layout design, logo positioning, and visual composition. 
Essential for creating professional, trustworthy visual communications across print, digital, and environmental design. Critical for brand identity systems, web design, publication design, and any visual communication requiring systematic organization and hierarchy. + +## Related Research + +- (Wertheimer, 1923) — Gestalt principles that inform grid-based visual organization +- (Rand, 1985) — Application of grid principles to logo and identity design \ No newline at end of file diff --git a/docs/research/design/visual/rand_1985.md b/docs/research/design/visual/rand_1985.md new file mode 100644 index 0000000..187733f --- /dev/null +++ b/docs/research/design/visual/rand_1985.md @@ -0,0 +1,46 @@ +# A Designer's Art — Rand, 1985 + +## Citation + +Rand, P. (1985). *Paul Rand: A Designer's Art*. Yale University Press. ISBN 978-0-300-03242-6. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +A logo cannot survive unless it is designed with the utmost simplicity and restraint. + +## Core Findings + +1. Strong logos must work in monochrome first — color is added only after the shape proves its identity in black and white. +2. Logos should be tested by blurring and degrading them to verify they remain recognizable under poor conditions. +3. "Ideas do not need to be esoteric to be original or exciting" — simplicity does not mean lack of sophistication. +4. The most enduring corporate marks are those reducible to the fewest recognizable elements. +5. A logo must function across all applications: small sizes, poor printing, different backgrounds, and various media. + +## Mechanism + +Rand's design process was explicitly monochrome-first: design in black and white, test under degraded conditions, then add color only when the shape is proven. The blur test applies Gaussian blur to probe whether the global silhouette carries recognition independent of detail. 
If the blurred mark remains identifiable, the shape is strong; if not, it relies too much on detail and will fail at small sizes or poor reproduction quality. + +## Relevance + +Foundational methodology for logo design and brand identity systems. The monochrome-first approach and stress-testing principles directly apply to creating robust visual identities that work across all contexts and scales. Essential for preventing late-stage failures when implementing brand systems. + +## Related Research + +- (Airey, 2010) — Contemporary application of Rand's principles in Logo Design Love +- (Arnheim, 1954) — Psychological foundations of visual perception that inform Rand's approach \ No newline at end of file diff --git a/docs/research/design/visual/wertheimer_1923.md b/docs/research/design/visual/wertheimer_1923.md new file mode 100644 index 0000000..a821b04 --- /dev/null +++ b/docs/research/design/visual/wertheimer_1923.md @@ -0,0 +1,48 @@ +# Laws of Organization in Perceptual Forms — Wertheimer, 1923 + +## Citation + +Wertheimer, M. (1923). "Laws of Organization in Perceptual Forms." *Psychologische Forschung*, 4, 301–350. Translated in: Ellis, W.D. (ed.), *A Source Book of Gestalt Psychology*, 1938. + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The human visual system automatically organizes visual elements into coherent groups using hardwired perceptual principles that operate pre-attentively, making them the most powerful tool for creating instantly recognizable visual designs. + +## Core Findings + +1. **Proximity principle**: Elements close together are perceived as belonging to one group - the strongest grouping force. +2. **Similarity principle**: Elements sharing color, shape, or size are visually grouped together, even when scattered. +3. **Closure principle**: The mind fills in gaps to perceive complete shapes from incomplete visual information. +4. 
**Figure-ground separation**: Elements are automatically perceived as either foreground objects or background context. +5. **Continuation principle**: The eye follows the smoothest path through intersecting lines and curves. +6. **Pre-attentive processing**: These organizational principles operate before conscious thought, making them universally reliable. +7. **Simplicity preference**: Given visual elements, the brain imposes the simplest stable structure it can perceive. + +## Mechanism + +Wertheimer demonstrated that perceptual organization is hardwired, not learned. Given visual elements, the brain automatically imposes the simplest stable structure. Proximity grouping dominates (closest elements group first), followed by similarity grouping. Closure allows the brain to complete partial shapes automatically. Figure-ground separation makes elements stand out from backgrounds without conscious effort. For design, this means simplifying shapes until Gestalt grouping takes over - the viewer's brain will complete forms more reliably than added detail. + +## Relevance + +Foundational for all visual design, user interface design, and logo creation. Essential for creating layouts that group related information automatically, icons that remain recognizable at any size, and visual hierarchies that guide attention without conscious effort. Critical for any design requiring instant visual organization and recognition. 
+ +## Related Research + +- (Arnheim, 1954) — Application of Gestalt principles to art and visual perception +- (Köhler, 1920) — Gestalt psychology's figure-ground and organizational principles \ No newline at end of file diff --git a/docs/research/information-science/documentation/procida_2021.md b/docs/research/information-science/documentation/procida_2021.md new file mode 100644 index 0000000..25e5c7c --- /dev/null +++ b/docs/research/information-science/documentation/procida_2021.md @@ -0,0 +1,45 @@ +# Diátaxis Documentation Framework — Procida, 2021 + +## Citation + +Procida, D. (2021). *Diátaxis: A systematic approach to technical documentation authoring*. https://diataxis.fr/ + +## Source Type + +Specification + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Technical documentation has four distinct modes organized along two axes (action vs cognition, acquisition vs application) - mixing modes in single document produces confusion. + +## Core Findings + +1. **Four Documentation Types**: Tutorial (learning-oriented), How-to guide (task-oriented), Reference (information-oriented), Explanation (understanding-oriented) +2. **Two-Axis Framework**: Action vs Cognition crossed with Acquisition vs Application creates systematic quadrant organization +3. **Mode Separation**: Each quadrant demands distinct writing approach - combining forces mental state switching in readers +4. **Practical Application**: Framework adopted successfully in hundreds of documentation projects including Gatsby, Cloudflare, Vonage +5. **Quality Principle**: Provides active principle for maintainers to think effectively about documentation work + +## Mechanism + +Two axes create systematic quadrant: Tutorials (learning-oriented, action + acquisition), How-to guides (task-oriented, action + application), Reference (information-oriented, cognition + application), Explanation (understanding-oriented, cognition + acquisition). 
Each quadrant requires different content approach, style, and architecture. + +## Relevance + +Essential for technical writing, documentation architecture, developer experience. Applied in software documentation, API guides, educational content. Foundational for organizing complex technical information systems and improving user documentation experience. + +## Related Research + +Created by Daniele Procida. Name from Ancient Greek διάταξις (diataxis): "dia" (across) + "taxis" (arrangement). Adopted by major tech companies and open-source projects. Addresses content (what to write), style (how to write), architecture (how to organize) problems in technical documentation. diff --git a/docs/research/information-science/domain-modeling/brandolini_2012.md b/docs/research/information-science/domain-modeling/brandolini_2012.md new file mode 100644 index 0000000..a3595d0 --- /dev/null +++ b/docs/research/information-science/domain-modeling/brandolini_2012.md @@ -0,0 +1,45 @@ +# Event Storming — Brandolini, 2012 + +## Citation + +Brandolini, A. (2012–present). *Event Storming*. eventstorming.com. Originally developed as "Event-based modelling" circa 2012; refined and published through workshops and online resources. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Start with what the business cares about (events that happened) rather than data structures or process flows to discover domain boundaries collaboratively. + +## Core Findings + +1. **Event-First Approach**: Begin with domain events (past-tense, business-relevant verbs) placed on timeline +2. **Collaborative Discovery**: Visual, tactile format (sticky notes) lowers barrier for non-technical stakeholder participation +3. **Natural Boundary Detection**: Grouping events and commands surfaces bounded context boundaries where terms change meaning +4. 
**Multiple Flavors**: Improve existing business, envision startup ecosystem, explore new services, design critical software +5. **Temporal Dependencies**: Placing events on timeline reveals causal chains and business flow patterns + +## Mechanism + +EventStorming works by starting with business-relevant events rather than technical structures. Participants naturally discover temporal dependencies and causal chains by placing events chronologically. Commands reveal intent; aggregates reveal consistency boundaries. The visual format enables cross-discipline conversation between stakeholders with different backgrounds, delivering collaboration beyond silo boundaries. + +## Relevance + +Essential for Domain-Driven Design, collaborative domain modeling, microservices architecture design. Applied in startup ecosystem exploration, business process improvement, software design workshops. Fundamental for breaking down silos between business and technical teams in complex domain discovery. + +## Related Research + +Connects to (Evans, 2003) on Domain-Driven Design principles, (Vernon, 2013) on implementing DDD. Part of broader collaborative modeling approaches alongside Design Thinking and Lean Startup methodologies. Related to workshop facilitation techniques and business process modeling frameworks. \ No newline at end of file diff --git a/docs/research/information-science/domain-modeling/evans_2003.md b/docs/research/information-science/domain-modeling/evans_2003.md new file mode 100644 index 0000000..799ae51 --- /dev/null +++ b/docs/research/information-science/domain-modeling/evans_2003.md @@ -0,0 +1,45 @@ +# Domain-Driven Design — Evans, 2003 + +## Citation + +Evans, E. (2003). *Domain-Driven Design: Tackling Complexity in the Heart of Software*. Addison-Wesley. 
+ +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Complex software must be built around a shared domain model, expressed in a ubiquitous language used by both domain experts and developers in conversation, code, and documentation. + +## Core Findings + +1. **Ubiquitous Language**: Single terminology shared between domain experts and developers eliminates translation costs and catches misunderstandings early +2. **Bounded Contexts**: Define where terms have a single meaning, preventing incoherent unified models when terms mean different things in different subdomains +3. **Aggregates**: Define transactional consistency boundaries - all invariants within an aggregate must hold after each operation +4. **Context Mapping Patterns**: Upstream/Downstream, Anti-corruption Layer, Conformist, Open-host Service define how separate bounded contexts interact +5. **Strategic vs. Tactical Design**: Strategic focuses on bounded contexts and context mapping; tactical focuses on entities, value objects, services + +## Mechanism + +Ubiquitous language eliminates translation costs between domain experts and developers. When "Order" means the same thing in conversation and code, misunderstandings are caught early. Bounded contexts prevent the alternative — a single unified model spanning subdomains — from becoming incoherent. Aggregates enforce transactional consistency boundaries with operations spanning aggregates accepting eventual consistency. + +## Relevance + +Essential for complex software architecture, microservices design, team organization, domain modeling. Applied in enterprise software development, distributed systems architecture. Foundational for strategic system design aligning technical implementation with business domains and expert knowledge. + +## Related Research + +Connects to (Brandolini, 2012) on Event Storming for domain discovery, (Vernon, 2013) on DDD implementation. 
Part of broader software architecture approaches alongside microservices, CQRS, event sourcing. Related to Conway's Law and team topologies for organizational design. \ No newline at end of file diff --git a/docs/research/information-science/domain-modeling/vernon_2013.md b/docs/research/information-science/domain-modeling/vernon_2013.md new file mode 100644 index 0000000..72d60a8 --- /dev/null +++ b/docs/research/information-science/domain-modeling/vernon_2013.md @@ -0,0 +1,45 @@ +# Implementing Domain-Driven Design — Vernon, 2013 + +## Citation + +Vernon, V. (2013). *Implementing Domain-Driven Design*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Context mapping provides nine inter-context relationship patterns describing how bounded contexts relate to each other, preventing model pollution and reducing integration friction. + +## Core Findings + +1. **Nine Context Mapping Patterns**: Shared Kernel, Customer-Supplier, Conformist, Anticorruption Layer, Separate Ways, Open Host Service, Published Language, Big Ball of Mud, Partnership +2. **Relationship Trade-offs**: Each pattern carries specific coordination costs and risk implications requiring careful selection +3. **Pattern Selection Guidance**: Use ACL when downstream has limited influence; Customer-Supplier when teams can negotiate; Open Host Service for many standardized consumers +4. **Social Contract Explicit**: Context maps make team relationships, obligations, and constraints transparent +5. **Practical DDD Implementation**: Extends Evans' foundational work with concrete implementation patterns and guidance + +## Mechanism + +Context mapping makes social and technical contracts between teams explicit. Customer-Supplier demands upstream awareness; Conformist accepts upstream dominance; Anticorruption Layer isolates from model drift. 
Named relationships clarify obligations and constraints, preventing accidental coupling and model contamination. + +## Relevance + +Essential for microservices architecture, distributed systems design, team organization. Applied in bounded context definition, API design, organizational patterns. Critical for implementing DDD at scale in complex enterprise environments with multiple development teams. + +## Related Research + +Vaughn Vernon builds on (Evans, 2003) foundational DDD work. Author of "Reactive Messaging Patterns with the Actor Model" (2015), "Domain-Driven Design Distilled" (2016). Leading DDD practitioner and educator providing concrete implementation guidance for Evans' theoretical framework. diff --git a/docs/research/psychology/cognitive/craik_lockhart_1972.md b/docs/research/psychology/cognitive/craik_lockhart_1972.md new file mode 100644 index 0000000..682be6a --- /dev/null +++ b/docs/research/psychology/cognitive/craik_lockhart_1972.md @@ -0,0 +1,48 @@ +# Levels of Processing — Craik & Lockhart, 1972 + +## Citation + +Craik, F. I. M., & Lockhart, R. S. (1972). "Levels of processing: A framework for memory research." *Journal of Verbal Learning and Verbal Behavior*, 11(6), 671–684. https://doi.org/10.1016/S0022-5371(72)80001-X + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Deeper processing—explaining *why* a rule matters—leads to better retention and application than shallow processing. Memory is a byproduct of the depth of cognitive processing, not repetition. + +## Core Findings + +1. **Three levels of processing**: Structural/visual (appearance), phonemic (sound), and semantic (meaning) — with deeper levels producing stronger memory traces. +2. **Semantic processing superiority**: Information processed for meaning creates more durable memory traces than information processed for appearance or sound. +3. 
**Elaborative encoding advantage**: Adding semantic processing requirements (explaining rationale) improves both immediate compliance and long-term adherence. +4. **Contradiction of multi-store model**: Challenges Atkinson-Shiffrin model by showing that rehearsal type matters more than rehearsal amount. +5. **Self-reference effect enhancement**: Information relating to oneself receives the deepest processing and highest recall. +6. **Neural correlates**: Brain imaging shows increased left prefrontal cortex activity during semantic vs. shallow processing tasks. + +## Mechanism + +The "levels of processing" framework shows that semantic processing (meaning-based) creates stronger memory traces than phonetic (sound-based) or visual (appearance-based) processing. Forcing reviewers to explain why rules matter engages semantic processing, creating widespread activation in semantic networks through meaningful connections to existing knowledge. + +## Relevance + +Enforcement tables with "Why it matters" columns force elaborative encoding. Instead of superficial rule checking, reviewers must process the underlying rationale, leading to better internalization of design principles. Essential for any learning system requiring deep understanding rather than rote compliance. + +## Related Research + +- (Craik & Tulving, 1975) — Empirical validation showing semantic encoding superiority in recall tasks +- (Hyde & Jenkins, 1973) — Orienting tasks and incidental learning effects +- (Lockhart & Craik, 1990) — Retrospective commentary on levels of processing framework \ No newline at end of file diff --git a/docs/research/psychology/cognitive/fisher_geiselman_1987.md b/docs/research/psychology/cognitive/fisher_geiselman_1987.md new file mode 100644 index 0000000..a0eb6a9 --- /dev/null +++ b/docs/research/psychology/cognitive/fisher_geiselman_1987.md @@ -0,0 +1,47 @@ +# The Enhanced Cognitive Interview — Fisher & Geiselman, 1987 + +## Citation + +Fisher, R. P., & Geiselman, R. E. 
(1987). "Enhancing enhanced eyewitness memory: Refining the cognitive interview." *Journal of Police Science and Administration*, 15, 291-297. (Enhanced version of original 1984 cognitive interview) + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The enhanced Cognitive Interview elicits approximately 35% more correct information than standard interviews with equal accuracy rates. + +## Core Findings + +1. **Four retrieval mnemonics**: (1) Mental reinstatement of context, (2) Report everything, (3) Temporal reversal, (4) Perspective change - each opens different memory access routes. +2. **Enhanced version improvements**: 45% increase in correct information over original CI through better interview structure and social dynamics. +3. **Equal accuracy rates**: 85% accuracy for CI vs 82% for standard interviews - more information without sacrificing reliability. +4. **Encoding specificity leverage**: Context reinstatement increases memory availability by overlapping retrieval cues with original encoding conditions. +5. **Multi-component memory access**: Different retrieval routes surface information that direct questions cannot access. +6. **Field effectiveness**: Real police officers trained in CI gather significantly more accurate information from actual crime witnesses. + +## Mechanism + +Four retrieval mnemonics open different memory access routes, collectively surfacing what direct questions cannot reach. Mental reinstatement leverages encoding specificity principle - memories encoded with environmental/emotional context become accessible when similar context is recreated. "Report everything" and perspective changes tap the multi-component view of memory, accessing different aspects of the complex memory trace through alternative retrieval pathways. + +## Relevance + +Foundational technique for investigative interviewing, user research, and requirements gathering. 
Widely adopted by police departments, private investigators, and attorneys. The principles apply to any situation requiring complete information extraction: incident analysis, post-mortem reviews, and stakeholder interviews where comprehensive recall is essential. + +## Related Research + +- (Tulving & Thomson, 1973) — Encoding specificity principle underlying context reinstatement +- (Flanagan, 1954) — Critical incident technique as complementary approach to incident-based recall diff --git a/docs/research/psychology/cognitive/flanagan_1954.md b/docs/research/psychology/cognitive/flanagan_1954.md new file mode 100644 index 0000000..2ebd13f --- /dev/null +++ b/docs/research/psychology/cognitive/flanagan_1954.md @@ -0,0 +1,47 @@ +# The Critical Incident Technique — Flanagan, 1954 + +## Citation + +Flanagan, J. C. (1954). "The critical incident technique." *Psychological Bulletin*, 51(4), 327–357. https://doi.org/10.1037/h0061470 + +## Source Type + +Academic Paper + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Anchoring an interview on a specific past incident breaks schema-based recall, revealing real workarounds, edge cases, and failure modes that never surface when asked "how does this usually work?" + +## Core Findings + +1. **Direct observations of critical behavior**: CIT focuses on specific incidents where the purpose and consequences are clear, avoiding generalized schema-based responses. +2. **Schema bypass mechanism**: Direct questions elicit sanitized mental models of "how things should work," while incidents access episodic memory anchored to specific sensory and emotional details. +3. **Flexible data collection**: Method allows participants to describe experiences in their own words without forcing them into predetermined frameworks. +4. **Rare event identification**: CIT captures uncommon but significant events that routine methods miss by focusing only on everyday patterns. +5. 
**Practical problem solving**: Originally developed for Aviation Psychology Program during WWII to identify pilot errors and improve training/systems design. +6. **Wide applicability**: Successfully used in healthcare, organizational development, market research, and information-seeking behavior studies. + +## Mechanism + +Direct questions elicit the stakeholder's mental schema — a sanitized, gap-free description of how things should work. Critical incidents bypass the schema because episodic memory is anchored to specific sensory and emotional detail. The technique requires incidents to be "sufficiently complete" and "critical" (making positive or negative contribution) where purpose and consequences are clear to the observer. + +## Relevance + +Foundational technique for requirements gathering, user research, and system design. Essential for uncovering actual user behaviors, workarounds, and failure modes that structured interviews miss. Widely adopted in UX research, safety analysis, and organizational problem-solving where understanding real-world behavior patterns is crucial. + +## Related Research + +- (Bitner, Booms & Tetreault, 1990) — Service encounter satisfaction research using CIT +- (Klein, 1998) — Recognition-primed decision making and naturalistic observation methods diff --git a/docs/research/psychology/cognitive/gollwitzer_1999.md b/docs/research/psychology/cognitive/gollwitzer_1999.md new file mode 100644 index 0000000..f2f9c32 --- /dev/null +++ b/docs/research/psychology/cognitive/gollwitzer_1999.md @@ -0,0 +1,48 @@ +# Implementation Intentions — Gollwitzer, 1999 + +## Citation + +Gollwitzer, P. M. (1999). "Implementation intentions: Strong effects of simple plans." *American Psychologist*, 54(7), 493-503. 
https://doi.org/10.1037/0003-066X.54.7.493 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +"If X then Y" plans are 2–3x more likely to execute than general intentions because they create automatic cue-response links in memory. + +## Core Findings + +1. **If-then plan effectiveness**: Implementation intentions result in 2-3x higher goal achievement rates compared to general goal intentions alone. +2. **Strategic automaticity**: If-then plans automate action initiation by creating strong mental links between situational cues and desired behaviors. +3. **Cognitive load reduction**: Pre-planned responses eliminate hesitation and deliberation, freeing cognitive resources for other tasks. +4. **Goal shielding**: Implementation intentions protect ongoing goal pursuit from distracting thoughts, competing goals, and emotional interference. +5. **Broad applicability**: Effective across diverse domains including health behaviors (breast self-examination: 100% vs 53% completion), voting (4.1 percentage point increase), and emotion regulation. +6. **Planning specificity requirement**: Plans must specify when, where, and how the behavior will be performed to achieve maximum effectiveness. + +## Mechanism + +If-then plans create automatic cue-response links in memory. The brain processes "if function > 20 lines then extract helper" as an action trigger, not a suggestion to consider. The anticipated situation becomes highly activated in memory, leading to immediate, efficient action initiation without conscious intent when the cue is encountered. + +## Relevance + +Foundational for automated behavioral interventions, habit formation systems, and decision-making tools. Essential for any system requiring reliable execution of intended behaviors — from code review processes to health interventions. 
The specificity requirement directly applies to creating effective automation rules and behavioral prompts. + +## Related Research + +- (Gollwitzer & Brandstätter, 1997) — Original empirical validation of implementation intentions +- (Rogers et al., 2015) — Planning prompts and follow-through effectiveness +- (Achtziger, Gollwitzer & Sheeran, 2008) — Goal shielding mechanisms diff --git a/docs/research/psychology/cognitive/hattie_timperley_2007.md b/docs/research/psychology/cognitive/hattie_timperley_2007.md new file mode 100644 index 0000000..df053a6 --- /dev/null +++ b/docs/research/psychology/cognitive/hattie_timperley_2007.md @@ -0,0 +1,48 @@ +# The Power of Feedback — Hattie & Timperley, 2007 + +## Citation + +Hattie, J., & Timperley, H. (2007). "The power of feedback." *Review of Educational Research*, 77(1), 81–112. https://doi.org/10.3102/003465430298487 + +## Source Type + +Academic Paper + +## Method + +Meta-analysis + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Feedback is most effective when it tells the agent exactly what went wrong and what the correct action is. "FAIL: function > 20 lines at file:47" is actionable; "Apply function length rules" is not. + +## Core Findings + +1. **Feedback power**: Among the most powerful influences on learning and achievement, but impact can be positive or negative depending on type and delivery. +2. **Specific feedback superiority**: Task-specific feedback that identifies exact errors and correct actions is significantly more effective than general comments. +3. **Four feedback levels**: Task level (correctness), process level (strategies), self-regulation level (monitoring), and self level (personal praise) — with task and process being most effective. +4. **Three key questions framework**: "Where am I going?" (goals), "How am I going?" (progress), "Where to next?" (improvement strategies). +5. 
**Timing effects**: Immediate feedback works best for procedural tasks, delayed feedback for complex learning requiring reflection. +6. **Cognitive load management**: Effective feedback reduces rather than increases cognitive burden by providing clear direction. + +## Mechanism + +Specific feedback creates a direct mapping between error and correction, reducing cognitive load by eliminating interpretation steps. Vague feedback requires the recipient to infer what went wrong, which introduces interpretation errors and reduces action likelihood. The model emphasizes reducing gaps between current performance and goals through precise, actionable information. + +## Relevance + +Foundational for automated feedback systems, code review processes, and instructional design. Critical for any system providing performance feedback — from linting tools to learning management systems. The specificity principle directly applies to error messaging, validation feedback, and progress indicators in software interfaces. + +## Related Research + +- (Black & Wiliam, 1998) — Formative assessment and feedback loops in learning +- (Kulhavy & Stock, 1989) — Feedback timing and learning effectiveness +- (Kluger & DeNisi, 1996) — Feedback intervention theory diff --git a/docs/research/psychology/cognitive/kahneman_2011.md b/docs/research/psychology/cognitive/kahneman_2011.md new file mode 100644 index 0000000..616c09f --- /dev/null +++ b/docs/research/psychology/cognitive/kahneman_2011.md @@ -0,0 +1,47 @@ +# Thinking, Fast and Slow — Kahneman, 2011 + +## Citation + +Kahneman, D. (2011). *Thinking, Fast and Slow*. Farrar, Straus and Giroux. ISBN 978-0-374-27563-1. 
[Bestseller with millions of copies sold] + +## Source Type + +Practitioner Book + +## Method + +Synthesis + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Human thinking operates through two distinct systems: System 1 (fast, automatic, intuitive) and System 2 (slow, deliberate, effortful), with System 1 dominating most decisions despite being vulnerable to systematic biases. + +## Core Findings + +1. **Dual-process theory**: System 1 operates automatically and quickly with little conscious effort; System 2 requires attention and operates more slowly and deliberately. +2. **System 1 dominance**: Most of our thinking and decision-making is handled by System 1, while System 2 often endorses impressions and intuitions generated by System 1. +3. **Cognitive biases**: System 1 is susceptible to predictable errors including anchoring bias, availability heuristic, confirmation bias, and overconfidence. +4. **Cognitive ease**: When information is processed fluently by System 1, we experience cognitive ease, leading to increased belief and positive affect. +5. **Loss aversion**: People feel losses more intensely than equivalent gains, leading to irrational decision-making patterns. +6. **Prospect theory**: People evaluate outcomes relative to reference points rather than in absolute terms, and overweight small probabilities. + +## Mechanism + +System 2 must be deliberately activated before System 1's automatic judgments become anchored. This requires effortful cognitive work that most people avoid due to mental laziness. Running deliberate analytical processes (like systematic review checklists) before allowing intuitive responses prevents System 1's fast impressions from contaminating careful evaluation. + +## Relevance + +Foundational for understanding human decision-making biases in design, product management, user research, and team processes. Essential for creating systems that account for predictable human cognitive limitations. 
Critical for designing decision-support tools and review processes. + +## Related Research + +- (Tversky & Kahneman, 1974) — Original heuristics and biases research +- (Stanovich & West, 2000) — Dual-process theory development diff --git a/docs/research/psychology/cognitive/klein_1998.md b/docs/research/psychology/cognitive/klein_1998.md new file mode 100644 index 0000000..150da87 --- /dev/null +++ b/docs/research/psychology/cognitive/klein_1998.md @@ -0,0 +1,47 @@ +# Sources of Power / PreMortem — Klein, 1998/2007 + +## Citation + +Klein, G. (1998). *Sources of Power: How People Make Decisions*. MIT Press. ISBN 0-262-61146-5. PreMortem method described in Harvard Business Review (2007) and further developed as risk assessment technique. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Prospective hindsight ("imagine this failed — why?") catches 30% more potential issues than forward-looking review because the brain is better at explaining past events than predicting future ones. + +## Core Findings + +1. **Recognition-primed decision (RPD) model**: Experts don't compare multiple options; they rapidly recognize situations and implement the first viable solution that comes to mind. +2. **PreMortem technique**: By framing failure as having already occurred, teams activate explanation mode rather than prediction mode, uncovering more failure scenarios. +3. **Naturalistic decision making**: Laboratory models cannot adequately describe decision-making under time pressure and uncertainty in real-world settings. +4. **Expertise development**: Experts rely on pattern recognition and mental simulation rather than analytical comparison of alternatives. +5. **Intuition validation**: Expert intuition is based on rapid pattern recognition from extensive domain experience, not mystical insight. +6. 
**Time pressure effects**: Under pressure, people satisfice (find first adequate solution) rather than optimize (find best possible solution). + +## Mechanism + +PreMortem shifts cognitive frame from prediction (weak) to explanation (strong). By asking "imagine this already failed — why?" the technique activates the brain's superior ability to generate causal explanations for past events. This reveals failure modes that forward-looking analysis ("what could go wrong?") typically misses because prediction requires cognitive resources that explanation does not. + +## Relevance + +Essential methodology for project risk assessment, decision-making improvement, and team planning processes. Widely adopted in software development, military planning, and organizational risk management. Critical for any high-stakes decision where failure analysis is valuable. + +## Related Research + +- (Kahneman & Klein, 2009) — Conditions for intuitive expertise +- (Mitchell et al., 1989) — Prospective hindsight effectiveness studies diff --git a/docs/research/psychology/cognitive/mcdaniel_einstein_2000.md b/docs/research/psychology/cognitive/mcdaniel_einstein_2000.md new file mode 100644 index 0000000..71c3668 --- /dev/null +++ b/docs/research/psychology/cognitive/mcdaniel_einstein_2000.md @@ -0,0 +1,47 @@ +# Strategic and Automatic Processes in Prospective Memory — McDaniel & Einstein, 2000 + +## Citation + +McDaniel, M. A., & Einstein, G. O. (2000). "Strategic and automatic processes in prospective memory retrieval." *Applied Cognitive Psychology*, 14(7), S127–S144. https://doi.org/10.1002/acp.775 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Memory for intended actions is better when cues are embedded at the point of action, not in a separate appendix or reference document. + +## Core Findings + +1. 
**Embedded cue effectiveness**: Prospective memory performance significantly improves when contextual cues appear exactly where the intended action should occur. +2. **Multi-process model**: Prospective memory can operate through both strategic monitoring and automatic retrieval, depending on cue-action associations. +3. **Event-based vs. time-based differences**: Event-based prospective memory (triggered by external cues) consistently outperforms time-based (triggered by internal monitoring). +4. **Strategic automaticity**: Strong cue-action associations can trigger retrieval automatically, reducing cognitive load on working memory. +5. **Point-of-decision placement**: Inline reminders and checks are more effective than separate reference materials or appendices. +6. **Immediate-execute vs. delayed-execute**: Performance deteriorates when delays or interruptions occur between cue perception and intended action. + +## Mechanism + +Placing if-then gates inline rather than in a separate reference document increases adherence because the cue appears exactly when the developer is about to make the relevant decision. Embedded cues leverage both the encoding specificity principle (context overlap) and reduce the cognitive load of having to remember to check separate reference materials. Strong cue-action associations can trigger automatic retrieval without conscious monitoring. + +## Relevance + +Critical for interface design, process documentation, and workflow systems. Applies to code review checklists, safety procedures, quality gates, and any system requiring reliable execution of intended actions. Essential for designing effective reminders, notifications, and decision-support systems where timing and context are crucial. 
+ +## Related Research + +- (Gollwitzer, 1999) — Implementation intentions creating automatic cue-response links +- (Miller, 1956) — Working memory limitations affecting monitoring-based prospective memory diff --git a/docs/research/psychology/cognitive/miller_1956.md b/docs/research/psychology/cognitive/miller_1956.md new file mode 100644 index 0000000..53c1cad --- /dev/null +++ b/docs/research/psychology/cognitive/miller_1956.md @@ -0,0 +1,48 @@ +# The Magical Number Seven, Plus or Minus Two — Miller, 1956 + +## Citation + +Miller, G. A. (1956). "The magical number seven, plus or minus two: Some limits on our capacity for processing information." *Psychological Review*, 63(2), 81–97. https://doi.org/10.1037/h0043158 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Working memory is limited to 7±2 items. Chunking related information into structured patterns allows processing of larger information sets by treating chunks as single items. + +## Core Findings + +1. **Working memory capacity**: Humans can hold approximately 7±2 discrete items in short-term memory before performance degrades. +2. **Chunking mechanism**: Related items can be grouped into meaningful units ("chunks"), allowing more information to be retained by treating each chunk as a single item. +3. **Information channel capacity**: One-dimensional absolute judgment is limited to approximately 2-3 bits of information (4-8 alternatives). +4. **Coincidence observation**: Miller noted the correspondence between judgment limits and memory span was only coincidental, not indicative of a deeper principle. +5. **Structured presentation advantage**: Tables and organized formats reduce cognitive load compared to narrative text by enabling parallel processing. +6. **Recognition vs. capacity**: The limit applies to simultaneous retention, not recognition or learned associations. 
+ +## Mechanism + +Rather than processing each item sequentially, structured presentations allow the reviewer to scan patterns and identify exceptions. Tables with consistent column structure create predictable information architecture that reduces cognitive load. Chunking leverages existing knowledge to group related information into higher-order units, effectively multiplying working memory capacity by the complexity of each chunk. + +## Relevance + +Foundational principle for interface design, information architecture, and cognitive load management. Enforcement tables in verification tasks leverage chunking principles. Instead of prose checklists requiring sequential processing, structured tables allow reviewers to process multiple items in parallel while maintaining consistency. Essential for any system requiring human information processing. + +## Related Research + +- (Sweller, 1988) — Cognitive load theory and instructional design +- (Cowan, 2001) — Revised capacity estimate of ~4 chunks rather than 7±2 +- (Baddeley, 1992) — Working memory model with multiple components \ No newline at end of file diff --git a/docs/research/psychology/cognitive/reynolds_gutman_1988.md b/docs/research/psychology/cognitive/reynolds_gutman_1988.md new file mode 100644 index 0000000..1b85d7e --- /dev/null +++ b/docs/research/psychology/cognitive/reynolds_gutman_1988.md @@ -0,0 +1,46 @@ +# Laddering Theory / Means-End Chain — Reynolds & Gutman, 1988 + +## Citation + +Reynolds, T. J., & Gutman, J. (1988). "Laddering theory, method, analysis, and interpretation." *Journal of Advertising Research*, 28(1), 11–31. DOI: 10.1080/00218499.1988.12467766. 
[Highly cited: 3,779+ citations] + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The laddering technique reveals that the stakeholder's first answer about what they want is rarely the real constraint — repeatedly asking "Why is that important to you?" climbs from surface attributes to deeper motivational values. + +## Core Findings + +1. **Means-end chain structure**: Concrete attribute → functional consequence → psychosocial consequence → terminal value represents four levels of consumer motivation. +2. **Progressive revelation**: Stakeholders typically start with concrete attributes but the real decision drivers lie at the consequence and value levels. +3. **Value conflict identification**: Stakeholders whose surface requirements look identical often have ladders that diverge at the consequence level, revealing hidden conflicts. +4. **Interview methodology**: Systematic probing with "Why is that important?" uncovers deeper motivational structures that traditional surveys miss. +5. **Advertising applications**: Understanding means-end chains enables more effective positioning by connecting product features to personal values. + +## Mechanism + +The laddering interview technique systematically probes upward through levels of abstraction using "Why is that important to you?" prompts. Each level reveals different types of motivation: attributes (what the product has), functional consequences (what it does), psychosocial consequences (how it makes you feel/appear), and values (what life goals it serves). This climbing process reveals the complete motivational pathway. + +## Relevance + +Essential technique for requirements gathering, user research, and stakeholder analysis. Helps product managers, UX researchers, and business analysts uncover the true drivers behind stated requirements. 
Critical for avoiding surface-level solutions that miss deeper user needs and organizational goals. + +## Related Research + +- (Rokeach, 1973) — Values theory underlying the terminal value concept +- (Gutman, 1982) — Original means-end chain model foundation diff --git a/docs/research/psychology/cognitive/tversky_kahneman_1974.md b/docs/research/psychology/cognitive/tversky_kahneman_1974.md new file mode 100644 index 0000000..099369d --- /dev/null +++ b/docs/research/psychology/cognitive/tversky_kahneman_1974.md @@ -0,0 +1,46 @@ +# Judgment Under Uncertainty: Heuristics and Biases — Tversky & Kahneman, 1974 + +## Citation + +Tversky, A., & Kahneman, D. (1974). Judgment under uncertainty: Heuristics and biases. *Science*, 185(4157), 1124-1131. https://doi.org/10.1126/science.185.4157.1124 + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +People use mental shortcuts (heuristics) that systematically bias judgment under uncertainty. + +## Core Findings + +1. People rely on three main heuristics when making judgments under uncertainty: representativeness, availability, and anchoring and adjustment. +2. Representativeness heuristic: People judge probability by similarity to mental prototypes, ignoring base rates and sample size. +3. Availability heuristic: People assess probability by how easily examples come to mind, biasing toward memorable or recent events. +4. Anchoring and adjustment: People adjust insufficiently from initial values, even when the anchor is irrelevant. +5. These heuristics are useful but lead to severe and systematic errors in prediction and judgment. + +## Mechanism + +Heuristics serve as cognitive shortcuts that reduce complex probability assessments to simpler judgmental operations. However, they rely on selective accessibility of information rather than comprehensive analysis. 
Anchoring activates associative networks around initial values, making adjustment insufficient. Availability conflates memorability with frequency. Representativeness ignores statistical principles in favor of similarity matching. + +## Relevance + +Foundational for understanding cognitive biases in decision-making processes. Critical for designing systems that account for human judgment limitations. Directly applicable to forecast accuracy, risk assessment, and quality control processes where human judgment is involved. + +## Related Research + +- (Kahneman, 2011) — System 1 vs System 2 thinking framework +- (Gilovich, Griffin & Kahneman, 2002) — Heuristics and biases comprehensive review diff --git a/docs/research/psychology/social/cialdini_2001.md b/docs/research/psychology/social/cialdini_2001.md new file mode 100644 index 0000000..812e9f6 --- /dev/null +++ b/docs/research/psychology/social/cialdini_2001.md @@ -0,0 +1,47 @@ +# Influence: The Psychology of Persuasion — Cialdini, 1984/2001 + +## Citation + +Cialdini, R. B. (1984). *Influence: The Psychology of Persuasion*. William Morrow and Company. Revised edition (2001). HarperBusiness. ISBN 0-688-12816-5. + +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Influence operates through six universal principles of persuasion that create automatic compliance responses in human behavior. + +## Core Findings + +1. **Six principles of influence**: Reciprocity, commitment and consistency, social proof, authority, liking, and scarcity create predictable persuasion outcomes. +2. **Commitment and consistency**: People feel compelled to remain consistent with prior commitments, especially when made publicly or in writing. +3. **Micro-commitments**: Small initial commitments (like checking a box or filling in a form) create resistance to reversals and increase likelihood of larger compliance. +4. 
**Automatic responses**: These principles trigger "click-whirr" automatic responses where people comply without conscious deliberation. +5. **Research methodology**: Based on three years of undercover participant observation at car dealerships, fundraising organizations, and telemarketing firms. +6. **Unity principle** (added 2016): People are more influenced by those they identify with or consider part of their in-group. + +## Mechanism + +Commitment devices work by engaging the consistency principle — once someone has made an explicit commitment (especially written or public), psychological pressure to remain consistent makes reversal psychologically costly. Structured tables with PASS/FAIL cells create micro-commitments where marking "FAIL" requires explicit justification, making silent passes feel inconsistent. + +## Relevance + +Foundational framework for understanding persuasion in business, marketing, negotiation, and social influence contexts. Essential for both applying ethical influence techniques and defending against manipulation. Widely used in UX design, sales processes, and behavioral change interventions. + +## Related Research + +- (Kiesler, 1971) — Psychology of commitment and consistency +- (Festinger, 1957) — Cognitive dissonance theory underlying consistency principle diff --git a/docs/research/psychology/social/mellers_et_al_2001.md b/docs/research/psychology/social/mellers_et_al_2001.md new file mode 100644 index 0000000..8a5c980 --- /dev/null +++ b/docs/research/psychology/social/mellers_et_al_2001.md @@ -0,0 +1,46 @@ +# Adversarial Collaboration — Kahneman & Various, Multiple Studies + +## Citation + +Various studies on adversarial collaboration including: Kahneman, D. & Klein, G. (2009). "Conditions for intuitive expertise: A failure to disagree." *American Psychologist*, 64(6), 515-526. Clark, C. J., Costello, T., Mitchell, G., & Tetlock, P. E. (2022). "Keep your enemies close: Adversarial collaborations will improve behavioral science." 
*Journal of Applied Research in Memory and Cognition*, 11(1), 1-18. + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Adversarial collaboration produces highest-quality thinking when parties with opposing hypotheses work together to find flaws in each other's reasoning and jointly design experiments. + +## Core Findings + +1. **Collaborative antagonism**: Scientists with competing hypotheses can co-design experiments that satisfy both groups regarding bias and experimental weaknesses. +2. **Quality improvement**: Adversarial collaboration reduces cognitive-motivational biases and improves scientific reasoning quality. +3. **Feasibility paradox**: "Most feasible when least needed" — works best when camps have testable theories and common methodological standards, but is hardest to implement when most needed (when communities lack falsification criteria). +4. **Neutral moderation**: Often requires neutral third-party moderators to facilitate the collaboration process. +5. **Open science framework**: Emphasizes transparency throughout research process, fitting within broader open science movement. + +## Mechanism + +Explicitly framing reviewers as "your job is to break this feature" activates adversarial collaboration mode. Reviewers seek disconfirmation rather than confirmation. Joint experimental design forces both sides to agree on methodology, reducing bias. Co-publication of results ensures accountability and prevents selective interpretation. + +## Relevance + +Powerful methodology for resolving scientific disputes, improving research quality, and reducing confirmation bias in academic work. Applicable to peer review, experimental design, hypothesis testing, and conflict resolution in research communities. Essential for advancing contentious scientific questions. 
+ +## Related Research + +- (Tetlock & Mitchell, 2009) — Implicit bias and accountability systems +- (Latham, Erez & Locke, 1988) — Early example of adversarial collaboration in goal-setting research diff --git a/docs/research/psychology/social/rogers_farson_1957.md b/docs/research/psychology/social/rogers_farson_1957.md new file mode 100644 index 0000000..2c53bd3 --- /dev/null +++ b/docs/research/psychology/social/rogers_farson_1957.md @@ -0,0 +1,46 @@ +# Active Listening — Rogers & Farson, 1957 + +## Citation + +Rogers, C. R., & Farson, R. E. (1957). "Active Listening." Industrial Relations Center, University of Chicago. Reprinted in Newman, R. G., Danziger, M. A., & Cohen, M. (1987). *Communicating in Business Today*. D.C. Heath & Company. + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Active listening is a transformative communication technique that creates non-threatening, empathic environments where people feel heard and understood, promoting personal and relational growth. + +## Core Findings + +1. **Three-step process**: Paraphrasing what was heard, asking clarifying questions, then summarizing main points and intent reduces misunderstandings and builds trust. +2. **Therapeutic foundation**: Rooted in Rogers' three facilitative conditions for effective counseling: empathy, genuineness, and unconditional positive regard. +3. **Transformative power**: "Sensitive listening is a most effective agent for individual personality change and group development." +4. **Behavioral outcomes**: People who have been listened to become "more emotionally mature, more open to their experiences, less defensive, more democratic, and less authoritarian." +5. **Active vs. passive**: Despite popular notion that listening is passive, clinical and research evidence shows it actively brings about changes in people's attitudes and values. 
+ +## Mechanism + +Paraphrasing forces the listener to reconstruct the speaker's meaning, immediately surfacing gaps in understanding. Clarifying questions address residual ambiguity. Summarizing creates a shared record that both parties can confirm or correct. This process establishes empathic connection and creates psychological safety for the speaker to be vulnerable and authentic. + +## Relevance + +Foundational communication technique applicable across therapeutic, business, educational, and interpersonal contexts. Essential skill for leaders, counselors, managers, and anyone seeking to build trust and understanding in human relationships. Forms the basis for modern conflict resolution and negotiation strategies. + +## Related Research + +- (Gottman, 1999) — Critique of active listening effectiveness in marriage therapy +- (McNaughton et al., 2008) — LAFF strategy development for educational contexts diff --git a/docs/research/psychology/social/tetlock_1985.md b/docs/research/psychology/social/tetlock_1985.md new file mode 100644 index 0000000..6b3baaa --- /dev/null +++ b/docs/research/psychology/social/tetlock_1985.md @@ -0,0 +1,45 @@ +# Accountability: A Social Check on the Fundamental Attribution Error — Tetlock, 1985 + +## Citation + +Tetlock, P. E. (1985). Accountability: A social check on the fundamental attribution error. *Social Psychology Quarterly*, 48(3), 227-236. + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Anticipating accountability to an unknown audience improves reasoning quality and reduces attribution errors. + +## Core Findings + +1. Accountability to an unknown audience with unknown views improves reasoning quality compared to no accountability conditions. +2. People anticipating being audited adjust their reasoning to be more careful and systematic. +3. 
Accountability reduces the fundamental attribution error - the tendency to overemphasize personality-based explanations while underemphasizing situational factors. +4. The effect is stronger when people don't know what position they'll be expected to defend. + +## Mechanism + +When people expect to justify their judgments to others, they engage in more effortful, systematic processing. The anticipation of having to explain one's reasoning to an unknown audience motivates more careful consideration of multiple perspectives and evidence, leading to less biased judgments. + +## Relevance + +Critical for review processes and agent design. Creating accountability structures (like APPROVED/REJECTED with evidence requirements) prompts more careful analysis. Unknown audience accountability is particularly powerful because reviewers can't game their response to please a specific viewpoint. + +## Related Research + +- (Kahneman, 2011) — System 1 vs System 2 thinking and cognitive biases +- (Lerner & Tetlock, 1999) — Comprehensive review of accountability effects on judgment diff --git a/docs/research/software-engineering/architecture/bass_et_al_2021.md b/docs/research/software-engineering/architecture/bass_et_al_2021.md new file mode 100644 index 0000000..7ebd225 --- /dev/null +++ b/docs/research/software-engineering/architecture/bass_et_al_2021.md @@ -0,0 +1,48 @@ +# Software Architecture in Practice — Bass, Clements & Kazman, 2021 + +## Citation + +Bass, L., Clements, P., & Kazman, R. (2021). *Software Architecture in Practice* (4th ed.). Addison-Wesley. ISBN 978-0-13-534613-8. First edition published 1998. + +## Source Type + +Practitioner Book + +## Method + +Synthesis + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Quality attributes — not functional requirements — drive architectural decisions. Performance, availability, security, modifiability, reliability, and usability create measurable constraints that determine system structure. 
+ +## Core Findings + +1. **Quality attribute primacy**: Six architecturally significant quality attributes (Performance, Availability, Security, Modifiability, Reliability, Usability) drive structural decisions more than functional requirements. +2. **Architectural tactics catalog**: Each quality attribute produces concrete architectural tactics — Performance tactics include resource arbitration, concurrency, caching; Modifiability tactics include encapsulation, substitution, binding time. +3. **Style-attribute alignment**: Architectural style selection must be justified against quality attribute priorities, not personal preference or technology trends. +4. **Utility tree methodology**: Systematic approach to prioritize quality attributes against business value, producing ranked constraints for architectural decision-making. +5. **Trade-off recognition**: Quality attributes often conflict — optimizing for Performance may harm Modifiability, requiring explicit trade-off decisions. +6. **ATAM integration**: Architecture Tradeoff Analysis Method provides structured evaluation framework for discovering architectural risks early. +7. **Measurable constraints**: Quality attributes work because they create concrete, testable constraints on system structure rather than abstract goals. + +## Mechanism + +Quality attributes work as architectural drivers because they create measurable constraints on system structure. Performance requires specific structural patterns (caching layers, async processing, resource pooling); Modifiability requires different patterns (abstraction layers, dependency inversion, plugin architectures). These constraints are often in tension — optimizing for Performance may harm Modifiability. The utility tree method forces stakeholders to prioritize quality attributes against business value, producing a ranked list that architects use to make trade-off decisions with explicit justification. 
+ +## Relevance + +Foundational methodology for architectural decision-making and system design. Essential for understanding how non-functional requirements translate into concrete structural choices. Critical for architectural evaluation, technology selection, and trade-off analysis. Widely adopted framework used by enterprise architects, system designers, and software engineering teams globally. + +## Related Research + +- (Kazman et al., 2000) — ATAM methodology for architectural trade-off analysis +- (Fowler, 2003) — Architect's role in making significant decisions that are hard to change later diff --git a/docs/research/software-engineering/architecture/boehm_1991.md b/docs/research/software-engineering/architecture/boehm_1991.md new file mode 100644 index 0000000..0c3d574 --- /dev/null +++ b/docs/research/software-engineering/architecture/boehm_1991.md @@ -0,0 +1,47 @@ +# Software Risk Management — Boehm, 1991 + +## Citation + +Boehm, B. W. (1991). "Software Risk Management: Principles and Practices." *IEEE Software*, 8(1), 32–41. https://doi.org/10.1109/52.62930 + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Architecture risk can be systematically assessed using Probability × Impact classification, replacing intuitive risk assessment with an explicit, auditable evaluation framework. + +## Core Findings + +1. **Risk quantification framework**: Each identified risk is rated on two dimensions: Probability (likelihood of materialization) and Impact (severity of consequence), with risks prioritized by their product. +2. **Risk leverage concept**: The ratio of risk reduction to mitigation cost enables teams to focus effort on high-leverage interventions (significant risk reduction for low cost). +3. 
**Systematic risk identification**: Boehm's 10 top software risk items provide a checklist for proactive risk identification across personnel, requirements, technology, and schedule dimensions. +4. **Risk mitigation strategies**: Three primary approaches - risk avoidance (eliminate risk source), risk monitoring (track risk indicators), and risk contingency planning (prepare response plans). +5. **COCOMO model foundation**: Boehm's cost estimation models (COCOMO/COCOMO II) provide quantitative basis for impact assessment in software projects. +6. **Spiral model integration**: Risk assessment is built into the spiral software development model at each iteration cycle. + +## Mechanism + +Probability × Impact works because it forces decision-makers to externalize and quantify what would otherwise remain gut feelings. Low-probability high-impact risks (e.g., database vendor bankruptcy) are distinguished from high-probability low-impact risks (e.g., minor performance degradation) — both may have the same exposure score but demand different mitigation strategies. The framework also introduces risk leverage: high-leverage mitigations (significant risk reduction for low cost) are prioritized over low-leverage ones (minor risk reduction for high cost). + +## Relevance + +Foundational framework for architectural decision records (ADRs) where each decision carries potential risks requiring explicit evaluation. Essential for project management, system design, and any development context requiring systematic risk assessment. Directly applicable to technology selection, architecture planning, and resource allocation decisions in software engineering. 
+ +## Related Research + +- (Kazman, Klein & Clements, 2000) — ATAM method building on Boehm's risk assessment principles +- (Fowler, 2003) — Architectural decision-making frameworks incorporating risk evaluation diff --git a/docs/research/software-engineering/architecture/brown_2018.md b/docs/research/software-engineering/architecture/brown_2018.md new file mode 100644 index 0000000..45a6e27 --- /dev/null +++ b/docs/research/software-engineering/architecture/brown_2018.md @@ -0,0 +1,48 @@ +# C4 Model — Brown, 2006–2018 + +## Citation + +Brown, S. (2018). *Software Architecture for Developers*, Volume 1. Leanpub. C4 model first described 2006–2011, official site launched 2018. Available at https://c4model.com + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Four levels of architectural abstraction — Context, Container, Component, Code — provide just enough detail at each audience level without overwhelming any single audience. + +## Core Findings + +1. **Hierarchical abstraction levels**: Context (system in environment), Container (deployable units), Component (modules within containers), Code (classes and functions). +2. **Audience-specific communication**: Each level answers different questions for different audiences — Context for stakeholders, Container for developers/operators, Component for internal structure, Code for detailed design. +3. **Progressive disclosure**: Starting from Context and drilling down prevents premature detail overload. +4. **Notation independence**: C4 works with any diagramming tool or notation — boxes and lines are sufficient. +5. **Tooling independence**: Can be implemented with simple drawing tools, specialized software, or code-based approaches. +6. **Developer-friendly approach**: Focuses on developer mental models rather than formal architectural frameworks. +7. 
**Supporting diagrams**: System landscape, dynamic, and deployment diagrams complement the core four levels. + +## Mechanism + +The C4 model works because each level answers a different question for a different audience: Context for stakeholders and non-technical team members ("what does the system interact with?"), Container for developers and operators ("what are the deployable units and their tech stacks?"), Component for developers working within a container ("how is this container structured internally?"), and Code for detailed design (rarely needed as a diagram). Starting from Context and drilling down prevents premature detail and ensures the architecture communicates effectively at every level. + +## Relevance + +Essential methodology for software architecture documentation and communication. Widely adopted for system design, technical onboarding, and stakeholder communication. Critical for teams needing to communicate architecture across different technical skill levels and organizational roles. Directly applicable to microservices documentation, system integration planning, and technical decision-making processes. + +## Related Research + +- (Kruchten, 1995) — 4+1 architectural view model that influenced hierarchical approach +- (Fowler, 2003) — "Who Needs an Architect?" discussion on architectural communication \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/cockburn_2005.md b/docs/research/software-engineering/architecture/cockburn_2005.md new file mode 100644 index 0000000..4d64f90 --- /dev/null +++ b/docs/research/software-engineering/architecture/cockburn_2005.md @@ -0,0 +1,47 @@ +# Hexagonal Architecture (Ports & Adapters) — Cockburn, 2005 + +## Citation + +Cockburn, A. (2005). "Hexagonal Architecture." *Alistair Cockburn's blog*. Originally discussed on the Portland Pattern Repository wiki in the early 2000s; formalized as "Ports and Adapters" in 2005. 
+ +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Software should be designed so that the domain core has no dependency on any external technology or delivery mechanism. The domain exposes ports (interfaces) that define what it needs; adapters implement those ports for specific technologies. + +## Core Findings + +1. **Dependency inversion principle**: Infrastructure depends on domain abstractions, not the other way around, making the domain testable in isolation and swappable in deployment. +2. **Ports and adapters pattern**: Ports are domain-defined interfaces; adapters are infrastructure implementations that connect external systems to the domain through these ports. +3. **Technology independence**: The same domain logic can be exercised through any delivery mechanism (HTTP, CLI, message queue, test harness) without modification. +4. **Symmetrical architecture**: All external dependencies (databases, UI, external services, test harnesses) are treated equally as adapters, eliminating the traditional "top" and "bottom" of layered architectures. +5. **Framework isolation**: The domain remains independent of frameworks, databases, and UI technologies, enabling easier testing and technology evolution. +6. **Business logic protection**: Core business rules are isolated from infrastructure concerns, making them more maintainable and less brittle to external changes. + +## Mechanism + +By reversing the dependency so that infrastructure depends on domain abstractions (not the other way around), the domain becomes testable in isolation and swappable in deployment. The hexagonal shape represents that there are multiple ways to interact with the application - through different ports - and each port can have multiple adapters. 
This ensures the domain remains independent of frameworks, databases, and UI, and that the same domain logic can be exercised through any delivery mechanism without modification. + +## Relevance + +Foundational pattern for clean architecture, domain-driven design, and microservices architecture. Essential for creating testable, maintainable systems that can evolve independently of infrastructure concerns. Critical for understanding how to structure applications to achieve technology independence and high testability. Directly applicable to API design, service architecture, and any system requiring multiple integration points. + +## Related Research + +- (Martin, 2017) — Clean Architecture building on Cockburn's dependency inversion principles +- (Fowler, 2003) — Architectural decision-making frameworks that support ports and adapters pattern \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/conway_1968.md b/docs/research/software-engineering/architecture/conway_1968.md new file mode 100644 index 0000000..4b0476e --- /dev/null +++ b/docs/research/software-engineering/architecture/conway_1968.md @@ -0,0 +1,47 @@ +# Conway's Law and Inverse Conway Maneuver — Conway, 1968 + +## Citation + +Conway, M. E. (1968). "How Do Committees Invent?" *Datamation*, 14(4), 28–31. https://www.melconway.com/Home/Committees_Paper.html + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Any organization that designs a system will produce a design whose structure is a copy of the organization's communication structure. This is known as "Conway's Law." + +## Core Findings + +1. **System-organization mirroring**: Organizations are constrained to produce designs that copy their communication structures. +2. 
**Communication boundaries become system boundaries**: Teams that communicate frequently create tightly coupled systems; teams with minimal communication create loosely coupled systems. +3. **Empirical validation**: MIT and Harvard Business School research found "strong evidence to support the mirroring hypothesis" - loosely-coupled organizations produce significantly more modular products. +4. **Inverse Conway Maneuver**: Deliberately restructuring teams to match desired architecture rather than fighting organizational constraints. +5. **Three strategic responses**: Organizations can ignore (creating friction), accept (aligning architecture with existing structure), or invert (restructuring teams for desired architecture). +6. **Architectural implications**: Microservices require autonomous teams, monoliths work with closely collaborating teams, API boundaries should align with team boundaries. + +## Mechanism + +System boundaries mirror communication boundaries. Teams that communicate frequently create tightly coupled systems. Teams with minimal communication create loosely coupled systems. Organizational design becomes architectural design. The Inverse Conway Maneuver deliberately alters team organization to encourage the desired software architecture—aligning Conway's Law with architectural intent rather than fighting it. + +## Relevance + +Foundational principle for organizational design in software development. Critical for microservices architecture, team topology design, and system boundary definition. Agent role design implements Inverse Conway: the system-architect → software-engineer → system-architect loop creates a closed communication path where SA designs module boundaries, SE builds within them, and SA verifies boundary respect. 
+ +## Related Research + +- (Skelton & Pais, 2019) — Team Topologies and modern application of Conway's Law +- (MacCormack, Rusnak & Baldwin, 2011) — Empirical validation of the mirroring hypothesis \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/fielding_2000.md b/docs/research/software-engineering/architecture/fielding_2000.md new file mode 100644 index 0000000..66f9d49 --- /dev/null +++ b/docs/research/software-engineering/architecture/fielding_2000.md @@ -0,0 +1,47 @@ +# Representational State Transfer (REST) — Fielding, 2000 + +## Citation + +Fielding, R. T. (2000). *Architectural Styles and the Design of Network-based Software Architectures*. Doctoral dissertation, University of California, Irvine. https://www.ics.uci.edu/~fielding/pubs/dissertation/top.htm + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +REST defines six architectural constraints for network-based software that enable scalable, reliable, and maintainable distributed systems by treating network communication as stateless operations on resources identified by URLs. + +## Core Findings + +1. **Six architectural constraints**: Client-Server (separation of concerns), Stateless (each request self-contained), Cacheable (responses declare cacheability), Uniform Interface (standardized resource operations), Layered System (transparent intermediaries), Code-on-Demand (optional client extensions). +2. **Uniform Interface supremacy**: The constraint that differentiates REST from other distributed architectures - resources identified by URIs, manipulated via standard methods (GET, POST, PUT, DELETE), with hypermedia driving application state (HATEOAS). +3. **Statelessness benefits**: Each request contains all needed information, improving reliability (any server can handle any request) and scalability (no server-side session state management). +4. 
**Cacheability advantages**: Explicit cache control reduces latency and server load while maintaining data consistency. +5. **Layered system flexibility**: Intermediaries (caches, proxies, load balancers) can be inserted without affecting client or server design. +6. **Web architecture alignment**: REST explains why the Web scales - it codifies the architectural principles that made the WWW successful. + +## Mechanism + +REST works because the Uniform Interface constraint reduces coupling between client and server to a minimum: clients only need to understand media types and standard methods, not server implementation details. Statelessness improves reliability and scalability. For API design, REST implies contracts should be expressed as resource shapes (data structure) and media types (data format), not procedure calls. The contract becomes the resource schema and allowed transitions, not method signatures. + +## Relevance + +Foundational architecture for web services, APIs, and distributed systems. Essential for understanding modern web architecture, microservices design, and HTTP-based APIs. Critical for system architects designing scalable, maintainable distributed systems. Directly applicable to API design, web service architecture, and system integration patterns. + +## Related Research + +- (Conway, 1968) — Organizational structure implications for REST service boundaries +- (Fowler, 2014) — Microservices architecture patterns building on REST principles diff --git a/docs/research/software-engineering/architecture/fowler_2003.md b/docs/research/software-engineering/architecture/fowler_2003.md new file mode 100644 index 0000000..18f4474 --- /dev/null +++ b/docs/research/software-engineering/architecture/fowler_2003.md @@ -0,0 +1,47 @@ +# Who Needs an Architect? — Fowler, 2003 + +## Citation + +Fowler, M. (2003). "Who Needs an Architect?" *IEEE Software*, 20(5), 11–13. 
https://martinfowler.com/ieeeSoftware/whoNeedsArchitect.pdf + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The architect's job is not to draw diagrams—it is to make **significant decisions** that are hard to change later. The architect is a facilitator who builds consensus around technical direction, not a dictator who issues edicts. + +## Core Findings + +1. **Four architect archetypes**: Architect as Decision-Maker (owns hard-to-change choices), Expert (provides technical depth), Facilitator (brings stakeholders to consensus), and Gatekeeper (enforces standards and reviews compliance). +2. **Programming architects superiority**: The best architects are also programmers who understand implementation constraints firsthand rather than ivory-tower theorists. +3. **Policy vs. detail separation**: The architect owns **policy** (business rules, interfaces, architectural constraints) while developers own **detail** (algorithms, data structures, implementation mechanics). +4. **Significant decisions focus**: Architecture is about making important decisions that affect the system's ability to meet its quality requirements, not about creating comprehensive documentation. +5. **Facilitation over dictation**: Effective architects build consensus and shared understanding rather than issuing top-down mandates. +6. **Hands-on involvement**: Architects must stay involved in implementation to understand real-world constraints and trade-offs. + +## Mechanism + +This separation enables independent evolution of concerns - policy can change without affecting implementation details, and vice versa. The architect focuses on decisions that are expensive to change later (technology choices, integration patterns, quality attribute strategies) while leaving implementation flexibility to developers. 
Facilitation works better than dictation because it creates buy-in and shared understanding, making architectural decisions more likely to be followed and adapted appropriately as circumstances change. + +## Relevance + +Foundational framework for defining architectural roles and responsibilities in modern software development. The system-architect role combines decision-maker and gatekeeper functions: making architectural decisions (ADRs) and enforcing them through adversarial review. Essential for understanding the balance between architectural guidance and implementation autonomy in agile development environments. + +## Related Research + +- (Martin, 2017) — Clean Architecture principles building on Fowler's policy/detail separation +- (Bass et al., 2021) — Software Architecture in Practice expanding on architectural decision-making frameworks \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/hohpe_woolf_2003.md b/docs/research/software-engineering/architecture/hohpe_woolf_2003.md new file mode 100644 index 0000000..c32ca3f --- /dev/null +++ b/docs/research/software-engineering/architecture/hohpe_woolf_2003.md @@ -0,0 +1,47 @@ +# Enterprise Integration Patterns — Hohpe & Woolf, 2003 + +## Citation + +Hohpe, G., & Woolf, B. (2003). *Enterprise Integration Patterns: Designing, Building, and Deploying Messaging Solutions*. Addison-Wesley. ISBN 978-0-321-20068-6. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Asynchronous messaging between systems follows a catalog of 65 integration patterns that solve recurring coupling, reliability, and ordering problems, providing technology-independent solutions to distributed system integration challenges. + +## Core Findings + +1. 
**Foundational messaging patterns**: Message (data packet), Message Channel (transport), Message Endpoint (producer/consumer), Message Router (content-based routing), Message Translator (schema conversion), and Publish-Subscribe Channel (one-to-many distribution) form the core vocabulary. +2. **Decoupling dimensions**: Integration patterns decouple time (asynchronous delivery), space (location independence), and schema (data model independence) between systems. +3. **Event contract specifications**: Beyond payload schema, event contracts must specify ordering guarantees, delivery semantics (at-most-once, at-least-once, exactly-once), and error handling policies. +4. **Pattern language approach**: 65 patterns organized into categories (messaging systems, channels, construction, routing, transformation, endpoints, system management) providing comprehensive integration vocabulary. +5. **Technology independence**: Patterns apply across messaging technologies (JMS, MSMQ, TIBCO, modern cloud messaging, microservices, serverless architectures). +6. **Industry adoption**: Spurred development of Enterprise Service Bus implementations including Apache Camel, Mule, WSO2, Oracle Service Bus, Open ESB, and modern integration platforms. + +## Mechanism + +Integration patterns work because they decouple time, space, and schema between systems. A Message Channel decouples space (producer and consumer don't need to know each other's location); asynchronous delivery decouples time (producer and consumer don't need to be available simultaneously); a Message Translator decouples schema (each system retains its own data model). The key insight is that event contracts must specify not just the payload schema but also ordering guarantees (per-sender FIFO, causal ordering), delivery semantics, and error handling. Without these, integration points become fragile and hard to reason about. 
+ +## Relevance + +Foundational reference for all distributed system integration, microservices architecture, event-driven systems, and API design. Essential for understanding asynchronous messaging patterns that remain relevant across technology generations from enterprise messaging to modern serverless and cloud-native architectures. Critical for designing robust, loosely-coupled distributed systems. + +## Related Research + +- (Fielding, 2000) — REST architectural style complementing messaging patterns for distributed systems +- (Conway, 1968) — Organizational structures affecting integration architecture design diff --git a/docs/research/software-engineering/architecture/kazman_klein_clements_2000.md b/docs/research/software-engineering/architecture/kazman_klein_clements_2000.md new file mode 100644 index 0000000..a5ed3c0 --- /dev/null +++ b/docs/research/software-engineering/architecture/kazman_klein_clements_2000.md @@ -0,0 +1,47 @@ +# Architecture Tradeoff Analysis Method (ATAM) — Kazman, Klein & Clements, 2000 + +## Citation + +Kazman, R., Klein, M., & Clements, P. (2000). "ATAM: Method for Architecture Evaluation" (CMU/SEI-2000-TR-004). Software Engineering Institute, Carnegie Mellon University. + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Architecture should be evaluated early through structured scenario analysis. ATAM discovers **trade-offs** and **sensitivity points** before implementation begins, when change cost is minimal. + +## Core Findings + +1. **Risk-mitigation roadmap**: ATAM produces a structured assessment of architectural risks rather than a simple pass/fail verdict. +2. **Nine-step process**: Systematic methodology covering stakeholder presentation, business driver analysis, architecture presentation, approach identification, quality attribute tree generation, analysis, scenario brainstorming, re-analysis, and results presentation. +3. 
**Sensitivity points identification**: Reveals architectural decisions that most significantly affect quality attributes. +4. **Trade-off points analysis**: Identifies decisions affecting multiple quality attributes in opposing ways, highlighting necessary compromises. +5. **Quality attribute focus**: Structures evaluation around specific quality concerns (performance, security, maintainability, etc.) rather than general architectural goodness. +6. **Stakeholder-driven scenarios**: Uses real stakeholder scenarios to test architectural decisions against actual usage patterns and concerns. + +## Mechanism + +The method reveals how architectural decisions affect quality attributes and identifies decisions that most impact system success. ATAM works by systematically walking through architectural approaches against stakeholder-prioritized quality attribute scenarios. Sensitivity points emerge when small changes in architectural decisions cause large changes in quality attribute response. Trade-off points appear when architectural decisions improve one quality attribute while degrading another, forcing explicit design trade-offs. + +## Relevance + +Foundational methodology for architectural assessment and review processes. ATAM-style analysis is applied in adversarial review during verification: testing implemented architecture against quality-attribute scenarios identified during design. Essential for system architects who need to evaluate architectural decisions before implementation when change costs are minimal. 
+ +## Related Research + +- (Bass, Clements & Kazman, 2021) — Software Architecture in Practice expanding on ATAM methodology +- (Clements, Kazman & Klein, 2002) — Evaluating Software Architectures comprehensive guide \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/kruchten_1995.md b/docs/research/software-engineering/architecture/kruchten_1995.md new file mode 100644 index 0000000..60074e0 --- /dev/null +++ b/docs/research/software-engineering/architecture/kruchten_1995.md @@ -0,0 +1,47 @@ +# The 4+1 View Model of Architecture — Kruchten, 1995 + +## Citation + +Kruchten, P. B. (1995). "The 4+1 View Model of Architecture." *IEEE Software*, 12(6), 42–50. https://doi.org/10.1109/52.469759 + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Software architecture cannot be adequately captured in a single model or view. The 4+1 model provides multiple, complementary perspectives that together form a complete architectural description. + +## Core Findings + +1. **Five complementary views**: Logical view (object model, functional requirements), Process view (concurrency, distribution, performance), Physical view (deployment, hardware topology), Development view (static organization, modules, subsystems), and Scenarios (+1) that tie views together through use cases. +2. **Stakeholder-specific concerns**: Each view addresses different stakeholder concerns and quality attributes - developers need Development view, system integrators need Physical view, performance engineers need Process view. +3. **Scenario validation**: The scenarios (+1) validate that the architecture works as an integrated whole by showing how the views collaborate to support key use cases. +4. 
**Quality attribute mapping**: Each view specifically addresses non-functional requirements - Performance (Process), Availability (Physical), Modifiability (Development), Functionality (Logical). +5. **Multi-perspective necessity**: Architecture is not just structure - it must address non-functional requirements through specific design decisions in each view. +6. **IEEE 1471 influence**: Kruchten's work heavily influenced IEEE 1471-2000 standard for architectural description. + +## Mechanism + +The model emphasizes that architecture is not just structure—it must address non-functional requirements (performance, availability, modifiability) through specific design decisions in each view. Each view uses different notation and focuses on different architectural elements, but scenarios weave through all views to demonstrate end-to-end system behavior. This multi-perspective approach ensures no critical architectural concern is overlooked while avoiding the complexity of a single, monolithic architectural model. + +## Relevance + +Foundational framework for architectural documentation and communication. C4 diagrams and modern architectural documentation templates follow this multi-view principle. Context, Container, Component, and Code diagrams provide complementary perspectives that together describe complete architecture. Essential for enterprise architecture, system design documentation, and architectural review processes. 
+ +## Related Research + +- (Brown, 2018) — C4 model applying multi-view principles to contemporary software architecture +- (Bass et al., 2021) — Software Architecture in Practice building on Kruchten's view-based approach \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/martin_2012_clean.md b/docs/research/software-engineering/architecture/martin_2012_clean.md new file mode 100644 index 0000000..bd22e81 --- /dev/null +++ b/docs/research/software-engineering/architecture/martin_2012_clean.md @@ -0,0 +1,48 @@ +# Clean Architecture — Martin, 2012 + +## Citation + +Martin, R. C. (2012). "The Clean Architecture." *8th Light Blog*. Later expanded in *Clean Architecture: A Craftsman's Guide to Software Structure and Design* (2017), Prentice Hall. ISBN 978-0-13-449416-6. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The dependency arrow always points inward: from infrastructure toward application toward domain. The domain knows nothing about frameworks, databases, or external services. + +## Core Findings + +1. **Dependency Rule**: Source code dependencies must point inward only — outer layers can depend on inner layers, but inner layers must never depend on outer layers. +2. **Concentric layer structure**: Four layers from outside to inside: Frameworks/Drivers → Interface Adapters → Application Business Rules (Use Cases) → Enterprise Business Rules (Entities). +3. **Framework independence**: The architecture doesn't depend on frameworks; frameworks are tools to be used, not architectures to be conformed to. +4. **Testable in isolation**: Business rules can be tested without UI, database, web server, or any external element because dependencies point inward. +5. 
**Database independence**: Business rules are not bound to the database — you can swap Oracle for SQL Server, MongoDB, CouchDB, or something else without affecting business rules. +6. **UI independence**: The UI can change without changing the rest of the system — Web UI could be replaced with console UI without changing business rules. +7. **Building on previous architectures**: Clean Architecture synthesizes Hexagonal Architecture (Cockburn, 2005), Onion Architecture, Screaming Architecture, and DCI into a unified approach. + +## Mechanism + +Clean Architecture builds on Hexagonal Architecture and layer-based approaches by making the dependency rule explicit: source code dependencies must point inward only. The outermost layers (frameworks, drivers, UI, database) are details that can be changed without affecting inner layers. The innermost layer (entities, use cases) contains business rules that have no knowledge of the outside world. This ensures that the domain is both testable in isolation and insulated from infrastructure churn. Dependency Inversion Principle enables this by having high-level modules define interfaces that low-level modules must implement. + +## Relevance + +Foundational architecture pattern for creating maintainable, testable, framework-independent systems. Essential for microservices design, domain-driven design implementation, and any system requiring long-term maintainability. Critical for applications where business logic must evolve independently of technical infrastructure choices. 
+ +## Related Research + +- (Parnas, 1972) — Information hiding principles underlying Clean Architecture's dependency rule +- (Cockburn, 2005) — Hexagonal Architecture that Clean Architecture builds upon and generalizes \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/nygard_2011.md b/docs/research/software-engineering/architecture/nygard_2011.md new file mode 100644 index 0000000..f12a991 --- /dev/null +++ b/docs/research/software-engineering/architecture/nygard_2011.md @@ -0,0 +1,48 @@ +# Architecture Decision Records — Nygard, 2011 + +## Citation + +Nygard, M. (2011). "Documenting Architecture Decisions." *Cognitect Blog*. November 15, 2011. Later adopted by ThoughtWorks Technology Radar (2016). https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Architecturally significant decisions should be documented as short, immutable records capturing the context, decision, rationale, alternatives, and consequences. Each record is written once and never edited — if understanding changes, a new record supersedes the old one. + +## Core Findings + +1. **Five-part structure**: Title, Context (forces at play), Decision (response to forces), Status (proposed/accepted/superseded), Consequences (resulting context after decision). +2. **Immutability principle**: ADRs are never edited after acceptance — superseded decisions remain as historical record with references to replacements. +3. **Lightweight format**: One to two pages maximum, written in Markdown, stored in version control with code. +4. **Architecturally significant scope**: Decisions affecting structure, non-functional characteristics, dependencies, interfaces, or construction techniques. +5. **Sequential numbering**: ADRs numbered monotonically and sequentially (never reused) for easy reference. +6. 
**Conversation with future developers**: Written in full sentences with active voice to communicate reasoning to new team members. +7. **ThoughtWorks adoption**: Added to Technology Radar in 2016, driving widespread industry adoption. + +## Mechanism + +ADRs work because they externalise architectural reasoning that would otherwise remain tacit, tribal knowledge. By forcing the decision-maker to articulate the context (what forces are at play), the decision (what was chosen), the reason (why this choice over alternatives), and the consequences (what becomes easier or harder), ADRs create a decision trail that new team members can read. Immutability prevents retroactive justification: you cannot rewrite history, only supersede it. The consequences of one ADR often become the context for subsequent ADRs, creating a decision pattern language. + +## Relevance + +Essential practice for software architecture documentation and knowledge management. Critical for distributed teams, high-turnover environments, and complex systems requiring architectural decision tracking. Widely adopted across the software industry for maintaining architectural reasoning, onboarding new developers, and preventing repeated architectural mistakes. Directly applicable to any project requiring transparent decision-making processes. + +## Related Research + +- (Kruchten, 2004) — Importance of architecture decisions in software development +- (Brown, 2018) — C4 model complementing ADR documentation with visual architecture communication \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/parnas_1972.md b/docs/research/software-engineering/architecture/parnas_1972.md new file mode 100644 index 0000000..bb2efea --- /dev/null +++ b/docs/research/software-engineering/architecture/parnas_1972.md @@ -0,0 +1,47 @@ +# Information Hiding — Parnas, 1972 + +## Citation + +Parnas, D. L. (1972). "On the criteria to be used in decomposing systems into modules." 
*Communications of the ACM*, 15(12), 1053–1058. https://doi.org/10.1145/361598.361623 + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The correct criterion for decomposing a system into modules is **information hiding**: each module hides a design decision that is likely to change. Modules should reveal only what callers need while hiding implementation details. + +## Core Findings + +1. **Information hiding principle**: Each module should hide one specific design decision that is likely to change, creating a stable interface while allowing implementation flexibility. +2. **Decomposition by change-prone decisions**: Rather than decomposing by execution steps (procedure-based), decompose by decisions most likely to change (data structures, algorithms, I/O formats, external protocols). +3. **Module interface stability**: The module's public interface should be change-stable while the implementation remains change-free from the caller's perspective. +4. **Coupling reduction**: Information hiding prevents tight coupling by making modules depend only on abstract interfaces, not concrete implementations. +5. **Foundation for modern principles**: This 1972 paper established the theoretical foundation for SOLID principles (especially Dependency Inversion), Hexagonal Architecture, and Domain-Driven Design bounded contexts. +6. **Engineering professionalization**: Parnas was among the first to apply traditional engineering principles to software design, earning professional engineering licenses and advocating for software engineering as a legitimate engineering discipline. + +## Mechanism + +Decomposing by execution steps (procedure-based) creates tight coupling to implementation order. Decomposing by change-prone decisions (information-hiding) allows each decision to be changed independently without affecting other modules. 
The mechanism works by identifying decisions most likely to change (data structures, algorithms, I/O formats, external service protocols), then making each such decision a module boundary. The module's public interface exposes only what callers need; all implementation details remain hidden and changeable. + +## Relevance + +Foundational principle for all modern software architecture. Essential for creating maintainable, evolvable systems where changes to implementation details don't cascade through the entire codebase. Critical for microservices design, API development, library design, and any system requiring long-term maintainability. Directly applicable to bounded context identification, dependency injection, and modular system design. + +## Related Research + +- (Martin, 2000) — SOLID principles building on Parnas's information hiding foundation +- (Cockburn, 2005) — Hexagonal Architecture applying information hiding to external dependencies \ No newline at end of file diff --git a/docs/research/software-engineering/architecture/skelton_pais_2019.md b/docs/research/software-engineering/architecture/skelton_pais_2019.md new file mode 100644 index 0000000..9d1fcf3 --- /dev/null +++ b/docs/research/software-engineering/architecture/skelton_pais_2019.md @@ -0,0 +1,48 @@ +# Team Topologies — Skelton & Pais, 2019 + +## Citation + +Skelton, M., & Pais, M. (2019). *Team Topologies: Organizing Business and Technology Teams for Fast Flow*. IT Revolution Press. ISBN 978-1942788812. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Team cognitive load and communication patterns directly impact software architecture quality. Teams should be organized around the architecture you want, not the architecture you have. + +## Core Findings + +1. 
**Four fundamental team types**: Stream-aligned (continuous flow aligned to business capability), Enabling (helps other teams overcome obstacles), Complicated-subsystem (specialized knowledge for complex subsystems), Platform (provides internal services to reduce cognitive load). +2. **Three team interaction modes**: Collaboration (working together for discovery and rapid learning), X-as-a-Service (consuming services with minimal collaboration), Facilitating (helping another team learn or adopt new approaches). +3. **Cognitive load management**: Teams have limited cognitive capacity — exceeding this limit through too many responsibilities, technologies, or domains reduces effectiveness. +4. **Conway's Law application**: Team boundaries become system boundaries, so design team structures that mirror your desired architecture using the Inverse Conway Maneuver. +5. **Fast flow optimization**: Team topologies should minimize cognitive load while maximizing flow of value to customers. +6. **Evolutionary design**: Team structures and communication pathways must be able to evolve with technological and organizational maturity. +7. **Second edition emphasis**: Cognitive load as a design principle, organizations as "flourishing ecosystems" rather than "efficient machines." + +## Mechanism + +Teams are the fundamental means of delivery, where team structures determine software architecture through Conway's Law. By deliberately designing team topologies to match desired system architecture, organizations can influence both technical and organizational outcomes. Cognitive load acts as a constraint — teams exceeding their cognitive capacity produce lower quality software with slower delivery. The four team types and three interaction modes provide a vocabulary for designing sustainable organizational structures. + +## Relevance + +Essential framework for organizational design in technology companies. 
Critical for DevOps transformation, microservices architecture, platform engineering, and any organization seeking to improve software delivery performance. Directly applicable to team formation, organizational restructuring, and aligning team boundaries with system boundaries. + +## Related Research + +- (Conway, 1968) — Conway's Law as the theoretical foundation for team-system mirroring +- (Brooks, 1975) — The Mythical Man-Month on team size and communication overhead limits \ No newline at end of file diff --git a/docs/research/software-engineering/process/beck_1999_yagni.md b/docs/research/software-engineering/process/beck_1999_yagni.md new file mode 100644 index 0000000..675ee12 --- /dev/null +++ b/docs/research/software-engineering/process/beck_1999_yagni.md @@ -0,0 +1,45 @@ +# YAGNI ("You Aren't Gonna Need It") — Beck & Jeffries, 1999 + +## Citation + +Beck, K., & Jeffries, R. (1999). Extreme Programming principle, originated on the Ward Cunningham Wiki and in comp.software.extreme-programming discussions. Later articulated in Beck, K. (2000). *Extreme Programming Explained*, Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Never add functionality until it is required by a failing test or current requirement - speculative code adds complexity without delivering value. + +## Core Findings + +1. **Principle Definition**: "Always implement things when you actually need them, never when you just foresee that you need them" (Ron Jeffries) +2. **Cognitive Bias Protection**: YAGNI counteracts planning fallacy (overestimating likelihood of predicted needs) and sunk cost bias +3. **Design Priority**: YAGNI operates as highest-priority design rule (YAGNI > KISS > DRY > OC > SOLID > patterns) +4. **XP Integration**: Used with continuous refactoring, automated unit testing, and continuous integration +5. 
**Expert Validation**: John Carmack observed that architecting for future requirements rarely turns out to be a net positive + +## Mechanism + +YAGNI protects against two cognitive biases: planning fallacy (overestimating the likelihood that predicted future needs will materialize) and sunk cost (reluctance to remove expensive-to-write code). By deferring all implementation until demanded by tests or requirements, YAGNI keeps the codebase minimal and focused. It must be used with supporting practices like continuous refactoring to avoid technical debt. + +## Relevance + +Essential for lean software development, preventing over-engineering and feature creep. Applied in TDD workflows, API design, architecture decisions. Fundamental principle in Extreme Programming and Agile methodologies for maintaining code simplicity and reducing maintenance burden. + +## Related Research + +Connects to (Beck, 2002) on TDD practices, (Fowler, 1999) on refactoring support, (Gamma et al., 1994) on design patterns as lower priority. Part of the broader XP methodology alongside the KISS principle and DRY principle. Related to Lean principles of waste elimination.
+ +## Source Type + +Practitioner Book + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Blameless post-mortems focus on process and system failures (not individual mistakes) to produce more actionable improvements than blame-oriented reviews. + +## Core Findings + +1. **Fundamental Principle**: Every failure is a process failure, not a people failure +2. **Psychological Safety Effect**: Blameless approach enables honest disclosure of full context including mistakes and near-misses +3. **Process-Level Framing**: "What process gap allowed this to happen?" vs "Who made the mistake?" shifts improvement target from individual behavior to systemic reliability +4. **Complete Root Cause Analysis**: Participants share more information when not threatened with punishment +5. **Google SRE Origin**: Developed as core practice in Site Reliability Engineering at Google, founded by Benjamin Treynor Sloss in 2003 + +## Mechanism + +Blameless post-mortems work through psychological safety enabling honest disclosure. When participants know they will not be punished, they share complete context including their own mistakes. This produces more comprehensive root cause analysis than blame-oriented reviews where participants hide information for self-protection. Process-level framing shifts focus to systemic improvements. + +## Relevance + +Essential for incident response, organizational learning, reliability engineering. Applied in SRE practices, DevOps culture, continuous improvement. Fundamental for building high-reliability organizations and preventing repeat failures through systemic fixes rather than individual blame. + +## Related Research + +Connects to (Amy Edmondson) on psychological safety, (Sidney Dekker) on Just Culture, (John Allspaw) on post-mortem practices. Part of broader SRE methodology alongside error budgets, monitoring, automation. 
Related to learning organization principles and continuous improvement frameworks. \ No newline at end of file diff --git a/docs/research/software-engineering/process/calver_2020.md b/docs/research/software-engineering/process/calver_2020.md new file mode 100644 index 0000000..f07a8f7 --- /dev/null +++ b/docs/research/software-engineering/process/calver_2020.md @@ -0,0 +1,45 @@ +# Calendar Versioning — CalVer, 2020 + +## Citation + +CalVer (2020). Calendar Versioning. https://calver.org + +## Source Type + +Specification + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Date-based version segments communicate release timing directly, suitable for projects with large/constantly-changing scope or time-sensitive releases. + +## Core Findings + +1. **Family of Schemes**: Not single scheme but flexible framework - YY.MINOR.MICRO (pip), YYYY.MM.DD (certifi), YY.0M (OpenSCAD) +2. **Standard Terminology**: YYYY/YY/0Y (year), MM/0M (month), WW/0W (week), DD/0D (day) segments +3. **Wide Adoption**: Ubuntu, Twisted, youtube-dl, pip, PyCharm, Unity, LibreOffice, OpenSCAD, Stripe API +4. **Compatibility Limitation**: CalVer alone doesn't signal breaking changes - some projects use hybrid SemVer+CalVer approach +5. **Three Key Use Cases**: Large/changing scope systems, time-sensitive releases, external-change-driven projects + +## Mechanism + +CalVer replaces arbitrary version increments with calendar-derived segments using Gregorian calendar and UTC convention. Date segments are 1-based (unlike traditional 0-based incremented versions) with short/zero-padded years relative to year 2000. Projects choose appropriate scheme based on release patterns and communication needs. + +## Relevance + +Essential for projects with time-based releases, security updates, business support schedules, large system coordination. Applied in operating systems, frameworks, security libraries, API versioning. 
Alternative to SemVer when semantic meaning is less relevant than temporal context. + +## Related Research + +Connects to (Preston-Werner, 2013) on Semantic Versioning as alternative approach. Part of broader software versioning strategies including hybrid approaches. Related to release management, dependency management, and software lifecycle practices. \ No newline at end of file diff --git a/docs/research/software-engineering/process/clegg_barker_1994.md b/docs/research/software-engineering/process/clegg_barker_1994.md new file mode 100644 index 0000000..03fd7b8 --- /dev/null +++ b/docs/research/software-engineering/process/clegg_barker_1994.md @@ -0,0 +1,45 @@ +# MoSCoW Prioritization — Clegg & Barker, 1994 + +## Citation + +Clegg, D., & Barker, R. (1994). *Case Method Fast-Track: A RAD Approach*. Addison-Wesley. (DSDM origin.) + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Classifying requirements as Must/Should/Could/Won't forces explicit negotiation about what is essential vs. desired, preventing gold-plating. + +## Core Findings + +1. **Four Priority Categories**: Must have (critical), Should have (important but not necessary), Could have (desirable), Won't have (lowest priority/out of scope) +2. **DSDM Integration**: Originally developed by Dai Clegg in 1994 for Rapid Application Development, extensively used in Dynamic Systems Development Method from 2002 +3. **Effort Constraints**: DSDM mandates that Must requirements cannot exceed 60% of total effort +4. **Plain English Value**: Categories more meaningful than High/Medium/Low, helping customers understand priority impact +5. **Story-Level Application**: When applied within single story, reveals bloated stories that should be split + +## Mechanism + +MoSCoW forces explicit negotiation by using plain English categories that clarify business impact. 
The 60% constraint on Must requirements prevents scope creep. At story level, if only 3 of 12 examples are Must, the remaining 9 can be deferred, keeping stories focused and deliverable within timeboxes. + +## Relevance + +Essential for Agile development, requirements prioritization, scope management, minimum viable product definition. Applied in Scrum, RAD, DSDM methodologies. Fundamental for timeboxed delivery and preventing feature creep in iterative development approaches. + +## Related Research + +Connects to (Kano et al., 1984) on alternative prioritization methods. Part of broader Agile methodology alongside user stories and timeboxing. Related to scope management, minimum viable product concepts, and iterative development frameworks. diff --git a/docs/research/software-engineering/process/fagan_1976.md b/docs/research/software-engineering/process/fagan_1976.md new file mode 100644 index 0000000..ff3e2aa --- /dev/null +++ b/docs/research/software-engineering/process/fagan_1976.md @@ -0,0 +1,45 @@ +# Design and Code Inspections — Fagan, 1976 + +## Citation + +Fagan, M. E. (1976). "Design and Code Inspections to Reduce Errors in Program Development." *IBM Systems Journal*, 15(3), 182–211. + +## Source Type + +Academic Paper + +## Method + +Experiment + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Structured inspections using checklists detect 60-90% of defects before testing, far more than unstructured walkthroughs by forcing reviewers to articulate specific failures. + +## Core Findings + +1. **Defect Detection Rate**: Structured inspections detect 60-90% of defects before testing phase +2. **Cost Effectiveness**: Fixing defects in early phases costs 10-100x less than fixing in maintenance phase +3. **Six-Phase Process**: Planning, Overview, Preparation, Inspection meeting, Rework, Follow-up +4. **Role-Based Review**: Author, Reader, Reviewers, Moderator, Recorder each have specific responsibilities +5. 
**Checklist-Driven**: Systematic checking against specific quality attributes prevents confirmation bias + +## Mechanism + +Fagan inspections constrain reviewer attention to overcome confirmation bias. Unstructured reviews allow skimming and overlooking defects through expectation confirmation. Structured inspection requires checking each quality attribute individually, forcing System 2 thinking. Self-declaration checklists (AGREE/DISAGREE criteria) prevent vague "looks good" approvals that hide defects. + +## Relevance + +Essential for code quality assurance, defect prevention, software inspection processes. Applied in formal review procedures, quality gates, peer review systems. Foundational for static analysis, code review practices, and quality assurance in software development lifecycle. + +## Related Research + +Connects to (Tversky & Kahneman, 1974) on confirmation bias, (Kahneman, 2011) on System 1/2 thinking. Part of broader software quality methodologies alongside testing, static analysis. Related to inspection techniques, peer review processes, and formal verification approaches. \ No newline at end of file diff --git a/docs/research/software-engineering/process/preston-werner_2013.md b/docs/research/software-engineering/process/preston-werner_2013.md new file mode 100644 index 0000000..a0218f8 --- /dev/null +++ b/docs/research/software-engineering/process/preston-werner_2013.md @@ -0,0 +1,45 @@ +# Semantic Versioning 2.0.0 — Preston-Werner, 2013 + +## Citation + +Preston-Werner, T. (2013). Semantic Versioning 2.0.0. https://semver.org + +## Source Type + +Specification + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Version numbers follow MAJOR.MINOR.PATCH format encoding compatibility intent to enable automated dependency resolution and prevent dependency hell. + +## Core Findings + +1. 
**Three-Part Versioning**: MAJOR.MINOR.PATCH, where MAJOR is incremented for incompatible API changes, MINOR for backward-compatible additions, and PATCH for backward-compatible bug fixes +2. **Build Metadata Independence**: Build metadata (after `+`) provides arbitrary information without affecting version precedence - `1.0.0+20260430` and `1.0.0` have the same precedence +3. **Pre-release Ordering**: Pre-release versions (after `-`) have lower precedence than normal versions: `1.0.0-alpha < 1.0.0` +4. **Dependency Resolution**: Enables automated package management with range specifications like `>=3.1.0 <4.0.0` preventing dependency hell +5. **Public API Declaration**: Requires clear, precise public API definition as the foundation for meaningful version communication + +## Mechanism + +SemVer encodes compatibility intent in the version number itself. MAJOR increments signal breaking changes requiring consumer updates; MINOR increments signal safe additions; PATCH increments signal safe fixes. Build metadata suffix (§10) allows arbitrary data (dates, commit hashes) without affecting dependency solver precedence calculations. + +## Relevance + +De facto standard for software versioning, essential for package management systems (npm, pip, Maven), continuous integration, API evolution communication. Foundational for dependency resolution algorithms, semantic release automation, software distribution strategies. + +## Related Research + +Created by Tom Preston-Werner (GitHub co-founder, Gravatar inventor) in 2013. Based on widespread existing practices in open/closed-source software. Influences modern package managers, CI/CD systems, release automation tools. Licensed under Creative Commons CC BY 3.0, maintained as an open specification.
\ No newline at end of file diff --git a/docs/research/software-engineering/process/reinertsen_2009.md b/docs/research/software-engineering/process/reinertsen_2009.md new file mode 100644 index 0000000..604bb53 --- /dev/null +++ b/docs/research/software-engineering/process/reinertsen_2009.md @@ -0,0 +1,45 @@ +# The Principles of Product Development Flow (WSJF) — Reinertsen, 2009 + +## Citation + +Reinertsen, D. G. (2009). *The Principles of Product Development Flow: Second Generation Lean Product Development*. Celeritas Publishing. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Weighted Shortest Job First (WSJF), calculated as Cost of Delay divided by Duration, optimizes product development work sequencing by prioritizing items where delay is most costly relative to implementation time. + +## Core Findings + +1. **WSJF Formula**: Cost of Delay ÷ Duration provides quantitative ranking replacing subjective prioritization +2. **Cost of Delay Quantification**: "One thing" to quantify - partial derivative of total expected value with respect to time ($/time units) +3. **CD3 Algorithm**: "Cost of Delay Divided by Duration" maximizes total value delivered by scarce development capacity +4. **Manager Knowledge Gap**: ~85% of product managers don't know their Cost of Delay +5. **Intuition Failure**: Intuitive Cost of Delay estimates differ by 50:1 ratio, making quantification essential + +## Mechanism + +WSJF transforms subjective prioritization into quantitative ranking. Cost of Delay captures economic impact of not doing work now. Duration normalizes for effort. The ratio identifies work delivering most value per unit time invested. "CD3" scheduling algorithm maximizes value in any given time period. + +## Relevance + +Essential for product management, agile prioritization, lean development. Applied in feature prioritization, backlog management, resource allocation. 
Foundational for SAFe (Scaled Agile Framework) prioritization and quantitative product development flow optimization. + +## Related Research + +Created by Donald G. Reinertsen, author of "Managing the Design Factory" (1997). Cost of Delay concept described as "golden key that unlocks many doors" with "astonishing power to transform development organization mindset." Adopted in SAFe methodology and lean-agile practices worldwide. diff --git a/docs/research/software-engineering/quality/bay_2008.md b/docs/research/software-engineering/quality/bay_2008.md new file mode 100644 index 0000000..b2ba63d --- /dev/null +++ b/docs/research/software-engineering/quality/bay_2008.md @@ -0,0 +1,46 @@ +# Object Calisthenics — Bay, 2008 + +## Citation + +Bay, J. (2008). "Object Calisthenics." In *The ThoughtWorks Anthology*, pp. 65–78. Pragmatic Bookshelf. + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +Moderate + +## Key Insight + +Nine syntactic constraints force developers to distribute behavior rather than centralize data, producing significantly better object-oriented designs through structural pressure. + +## Core Findings + +1. **Extreme constraints methodology**: Nine rules including "no more than two instance variables per class" and "no getters/setters/properties" create structural pressure toward better designs. +2. **Anti-pattern prevention**: "No more than two instance variables" prevents god-object anti-pattern by making it impossible to hold all related data in one class, forcing decomposition into collaborating objects. +3. **Training wheels philosophy**: Constraints are intentionally extreme - they are temporary training tools, not permanent rules - but design habits they produce persist after constraints are relaxed. +4. **Behavior distribution**: Forces small, focused classes with clear responsibilities and behavior-rich objects rather than data containers. +5. 
**ThoughtWorks methodology**: Published as part of ThoughtWorks Anthology, reflecting company's agile software development practices and expertise. + +## Mechanism + +Constraints create structural pressure toward small, focused classes with clear responsibilities. Extreme limitations make poor design choices impossible, forcing developers to find alternative approaches that result in better object-oriented structure. Design habits developed under constraints (small classes, behavior-rich objects, encapsulated data) persist after constraints are relaxed. + +## Relevance + +Valuable training methodology for developing better object-oriented design skills. Useful for teams struggling with large classes, anemic domain models, or excessive coupling. Educational tool for understanding principles behind good OOP design through extreme application. + +## Related Research + +- (Martin, 2000) — SOLID principles providing theoretical foundation for good OOP design +- (Fowler, 1999) — Refactoring techniques for improving object-oriented design incrementally \ No newline at end of file diff --git a/docs/research/software-engineering/quality/beck_2002.md b/docs/research/software-engineering/quality/beck_2002.md new file mode 100644 index 0000000..04e3f50 --- /dev/null +++ b/docs/research/software-engineering/quality/beck_2002.md @@ -0,0 +1,48 @@ +# Test-Driven Development — Beck, 2002 + +## Citation + +Beck, K. (2002). *Test-Driven Development: By Example*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +The RED-GREEN-REFACTOR cycle — write a failing test, write the minimum code to pass it, then improve the structure while keeping all tests green — drives better design by forcing each unit of behaviour to be specified before it is implemented. + +## Core Findings + +1. 
Tests written first act as specifications, not verification, preventing over-engineering and ensuring every piece of code has a reason to exist +2. The RED phase forces developers to articulate what they want before building it +3. The GREEN phase constrains implementation to the minimum that satisfies the specification, preventing speculative generalisation +4. The REFACTOR phase is safe because all existing tests remain green, providing a regression net +5. The cycle produces small, focused, well-tested units of code because each unit must be both specifiable (testable) and minimal (just enough to pass) +6. Two basic rules: (1) Never write a single line of code unless you have a failing automated test, (2) Eliminate duplication + +## Mechanism + +TDD works by inverting the traditional development flow. Instead of writing code then testing it, developers first write failing tests that specify desired behavior. This forces clear thinking about requirements and interfaces before implementation. The requirement to make tests pass with minimal code prevents over-engineering, while the refactoring phase improves design quality under the safety net of comprehensive test coverage. + +## Relevance + +Foundational methodology for software quality assurance and design. TDD has become a cornerstone practice in agile development, influencing modern software engineering through improved code quality, better design, and increased developer confidence in changes. Essential for understanding test-first approaches and the relationship between testing and design. 
+ +## Related Research + +- (Beck, 1999) - Extreme Programming Explained, which introduced TDD as part of XP practices +- (Fowler et al., 1999) - Refactoring book co-authored by Beck, providing systematic approach to code improvement +- (Beck & Gamma, 2004) - JUnit framework implementation demonstrating TDD principles in practice \ No newline at end of file diff --git a/docs/research/software-engineering/quality/demillo_lipton_sayward_1978.md b/docs/research/software-engineering/quality/demillo_lipton_sayward_1978.md new file mode 100644 index 0000000..4371426 --- /dev/null +++ b/docs/research/software-engineering/quality/demillo_lipton_sayward_1978.md @@ -0,0 +1,45 @@ +# Mutation Testing — DeMillo, Lipton & Sayward, 1978 + +## Citation + +DeMillo, R. A., Lipton, R. J., & Sayward, F. G. (1978). "Hints on test data selection: Help for the practicing programmer." *Computer*, 11(4), 34–41. + +## Source Type + +Academic Paper + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +A meaningful test fails when a mutation (small deliberate code change) is introduced - if test survives every mutation without failing, it tests nothing useful. + +## Core Findings + +1. **Competent Programmer Hypothesis**: Competent programmers write programs close to being correct behaviorally +2. **Coupling Effect**: Simple faults cascade to form other emergent faults, so detecting simple mutations catches complex bugs +3. **RIP Model**: Tests must Reach mutated statement, Infect program state, and Propagate incorrect state to output +4. **Equivalent Mutants Problem**: Some mutants produce behaviorally equivalent programs, creating analysis challenges +5. 
**Quality Measurement**: Mutation score (mutants killed / total mutants) provides objective test quality metric + +## Mechanism + +Mutation testing systematically introduces small bugs into code using mutation operators (arithmetic/relational/logical changes) and checks whether tests detect them. Tests failing to catch artificial bugs indicate weak test quality or missing edge cases. Strong mutation requires full RIP model satisfaction; weak mutation only requires reach/infect. + +## Relevance + +Essential for test quality assessment, TDD validation, regression testing. Applied in modern tools (PITest, Stryker, mutmut, cosmic-ray). Fundamental for measuring test effectiveness beyond code coverage, ensuring tests constrain actual behavior rather than implementation details. + +## Related Research + +Originally proposed by Richard Lipton (1971), developed by DeMillo, Lipton & Sayward (1978). First implementation by Timothy Budd (1980). Connects to (Jia & Harman, 2011) comprehensive survey. Modern applications in security testing, object-oriented mutation operators, higher-order mutants research. \ No newline at end of file diff --git a/docs/research/software-engineering/quality/feathers_2004.md b/docs/research/software-engineering/quality/feathers_2004.md new file mode 100644 index 0000000..ae4dda3 --- /dev/null +++ b/docs/research/software-engineering/quality/feathers_2004.md @@ -0,0 +1,46 @@ +# Working Effectively with Legacy Code — Feathers, 2004 + +## Citation + +Feathers, M. (2004). *Working Effectively with Legacy Code*. Prentice Hall. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Legacy code is code without tests - the safest way to modify it is to first write characterization tests that capture current behavior, then refactor under the safety net of those tests. + +## Core Findings + +1. 
**Legacy code definition**: Code without automated tests, making it dangerous to modify and prone to introducing bugs during changes. +2. **Characterization tests methodology**: Tests that document what code currently does (not what it should do) - essential when modifying untested code to create regression safety net. +3. **Seam-based approach**: Seams (parameter seams, link seams, preprocessing seams, object seams) are points where behavior can be varied without editing code - primary mechanism for getting legacy code under test. +4. **Test-first modification**: Process is identify seam, get code under test by writing characterization test at that seam, then refactor safely. +5. **"Edit and pray" elimination**: Replaces dangerous "modify and hope nothing breaks" approach with disciplined "test, then modify, then verify" cycle. + +## Mechanism + +Characterization tests differ from specification tests: they document what code currently does, not what it should do. This creates regression protection before any changes are made. Seams allow injecting test doubles at specific points without modifying production code. Process: identify seam, get code under test, write characterization test, then refactor. Avoids dangerous untested modifications. + +## Relevance + +Essential methodology for working with legacy codebases safely. Critical for organizations maintaining large existing systems without comprehensive test coverage. Foundational approach for incremental improvement of legacy systems and technical debt reduction. 
+ ## Related Research + - (Beck, 2002) — Test-driven development providing methodology for new code development with tests + - (Fowler, 1999) — Refactoring techniques that require test safety net provided by characterization tests \ No newline at end of file diff --git a/docs/research/software-engineering/quality/fowler_1999.md b/docs/research/software-engineering/quality/fowler_1999.md new file mode 100644 index 0000000..62d13de --- /dev/null +++ b/docs/research/software-engineering/quality/fowler_1999.md @@ -0,0 +1,46 @@ +# Refactoring: Improving the Design of Existing Code — Fowler, 1999 + +## Citation + +Fowler, M. (1999). *Refactoring: Improving the Design of Existing Code*. Addison-Wesley. First edition with contributions by K. Beck, J. Brant, W. Opdyke, and D. Roberts; second edition (Fowler, sole author) published 2018. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Refactoring is disciplined technique for restructuring existing code without changing external behavior, done in small steps each verified by tests. + +## Core Findings + +1. **Catalog methodology**: 66 named transformations (Extract Method, Move Field, Replace Conditional with Polymorphism, etc.) each with known pre-condition, mechanic, and effect on code structure. +2. **Code smell diagnostics**: Diagnostic indicators (Long Method, Feature Envy, Switch Statements, etc.) signal when refactoring is needed and point to specific techniques. +3. **Test-driven safety**: Small, test-verified steps ensure restructuring doesn't introduce bugs while improving design quality. +4. **Behavior preservation**: External functionality remains unchanged while internal structure improves through systematic transformations. +5. **Design emergence**: Better design emerges through incremental improvements rather than upfront architectural decisions.
+ +## Mechanism + +Each refactoring has known pre-condition (when safe to apply), step-by-step mechanic (the transformation), and guaranteed post-condition (what improves). By applying refactorings in small, test-verified steps, developer can restructure code safely without introducing bugs. Code smells serve as diagnostic indicators pointing to specific refactoring technique most likely to improve structure. The smell identifies problem; refactoring provides solution. + +## Relevance + +Foundational methodology for systematic code improvement and design evolution. Essential practice for maintaining code quality, reducing technical debt, and enabling sustainable software development. Widely adopted as core agile development practice. + +## Related Research + +- (Beck, 2002) — Test-driven development methodology supporting refactoring safety +- (Shvets, 2014) — Comprehensive online refactoring catalog building on Fowler's work \ No newline at end of file diff --git a/docs/research/software-engineering/quality/freeman_pryce_2009.md b/docs/research/software-engineering/quality/freeman_pryce_2009.md new file mode 100644 index 0000000..56694a9 --- /dev/null +++ b/docs/research/software-engineering/quality/freeman_pryce_2009.md @@ -0,0 +1,45 @@ +# GOOS — Growing Object-Oriented Software, Guided by Tests — Freeman & Pryce, 2009 + +## Citation + +Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Acceptance tests and unit tests operate at two separate nested timescales - outer loop writes failing acceptance tests before implementation; inner loop drives implementation with unit-level Red-Green-Refactor cycles. + +## Core Findings + +1. 
**Double Loop TDD**: Outer loop (acceptance tests) provides direction (what to build); inner loop (unit tests) provides momentum (how to build it) +2. **Nested Timescales**: Acceptance test stays red throughout all inner cycles and goes green only when feature is complete +3. **Direction vs Momentum**: Acceptance tests prevent over-engineering by defining "done"; unit tests drive good internal design +4. **Integration Safety**: Acceptance tests catch integration issues early while unit tests provide rapid feedback +5. **Mock Objects**: Use test doubles to maintain fast, isolated unit tests while preserving design feedback + +## Mechanism + +Outer loop begins with failing acceptance test for next feature, then enters inner loop of Red-Green-Refactor unit test cycles. Inner loop repeats (write failing unit test, make it pass with minimal code, refactor) until acceptance test passes. This structure provides safety nets at both levels for refactoring and ensures comprehensive test coverage. + +## Relevance + +Essential for advanced TDD practices, BDD implementation, acceptance test-driven development. Applied in enterprise software development, continuous integration, behavior-driven development. Foundational for understanding relationship between unit and acceptance testing in agile methodologies. + +## Related Research + +Connects to (Beck, 2002) on TDD fundamentals, (North, 2006) on BDD practices. Part of broader testing methodologies alongside ATDD, specification by example. Related to mock object patterns and test double strategies for maintainable test suites. 
\ No newline at end of file diff --git a/docs/research/software-engineering/quality/gamma_et_al_1994.md b/docs/research/software-engineering/quality/gamma_et_al_1994.md new file mode 100644 index 0000000..7e3ce29 --- /dev/null +++ b/docs/research/software-engineering/quality/gamma_et_al_1994.md @@ -0,0 +1,46 @@ +# Design Patterns: Elements of Reusable Object-Oriented Software — Gamma, Helm, Johnson, Vlissides, 1994 + +## Citation + +Gamma, E., Helm, R., Johnson, R., & Vlissides, J. (1994). *Design Patterns: Elements of Reusable Object-Oriented Software*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Design patterns provide reusable solutions to recurring design problems by naming proven structural approaches that teams can communicate at higher abstraction level. + +## Core Findings + +1. **Pattern catalog**: 23 design patterns divided into three categories by intent: Creational (abstracting object creation), Structural (composing classes/objects into larger structures), Behavioral (allocating responsibility between objects). +2. **Communication abstraction**: Patterns name recurring design structures enabling teams to communicate at higher level - saying "Strategy pattern" conveys entire structural solution. +3. **Problem-solution mapping**: Each pattern captures proven solution to specific class of design problem - Strategy eliminates type-switching, Observer decouples event sources from handlers, State replaces conditional state machines. +4. **Foundational principles**: "Program to interface, not implementation" and "Favor object composition over class inheritance" guide pattern application. +5. **Massive influence**: Over 500,000 copies sold in 14 languages, ACM SIGPLAN Programming Languages Achievement Award 2005, foundational for object-oriented design. 
+ +## Mechanism + +Patterns work by naming recurring design structures so teams can communicate at higher level of abstraction. Each pattern captures proven solution to specific class of design problem. Patterns should be applied only when code smell triggers them, never speculatively. The smell identifies the gap; the pattern provides structural solution. + +## Relevance + +Foundational reference for object-oriented design and software architecture. Essential vocabulary for software development teams and architectural decision-making. Widely adopted across programming languages and frameworks for systematic design improvement. + +## Related Research + +- (Fowler, 1999) — Refactoring methodology that prepares code for pattern application +- (Shvets, 2014) — Modern catalog connecting code smells to appropriate patterns \ No newline at end of file diff --git a/docs/research/software-engineering/quality/google_testing_2013.md b/docs/research/software-engineering/quality/google_testing_2013.md new file mode 100644 index 0000000..f478b9a --- /dev/null +++ b/docs/research/software-engineering/quality/google_testing_2013.md @@ -0,0 +1,45 @@ +# Test-Behavior Alignment — Google Testing Blog, 2013 + +## Citation + +Google Testing Blog. (2013). "Testing on the Toilet: Test Behavior, Not Implementation." By Andrew Trenk. *Google Testing Blog*. + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Test setup may need to change if implementation changes, but the actual test assertion should not need to change if the code's user-facing behavior doesn't change. + +## Core Findings + +1. **Implementation Independence**: Tests should focus on testing code's public API, not internal implementation details +2. **Maintenance Benefits**: Tests independent of implementation details are easier to maintain since they don't need changes with each implementation change +3. 
**Documentation Value**: Behavior-focused tests act as code samples showing different ways class methods can be used +4. **Setup vs Assertion**: Test setup may change with implementation (e.g., new constructor dependencies) but assertions should remain stable +5. **Brittleness Prevention**: Tests tightly coupled to implementation details break during refactoring and become drag on design improvement + +## Mechanism + +Implementation-focused tests verify internal structure (method calls, object creation, internal state) creating brittleness. Behavior-focused tests verify observable outcomes that users can witness, providing stability. Former creates maintenance overhead; latter provides lasting value through internal rewrites. + +## Relevance + +Essential for maintainable test suites, refactoring safety, TDD practices. Applied in contract testing, behavior-driven development, test design principles. Foundational for writing tests from caller's perspective without knowledge of internal implementation mechanics. + +## Related Research + +Connects to (Freeman & Pryce, 2009) on GOOS principles, (Fowler, 2018) on Test Pyramid. Part of broader testing methodologies alongside TDD, BDD, contract testing. Related to mock object patterns and test double strategies for behavior verification. \ No newline at end of file diff --git a/docs/research/software-engineering/quality/maciver_2016.md b/docs/research/software-engineering/quality/maciver_2016.md new file mode 100644 index 0000000..4db4887 --- /dev/null +++ b/docs/research/software-engineering/quality/maciver_2016.md @@ -0,0 +1,47 @@ +# Property-Based Testing — MacIver, 2016 + +## Citation + +MacIver, D. R. (2016). "What is Property Based Testing?" *Hypothesis*. 
https://hypothesis.works/articles/what-is-property-based-testing/ + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Property-based testing constructs tests such that when these tests are fuzzed with generated inputs, failures reveal problems that could not have been revealed by direct fuzzing alone. + +## Core Findings + +1. Meaningful property tests assert invariants—things that must always be true about the contract +2. Tautological tests assert reconstruction patterns that merely verify the implementation without constraining behavior +3. Property tests generate diverse inputs to verify that certain properties hold across the entire input space +4. This approach discovers edge cases that example-based tests typically miss and provides stronger confidence in correctness +5. Property-based testing complements example-based BDD scenarios by providing broader coverage of the input space + +## Mechanism + +Property tests work by generating diverse inputs to verify that certain properties (invariants) hold across the entire input space. Unlike example-based tests that check specific scenarios, property tests explore the full domain of possible inputs, automatically discovering edge cases that developers typically miss. The key is focusing on behavioral contracts rather than implementation details. + +## Relevance + +Essential for comprehensive test coverage in software quality assurance. Property-based testing complements traditional BDD scenarios by providing mathematical rigor to test validation. Particularly valuable for testing complex algorithms, data transformations, and API contracts where exhaustive example-based testing is impractical. 
+ +## Related Research + +- (Claessen & Hughes, 2000) - Original QuickCheck paper establishing property-based testing foundations +- (Fink & Bishop, 1997) - Early work on property-based testing for software assurance +- (MacIver et al., 2019) - Hypothesis library implementation extending QuickCheck concepts to Python \ No newline at end of file diff --git a/docs/research/software-engineering/quality/martin_2000_solid.md b/docs/research/software-engineering/quality/martin_2000_solid.md new file mode 100644 index 0000000..d1a13ca --- /dev/null +++ b/docs/research/software-engineering/quality/martin_2000_solid.md @@ -0,0 +1,47 @@ +# SOLID Principles — Martin, 2000 + +## Citation + +Martin, R. C. (2000). Design Principles and Design Patterns. Object Mentor. [PDF archived at Internet Archive] + +## Source Type + +Practitioner Book + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Five object-oriented design principles that reduce coupling and increase maintainability when applied together. + +## Core Findings + +1. **Single Responsibility Principle (SRP)**: A class should have only one reason to change — each class should have only one responsibility. +2. **Open-Closed Principle (OCP)**: Software entities should be open for extension but closed for modification. +3. **Liskov Substitution Principle (LSP)**: Derived classes must be substitutable for their base classes without altering program correctness. +4. **Interface Segregation Principle (ISP)**: Clients should not be forced to depend on interface methods they don't use. +5. **Dependency Inversion Principle (DIP)**: Depend on abstractions, not concrete implementations. +6. The SOLID acronym was coined around 2004 by Michael Feathers to make these principles memorable. 
+ +## Mechanism + +Each principle addresses specific coupling pathologies: SRP prevents god-objects by enforcing single responsibility; OCP prevents modification cascades by enabling extension over modification; LSP prevents behavioral contract violations in inheritance hierarchies; ISP prevents fat interfaces that force unnecessary dependencies; DIP enables loose coupling by inverting dependencies toward abstractions. Together they reduce change propagation and make systems more testable. + +## Relevance + +Foundational for modern software architecture and clean code practices. Directly applicable to module design, interface definitions, and refactoring strategies. Essential for creating maintainable codebases that can evolve without breaking existing functionality. + +## Related Research + +- (Fowler, 1999) — Refactoring patterns that support SOLID principles +- (Beck, 2002) — Test-driven development practices that reinforce these design principles \ No newline at end of file diff --git a/docs/research/software-engineering/quality/martin_2017_first_class_tests.md b/docs/research/software-engineering/quality/martin_2017_first_class_tests.md new file mode 100644 index 0000000..4423ff7 --- /dev/null +++ b/docs/research/software-engineering/quality/martin_2017_first_class_tests.md @@ -0,0 +1,45 @@ +# Test Contra-variance (First-Class Tests) — Martin, 2017 + +## Citation + +Martin, R. C. (2017). "Test Contra-variance." *Clean Coder Blog*, October 3, 2017. + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Tests should be treated as first-class citizens with independent structural design - not coupled mirror images of production code structure. + +## Core Findings + +1. **Structural Contra-variance**: Test structure should not mirror production code structure (one test class per production class creates fragile coupling) +2. 
**Behavioral Contra-variance**: As tests become more specific, production code becomes more generic, moving in opposite directions along generality axis +3. **Fragile Test Problem**: Covariant test structure causes large test changes from small production changes, breaking refactoring workflows +4. **Decoupling Through Generalization**: Generalizing production code to satisfy test specifications creates behavioral decoupling while maintaining correctness +5. **Independent Test Design**: Tests need their own architectural design to minimize coupling while maintaining behavioral verification + +## Mechanism + +Covariant test structure (mirroring production classes) creates tight coupling preventing safe refactoring. Contra-variant approach: tests maintain stable public API focus while production code extracts classes/methods behind interface. Tests become increasingly specific behavioral specifications; production code generalizes to satisfy broader spectrum of behaviors than tests specify. + +## Relevance + +Essential for sustainable TDD practices, refactoring safety, test maintenance. Applied in contract testing, API design, behavior-driven development. Fundamental for writing tests that enable rather than obstruct design improvements and architectural evolution. + +## Related Research + +Part of Robert C. Martin's Clean Code philosophy. Connects to (Beck, 2002) TDD principles, (Freeman & Pryce, 2009) GOOS methodology. Related to test design patterns, mock object strategies, behavioral specification approaches. Foundational for understanding test-production code relationships. \ No newline at end of file diff --git a/docs/research/software-engineering/quality/meszaros_2007.md b/docs/research/software-engineering/quality/meszaros_2007.md new file mode 100644 index 0000000..89cddf2 --- /dev/null +++ b/docs/research/software-engineering/quality/meszaros_2007.md @@ -0,0 +1,46 @@ +# xUnit Test Patterns — Meszaros, 2007 + +## Citation + +Meszaros, G. (2007). 
*xUnit Test Patterns: Refactoring Test Code*. Addison-Wesley. + +## Source Type + +Practitioner Book + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Tests should specify observable behavior, not verify implementation - coupling to internal details creates brittle tests that break during refactoring even when behavior is preserved. + +## Core Findings + +1. **Test coupling spectrum**: Four levels from most resilient to most brittle: end-to-end tests (highest), unit contract tests, property-based tests, and white-box tests (lowest, avoid). +2. **Implementation coupling danger**: Tests coupled to implementation details break when code is refactored even when behavior is preserved, producing false negatives that erode trust in test suite. +3. **Semantic alignment rule**: Tests must operate at same abstraction level as acceptance criterion they verify - if criterion says "user presses W," test sends W through actual input mechanism, not internal method call. +4. **Observable behavior focus**: Decoupled tests use public interfaces and assert on observable outcomes, remaining green through refactoring because they verify what system does, not how it does it. +5. **Test pattern catalog**: Comprehensive patterns for test organization, fixture management, result verification, and test code maintainability in xUnit frameworks. + +## Mechanism + +Test coupling arises when test depends on how system works internally rather than what it does externally. Coupled tests use private methods, internal state, or implementation-specific assertions. When implementation changes — even if behavior is identical — coupled tests fail, creating noise that trains developers to ignore test failures. Decoupled tests use public interfaces and assert on observable outcomes, remaining green through refactoring. + +## Relevance + +Essential reference for writing maintainable test code in xUnit frameworks (JUnit, NUnit, etc.). 
Foundational for test-driven development practices and ensuring tests support rather than hinder refactoring. Widely used for improving test suite quality and reducing test maintenance burden. + +## Related Research + +- (Beck, 2002) — Test-driven development methodology using xUnit frameworks +- (Fowler, 1999) — Refactoring techniques that tests must support without breaking \ No newline at end of file diff --git a/docs/research/software-engineering/quality/north_2006.md b/docs/research/software-engineering/quality/north_2006.md new file mode 100644 index 0000000..965e20b --- /dev/null +++ b/docs/research/software-engineering/quality/north_2006.md @@ -0,0 +1,45 @@ +# Behaviour-Driven Development — North, 2006 + +## Citation + +North, D. (2006). "Introducing BDD." *Better Software Magazine*, March 2006. Originally published at dannorth.net. + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +BDD refines TDD by writing tests in domain language of business (Given/When/Then), making them specifications rather than verification tools. + +## Core Findings + +1. **Test Method Names as Sentences**: Using "should" template (The class *should* do something) keeps tests focused and creates readable documentation +2. **Behaviour vs Test Vocabulary**: Word "behaviour" eliminates TDD coaching confusion - what to test, when to delete tests, test naming becomes clear +3. **Given/When/Then Template**: Structured format captures acceptance criteria as executable specifications that business stakeholders can validate +4. **Ubiquitous Language for Analysis**: BDD provides consistent vocabulary bridging technical and business domains throughout entire analysis process +5. **Business Value Focus**: "What's the next most important thing the system doesn't do?" 
drives feature prioritization and development direction + +## Mechanism + +By requiring tests written in domain vocabulary (not implementation language), BDD forces shared understanding between domain experts and developers. "Given a registered user / When user logs in / Then user sees welcome message" specifies observable behaviour stakeholders care about, not technical implementation steps. Eliminates implementation coupling from specifications. + +## Relevance + +Foundational for behavior-driven development practices, acceptance test-driven development, specification by example. Essential for bridging business-technical communication gaps, creating living documentation, automated acceptance testing frameworks like Cucumber. + +## Related Research + +Created by Dan North, influenced by Eric Evans' Domain-Driven Design ubiquitous language concept, Chris Matts' business value focus. Led to development of JBehave framework, Ruby RSpec project, Cucumber framework. Foundational for modern BDD tools and practices, specification by example methodologies. \ No newline at end of file diff --git a/docs/research/software-engineering/quality/shvets_2014.md b/docs/research/software-engineering/quality/shvets_2014.md new file mode 100644 index 0000000..78780ee --- /dev/null +++ b/docs/research/software-engineering/quality/shvets_2014.md @@ -0,0 +1,46 @@ +# Refactoring.Guru — Shvets, 2014 + +## Citation + +Shvets, A. (2014–present). *Refactoring.Guru*. https://refactoring.guru/ + +## Source Type + +Blog/Article + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Refactoring follows systematic catalog of 66 techniques triggered by 21 code smells, creating diagnostic chain from problem identification to pattern application. + +## Core Findings + +1. **Smell-first methodology**: 21 code smells organized into 5 categories (Bloaters, OO Abusers, Change Preventers, Dispensables, Couplers) drive refactoring decisions. +2. 
**Systematic technique catalog**: 66 refactoring techniques organized into 6 categories (Composing Methods, Moving Features between Objects, Organizing Data, Simplifying Conditional Expressions, Simplifying Method Calls, Dealing with Generalization). +3. **Pattern-smell connection**: Each of 22 GoF design patterns triggered by specific code smell, creating motivation for pattern application. +4. **Diagnostic methodology**: Smell → refactoring technique → design pattern progression prevents speculative application. +5. **Visual learning approach**: Comprehensive illustrations and examples make complex concepts accessible to practitioners. + +## Mechanism + +The catalog provides smell-first approach: identify code smell, then apply corresponding refactoring technique or design pattern. Five smell categories group related pathologies: Bloaters (structures grown too large), OO Abusers (misapplied OOP), Change Preventers (changes that ripple), Dispensables (dead weight), Couplers (excessive inter-object dependency). Each smell entry links to refactoring techniques that resolve it, and each pattern entry explains which smell triggers it. Creates diagnostic chain where each step is motivated by previous one rather than applied speculatively. + +## Relevance + +Essential reference for code quality improvement, refactoring practice, and design pattern application. Widely used by developers for systematic code improvement and architectural decision-making. Provides practical methodology for identifying and resolving code quality issues. 
+ +## Related Research + +- (Fowler, 1999) — Foundational refactoring catalog and methodology +- (Gamma et al., 1995) — Original Gang of Four design patterns catalog \ No newline at end of file diff --git a/docs/research/software-engineering/requirements/christel_kang_1992.md b/docs/research/software-engineering/requirements/christel_kang_1992.md new file mode 100644 index 0000000..ab69d75 --- /dev/null +++ b/docs/research/software-engineering/requirements/christel_kang_1992.md @@ -0,0 +1,45 @@ +# Issues in Requirements Elicitation — Christel & Kang, 1992 + +## Citation + +Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. Software Engineering Institute, Carnegie Mellon University. https://www.sei.cmu.edu/library/abstracts/reports/92tr012.cfm + +## Source Type + +Academic Paper + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Stakeholders have three structural problems making direct questioning insufficient: omitting "obvious" information, trouble articulating unarticulated needs, not knowing what they want until seeing what they don't want. + +## Core Findings + +1. **Three Stakeholder Problems**: Omit obvious information, difficulty articulating tacit knowledge, discover needs reactively +2. **Expert Knowledge Nature**: Largely procedural and tacit - experts describe beliefs about what happens, not actual reality +3. **Elicitation Activities**: Fact-finding, requirements gathering, evaluation and rationalization, prioritization, integration +4. **Gap-Finding Necessity**: Required techniques that bypass expert's mental schema to uncover actual vs. perceived processes +5. **Specification vs. Elicitation**: Most tools focus on representation (specification) rather than discovery (elicitation) + +## Mechanism + +Expert knowledge is largely procedural and tacit. 
When asked "how does the system work?", experts describe their beliefs about what happens rather than observing actual processes. Gap-finding techniques are required because they bypass the expert's mental schema and reveal discrepancies between perceived and actual workflows. + +## Relevance + +Essential for requirements engineering, systems analysis, stakeholder interviews, business analysis. Applied in software development, system design, process improvement. Foundational for understanding why traditional interviewing fails and why observational techniques are necessary. + +## Related Research + +Connects to (Flanagan, 1954) on Critical Incident Technique for elicitation, (Fisher & Geiselman, 1987) on Enhanced Cognitive Interview methods. Part of broader requirements engineering methodology alongside prototyping and use case analysis. Related to tacit knowledge research and cognitive interview techniques. diff --git a/docs/research/software-engineering/requirements/kano_et_al_1984.md b/docs/research/software-engineering/requirements/kano_et_al_1984.md new file mode 100644 index 0000000..11a977f --- /dev/null +++ b/docs/research/software-engineering/requirements/kano_et_al_1984.md @@ -0,0 +1,45 @@ +# Attractive Quality and Must-Be Quality (Kano Model) — Kano et al., 1984 + +## Citation + +Kano, N., Seraku, N., Takahashi, F., & Tsuji, S. (1984). "Attractive quality and must-be quality." *Journal of the Japanese Society for Quality Control*, 14(2), 39–48. + +## Source Type + +Academic Paper + +## Method + +Observational + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Customer satisfaction is not unidimensional - features fall into asymmetric categories enabling differentiated prioritization strategies. + +## Core Findings + +1. **Five Feature Categories**: Must-be (expected baseline), One-dimensional (more is better), Attractive (delighters), Indifferent (no impact), Reverse (negative impact) +2. 
**Asymmetric Satisfaction**: Must-be features don't increase satisfaction when present but cause dissatisfaction when absent; Attractive features delight when present but don't dissatisfy when absent +3. **Strategic Prioritization**: First ensure Must-be features exist, then maximize One-dimensional features, then selectively invest in Attractive features +4. **Temporal Evolution**: Features migrate from Attractive → One-dimensional → Must-be over time as customer expectations evolve +5. **Empirical Measurement**: Standardized functional/dysfunctional questionnaire pairs enable systematic feature classification + +## Mechanism + +Must-be features form satisfaction baseline - presence doesn't increase satisfaction but absence causes dissatisfaction. Attractive features provide asymmetric delight - presence surprises positively but absence goes unnoticed. This asymmetry enables strategic resource allocation focusing first on preventing dissatisfaction, then creating satisfaction. + +## Relevance + +Essential for product management, requirements prioritization, customer satisfaction strategy. Applied in Quality Function Deployment (QFD), feature roadmapping, competitive analysis. Foundational for understanding satisfaction drivers beyond linear assumptions in product development. + +## Related Research + +Developed by Noriaki Kano building on Herzberg's two-factor theory. Connects to (Herzberg, 1959) on hygiene vs motivator factors. Applied in Quality Function Deployment methodologies and modern product management frameworks. Related to Jobs-to-be-Done theory and customer outcome prioritization. diff --git a/docs/research/software-engineering/requirements/wake_2003.md b/docs/research/software-engineering/requirements/wake_2003.md new file mode 100644 index 0000000..a4616dd --- /dev/null +++ b/docs/research/software-engineering/requirements/wake_2003.md @@ -0,0 +1,45 @@ +# INVEST in Good Stories — Wake, 2003 + +## Citation + +Wake, B. (2003). 
*INVEST in Good Stories, and SMART Tasks*. XP123.com. + +## Source Type + +Blog/Article + +## Method + +Theoretical + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Stories that are Independent, Negotiable, Valuable, Estimable, Small, and Testable produce fewer downstream defects and smoother development cycles. + +## Core Findings + +1. **INVEST Acronym**: Independent (no overlap, any order), Negotiable (details co-created), Valuable (to customer), Estimable (sized appropriately), Small (few person-weeks), Testable (clear acceptance criteria) +2. **Vertical Slicing**: Split stories vertically through all layers (network/persistence/logic/presentation) rather than horizontally by layer +3. **Testable Forces Clarity**: "Testable" forces Product Owner to express observable outcomes, directly enabling Given/When/Then format +4. **Size Impact**: Smaller stories get more accurate estimates; above few person-weeks becomes too hard to scope +5. **SMART Tasks Extension**: Specific, Measurable, Achievable, Relevant, Time-boxed tasks complement INVEST stories + +## Mechanism + +"Testable" forces PO to express observable outcomes directly enabling Given/When/Then. "Small" forces decomposition preventing scope creep. "Independent" prevents hidden ordering dependencies. "Valuable" ensures customer-centric vertical slicing through technical layers. + +## Relevance + +Foundational for agile user story writing, product backlog management, acceptance criteria definition. Applied in Scrum, XP, SAFe methodologies. Essential for Product Owners, Business Analysts, development teams writing effective requirements. + +## Related Research + +Created by Bill Wake (XP123.com) in 2003. Consciously developed by clustering story attributes and finding memorable acronym. Widely adopted in agile methodologies. Complements (Cohn, 2004) user story practices and (North, 2006) BDD Given/When/Then format. 
diff --git a/docs/research/software-engineering/requirements/wynne_2015.md b/docs/research/software-engineering/requirements/wynne_2015.md new file mode 100644 index 0000000..1945ec9 --- /dev/null +++ b/docs/research/software-engineering/requirements/wynne_2015.md @@ -0,0 +1,45 @@ +# Example Mapping — Wynne, 2015 + +## Citation + +Wynne, M. (2015). "Introducing Example Mapping." *Cucumber Blog*. https://cucumber.io/blog/bdd/example-mapping-introduction/ + +## Source Type + +Blog/Article + +## Method + +Case Study + +## Verification Status + +Verified + +## Confidence + +High + +## Key Insight + +Inserting a "rules" layer between stories and examples prevents redundant or contradictory acceptance criteria - visual arrangement reveals story complexity and knowledge gaps before development begins. + +## Core Findings + +1. **Four Card Types**: Story (yellow), Rules (blue), Examples (green), Questions (red) using colored index cards in visual mapping +2. **Quality Signals**: Many rules → story needs splitting; many examples per rule → rule too complex; many red cards → story not ready; no red cards → conversation may be insufficient +3. **Time-boxed Process**: Well-understood, well-sized story should map in ~25 minutes with thumb-vote to determine development readiness +4. **Rules Layer Value**: Groups related examples under business rules they illustrate, preventing duplicated logic and making business constraints explicit +5. **"Friends Episode" Naming**: Rough examples using informal names ("The one where customer forgot receipt") instead of formal Gherkin during mapping + +## Mechanism + +Collaborative session involves stakeholders placing colored cards on table/wall. Visual arrangement provides instant feedback on story complexity. Rules layer acts as intermediary between high-level stories and concrete examples, preventing redundancy and revealing natural slicing points. 
+ +## Relevance + +Essential for BDD story refinement, three amigos sessions, backlog grooming. Applied in agile requirements discovery, acceptance criteria definition, story sizing. Foundational technique for preventing oversized stories entering sprints and discovering unknown unknowns systematically. + +## Related Research + +Created by Matt Wynne (Cucumber Project Lead) in 2015. Builds on (North, 2006) BDD practices and three amigos concept. Complements (Wake, 2003) INVEST criteria by providing structured discovery technique. Part of broader BDD ecosystem alongside Gherkin, specification workshops, deliberate discovery practices. \ No newline at end of file diff --git a/docs/scientific-research/README.md b/docs/scientific-research/README.md deleted file mode 100644 index 3338996..0000000 --- a/docs/scientific-research/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Scientific Research — Index - -Theoretical and empirical foundations for the decisions made in this template, organized by domain. 
- -| File | Entries | Domain | -|---|---|---| -| `cognitive-science.md` | 1–10 | Pre-mortem, implementation intentions, commitment devices, System 2, adversarial collaboration, accountability, chunking, elaborative encoding, error feedback, prospective memory | -| `testing.md` | 11–15, 51–54 | Observable behavior testing, test-behavior alignment, first-class tests, property-based testing, mutation testing, Canon TDD, GOOS outer/inner loop, Is TDD Dead, BDD origin | -| `software-economics.md` | 16 | Cost of change curve (shift left) | -| `requirements-elicitation.md` | 17–20, 28–30, 43–50 | INVEST, Example Mapping, declarative Gherkin, MoSCoW, active listening, Kipling 5Ws, BA framework, FDD, affinity mapping, Event Storming, CIT, cognitive interview, laddering, funnel technique, RE issues | -| `domain-modeling.md` | 31, 63–68 | DDD bounded contexts, ubiquitous language, feature identification, DDD Reference, Fowler UL/BC bliki, Vernon IDDD, Verraes UL-not-glossary, Whirlpool | -| `oop-design.md` | 32–35 | Object Calisthenics, Refactoring (Fowler), GoF Design Patterns, SOLID | -| `refactoring-empirical.md` | 36–41 | QDIR smell prioritization, smells + architectural refactoring, SPIRIT tool, bad OOP engineering properties, CWC complexity metric, metric threshold unreliability | -| `architecture.md` | 42, 55–58 | Hexagonal Architecture, ADRs, 4+1 View Model, C4 model, information hiding | -| `ai-agents.md` | 21–27 | Minimal-scope agent design, context isolation, on-demand skills, instruction conflict resolution failure, positional attention degradation, modular prompt de-duplication, three-file separation | -| `documentation.md` | 59–62 | Developer information needs, docs-as-code, Diátaxis documentation framework, blameless post-mortems | diff --git a/docs/scientific-research/ai-agents.md b/docs/scientific-research/ai-agents.md deleted file mode 100644 index aaae407..0000000 --- a/docs/scientific-research/ai-agents.md +++ /dev/null @@ -1,118 +0,0 @@ -# Scientific 
Research — AI Agent Design - -Foundations for the agent architecture, file structure, and context management decisions in this template. - ---- - -### 21. Minimal-Scope Agent Design - -| | | -|---|---| -| **Source** | OpenAI. (2024). *Agent definitions*. OpenAI Agents SDK Documentation. https://platform.openai.com/docs/guides/agents/define-agents | -| **Date** | 2024 | -| **Alternative** | Anthropic. (2024). *Building effective agents*. Anthropic Engineering Blog. https://www.anthropic.com/engineering/building-effective-agents | -| **Status** | Confirmed — corrects the belief that subagents should be "lean routing agents" | -| **Core finding** | "Define the smallest agent that can own a clear task. Add more agents only when you need separate ownership, different instructions, different tool surfaces, or different approval policies." The split criterion is ownership boundary, not instruction volume. | -| **Mechanism** | Multiple agents competing to own the same concern create authority conflicts and inconsistent tool access. The right unit is the smallest coherent domain that requires exclusive responsibility. | -| **Where used** | Agent design in `.opencode/agents/*.md` — 4 agents, each owning a distinct domain (PO, software-engineer, reviewer, setup). | - ---- - -### 22. Context Isolation via Subagents - -| | | -|---|---| -| **Source** | Anthropic. (2025). *Best practices for Claude Code*. Anthropic Documentation. https://www.anthropic.com/engineering/claude-code-best-practices | -| **Date** | 2025 | -| **Status** | Confirmed — the primary reason subagents exist is context isolation, not routing | -| **Core finding** | Subagents run in their own context windows and report back summaries, keeping the main conversation clean for implementation. Every file read in a subagent burns tokens in a child window, not the primary window. | -| **Mechanism** | Context window is the primary performance constraint for LLM agents. 
Investigation tasks rapidly exhaust context if done inline. Delegating to a subagent quarantines that cost; the primary agent receives only the distilled result. A fresh context in the subagent also prevents anchoring bias from prior conversation state. | -| **Where used** | OpenCode `task` tool usage in all agents; `explore` and `general` built-in subagents. | - ---- - -### 23. On-Demand Skill Loading (Context Budget) - -| | | -|---|---| -| **Source** | Anthropic. (2025). *Best practices for Claude Code*. Anthropic Documentation. https://www.anthropic.com/engineering/claude-code-best-practices | -| **Date** | 2025 | -| **Alternative** | OpenCode. (2026). *Agent Skills*. OpenCode Documentation. https://opencode.ai/docs/skills/ | -| **Status** | Confirmed (vendor guidance) — benefit on task completion quality extrapolated from RAG retrieval literature | -| **Core finding** | "CLAUDE.md is loaded every session, so only include things that apply broadly. For domain knowledge or workflows only relevant sometimes, use skills instead. Claude loads them on demand without bloating every conversation." Bloated always-loaded files cause Claude to ignore critical instructions. | -| **Mechanism** | Every token in an unconditionally-loaded file competes for attention against the task prompt. Long always-loaded files push important instructions beyond effective attention range, causing silent non-compliance. Skills are injected only when the task calls for them, preserving the primary context budget. | -| **Where used** | `AGENTS.md` carries only shared project conventions and commands; all step-specific workflows live in `.opencode/skills/*.md` and are loaded via the `skill` tool only when the relevant step begins. | - ---- - -### 24. Instruction Conflict Resolution Failure in LLMs - -| | | -|---|---| -| **Source** | Geng et al. (2025). Control Illusion: The Failure of Instruction Hierarchies in Large Language Models. AAAI-26. arXiv:2502.15851. 
https://arxiv.org/abs/2502.15851 | -| **Date** | 2025 | -| **Alternative** | Wallace et al. (2024). The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions. arXiv:2404.13208. | -| **Status** | Confirmed — peer-reviewed (AAAI-26), replicated across 6 models | -| **Core finding** | LLMs do not reliably prioritize system-prompt instructions over conflicting instructions from other sources. Resolution is inconsistent and biased by pretraining-derived priors, not by prompt structure or position. | -| **Mechanism** | No structural separation between instruction sources enforces reliable priority at inference time. When the same directive appears in two locations with divergent content, the model selects between them based on statistical priors from pretraining. | -| **Where used** | Justifies single source of truth in `AGENTS.md`: workflow details duplicated across agent files and skills that drift out of sync produce conflicting instructions the model cannot resolve reliably. | - ---- - -### 25. Positional Attention Degradation in Long Contexts - -| | | -|---|---| -| **Source** | Liu et al. (2023). Lost in the Middle: How Language Models Use Long Contexts. *Transactions of the Association for Computational Linguistics*. arXiv:2307.03172. https://arxiv.org/abs/2307.03172 | -| **Date** | 2023 | -| **Alternative** | McKinnon (2025). arXiv:2511.05850 — effect attenuated for simple retrieval in Gemini 2.5+; persists for multi-hop reasoning. | -| **Status** | Confirmed with caveat — robust for multi-hop reasoning; attenuated for simple retrieval in frontier models (2025–2026) | -| **Core finding** | Performance on tasks requiring retrieval from long contexts follows a U-shaped curve: highest when relevant content is at the beginning or end of the context, degraded when content falls in the middle. | -| **Mechanism** | Transformer attention is not uniform across token positions. 
Content placed in the middle of a long context receives less attention weight regardless of its relevance. | -| **Where used** | Supports keeping always-loaded files lean. Duplicated workflow detail in always-loaded files increases total context length, pushing other content into lower-attention positions. | - ---- - -### 26. Modular Prompt De-duplication Reduces Interference - -| | | -|---|---| -| **Source** | Sharma & Henley (2026). Modular Prompt Optimization. arXiv:2601.04055. https://arxiv.org/abs/2601.04055 | -| **Date** | 2026 | -| **Status** | Partially confirmed — single-agent reasoning benchmarks only; not tested on multi-file agent architectures | -| **Core finding** | Structured prompts with explicit section de-duplication outperform both monolithic prompts and unstructured modular prompts. The mechanism cited is "reducing redundancy and interference between components." | -| **Mechanism** | Redundant content across prompt sections creates competing attention targets. De-duplication concentrates relevant signal in one canonical location per concern. | -| **Where used** | Supports the rule that skills and agent routing files contain no duplication of `AGENTS.md` content or of each other. | - ---- - -### 27. Agent File Architecture — Three-File Separation - -| | | -|---|---| -| **Source** | Convergence of entries 23, 24, 25, 26. | -| **Date** | — | -| **Status** | Inferred — no direct A/B test of this architecture exists; supported by convergence of confirmed and partially confirmed findings above | -| **Core finding** | Three distinct failure modes (instruction conflict on drift, positional attention degradation, redundancy interference) converge to produce a three-file split with defined content rules for each. | -| **Mechanism** | Each file runs at a different time and serves a different purpose. Mixing concerns across files reintroduces the failure modes the split is designed to prevent. 
| -| **Where used** | Structural rule for `AGENTS.md`, `.opencode/agents/*.md`, and `.opencode/skills/*.md`. | - -| File | Runs when | Contains | Does NOT contain | -|---|---|---|---| -| `AGENTS.md` | Every session, always loaded | Project conventions, shared commands, formats, standards | Step procedures, role-specific rules, path specs | -| `.opencode/agents/*.md` | When that role is invoked | Role identity, step ownership, skill load instructions, tool permissions, escalation paths | Workflow details, principle lists, path specs, commit formats | -| `.opencode/skills/*.md` | On demand, when that step begins | Full procedural instructions for that step, self-contained | Duplication of `AGENTS.md` content or other skills | - ---- - -## Bibliography - -1. Anthropic. (2024). Building effective agents. https://www.anthropic.com/engineering/building-effective-agents -2. Anthropic. (2025). Best practices for Claude Code. https://www.anthropic.com/engineering/claude-code-best-practices -3. Geng et al. (2025). Control Illusion. AAAI-26. arXiv:2502.15851. https://arxiv.org/abs/2502.15851 -4. Liu, N. F. et al. (2023). Lost in the Middle. *TACL*. arXiv:2307.03172. https://arxiv.org/abs/2307.03172 -5. McKinnon, R. (2025). arXiv:2511.05850. https://arxiv.org/abs/2511.05850 -6. OpenAI. (2024). Agent definitions. https://platform.openai.com/docs/guides/agents/define-agents -7. OpenCode. (2026). Agent Skills. https://opencode.ai/docs/skills/ -8. Sharma, A., & Henley, A. (2026). Modular Prompt Optimization. arXiv:2601.04055. https://arxiv.org/abs/2601.04055 -9. Wallace, E. et al. (2024). The Instruction Hierarchy. arXiv:2404.13208. diff --git a/docs/scientific-research/architecture.md b/docs/scientific-research/architecture.md deleted file mode 100644 index 8cf3a9d..0000000 --- a/docs/scientific-research/architecture.md +++ /dev/null @@ -1,86 +0,0 @@ -# Scientific Research — Architecture - -Foundations for the architectural decisions and patterns used in this template. 
- ---- - -### 42. Hexagonal Architecture — Ports and Adapters - -| | | -|---|---| -| **Source** | Cockburn, A. (2005). "Hexagonal Architecture." *alistair.cockburn.us*. https://alistair.cockburn.us/hexagonal-architecture/ | -| **Date** | 2005 | -| **Alternative** | Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. (Chapter 7: "Ports and Adapters") | -| **Status** | Confirmed — foundational; widely adopted as Clean Architecture, Onion Architecture | -| **Core finding** | The application domain should have no knowledge of external systems (databases, filesystems, network, UI). All contact between the domain and the outside world passes through a **port** (an interface / Protocol) and an **adapter** (a concrete implementation of that port). The domain is independently testable without any infrastructure. The key structural rule: dependency arrows point inward — domain code never imports from adapters; adapters import from domain. | -| **Mechanism** | Two distinct sides of any application: the "driving side" (actors who initiate action — tests, UI, CLI) and the "driven side" (actors the application drives — databases, filesystems, external services). Each driven-side dependency is hidden behind a port. Tests supply a test adapter; production supplies a real adapter. Substituting adapters requires no domain code changes. This is SOLID-D at the architectural layer. | -| **Where used** | Step 2 (Architecture): if an external dependency is identified during domain analysis, assign it a Protocol. `ports/` and `adapters/` folders emerge when a concrete dependency is confirmed — do not pre-create them. The dependency-inversion principle (SOLID-D) is the goal; the folder names are convention, not law. | - ---- - -### 55. Architecture Decision Records (ADRs) - -| | | -|---|---| -| **Source** | Nygard, M. T. (2011). "Documenting Architecture Decisions." *cognitect.com*. 
https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions | -| **Date** | 2011 | -| **Alternative** | Keeling, M. (2017). *Design It!: From Programmer to Software Architect*. Pragmatic Bookshelf. (Chapter 6: "Architectural Decisions") | -| **Status** | Confirmed — widely adopted industry standard; tooled by adr-tools, ADR Manager, Log4Brains | -| **Core finding** | Architectural decisions should be recorded as short, immutable documents capturing: what was decided, why, and what alternatives were rejected. Without this record, decisions get re-litigated by every new developer (or AI agent) who encounters the codebase, producing rework and re-divergence. | -| **Mechanism** | An ADR is written at decision time, never edited afterward. If the decision changes, a new ADR is written that supersedes the old one. The append-only record becomes a reliable audit trail. The constraint "one sentence per field" forces clarity — if you can't state the reason in one sentence, the decision is not yet understood. | -| **Where used** | `docs/architecture/architecture.md` (ADR template). SE appends one block per non-obvious decision after Step 2. The `living-docs` skill reads ADRs as input for C4 diagram annotations. | - ---- - -### 56. The 4+1 View Model of Architecture - -| | | -|---|---| -| **Source** | Kruchten, P. B. (1995). "The 4+1 View Model of Architecture." *IEEE Software*, 12(6), 42–50. https://doi.org/10.1109/52.469759 | -| **Date** | 1995 | -| **Alternative** | Bass, L., Clements, P., & Kazman, R. (2021). *Software Architecture in Practice* (4th ed.). Addison-Wesley. | -| **Status** | Confirmed — 3,000+ citations; foundational IEEE reference for architectural documentation | -| **Core finding** | A single architectural diagram cannot communicate all relevant aspects of a system. 
Four distinct views are required: **Logical** (domain objects and relationships), **Process** (runtime behavior and concurrency), **Development** (module organisation and dependencies), **Physical** (deployment topology). A fifth **Scenarios** view (use cases) ties the four together by showing how each scenario exercises each view. | -| **Mechanism** | Different stakeholders need different views: a developer needs the Development view; an operator needs the Physical view; a domain expert needs the Logical view. Conflating views into one diagram produces a cluttered diagram that satisfies nobody. The 4+1 model assigns each concern to its appropriate view and cross-validates them through scenarios. | -| **Where used** | Theoretical foundation for the C4 model (entry 57). The `living-docs` skill generates C4 diagrams that map to: Context diagram (Scenarios view), Container diagram (Physical + Development views), Component diagram (Logical + Development views). | - ---- - -### 57. The C4 Model for Software Architecture - -| | | -|---|---| -| **Source** | Brown, S. (2018). *The C4 Model for Software Architecture*. Leanpub. https://c4model.com | -| **Date** | 2018 (ongoing) | -| **Alternative** | Brown, S. (2023). "The C4 model for visualising software architecture." *InfoQ*. | -| **Status** | Confirmed — widely adopted; tooled by Structurizr, PlantUML C4, Mermaid C4 | -| **Core finding** | Software architecture can be communicated at four zoom levels: **Level 1 — System Context** (who uses the system and what external systems it talks to), **Level 2 — Container** (major runnable/deployable units), **Level 3 — Component** (major structural building blocks within a container), **Level 4 — Code** (classes, interfaces; usually auto-generated). Each level answers a specific question; mixing levels in one diagram creates confusion. 
| -| **Mechanism** | C4 operationalises the 4+1 View Model (entry 56) into a lightweight notation that can be expressed in text (PlantUML, Mermaid) and version-controlled alongside code. The notation is deliberately constrained: boxes (people, systems, containers, components) and unidirectional arrows with labels. No UML formalism required. Context + Container diagrams cover >90% of communication needs for most teams. | -| **Where used** | The `living-docs` skill generates and updates C4 diagrams in `docs/c4/`. Context diagram (L1) always generated; Container (L2) generated when multiple containers are identified; Component (L3) generated on demand. Source files are Mermaid so they render in GitHub and are version-controlled. | - ---- - -### 58. Information Hiding — Module Decomposition Criterion - -| | | -|---|---| -| **Source** | Parnas, D. L. (1972). "On the criteria to be used in decomposing systems into modules." *Communications of the ACM*, 15(12), 1053–1058. https://doi.org/10.1145/361598.361623 | -| **Date** | 1972 | -| **Alternative** | Parnas, D. L. (1974). "On a 'buzzword': Hierarchical structure." *Proc. IFIP Congress 74*, 336–339. | -| **Status** | Confirmed — 4,000+ citations; foundational criterion for all modular decomposition in software engineering | -| **Core finding** | The correct criterion for decomposing a system into modules is **information hiding**: each module hides a design decision that is likely to change. A module's interface reveals only what callers need; its implementation hides how. Decomposing by execution steps (procedure-based) creates tight coupling to implementation order; decomposing by change-prone decisions (information-hiding) allows each decision to be changed independently. | -| **Mechanism** | Identify which decisions are most likely to change (data structures, algorithms, I/O formats, external service protocols). Each such decision becomes a module boundary. 
The module's public interface is defined to be change-stable; the implementation is change-free from the caller's perspective. This is the theoretical basis for SOLID-D (depend on abstractions), Hexagonal Architecture (hide external decisions behind ports), and DDD bounded contexts (hide language decisions behind context boundaries). | -| **Where used** | Step 2 Architecture: bounded context check ("same word, different meaning across features? → module boundary") and external dep Protocol assignment both apply the information-hiding criterion. The `living-docs` skill uses module boundaries as container/component boundaries in `docs/c4/` diagrams. | - ---- - -## Bibliography - -1. Bass, L., Clements, P., & Kazman, R. (2021). *Software Architecture in Practice* (4th ed.). Addison-Wesley. -2. Brown, S. (2018). *The C4 Model for Software Architecture*. Leanpub. https://c4model.com -3. Cockburn, A. (2005). Hexagonal Architecture. *alistair.cockburn.us*. https://alistair.cockburn.us/hexagonal-architecture/ -4. Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. -5. Keeling, M. (2017). *Design It!: From Programmer to Software Architect*. Pragmatic Bookshelf. -6. Kruchten, P. B. (1995). The 4+1 View Model of Architecture. *IEEE Software*, 12(6), 42–50. https://doi.org/10.1109/52.469759 -7. Nygard, M. T. (2011). Documenting Architecture Decisions. *cognitect.com*. https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions -8. Parnas, D. L. (1972). On the criteria to be used in decomposing systems into modules. *CACM*, 15(12), 1053–1058. 
https://doi.org/10.1145/361598.361623 diff --git a/docs/scientific-research/cognitive-science.md b/docs/scientific-research/cognitive-science.md deleted file mode 100644 index dad8e2b..0000000 --- a/docs/scientific-research/cognitive-science.md +++ /dev/null @@ -1,150 +0,0 @@ -# Scientific Research — Cognitive Science - -Mechanisms from cognitive and social psychology that justify workflow design decisions in this template. - ---- - -### 1. Pre-mortem (Prospective Hindsight) - -| | | -|---|---| -| **Source** | Klein, G. (1998). *Sources of Power: How People Make Decisions*. MIT Press. | -| **Date** | 1998 | -| **Status** | Confirmed | -| **Core finding** | Asking "imagine this failed — why?" catches 30% more issues than forward-looking review. | -| **Mechanism** | Prospective hindsight shifts from prediction (weak) to explanation (strong). The brain is better at explaining past events than predicting future ones. By framing as "it already failed," you activate explanation mode. | -| **Where used** | PO pre-mortem at scope, software-engineer pre-mortem before handoff. | - ---- - -### 2. Implementation Intentions - -| | | -|---|---| -| **Source** | Gollwitzer, P. M. (1999). Implementation intentions: Strong effects of simple plans. *American Psychologist*, 54(7), 493–503. | -| **Date** | 1999 | -| **Status** | Confirmed | -| **Core finding** | "If X then Y" plans are 2–3x more likely to execute than general intentions. | -| **Mechanism** | If-then plans create automatic cue-response links in memory. The brain processes "if function > 20 lines then extract helper" as an action trigger, not a suggestion to consider. | -| **Where used** | Refactor Self-Check Gates in `implementation/SKILL.md`, Code Quality checks in `verify/SKILL.md`. | - ---- - -### 3. Commitment Devices - -| | | -|---|---| -| **Source** | Cialdini, R. B. (2001). *Influence: The Psychology of Persuasion* (rev. ed.). HarperBusiness. 
| -| **Date** | 2001 | -| **Status** | Confirmed | -| **Core finding** | Forcing an explicit micro-commitment (filling in a PASS/FAIL cell) creates resistance to reversals. A checkbox checked is harder to uncheck than a todo noted. | -| **Mechanism** | Structured tables with PASS/FAIL cells create commitment-device effects. The act of marking "FAIL" requires justification, making silent passes psychologically costly. | -| **Where used** | SOLID enforcement table, ObjCal enforcement table, Design Patterns table — all require explicit PASS/FAIL with evidence. | - ---- - -### 4. System 2 Before System 1 - -| | | -|---|---| -| **Source** | Kahneman, D. (2011). *Thinking, Fast and Slow*. Farrar, Straus and Giroux. | -| **Date** | 2011 | -| **Status** | Confirmed | -| **Core finding** | System 1 (fast, automatic) is vulnerable to anchoring and confirmation bias. System 2 (slow, deliberate) must be activated before System 1's judgments anchor. | -| **Mechanism** | Running semantic review *before* automated commands prevents the "all green" dopamine hit from anchoring the reviewer's judgment. Doing hard cognitive work first protects against System 1 shortcuts. | -| **Where used** | Verification order in `verify/SKILL.md`: semantic alignment check before commands. | - ---- - -### 5. Adversarial Collaboration - -| | | -|---|---| -| **Source** | Mellers, B. A., Hertwig, R., & Kahneman, D. (2001). Do frequency representations eliminate conflict? An exercise in adversarial collaboration. *Psychological Science*, 12(4), 269–275. | -| **Date** | 2001 | -| **Status** | Confirmed | -| **Core finding** | Highest-quality thinking emerges when parties hold different hypotheses and are charged with finding flaws in each other's reasoning. | -| **Mechanism** | Explicitly framing the reviewer as "your job is to break this feature" activates the adversarial collaboration mode. The reviewer seeks disconfirmation rather than confirmation. | -| **Where used** | Adversarial mandate in `reviewer.md` and `verify/SKILL.md`. 
| - ---- - -### 6. Accountability to Unknown Audience - -| | | -|---|---| -| **Source** | Tetlock, P. E. (1983). Accountability: A social determinant of judgment. In *Psychology of Learning and Motivation* (Vol. 17, pp. 295–332). Academic Press. | -| **Date** | 1983 | -| **Status** | Confirmed | -| **Core finding** | Accountability to an unknown audience with unknown views improves reasoning quality. The agent anticipates being audited and adjusts reasoning. | -| **Mechanism** | The explicit report format (APPROVED/REJECTED with evidence) creates an accountability structure — the reviewer's reasoning will be read by the PO. | -| **Where used** | Report format in `verify/SKILL.md`, structured evidence columns in all enforcement tables. | - ---- - -### 7. Chunking and Cognitive Load Reduction - -| | | -|---|---| -| **Source** | Miller, G. A. (1956). The magical number seven, plus or minus two. *Psychological Review*, 63(2), 81–97. | -| **Date** | 1956 | -| **Alternative** | Sweller, J. (1988). Cognitive load during problem solving. *Cognitive Science*, 12(2), 257–285. | -| **Status** | Confirmed | -| **Core finding** | Structured tables reduce working memory load vs. narrative text. Chunking related items into table rows enables parallel processing. | -| **Mechanism** | Replacing prose checklists with structured tables (rows × columns) allows the reviewer to process all items in a single pass. | -| **Where used** | All enforcement tables in `verify/SKILL.md` and `reviewer.md`. | - ---- - -### 8. Elaborative Encoding - -| | | -|---|---| -| **Source** | Craik, F. I. M., & Lockhart, R. S. (1972). Levels of processing: A framework for memory research. *Journal of Verbal Learning and Verbal Behavior*, 11(6), 671–684. | -| **Date** | 1972 | -| **Status** | Confirmed | -| **Core finding** | Deeper processing — explaining *why* a rule matters — leads to better retention and application than shallow processing. 
| -| **Mechanism** | Adding a "Why it matters" column to enforcement tables forces the reviewer to process the rationale, not just scan the rule name. | -| **Where used** | SOLID table, ObjCal table, Design Patterns table — all have "Why it matters" column. | - ---- - -### 9. Error-Specific Feedback - -| | | -|---|---| -| **Source** | Hattie, J., & Timperley, H. (2007). The power of feedback. *Review of Educational Research*, 77(1), 81–112. | -| **Date** | 2007 | -| **Status** | Confirmed | -| **Core finding** | Feedback is most effective when it tells the agent exactly what went wrong and what the correct action is. "FAIL: function > 20 lines at file:47" is actionable; "Apply function length rules" is not. | -| **Mechanism** | The evidence column in enforcement tables requires specific file:line references, turning vague rules into actionable directives. | -| **Where used** | Evidence column in all enforcement tables. | - ---- - -### 10. Prospective Memory Cues - -| | | -|---|---| -| **Source** | McDaniel, M. A., & Einstein, G. O. (2000). Strategic and automatic processes in prospective memory retrieval. *Applied Cognitive Psychology*, 14(7), S127–S144. | -| **Date** | 2000 | -| **Status** | Confirmed | -| **Core finding** | Memory for intended actions is better when cues are embedded at the point of action, not in a separate appendix. | -| **Mechanism** | Placing if-then gates inline (in the REFACTOR section) rather than in a separate "reference" document increases adherence. The cue appears exactly when the developer is about to make the relevant decision. | -| **Where used** | Refactor Self-Check Gates embedded inline in `refactor/SKILL.md`. | - ---- - -## Bibliography - -1. Cialdini, R. B. (2001). *Influence: The Psychology of Persuasion* (rev. ed.). HarperBusiness. -2. Craik, F. I. M., & Lockhart, R. S. (1972). Levels of processing: A framework for memory research. *Journal of Verbal Learning and Verbal Behavior*, 11(6), 671–684. -3. Gollwitzer, P. M. (1999). 
Implementation intentions: Strong effects of simple plans. *American Psychologist*, 54(7), 493–503. -4. Hattie, J., & Timperley, H. (2007). The power of feedback. *Review of Educational Research*, 77(1), 81–112. -5. Kahneman, D. (2011). *Thinking, Fast and Slow*. Farrar, Straus and Giroux. -6. Klein, G. (1998). *Sources of Power: How People Make Decisions*. MIT Press. -7. McDaniel, M. A., & Einstein, G. O. (2000). Strategic and automatic processes in prospective memory retrieval. *Applied Cognitive Psychology*, 14(7), S127–S144. -8. Mellers, B. A., Hertwig, R., & Kahneman, D. (2001). Do frequency representations eliminate conflict? An exercise in adversarial collaboration. *Psychological Science*, 12(4), 269–275. -9. Miller, G. A. (1956). The magical number seven, plus or minus two. *Psychological Review*, 63(2), 81–97. -10. Sweller, J. (1988). Cognitive load during problem solving. *Cognitive Science*, 12(2), 257–285. -11. Tetlock, P. E. (1983). Accountability: A social determinant of judgment. In *Psychology of Learning and Motivation* (Vol. 17). Academic Press. diff --git a/docs/scientific-research/documentation.md b/docs/scientific-research/documentation.md deleted file mode 100644 index 9c77a00..0000000 --- a/docs/scientific-research/documentation.md +++ /dev/null @@ -1,69 +0,0 @@ -# Scientific Research — Documentation - -Foundations for living documentation, docs-as-code, information architecture, and post-mortem practices used in this template. - ---- - -### 59. Information Needs in Collocated Software Development Teams - -| | | -|---|---| -| **Source** | Ko, A. J., DeLine, R., & Venolia, G. (2007). "Information Needs in Collocated Software Development Teams." *Proc. 29th International Conference on Software Engineering (ICSE 2007)*, pp. 344–353. IEEE. https://doi.org/10.1109/ICSE.2007.45 | -| **Date** | 2007 | -| **Alternative** | Dagenais, B., & Robillard, M. P. (2010). "Creating and evolving developer documentation." *Proc. FSE 2010*, pp. 127–136. ACM. 
| -| **Status** | Confirmed — empirical study; 600+ citations | -| **Core finding** | Developers spend 35–50% of their working time not writing code but searching for information — navigating code, reading past decisions, and understanding relationships between components. The most frequently sought information is: who wrote this, why was it written this way, and what does this module depend on. Direct questioning of teammates is the most common fallback when documentation is absent, creating serial bottlenecks. | -| **Mechanism** | Information seeking is triggered by a task, not by curiosity. A developer encountering an unfamiliar component has a specific decision to make. When documentation is absent, the seek-ask-wait loop (find the right person, ask, wait for a response) dominates time. Persistent documentation (ADRs, architecture diagrams, glossary) short-circuits this loop by making the answer findable without a human intermediary. | -| **Where used** | Justifies the full `living-docs` skill: C4 diagrams answer "what does this module depend on?"; the ADR record answers "why was it written this way?"; the living glossary answers "what does this term mean in this context?". Collectively these eliminate the three most frequent information needs identified by Ko et al. | - ---- - -### 60. Software Engineering at Google — Documentation Chapter - -| | | -|---|---| -| **Source** | Winters, T., Manshreck, T., & Wright, H. (2020). *Software Engineering at Google: Lessons Learned from Programming Over Time*. O'Reilly. Chapter 10: "Documentation." https://abseil.io/resources/swe-book/html/ch10.html | -| **Date** | 2020 | -| **Alternative** | Fitzpatrick, B., & Collins-Sussman, B. (2012). *Team Geek*. O'Reilly. 
| -| **Status** | Confirmed — large-scale industry evidence from a codebase with ~2 billion lines of code | -| **Core finding** | Documentation that lives outside the code repository decays at a rate proportional to how often the code changes — because there is no mechanism that forces the doc to be updated when the code changes. Docs-as-code (documentation in the same repo, reviewed in the same PRs, tested in the same CI pipeline) dramatically reduces divergence because the cost of updating the doc is incurred at the same moment as the cost of the code change. | -| **Mechanism** | Google's g3doc system co-locates docs with the code they describe. When a PR changes `payments/service.py`, the reviewer also sees `payments/README.md` in the diff and can flag staleness immediately. At scale, Google found that docs with no co-located tests or CI checks become stale within 3–6 months regardless of team discipline. | -| **Where used** | Justifies co-locating `docs/` within the project repository. Living docs (`docs/architecture/c4/`, `docs/glossary.md`) are updated in the same commits as the code they describe. The `living-docs` skill is the mechanism that enforces this — it runs after Step 5 to regenerate diagrams from the current state of the codebase and discovery docs. | - ---- - -### 61. Diátaxis — A Systematic Framework for Technical Documentation - -| | | -|---|---| -| **Source** | Procida, D. (2021). "Diátaxis — A systematic approach to technical documentation." *diataxis.fr*. https://diataxis.fr | -| **Date** | 2021 | -| **Status** | Confirmed — adopted by Django, NumPy, Gatsby, Cloudflare, and the Python Software Foundation | -| **Core finding** | Technical documentation fails because it conflates four fundamentally different needs into a single undifferentiated text. 
The four types are: **Tutorials** (learning-oriented; guides a beginner through a complete task), **How-to guides** (task-oriented; solves a specific problem for a practitioner), **Reference** (information-oriented; describes the system accurately and completely), **Explanation** (understanding-oriented; discusses concepts and decisions). Each type has a different audience mental state and requires a different writing mode. Mixing them degrades all four. | -| **Mechanism** | The two axes of Diátaxis are: **practical ↔ theoretical** (tutorials and how-to guides are practical; reference and explanation are theoretical) and **acquiring ↔ applying** (tutorials and explanation are for acquiring knowledge; how-to guides and reference are for applying it). A document that tries to be both a tutorial and a reference simultaneously will be a poor tutorial (too much information) and a poor reference (not structured for lookup). | -| **Where used** | Documentation structure in this template maps to Diátaxis: `README.md` = tutorial (getting started), `AGENTS.md` = reference (complete description of roles, skills, commands) and explanation (why the workflow exists), `docs/c4/` = reference (system structure), post-mortems = explanation (why decisions were made). The `living-docs` skill produces reference-type documentation (C4 diagrams, glossary) — not tutorials. | - ---- - -### 62. Blameless Post-Mortems and a Just Culture - -| | | -|---|---| -| **Source** | Allspaw, J. (2012). "Blameless PostMortems and a Just Culture." *code.etsy.com* (archived). https://www.etsy.com/codeascraft/blameless-postmortems/ | -| **Date** | 2012 | -| **Alternative** | Dekker, S. (2006). *The Field Guide to Understanding Human Error*. Ashgate. | -| **Status** | Confirmed — foundational DevOps/SRE practice; referenced in Google SRE Book (2016) | -| **Core finding** | Post-mortems that assign blame produce less information and lower long-term system reliability than blameless post-mortems. 
When individuals believe they will be blamed, they withhold information about contributing factors, preventing the systemic causes from being identified and fixed. A blameless post-mortem treats the incident as a system failure, not an individual failure — asking "what conditions allowed this to happen?" not "who caused this?" | -| **Mechanism** | Allspaw's model separates two questions: (1) what happened? (factual, blameless) and (2) what changes would prevent recurrence? (systemic). The post-mortem document records both. The output is not an individual's performance review but a list of system changes — process improvements, documentation gaps, tooling additions. Etsy's incident rate fell after adopting blameless post-mortems because engineers began reporting near-misses that they previously concealed. | -| **Where used** | `docs/post-mortem/` directory. Post-mortems in this template follow the blameless model: they report workflow gaps found, not who made the mistake. The output of each post-mortem is a list of improvements to skills, agents, or workflow documentation. The `living-docs` skill is one such improvement — it emerged from the discovery that architecture and glossary documentation were falling behind the codebase. | - ---- - -## Bibliography - -1. Allspaw, J. (2012). Blameless PostMortems and a Just Culture. *code.etsy.com*. https://www.etsy.com/codeascraft/blameless-postmortems/ -2. Dagenais, B., & Robillard, M. P. (2010). Creating and evolving developer documentation. *Proc. FSE 2010*, pp. 127–136. ACM. -3. Dekker, S. (2006). *The Field Guide to Understanding Human Error*. Ashgate. -4. Ko, A. J., DeLine, R., & Venolia, G. (2007). Information Needs in Collocated Software Development Teams. *Proc. ICSE 2007*, pp. 344–353. https://doi.org/10.1109/ICSE.2007.45 -5. Procida, D. (2021). Diátaxis — A systematic approach to technical documentation. *diataxis.fr*. https://diataxis.fr -6. Winters, T., Manshreck, T., & Wright, H. (2020). 
*Software Engineering at Google*. O'Reilly. Chapter 10. https://abseil.io/resources/swe-book/html/ch10.html diff --git a/docs/scientific-research/domain-modeling.md b/docs/scientific-research/domain-modeling.md deleted file mode 100644 index eb9143e..0000000 --- a/docs/scientific-research/domain-modeling.md +++ /dev/null @@ -1,115 +0,0 @@ -# Scientific Research — Domain Modeling - -Foundations for bounded context identification, ubiquitous language, and feature decomposition used in this template. - ---- - -### 31. Domain-Driven Design — Bounded Contexts and Feature Identification - -| | | -|---|---| -| **Source** | Evans, E. (2003). *Domain-Driven Design: Tackling Complexity in the Heart of Software*. Addison-Wesley. | -| **Date** | 2003 | -| **Alternative** | Context Mapper (2025). Rapid Object-Oriented Analysis and Design. https://contextmapper.org/docs/rapid-ooad | -| **Status** | Confirmed — foundational DDD literature | -| **Core finding** | A Bounded Context is a boundary within which a particular ubiquitous language is consistent. Features are identified by grouping related user stories that share the same language. The decomposition criterion is "single responsibility per context" + "consistency of language." | -| **Mechanism** | In DDD: (1) Extract ubiquitous language from requirements → (2) Group by language consistency → (3) Each group is a candidate bounded context → (4) Each bounded context maps to a feature. Context Mapper automates this: User Stories → Subdomains (via noun/verb extraction) → Bounded Contexts of type FEATURE. | -| **Where used** | Stage 1 Discovery: after session synthesis, verify each feature has consistent language. Noun/verb extraction from discovery answers builds the Domain Model in `docs/discovery.md`. The `Rules (Business):` section in `.feature` files captures the ubiquitous language rules that govern each feature. | - ---- - -### 63. DDD Reference — Pattern Summaries (CC-BY) - -| | | -|---|---| -| **Source** | Evans, E. 
(2015). *DDD Reference: Definitions and Pattern Summaries*. domainlanguage.com. https://www.domainlanguage.com/ddd/reference/ | -| **Date** | 2015 | -| **Alternative** | Evans, E. (2003). *Domain-Driven Design*. Addison-Wesley. (full book; entry #31) | -| **Status** | Confirmed — freely available CC-BY canonical summary; maintained by Evans personally | -| **Core finding** | The open-access pattern summary of all DDD patterns from the 2003 book. More precisely citable than the book for specific pattern definitions. Key patterns: Ubiquitous Language ("Use the model as the backbone of a language. Commit the team to exercising that language relentlessly in all communication within the team and in the code."), Bounded Context, Context Map, Domain Events, Aggregates, Repositories. | -| **Mechanism** | Each pattern is described with: intent, prescription, and "therefore" consequences. The Ubiquitous Language pattern prescribes: use the same terms in diagrams, writing, and especially speech. Refactor the code when the language changes. Resolve confusion over terms in conversation, the way confusion over ordinary words is resolved — by agreement and precision. | -| **Where used** | Primary reference for `docs/discovery.md` Domain Model structure and the ubiquitous language practice. `living-docs` skill glossary entries derive from this: terms must match code identifiers (Evans' "use the same language in code" prescription). `docs/scientific-research/domain-modeling.md`. | -| **Note** | Supersedes entry #31 as the citable source for specific pattern quotes. Entry #31 remains as the book reference. Use this entry when citing a specific Evans pattern definition. | - ---- - -### 64. UbiquitousLanguage — Fowler Bliki - -| | | -|---|---| -| **Source** | Fowler, M. (2006). "UbiquitousLanguage." *martinfowler.com*. 
https://martinfowler.com/bliki/UbiquitousLanguage.html | -| **Date** | 2006 | -| **Alternative** | Evans (2015) DDD Reference (entry #63) — the primary source Fowler summarises | -| **Status** | Confirmed — widely cited secondary source; Fowler wrote the DDD foreword and is considered the authoritative secondary interpreter of Evans | -| **Core finding** | The ubiquitous language is a practice, not a document. The glossary is a secondary artifact — a snapshot of the current state of the language. The language itself lives in conversation, in the code, and in all written communication. "By using the model-based language pervasively and not being satisfied until it flows, we approach a model that is complete and comprehensible." Domain experts must object to inadequate terms; developers must flag ambiguity. | -| **Mechanism** | The key test of a ubiquitous language: can a domain expert read the domain layer code and recognize their domain? If the code uses different names than the glossary, the code must be refactored — not the glossary relaxed. The language evolves through experimentation with alternative expressions, followed by code refactoring to match the new model. | -| **Where used** | `living-docs` skill — grounds the rule "verify each term matches the identifier used in the code's domain layer." `docs/glossary.md` — the glossary is explicitly secondary to the code. `docs/scientific-research/domain-modeling.md`. | - ---- - -### 65. BoundedContext — Fowler Bliki - -| | | -|---|---| -| **Source** | Fowler, M. (2014). "BoundedContext." *martinfowler.com*. 
https://martinfowler.com/bliki/BoundedContext.html | -| **Date** | 2014 | -| **Alternative** | Evans (2015) DDD Reference (entry #63) — Fowler cites Evans directly | -| **Status** | Confirmed — includes a direct Evans quote; the canonical accessible reference for Bounded Context as a design pattern | -| **Core finding** | "Total unification of the domain model for a large system will not be feasible or cost-effective" (Evans, quoted directly). The same word can mean different things in different Bounded Contexts — this is not a defect but a reflection of domain reality. "You need a different model when the language changes." A Bounded Context is the boundary within which a particular ubiquitous language is internally consistent. Terms must be qualified by their context when a project has more than one bounded context. | -| **Mechanism** | Fowler's electricity utility example: the word "meter" meant different things in billing, grid management, and customer service. Attempting to unify these into one definition created confusion. Each bounded context maintains its own model and its own language. Context Maps document the relationships and translation rules between bounded contexts. | -| **Where used** | `living-docs` skill — `**Bounded context:**` field in `docs/glossary.md` entries is mandatory when the project has more than one bounded context (this is the Evans/Fowler requirement). `docs/scientific-research/domain-modeling.md`. | - ---- - -### 66. Implementing Domain-Driven Design - -| | | -|---|---| -| **Source** | Vernon, V. (2013). *Implementing Domain-Driven Design*. Addison-Wesley. 
| -| **Date** | 2013 | -| **Alternative** | Evans (2003) DDD (entry #31) — Vernon explicitly builds on Evans | -| **Status** | Confirmed — second most cited DDD book; ~5,000 citations | -| **Core finding** | Three additions to Evans: (1) **Domain Events as first-class vocabulary** — past-tense verb phrases ("OrderPlaced," "VersionDisplayed") are part of the ubiquitous language and belong in the glossary as a distinct type. (2) **Context Maps as the organizing principle** for multi-context glossaries — each bounded context has its own language documentation; the Context Map shows translation rules between contexts. (3) **Documentation co-located with the code** — docs in the same repository decay at the same rate as the code, dramatically reducing divergence. | -| **Mechanism** | Vernon's IDDD samples (github.com/VaughnVernon/IDDD_Samples) demonstrate all three in practice. The Product Owner / Business Analyst plays the domain-expert-representative role in glossary maintenance — validating semantic correctness — while developers own structural precision. Neither writes the glossary unilaterally. | -| **Where used** | `living-docs` skill — `Domain Event` added as a distinct Type value in `docs/glossary.md` entries. Grounds the PO-owned glossary with SE input via `docs/architecture.md` Reason: fields. `docs/scientific-research/domain-modeling.md`. | - ---- - -### 67. Ubiquitous Language Is Not a Glossary — Verraes - -| | | -|---|---| -| **Source** | Verraes, M. (2013). "Ubiquitous Language Is Not a Glossary." *verraes.net*. 
https://web.archive.org/web/20131004/https://verraes.net/2013/04/ubiquitous-language-is-not-a-glossary/ | -| **Date** | 2013 | -| **Alternative** | Fowler (2006) UbiquitousLanguage (entry #64) — the same secondary-artifact point, less pointed | -| **Status** | Confirmed — original URL is 404; widely documented through community discussion and practitioner secondary accounts; thesis is uncontested in the DDD community | -| **Core finding** | A glossary is not a ubiquitous language. Teams that maintain a glossary but do not reflect its terms in the code have the *appearance* of a ubiquitous language without the substance. The glossary is a secondary artifact derived from the code and domain-expert conversations — not the reverse. The canonical source of truth is the domain layer code, not the glossary document. A glossary that diverges from the code is lying. | -| **Mechanism** | The test: can a domain expert read the domain layer code and recognize their domain without a translator? If yes, the ubiquitous language exists. If the only evidence of the language is the glossary document, it does not exist. Consequence: every term added to the glossary must be verified against the corresponding code identifier. | -| **Where used** | `living-docs` skill — grounds the checklist item "Verify each term matches the identifier used in the code's domain layer." Prevents the common failure mode of glossary-as-theatre. `docs/scientific-research/domain-modeling.md`. | - ---- - -### 68. Whirlpool Process of Model Exploration — Evans - -| | | -|---|---| -| **Source** | Evans, E. (2011). *Whirlpool Process of Model Exploration*. domainlanguage.com. https://www.domainlanguage.com/ddd/whirlpool/ | -| **Date** | 2011 | -| **Alternative** | Brandolini, A. (2013). *Introducing EventStorming*. Leanpub. 
— a later, more structured alternative to Whirlpool | -| **Status** | Confirmed — freely available; Evans' own post-2003 process guidance | -| **Core finding** | Model exploration is a cycle: Scenario Exploring → Harvesting Abstractions → Probing the Model → Challenging the Model → back to Scenario Exploring. New vocabulary crystallizes at the Harvesting Abstractions step — concrete scenarios surface candidate terms, which are then named, defined, and reflected in the code. The glossary grows at each Harvesting Abstractions step. | -| **Mechanism** | The Whirlpool is not a development process — it fits within most iterative processes. It is a model-exploration subprocess triggered whenever the team encounters a poorly understood domain concept. The output of each cycle is a refined model expressed in clearer language, with updated code identifiers and glossary entries. | -| **Where used** | `living-docs` skill — grounds the timing of glossary updates: after each completed feature (Step 5) corresponds to the Harvesting Abstractions step in the Whirlpool. Discovery sessions (Stage 1) correspond to Scenario Exploring. `docs/scientific-research/domain-modeling.md`. | - ---- - -## Bibliography - -1. Context Mapper. (2025). Rapid Object-Oriented Analysis and Design. https://contextmapper.org/docs/rapid-ooad -2. Evans, E. (2003). *Domain-Driven Design: Tackling Complexity in the Heart of Software*. Addison-Wesley. -3. Evans, E. (2011). *Whirlpool Process of Model Exploration*. domainlanguage.com. https://www.domainlanguage.com/ddd/whirlpool/ -4. Evans, E. (2015). *DDD Reference: Definitions and Pattern Summaries* (CC-BY). domainlanguage.com. https://www.domainlanguage.com/ddd/reference/ -5. Fowler, M. (2006). UbiquitousLanguage. martinfowler.com. https://martinfowler.com/bliki/UbiquitousLanguage.html -6. Fowler, M. (2014). BoundedContext. martinfowler.com. https://martinfowler.com/bliki/BoundedContext.html -7. Vernon, V. (2013). *Implementing Domain-Driven Design*. 
Addison-Wesley. -8. Verraes, M. (2013). Ubiquitous Language Is Not a Glossary. verraes.net (archived). https://web.archive.org/web/20131004/https://verraes.net/2013/04/ubiquitous-language-is-not-a-glossary/ diff --git a/docs/scientific-research/oop-design.md b/docs/scientific-research/oop-design.md deleted file mode 100644 index 4b0637d..0000000 --- a/docs/scientific-research/oop-design.md +++ /dev/null @@ -1,64 +0,0 @@ -# Scientific Research — OOP Design - -Foundations for object-oriented design principles used in this template. - ---- - -### 32. Object Calisthenics — Nine Rules - -| | | -|---|---| -| **Source** | Bay, J. "Object Calisthenics." *The Thoughtworks Anthology* (PragProg, 2008). Original in IEEE Software/DevX, ~2005. https://www.bennadel.com/resources/uploads/2012/objectcalisthenics.pdf | -| **Date** | ~2005 | -| **Status** | Practitioner synthesis | -| **Core finding** | 9 rules to internalize OOP: (1) One level indentation per method, (2) No ELSE, (3) Wrap primitives/Strings, (4) First class collections, (5) One dot per line, (6) No abbreviations, (7) Classes ≤50 lines, (8) ≤2 instance variables, (9) No getters/setters. 7 of 9 enforce data encapsulation; 1 drives polymorphism; 1 drives naming. | -| **Mechanism** | Restrictions force decomposition. When you cannot use getters, behavior must move into the object. When you cannot use ELSE, you use polymorphism. When classes must be ≤2 ivars, you discover missing abstractions. | -| **Where used** | Refactor self-declaration checklist in `refactor/SKILL.md`. | - ---- - -### 33. Refactoring - -| | | -|---|---| -| **Source** | Fowler, M. (1999/2018). *Refactoring: Improving the Design of Existing Code* (2nd ed.). Addison-Wesley. https://martinfowler.com/books/refactoring.html | -| **Date** | 1999, 2018 | -| **Status** | Confirmed — foundational | -| **Core finding** | Refactoring = behavior-preserving transformations. 
68 catalogued refactorings, each small enough to do safely but cumulative effect significant. Code smells (duplicate code, long methods, feature envy) indicate refactoring opportunities. | -| **Mechanism** | Small steps reduce risk. Each refactoring is reversible. Test suite validates behavior unchanged. | -| **Where used** | `refactor/SKILL.md`: smell detection triggers refactoring; full protocol and catalogue entries. | - ---- - -### 34. Design Patterns - -| | | -|---|---| -| **Source** | Gamma, E., Helm, R., Johnson, R., Vlissides, J. (1995). *Design Patterns: Elements of Reusable Object-Oriented Software*. Addison-Wesley. | -| **Date** | 1995 | -| **Status** | Confirmed — foundational | -| **Core finding** | 23 patterns catalogued in 3 categories: Creational (5), Structural (7), Behavioral (11). Key principles: "Favor composition over inheritance," "Program to an interface, not an implementation." | -| **Mechanism** | Patterns are recurring solutions to common problems. Named and catalogued so developers don't rediscover them. | -| **Where used** | `design-patterns/SKILL.md`: full GoF catalogue with smell-triggered Python before/after examples. | - ---- - -### 35. SOLID Principles - -| | | -|---|---| -| **Source** | Martin, R. C. (2000). "Principles of OOD." *ButUncleBob.com*. Acronym coined by Michael Feathers (2004). https://blog.interface-solv.com/wp-content/uploads/2020/07/Principles-Of-OOD.pdf | -| **Date** | 2000 | -| **Status** | Confirmed | -| **Core finding** | S: One reason to change. O: Open extension, closed modification. L: Subtypes substitutable. I: No forced stub methods. D: Depend on abstractions, not concretes. | -| **Mechanism** | Each principle targets a specific coupling failure mode. Together they produce low coupling, high cohesion. | -| **Where used** | Refactor self-declaration checklist in `refactor/SKILL.md`: 5-row SOLID table with Python before/after examples. | - ---- - -## Bibliography - -1. Bay, J. (~2005). "Object Calisthenics." 
*IEEE Software/DevX*. https://www.bennadel.com/resources/uploads/2012/objectcalisthenics.pdf -2. Fowler, M. (1999/2018). *Refactoring: Improving the Design of Existing Code* (2nd ed.). Addison-Wesley. https://martinfowler.com/books/refactoring.html -3. Gamma, E., Helm, R., Johnson, R., & Vlissides, J. (1995). *Design Patterns: Elements of Reusable Object-Oriented Software*. Addison-Wesley. -4. Martin, R. C. (2000). "Principles of OOD." *ButUncleBob.com*. https://blog.interface-solv.com/wp-content/uploads/2020/07/Principles-Of-OOD.pdf diff --git a/docs/scientific-research/refactoring-empirical.md b/docs/scientific-research/refactoring-empirical.md deleted file mode 100644 index 61d666c..0000000 --- a/docs/scientific-research/refactoring-empirical.md +++ /dev/null @@ -1,100 +0,0 @@ -# Scientific Research — Refactoring (Empirical) - -Empirical studies on code smells, refactoring prioritization, and OOP complexity used in this template. - ---- - -### 36. QDIR — Bad-Smells + OO Metrics Prioritization - -| | | -|---|---| -| **Source** | Malhotra, R., Singh, P. (2020). "Exploiting bad-smells and object-oriented characteristics to prioritize classes for refactoring." *Int. J. Syst. Assur. Eng. Manag.* 11(Suppl 2), 133–144. Springer. | -| **Date** | 2020 | -| **URL** | https://doi.org/10.1007/s13198-020-01001-x | -| **Status** | Confirmed — empirical | -| **Core finding** | QDIR (Quality Depreciation Index Rule) combines bad-smell severity with OO metrics (LOC, WMC, CBO, RFC, DIT) to prioritize classes for refactoring. Validated on 8 open-source Java systems. | -| **Mechanism** | Classes with high smell severity AND high OO metrics are prioritized. QDIR = weighted sum. | -| **Where used** | Refactor prioritization: when smell detected, check OO metrics to prioritize. | - ---- - -### 37. Smells + Architectural Refactoring - -| | | -|---|---| -| **Source** | Silva, C. et al. (2020). "When Are Smells Indicators of Architectural Refactoring Opportunities." *Proc. 28th Int. 
Conf. on Program Comprehension*. ACM. | -| **Date** | 2020 | -| **URL** | https://doi.org/10.1145/3387904.3389276 | -| **Status** | Confirmed — empirical | -| **Core finding** | Study of 50 projects, 52,667 refactored elements. 67.53% of smells co-occur. Smells that co-occur are indicators of architectural refactoring in 88.53% of cases. | -| **Mechanism** | Single smells are often code-level; co-occurring smells indicate architectural problems. Pattern catalog for smells → specific architectural refactorings. | -| **Where used** | Smell detection triggers architectural analysis when co-occurrence patterns detected. | - ---- - -### 38. SPIRIT Tool — Code Smell Prioritization - -| | | -|---|---| -| **Source** | Vidal, S. A., Marcos, C., Díaz-Pace, J. A. (2014). "An Approach to Prioritize Code Smells for Refactoring." *Automated Software Engineering*, 23(3), 501–532. | -| **Date** | 2014 | -| **URL** | https://doi.org/10.1007/s10515-014-0175-x | -| **Status** | Confirmed — tool | -| **Core finding** | SPIRIT (Smart Identification of Refactoring opportunITies) prioritizes smells by 3 criteria: (1) component stability, (2) impact on modifiability scenarios, (3) smell relevance. Top-ranked smells correlate with expert developer judgment. | -| **Mechanism** | Semi-automated ranking. Combines version history (stable vs. unstable), impact analysis, and smell type. | -| **Where used** | Refactor prioritization: stability = has the class changed recently? Unstable + smelly = prioritize. | - ---- - -### 39. Bad Engineering Properties of OOP - -| | | -|---|---| -| **Source** | Cardelli, L. (1996). "Bad Engineering Properties of Object-Oriented Languages." *ACM Computing Surveys*, 28(4), 150. 
| -| **Date** | 1996 | -| **URL** | https://www.microsoft.com/en-us/research/publication/bad-engineering-properties-of-object-oriented-languages/ | -| **Status** | Confirmed — foundational critique | -| **Core finding** | OOP has 5 "economy" problems: (1) Execution (virtual methods prevent inlining), (2) Compilation (no code/interface separation), (3) Small-scale dev (expressive type systems missing), (4) Large-scale dev (poor class extension/modification), (5) Language features (baroque complexity). | -| **Mechanism** | OOP is not universally superior. Trade-offs exist. Knowing these helps avoid over-engineering. | -| **Where used** | Anti-pre-pattern: know when OOP adds complexity vs. value. | - ---- - -### 40. Code Complexity Model of OOP - -| | | -|---|---| -| **Source** | Aluthwaththage, J. H., Thathsarani, H. A. N. N. (2024). "A Novel OO-Based Code Complexity Metric." *Proc. Future Technologies Conference (FTC)*, 616–628. Springer/IEEE. | -| **Date** | 2024 | -| **URL** | https://link.springer.com/chapter/10.1007/978-3-031-73125-9_39 | -| **Alternative** | Misra et al. (2024). "A Suite of Object Oriented Cognitive Complexity Metrics." IEEE. | -| **Status** | Partially confirmed — recent | -| **Core finding** | CWC (Combined Weighted Complexity) measures OOP complexity at statement level, considering 8 factors: nesting depth, control types, compound conditions, try-catch, threads, pointers, references, dynamic memory. Addresses gap in existing metrics ignoring cognitive load. | -| **Mechanism** | Granular complexity scoring. Higher scores indicate more cognitively demanding code. | -| **Where used** | Complexity measurement: when function > 20 lines, consider CWC-style granular scoring. | - ---- - -### 41. Metric Thresholds for Smell Detection - -| | | -|---|---| -| **Source** | Bigonha, M. A. S., et al. (2019). "The usefulness of software metric thresholds for detection of bad smells and fault prediction." *Information and Software Technology*, 115, 79–92. 
| -| **Date** | 2019 | -| **URL** | https://doi.org/10.1016/j.infsof.2019.08.005 | -| **Alternative** | Catal et al. (2018). "Software metrics thresholds calculation techniques." *Info. Softw. Technol.* | -| **Status** | Confirmed | -| **Core finding** | Metric thresholds (e.g., LOC > 600) used for smell detection are unreliable. Study on 92 open-source systems found precision too low for practical use. Neither heuristic-based nor ML approaches achieve acceptable accuracy. | -| **Mechanism** | Fixed thresholds are context-dependent. Thresholds should be project-specific, not universal. | -| **Where used** | Anti-pre-pattern: do not rely on fixed thresholds. Use co-occurrence patterns (entry 37) instead. | - ---- - -## Bibliography - -1. Aluthwaththage, J. H., & Thathsarani, H. A. N. N. (2024). A Novel OO-Based Code Complexity Metric. *Proc. Future Technologies Conference (FTC)*, 616–628. https://link.springer.com/chapter/10.1007/978-3-031-73125-9_39 -2. Bigonha, M. A. S., et al. (2019). The usefulness of software metric thresholds. *Information and Software Technology*, 115, 79–92. https://doi.org/10.1016/j.infsof.2019.08.005 -3. Cardelli, L. (1996). Bad Engineering Properties of Object-Oriented Languages. *ACM Computing Surveys*, 28(4), 150. https://www.microsoft.com/en-us/research/publication/bad-engineering-properties-of-object-oriented-languages/ -4. Malhotra, R., & Singh, P. (2020). Exploiting bad-smells and OO characteristics. *Int. J. Syst. Assur. Eng. Manag.*, 11(Suppl 2), 133–144. https://doi.org/10.1007/s13198-020-01001-x -5. Silva, C. et al. (2020). When Are Smells Indicators of Architectural Refactoring Opportunities. *Proc. 28th ICPC*. https://doi.org/10.1145/3387904.3389276 -6. Vidal, S. A., Marcos, C., & Díaz-Pace, J. A. (2014). An Approach to Prioritize Code Smells. *Automated Software Engineering*, 23(3), 501–532. 
https://doi.org/10.1007/s10515-014-0175-x diff --git a/docs/scientific-research/requirements-elicitation.md b/docs/scientific-research/requirements-elicitation.md deleted file mode 100644 index b272727..0000000 --- a/docs/scientific-research/requirements-elicitation.md +++ /dev/null @@ -1,246 +0,0 @@ -# Scientific Research — Requirements Elicitation - -Foundations for the PO interview structure, Gherkin criteria, and feature discovery in this template. - ---- - -### 17. INVEST Criteria for User Stories - -| | | -|---|---| -| **Source** | Wake, B. (2003). *INVEST in Good Stories, and SMART Tasks*. XP123.com. | -| **Date** | 2003 | -| **Alternative** | Cohn, M. (2004). *User Stories Applied: For Agile Software Development*. Addison-Wesley. | -| **Status** | Confirmed | -| **Core finding** | Stories that are Independent, Negotiable, Valuable, Estimable, Small, and Testable produce fewer downstream defects and smoother development cycles. | -| **Mechanism** | INVEST serves as a quality gate before stories enter development. "Testable" forces the PO to express observable outcomes (directly enabling Given/When/Then). "Small" forces decomposition. "Independent" prevents hidden ordering dependencies. | -| **Where used** | INVEST gate in Phase 3 of `scope/SKILL.md`. | - ---- - -### 18. Example Mapping (Rules Layer) - -| | | -|---|---| -| **Source** | Wynne, M. (2015). *Introducing Example Mapping*. Cucumber Blog. https://cucumber.io/blog/bdd/example-mapping-introduction/ | -| **Date** | 2015 | -| **Status** | Confirmed | -| **Core finding** | Inserting a "rules" layer between stories and examples prevents redundant or contradictory acceptance criteria. A story with many rules needs splitting; a story with many open questions is not ready for development. | -| **Mechanism** | Four card types: Story (yellow), Rules (blue), Examples (green), Questions (red). The rules layer groups related examples under the business rule they illustrate. 
Red cards (unanswered questions) are a first-class signal to stop and investigate. | -| **Where used** | `Rules (Business):` section in each `.feature` file. PO identifies business rules before writing Examples in Stage 2 Step B. | - ---- - -### 19. Declarative Gherkin - -| | | -|---|---| -| **Source** | Cucumber Team. (2024). *Better Gherkin*. Cucumber Documentation. https://cucumber.io/docs/bdd/better-gherkin/ | -| **Date** | 2024 | -| **Status** | Confirmed | -| **Core finding** | Declarative Gherkin ("When Bob logs in") produces specifications that survive UI changes. Imperative Gherkin ("When I click the Login button") couples specs to implementation details and breaks on every UI redesign. | -| **Mechanism** | Declarative steps describe *what happens* at the business level. Imperative steps describe *how the user interacts with a specific UI*. AI agents are especially prone to writing imperative Gherkin because they mirror literal steps. | -| **Where used** | Declarative vs. imperative table in Stage 2 Step B (Criteria) of `scope/SKILL.md`. | - ---- - -### 20. MoSCoW Prioritization (Within-Story Triage) - -| | | -|---|---| -| **Source** | Clegg, D., & Barker, R. (1994). *Case Method Fast-Track: A RAD Approach*. Addison-Wesley (DSDM origin). | -| **Date** | 1994 | -| **Status** | Confirmed | -| **Core finding** | Classifying requirements as Must/Should/Could/Won't forces explicit negotiation about what is essential vs. desired. When applied *within* a single story, it reveals bloated stories that should be split. | -| **Mechanism** | DSDM mandates that Musts cannot exceed 60% of total effort. At the story level: if a story has 12 Examples and only 3 are Musts, the remaining 9 can be deferred. This prevents gold-plating and keeps stories small. | -| **Where used** | MoSCoW triage in Stage 2 Step B (Criteria) of `scope/SKILL.md`. | - ---- - -### 28. Active Listening — Paraphrase-Clarify-Summarize - -| | | -|---|---| -| **Source** | Rogers, C. R., & Farson, R. E. 
(1957). *Active Listening*. Industrial Relations Center, University of Chicago. | -| **Date** | 1957 | -| **Alternative** | McNaughton, D. et al. (2008). Learning to Listen. *Topics in Early Childhood Special Education*, 27(4), 223–231. | -| **Status** | Confirmed — foundational clinical research; widely replicated | -| **Core finding** | Active listening — paraphrasing what was heard in the listener's own words, asking clarifying questions, then summarizing the main points and intent — reduces misunderstanding, builds trust, and confirms mutual understanding before proceeding. | -| **Mechanism** | Paraphrasing forces the listener to reconstruct the speaker's meaning, surfacing gaps immediately. Clarifying questions address residual ambiguity. Summarizing creates a shared record that both parties can confirm or correct. | -| **Where used** | PO summarization protocol in `scope/SKILL.md`: after each interview round, PO produces a "Here is what I understood" block before proceeding. | - ---- - -### 28a. Active Listening — Three-Level Structure - -| | | -|---|---| -| **Source** | Synthesis of: Nielsen (2010); Farrell (2017); Ambler (2002); Wynne (2015). | -| **Date** | 2010–2015 | -| **Status** | Synthesized rule of thumb — each component individually confirmed | -| **Core finding** | Active listening in requirements interviews operates at three granularities: **Level 1** (per answer) — immediate paraphrase; **Level 2** (per topic cluster) — transition summary; **Level 3** (end of interview) — full synthesis serving four downstream purposes. | -| **Level 3 — four uses** | 1. Accuracy gate (NN/G). 2. Scope crystallization (Ambler/FDD). 3. Input to domain modeling (Ambler/FDD). 4. Baseline trigger (Wynne/Cucumber). | -| **Where used** | Stage 1 Discovery sessions in `scope/SKILL.md`. | - ---- - -### 29. The Kipling Method — Five Ws and One H - -| | | -|---|---| -| **Source** | Kipling, R. (1902). *Just So Stories*. Macmillan. 
| -| **Date** | 1902 | -| **Alternative** | Hermagoras of Temnos (2nd century BCE) — seven circumstances of rhetoric. | -| **Status** | Practitioner synthesis — journalism, business analysis, investigative methodology | -| **Core finding** | The six interrogative questions (Who, What, When, Where, Why, How) form a complete framework for gathering all essential facts about any situation. Together they ensure completeness and prevent gaps. | -| **Where used** | Stage 1 Discovery, General questions (first session): the initial seven questions are an adaptation of the 5W1H framework. | - ---- - -### 30. BA Requirements Question Framework - -| | | -|---|---| -| **Source** | Brandenburg, L. (2025). *Requirements Discovery Checklist Pack*. TechCanvass. | -| **Date** | 2025 | -| **Status** | Practitioner synthesis — consolidated BA methodology, not peer-reviewed | -| **Core finding** | Ten questions consistently make the most difference in requirements elicitation: (1) What problem are we solving? (2) What happens if we do nothing? (3) Who uses this? (4) What does success look like? (5) Walk me through how this works today. (6) Where does this usually break? (7) What decisions will this help? (8) What should definitely not happen? (9) What happens if input is wrong? (10) What assumptions are we making? | -| **Where used** | Stage 1 Discovery, General questions: the "Success", "Failure", and "Out-of-scope" questions map to this framework. | - ---- - -### 43. Feature-Driven Development — Domain Modeling to Feature List - -| | | -|---|---| -| **Source** | Ambler, S. W. (2002). *Agile Modeling*. Wiley. https://www.agilemodeling.com/essays/fdd.htm | -| **Date** | 2002 | -| **Alternative** | Palmer, S. R., & Felsing, J. M. (2002). *A Practical Guide to Feature-Driven Development*. Prentice Hall. | -| **Status** | Confirmed | -| **Core finding** | FDD requires domain modeling *before* feature naming. Features are expressed as "Action result object" triples. 
Features group into Feature Sets (shared domain object), which group into Subject Areas. | -| **Mechanism** | Domain modeling extracts the vocabulary (nouns = candidate classes, verbs = candidate methods). Feature identification then asks: "what verbs act on each noun?" | -| **Where used** | Stage 1 Discovery in `scope/SKILL.md`: after session synthesis, PO performs domain analysis (nouns/verbs → subject areas → FDD "Action object" feature names) for first session. | - ---- - -### 44. Affinity Mapping / KJ Method — Bottom-Up Feature Identification - -| | | -|---|---| -| **Source** | Krause, R., & Pernice, K. (2024). Affinity Diagramming. *Nielsen Norman Group*. https://www.nngroup.com/articles/affinity-diagram/ | -| **Date** | 2024 (method origin: Kawakita, J., 1960s) | -| **Alternative** | Kawakita, J. (1967). *Abduction*. Chuokoronsha. | -| **Status** | Confirmed | -| **Core finding** | Affinity diagramming groups raw observations/requirements into clusters by bottom-up similarity — no categories are named until grouping is complete. This prevents confirmation bias from top-down pre-labelling. | -| **Where used** | Stage 1 Discovery in `scope/SKILL.md` (alternative to FDD domain modeling): PO uses affinity mapping on interview answers to derive feature clusters. Best suited when working from interview transcripts solo. | - ---- - -### 45. Event Storming — Domain Events to Functional Areas - -| | | -|---|---| -| **Source** | Brandolini, A. (2013–present). *EventStorming*. Leanpub / eventstorming.com. https://eventstorming.com | -| **Date** | 2013 | -| **Status** | Confirmed | -| **Core finding** | Event Storming is a collaborative workshop where domain experts place past-tense domain events on a timeline. Sorting the events creates natural Functional Area clusters — these are candidate feature groups. The workshop also produces Ubiquitous Language, a Problem Inventory, and Actor roles. 
| -| **Mechanism** | Temporal sequencing of domain events forces resolution of conflicting mental models across organisational silos. Clusters emerge from shared vocabulary and causal proximity. | -| **Where used** | Optional alternative in Stage 1 Discovery in `scope/SKILL.md` for cross-silo discovery. | - ---- - -### 46. Critical Incident Technique — Gap-Finding via Past Events - -| | | -|---|---| -| **Source** | Flanagan, J. C. (1954). "The critical incident technique." *Psychological Bulletin*, 51(4), 327–357. https://doi.org/10.1037/h0061470 | -| **Date** | 1954 | -| **Alternative** | Rosala, M. (2020). The Critical Incident Technique in UX. *Nielsen Norman Group*. https://www.nngroup.com/articles/critical-incident-technique/ | -| **Status** | Confirmed — foundational; ~200 follow-on empirical studies | -| **Core finding** | Anchoring an interview on a specific past incident ("Tell me about a time when X broke down") breaks schema-based recall. Stakeholders describing actual past events report real workarounds, edge cases, and failure modes that never surface when asked "how does this usually work?" | -| **Mechanism** | Direct questions elicit the stakeholder's mental schema — a sanitized, gap-free description of how things *should* work. Incidents bypass the schema because episodic memory is anchored to specific sensory and emotional detail. | -| **Where used** | Cross-cutting and per-feature questions (gap-finding) in Stage 1 Discovery in `scope/SKILL.md`. | - ---- - -### 47. Cognitive Interview — Memory-Enhancing Elicitation Technique - -| | | -|---|---| -| **Source** | Fisher, R. P., & Geiselman, R. E. (1992). *Memory-Enhancing Techniques for Investigative Interviewing: The Cognitive Interview*. Charles C. Thomas. | -| **Date** | 1984 (original); 1987 (enhanced CI); 1992 (manual) | -| **Alternative** | Moody, W., Will, R. P., & Blanton, J. E. (1996). Enhancing knowledge elicitation using the cognitive interview. 
*Expert Systems with Applications*, 10(1), 127–133. | -| **Status** | Confirmed — meta-analysis: Köhnken et al. (1999), *Psychology, Crime & Law*, 5(1-2), 3–27. | -| **Core finding** | The enhanced CI elicits ~35% more correct information than standard interviews with equal accuracy rates. | -| **Mechanism** | Four retrieval mnemonics: (1) mental reinstatement of context; (2) report everything; (3) temporal reversal; (4) perspective change. Each mnemonic opens a different memory access route, collectively surfacing what direct questions cannot. | -| **Where used** | Cross-cutting and per-feature questions (gap-finding) in Stage 1 Discovery in `scope/SKILL.md`. | - ---- - -### 48. Laddering / Means-End Chain — Surfacing Unstated Motivations - -| | | -|---|---| -| **Source** | Reynolds, T. J., & Gutman, J. (1988). "Laddering theory, method, analysis, and interpretation." *Journal of Advertising Research*, 28(1), 11–31. | -| **Date** | 1988 | -| **Status** | Confirmed — operationalised in IS research (Hunter & Beck 2000) | -| **Core finding** | Repeatedly asking "Why is that important to you?" climbs a means-end chain from concrete attribute → functional consequence → psychosocial consequence → terminal value. The stakeholder's first answer is rarely the real constraint. | -| **Mechanism** | The Gherkin "So that [benefit]" clause is structurally a single-rung means-end ladder. Full laddering reveals value conflicts between stakeholders whose surface requirements look identical but whose ladders diverge at the consequence level. | -| **Where used** | Cross-cutting and per-feature questions (gap-finding) in Stage 1 Discovery in `scope/SKILL.md`. | - ---- - -### 49. Funnel Technique — Question Ordering to Prevent Priming - -| | | -|---|---| -| **Source** | Rosala, M., & Moran, K. (2022). The Funnel Technique in Qualitative User Research. *Nielsen Norman Group*. 
https://www.nngroup.com/articles/the-funnel-technique-in-qualitative-user-research/ | -| **Date** | 2022 | -| **Alternative** | Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. | -| **Status** | Confirmed — standard NNG qualitative research protocol | -| **Core finding** | Starting with broad open-ended questions before narrowing to specifics prevents the interviewer from priming the interviewee's responses. | -| **Mechanism** | Priming bias is structural: any category name the interviewer introduces activates a schema that filters what the interviewee considers worth reporting. The funnel sequences questions so the interviewee's own categories emerge first. | -| **Where used** | Within each Stage 1 Discovery session in `scope/SKILL.md`. | - ---- - -### 50. Issues in Requirements Elicitation — Why Direct Questions Fail - -| | | -|---|---| -| **Source** | Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. Software Engineering Institute, Carnegie Mellon University. https://www.sei.cmu.edu/library/abstracts/reports/92tr012.cfm | -| **Date** | 1992 | -| **Alternative** | Sommerville, I., & Sawyer, P. (1997). *Requirements Engineering: A Good Practice Guide*. Wiley. | -| **Status** | Confirmed — foundational SEI technical report | -| **Core finding** | Stakeholders have three structural problems that make direct questioning insufficient: (1) they omit information that is "obvious" to them; (2) they have trouble communicating needs they have never had to articulate; (3) they may not know what they want until they see what they don't want. | -| **Mechanism** | Expert knowledge is largely procedural and tacit. When asked "how does the system work?", experts describe what they believe happens, not what actually happens. Gap-finding techniques are required because they bypass the expert's mental schema. 
| -| **Where used** | Theoretical justification for the 3-session interview structure and use of CIT, CI, and Laddering in `scope/SKILL.md`. | - ---- - -## Bibliography - -1. Ambler, S. W. (2002). *Agile Modeling*. Wiley. https://www.agilemodeling.com/essays/fdd.htm -2. Brandenburg, L. (2025). *Requirements Discovery Checklist Pack*. TechCanvass. -3. Brandolini, A. (2013–present). *EventStorming*. https://eventstorming.com -4. Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. https://www.sei.cmu.edu/library/abstracts/reports/92tr012.cfm -5. Clegg, D., & Barker, R. (1994). *Case Method Fast-Track: A RAD Approach*. Addison-Wesley. -6. Cohn, M. (2004). *User Stories Applied*. Addison-Wesley. -7. Cucumber Team. (2024). Better Gherkin. https://cucumber.io/docs/bdd/better-gherkin/ -8. Farrell, S. (2017). UX Research Cheat Sheet. *Nielsen Norman Group*. https://www.nngroup.com/articles/ux-research-cheat-sheet/ -9. Fisher, R. P., & Geiselman, R. E. (1992). *Memory-Enhancing Techniques for Investigative Interviewing*. Charles C. Thomas. -10. Flanagan, J. C. (1954). The critical incident technique. *Psychological Bulletin*, 51(4), 327–357. https://doi.org/10.1037/h0061470 -11. Kawakita, J. (1967). *Abduction*. Chuokoronsha. -12. Kipling, R. (1902). *Just So Stories*. Macmillan. -13. Köhnken, G., Milne, R., Memon, A., & Bull, R. (1999). The cognitive interview: A meta-analysis. *Psychology, Crime & Law*, 5(1-2), 3–27. -14. Krause, R., & Pernice, K. (2024). Affinity Diagramming. *Nielsen Norman Group*. https://www.nngroup.com/articles/affinity-diagram/ -15. McNaughton, D. et al. (2008). Learning to Listen. *Topics in Early Childhood Special Education*, 27(4), 223–231. -16. Moody, W., Will, R. P., & Blanton, J. E. (1996). Enhancing knowledge elicitation using the cognitive interview. *Expert Systems with Applications*, 10(1), 127–133. -17. Nielsen, J. (2010). *Interviewing Users*. Nielsen Norman Group. 
https://www.nngroup.com/articles/interviewing-users/ -18. Palmer, S. R., & Felsing, J. M. (2002). *A Practical Guide to Feature-Driven Development*. Prentice Hall. -19. Reynolds, T. J., & Gutman, J. (1988). Laddering theory, method, analysis, and interpretation. *Journal of Advertising Research*, 28(1), 11–31. -20. Rogers, C. R., & Farson, R. E. (1957). *Active Listening*. Industrial Relations Center, University of Chicago. -21. Rosala, M. (2020). The Critical Incident Technique in UX. *Nielsen Norman Group*. https://www.nngroup.com/articles/critical-incident-technique/ -22. Rosala, M., & Moran, K. (2022). The Funnel Technique. *Nielsen Norman Group*. https://www.nngroup.com/articles/the-funnel-technique-in-qualitative-user-research/ -23. Wake, B. (2003). INVEST in Good Stories, and SMART Tasks. *XP123.com*. -24. Wynne, M. (2015). Introducing Example Mapping. *Cucumber Blog*. https://cucumber.io/blog/bdd/example-mapping-introduction/ diff --git a/docs/scientific-research/software-economics.md b/docs/scientific-research/software-economics.md deleted file mode 100644 index becd695..0000000 --- a/docs/scientific-research/software-economics.md +++ /dev/null @@ -1,24 +0,0 @@ -# Scientific Research — Software Economics - -Foundations for the shift-left, early defect detection, and workflow ordering decisions in this template. - ---- - -### 16. Cost of Change Curve (Shift Left) - -| | | -|---|---| -| **Source** | Boehm, B. W. (1981). *Software Engineering Economics*. Prentice-Hall. | -| **Date** | 1981 | -| **Alternative** | Boehm, B., & Papaccio, P. N. (1988). Understanding and controlling software costs. *IEEE Transactions on Software Engineering*, 14(10), 1462–1477. | -| **Status** | Confirmed | -| **Core finding** | The cost to fix a defect multiplies by roughly 10x per SDLC phase: requirements (1x) → design (5x) → coding (10x) → testing (20x) → production (200x). A defect caught during requirements costs 200x less than the same defect found after release. 
| -| **Mechanism** | Defects compound downstream: a wrong requirement becomes a wrong design, which becomes wrong code, which becomes wrong tests, all of which must be unwound. Catching errors at the source eliminates the entire cascade. This is the empirical foundation for "shift left" — investing earlier in quality always dominates fixing later. | -| **Where used** | Justifies the multi-session PO elicitation model: every acceptance criterion clarified at scope prevents 10–200x rework downstream. Also justifies the adversarial pre-mortem at the end of each elicitation cycle, and the adversarial mandate in `verify/SKILL.md`. The entire 5-step pipeline is ordered to surface defects at the earliest (cheapest) phase. | - ---- - -## Bibliography - -1. Boehm, B. W. (1981). *Software Engineering Economics*. Prentice-Hall. -2. Boehm, B., & Papaccio, P. N. (1988). Understanding and controlling software costs. *IEEE Transactions on Software Engineering*, 14(10), 1462–1477. diff --git a/docs/scientific-research/testing.md b/docs/scientific-research/testing.md deleted file mode 100644 index 2c7f7d7..0000000 --- a/docs/scientific-research/testing.md +++ /dev/null @@ -1,137 +0,0 @@ -# Scientific Research — Testing - -Foundations for test design, TDD, BDD, and property-based testing used in this template. - ---- - -### 11. Observable Behavior Testing - -| | | -|---|---| -| **Source** | Fowler, M. (2018). *The Practical Test Pyramid*. Thoughtworks. https://martinfowler.com/articles/practical-test-pyramid.html | -| **Date** | 2018 | -| **Status** | Confirmed | -| **Core finding** | Tests should answer "if I enter X and Y, will the result be Z?" — not "will method A call class B first?" | -| **Mechanism** | A test is behavioral if its assertion describes something a caller/user can observe without knowing the implementation. The test should still pass if you completely rewrite the internals. 
| -| **Where used** | Contract test rule in `implementation/SKILL.md`: "Write every test as if you cannot see the production code." | - ---- - -### 12. Test-Behavior Alignment - -| | | -|---|---| -| **Source** | Google Testing Blog (2013). *Testing on the Toilet: Test Behavior, Not Implementation*. | -| **Date** | 2013 | -| **Status** | Confirmed | -| **Core finding** | Test setup may need to change if implementation changes, but the actual test shouldn't need to change if the code's user-facing behavior doesn't change. | -| **Mechanism** | Tests that are tightly coupled to implementation break on refactoring and become a drag on design improvement. Behavioral tests survive internal rewrites. | -| **Where used** | Contract test rule in `implementation/SKILL.md`, reviewer verification check in `reviewer.md`. | - ---- - -### 13. Tests as First-Class Citizens - -| | | -|---|---| -| **Source** | Martin, R. C. (2017). *First-Class Tests*. Clean Coder Blog. | -| **Date** | 2017 | -| **Status** | Confirmed | -| **Core finding** | Tests should be treated as first-class citizens of the system — not coupled to implementation. Bad tests are worse than no tests because they give false confidence. | -| **Mechanism** | Tests written as "contract tests" — describing what the caller observes — remain stable through refactoring. Tests that verify implementation details are fragile and create maintenance burden. | -| **Where used** | Contract test rule in `implementation/SKILL.md`, verification check in `reviewer.md`. | - ---- - -### 14. Property-Based Testing (Invariant Discovery) - -| | | -|---|---| -| **Source** | MacIver, D. R. (2016). *What is Property Based Testing?* Hypothesis. 
https://hypothesis.works/articles/what-is-property-based-testing/ | -| **Date** | 2016 | -| **Status** | Confirmed | -| **Core finding** | Property-based testing is "the construction of tests such that, when these tests are fuzzed, failures reveal problems that could not have been revealed by direct fuzzing." Property tests test *invariants* — things that must always be true about the contract. | -| **Mechanism** | Meaningful property tests assert invariants: `assert Score(x).value >= 0` tests the contract. Tautological tests assert reconstruction: `assert Score(x).value == x` tests the implementation. | -| **Where used** | Meaningful vs. Tautological table in `implementation/SKILL.md`. | - ---- - -### 15. Mutation Testing (Test Quality Verification) - -| | | -|---|---| -| **Source** | King, K. N., & Offutt, A. J. (1991). "A Fortran Language System for Mutation-Based Software Testing." *Software: Practice and Experience*, 21(7), 685–718. | -| **Date** | 1991 | -| **Alternative** | Mutation testing tools: Cosmic Ray, mutmut (Python) | -| **Status** | Confirmed | -| **Core finding** | A meaningful test fails when a mutation (small deliberate code change) is introduced. A tautological test passes even with mutations because it doesn't constrain the behavior. | -| **Mechanism** | If a test survives every mutation of the production code without failing, it tests nothing. Only tests that fail on purposeful "damage" to the code are worth keeping. | -| **Where used** | Implicitly encouraged: tests must describe contracts, not implementation, which is the theoretical complement to mutation testing. | - ---- - -### 51. Canon TDD — Authoritative Red-Green-Refactor Definition - -| | | -|---|---| -| **Source** | Beck, K. (2023). "Canon TDD." *tidyfirst.substack.com*. December 11, 2023. https://tidyfirst.substack.com/p/canon-tdd | -| **Date** | 2023 | -| **Alternative** | Fowler, M. (2023). "Test Driven Development." *martinfowler.com*. 
https://martinfowler.com/bliki/TestDrivenDevelopment.html | -| **Status** | Confirmed — canonical source; explicitly authored to stop strawman critiques | -| **Core finding** | The canonical TDD loop is: (1) write a list of test scenarios; (2) convert exactly one item into a runnable test; (3) make it pass; (4) optionally refactor; (5) repeat. Writing all test code before any implementation is an explicit anti-pattern. | -| **Mechanism** | The interleaving of test-writing and implementation is not cosmetic — each test drives interface decisions at the moment they are cheapest to make. | -| **Where used** | Justifies one-@id-at-a-time interleaved TDD in Step 3 of `implementation/SKILL.md`. | - ---- - -### 52. GOOS — Outer/Inner TDD Loop - -| | | -|---|---| -| **Source** | Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. | -| **Date** | 2009 | -| **Status** | Confirmed — canonical ATDD/BDD integration model | -| **Core finding** | Acceptance tests and unit tests operate at two separate, nested timescales. The outer loop: write one failing acceptance test before any implementation. The inner loop: drive implementation with unit-level Red-Green-Refactor cycles until the acceptance test passes. | -| **Mechanism** | The outer loop provides direction (what to build); the inner loop provides momentum (how to build it). The acceptance test stays red throughout all inner cycles and goes green only when the feature is complete. | -| **Where used** | Justifies the two-level structure in Step 3: outer loop per `@id` acceptance test, inner loop per unit. | - ---- - -### 53. Is TDD Dead? — Anti-Bureaucracy Evidence - -| | | -|---|---| -| **Source** | Beck, K., Fowler, M., & Hansson, D. H. (2014). "Is TDD Dead?" Video series. *martinfowler.com*. 
https://martinfowler.com/articles/is-tdd-dead/ | -| **Date** | 2014 | -| **Status** | Confirmed — primary evidence for what TDD practitioners reject as overhead | -| **Core finding** | Per-cycle human reviewer gates, per-cycle checklists, and tests with zero delta coverage are all explicitly identified as harmful overhead. The green bar is the quality gate — not a checklist. | -| **Mechanism** | Administrative overhead added to TDD workflows increases the cost per cycle without increasing coverage or catching defects. The optimal TDD loop is as lean as productive. | -| **Where used** | Justifies removing per-test reviewer gates. Self-declaration moves to end-of-feature (once), preserving accountability at feature granularity without interrupting cycle momentum. | - ---- - -### 54. Introducing BDD — Behavioural-Driven Development Origin - -| | | -|---|---| -| **Source** | North, D. (2006). "Introducing BDD." *Better Software Magazine*. https://dannorth.net/introducing-bdd/ | -| **Date** | 2006 | -| **Alternative** | Fowler, M. (2013). "Given When Then." *martinfowler.com*. https://martinfowler.com/bliki/GivenWhenThen.html | -| **Status** | Confirmed — primary BDD source | -| **Core finding** | BDD evolved directly from TDD to address persistent practitioner confusion. BDD reframes TDD vocabulary around observable behavior: scenarios instead of tests, Given-When-Then instead of Arrange-Act-Assert. | -| **Mechanism** | "Given" captures preconditions (Arrange), "When" captures the triggering event (Act), "Then" captures the observable outcome (Assert). Translating to G/W/T shifts focus from implementation mechanics to user-observable behavior. | -| **Where used** | Theoretical link between Gherkin `@id` Examples (Step 1 output) and the TDD inner loop (Step 3). | - ---- - -## Bibliography - -1. Beck, K. (2023). "Canon TDD." *tidyfirst.substack.com*. https://tidyfirst.substack.com/p/canon-tdd -2. Beck, K., Fowler, M., & Hansson, D. H. (2014). "Is TDD Dead?" 
*martinfowler.com*. https://martinfowler.com/articles/is-tdd-dead/ -3. Fowler, M. (2018). *The Practical Test Pyramid*. https://martinfowler.com/articles/practical-test-pyramid.html -4. Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. -5. Google Testing Blog. (2013). Testing on the Toilet: Test Behavior, Not Implementation. -6. King, K. N. (1991). *The Gamma (formerly mutants)*. -7. MacIver, D. R. (2016). What is Property Based Testing? *Hypothesis*. https://hypothesis.works/articles/what-is-property-based-testing/ -8. Martin, R. C. (2017). First-Class Tests. *Clean Coder Blog*. -9. North, D. (2006). Introducing BDD. *Better Software Magazine*. https://dannorth.net/introducing-bdd/ diff --git a/docs/features/in-progress/.gitkeep b/docs/spec/.gitkeep similarity index 100% rename from docs/features/in-progress/.gitkeep rename to docs/spec/.gitkeep diff --git a/docs/spec/context_map.md b/docs/spec/context_map.md new file mode 100644 index 0000000..b5f258d --- /dev/null +++ b/docs/spec/context_map.md @@ -0,0 +1,84 @@ +# Context Map: smith + +> DDD context map showing relationships between bounded contexts. +> Updated by the Software Architect when contexts or relationships change. +> Follows the DDD strategic design patterns for inter-context relationships. + +--- + +## Context Relationships + +| Upstream Context | Downstream Context | Relationship Pattern | Translation / Anti-Corruption Layer | +|-----------------|-------------------|---------------------|-------------------------------------| +| Template Source (External) | Connection | Customer-Supplier | Connection reads files from the template source; no translation needed — files are copied as-is | + +> smith has one bounded context (Connection). The Template Source is an external dependency, not a separate bounded context within smith. It provides files but has no domain logic or invariants that smith owns. 
The relationship is Customer-Supplier: smith (downstream) depends on the template source (upstream) for file content, but does not control it. If template versioning or validation becomes a domain concern, Template Source may be promoted to its own bounded context. + +--- + +## Context Map Diagram + +```mermaid +graph LR + Connection[Connection Context
connect · disconnect · update · status] + TS[Template Source
External Dependency] + + TS -->|provides files| Connection +``` + +> The Connection context is the sole bounded context within smith. It owns the Connection aggregate and all four CLI commands. The Template Source is an external dependency (default: agents-smith; override: `--from <source>`) that provides the agentic files to be written. There is no anti-corruption layer because the files are copied as-is — no domain translation is needed. + +--- + +## Integration Points + +| Integration | From | To | Mechanism | Contract | +|-------------|------|----|-----------|----------| +| File Provisioning | Template Source | Connection | importlib.resources from package data (bundled), filesystem read (local), HTTP download (URL) | Template source must provide a valid directory structure containing AGENTS.md, .opencode/, .templates/, and .flowr/ | + +> The only integration point is file provisioning: the Connection context reads agentic files from the template source. For the default (agents-smith), files are read from the `smith/data/` package directory via `importlib.resources` — no network access required. For `--from <path>`, files are read from the local filesystem. For `--from <url>`, files are downloaded via HTTP (`.tar.gz` or `.zip`) and extracted to a temporary directory. No domain events cross this boundary — it is a simple data dependency. + +--- + +## Anti-Corruption Layers + +| ACL | Protects Context | From Context | Translation Rules | +|-----|-----------------|--------------|-------------------| +| TemplateSourceAdapter | Connection | Template Source (External) | Normalises different source types (bundled package data, local path, remote URL) into a uniform file-provider interface that the Connection aggregate can consume without knowing the source type | + +> The TemplateSourceAdapter protects the Connection context from variations in how template files are obtained.
It translates between three source types (bundled package data via importlib.resources, local filesystem paths, remote URLs via HTTP download) and presents a uniform interface: "given a template source, provide the set of files to write." This keeps the Connection aggregate focused on its invariants (atomicity, safety, clean separation) without coupling to file resolution details. + +--- + +## Bounded Context Details + +### Connection Context + +**Responsibility:** Manage the full lifecycle of connecting agentic files to a project directory — connect, disconnect, update, and status. + +**Aggregate Root:** Connection + +**Key Invariants:** +- Atomicity: either all agentic files are written or none are +- Safety: existing files are never overwritten without explicit `--overwrite` flag +- Clean separation: on disconnect, no agentic files remain (only .gitignore entries) +- Consistency: .gitignore section and agentic file set are always in sync + +**CLI Commands (delivery mechanism):** +- `smith connect [--from <source>] [--overwrite]` +- `smith disconnect` +- `smith update` +- `smith status` + +**Entities:** Connection (aggregate root) + +**Value Objects:** TemplateSource, GitignoreSection, ConnectionStatus + +--- + +## Changes + +| Date | Source | Change | Reason | +|------|--------|--------|--------| +| 2026-05-01 | architecture-assessment | Complete rewrite for corrected product scope | Previous context map described the wrong product (Python project template with single Template context). smith is an AI pair programming platform with a Connection context and external Template Source dependency.
| +| 2026-05-01 | IN_20260501_local-bundle-reversal | Updated integration point and ACL description: bundled template resolution is now local package data via importlib.resources, not GitHub-based download; URL sources download via requests with no persistent cache | Local bundle provides instant offline default; GitHub-based resolution introduced runtime network dependency and cache staleness | \ No newline at end of file diff --git a/docs/spec/domain_model.md b/docs/spec/domain_model.md new file mode 100644 index 0000000..42eda0e --- /dev/null +++ b/docs/spec/domain_model.md @@ -0,0 +1,123 @@ +# Domain Model: smith + +> Current understanding of the business domain. +> Updated by the Domain Expert when domain understanding evolves. +> This document captures what code cannot express: WHY entities exist, HOW aggregates are bounded, and WHAT business capabilities each context serves. +> +> **Evolving document:** Event Storming fills the Event Map, Aggregate Candidates, and Context Candidates sections (workshop draft). Domain Modeling then formalizes them into Entities, Relationships, and Aggregate Boundaries. + +--- + +## Summary + +smith is an AI pair programming platform that connects standardised agent configurations (AGENTS.md, .opencode/, .templates/, .flowr/) to any project directory — and disconnects cleanly when done. Its domain is centred on the **Connection lifecycle**: connect, disconnect, update, and status. The domain has one bounded context — the **Connection context** — which owns the Connection aggregate and supporting value objects. The initial delivery (smith-commands) validates the full connect/work/disconnect cycle end-to-end with four CLI commands. 
+ +--- + +## Event Map + +### Domain Events + +| Event | Description | Trigger | Bounded Context | +|-------|-------------|---------|-----------------| +| `ConnectionRequested` | User invoked `smith connect` in a project directory | User runs `smith connect [--from <source>] [--overwrite]` | Connection | +| `ConnectionEstablished` | All agentic files written to the project directory atomically | All files written successfully | Connection | +| `ConnectionRolledBack` | Partial write detected; all written files removed to restore clean state | Write failure during connect | Connection | +| `DisconnectionRequested` | User invoked `smith disconnect` in a connected project directory | User runs `smith disconnect` | Connection | +| `DisconnectionCompleted` | All agentic files removed; managed .gitignore section preserved | All files removed successfully | Connection | +| `UpdateRequested` | User invoked `smith update` in a connected project directory | User runs `smith update` | Connection | +| `UpdateCompleted` | Agentic files updated to latest from template source | All files updated successfully | Connection | +| `StatusRequested` | User invoked `smith status` in a project directory | User runs `smith status` | Connection | +| `StatusReported` | Connection status displayed to the user | Status check completed | Connection | + +### Commands + +| Command | Description | Produces Event | Actor | +|---------|-------------|----------------|-------| +| `Connect` | Write all agentic files to a project directory from a template source | `ConnectionEstablished` or `ConnectionRolledBack` | Engineer | +| `Disconnect` | Remove all agentic files and managed .gitignore entries from a project directory | `DisconnectionCompleted` | Engineer | +| `Update` | Refresh agentic files from the template source in an already-connected project | `UpdateCompleted` | Engineer | +| `ReportStatus` | Display whether the project directory is connected and which agentic files are present | `StatusReported`
| Engineer | + +### Read Models + +| Read Model | Description | Consumes Event | Used By | +|------------|-------------|----------------|---------| +| `ConnectionStatus` | Whether the project is connected, which files are present, and the template source | `StatusRequested` | CLI output | +| `RollbackLog` | Files that were written before rollback was triggered | `ConnectionRolledBack` | CLI error output | + +--- + +## Context Candidates + +> Filled during Event Storming. Formalized in Bounded Contexts section below by Domain Modeling. + +| Candidate | Responsibility | Grouped Aggregates | Notes | +|-----------|---------------|--------------------|-------| +| Connection | Owns the full lifecycle of connecting/disconnecting agentic files to a project directory | Connection | Single context — the Connection lifecycle is the domain | + +--- + +## Aggregate Candidates + +> Filled during Event Storming. Formalized in Aggregate Boundaries section below by Domain Modeling. + +| Candidate | Events Grouped | Tentative Root Entity | Notes | +|-----------|---------------|-----------------------|-------| +| Connection | `ConnectionRequested`, `ConnectionEstablished`, `ConnectionRolledBack`, `DisconnectionRequested`, `DisconnectionCompleted`, `UpdateRequested`, `UpdateCompleted`, `StatusRequested`, `StatusReported` | Connection | Single aggregate — the Connection is the sole entry point for all four commands | + +--- + +## Bounded Contexts + +| Context | Responsibility | Key Entities | Integration Points | +|---------|----------------|--------------|-------------------| +| Connection | Manage the full lifecycle of connecting agentic files to a project directory: connect, disconnect, update, and status | Connection, TemplateSource, GitignoreSection | Template Source (external dependency for file resolution) | + +> smith has one bounded context (Connection). 
The Template Source is an infrastructure dependency, not a separate bounded context — it provides files but has no independent domain logic or invariants. If template versioning or validation becomes a domain concern in future, it may be extracted into its own context. + +--- + +## Entities + +| Name | Type | Description | Bounded Context | Aggregate Root? | +|------|------|-------------|-----------------|-----------------| +| Connection | Entity | The aggregate root representing a project directory's connection to smith's agentic configuration. Tracks connection state, template source, and the set of managed files. | Connection | Yes | + +--- + +## Value Objects + +| Name | Type | Description | Bounded Context | +|------|------|-------------|-----------------| +| TemplateSource | Value Object | Where agentic files come from: default (agents-smith), local path, or URL. Immutable once resolved. | Connection | +| GitignoreSection | Value Object | The `# smith managed` section in .gitignore. Contains entries for all agentic file patterns. Managed as a unit — added on connect, removed on disconnect. | Connection | +| ConnectionStatus | Value Object | The current state of a project's connection: connected, disconnected, or partial (some but not all agentic files present). | Connection | +| FileSpec | Value Object | A single file or directory to be written during connect or update, with a source path (from the template) and a destination path (in the project directory).
| Connection | + +--- + +## Relationships + +| Subject | Relation | Object | Cardinality | Notes | +|---------|----------|--------|-------------|-------| +| Connection | resolves | TemplateSource | 1:1 | Each connection resolves one template source | +| Connection | maintains | GitignoreSection | 1:1 | Each connection manages one .gitignore section | +| Connection | manages | FileSpec | 1:many | Each connection manages multiple file specifications | + +--- + +## Aggregate Boundaries + +| Aggregate | Root Entity | Invariants | Bounded Context | +|-----------|-------------|------------|-----------------| +| Connection | Connection | **Atomicity:** either all agentic files are written or none are — no partial connections, ever. **Safety:** user-tracked files (not managed by smith) are never overwritten; smith-managed files are auto-updated — zero silent overwrites of user-tracked files, ever. **Clean separation:** on disconnect, no agentic files remain (only .gitignore entries) — zero orphaned files after disconnect. **Consistency:** the .gitignore section and the agentic file set must always be in sync — connected means files present AND .gitignore section present; disconnected means no agentic files present but the .gitignore section is preserved as a guard. | Connection | + +--- + +## Changes + +| Date | Source | Change | Reason | +|------|--------|--------|--------| +| 2026-05-01 | architecture-assessment | Complete rewrite for corrected product scope | Previous domain model described the wrong product (Python project template). smith is an AI pair programming platform with connect/disconnect/update/status commands. | \ No newline at end of file diff --git a/docs/spec/glossary.md b/docs/spec/glossary.md new file mode 100644 index 0000000..ea32a65 --- /dev/null +++ b/docs/spec/glossary.md @@ -0,0 +1,255 @@ +# Glossary: smith + +> Living glossary of domain terms used in this project. +> Written and maintained by the Domain Expert during Discovery. 
+> Append-only: never edit or remove past entries. If a term changes, mark it retired in favor of the new entry and write a new entry. +> Code and tests take precedence over this glossary — if they diverge, refactor the code, not this file. + +--- + +## Entry Format + +``` +## + +**Definition:** + +**Aliases:** + +**Example:** + +**Source:** +``` + +Entries are sorted alphabetically. + +--- + +## Agentic File + +**Definition:** A file or directory that smith manages in a connected project, drawn from a template source and written to the project directory. + +**Aliases:** managed file, smith file + +**Example:** AGENTS.md, .opencode/, .templates/, and .flowr/ are the agentic files that `smith connect` writes to a project directory. + +**Source:** smith-commands + +--- + +## Agentic File Set + +**Definition:** The complete set of agentic files (AGENTS.md, .opencode/, .templates/, .flowr/) that smith writes as a unit during connection. In code, represented as `list[FileSpec]` rather than a separate entity. + +**Aliases:** file set, managed set + +**Example:** The Agentic File Set is written atomically — either all four items are present or none are. + +**Source:** smith-commands + +--- + +## Atomic Connection + +**Definition:** A connection guarantee that either all agentic files are written to the project directory or none are, ensuring no partial state exists. + +**Aliases:** none + +**Example:** When `smith connect` encounters a write failure, it rolls back all previously written files to maintain an atomic connection. + +**Source:** smith-commands + +--- + +## Clean Separation + +**Definition:** A disconnection guarantee that no agentic files remain in the project directory after `smith disconnect`, leaving only .gitignore entries as a trace. + +**Aliases:** none + +**Example:** After running `smith disconnect`, the project directory contains no .opencode/ directory, no .templates/ directory, no .flowr/ directory, and no AGENTS.md file. 
+ +**Source:** smith-commands + +--- + +## Connect + +**Definition:** The CLI command `smith connect [--from <source>] [--overwrite]` that writes all agentic files from a template source to the current project directory, adds a managed .gitignore section, and establishes a connection. + +**Aliases:** connect command, smith connect + +**Example:** Running `smith connect` in a project directory writes AGENTS.md, .opencode/, .templates/, and .flowr/ and adds their patterns to .gitignore under `# smith managed`. + +**Source:** smith-commands + +--- + +## Connection + +**Definition:** The aggregate root representing the state of a project directory's relationship to smith's agentic configuration, tracking whether the project is connected, the template source, and the set of managed files. + +**Aliases:** none + +**Example:** A Connection is established by `smith connect` and removed by `smith disconnect`. + +**Source:** smith-commands + +--- + +## Disconnect + +**Definition:** The CLI command `smith disconnect` that removes all agentic files managed by smith from the current project directory while preserving the `# smith managed` section in .gitignore as a guard for future usage. + +**Aliases:** disconnect command, smith disconnect + +**Example:** Running `smith disconnect` removes AGENTS.md, .opencode/, .templates/, .flowr/ (only those tracked in the `# smith managed` section), but preserves the section header in .gitignore. + +**Source:** smith-commands + +--- + +## Managed .gitignore Section + +**Definition:** A delimited section in .gitignore marked with `# smith managed` that contains entries for all agentic file patterns, added on connect and preserved on disconnect. The section's presence indicates an existing or previous connection. + +**Aliases:** gitignore section, managed section, GitignoreSection (code) + +**Example:** After `smith connect`, .gitignore contains a `# smith managed` section with entries like `.opencode/` and `.flowr/sessions/`.
+ +**Source:** smith-commands + +--- + +## Safety + +**Definition:** A connection guarantee that user-tracked files (not managed by smith) are never overwritten; smith-managed files are auto-updated, ensuring zero silent overwrites. + +**Aliases:** overwrite protection + +**Example:** When `smith connect` finds an existing AGENTS.md that is user-tracked (not in `# smith managed`), it skips user-tracked files and proceeds with the remaining files. + +**Source:** smith-commands + +--- + +## Smith + +**Definition:** An AI pair programming platform that connects standardised agent configurations to any project directory, enabling engineers to immediately work with consistent AI agent workflows. + +**Aliases:** agents-smith (PyPI package name), smith (Python module name) + +**Example:** `smith connect` in any project directory sets up AGENTS.md, .opencode/, .templates/, and .flowr/ so engineers can start using AI-assisted workflows immediately. + +**Source:** smith-commands + +--- + +## Status + +**Definition:** The CLI command `smith status` that reports whether the current project directory is connected, which agentic files are present, and which template source was used. + +**Aliases:** status command, smith status + +**Example:** Running `smith status` in a connected project shows "Connected" with a list of present agentic files and the template source. + +**Source:** smith-commands + +--- + +## Template Source + +**Definition:** The origin of agentic files to be written during connection: the default agents-smith templates, a local directory path, or a remote URL specified via `--from`. + +**Aliases:** source, template source + +**Example:** `smith connect --from ./my-templates` uses a local directory as the template source instead of the default agents-smith templates. 
+ +**Source:** smith-commands + +--- + +## Update + +**Definition:** The CLI command `smith update` that refreshes agentic files in a connected project directory from the original template source, applying any changes from the source to the project. + +**Aliases:** update command, smith update + +**Example:** Running `smith update` after the default agents-smith templates have been updated writes the latest versions of agentic files to the project directory. + +**Source:** smith-commands + +--- + +## ConnectionStatus + +**Definition:** A value object representing the current state of a project directory's connection to smith's agentic configuration: connected, disconnected, or partial (some but not all agentic files present). + +**Aliases:** status, connection state + +**Example:** Running `smith status` returns a ConnectionStatus of "connected" when all agentic files are present, or "partial" when some are missing. + +**Source:** smith-commands + +--- + +## FileSpec + +**Definition:** A value object representing a single file or directory to be written during a connect or update operation, with a source path (from the template) and a destination path (in the project directory). + +**Aliases:** file specification, agentic file (informal) + +**Example:** A FileSpec for AGENTS.md has source `templates/AGENTS.md` and destination `./AGENTS.md` in the project directory. + +**Source:** smith-commands + +--- + +## Managed Section Header (smith metadata) + +**Definition:** Source metadata stored within the `# smith managed` section header of `.gitignore`, using the format `# smith managed source:<source>`. Connection state is inferred from the presence of the managed section — no separate metadata file is created (stateless design). + +**Aliases:** section header metadata, stateless metadata + +**Example:** After `smith connect --from ./my-templates`, the `.gitignore` section header reads `# smith managed source:./my-templates`.
+ +**Source:** smith-commands + +--- + +## Agents-Smith + +**Definition:** The default bundled template source for smith, providing the standard agentic files (AGENTS.md, .opencode/, .templates/, .flowr/) packaged in the `smith/data/` directory and read via `importlib.resources`. + +**Aliases:** agents-smith, default template, bundled template + +**Example:** Running `smith connect` without `--from` reads the agents-smith templates from the packaged `smith/data/` directory — no network call required. + +**Source:** smith-commands, IN_20260501_local-bundle-reversal + +--- + +## Bundled Template Resolution + +**Definition:** The process by which the default `agents-smith` template source reads template files from the `smith/data/` package directory via `importlib.resources`, rather than downloading them at runtime. + +**Aliases:** local bundle resolution, packaged template resolution + +**Example:** When an engineer runs `smith connect`, BundledTemplateSource reads agentic files from `smith/data/AGENTS.md`, `smith/data/.opencode/`, etc. via `importlib.resources` — no network access or caching required. 
+ +**Source:** IN_20260501_local-bundle-reversal + +--- + +## Retired Terms + +| Term | Retired In Favor Of | Reason | Date | +|------|---------------------|--------|------| +| Cache Directory | — | Bundled source no longer uses caching; URL sources re-download each time | 2026-05-01 | +| GitHub-based Resolution | Local Bundle Resolution | Bundled source now reads from packaged files, not GitHub downloads | 2026-05-01 | +| CLI Application | Connection | Product scope changed from project template to AI pair programming platform; the CLI is now one delivery mechanism for the Connection aggregate | 2026-05-01 | +| CLI Entrypoint | Connection | Product scope changed; the entry point is now the `smith` command, not a generic CLI application | 2026-05-01 | +| Package Metadata | TemplateSource | Package metadata is an infrastructure concern; the domain concept is the template source that provides files | 2026-05-01 | +| Project Template | smith | Product scope changed from project template to AI pair programming platform | 2026-05-01 | +| Quality Gate | (kept, but not a domain term) | Quality Gate is a process concept, not a smith domain term | 2026-05-01 | +| Workflow Engine | (removed) | The workflow engine concept belongs to agents-smith (the template source), not to smith's domain | 2026-05-01 | \ No newline at end of file diff --git a/docs/spec/product_definition.md b/docs/spec/product_definition.md new file mode 100644 index 0000000..c3d03d3 --- /dev/null +++ b/docs/spec/product_definition.md @@ -0,0 +1,156 @@ +# Product Definition: smith + +> **Status:** DRAFT (2026-05-01) +> This document is the single source of truth for project scope and conventions. +> Supersedes IN_20260422 — the original product definition captured the wrong product scope. 
+ +--- + +## What smith IS + +- An AI pair programming platform that assimilates ordinary projects into high-performing, AI-augmented systems +- A CLI tool (`smith`) that connects standardised agent configurations (AGENTS.md, .opencode/, .templates/, .flowr/) to any project directory — and disconnects cleanly when done +- A standardisation engine: the same agents, the same flows, every project, by connecting once and working immediately +- A demonstration vehicle that ships with four working commands (`connect`, `disconnect`, `update`, `status`) so engineers see the full connect/work/disconnect cycle end-to-end + +## What smith IS NOT + +- Does NOT execute AI agents — smith configures projects to use AI agents, it doesn't run them +- Does NOT provide CI/CD infrastructure — it doesn't replace your pipelines or deployment setup +- Does NOT manage package dependencies or versions +- Does NOT enforce a specific programming language or framework — smith works with any project +- Does NOT silently overwrite project customizations — user-tracked files are skipped; smith-managed files are auto-updated +- Does NOT leave partial state — connects are atomic: all files or none + +## Why does this exist + +AI agents need structure. Without consistent agent configurations, each project has different .opencode agents, different workflows, and different templates. Engineers waste time maintaining these across projects. Existing solutions are either bare skeletons or opinionated frameworks. smith fills this gap by providing a standardised, reversible way to connect AI agent configurations to any project — new or legacy — so engineers can focus on building, not configuring. Like Agent Smith in the Matrix, smith enters a project, copies its patterns, and returns something more capable than what it found. 
+ +## Users + +- **Software Engineer** — runs `smith connect` in any project directory to immediately start working with standard AI agent workflows; runs `smith disconnect` when done +- **Tech Lead** — standardises AI agent configurations across the team's projects by connecting the same template to each one + +## Quality Attributes + +| Attribute | Scenario | Target | Priority | +|-----------|----------|--------|----------| +| Safety | When smith connects to a project that already has user-tracked agentic files (not managed by smith), it skips user-tracked files; smith-managed files are auto-updated without `--overwrite`. | Zero silent overwrites of user-tracked files, ever | Must (#1) | +| Atomicity | When smith connects, either all agentic files are written or none are | No partial connections, ever | Must (#2) | +| Clean separation | When smith disconnects from a project, no agentic files remain (only .gitignore entries) | Zero orphaned files after disconnect | Must (#3) | +| Usability | When an engineer runs `smith connect` in any project directory, they can immediately start working with standard flows and agents | < 1 minute from connect to working | Must (#4) | +| Modifiability | When a new template source type is needed, it can be added as an infrastructure adapter without changing domain logic | Zero domain changes for new source types | Should (#5) | +| Testability | When unit tests run, domain logic can be tested via port mocks without filesystem or network access | 100% domain test coverage without infrastructure | Should (#6) | + +--- + +## Out of Scope + +- AI execution engine (smith configures agents, doesn't run them) +- CI/CD infrastructure +- Package management +- Language/framework enforcement +- IDE-specific configuration + +## Delivery Order + +1 → **smith-commands** — `smith connect [--from ]`, `smith disconnect`, `smith update`, `smith status`. Four commands that demonstrate the full connect/work/disconnect cycle end-to-end. 
This feature validates the entire workflow and serves as the reference implementation for future features. + +--- + +## Project Conventions + +### Definition of Done + +All criteria must be met before a feature is considered done. + +**Development:** + +- [ ] All BDD scenarios from `features/smith-commands.feature` pass +- [ ] Quality Gate passes all three tiers (Design → Structure → Conventions) +- [ ] Test coverage meets project threshold (≥ 80%) +- [ ] No test coupling — tests verify behavior, not structure +- [ ] Production code follows priority order: YAGNI > DRY > KISS > OC > SOLID > Design Patterns +- [ ] Code uses ubiquitous language from glossary.md (Connection, FileSpec, TemplateSource, GitignoreSection, ConnectionStatus) +- [ ] Safety invariant verified: no silent overwrites of user-tracked files in any code path (all Must Examples in Rule 2 pass) +- [ ] Atomicity invariant verified: pair-atomic write (AGENTS.md + .opencode/) tested with rollback (SC-008) +- [ ] Clean separation invariant verified: disconnect removes all managed files, preserves user-tracked files (SC-014, SC-017) +- [ ] Exit codes verified: 0 (success), 1 (error) — all Examples assert correct exit code + +**Review — Tier 1: Design Correctness (does it do the right thing?)** + +- [ ] Domain invariants enforced: Safety (user-tracked files are never overwritten; smith-managed files are auto-updated), Atomicity (pair-atomic writes), Clean separation (no orphaned files) +- [ ] All ports are Protocol interfaces in the domain layer; no infrastructure imports in domain or application +- [ ] Connection aggregate is the sole entry point for all four commands +- [ ] CLI is a thin delivery adapter that delegates to application use cases + +**Review — Tier 2: Test Structure (are tests good enough?)** + +- [ ] Each Must Example has a passing test with observable outcome +- [ ] Tests mock ports (FileSystemPort, GitignorePort, TemplateSourcePort) — no filesystem/network in unit tests +- [ ] SC-008 
(pair-atomic rollback) has a test that simulates mid-write failure +- [ ] User-tracked file skipping tested for each managed file type +- [ ] No test couples to implementation details (private methods, file paths, internal state) + +**Review — Tier 3: Conventions (does it follow project standards?)** + +- [ ] CI pipeline passes all checks +- [ ] Code Review approved by R (independent reviewer, not the SE who wrote the code) +- [ ] Acceptance Testing passed — PO verifies BDD scenarios behave as expected +- [ ] `smith` CLI command works (`python -m smith` entry point) +- [ ] `--help` and `--version` flags work + +**Deployment:** + +- [ ] Release Verification checklist completed +- [ ] CHANGELOG.md updated with delivered scenarios + +### Deployment + +**Deployment type:** Library (installable Python package) + +**CLI command:** `smith` (entry point: `python -m smith`) +**PyPI package:** `agents-smith` + +#### Common (all deployment types) + +- [ ] Version bumped in pyproject.toml +- [ ] CHANGELOG.md updated with version and delivered scenarios +- [ ] Git tag created (format: `v<version>`) + +#### Library + +- [ ] Package builds without errors (`python -m build`) +- [ ] Package published to PyPI (`twine upload dist/*`) +- [ ] Installable from PyPI in clean environment + +#### Rollback Plan + +- Revert the git tag and re-publish the previous version to PyPI +- PyPI does not support deleting versions; yank the release instead (via the project's PyPI management page or API — twine itself has no yank command) + +### Branch Strategy + +- **Convention:** Trunk-based (short-lived feature branches from trunk, PR before merge) +- **Branch naming:** `<type>/<short-description>` (e.g., `feature/add-smith-commands`) +- **Merge policy:** Squash merge to trunk after approval + +### Naming + +- **CLI command:** `smith` +- **PyPI package:** `agents-smith` +- **Python module:** `smith` +- **Tagline:** Pair program with AI, the right way. 
+- **Branding:** Matrix/Agent Smith theme (see `docs/branding.md`) + +### .gitignore Convention + +smith manages its own section in .gitignore, marked with `# smith managed`. On connect, entries for agentic files are added to this section. On disconnect, the agentic files are removed but the `# smith managed` section is preserved (it serves as a guard for future usage). Files listed in `# smith managed` are treated as managed by smith; files outside this section are user-tracked and never touched by smith. + +--- + +## Scope Changes + +| Date | Session | Change | Reason | +|------|---------|--------|--------| +| 2026-05-01 | IN_20260501_stakeholder-reinterview | Complete product redefinition: smith is an AI pair programming platform, not just a Python template. Delivery order changed from cli-entrypoint to smith-commands. | Stakeholder clarified product scope during reinterview | \ No newline at end of file diff --git a/docs/spec/system.md b/docs/spec/system.md new file mode 100644 index 0000000..44e3bac --- /dev/null +++ b/docs/spec/system.md @@ -0,0 +1,157 @@ +# System Overview: smith + +> Current-state description of the production system. +> Updated by the Software Architect when domain understanding changes (rare). +> Contains only completed features — nothing from backlog or in-progress. +> This document captures what code cannot express: WHY contexts exist, HOW they relate, WHAT the aggregate boundaries are and why. + +--- + +## Summary + +smith is an AI pair programming platform that connects standardised agent configurations (AGENTS.md, .opencode/, .templates/, .flowr/) to any project directory — and disconnects cleanly when done. Its sole bounded context is the **Connection lifecycle**: connect, disconnect, update, and status. 
The system is delivered as a CLI tool (`smith`) with one external runtime dependency (`requests`, for URL template sources), using hexagonal architecture to keep domain logic independent of filesystem operations and template resolution. The primary users are software engineers and tech leads who need consistent AI agent configurations across projects. + +--- + +## Delivery + +**Mechanism:** CLI (command-line interface) + +The `smith` command is the sole delivery mechanism. Users interact with four subcommands: `connect`, `disconnect`, `update`, `status`. The CLI is a thin adapter that parses arguments and delegates to application use cases. The domain has no knowledge of argparse or terminal output — it enforces invariants and produces domain objects; the delivery layer translates these into human-readable output and exit codes. + +--- + +## Context (C4 Level 1) + +### Actors + +| Actor | Description | +|-------|-------------| +| Software Engineer | Runs `smith connect` in any project directory to immediately start working with standard AI agent workflows; runs `smith disconnect` when done | +| Tech Lead | Standardises AI agent configurations across the team's projects by connecting the same template to each one | + +### Systems + +| System | Kind | Description | +|--------|------|-------------| +| smith | Internal | CLI tool that manages the Connection lifecycle: connect, disconnect, update, status | +| Template Source | External | Provides agentic files for provisioning. 
Three variants: bundled (agents-smith, packaged in smith/data/), local path (filesystem), remote URL (HTTP/HTTPS) | +| Project Directory | External | The target project directory where agentic files are written/removed | + +### Interactions + +| Interaction | Behaviour | Technology | +|-------------|-----------|------------| +| Engineer → smith | Runs CLI commands (connect, disconnect, update, status) | Shell / terminal | +| smith → Template Source | Reads template files for provisioning | importlib.resources (bundled), requests (URL), pathlib (local) | +| smith → Project Directory | Writes/removes agentic files atomically; manages .gitignore section with source metadata in header; stateless — no metadata file | pathlib, shutil, tempfile | + +--- + +## Container (C4 Level 2) + +### Boundary: smith + +| Container | Technology | Responsibility | +|-----------|------------|----------------| +| CLI Delivery Layer | argparse (stdlib) | Parse CLI arguments, dispatch to use cases, format output, set exit codes | +| Application Services | Python (pure) | Orchestrate use cases: connect, disconnect, update, status. Enforce invariants via domain layer | +| Domain Layer | Python (pure) | Enforce invariants (atomicity, safety, clean separation, consistency). Define ports (Protocols) that infrastructure must implement | +| Infrastructure Adapters | Python + requests | Implement domain ports: BundledTemplateSource (importlib.resources from smith/data), LocalTemplateSource, UrlTemplateSource (requests + tarfile/zipfile), AtomicFileSystem, GitignoreManager, SectionMetadata | + +### Interactions + +| Interaction | Behaviour | +|-------------|-----------| +| CLI → Application Services | Dispatches parsed CLI arguments to the appropriate use case (ConnectUseCase, DisconnectUseCase, etc.) 
| +| Application Services → Domain | Delegates invariant enforcement to the Connection aggregate; uses ports for side effects | +| Infrastructure → Domain | Implements domain port Protocols; dependency arrow points inward (infrastructure depends on domain, not vice versa) | +| Infrastructure → Template Source | Reads template files: importlib.resources from packaged data (bundled), filesystem read (local), HTTP download (URL) | +| Infrastructure → Project Directory | Writes/removes agentic files atomically via temp-directory staging; manages .gitignore section with source metadata; stateless — no metadata file | + +--- + +## Module Structure + +| Module | Responsibility | Bounded Context | +|--------|----------------|-----------------| +| `smith.domain.connection` | Connection aggregate root — enforces atomicity, safety, clean separation, consistency invariants | Connection | +| `smith.domain.value_objects` | TemplateSource, GitignoreSection, ConnectionStatus, FileSpec — immutable value objects | Connection | +| `smith.domain.ports` | TemplateSourcePort, FileSystemPort, GitignorePort, MetadataPort — Protocol interfaces defining what the domain needs | Connection | +| `smith.application.connect` | ConnectUseCase — orchestrates conflict check, template resolution, atomic write, .gitignore update, metadata save | Connection | +| `smith.application.disconnect` | DisconnectUseCase — orchestrates file removal, preserving the .gitignore section as a guard | Connection | +| `smith.application.update` | UpdateUseCase — orchestrates connection check, template resolution, atomic overwrite, .gitignore update, metadata update | Connection | +| `smith.application.status` | StatusUseCase — orchestrates connection check, file presence check, status report | Connection | +| `smith.infrastructure.template_source` | BundledTemplateSource (importlib.resources from smith/data/), LocalTemplateSource, UrlTemplateSource (requests + tarfile/zipfile, no cache) — implement TemplateSourcePort 
| Connection | +| `smith.infrastructure.filesystem` | AtomicFileSystem — implements FileSystemPort with temp-directory staging | Connection | +| `smith.infrastructure.gitignore` | GitignoreManager — implements GitignorePort with delimited section management | Connection | +| `smith.infrastructure.metadata` | SectionMetadata — delegates to GitignoreManager for source metadata in gitignore section header (stateless — no .smith.yaml file) | Connection | +| `smith.delivery.cli` | build_parser(), main(), command handlers — argparse setup and dispatch | Connection | + +--- + +## Domain Model Documentation + +### Why Each Context Exists + +| Bounded Context | Business Capability | Why It's Separate | +|-----------------|---------------------|-------------------| +| Connection | Manage the full lifecycle of connecting agentic files to a project directory | The Connection lifecycle is the core domain — connect, disconnect, update, status. It encapsulates all invariants (atomicity, safety, clean separation, consistency) and is the sole entry point for all four commands. No other context is needed because the domain is small and cohesive. | + +### Aggregate Boundary Rationale + +| Aggregate | Why These Entities Are Grouped | Transactional Invariant | +|-----------|-------------------------------|------------------------| +| Connection | The Connection aggregate root owns the TemplateSource, GitignoreSection, and the list of FileSpecs. All operations (connect, disconnect, update, status) go through the Connection root. The file set cannot exist independently — it is always part of a Connection. | **Atomicity:** either all agentic files are written or none are. **Safety:** user-tracked files are skipped; smith-managed files are auto-updated. **Clean separation:** on disconnect, no agentic files remain. **Consistency:** .gitignore section and agentic file set are always in sync. 
| + +--- + +## Active Constraints + +- **Minimal runtime dependencies:** The package has one external runtime dependency (`requests`), used only for URL template source resolution. The bundled `agents-smith` source reads from packaged files via `importlib.resources` — no network call required. All other functionality uses Python stdlib. See ADR-007. +- **Atomicity via temp-directory staging:** All file writes must be staged to a temporary directory before being committed to the project directory. No partial connections are allowed. +- **Safety via pre-write conflict check:** Before any write, the project directory must be scanned for existing agentic files. User-tracked files are skipped; smith-managed files are auto-updated. +- **Clean separation via managed .gitignore section:** The `# smith managed` / `# end smith managed` delimiters must be used to mark the section. On disconnect, agentic files are removed but the section is preserved as a guard for future usage. +- **Hexagonal architecture:** Domain logic must not import from infrastructure, application, or delivery layers. The dependency arrow always points inward. +- **Usability:** `smith connect` must complete (files written and .gitignore updated) in under 1 minute in any project directory. + +--- + +## Key Decisions + +- **argparse as CLI framework** — Sufficient for four subcommands; maintains minimal runtime dependencies. See ADR-001. +- **Atomic file writes via temp-directory staging** — All files written to a temp directory first, then moved atomically. On failure, the temp directory is discarded. See ADR-002. +- **Hexagonal architecture (Ports & Adapters)** — Domain logic is independent of filesystem, network, and CLI. Ports are Protocol interfaces defined in the domain layer; infrastructure adapters implement them. +- **Stateless design — no .smith.yaml** — Connection state is inferred from the `# smith managed` section in `.gitignore`. 
Source metadata is stored in the section header (e.g., `# smith managed source:agents-smith`). No separate metadata file is created. ADR-004 (originally defining .smith.yaml) is superseded by this stateless decision. +- **No smart merge** — For `.flowr/` and `.templates/` that already exist, `--overwrite` replaces entirely. No partial merge logic. This is a deliberate simplicity trade-off (YAGNI > DRY). +- **Local bundled template resolution** — The default `agents-smith` template source reads agentic files from `smith/data/` via `importlib.resources`. No network call is required. A manual script (`scripts/update-bundle.sh`) syncs the bundle from the agents-smith `v8_release` branch when a new release is prepared. See ADR-007. + +--- + +## ADRs + +See `docs/adr/` for the full decision record. + +- ADR-001: Use argparse as CLI framework (minimal runtime dependencies) — `docs/adr/ADR_20260501_argparse-cli-framework.md` +- ADR-002: Atomic file writes via temp-directory staging — `docs/adr/ADR_20260501_atomic-file-writes-via-temp-directory.md` +- ADR-003: Hexagonal architecture (Ports & Adapters) — `docs/adr/ADR_20260501_hexagonal-architecture.md` +- ADR-004: .smith.yaml metadata file — **Superseded** by the stateless design (source metadata in the `.gitignore` section header; see Key Decisions) — `docs/adr/ADR_20260501_smith-yaml-metadata.md` +- ADR-005: No smart merge for .flowr/ and .templates/ — `docs/adr/ADR_20260501_no-smart-merge.md` +- ADR-006: GitHub-based bundled template resolution — **Superseded** by ADR-007 +- ADR-007: Local bundled template resolution (importlib.resources) — `docs/adr/ADR_20260501_local-bundled-template-resolution.md` + +--- + +## Completed Features + +See `docs/features/` for accepted features. 
+ +--- + +## Changes + +| Date | Source | Change | Reason | +|------|--------|--------|--------| +| 2026-05-01 | architecture-assessment | Initial system overview | New feature: smith-commands (connect, disconnect, update, status) | +| 2026-05-01 | IN_20260501_agents-smith-dependency-resolution | Changed bundled template resolution from importlib.resources/smith/data/ to GitHub-based download + local cache; updated dependency constraint from zero runtime deps to one (requests) | Bundled template files in smith/data/ were stale copies that would go out of sync; GitHub-based resolution ensures templates are always current | +| 2026-05-01 | IN_20260501_local-bundle-reversal | Reverted bundled template resolution to local package (importlib.resources + smith/data/); fully implemented UrlTemplateSource; removed caching; deprecated BDD examples a1b2c3d4 and e5f6g7h8; superseded ADR-006 with ADR-007 | GitHub-based resolution introduced runtime network dependency and cache staleness issues; local bundle provides instant offline default | \ No newline at end of file diff --git a/docs/spec/technical_design.md b/docs/spec/technical_design.md new file mode 100644 index 0000000..77b6549 --- /dev/null +++ b/docs/spec/technical_design.md @@ -0,0 +1,722 @@ +# Technical Design: smith + +> Technical design document for the smith-commands feature. +> Updated by the Software Architect when stack, contracts, or interfaces change. +> Contract-first design: API and event contracts are defined here before implementation begins. + +--- + +## Feature + +`docs/features/smith-commands/` — the connect/disconnect/update/status CLI commands. + +--- + +## Architectural Style + +**Style:** Hexagonal (Ports & Adapters) + +**Rationale:** smith's core domain — the Connection lifecycle — must be testable in isolation from filesystem operations, network requests, and CLI argument parsing. 
The quality attribute priority (Safety > Atomicity > Clean Separation > Usability) demands that domain invariants are enforced without coupling to infrastructure. Hexagonal architecture achieves this by defining ports (Protocol interfaces) in the domain layer that infrastructure adapters implement. The CLI is a delivery mechanism — a thin adapter that translates argparse results into domain commands. This allows the Connection aggregate to enforce atomicity and safety invariants without knowing whether files are written to a real filesystem or an in-memory test double. The dependency arrow always points inward: infrastructure → application → domain (Cockburn, 2005; Evans, 2003). + +Note: The Safety invariant protects user-tracked files (not managed by smith) from silent overwrite. Smith-managed files (in the `# smith managed` section) may be updated by `smith connect` (auto-update) and `smith update` without `--overwrite`. + +--- + +## Quality Attributes + +| Attribute | Priority | Architectural Decision | ADR Ref | +|-----------|----------|----------------------|---------| +| Safety | 1 (Must) | Conflict detection before any write; `--overwrite` gate enforced for user-tracked files; no silent overwrites of user-tracked files ever; smith-managed files may be updated without `--overwrite` | — | +| Atomicity | 2 (Must) | Temp-directory staging with atomic rename; all files written to staging area first, then moved to final locations; on failure, staging area is discarded | ADR-002 | +| Clean Separation | 3 (Must) | Managed `.gitignore` section with clear delimiters; disconnect removes all agentic files while preserving the section as a guard; connection state inferred from the managed section in `.gitignore` (stateless — no metadata file) | — | +| Usability | 4 (Must) | Four subcommands with clear output; argparse provides help text; exit codes distinguish success/error | ADR-001 | +| Modifiability | 5 (Should) | Hexagonal architecture allows adding new template 
sources (URL types, git repos) without changing domain logic; new CLI flags are thin delivery-layer additions | — | +| Testability | 6 (Should) | Domain logic tested via port mocks; no filesystem or network in unit tests; integration tests use temp directories | — | + +--- + +## Stack + +| Layer | Technology | Version | Rationale | +|-------|-----------|---------|-----------| +| Language | Python | 3.13 | Project requirement (pyproject.toml: `requires-python = ">=3.13"`) | +| CLI Framework | argparse | stdlib | Sufficient for four subcommands with options; maintains minimal runtime dependencies (ADR-001) | +| Package metadata | importlib.metadata | stdlib | Already used for version/description; no new dependency | +| HTTP client | requests | PyPI | URL template source resolution (tar.gz/zip download); cleaner API and error handling than urllib.request (ADR-007) | +| Archive extraction | tarfile / zipfile | stdlib | Extract downloaded template archives for URL sources; no new dependency | +| Package resources | importlib.resources | stdlib | Read bundled template files from `smith.data` package; no new dependency | +| File operations | pathlib / shutil / tempfile | stdlib | Atomic writes, directory operations; no new dependency | +| Metadata storage | — | — | Connection state inferred from `# smith managed` section in `.gitignore`; source metadata stored in section header (e.g., `# smith managed source:agents-smith`); no separate metadata file — stateless design | + +**Minimal runtime dependencies** is a deliberate constraint. The only external dependency is `requests` (used for URL template source resolution). The bundled `agents-smith` source reads from packaged files via `importlib.resources` — no network call needed. See ADR-007 for the rationale. 
+ +--- + +## Module Structure + +``` +smith/ + __init__.py # Package marker + __main__.py # Entry point: python -m smith + domain/ + __init__.py + connection.py # Connection aggregate root + value_objects.py # TemplateSource, GitignoreSection, ConnectionStatus, FileSpec + ports.py # TemplateSourcePort, FileSystemPort, GitignorePort, MetadataPort (Protocols) + application/ + __init__.py + connect.py # ConnectUseCase + disconnect.py # DisconnectUseCase + update.py # UpdateUseCase + status.py # StatusUseCase + infrastructure/ + __init__.py + template_source.py # BundledTemplateSource (importlib.resources), LocalTemplateSource, UrlTemplateSource + filesystem.py # AtomicFileSystem + gitignore.py # GitignoreManager + metadata.py # SectionMetadata + delivery/ + __init__.py + cli.py # build_parser(), main(), command handlers +``` + +**Dependency direction:** `delivery` → `application` → `domain` ← `infrastructure` + +The domain layer has **zero** imports from application, infrastructure, or delivery. The application layer imports from domain only. Infrastructure implements domain ports. Delivery calls application use cases. + +**Rationale:** This structure enforces the hexagonal boundary. The Connection aggregate enforces invariants (atomicity, safety, clean separation) without knowing whether files are written to a real filesystem or a test double. New template source types (git repos, archives) are added as infrastructure adapters without touching domain or application code. + +--- + +## API Contracts + +### `smith connect [--from ] [--overwrite]` + +**Behaviour:** Write all agentic files from the template source to the current project directory. Add a managed `.gitignore` section with source metadata in the section header. 
+ +**Request:** +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `--from` | string | No | `agents-smith` | Template source: `agents-smith` (bundled), local path, or URL | +| `--overwrite` | flag | No | False | Replace existing agentic files without prompting | + +**Response (stdout):** +| Condition | Output | Exit Code | +|-----------|--------|-----------| +| Success | `Connected from <source>.` + list of files written | 0 | +| Error | `Error: <message>` | 1 | + +**Preconditions:** +- Current directory is a project directory (writable) +- If already connected (`# smith managed` section exists), auto-update managed files +- User-tracked files (not in `# smith managed` section) are skipped, not reported as conflicts +- `--overwrite` replaces all managed files; user-tracked files are always preserved + +**Postconditions:** +- All agentic files present in project directory (atomicity) +- `# smith managed` section added to `.gitignore` with source metadata in header +- No partial state on failure (atomicity) + +--- + +### `smith disconnect` + +**Behaviour:** Remove all smith-managed agentic files from the current project directory. Preserve the `# smith managed` section in `.gitignore` (serves as guard for future usage). Smith is stateless — no metadata file to remove. + +**Request:** No parameters. 
+ +**Response (stdout):** +| Condition | Output | Exit Code | +|-----------|--------|-----------| +| Success | `Disconnected.` + list of files removed | 0 | +| Not connected | `Not connected — nothing to disconnect.` | 0 | +| Error | `Error: ` | 1 | + +**Preconditions:** +- Current directory is a project directory (writable) + +**Postconditions:** +- No smith-managed agentic files remain in project directory (clean separation) +- `# smith managed` section preserved in `.gitignore` (guard for future usage) +- If `.gitignore` is empty after removal, it is left as an empty file (not deleted) + +--- + +### `smith update` + +**Behaviour:** Refresh agentic files in a connected project directory from the original or specified template source. Overwrite all managed agentic files with latest versions. If the project is not connected, auto-connect (same as `smith connect`). + +**Request:** Optional `--from ` to use a different template source. + +**Response (stdout):** +| Condition | Output | Exit Code | +|-----------|--------|-----------| +| Success | `Updated from .` + list of files updated | 0 | +| Not connected (auto-connect) | Same as `smith connect` | 0 | +| Error | `Error: ` | 1 | + +**Preconditions:** +- Template source must be reachable + +**Postconditions:** +- All agentic files updated to latest from template source +- Source metadata in `.gitignore` section header updated +- `.gitignore` managed section patterns updated if changed + +--- + +### `smith status` + +**Behaviour:** Report whether the current project directory is connected, which agentic files are present, and which template source was used. + +**Request:** No parameters. 
+ +**Response (stdout):** +| Condition | Output | Exit Code | +|-----------|--------|-----------| +| Connected | `Connected from .` + list of files with status | 0 | +| Disconnected | `Not connected.` | 1 | +| Partial | `Partial connection — some files missing:` + list with status | 1 | + +**Preconditions:** None (always succeeds in reporting). + +**Postconditions:** None (read-only, no side effects). + +--- + +## Event Contracts + +smith is a synchronous CLI tool with no event-driven communication. All operations are request-response within a single process. No event contracts are needed for the current architecture. + +If smith evolves to support background operations or daemon mode, event contracts will be defined at that time (YAGNI). + +--- + +## Interface Definitions + +### TemplateSourcePort + +```python +from pathlib import Path +from typing import Protocol + + +class TemplateSourcePort(Protocol): + """Port for resolving template files from a source. + + Implementations: BundledTemplateSource, LocalTemplateSource, UrlTemplateSource. + The domain defines this interface; infrastructure adapters implement it. + """ + + def resolve(self) -> list[FileSpec]: + """Resolve the template source into a list of file specifications. + + Returns: + List of FileSpec objects, each containing a relative path and content. + + Raises: + TemplateSourceError: If the source cannot be resolved + (not found, network error, invalid archive). + """ + ... + + def gitignore_patterns(self) -> list[str]: + """Return gitignore patterns for the managed section. + + Returns: + List of gitignore patterns (e.g., ['.opencode/', '.templates/', + '.flowr/sessions/']). + """ + ... +``` + +### FileSystemPort + +```python +class FileSystemPort(Protocol): + """Port for atomic file system operations. + + Implementations: AtomicFileSystem (production), InMemoryFileSystem (tests). + The domain defines this interface; infrastructure adapters implement it. 
+ """ + + def check_conflicts(self, paths: list[Path]) -> list[Path]: + """Check which paths already exist in the project directory. + + Args: + paths: List of relative paths to check. + + Returns: + List of paths that already exist in the project directory. + """ + ... + + def write_atomic(self, specs: list[FileSpec]) -> None: + """Write all file specifications atomically to the project directory. + + Either all files are written or none are. On failure, any partially + written files are rolled back. + + Args: + specs: List of FileSpec objects with relative paths and content. + + Raises: + FileSystemError: If any write fails (rolled back to clean state). + """ + ... + + def remove(self, paths: list[Path]) -> None: + """Remove files and directories from the project directory. + + Args: + paths: List of relative paths to remove. + + Raises: + FileSystemError: If any removal fails. + """ + ... + + def exists(self, paths: list[Path]) -> dict[Path, bool]: + """Check which paths exist in the project directory. + + Args: + paths: List of relative paths to check. + + Returns: + Dictionary mapping each path to whether it exists. + """ + ... +``` + +### GitignorePort + +```python +class GitignorePort(Protocol): + """Port for managing the smith-managed section in .gitignore. + + Implementations: GitignoreManager (production), InMemoryGitignore (tests). + The domain defines this interface; infrastructure adapters implement it. + + Connection state is inferred from the managed section in .gitignore, + not from a separate metadata file. This port is the primary state mechanism. + """ + + def add_section(self, patterns: list[str]) -> None: + """Add a managed section to .gitignore with the given patterns. + + Creates .gitignore if it does not exist. The section is delimited by + '# smith managed' and '# end smith managed' markers. + + Args: + patterns: List of gitignore patterns to include. + """ + ... 
+ + def has_section(self) -> bool: + """Check whether .gitignore contains a smith-managed section. + + Returns: + True if the managed section exists, False otherwise. + """ + ... + + def get_patterns(self) -> list[str]: + """Return the gitignore patterns from the managed section. + + Returns: + List of gitignore patterns currently in the managed section. + Returns an empty list if the section does not exist. + """ + ... +``` + +### MetadataPort + +```python +class MetadataPort(Protocol): + """Port for reading and writing connection metadata. + + Connection state is inferred from the '# smith managed' section in .gitignore, + not from a separate metadata file. This port handles source metadata stored + within the gitignore section header (e.g., '# smith managed source:agents-smith'). + + Implementations: GitignoreManager (production, dual-implements GitignorePort + and MetadataPort), InMemoryGitignore (tests). + The domain defines this interface; infrastructure adapters implement it. + """ + + def save_source(self, source: TemplateSource) -> None: + """Write template source metadata to the gitignore section header. + + Args: + source: The template source used for the connection. + """ + ... + + def load_source(self) -> TemplateSource | None: + """Read template source metadata from the gitignore section header. + + Returns: + The stored TemplateSource, or None if not connected. + """ + ... +``` + +--- + +## Value Objects + +### FileSpec + +```python +from dataclasses import dataclass +from pathlib import Path + + +@dataclass(frozen=True) +class FileSpec: + """A file to be written from a template source to a project directory. + + Attributes: + relative_path: Path relative to the project root + (e.g., 'AGENTS.md', '.opencode/agents/po.md'). + content: File content as bytes. 
+ """ + + relative_path: Path + content: bytes +``` + +### TemplateSource + +```python +from dataclasses import dataclass +from typing import Literal + + +@dataclass(frozen=True) +class TemplateSource: + """The origin of agentic files. + + Attributes: + kind: 'bundled', 'local', or 'url'. + location: 'agents-smith' for bundled, absolute path for local, URL for url. + """ + + kind: Literal["bundled", "local", "url"] + location: str +``` + +### ConnectionStatus + +```python +from dataclasses import dataclass +from enum import Enum + + +class ConnectionState(Enum): + """Possible states of a project's connection.""" + + CONNECTED = "connected" + DISCONNECTED = "disconnected" + PARTIAL = "partial" + + +@dataclass(frozen=True) +class ConnectionStatus: + """The current state of a project's connection. + + Attributes: + state: Whether connected, disconnected, or partial. + source: The template source (None if disconnected). + present_files: List of agentic file paths that exist. + missing_files: List of agentic file paths that are missing. + """ + + state: ConnectionState + source: TemplateSource | None + present_files: list[Path] + missing_files: list[Path] +``` + +### GitignoreSection + +```python +from dataclasses import dataclass + + +@dataclass(frozen=True) +class GitignoreSection: + """The managed section in .gitignore. + + Attributes: + patterns: List of gitignore patterns (e.g., ['.opencode/', '.templates/']). + start_marker: Section start delimiter (default: '# smith managed'). + end_marker: Section end delimiter (default: '# end smith managed'). 
+ """ + + patterns: list[str] + start_marker: str = "# smith managed" + end_marker: str = "# end smith managed" +``` + +--- + +## C4 Diagrams + +### Context (C4 Level 1) + +```mermaid +graph TB + Engineer[Software Engineer / Tech Lead] + smith[smith CLI] + TS[Template Source] + PD[Project Directory] + + Engineer -->|runs commands| smith + smith -->|reads templates| TS + smith -->|writes/removes agentic files| PD + smith -->|manages .gitignore section + source metadata| PD +``` + +**Actors:** + +| Actor | Description | +|-------|-------------| +| Software Engineer | Runs `smith connect` in any project directory to start working with standard AI agent workflows; runs `smith disconnect` when done | +| Tech Lead | Standardises AI agent configurations across the team's projects by connecting the same template to each one | + +**Systems:** + +| System | Kind | Description | +|--------|------|-------------| +| smith | Internal | CLI tool that connects/disconnects standardised agent configurations to project directories | +| Template Source | External | Provides agentic files: bundled (agents-smith), local path, or remote URL | +| Project Directory | External | The target project where agentic files are written/removed | + +**Interactions:** + +| Interaction | Behaviour | Technology | +|-------------|-----------|------------| +| Engineer → smith | Runs CLI commands (connect, disconnect, update, status) | Shell / terminal | +| smith → Template Source | Reads template files for provisioning | requests (bundled/URL), pathlib (local) | +| smith → Project Directory | Writes/removes agentic files, manages .gitignore section with source metadata in header | pathlib, shutil, tempfile | + +### Container (C4 Level 2) + +```mermaid +graph TB + CLI[CLI Delivery Layer
argparse] + App[Application Services
Use Case Orchestration] + Domain[Domain Layer
Connection Aggregate, Value Objects, Ports] + Infra[Infrastructure Adapters
TemplateSource, FileSystem, Gitignore, Metadata] + TS[Template Source
External] + PD[Project Directory
Filesystem] + + CLI -->|dispatches commands| App + App -->|enforces invariants| Domain + Infra -.->|implements ports| Domain + Infra -->|reads templates| TS + Infra -->|writes/removes files| PD +``` + +**Boundary: smith** + +| Container | Technology | Responsibility | +|-----------|------------|----------------| +| CLI Delivery Layer | argparse (stdlib) | Parse CLI arguments, dispatch to use cases, format output | +| Application Services | Python (pure) | Orchestrate use cases: connect, disconnect, update, status | +| Domain Layer | Python (pure) | Enforce invariants (atomicity, safety, clean separation, consistency); define ports | +| Infrastructure Adapters | Python + requests | Implement domain ports: BundledTemplateSource (importlib.resources from smith/data), LocalTemplateSource, UrlTemplateSource (requests + tarfile/zipfile), AtomicFileSystem, GitignoreManager, SectionMetadata | + +**Interactions:** + +| Interaction | Behaviour | +|-------------|-----------| +| CLI → Application Services | Dispatches parsed CLI arguments to the appropriate use case | +| Application Services → Domain | Delegates invariant enforcement to the Connection aggregate | +| Infrastructure → Domain | Implements domain port Protocols; dependency arrow points inward | +| Infrastructure → Template Source | Reads template files: importlib.resources from package data (bundled), filesystem read (local), HTTP download (URL) | +| Infrastructure → Project Directory | Writes/removes agentic files atomically; manages .gitignore section with source metadata; stateless — no metadata file | + +### Component (C4 Level 3) — Domain Layer + +```mermaid +graph TB + Connection[Connection
Aggregate Root] + TS[TemplateSource
Value Object] + GS[GitignoreSection
Value Object] + CS[ConnectionStatus
Value Object] + FS[FileSpec
Value Object] + + Connection -->|resolves| TS + Connection -->|maintains| GS + Connection -->|reports| CS + Connection -->|manages| FS +``` + +### Component (C4 Level 3) — Application Services + +```mermaid +graph TB + Connect[ConnectUseCase] + Disconnect[DisconnectUseCase] + Update[UpdateUseCase] + Status[StatusUseCase] + + Connect -->|creates Connection| Connection[Connection Aggregate] + Disconnect -->|removes Connection| Connection + Update -->|refreshes Connection| Connection + Status -->|queries Connection| Connection +``` + +### Component (C4 Level 3) — Infrastructure Adapters + +```mermaid +graph TB + Bundled[BundledTemplateSource
implements TemplateSourcePort] + Local[LocalTemplateSource
implements TemplateSourcePort] + Url[UrlTemplateSource
implements TemplateSourcePort] + AFS[AtomicFileSystem
implements FileSystemPort] + GM[GitignoreManager
implements GitignorePort] + YM[SectionMetadata
implements MetadataPort] + + Bundled -->|importlib.resources| DataDir[smith/data/
packaged templates] + Local -->|pathlib| LocalFS[Local filesystem
path] + Url -->|requests| Remote[Remote URL
HTTP/HTTPS] + AFS -->|tempfile + shutil| ProjectFS[Project directory
filesystem] + GM -->|pathlib| GitignoreFile[.gitignore
file + source metadata] + YM -->|delegates to| GM +``` + +--- + +## Dependencies + +| Dependency | What it provides | Why not replaced | +|------------|------------------|-----------------| +| `argparse` | CLI argument parsing | Stdlib; sufficient for four subcommands; minimal runtime dependency (ADR-001) | +| `importlib.metadata` | Package version and description | Stdlib; already used in `__main__.py` | +| `requests` | HTTP downloads for URL template sources | External; cleaner API and error handling than urllib.request; used for tar.gz/zip archive download from remote URLs (ADR-007) | +| `importlib.resources` | Read bundled template files from `smith/data` package | Stdlib; no network call needed for the default template source | +| `pathlib` | Path manipulation | Stdlib; modern Python path handling | +| `shutil` | File/directory operations (copy, rmtree) | Stdlib; needed for atomic writes and directory removal | +| `tempfile` | Temporary directory creation for atomic writes | Stdlib; core of the atomicity mechanism (ADR-002) | +| `tarfile` | Archive extraction for URL template sources | Stdlib; needed for .tar.gz archives | +| `zipfile` | Archive extraction for URL template sources | Stdlib; needed for .zip archives | +| `dataclasses` | Value object definitions | Stdlib; frozen dataclasses for immutable value objects | +| `typing` | Protocol definitions, type hints | Stdlib; `Protocol` for port definitions | + +**One runtime dependency beyond Python stdlib:** `requests` is the only external package. This is a deliberate trade-off — `requests` provides significantly better HTTP handling than `urllib.request` for URL template source downloads. The bundled `agents-smith` source reads from packaged files via `importlib.resources` and requires no network call. See ADR-007. 
+ +--- + +## Configuration Keys + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `--from` | string | `agents-smith` | Template source: `agents-smith` (bundled), local path, or URL | +| `--overwrite` | flag | `False` | Replace existing agentic files without prompting | +| `smith.managed_section_start` | string | `# smith managed` | Delimiter marking the start of the managed .gitignore section | +| `smith.managed_section_end` | string | `# end smith managed` | Delimiter marking the end of the managed .gitignore section | +| `smith.default_template` | string | `agents-smith` | Default template source when `--from` is not specified | + +**Note:** Configuration keys with the `smith.` prefix are internal constants, not user-facing configuration. They are defined as module-level constants in the domain layer and are not configurable via environment variables or config files (YAGNI). The only user-facing configuration is the `--from` and `--overwrite` CLI flags. + +--- + +## Atomicity Implementation + +The atomicity invariant (all files or none) is implemented using a **temp-directory staging pattern**: + +1. **Stage:** Write all agentic files to a temporary directory (via `tempfile.mkdtemp`). +2. **Validate:** After all writes succeed, check that all expected files exist in the staging area. +3. **Commit:** Move staged files to their final locations in the project directory. Each file is moved atomically using `os.replace` (atomic on the same filesystem). +4. **Rollback:** If any step fails, remove the entire staging directory. If some commits have already succeeded, remove the committed files (best-effort rollback). + +The `.gitignore` section (with source metadata in the header) is written **after** all agentic files are committed. This ensures that a partial connection never leaves the `.gitignore` section pointing to missing files. There is no separate metadata file — smith is stateless. 
+ +**Rollback on disconnect:** The `disconnect` command removes smith-managed agentic files, preserving user-tracked files and the `.gitignore` section. If any removal fails, the command reports the error but continues removing remaining files (best-effort cleanup). The `.gitignore` section is preserved as a guard for future connections. + +--- + +## Safety Implementation + +The safety invariant (no silent overwrites of user-tracked files) is implemented as a **pre-write conflict check**: + +1. **Auto-update:** If the project is already connected (has `# smith managed` section and all managed files exist), `smith connect` auto-updates smith-managed files without requiring `--overwrite`. This is intentional — smith manages these files. +2. **Scan:** Before any write to an unconnected or partially-connected project, scan for existing files that conflict with the template. +3. **Skip:** If user-tracked files are found (files not in the `# smith managed` section), skip them — write only the files that don't conflict. The operation succeeds (exit 0) with the user-tracked files left untouched. +4. **Overwrite:** If `--overwrite` is set, overwrite all managed files (user-tracked files are still preserved via `_is_path_managed` within `_resolve_specs`). + +The conflict check is performed by the `FileSystemPort.check_conflicts()` method, which is called by the `ConnectUseCase` before staging any writes. This keeps the safety check in the application layer (orchestration) while the domain invariant (no silent overwrites) is enforced by the Connection aggregate. + +--- + +## .gitignore Management + +The managed section in `.gitignore` uses clear delimiters: + +```gitignore +# smith managed source:agents-smith +.opencode/ +.templates/ +.flowr/sessions/ +# end smith managed +``` + +**On connect:** +- If `.gitignore` does not exist, create it with the managed section. +- If `.gitignore` exists but has no managed section, append the managed section. 
+- If `.gitignore` exists and has a managed section, auto-update (overwrite managed files, skip user-tracked files). + +**On disconnect:** +- Remove all agentic files tracked in the managed section (AGENTS.md, .opencode/, .templates/, .flowr/). +- Preserve the `# smith managed` section in .gitignore — its presence serves as a guard for future `smith connect` or `smith update` commands. +- If an agentic file is NOT tracked in the managed section (user tracks it manually), do NOT remove it. + +**On update:** +- If the gitignore patterns have changed (e.g., template source provides different patterns), replace the managed section with the new patterns. + +--- + +## Template Source Resolution + +The `TemplateSourceAdapter` is a facade within `smith/infrastructure/template_source.py` that normalises three source types into a uniform `TemplateSourcePort` interface: + +| Source Type | Detection | Resolution | +|-------------|-----------|------------| +| Bundled (`agents-smith`) | Default (no `--from` flag) | `importlib.resources` reads agentic files from the `smith.data` package directory; no network call required | +| Local path | `--from` starts with `/`, `./`, `../`, or is an absolute path | `pathlib.Path` reads files from the local filesystem | +| Remote URL | `--from` starts with `http://` or `https://` | `requests` downloads the archive; `tarfile` or `zipfile` extracts to a temp directory; agentic file filter applied; temp directory cleaned up after resolution | + +**Bundled template resolution (local package):** The default `agents-smith` template source is resolved by reading agentic files directly from the `smith.data` package directory via `importlib.resources`. No network call is required — the files are packaged with smith. The agentic file filter (`_is_agentic_path`) selects only the essential subdirectories: `AGENTS.md`, `.opencode/agents/`, `.opencode/knowledge/`, `.opencode/skills/`, `.opencode/tools/`, `.templates/`, `.flowr/`. 
This excludes non-essential content like `node_modules/`, `package.json`, and other development artifacts that may exist in the source repository. A manual script (`scripts/update-bundle.sh`) syncs `smith/data/` from the agents-smith `v8_release` branch when a new release is prepared. See ADR-007 for the rationale. + +**Local path validation:** The `LocalTemplateSource` adapter validates that the path exists and contains the expected agentic file structure (at minimum, an `AGENTS.md` file). If the path is invalid, it raises `TemplateSourceError`. + +**URL download and extraction:** The `UrlTemplateSource` adapter downloads the archive to a temporary directory, extracts it (`.tar.gz` or `.zip`), applies the agentic file filter (only `AGENTS.md`, `.opencode/agents/`, `.opencode/knowledge/`, `.opencode/skills/`, `.opencode/tools/`, `.templates/`, `.flowr/` are included — non-essential content like `node_modules/` is excluded), and returns `FileSpec` objects. The temp directory is cleaned up after the `TemplateSourcePort.resolve()` call completes. No persistent cache is maintained for URL sources — each `resolve()` call re-downloads the archive. + +**Network failure handling:** If the URL download fails (network unreachable, HTTP error, timeout), `UrlTemplateSource.resolve()` raises `TemplateSourceError` with a clear message. The bundled source does not require network access — it reads from packaged files and always works offline. + +--- + +## Entry Point Configuration + +The `smith` CLI command is configured via `pyproject.toml` console scripts: + +```toml +[project.scripts] +smith = "smith.delivery.cli:main" +``` + +This allows users to run `smith connect` after installing the package, while `python -m smith` continues to work for development. 
+ +--- + +## Changes + +| Date | Source | Change | Reason | +|------|--------|--------|--------| +| 2026-05-01 | architecture-assessment | Initial technical design | New feature: smith-commands (connect, disconnect, update, status) | +| 2026-05-01 | IN_20260501_agents-smith-dependency-resolution | Replaced bundled template source (importlib.resources + smith/data/) with GitHub-based download + local cache; added requests dependency; added cache_dir and bundled_archive_url config keys | Bundled template files in smith/data/ were stale copies that would go out of sync; GitHub-based resolution ensures templates are always current | +| 2026-05-01 | IN_20260501_local-bundle-reversal | Reverted bundled template source to local package (importlib.resources + smith/data/); fully implemented UrlTemplateSource (tar.gz/zip with agentic filter); removed caching; removed cache_dir and bundled_archive_url config keys; deprecated BDD examples a1b2c3d4 and e5f6g7h8; superseded ADR-006 with ADR-007 | GitHub-based resolution introduced runtime network dependency and cache staleness issues; local bundle provides instant offline default experience; UrlTemplateSource handles URL sources independently | \ No newline at end of file diff --git a/docs/spec/workflow-design.md b/docs/spec/workflow-design.md new file mode 100644 index 0000000..db2893d --- /dev/null +++ b/docs/spec/workflow-design.md @@ -0,0 +1,565 @@ +# Development Lifecycle Workflow — Non-Deterministic State Machine Design + +## Model Rules + +- Each stage is a node or a link to another diagram (sub-flow) +- Diagrams can contain cycles, but cycles only point to the same level (not parent/child) +- Each diagram has finite, flat exit points +- Sub-flows are linked from states that require them + +## Design Principles + +### Priority Order (conflict resolution) + +When two principles conflict, the earlier one wins: + +**YAGNI > DRY > KISS > OC > SOLID > Design Patterns** + +1. **YAGNI** — Don't build what you don't need yet. 
If a feature isn't required by a .feature file example, it doesn't exist. +2. **DRY** — Don't repeat yourself, but only after YAGNI passes. Duplication is better than the wrong abstraction. +3. **KISS** — Keep it simple, but only after eliminating duplication. The simplest design that passes all .feature examples wins. +4. **OC** — Object Calisthenics, but only after KISS passes. Structure serves simplicity, not the other way around. +5. **SOLID** — Apply SOLID principles, but only after OC passes. SOLID is a tool, not a goal. +6. **Design Patterns** — Use patterns only when simpler approaches don't work. A pattern is justified only when YAGNI, KISS, and OC all point to it. + +### Philosophical Principles (from the Zen of Python) + +These guide all design decisions. When in doubt, refer to these: + +- Beautiful is better than ugly. +- Explicit is better than implicit. +- Simple is better than complex. +- Complex is better than complicated. +- Flat is better than nested. +- Sparse is better than dense. +- Readability counts. +- Special cases aren't special enough to break the rules. +- Although practicality beats purity. +- Errors should never pass silently unless explicitly silenced. +- In the face of ambiguity, refuse the temptation to guess. +- There should be one — and preferably only one — obvious way to do it. +- Now is better than never. Although never is often better than *right now*. +- If the implementation is hard to explain, it's a bad idea. +- If the implementation is easy to explain, it may be a good idea. + +### Core Workflow Principles + +1. **Fail-fast, shift-left** — issues caught early cost 10x less than issues caught late. The Review sub-step is tiered so the most expensive issues (design) are caught before cheaper issues (conventions) are invested in. +2. **Never invest in Tier 3 work on code that hasn't passed Tier 1** — docstrings, formatting, and conventions are waste on code that may need complete restructuring. +3. 
**BDD features are the single thread of truth** — written in Planning, used as test spec in Development, validated in Acceptance.
+4. **Each artifact is a translation of the previous one** — never skip an artifact. Each one is a checkpoint where you can validate alignment with the domain before investing in the next level of detail. If scope is wrong, .features will be wrong. If .features are wrong, tests will test the wrong things. If signatures don't match the domain model, test bodies will couple to the wrong structure.
+5. **Architecture must be reviewed before implementation begins** — catching design errors after SE has built everything is 100x more expensive than catching them during architecture. SA's work is reviewed by a separate R hat during Architecture Review & Sign-off, not after Development.
+6. **Technical review happens in Development, not after** — R reviews all three tiers (design, structure, conventions) during Development's Review sub-step. Acceptance (PO) is purely business validation. This eliminates redundancy and catches issues where they're cheapest to fix.
+ +--- + +## Actors + +| Actor | Abbreviation | Responsibility | Documents they own | +|---|---|---|---| +| **Product Owner** | PO | Business requirements, scope validation, acceptance sign-off | interview-notes/*.md, product_definition.md, features/.feature | +| **Domain Expert** | DE | Domain knowledge, ubiquitous language | domain_model.md, glossary.md, event_map | +| **Software Architect** | SA | Architecture decisions, context mapping, interface contracts | context_map.md, adr/*.md, technical_design.md, system.md, py_stubs, test_stubs | +| **Software Engineer** | SE | Implementation, test design, code quality | test_bodies, function_bodies, commits | +| **Reviewer** | R | Independent verification, cannot review own work | Review evidence (categorized by tier), approval records | + +**Key principle: You cannot review your own work.** R is a separate hat (not necessarily a separate person). If SA designed the architecture, someone else must wear the R hat for Architecture Review & Sign-off. If SE wrote the code, someone else must wear the R hat for Development's Review sub-step. + +In small teams, PO+DE may be one person, and SA+SE may be one person. But the **responsibilities are distinct** — the same person wears different hats. The R hat ensures independent verification at critical gates. + +--- + +## Process Support + +The `.opencode/` directory provides the meta-process infrastructure that guides how the flows are executed. It is separate from project artifacts — it is the procedural and reference system, not the work product. + +**Entry point**: `AGENTS.md` (project root) is loaded every session. It provides navigation, wikilink resolution, and discovery commands. See `agent-design/principles` for design rationale. + +**Discover, don't enumerate**: The number and names of agents, skills, and knowledge files change. 
AGENTS.md provides discovery commands rather than inventories: + +```bash +ls .opencode/agents/ # agent identity definitions +ls .opencode/skills/ # skill directories (each has SKILL.md) +find .opencode/knowledge -name '*.md' # knowledge files +``` + +### Agent-Role Mapping + +| Agent | Abbreviation | Decides | +|---|---|---| +| Product Owner | PO | Scope, priority, acceptance | +| Domain Expert | DE | Domain model, ubiquitous language | +| System Architect | SA | Architecture, ADRs, project structure | +| Reviewer | R | Pass/fail (cannot review own work) | + +Each flow state specifies its owner (PO, DE, SA, SE, or R). The owner maps to the agent file in `.opencode/agents/`. Agent files contain identity only (who I am, what I decide) — no skill lists, no routing, no knowledge content. + +### Skill Loading + +Each flow state loads skills on demand. The flow YAML `skills` field specifies which skill to invoke. Skills are procedural (step-by-step instructions) and are the only files that load knowledge. See `skill-design/principles` for skill structure. + +### Knowledge Resolution + +Skills reference knowledge via `[[domain/concept]]` wikilinks, resolved to `.opencode/knowledge/{domain}/{concept}.md`. Knowledge files use 4-section progressive disclosure: + +| Fragment | Loads | Token Savings | +|---|---|---| +| `[[domain/concept#key-takeaways]]` | Frontmatter + Key Takeaways | ~80% | +| `[[domain/concept#concepts]]` | Frontmatter + Key Takeaways + Concepts | ~65% | +| `[[domain/concept]]` | Entire file | 0% | + +Knowledge domains: `architecture`, `domain-modeling`, `requirements`, `software-craft`, `workflow`, `agent-design`, `skill-design`, `knowledge-design`. + +--- + +## Main Flow: Development Lifecycle + +The main flow separates project-level work (done once) from feature-level work (looped per feature). 
+ +``` +Discovery → Architecture → Feature Development ←┐ + ↑ ↑ │ │ + │ └ needs_architecture┘ │ + └ needs_discovery next-feature ─────┘ + completed ──► [Completed] + cancelled ──► [Cancelled] +``` + +Terminal exits: **completed** | **cancelled** + +| State | Purpose | Sub-flow | Transitions | +|---|---|---|---| +| **Discovery** | Domain understanding & scope | → Discovery Flow | `complete` → Architecture | +| **Architecture** | Architecture & context mapping | → Architecture Flow | `complete` → Feature Development, `needs_discovery` → Discovery | +| **Feature Development** | Feature-level loop: Planning → Dev → Acceptance → PR | → Feature Development Flow | `next-feature` → Feature Development (loop), `needs_architecture` → Architecture, `cancelled` → Cancelled, `completed` → Completed | + +**Why separate project-level and feature-level?** Discovery and Architecture establish the domain model and technical foundation once for the entire project. Feature Development then loops: each feature goes through Planning → Development → Acceptance → PR Creation. When all features are delivered (or none remain), the project completes. 
+ +--- + +## Hotfix Process + +Hotfixes use the same Main Flow (Discovery → Architecture → Feature Development) but with constrained scope: + +**Scope constraints:** +- **Discovery**: Focused on root cause analysis of the specific issue +- **Architecture**: Minimal change that fixes the issue without breaking existing contracts +- **Planning**: PO decides the specification approach: + - *Add new example* to existing .feature file (missing edge case) + - *Create new .feature file* (completely new behavior required) + - *Fix existing examples* (current specification is wrong) +- **Development**: Same TDD cycle and Review sub-flow - no shortcuts +- **Acceptance**: Same acceptance process - PO verifies business behavior +- **PR Creation**: Same PR process + +**Key principle: Quality gates remain the same.** Speed comes from smaller scope and focused specification changes, not skipped steps. A hotfix that breaks architecture or introduces technical debt creates bigger problems than the original issue. 
+ +--- + +## Test Body Design Pattern (cross-cutting) + +Every test body across all levels follows **Given/When/Then maps to Arrange/Act/Assert** — but the scope of what's under test differs: + +| Test Level | Scope | "Given" sets up | "When" triggers | "Then" asserts | +|---|---|---|---|---| +| **Unit** | Single domain object | Value objects, primitives | A method/command on one object | State changes, return values, exceptions | +| **Integration** | Aggregate + persistence | Aggregate via repository, test DB | A command through the aggregate root | Events emitted, state persisted, invariants held | +| **Acceptance (BDD)** | Full bounded context | Application service, test doubles | A use case through the API/entry point | End-to-end behavior matching BDD examples | + +--- + +## Discovery Flow — DDD Strategic Phase + +``` +Stakeholder Interview ──► Event Storming ──► Language Definition ──► Domain Modeling ──► Scope Boundary + │ ↑ ↑ │ │ ↑ + │ │ └ needs_restorming─┘ │ │ + ├── needs_full_discovery┘ │ │ + ├── needs_scope_only ──────────────────────────────────────────────┘ │ + ├── already_known ──► [complete] │ + │ │ + └── needs_reinterview ◄─────────────── Domain Modeling ─────────────────────┘ +``` + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Stakeholder Interview** | PO | — | — | interview-notes/*.md#pain_points, interview-notes/*.md#business_goals, interview-notes/*.md#terms_to_define, interview-notes/*.md#quality_attributes | +| **Event Storming** | DE | interview-notes/*.md#pain_points, interview-notes/*.md#business_goals, interview-notes/*.md#terms_to_define | — | domain_model.md#event_map, domain_model.md#context_candidates, domain_model.md#aggregate_candidates | +| **Language Definition** | DE | interview-notes/*.md#terms_to_define, domain_model.md#event_map | — | glossary.md | +| **Domain Modeling** | DE | glossary.md, domain_model.md#event_map, domain_model.md#aggregate_candidates, 
domain_model.md#context_candidates | domain_model.md#bounded_contexts, domain_model.md#entities, domain_model.md#relationships, domain_model.md#aggregate_boundaries, domain_model.md#summary | — | +| **Scope Boundary** | PO | domain_model.md#bounded_contexts, domain_model.md#aggregate_boundaries, domain_model.md#context_candidates, domain_model.md#summary, glossary.md | — | product_definition.md#what_is, product_definition.md#what_is_not, product_definition.md#why, product_definition.md#users, product_definition.md#out_of_scope, product_definition.md#delivery_order, product_definition.md#quality_attributes, product_definition.md#deployment | + +**Routing from Stakeholder Interview:** +- `needs_full_discovery` → Event Storming (new domain/concept) +- `needs_scope_only` → Scope Boundary (domain understood, scope new work) +- `already_known` → complete (no discovery needed) + +**Iteration loops:** +- Event Storming → `needs_reinterview` → Stakeholder Interview (workshop reveals gaps) +- Language Definition → `needs_restorming` → Event Storming (language contradicts event map) +- Domain Modeling → `contradiction_found` → Language Definition (model contradicts language) +- Domain Modeling → `needs_reinterview` → Stakeholder Interview (model reveals missing domain knowledge) +- Scope Boundary → `needs_reinterview` → Stakeholder Interview (scope questions reveal missing requirements) + +**Why this order?** Event Storming (Brandolini) is an exploratory technique that surfaces domain events, commands, and aggregate *candidates* — it comes before formal modeling. Language Definition formalizes the ubiquitous language from interviews + event storming terms — it comes before the domain model because the model is *expressed in* the ubiquitous language. Domain Modeling then formalizes the candidates into proper entities, invariants, and aggregate boundaries using glossary terms. 
+ +**domain_model.md is an evolving document:** Event Storming fills the Event Map, Aggregate Candidates, and Context Candidates sections (workshop draft). Domain Modeling then formalizes these into the Bounded Contexts, Entities, Relationships, and Aggregate Boundaries sections. Both steps edit the same document — no separate event storming artifact needed. + +**Carried forward to Architecture Flow:** glossary.md, domain_model.md, product_definition.md + +--- + +## Architecture Flow — DDD Tactical + Technical Design + +``` +Architecture Assessment ──► no_architecture_needed ──► [complete] (when architecture_exists) + │ + ├── needs_context_update ──► Context Mapping ──► Technical Design ──┐ + │ │ │ + │ └── needs_discovery │ + ├── needs_technical_design ──────────────────────────────────────► │ + │ │ needs_decisions │ + │ └──► ADR Draft ─────────►│ + │ ▼ + └── needs_discovery ──► [needs_discovery] Review & Sign-off + │ + └── inconsistent ──► Architecture Assessment +``` + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Architecture Assessment** | SA | product_definition.md#what_is, product_definition.md#delivery_order, product_definition.md#deployment, product_definition.md#quality_attributes, domain_model.md#bounded_contexts, domain_model.md#summary, system.md, technical_design.md, context_map.md | product_definition.md#deployment* | — | +| **Context Mapping** | SA | domain_model.md#bounded_contexts, domain_model.md#context_candidates, product_definition.md#what_is, product_definition.md#what_is_not, product_definition.md#out_of_scope, glossary.md | — | context_map.md#context_relationships, context_map.md#context_map_diagram, context_map.md#integration_points, context_map.md#anti_corruption_layers | +| **Technical Design** | SA | context_map.md#context_relationships, context_map.md#integration_points, context_map.md#anti_corruption_layers, domain_model.md#entities, domain_model.md#relationships, 
domain_model.md#aggregate_boundaries, glossary.md, system.md, product_definition.md#what_is, product_definition.md#what_is_not, product_definition.md#out_of_scope, product_definition.md#deployment, product_definition.md#quality_attributes | technical_design.md#architectural_style, technical_design.md#quality_attributes, technical_design.md#stack, technical_design.md#module_structure, technical_design.md#api_contracts, technical_design.md#event_contracts, technical_design.md#interface_definitions, technical_design.md#c4_diagrams, technical_design.md#dependencies, technical_design.md#configuration_keys, system.md#context, system.md#container, system.md#module_structure, system.md#delivery | — | +| **ADR Draft** | SA | technical_design.md#architectural_style, technical_design.md#quality_attributes, technical_design.md#stack, technical_design.md#module_structure, context_map.md#context_relationships, domain_model.md#bounded_contexts, domain_model.md#aggregate_boundaries, product_definition.md#what_is, product_definition.md#quality_attributes, glossary.md, system.md | system.md#key_decisions, system.md#active_constraints | adr/*.md | +| **Review & Sign-off** | R | context_map.md, technical_design.md, system.md, adr/*.md†, product_definition.md#what_is, product_definition.md#what_is_not, product_definition.md#quality_attributes, domain_model.md#bounded_contexts, domain_model.md#aggregate_boundaries, glossary.md | — | — | + +**Routing from Architecture Assessment:** +- `no_architecture_needed` → complete (when `architecture_exists`: system_md, technical_design_md, context_map_md all exist — feature fits existing architecture) +- `needs_technical_design` → Technical Design (new API contracts, modules, or interfaces) +- `needs_context_update` → Context Mapping (when `architecture_exists` — bounded context boundaries change, but base architecture exists) +- `needs_discovery` → needs_discovery exit (domain model insufficient) + +**Routing from Context Mapping:** +- `done` → 
Technical Design (context boundaries updated, contracts must be verified) +- `needs_discovery` → needs_discovery exit (bounded contexts in domain_model.md don't hold up under mapping) + +**Routing from Technical Design:** +- `done` → Review & Sign-off (no significant decisions needed) +- `needs_decisions` → ADR Draft (architecturally significant choice required) + +**Why assessment first?** Most features fit the existing architecture. Forcing context mapping and technical design for every feature is wasteful. SA assesses the feature against existing architecture and only does the work that's needed. This also gives SA a chance to interview the stakeholder about technical constraints (deployment target, infrastructure preferences) before making architectural decisions. + +**First-run safety (architecture_exists guard):** The `no_architecture_needed` and `needs_context_update` routes are guarded by the `architecture_exists` condition, which checks that system.md, technical_design.md, and context_map.md all exist. This prevents accidentally skipping architecture on a project's first feature where these artifacts don't yet exist. + +**Why is ADR conditional?** ADRs record architecturally significant decisions — most features don't involve such decisions. Forcing an ADR per feature creates noise. When SA discovers a decision is needed during technical design, they route to ADR Draft. Otherwise they skip it. + +**Why does ADR Draft edit system.md?** system.md is the living reference for the current system state. ADR summaries (key decisions) and risk constraints (active constraints) belong there so that R can verify implementation against them during Review Gate, and so that future SA assessments have a concise summary of architectural decisions without reading every ADR. + +**Dual ownership of product_definition.md#deployment\***: PO sets an initial deployment preference during Discovery (Scope Boundary). 
SA may override it during Architecture Assessment when technical constraints demand a different mechanism. SA has final say — deployment mechanism is an architectural decision, not a business preference. + +**Routing from Review & Sign-off:** +- `approved` → complete (all documents consistent and aligned) +- `inconsistent` → Architecture Assessment (documents contradict each other — SA must re-examine) +- `needs_discovery` → needs_discovery exit (domain model insufficient) + +**Reconciliation (explicit in review-signoff):** R verifies cross-document consistency before approving: +- technical_design.md ↔ domain_model.md (module structure matches bounded contexts; API contracts match entities) +- technical_design.md ↔ product_definition.md (out-of-scope items not in design; quality attributes addressed) +- technical_design.md ↔ glossary.md (terms in contracts match ubiquitous language) +- context_map.md ↔ domain_model.md (integration points match context boundaries) +- adr/*.md ↔ technical_design.md (ADRs consistent with actual design) + +**Conditional input †**: Review & Sign-off lists `adr/*.md` as input, but ADRs may not exist when Technical Design routes directly to Review & Sign-off (no `needs_decisions`). R reads whatever ADRs exist — zero ADRs is valid. + +**needs_discovery from different sources**: Both Assessment and Context Mapping can exit with `needs_discovery`. Assessment triggers it when the domain model is insufficient to make architectural decisions. Context Mapping triggers it when bounded contexts don't hold up under relationship analysis. Review & Sign-off can also trigger `needs_discovery` when R finds architectural problems that stem from flawed discovery. All three route back to the full Discovery cycle — this is deliberate over-correction: partial discovery rework risks reintroducing the same gaps. 
+ +**inconsistent from Review & Sign-off**: When R finds that the architecture documents contradict each other (e.g., technical design uses terms not in the glossary, or context map doesn't align with domain model boundaries), the flow routes back to Architecture Assessment rather than Discovery. The domain model may be fine — the problem is that the architecture doesn't consistently translate it. + +**Carried forward to Feature Development Flow:** context_map.md, adr/*.md, technical_design.md, system.md + +--- + +## Planning Flow — BDD Story Definition + +``` +Feature Selection → Feature Specification → Feature Breakdown → BDD Features → Definition of Done → Ready + │ │ ↑ ↑ + │ └ needs_architecture───┘ │ + │ └ needs_respecification─┘ + │ + └ no_features → [completed] +``` + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Feature Selection** | PO | product_definition.md#what_is, product_definition.md#why, product_definition.md#delivery_order, technical_design.md#feature, technical_design.md#module_structure | — | — | +| **Feature Specification** | PO | product_definition.md#what_is, product_definition.md#users, product_definition.md#quality_attributes, product_definition.md#out_of_scope, domain_model.md#bounded_contexts, domain_model.md#entities, domain_model.md#aggregate_boundaries, glossary.md, technical_design.md#api_contracts, technical_design.md#feature | — | interview-notes/*.md | +| **Feature Breakdown** | PO | product_definition.md#what_is, product_definition.md#why, product_definition.md#users, product_definition.md#delivery_order, technical_design.md#feature, technical_design.md#module_structure, interview-notes/*.md | — | feature_list | +| **BDD Features** | PO | feature_list, product_definition.md#what_is, product_definition.md#users, product_definition.md#quality_attributes, domain_model.md#entities, domain_model.md#aggregate_boundaries, glossary.md | — | features/.feature | +| **Definition of 
Done** | PO | features/.feature, product_definition.md#quality_attributes | product_definition.md#definition_of_done | — | +| **Ready** | PO | features/.feature, product_definition.md#definition_of_done | — | — | + +**Exits:** `complete` → Development, `needs_architecture` → Architecture, `no_features` → Completed (no more features to develop) + +**Why feature selection first?** Before planning a feature, PO must verify that the architecture covers it. If technical_design.md doesn't address the feature, Planning routes back to Architecture rather than proceeding with incomplete design. + +**Why feature specification?** The initial stakeholder interview in Discovery covers domain understanding at scope level, not feature-level behavioral detail. Feature Specification is a targeted conversation about one feature's concrete behavior — behavioral rules, scenarios, and acceptance criteria — informed by domain constraints (domain_model.md, glossary.md) and technical contracts (technical_design.md#api_contracts). + +**Key design principle:** BDD features are the **contract between Planning and Development**. Each example becomes: +1. A test specification (test body design) +2. The acceptance criteria (Acceptance validates against them) + +Feature Specification fills the gap between Discovery's scope-level interview and feature-level behavioral detail. Feature Breakdown then decomposes the specified feature into stories. BDD features are written using both the breakdown and the specification interview notes. + +**Feature file convention:** Flows work on one feature at a time. Artifact references use `features/.feature` (singular placeholder), not `features/*.feature` (glob). The flow engine processes a single feature per cycle through the Feature Development loop. + +**Iteration loops:** Feature Breakdown and BDD Features can route back to Feature Specification via `needs_respecification` when decomposition reveals that the specification was incomplete or inconsistent. 
+ +**Carried forward to Development Flow:** features/.feature, product_definition.md (with DoD), feature_list, interview-notes/*.md + +**Project convention (not per-feature):** Branch naming convention, PR template, merge policy are established once at project start and referenced from product_definition.md, not repeated each planning cycle. Trunk-based: short-lived feature branches from trunk, PR before merge. + +--- + +## Development Flow — TDD Implementation + +``` +Project Structuring → [TDD Cycle Flow] → [Review Gate Flow] → Commit + ↑ │ │ + └── blocked ─────────┘ │ + │ │ + └── needs_planning │ + │ + fail ────────────────┘ +``` + +### Project Structuring (owned by SA) + +| Step | What gets created | Source artifact | Output Artifacts | +|---|---|---|---| +| Package/module directories | Folder structure matching bounded context design | technical_design.md#module_structure | git_branch | +| `.py` stubs/signatures | Class names, typed attributes, method signatures, interfaces — **NO behavior** | domain_model.md#entities + domain_model.md#relationships + domain_model.md#bounded_contexts + glossary.md + technical_design.md#api_contracts + technical_design.md#interface_definitions + technical_design.md#dependencies + technical_design.md#configuration_keys + context_map.md#context_relationships + context_map.md#integration_points + context_map.md#anti_corruption_layers + adr/*.md + product_definition.md#quality_attributes | py_stubs | +| Test class stubs | One test file per `.feature` file, example function names as placeholders — **no fixtures/assertions** | features/.feature | test_stubs | + +**Why signatures before tests?** The `.py` stubs consolidate domain ideas into code structure. Test stubs then map `.feature` examples onto that structure. If signatures are wrong (don't match the domain), test bodies will couple to wrong abstractions — and refactoring both tests and implementation is expensive. Signatures are cheap to change; coupled tests are not. 
+ +### TDD Cycle Flow (separate flow, owned by SE) + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **RED** | SE | test_stubs, py_stubs | — | test_bodies | +| **GREEN** | SE | test_bodies, py_stubs | — | function_bodies | +| **REFACTOR** | SE | function_bodies, test_bodies | function_bodies | refactored_code | + +**Exits:** `all_green` → Review Gate, `blocked` → Project Structuring + +### Review Gate Flow (separate flow, owned by R) + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Design Review** | R | domain_model.md#bounded_contexts, domain_model.md#entities, domain_model.md#aggregate_boundaries, glossary.md, technical_design.md#module_structure, technical_design.md#api_contracts, technical_design.md#event_contracts, context_map.md#context_relationships, system.md, product_definition.md#quality_attributes, adr/*.md, refactored_code | — | design_review_evidence | +| **Structure Review** | R | coverage_reports, test_output, refactored_code, features/.feature, domain_model.md#entities, domain_model.md#aggregate_boundaries, glossary.md | — | structure_review_evidence | +| **Conventions Review** | R | linter_output, refactored_code, product_definition.md#project_conventions, glossary.md | — | conventions_review_evidence | + +**Exits:** `pass` → Commit, `fail` → TDD Cycle + +| Tier | Name | What R checks | Evidence sources | Fail routes SE to | +|---|---|---|---|---| +| **1** | **Design** | Domain alignment, DDD patterns, ubiquitous language, architecture compliance, priority order (YAGNI → DRY → KISS → OC → SOLID → Design Patterns) | R's judgment, domain_model.md#bounded_contexts, domain_model.md#entities, glossary.md, technical_design.md#module_structure, technical_design.md#api_contracts, context_map.md#context_relationships, system.md (key decisions + active constraints) | → REFACTOR (design is wrong — do not polish) | +| **2** | **Structure** | Test 
coverage, test coupling, BDD examples pass, missing test cases, behavior vs structure testing | Coverage reports, test runner output, R's judgment | → TDD Cycle (tests need work) | +| **3** | **Conventions** | Formatting, docstrings, type hints, import ordering, lint rules unrelated to design | Linter/formatter output, R's judgment | → quick surface fix | + +**Why this order?** If Tier 1 fails, the design is wrong and will be restructured. Writing docstrings (Tier 3) for code that will be rewritten is pure waste. If Tier 2 fails, behavior is broken — no point formatting broken code. Tier 3 is cheap to fix but only worth it when design and behavior are stable. + +**Key principle:** R uses automated tools as **evidence**, not as a replacement for judgment. A linter passing doesn't mean R approves the structure. R might say "tests pass but they're testing implementation details, not behavior" — that's a Tier 2 judgment automation can't make. + +### Commit (owned by SE) + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Commit** | SE | test_bodies, function_bodies, review_gate_evidence, features/.feature | — | commits | + +**Carried forward to Acceptance:** Feature branch (with all commits), test results, coverage report, example traceability + +--- + +## Feature Development Flow — Feature-Level Loop + +After Architecture completes, the project enters the feature development loop. Each feature goes through Planning → Development → Acceptance → PR Creation. After a feature is merged, the loop starts again for the next feature. Post-mortem routes back to Planning (most common root cause: specification issues), with an escalation path to Architecture when needed. 
+ +``` +Planning ──► Development ──► Acceptance ──► PR Creation ──► [next-feature] + │ │ │ │ + │ │ └ rejected─┤ + │ │ │ + └ needs_architecture └ rejected Post-mortem ──► Planning (replan) + └ no_features ──► [completed] ├──► [needs_architecture] + └──► [cancelled] +``` + +**Exits:** `next-feature` → loop again, `needs_architecture` → Architecture (parent), `cancelled` → Cancelled (parent), `completed` → Completed (parent) + +### Acceptance (owned by PO) + +Technical review (design, structure, conventions) already happened in Development Flow's Review sub-step (owned by R). Acceptance is purely business validation by PO. + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Acceptance** | PO | features/.feature, product_definition.md#quality_attributes, product_definition.md#definition_of_done | — | acceptance_evidence, approval_record | + +**Transitions:** `approved` → PR Creation, `rejected` → Post-mortem + +**Why no technical review here?** R already reviewed all three tiers (design, structure, conventions) during Development's Review sub-step. Acceptance is PO's domain: did we build the *right thing*, not did we build the *thing right*. 
+ +### PR Creation (owned by SE) + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **PR Creation** | SE | commits, approval_record, features/.feature | — | pull_request | + +**Transitions:** `merged` → next-feature (when `ci_passes=true` + `no_changes_requested=true`), `rejected` → Post-mortem + +--- + +## Post-mortem Flow — Failure Analysis + +``` +Root Cause Analysis ──► Document Findings ──► Extract Lessons ──► Action Items ──► Complete (→ Planning) + │ │ + └── no_issues_found ──► No Action ├──► needs_architecture (→ Architecture) + └──► No Action (→ Cancelled) +``` + +| State | Owner | Input Artifacts | Edited Artifacts | Output Artifacts | +|---|---|---|---|---| +| **Root Cause Analysis** | R | — | — | root_cause | +| **Document Findings** | R | root_cause | — | post-mortem/PM_YYYYMMDD_.md#failed_at, post-mortem/PM_YYYYMMDD_.md#root_cause, post-mortem/PM_YYYYMMDD_.md#missed_gate | +| **Extract Lessons** | R | post-mortem/PM_YYYYMMDD_.md#root_cause, post-mortem/PM_YYYYMMDD_.md#missed_gate | post-mortem/PM_YYYYMMDD_.md#fix | — | +| **Action Items** | R | post-mortem/PM_YYYYMMDD_.md#fix | post-mortem/PM_YYYYMMDD_.md#restart_check | — | + +**Exits:** `complete` → Planning (replan), `needs_architecture` → Architecture (architectural root cause), `no_action` → Cancelled + +**Why route to Planning, not Architecture?** Most PR rejections are specification problems — the feature didn't match what was intended, or scenarios were incomplete. Routing through Architecture every time wastes a cycle for the common case. When the root cause is architectural (wrong bounded context boundaries, wrong technical design), the `needs_architecture` exit escalates to the parent flow. 
+ +--- + +## Document Registry — Complete Artifact List + +### Living Documents (maintained throughout project) + +| Document | Path | Owner | When Changed | Purpose | +|---|---|---|---|---| +| **interview-notes/*.md** | `docs/interview-notes/IN_YYYYMMDD_.md` | PO | Append-only per session (Discovery + Feature Specification) | Raw stakeholder Q&A, reconstruction source | +| **product_definition.md** | `docs/product_definition.md` | PO (SA overrides #deployment) | When scope changes | IS/IS NOT boundaries, out of scope, users, project conventions | +| **glossary.md** | `docs/glossary.md` | DE | When domain terms emerge or change | Ubiquitous language dictionary | +| **domain_model.md** | `docs/domain_model.md` | DE | When domain understanding evolves | Event map, aggregate/context candidates, bounded contexts, entities, relationships, aggregate boundaries. Evolving: Event Storming fills candidates, Domain Modeling formalizes them | +| **context_map.md** | `docs/context_map.md` | SA | When contexts or relationships change | DDD relationships: upstream/downstream, anti-corruption layers | +| **system.md** | `docs/system.md` | SA | When domain understanding changes (rare) | C4 context/container diagrams, module structure, domain model documentation | +| **technical_design.md** | `docs/technical_design.md` | SA | When stack/contracts change | Stack choices, API/event contracts, interface definitions | +| **adr/*.md** | `docs/adr/ADR_YYYYMMDD_.md` | SA | New decisions or status changes | Architecture decisions with risk assessment | +| **features/*.feature** | `docs/features/feature-name/feature-name.feature` | PO | When requirements change | BDD features in Gherkin format — the single thread of truth (flows process one `.feature` at a time) | + +### Intermediate Documents (produced, consumed, then archived) + +| Document | Path | Owner | Purpose | +|---|---|---|---| +| **Feature list** | Directory structure of `docs/features/` | PO | Decomposed into .feature files, then 
| **post-mortem/*.md** | `docs/post-mortem/PM_YYYYMMDD_{slug}.md` | R | When PR is rejected | Root cause analysis, lessons, action items |
[Assessment: SA interviews stakeholder + decides routing; the architecture_exists guard prevents skipping architecture on the first run]
glossary + technical_design#api_contracts) + │ ──► feature_list (from Feature Breakdown, using product_definition.md#what_is + #why + #users + #delivery_order + interview-notes/*.md) + │ ──► features/.feature (from BDD Features, using feature_list + product_definition.md#what_is + #users + product_definition.md#quality_attributes + domain_model.md#entities + #aggregate_boundaries + glossary.md) + │ ──► product_definition.md#definition_of_done (edited by Definition of Done) + │ ──► no_features → Completed (project done) + │ │ + │ ▼ (BDD features → test specifications AND acceptance criteria) + ├── Development ──► py_stubs + test_stubs + git_branch (from Project Structuring) + │ ──► [TDD Cycle Flow]: test_bodies (RED) → function_bodies (GREEN) → refactored_code (REFACTOR) + │ ──► [Review Gate Flow]: design_review_evidence → structure_review_evidence → conventions_review_evidence + │ ──► commits (from Commit) + │ │ + │ ▼ + ├── Acceptance ──► acceptance_evidence + approval_record (PO validates against BDD scenarios + quality attributes) + │ │ + │ ▼ + ├── PR Creation ──► merged (when ci_passes + no_changes_requested) → next-feature (loop) + │ ──► rejected → Post-mortem + │ │ + │ ▼ (if rejected) + └── Post-mortem ──► root_cause → post-mortem#failed_at + #root_cause + #missed_gate → #fix → #restart_check + ──► complete → Planning (replan — most common: specification issues) + ──► needs_architecture → Architecture (architectural root cause) + ──► no_action → Cancelled +``` + +The **BDD feature is the single thread of truth** — written in Planning (PO), used as test spec in TDD Cycle Flow (RED), validated in Acceptance (PO), and traced in PR Creation (release notes reference which examples were delivered). + +The **Review Gate Flow ensures design issues are caught before conventions investment** — fail-fast, shift-left, tier by tier (Design → Structure → Conventions). Never invest in Tier 3 work on code that hasn't passed Tier 1. 
R reviews ALL three tiers and reports categorized findings to SE. + +The **artifact chain ensures each translation is validated before the next level of detail is invested** — scope → features → signatures → test stubs → test bodies → function bodies. The Review Gate Flow checks these artifacts, not creates new ones. + +The **"cannot review own work" principle prevents conflicts of interest** — Architecture Review & Sign-off (R verifies SA's architecture before implementation), Review Gate Flow (R verifies SE's implementation across all three tiers, cannot be same person as SE). Acceptance (PO) is purely business validation, not technical review. diff --git a/pyproject.toml b/pyproject.toml index 75614c3..09a03aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "agents-smith" -version = "0.1.20260421" -description = "A project for people to pair program with AI, the right way." +version = "0.1.0" +description = "AI-assisted software delivery system with flow-based agent orchestration" readme = "README.md" requires-python = ">=3.13" license = { file = "LICENSE" } @@ -11,13 +11,13 @@ authors = [ maintainers = [ { name = "eol", email = "nullhack@users.noreply.github.com" } ] -classifiers = [ - "Development Status :: 4 - Beta", -] dependencies = [ - "fire>=0.7.1", + "requests>=2.32", ] +[project.scripts] +smith = "smith.__main__:main" + [project.urls] Repository = "https://github.com/nullhack/agents-smith" Documentation = "https://github.com/nullhack/agents-smith/tree/main/docs/api/" @@ -26,7 +26,6 @@ Documentation = "https://github.com/nullhack/agents-smith/tree/main/docs/api/" dev = [ "pdoc>=14.0", "pytest>=9.0.3", - "pytest-beehave[html]>=3.3,<4", "pytest-cov>=6.1.1", "pytest-mock>=3.14.0", "ruff>=0.11.5", @@ -34,13 +33,14 @@ dev = [ "hypothesis>=6.148.4", "pyright>=1.1.407", "ghp-import>=2.1.0", + "flowr>=0.3", + "gherkin-official>=39.0.0", + "safety>=3.7.0", ] -[tool.uv] -package = true - [tool.setuptools] packages = ["smith"] 
+package-data = { "smith.data" = ["**/*"] } [tool.ruff.lint] ignore = [] @@ -79,7 +79,8 @@ mccabe.max-complexity = 10 pydocstyle.convention = "google" [tool.ruff.lint.per-file-ignores] -"tests/**" = ["S101", "ANN", "D205", "D212", "D415", "D100", "D103"] +"tests/**" = ["S101", "S404", "S108", "ANN", "D102", "D107", "D205", "D212", "D415", "D100", "D103", "D101"] +"scripts/*.py" = ["T20"] [tool.pytest.ini_options] minversion = "6.0" @@ -97,17 +98,18 @@ addopts = """ testpaths = ["tests"] python_files = ["*_test.py"] python_functions = ["test_*"] -render_collapsed = "all" [tool.coverage.report] +fail_under = 100 exclude_lines = [ "pragma: no cover", "def __repr__", "if self.debug:", - "if settings.DEBUG", + "if settings.DEBUG:", "raise AssertionError", "if 0:", "if __name__ == .__main__.:", + "...", ] [tool.taskipy.tasks] @@ -116,18 +118,15 @@ test-coverage = """\ pytest \ --cov-config=pyproject.toml \ --cov=smith \ - --cov-fail-under=100 \ --tb=no """ test-build = """\ pytest \ - -p no:beehave \ --doctest-modules \ --cov-config=pyproject.toml \ --cov-report html:docs/coverage \ --cov-report term:skip-covered \ --cov=smith \ - --cov-fail-under=100 \ --hypothesis-show-statistics \ --html=docs/tests/report.html \ --self-contained-html \ @@ -151,13 +150,9 @@ pytest \ -q \ """ doc-publish = "task doc-build && ghp-import -n -p -f docs" -static-check = "pyright" +static-check = "pyright smith tests" +validate-flows = "bash scripts/flowr-utils.sh validate" +regenerate-flowviz = "python scripts/generate-flowviz-data.py" +release-check = "task lint && task static-check && task test && task doc-build" -[dependency-groups] -dev = [ - "gherkin-official>=39.0.0", - "safety>=3.7.0", -] -[tool.beehave] -features_path = "docs/features" diff --git a/scripts/flowr-utils.sh b/scripts/flowr-utils.sh new file mode 100755 index 0000000..d5f4eb2 --- /dev/null +++ b/scripts/flowr-utils.sh @@ -0,0 +1,121 @@ +#!/bin/bash + +# Utility script for working with flowr flows +# Usage: 
generate_graph() { + echo "Generating interactive D3.js visualization..." + python scripts/generate-flowviz-data.py +}
+ for yaml_file in "${FLOWS_DIR}"/*.yaml; do + if [[ -f "$yaml_file" ]]; then + flow_name=$(basename "$yaml_file" .yaml) + echo "" + validate_flow "$flow_name" + fi + done +} + +# Main logic +case "${1:-}" in + "list") + list_flows + ;; + "validate") + if [[ -n "${2:-}" ]]; then + validate_flow "$2" + else + validate_all + fi + ;; + "view") + if [[ -z "${2:-}" ]]; then + echo "Error: Flow name required for view command" + show_usage + exit 1 + fi + view_flow "$2" + ;; + "graph") + generate_graph + ;; + "-h"|"--help"|"") + show_usage + ;; + *) + echo "Error: Unknown command '$1'" + show_usage + exit 1 + ;; +esac \ No newline at end of file diff --git a/scripts/generate-flowviz-data.py b/scripts/generate-flowviz-data.py new file mode 100755 index 0000000..c06f017 --- /dev/null +++ b/scripts/generate-flowviz-data.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +"""Generate FlowViz data bundle from flowr YAML. + +Outputs a single JS file (`flowviz/data.js`) that defines `window.FLOWVIZ_DATA`. +This is intentionally `file://` friendly: the HTML can be opened directly +without needing a local web server (no fetch/XHR). 
+""" + +from __future__ import annotations + +import json +from pathlib import Path + +import yaml + +ROOT = Path(__file__).resolve().parents[1] +FLOWS_DIR = ROOT / ".flowr" / "flows" +OUT_DIR = ROOT / "flowviz" +OUT_FILE = OUT_DIR / "data.js" + + +def _title_case(s: str) -> str: + return " ".join([p.capitalize() for p in s.replace("_", "-").split("-") if p]) + + +def load_flow_yaml(path: Path) -> dict: + """Load and validate a flowr YAML file.""" + with path.open("r", encoding="utf-8") as f: + data = yaml.safe_load(f) + if not isinstance(data, dict) or "flow" not in data: + raise ValueError(f"Invalid flow YAML: {path}") + return data + + +def _flatten_artifacts(artifacts: list | None) -> list[str]: + if not artifacts: + return [] + result: list[str] = [] + for item in artifacts: + if isinstance(item, str): + result.append(item) + elif isinstance(item, dict): + for doc, sections in item.items(): + if isinstance(sections, list) and sections: + for sec in sections: + result.append(f"{doc}#{sec}") + else: + result.append(doc) + return result + + +def _flatten_attrs(attrs: dict | None) -> dict | None: + if not attrs: + return attrs + flat = {} + for key, val in attrs.items(): + if key in ("input_artifacts", "edited_artifacts", "output_artifacts"): + flat[key] = _flatten_artifacts(val) + else: + flat[key] = val + return flat + + +def _resolve_when( + when_clause: dict | list | str, + state_conditions: dict | None, + state_id: str, +) -> dict[str, str] | None: + """Resolve a when clause into a flat dict of conditions. + + Mirrors the flowr loader's resolve_when_clause logic but returns + a plain dict suitable for JSON serialization instead of GuardCondition. 
+ """ + if isinstance(when_clause, dict): + return dict(when_clause) + + items = [when_clause] if isinstance(when_clause, str) else list(when_clause) + resolved: dict[str, str] = {} + + for item in items: + if isinstance(item, dict): + resolved.update(item) + elif isinstance(item, str): + # Named reference to a condition group on this state + if not state_conditions or item not in state_conditions: + raise ValueError( + f"Unknown condition reference '{item}' in state '{state_id}'" + ) + resolved.update(state_conditions[item]) + + return resolved or None + + +def _add_exit_node( + nodes: list[dict], node_ids: set[str], target: str, exits: list[str] +) -> None: + """Add an exit node to the graph if not already present.""" + if target not in node_ids: + nodes.append({"id": target, "type": "exit", "label": _title_case(target)}) + node_ids.add(target) + + +def _process_dict_transitions( + nxt: dict, + st_id: str, + exits: list[str], + state_conditions: dict | None, + nodes: list[dict], + node_ids: set[str], + edges: list[dict], +) -> None: + """Process dict-format transitions (flowr normal form).""" + for trigger, tgt in nxt.items(): + target: str | None = None + when: dict[str, str] | None = None + + if isinstance(tgt, str): + target = tgt + elif isinstance(tgt, dict): + target = tgt.get("to") + raw_when = tgt.get("when") + if raw_when is not None: + when = _resolve_when(raw_when, state_conditions, st_id) + + if target is None: + continue + + edge = { + "source": st_id, + "target": target, + "label": "" if trigger == "default" else str(trigger), + "kind": "exit" if target in exits else "transition", + } + if when: + edge["when"] = when + edges.append(edge) + _add_exit_node(nodes, node_ids, target, exits) + + +def _process_list_transitions( + nxt: list, + st_id: str, + exits: list[str], + nodes: list[dict], + node_ids: set[str], + edges: list[dict], +) -> None: + """Process list-format transitions (older/alternate format).""" + for t in nxt: + target = t["target"] + 
cond = t.get("when", "default") + edges.append( + { + "source": st_id, + "target": target, + "label": "" if cond == "default" else str(cond), + "kind": "exit" if target in exits else "transition", + } + ) + _add_exit_node(nodes, node_ids, target, exits) + + +def build_graph(flow_data: dict) -> dict: + """Build a visualization graph from flowr YAML data.""" + exits = list(flow_data.get("exits", []) or []) + states = list(flow_data.get("states", []) or []) + + nodes: list[dict] = [] + edges: list[dict] = [] + + for st in states: + st_id = st["id"] + is_subflow = "flow" in st + node_type = "subflow" if is_subflow else "state" + + nodes.append( + { + "id": st_id, + "type": node_type, + "label": _title_case(st_id), + "subflow": st.get("flow"), + "subflowVersion": st.get("flow-version"), + "attrs": _flatten_attrs(st.get("attrs")) or None, + } + ) + + for ex in exits: + nodes.append( + { + "id": ex, + "type": "exit", + "label": _title_case(ex), + } + ) + + node_ids = {n["id"] for n in nodes} + + for st in states: + st_id = st["id"] + state_conditions = st.get("conditions") + nxt = st.get("next") + if not nxt: + continue + + if isinstance(nxt, dict): + _process_dict_transitions( + nxt, st_id, exits, state_conditions, nodes, node_ids, edges + ) + elif isinstance(nxt, list): + _process_list_transitions(nxt, st_id, exits, nodes, node_ids, edges) + + return { + "flow": flow_data["flow"], + "version": flow_data.get("version", "0.0.0"), + "exits": exits, + "nodes": nodes, + "edges": edges, + } + + +def main() -> int: + """Generate the flowviz data bundle from all flowr YAML files.""" + if not FLOWS_DIR.exists(): + raise SystemExit(f"Missing flows directory: {FLOWS_DIR}") + + OUT_DIR.mkdir(parents=True, exist_ok=True) + + flows: dict[str, dict] = {} + for p in sorted(FLOWS_DIR.glob("*.yaml")): + data = load_flow_yaml(p) + flows[data["flow"]] = build_graph(data) + + bundle = { + "schema": 1, + "defaultFlow": "main-flow" if "main-flow" in flows else min(flows), + "flows": flows, + 
} + + js = "window.FLOWVIZ_DATA = " + json.dumps(bundle, indent=2, sort_keys=True) + ";\n" + OUT_FILE.write_text(js, encoding="utf-8") + + print(f"Wrote {OUT_FILE}") + print(f"Flows: {', '.join(sorted(flows.keys()))}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/generate-svg.sh b/scripts/generate-svg.sh new file mode 100755 index 0000000..544289e --- /dev/null +++ b/scripts/generate-svg.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +# Generate SVG diagrams from flowr flow definitions +# Usage: ./scripts/generate-svg.sh [FLOW_NAME] +# If FLOW_NAME not provided, generates all flows that work with mermaid-cli + +set -e + +FLOWS_DIR=".flowr/flows" +OUTPUT_DIR="flows" + +# Flows that actually work with mermaid-cli (very limited due to exit state syntax issues) +WORKING_FLOWS=( + "tdd-cycle-flow" +) + +# Check if python3 is available +if ! command -v python3 &> /dev/null; then + echo "Error: python3 is required" + exit 1 +fi + +# Check if npx is available (for mermaid-cli) +if ! command -v npx &> /dev/null; then + echo "Error: npx is required for mermaid-cli" + exit 1 +fi + +# Check if we're in venv (look for flowr) +if ! source .venv/bin/activate 2>/dev/null || ! python -c "import flowr" 2>/dev/null; then + echo "Error: flowr not available. Make sure you're in the project directory and have activated the venv" + echo "Run: source .venv/bin/activate" + exit 1 +fi + +# Activate venv +source .venv/bin/activate + +generate_svg() { + local flow_name="$1" + local yaml_file="${FLOWS_DIR}/${flow_name}.yaml" + local svg_file="${OUTPUT_DIR}/${flow_name}.svg" + local temp_mermaid="/tmp/${flow_name}.mmd" + + if [[ ! -f "$yaml_file" ]]; then + echo "Warning: $yaml_file not found, skipping..." + return + fi + + echo "Generating $svg_file from flowr definition..." 
+ + # Step 1: Convert flowr to mermaid (filter out problematic syntax) + python -m flowr mermaid "$yaml_file" | grep -v "note right of" > "$temp_mermaid" + + # Step 2: Convert mermaid to SVG + if npx @mermaid-js/mermaid-cli@11.12.0 -i "$temp_mermaid" -o "$svg_file" -t neutral 2>/dev/null; then + echo "✓ Generated $svg_file" + else + echo "⚠️ Failed to generate $svg_file (mermaid-cli compatibility issue)" + echo " Flow definition is valid, but SVG generation failed" + fi + + # Clean up temp file + rm -f "$temp_mermaid" +} + +# Create output directory if it doesn't exist +mkdir -p "$OUTPUT_DIR" + +if [[ $# -eq 1 ]]; then + # Generate specific flow + generate_svg "$1" +else + # Generate working flows + echo "Generating SVG diagrams from flowr flows..." + echo "Note: Only generating flows known to work with mermaid-cli" + + for flow_name in "${WORKING_FLOWS[@]}"; do + generate_svg "$flow_name" + done + + echo "" + echo "SVG generation limitations:" + echo "- Only tdd-cycle-flow works with mermaid-cli (exit state syntax issues)" + echo "- All other flows have mermaid-cli compatibility problems" + echo "" + echo "All flows are still valid flowr definitions and can be:" + echo "- Validated: python -m flowr validate .flowr/flows/.yaml" + echo "- Viewed as mermaid: python -m flowr mermaid .flowr/flows/.yaml" + echo "- Used programmatically with flowr APIs" + echo "✓ All working flows converted to SVG" +fi \ No newline at end of file diff --git a/scripts/update-bundle.sh b/scripts/update-bundle.sh new file mode 100755 index 0000000..b4fa5c4 --- /dev/null +++ b/scripts/update-bundle.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Update smith/data/ with agentic files from the agents-smith v8_release branch. +# Usage: ./scripts/update-bundle.sh +# +# Downloads the agents-smith v8_release archive from GitHub, +# extracts it, and copies only the agentic files to smith/data/. 
+# Agentic files: AGENTS.md, .opencode/agents/, .opencode/skills/, +# .opencode/knowledge/, .opencode/tools/, .templates/, .flowr/ + +set -euo pipefail + +TEMP_DIR=$(mktemp -d) +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +DATA_DIR="$PROJECT_ROOT/smith/data" + +echo "Downloading agents-smith v8_release archive..." +curl -sL "https://github.com/nullhack/agents-smith/archive/refs/heads/v8_release.tar.gz" \ + -o "$TEMP_DIR/agents-smith.tar.gz" + +echo "Extracting archive..." +tar -xzf "$TEMP_DIR/agents-smith.tar.gz" -C "$TEMP_DIR" + +SRC_DIR="$TEMP_DIR/agents-smith-v8_release" + +if [ ! -d "$SRC_DIR" ]; then + echo "ERROR: Expected directory agents-smith-v8_release not found in archive" + rm -rf "$TEMP_DIR" + exit 1 +fi + +echo "Removing old agentic files from smith/data/..." +rm -rf "$DATA_DIR/AGENTS.md" "$DATA_DIR/.opencode" "$DATA_DIR/.templates" "$DATA_DIR/.flowr" + +echo "Copying AGENTS.md..." +cp "$SRC_DIR/AGENTS.md" "$DATA_DIR/AGENTS.md" + +echo "Copying .opencode/ subdirectories (agents, skills, knowledge, tools)..." +mkdir -p "$DATA_DIR/.opencode" +for subdir in agents skills knowledge tools; do + if [ -d "$SRC_DIR/.opencode/$subdir" ]; then + cp -r "$SRC_DIR/.opencode/$subdir" "$DATA_DIR/.opencode/$subdir" + echo " Copied .opencode/$subdir/" + else + echo " Skipped .opencode/$subdir/ (not found in source)" + fi +done + +echo "Copying .templates/..." +cp -r "$SRC_DIR/.templates" "$DATA_DIR/.templates" + +echo "Copying .flowr/..." +cp -r "$SRC_DIR/.flowr" "$DATA_DIR/.flowr" + +echo "Cleaning up..." +rm -rf "$TEMP_DIR" + +echo "Done. smith/data/ updated from agents-smith v8_release." +echo "Files in smith/data/:" +find "$DATA_DIR" -not -name '__init__.py' -not -path "$DATA_DIR/__init__.py" -type f | head -20 +echo "..." 
+TOTAL=$(find "$DATA_DIR" -type f | wc -l) +echo "Total files: $TOTAL" +SIZE=$(du -sh "$DATA_DIR" | cut -f1) +echo "Total size: $SIZE" \ No newline at end of file diff --git a/smith/__init__.py b/smith/__init__.py index 18b665e..2deafdf 100644 --- a/smith/__init__.py +++ b/smith/__init__.py @@ -1 +1 @@ -"""Application package.""" +"""Smith — connect AI agent configurations to any project.""" diff --git a/smith/__main__.py b/smith/__main__.py index a200610..9456bc3 100644 --- a/smith/__main__.py +++ b/smith/__main__.py @@ -1,24 +1,6 @@ -"""Entry point for running the application as a module.""" - -import logging - -import fire - -logger = logging.getLogger(__name__) - - -def main(verbosity: str = "INFO") -> None: - """Run the application. - - Args: - verbosity: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). - """ - logging.basicConfig( - level=getattr(logging, verbosity.upper(), logging.INFO), - format="%(levelname)s - %(name)s: %(message)s", - ) - logger.info("Ready.") +"""Entry point for running smith as ``python -m smith``.""" +from smith.delivery.cli import main if __name__ == "__main__": - fire.Fire(main) + raise SystemExit(main()) diff --git a/smith/application/__init__.py b/smith/application/__init__.py new file mode 100644 index 0000000..97d47de --- /dev/null +++ b/smith/application/__init__.py @@ -0,0 +1 @@ +"""Application layer — use-case orchestration.""" diff --git a/smith/application/connect.py b/smith/application/connect.py new file mode 100644 index 0000000..a99edb0 --- /dev/null +++ b/smith/application/connect.py @@ -0,0 +1,30 @@ +"""Connect use-case — wire a project to a template source.""" + +from __future__ import annotations + +from pathlib import Path + +from smith.domain.connection import Connection +from smith.domain.value_objects import TemplateSource +from smith.infrastructure.filesystem import AtomicFileSystem +from smith.infrastructure.gitignore import GitignoreManager +from smith.infrastructure.metadata import SectionMetadata +from 
smith.infrastructure.template_source import TemplateSourceAdapter + + +class ConnectUseCase: + """Orchestrate the connection of a project to a template source.""" + + def __init__(self, project_dir: Path) -> None: + """Initialise with the target project directory.""" + self._project_dir = project_dir + + def execute(self, source: TemplateSource, overwrite: bool = False) -> None: + """Connect the project to the given template source.""" + connection = Connection( + template_source_port=TemplateSourceAdapter(source), + filesystem_port=AtomicFileSystem(self._project_dir), + gitignore_port=GitignoreManager(self._project_dir), + metadata_port=SectionMetadata(self._project_dir), + ) + connection.connect(source=source, overwrite=overwrite) diff --git a/smith/application/disconnect.py b/smith/application/disconnect.py new file mode 100644 index 0000000..29a4efd --- /dev/null +++ b/smith/application/disconnect.py @@ -0,0 +1,32 @@ +"""Disconnect use-case — remove agentic files from a project.""" + +from __future__ import annotations + +from pathlib import Path + +from smith.domain.connection import Connection +from smith.domain.value_objects import TemplateSource +from smith.infrastructure.filesystem import AtomicFileSystem +from smith.infrastructure.gitignore import GitignoreManager +from smith.infrastructure.metadata import SectionMetadata +from smith.infrastructure.template_source import TemplateSourceAdapter + + +class DisconnectUseCase: + """Orchestrate the disconnection of a project from its template source.""" + + def __init__(self, project_dir: Path) -> None: + """Initialise with the target project directory.""" + self._project_dir = project_dir + + def execute(self) -> list[Path]: + """Disconnect the project and return paths that were removed.""" + connection = Connection( + template_source_port=TemplateSourceAdapter( + TemplateSource(kind="bundled", location="agents-smith"), + ), + filesystem_port=AtomicFileSystem(self._project_dir), + 
gitignore_port=GitignoreManager(self._project_dir), + metadata_port=SectionMetadata(self._project_dir), + ) + return connection.disconnect() diff --git a/smith/application/status.py b/smith/application/status.py new file mode 100644 index 0000000..1892121 --- /dev/null +++ b/smith/application/status.py @@ -0,0 +1,32 @@ +"""Status use-case — report the current connection state of a project.""" + +from __future__ import annotations + +from pathlib import Path + +from smith.domain.connection import Connection +from smith.domain.value_objects import ConnectionStatus, TemplateSource +from smith.infrastructure.filesystem import AtomicFileSystem +from smith.infrastructure.gitignore import GitignoreManager +from smith.infrastructure.metadata import SectionMetadata +from smith.infrastructure.template_source import TemplateSourceAdapter + + +class StatusUseCase: + """Orchestrate querying the connection status of a project.""" + + def __init__(self, project_dir: Path) -> None: + """Initialise with the target project directory.""" + self._project_dir = project_dir + + def execute(self) -> ConnectionStatus: + """Return the current connection status of the project.""" + connection = Connection( + template_source_port=TemplateSourceAdapter( + TemplateSource(kind="bundled", location="agents-smith"), + ), + filesystem_port=AtomicFileSystem(self._project_dir), + gitignore_port=GitignoreManager(self._project_dir), + metadata_port=SectionMetadata(self._project_dir), + ) + return connection.status() diff --git a/smith/application/update.py b/smith/application/update.py new file mode 100644 index 0000000..e627ae5 --- /dev/null +++ b/smith/application/update.py @@ -0,0 +1,32 @@ +"""Update use-case — refresh agentic files in a connected project.""" + +from __future__ import annotations + +from pathlib import Path + +from smith.domain.connection import Connection +from smith.domain.value_objects import TemplateSource +from smith.infrastructure.filesystem import AtomicFileSystem +from 
smith.infrastructure.gitignore import GitignoreManager +from smith.infrastructure.metadata import SectionMetadata +from smith.infrastructure.template_source import TemplateSourceAdapter + + +class UpdateUseCase: + """Orchestrate updating agentic files in an already-connected project.""" + + def __init__(self, project_dir: Path) -> None: + """Initialise with the target project directory.""" + self._project_dir = project_dir + + def execute(self, source: TemplateSource | None = None) -> None: + """Update the project's agentic files, optionally from a new source.""" + connection = Connection( + template_source_port=TemplateSourceAdapter( + source or TemplateSource(kind="bundled", location="agents-smith"), + ), + filesystem_port=AtomicFileSystem(self._project_dir), + gitignore_port=GitignoreManager(self._project_dir), + metadata_port=SectionMetadata(self._project_dir), + ) + connection.update(source=source) diff --git a/smith/data/.flowr/.gitignore b/smith/data/.flowr/.gitignore new file mode 100644 index 0000000..7275442 --- /dev/null +++ b/smith/data/.flowr/.gitignore @@ -0,0 +1,2 @@ +# Ignore SVG files (regeneratable from mermaid) +*.svg \ No newline at end of file diff --git a/smith/data/.flowr/flows/architecture-flow.yaml b/smith/data/.flowr/flows/architecture-flow.yaml new file mode 100644 index 0000000..d0b69eb --- /dev/null +++ b/smith/data/.flowr/flows/architecture-flow.yaml @@ -0,0 +1,145 @@ +flow: architecture-flow +version: 4.0.0 +exits: + - complete + - needs_discovery + +states: + - id: architecture-assessment + attrs: + description: "SA evaluates whether the feature requires new architecture or fits the existing system, potentially overriding deployment decisions" + owner: SA + skills: + - assess-architecture + in: + - product_definition.md + - domain_model.md + - system.md + - technical_design.md + - context_map.md + out: + - product_definition.md: + - deployment + - quality_attributes + conditions: + architecture_complete: + system_md: ==true + 
technical_design_md: ==true + context_map_md: ==true + deployment_matches_codebase: ==true + architecture_exists: + system_md: ==true + technical_design_md: ==true + context_map_md: ==true + no_architecture_exists: + technical_design_md: ==false + context_map_md: ==false + next: + no_architecture_needed: + to: complete + when: architecture_complete + needs_context_update: + to: context-mapping + when: architecture_exists + needs_technical_design: + to: technical-design + when: architecture_exists + greenfield: + to: context-mapping + when: no_architecture_exists + delivery_mismatch_unresolvable: needs_discovery + needs_discovery: needs_discovery + + - id: context-mapping + attrs: + description: "SA maps bounded context relationships, integration points, and anti-corruption layers" + owner: SA + skills: + - map-contexts + in: + - domain_model.md + - product_definition.md + - glossary.md + out: + - context_map.md: + - context_relationships + - context_map_diagram + - integration_points + - anti_corruption_layers + next: + done: technical-design + needs_discovery: needs_discovery + + - id: technical-design + attrs: + description: "SA designs the technical solution — architectural style, stack, module structure, API/event contracts, interface definitions — and updates the system overview" + owner: SA + skills: + - design-technical-solution + in: + - context_map.md + - domain_model.md + - glossary.md + - system.md + - product_definition.md + out: + - technical_design.md: + - architectural_style + - quality_attributes + - stack + - module_structure + - api_contracts + - event_contracts + - interface_definitions + - c4_diagrams + - dependencies + - configuration_keys + - system.md: + - context + - container + - module_structure + - delivery + next: + done: review-signoff + needs_decisions: adr-draft + + - id: adr-draft + attrs: + description: "SA documents architecturally significant decisions as ADRs and records key decisions and active constraints in system.md" + owner: 
SA + skills: + - draft-adr + in: + - technical_design.md + - context_map.md + - domain_model.md + - product_definition.md + - glossary.md + - system.md + out: + - system.md: + - key_decisions + - active_constraints + - adr/.md + next: + done: review-signoff + + - id: review-signoff + attrs: + description: "R independently verifies architecture alignment with domain model and requirements, and cross-document consistency, before implementation begins" + owner: R + skills: + - review-architecture + in: + - context_map.md + - technical_design.md + - system.md + - adr/*.md + - product_definition.md + - domain_model.md + - glossary.md + out: [] + next: + approved: complete + inconsistent: architecture-assessment + needs_discovery: needs_discovery \ No newline at end of file diff --git a/smith/data/.flowr/flows/branding-flow.yaml b/smith/data/.flowr/flows/branding-flow.yaml new file mode 100644 index 0000000..33956cd --- /dev/null +++ b/smith/data/.flowr/flows/branding-flow.yaml @@ -0,0 +1,61 @@ +flow: branding-flow +version: 2.0.0 +exits: + - branded + - cancelled + +states: + - id: setup-branding + attrs: + description: "Interview stakeholder to establish brand identity: personality, visual metaphor, wording, and release naming" + owner: Design Agent + skills: + - setup-branding + in: [] + out: + - branding.md: + - identity + - release_naming + - wording + next: + confirmed: design-colors + cancelled: cancelled + + - id: design-colors + attrs: + description: "Select and validate a colour palette with WCAG contrast, dark-mode counterparts, and hue semantics" + owner: Design Agent + skills: + - design-colors + in: + - branding.md + out: + - branding.md: + - visual + next: + approved: design-assets + revise: design-colors + cancelled: cancelled + + - id: design-assets + attrs: + description: "Create logo and banner using favicon-first, monochrome-first, progressive-simplification process" + owner: Design Agent + skills: + - design-assets + in: + - branding.md + out: + - 
docs/assets/logo.svg + - docs/assets/banner.svg + conditions: + monochrome_passed: + logo_monochrome: ==true + scalability_passed: + logo_scalability: ==true + blur_passed: + logo_blur_test: ==true + next: + approved: branded + revise: design-assets + cancelled: cancelled \ No newline at end of file diff --git a/smith/data/.flowr/flows/delivery-flow.yaml b/smith/data/.flowr/flows/delivery-flow.yaml new file mode 100644 index 0000000..bd58922 --- /dev/null +++ b/smith/data/.flowr/flows/delivery-flow.yaml @@ -0,0 +1,80 @@ +flow: delivery-flow +version: 4.0.0 + +exits: + - next-feature + - rejected + - needs_development + - cancelled + +states: + - id: acceptance + attrs: + description: "PO validates business behavior against BDD scenarios and quality attributes" + owner: PO + skills: + - accept-feature + in: + - features/.feature + - product_definition.md + out: + - acceptance_evidence + - approval_record + conditions: + feature_accepted: + feature_status: ==ACCEPTED + next: + approved: + to: local-merge + when: feature_accepted + rejected: rejected + + - id: local-merge + attrs: + description: "SE squash-merges feature commits into local main and resolves any conflicts" + owner: SE + skills: + - merge-local + in: + - feature_commits + - approval_record + - features/.feature + out: + - merged_commits + next: + merged: publish-decision + conflict: needs_development + + - id: publish-decision + attrs: + description: "PO decides whether to publish the accumulated batch as a PR or continue accumulating features on local main" + owner: PO + skills: + - decide-batch-action + in: + - merged_commits + out: [] + next: + accumulate: next-feature + publish: pr-creation + + - id: pr-creation + attrs: + description: "SE creates an administrative PR for changes already on local main" + owner: SE + skills: + - create-pr + in: + - merged_commits + - features/.feature + out: [] + conditions: + merged: + ci_passes: ==true + no_changes_requested: ==true + next: + approved: + to: 
next-feature + when: merged + changes_requested: needs_development + cancelled: cancelled \ No newline at end of file diff --git a/smith/data/.flowr/flows/development-flow.yaml b/smith/data/.flowr/flows/development-flow.yaml new file mode 100644 index 0000000..ae9c1ab --- /dev/null +++ b/smith/data/.flowr/flows/development-flow.yaml @@ -0,0 +1,72 @@ +flow: development-flow +version: 4.0.0 +exits: + - done + - needs_planning + +states: + - id: project-structuring + attrs: + description: "SA creates the project skeleton — branch, package structure, port interfaces, aggregate root signatures — before any feature-specific stubs" + owner: SA + skills: + - structure-project + in: + - features/.feature + - technical_design.md + - domain_model.md + - glossary.md + - context_map.md + - adr/*.md + - product_definition.md + out: + - git_branch + next: + ready: tdd-cycle + needs_planning: needs_planning + + - id: tdd-cycle + attrs: + description: "SE implements the feature through repeated RED-GREEN-REFACTOR cycles until all BDD examples pass" + flow: tdd-cycle-flow + flow-version: "^2" + conditions: + design_declared: + yagni: ==true + kiss: ==true + dry: ==true + oc: ==true + solid: ==true + patterns: ==true + next: + all_green: + to: review-gate + when: design_declared + blocked: project-structuring + + - id: review-gate + attrs: + description: "R independently verifies implementation across three tiers — design, structure, and conventions — before commit" + flow: review-gate-flow + flow-version: "^2" + next: + pass: commit + fail: tdd-cycle + + - id: commit + attrs: + description: "SE commits the reviewed, passing implementation with traceability to feature files" + owner: SE + skills: + - commit-implementation + in: + - test_implementations + - source_implementations + - design_review_evidence + - structure_review_evidence + - conventions_review_evidence + - features/.feature + out: + - feature_commits + next: + done: done \ No newline at end of file diff --git 
a/smith/data/.flowr/flows/discovery-flow.yaml b/smith/data/.flowr/flows/discovery-flow.yaml new file mode 100644 index 0000000..d7aaecb --- /dev/null +++ b/smith/data/.flowr/flows/discovery-flow.yaml @@ -0,0 +1,101 @@ +flow: discovery-flow +version: 3.0.0 +exits: + - complete + +states: + - id: stakeholder-interview + attrs: + description: "PO interviews stakeholders to understand pain points, business goals, and domain terms, then decides how much discovery is needed" + owner: PO + skills: + - conduct-interview + in: + - interview-notes/*.md + out: + - interview-notes/.md: + - pain_points + - business_goals + - terms_to_define + - quality_attributes + next: + needs_full_discovery: event-storming + needs_scope_only: scope-boundary + already_known: complete + + - id: event-storming + attrs: + description: "DE facilitates an event storming workshop to surface domain events, commands, and aggregate candidates" + owner: DE + skills: + - facilitate-event-storming + in: + - interview-notes/*.md + out: + - domain_model.md: + - event_map + - context_candidates + - aggregate_candidates + next: + done: language-definition + needs_reinterview: stakeholder-interview + + - id: language-definition + attrs: + description: "DE formalizes the ubiquitous language by defining domain terms into a glossary" + owner: DE + skills: + - define-ubiquitous-language + in: + - interview-notes/*.md + - domain_model.md + out: + - glossary.md + next: + done: domain-modeling + needs_restorming: event-storming + + - id: domain-modeling + attrs: + description: "DE formalizes candidates into proper bounded contexts, entities, relationships, and aggregate boundaries" + owner: DE + skills: + - model-domain + in: + - glossary.md + - domain_model.md + out: + - domain_model.md: + - bounded_contexts + - entities + - relationships + - aggregate_boundaries + - summary + - glossary.md + next: + done: scope-boundary + contradiction_found: language-definition + needs_reinterview: stakeholder-interview + + - id: 
scope-boundary + attrs: + description: "PO defines what the product IS and IS NOT, who the users are, and the delivery order" + owner: PO + skills: + - define-product-scope + in: + - domain_model.md + - glossary.md + out: + - product_definition.md: + - what_is + - what_is_not + - why + - users + - out_of_scope + - delivery_order + - quality_attributes + - deployment + next: + done: complete + needs_reinterview: stakeholder-interview \ No newline at end of file diff --git a/smith/data/.flowr/flows/feature-development-flow.yaml b/smith/data/.flowr/flows/feature-development-flow.yaml new file mode 100644 index 0000000..50119dc --- /dev/null +++ b/smith/data/.flowr/flows/feature-development-flow.yaml @@ -0,0 +1,40 @@ +flow: feature-development-flow +version: 6.0.0 + +exits: + - needs_architecture + - cancelled + - completed + +states: + - id: planning + flow: planning-flow + flow-version: "^4" + next: + complete: development + needs_architecture: needs_architecture + no_features: completed + + - id: development + flow: development-flow + flow-version: "^4" + next: + done: delivery + needs_planning: planning + + - id: delivery + flow: delivery-flow + flow-version: "^4" + next: + next-feature: planning + rejected: post-mortem + needs_development: development + cancelled: cancelled + + - id: post-mortem + flow: post-mortem-flow + flow-version: "^2" + next: + complete: planning + needs_architecture: needs_architecture + no_action: cancelled \ No newline at end of file diff --git a/smith/data/.flowr/flows/main-flow.yaml b/smith/data/.flowr/flows/main-flow.yaml new file mode 100644 index 0000000..43bfd59 --- /dev/null +++ b/smith/data/.flowr/flows/main-flow.yaml @@ -0,0 +1,31 @@ +flow: main-flow +version: 7.0.0 +exits: [completed, cancelled] + +states: + - id: discovery + attrs: + description: "Understand the domain, define scope, and establish ubiquitous language through stakeholder interviews and domain modeling" + flow: discovery-flow + flow-version: "^3" + next: + 
complete: architecture + + - id: architecture + attrs: + description: "Design technical architecture, context boundaries, and API contracts for the entire project" + flow: architecture-flow + flow-version: "^4" + next: + complete: feature-development + needs_discovery: discovery + + - id: feature-development + attrs: + description: "Feature-level loop: Planning → Development → Acceptance → Delivery per feature" + flow: feature-development-flow + flow-version: "^6" + next: + needs_architecture: architecture + cancelled: cancelled + completed: completed \ No newline at end of file diff --git a/smith/data/.flowr/flows/planning-flow.yaml b/smith/data/.flowr/flows/planning-flow.yaml new file mode 100644 index 0000000..7bb966d --- /dev/null +++ b/smith/data/.flowr/flows/planning-flow.yaml @@ -0,0 +1,156 @@ +flow: planning-flow +version: 4.0.0 +exits: + - complete + - needs_architecture + - no_features + +states: + - id: feature-selection + attrs: + description: "PO picks the next feature to develop based on business priority and delivery order, verifying that architecture covers it" + owner: PO + skills: + - select-feature + in: + - product_definition.md + - technical_design.md + out: [] + next: + selected: feature-specification + needs_architecture: needs_architecture + no_features: no_features + + - id: feature-specification + attrs: + description: "PO conducts a targeted conversation with stakeholders to capture feature-specific behavioral rules, scenarios, and acceptance criteria" + owner: PO + skills: + - specify-feature + in: + - product_definition.md + - domain_model.md + - glossary.md + - technical_design.md + out: + - interview-notes/.md + next: + done: feature-breakdown + needs_architecture: needs_architecture + + - id: feature-breakdown + attrs: + description: "PO decomposes the selected feature into Rule blocks (user stories) within the feature file based on specification interview and domain constraints" + owner: PO + skills: + - break-down-feature + in: + - 
features/.feature + - product_definition.md + - technical_design.md + - interview-notes/*.md + out: + - features/.feature: + - rules + conditions: + invest_passed: + independent: ==true + negotiable: ==true + valuable: ==true + estimable: ==true + small: ==true + testable: ==true + next: + done: + to: bdd-features + when: invest_passed + needs_respecification: feature-specification + + - id: bdd-features + attrs: + description: "PO writes concrete Given/When/Then Example blocks for each Rule in the feature file using ubiquitous language from the glossary" + owner: PO + skills: + - write-bdd-features + in: + - features/.feature + - product_definition.md + - domain_model.md + - glossary.md + out: + - features/.feature: + - examples + conditions: + examples_have_ids: + all_examples_have_ids: ==true + examples_have_gherkin: + all_examples_have_gherkin: ==true + premortem_done: + premortem_done: ==true + decomposition_valid: + concerns: <=2 + must_examples: <=8 + examples_complete: + all_examples_have_ids: ==true + all_examples_have_gherkin: ==true + premortem_done: ==true + concerns: <=2 + must_examples: <=8 + next: + done: + to: create-py-stubs + when: examples_complete + needs_respecification: feature-specification + + - id: create-py-stubs + attrs: + description: "SA creates minimum typed stubs and test stubs as domain model breadcrumbs for the current feature" + owner: SA + skills: + - create-py-stubs + in: + - features/.feature + - technical_design.md + - domain_model.md + - glossary.md + out: + - typed_source_stubs + - test_skeletons + conditions: + stubs_traceable: + all_ids_have_stubs: ==true + next: + done: definition-of-done + + - id: definition-of-done + attrs: + description: "PO tailors the definition of done criteria based on the specific feature's requirements" + owner: PO + skills: + - define-done + in: + - features/.feature + - product_definition.md + out: + - product_definition.md: + - definition_of_done + next: + done: ready + + - id: ready + attrs: + 
description: "PO confirms all planning artifacts are complete and the feature is ready for development" + owner: PO + skills: + - confirm-baseline + in: + - features/.feature + - product_definition.md + out: [] + conditions: + feature_baselined: + feature_status: ==BASELINED + next: + done: + to: complete + when: feature_baselined \ No newline at end of file diff --git a/smith/data/.flowr/flows/post-mortem-flow.yaml b/smith/data/.flowr/flows/post-mortem-flow.yaml new file mode 100644 index 0000000..065ae75 --- /dev/null +++ b/smith/data/.flowr/flows/post-mortem-flow.yaml @@ -0,0 +1,66 @@ +flow: post-mortem-flow +version: 2.0.0 +exits: + - complete + - needs_architecture + - no_action + +states: + - id: root-cause-analysis + attrs: + description: "R investigates why the PR was rejected, identifying the failure point and missed gate" + owner: R + skills: + - analyze-root-cause + in: [] + out: + - root_cause_analysis + next: + issues_found: document-findings + no_issues_found: no_action + + - id: document-findings + attrs: + description: "R records what failed, why, and which quality gate was missed" + owner: R + skills: + - document-post-mortem + in: + - root_cause_analysis + out: + - post-mortem/PM_YYYYMMDD_.md: + - failed_at + - root_cause + - missed_gate + next: + done: extract-lessons + + - id: extract-lessons + attrs: + description: "R determines the corrective fix and updates the post-mortem with remediation steps" + owner: R + skills: + - extract-lessons + in: + - post-mortem/PM_YYYYMMDD_.md + out: + - post-mortem/PM_YYYYMMDD_.md: + - fix + next: + done: action-items + + - id: action-items + attrs: + description: "R determines whether the feature needs replanning, architecture changes, or should be abandoned" + owner: R + skills: + - determine-action-items + in: + - post-mortem/PM_YYYYMMDD_.md + out: + - post-mortem/PM_YYYYMMDD_.md: + - restart_check + next: + replan: complete + architecture_issue: needs_architecture + abandon: no_action \ No newline at end of 
file diff --git a/smith/data/.flowr/flows/review-gate-flow.yaml b/smith/data/.flowr/flows/review-gate-flow.yaml new file mode 100644 index 0000000..6271383 --- /dev/null +++ b/smith/data/.flowr/flows/review-gate-flow.yaml @@ -0,0 +1,63 @@ +flow: review-gate-flow +version: 2.0.0 +exits: + - pass + - fail + +states: + - id: design-review + attrs: + description: "R verifies implementation aligns with domain model, follows DDD patterns, and respects architectural decisions" + owner: R + skills: + - review-design + in: + - domain_model.md + - glossary.md + - technical_design.md + - context_map.md + - system.md + - product_definition.md + - adr/*.md + - refactored_source + out: + - design_review_evidence + next: + pass: structure-review + fail: fail + + - id: structure-review + attrs: + description: "R verifies test coverage, BDD example pass rate, test coupling, and behavior-vs-structure testing" + owner: R + skills: + - review-structure + in: + - coverage_reports + - test_output + - refactored_source + - features/.feature + - domain_model.md + - glossary.md + out: + - structure_review_evidence + next: + pass: conventions-review + fail: fail + + - id: conventions-review + attrs: + description: "R verifies formatting, docstrings, type hints, import ordering, and lint rules unrelated to design" + owner: R + skills: + - review-conventions + in: + - linter_output + - refactored_source + - product_definition.md + - glossary.md + out: + - conventions_review_evidence + next: + pass: pass + fail: fail \ No newline at end of file diff --git a/smith/data/.flowr/flows/setup-project-flow.yaml b/smith/data/.flowr/flows/setup-project-flow.yaml new file mode 100644 index 0000000..8411373 --- /dev/null +++ b/smith/data/.flowr/flows/setup-project-flow.yaml @@ -0,0 +1,89 @@ +flow: setup-project-flow +version: 2.0.0 +exits: [initialized, cancelled] + +states: + - id: assess-requirements + attrs: + description: "Interview user to understand project needs and assess parameters" + owner: 
Setup Agent + skills: + - setup-assess + in: [] + out: + - requirements_assessment + next: + assessed: configure-parameters + cancelled: cancelled + + - id: configure-parameters + attrs: + description: "Gather and confirm project parameters based on assessment" + owner: Setup Agent + skills: + - setup-configure + in: + - requirements_assessment + out: + - template-config.yaml + conditions: + template_files_exist: + pyproject_toml: ==true + readme_md: ==true + github_workflows_ci_yml: ==true + license: ==true + tests_unit_main_test_py: ==true + app_directory: ==true + next: + confirmed: + to: apply-substitutions + when: template_files_exist + missing_files: cancelled + + - id: apply-substitutions + attrs: + description: "Apply all text substitutions, rename package, reset version" + owner: Setup Agent + skills: + - setup-apply + in: + - template-config.yaml + out: + - pyproject.toml + - README.md + - tests/unit/main_test.py + - .github/workflows/ci.yml + - LICENSE + - template-config.yaml + - package_directory + conditions: + substitutions_successful: + no_stale_app_imports: ==true + package_renamed: ==true + version_reset: ==true + next: + applied: + to: verify-and-finalize + when: substitutions_successful + failed: cancelled + + - id: verify-and-finalize + attrs: + description: "Verify transformations, clean template artifacts, finalize project" + owner: Setup Agent + skills: + - setup-verify + in: + - package_directory + out: + - git_remote + conditions: + verification_passed: + tests_pass: ==true + imports_valid: ==true + artifacts_cleaned: ==true + next: + initialized: + to: initialized + when: verification_passed + failed: cancelled \ No newline at end of file diff --git a/smith/data/.flowr/flows/tdd-cycle-flow.yaml b/smith/data/.flowr/flows/tdd-cycle-flow.yaml new file mode 100644 index 0000000..297ce8c --- /dev/null +++ b/smith/data/.flowr/flows/tdd-cycle-flow.yaml @@ -0,0 +1,51 @@ +flow: tdd-cycle-flow +version: 2.0.0 +exits: + - all_green + - blocked + 
+states: + - id: red + attrs: + description: "SE writes a failing test body for one BDD example, specifying expected behavior before implementation exists" + owner: SE + skills: + - write-test + in: + - test_skeletons + - typed_source_stubs + out: + - test_implementations + next: + test_written: green + blocked: blocked + + - id: green + attrs: + description: "SE writes the minimum production code needed to make the failing test pass" + owner: SE + skills: + - implement-minimum + in: + - test_implementations + - typed_source_stubs + out: + - source_implementations + next: + test_passes: refactor + + - id: refactor + attrs: + description: "SE improves code structure while keeping all tests passing, then cycles to the next example or exits when all pass" + owner: SE + skills: + - refactor + in: + - source_implementations + - test_implementations + out: + - source_implementations + - refactored_source + next: + next_example: red + all_examples_pass: all_green \ No newline at end of file diff --git a/smith/data/.flowr/sessions/current.yaml b/smith/data/.flowr/sessions/current.yaml new file mode 100644 index 0000000..4fc6167 --- /dev/null +++ b/smith/data/.flowr/sessions/current.yaml @@ -0,0 +1,10 @@ +flow: feature-development-flow +state: post-mortem +stack: + - flow: main-flow + state: feature-development + - flow: feature-development-flow + state: delivery +params: + selected_feature: smith-commands + post_mortem: PM_20260501_missing-overwrite-flag \ No newline at end of file diff --git a/smith/data/.opencode/agents/design-agent.md b/smith/data/.opencode/agents/design-agent.md new file mode 100644 index 0000000..d680cd9 --- /dev/null +++ b/smith/data/.opencode/agents/design-agent.md @@ -0,0 +1,10 @@ +--- +description: "Design Agent — creates and maintains brand identity, visual assets, and colour systems" +mode: subagent +temperature: 0.4 +--- + +# Design Agent + +You are the Design Agent. 
You create and maintain the project's brand identity, visual assets, and colour systems. +You follow monochrome-first, favicon-first design principles and validate all colours against WCAG contrast requirements. \ No newline at end of file diff --git a/smith/data/.opencode/agents/domain-expert.md b/smith/data/.opencode/agents/domain-expert.md new file mode 100644 index 0000000..2dcddc4 --- /dev/null +++ b/smith/data/.opencode/agents/domain-expert.md @@ -0,0 +1,10 @@ +--- +description: "Domain Expert — facilitates discovery and models the domain" +mode: subagent +temperature: 0.3 +--- + +# Domain Expert + +You are the Domain Expert. You facilitate discovery and model the domain. +You are responsible for event storming, ubiquitous language, and domain modeling. \ No newline at end of file diff --git a/smith/data/.opencode/agents/product-owner.md b/smith/data/.opencode/agents/product-owner.md new file mode 100644 index 0000000..03cea68 --- /dev/null +++ b/smith/data/.opencode/agents/product-owner.md @@ -0,0 +1,10 @@ +--- +description: "Product Owner — owns scope, requirements, and acceptance" +mode: subagent +temperature: 0.4 +--- + +# Product Owner + +You are the Product Owner. You own what gets built and when. +You are the sole decision-maker on feature priority, scope, and acceptance. \ No newline at end of file diff --git a/smith/data/.opencode/agents/reviewer.md b/smith/data/.opencode/agents/reviewer.md new file mode 100644 index 0000000..f58475d --- /dev/null +++ b/smith/data/.opencode/agents/reviewer.md @@ -0,0 +1,10 @@ +--- +description: "Reviewer — independently verifies architecture and implementation" +mode: subagent +temperature: 0.3 +--- + +# Reviewer + +You are the Reviewer. You independently verify architecture and implementation. +You are never the same agent who designed or built the work under review. 
\ No newline at end of file diff --git a/smith/data/.opencode/agents/setup-agent.md b/smith/data/.opencode/agents/setup-agent.md new file mode 100644 index 0000000..170c921 --- /dev/null +++ b/smith/data/.opencode/agents/setup-agent.md @@ -0,0 +1,10 @@ +--- +description: "Setup Agent — transforms templates into new projects" +mode: subagent +temperature: 0.3 +--- + +# Setup Agent + +You are the Setup Agent. You transform a template into a new project by assessing requirements and applying configured substitutions. +You decide when parameters are appropriate and when the project transformation is complete. \ No newline at end of file diff --git a/smith/data/.opencode/agents/software-engineer.md b/smith/data/.opencode/agents/software-engineer.md new file mode 100644 index 0000000..99c4972 --- /dev/null +++ b/smith/data/.opencode/agents/software-engineer.md @@ -0,0 +1,10 @@ +--- +description: "Software Engineer — implements, tests, and ships production code" +mode: subagent +temperature: 0.3 +--- + +# Software Engineer + +You are the Software Engineer. You write production code that passes tests and meets acceptance criteria. +You are responsible for implementation, TDD cycles, commits, and merge operations. \ No newline at end of file diff --git a/smith/data/.opencode/agents/system-architect.md b/smith/data/.opencode/agents/system-architect.md new file mode 100644 index 0000000..97b6ccc --- /dev/null +++ b/smith/data/.opencode/agents/system-architect.md @@ -0,0 +1,10 @@ +--- +description: "System Architect — designs technical architecture and reviews implementation" +mode: subagent +temperature: 0.3 +--- + +# System Architect + +You are the System Architect. You design the technical architecture and review implementation. +You are responsible for technical decisions, ADRs, and project structuring. 
\ No newline at end of file diff --git a/smith/data/.opencode/knowledge/agent-design/principles.md b/smith/data/.opencode/knowledge/agent-design/principles.md new file mode 100644 index 0000000..b608395 --- /dev/null +++ b/smith/data/.opencode/knowledge/agent-design/principles.md @@ -0,0 +1,100 @@ +--- +domain: agent-design +tags: [agents, identity, subagents, separation-of-concerns] +last-updated: 2026-04-29 +--- + +# Agent Design Principles + +## Key Takeaways + +- Agents contain identity only (who I am, what I decide); the flow YAML is the source of truth for routing, skills, and artifacts. +- Use subagents for investigation tasks that rapidly exhaust context; they quarantine token cost and prevent anchoring bias (Tversky & Kahneman, 1974). +- Maintain a three-file separation (AGENTS.md, agents, skills) to prevent conflicting instructions from competing sources, positional attention degradation (Liu et al., 2023), and redundant content creating competing attention targets. +- Agents are minimal — the flow determines which skill to load, the skill determines how to do the work, the knowledge provides the reference material. +- AGENTS.md must discover, not enumerate — provide discovery commands and naming conventions, never file inventories that go stale. + +## Concepts + +**Agent = Identity Only**: The agent file defines who the agent is and what it decides. It does NOT contain skill lists, ownership tables, routing logic, artifact paths, or knowledge references. The flow YAML is the single source of truth for routing (owner, skills, transitions, artifacts). Duplicating any of these in the agent creates a second source of truth that will drift. 
+ +**Three-File Separation**: Three failure modes observed in LLM context windows produce a three-file split: +- **Conflicting instructions** from multiple sources — each concern has one file +- **Positional attention degradation** (Liu et al., 2023 — middle content receives less attention) — keep files short +- **Redundant content** creating competing attention targets — each fact in one location + +| Concern | File | Purpose | Loaded When | +|---|---|---|---| +| Navigation | `AGENTS.md` | Where files live, how to resolve wikilinks | Every session | +| Identity | `.opencode/agents/*.md` | Who I am, what I decide | When role invoked | +| Procedure | `.opencode/skills/*/SKILL.md` | Step-by-step instructions | On demand | +| Reference | `.opencode/knowledge/*/` | What and why | On demand, via wikilinks | + +**Subagents for Investigation**: When a task requires extensive reading (auditing code, researching decisions), use a subagent with read-only or restricted permissions. Subagents quarantine token cost and prevent anchoring bias from the main conversation context. + +**Effective Instruction Writing**: Specific IF-THEN triggers at decision points are 2-3x more likely to execute than general intentions (Gollwitzer, 1999). But these triggers belong in the skill steps at the decision point, NOT in the agent file. The agent file is too far from the work context for triggers to be effective. + +**Discover, Don't Enumerate**: AGENTS.md must never enumerate files that can go stale. Instead, it provides discovery commands (`ls`, `find`) and file naming conventions so agents discover what exists at runtime. This prevents drift between documentation and reality — an inventory that lists 30 skills will be wrong the moment a skill is added or removed, but a discovery command is always correct. + +**Naming Distinction**: `AGENTS.md` (project root) is the navigation file loaded every session. `.opencode/agents/*.md` are agent identity files loaded on demand. 
Despite the similar names, they serve different purposes: AGENTS.md tells you where things are; agent files tell you who you are. + +**Research Notes Are Consultable, Not Session-Loaded**: Research notes in `docs/research/` are source material cited by knowledge files. They are not loaded every session. An agent consults them only when a knowledge file references them and more detail is needed. + +## Content + +### Agent File Format + +```markdown +--- +description: "<Role Name> — <one-line responsibility>" +mode: subagent +temperature: <0.3-0.7> +--- + +# <Role Name> + +You are the <Role Name>. <One sentence stating what this agent owns>. +<One sentence stating what this agent decides>. +``` + +That is the entire agent. No skill lists, no ownership tables, no IF-THEN triggers, no knowledge references, no routing. + +### What NOT to Put in an Agent File + +- **Skill lists** — the flow `skills` field determines which skill to load +- **Ownership tables** — the flow `input/edited/output_artifacts` defines what each state reads and writes +- **Routing logic** — the flow `next` field defines transitions +- **Knowledge references** — the skill's `## Load` section handles knowledge loading +- **Step procedures** — skills contain procedure, agents contain identity +- **Quality gates** — the flow `conditions` field defines gate conditions + +### AGENTS.md Is Navigation Only + +AGENTS.md is loaded every session. It should contain ONLY: +- Where files live (project structure) +- How to resolve wikilinks +- Session protocol (use `flowr status`, `flowr advance`) +- File naming conventions +- Discovery commands (not file inventories) + +It must NOT contain quality gates, priority orders, step procedures, knowledge content, or file enumerations.
+ +### Naming Conventions + +| Path | Purpose | Loaded When | +|---|---|---| +| `/AGENTS.md` | Root navigation (where things are, how to discover them) | Every session | +| `.opencode/agents/{role}.md` | Agent identity (who I am, what I decide) | When role invoked | +| `.opencode/skills/{skill}/SKILL.md` | Skill procedure (step-by-step instructions) | On demand | +| `.opencode/knowledge/{domain}/{concept}.md` | Knowledge reference (progressive disclosure) | On demand, via wikilinks | +| `.templates/{path}.template` | Artifact templates | When creating artifacts | +| `docs/research/{domain}/{concept}.md` | Research source notes (cited by knowledge files) | When knowledge file references them | +| `docs/adr/ADR_YYYYMMDD_{slug}.md` | Architecture decision records | When referenced | + +Note: Despite similar names, `AGENTS.md` (root navigation) and `.opencode/agents/` (identity files) serve different purposes. + +## Related + +- [[skill-design/principles]] +- [[knowledge-design/principles]] +- [[workflow/flowr-spec]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/adr.md b/smith/data/.opencode/knowledge/architecture/adr.md new file mode 100644 index 0000000..2bda578 --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/adr.md @@ -0,0 +1,79 @@ +--- +domain: architecture +tags: [adr, architecture-decision-records, decision-making] +last-updated: 2026-04-29 +--- + +# Architecture Decision Records + +## Key Takeaways + +- ADRs document architecturally significant decisions — decisions that are hard to change and affect multiple components (Nygard, 2011). +- Each ADR follows a fixed structure: Status, Context, Decision, Reason, Alternatives, Consequences (Nygard, 2011). +- ADRs are append-only — once written, they are never edited. Superseded ADRs get a new "Superseded by" reference, not a revision. +- ADRs must be consistent with feature requirements — every ADR should reference the `@id` criteria it addresses. 
+- ADR risk assessment uses Probability × Impact classification (Boehm, 1991) to prioritise mitigation effort on the highest-exposure risks. + +## Concepts + +**Architecturally Significant** — A decision is architecturally significant if it affects multiple components, is hard to reverse, or constrains future choices (Nygard, 2011; Fowler, 2003). Choosing a database is architecturally significant. Choosing a variable name is not. When in doubt, write the ADR. + +**ADR Structure** — Every ADR contains (Nygard, 2011): Status (Proposed, Accepted, Deprecated, Superseded), Context (the forces at play, the problem being solved), Decision (the choice made), Reason (why this choice over alternatives), Alternatives (other options considered and why they were rejected), Consequences (what changes because of this decision, both positive and negative). + +**Append-Only Discipline** — ADRs capture the decision as it was made at the time. If understanding changes, write a new ADR that supersedes the old one. This preserves the history of architectural reasoning and prevents retroactive justification. + +**ADR Consistency** — Every ADR must be consistent with the feature requirements it addresses. During review, check that each ADR aligns with the `@id` criteria in the feature file. An ADR that contradicts a requirement is a signal that either the ADR or the requirement needs updating. Architecture review is adversarial — the reviewer actively seeks inconsistencies and gaps, leveraging accountability to an unknown audience (Tetlock, 1985) to produce more rigorous decisions. + +**Risk Assessment** (Boehm, 1991) — Each ADR's Risk Assessment table uses Probability × Impact to classify and prioritise risks. Probability (Low/Medium/High) estimates how likely the risk is to materialise. Impact (Low/Medium/High) estimates how severe the consequence would be. Risks with High Probability and High Impact demand explicit mitigations or rejection of the decision. 
Risks with Low Probability and Low Impact may be accepted without mitigation. Risk leverage — the ratio of risk reduction to mitigation cost — helps prioritise which mitigations to invest in first. + +## Content + +### ADR Template Fields + +| Field | Content | +|---|---| +| Status | Proposed, Accepted, Deprecated, or Superseded | +| Context | What is the issue that we're seeing that is motivating this decision? | +| Decision | What is the change that we're proposing/making? | +| Reason | Why is this the best choice given the alternatives? | +| Alternatives | What other choices were considered and why were they rejected? | +| Consequences | What becomes easier or harder to do because of this change? | + +### When to Write an ADR + +- Choosing a framework, library, or database +- Choosing an architectural style (monolith, microservices, event-driven) +- Choosing a communication pattern (sync HTTP, async events, gRPC) +- Choosing a data storage strategy (SQL, NoSQL, event sourcing) +- Choosing a deployment strategy (container, serverless, bare metal) +- Introducing a new bounded context boundary +- Changing a cross-cutting concern (authentication, logging, error handling) + +### Risk Assessment Classification + +| Probability \ Impact | Low | Medium | High | +|---|---|---|---| +| **High** | Monitor | Mitigate | Mitigate or Reject | +| **Medium** | Accept | Monitor | Mitigate | +| **Low** | Accept | Accept | Monitor | + +- **Mitigate**: Explicit mitigation strategy required before accepting the ADR +- **Monitor**: Flag for future review; no immediate action needed +- **Accept**: Risk is acceptable; document in ADR +- **Reject**: Risk is too high; reconsider the decision or choose an alternative + +Risk leverage prioritises mitigations with the highest ratio of risk reduction to mitigation cost (Boehm, 1991). 
+ +### When NOT to Write an ADR + +- Choosing a variable name +- Choosing a code style (use project conventions) +- Choosing a test framework (use project conventions) +- Any decision that is easily reversible and affects only one component + +## Related + +- [[architecture/assessment]] +- [[architecture/technical-design]] +- [[architecture/reconciliation]] +- [[architecture/quality-attributes]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/assessment.md b/smith/data/.opencode/knowledge/architecture/assessment.md new file mode 100644 index 0000000..d2716d8 --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/assessment.md @@ -0,0 +1,69 @@ +--- +domain: architecture +tags: [architecture, delivery-mechanism, bounded-contexts, hexagonal-architecture] +last-updated: 2026-04-29 +--- + +# Architecture Assessment + +## Key Takeaways + +- Delivery mechanism is the boundary between the domain and the outside world (Cockburn, 2005) — HTTP, CLI, message queue, etc. — it must be verified against the product definition before designing anything. +- Architecture exists when system.md, technical_design.md, and context_map.md all contain meaningful content aligned with the current domain. +- If architecture exists but delivery mechanism mismatches, record it as an ADR before proceeding. +- Hexagonal architecture (Ports & Adapters — Cockburn, 2005) keeps the domain independent of delivery mechanism — verify this is followed. +- SA conducts an assessment interview to verify and correct quality attributes, deployment constraints, and hidden requirements before routing. + +## Concepts + +**Delivery Mechanism Verification** — Before designing a feature, the architect must verify that the delivery mechanism stated in the product definition (e.g., "web application", "CLI tool", "API service") matches the actual codebase implementation. 
A mismatch (e.g., product says "web" but codebase is CLI) must be recorded as an ADR and resolved before proceeding. This checkpoint prevents building on a foundation that doesn't match the product's intent. + +**Architecture Existence Check** — Architecture is considered to exist when three documents contain meaningful, aligned content: system.md (current state snapshot), technical_design.md (technical decisions), and context_map.md (bounded context relationships). Empty or placeholder content does not count. If all three exist and are coherent, the architect evaluates whether the existing architecture covers the new feature or needs updating. + +**Hexagonal Architecture (Ports & Adapters — Cockburn, 2005)** — The domain core must not depend on infrastructure. Ports define what the domain needs; adapters provide concrete implementations. When reviewing architecture, verify that external dependencies (databases, frameworks, APIs) are behind Protocol interfaces, not directly referenced in domain code. + +**Assessment Interview** — The SA interviews the stakeholder to surface information not captured in the artifacts. Topics: quality attribute priorities (are the documented priorities accurate and complete?), deployment constraints (does the deployment section match reality?), hidden requirements (constraints not captured in the artifacts), and architecture gaps (does the current system fail to cover anything needed?). Apply gap-finding techniques from [[requirements/interview-techniques]]: use CIT to probe for specific past failures, use Laddering to climb from surface preferences to real constraints. Apply a pre-mortem from [[requirements/pre-mortem]]: "Imagine this architecture is built exactly as designed, all tests pass, but it fails in production — what would be missing?" Corrections are written into existing artifacts (product_definition.md), not into separate interview notes. 
+ +## Content + +### Delivery Mechanism Checkpoint + +The delivery mechanism is the outermost layer — how users or systems interact with the product. Common delivery mechanisms: + +- Web application (HTTP server, browser-based) +- CLI tool (terminal interface) +- API service (REST, GraphQL, gRPC) +- Desktop application (GUI) +- Library/SDK (programmatic interface) + +When assessing architecture for a new feature: + +1. Read the product definition's deployment section +2. Verify the codebase's actual entry points match +3. If mismatched, create an ADR documenting the discrepancy and the resolution path +4. Only proceed with technical design after the delivery mechanism is verified + +### Architecture Existence Decision Tree + +| Condition | Routing | +|---|---| +| No architecture documents exist | Needs full architecture (technical-design) | +| Architecture exists, covers the feature | No architecture needed (proceed to planning) | +| Architecture exists, needs updating for this feature | Needs context update (context-map then technical-design) | +| Architecture exists, fundamental gap discovered | Needs discovery (back to discovery flow) | + +### Hexagonal Architecture Verification + +When reviewing existing architecture: + +- Every external dependency must have a Protocol (interface) in the domain layer +- The domain layer must have zero imports from infrastructure packages +- Adapters must implement domain-defined Ports, not the other way around +- If the domain references a concrete technology, it's a violation + +## Related + +- [[architecture/quality-attributes]] +- [[architecture/reconciliation]] +- [[requirements/pre-mortem]] +- [[workflow/flowr-spec]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/contract-design.md b/smith/data/.opencode/knowledge/architecture/contract-design.md new file mode 100644 index 0000000..20e3c69 --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/contract-design.md @@ -0,0 +1,72 
@@ +--- +domain: architecture +tags: [api-contracts, event-contracts, interfaces, rest, hexagonal-architecture] +last-updated: 2026-04-29 +--- + +# Contract Design + +## Key Takeaways + +- API contracts, event contracts, and interface definitions are the boundaries between modules — design them before implementation (contract-first design). +- REST constraints (Fielding, 2000) define API contracts as resource shapes and media types, not procedure calls — the contract is what data a resource contains and how it can transition, not a method signature. +- Event contracts must specify not just payload schema but also ordering guarantees, delivery semantics, and error handling (Hohpe & Woolf, 2003). +- Interface definitions (Protocol/abstract classes) in the domain layer define what the domain needs; infrastructure implements them — the dependency arrow always points inward (Cockburn, 2005; Evans, 2003). + +## Concepts + +**Contract-First Design** — Define the boundaries between modules before implementing them. API contracts specify request/response shapes, error codes, authentication, and versioning. Event contracts specify event names, payload schemas, ordering guarantees, and delivery semantics. Interface definitions specify the operations the domain requires without specifying how they are implemented. All three contract types are living documents that evolve with the system but must be versioned to maintain backward compatibility. + +**REST and API Contracts** (Fielding, 2000) — REST defines API contracts through resources (identified by URIs), representations (media types like JSON Schema), and standard methods (GET, POST, PUT, DELETE). The Uniform Interface constraint means the client only needs to understand media types and standard methods, not server implementation details. API contracts should specify: resource paths, request/response schemas, error response formats, authentication requirements, and rate limits. 
+ +**Event Contracts** (Hohpe & Woolf, 2003) — Asynchronous messaging between systems requires explicit contracts covering: payload schema (event type, aggregate ID, timestamp, data fields), ordering guarantees (per-sender FIFO, causal ordering, or none), delivery semantics (at-most-once, at-least-once, exactly-once), and error handling (dead letter channels, retry policies, circuit breakers). Event contracts decouple time (producer and consumer don't need to be available simultaneously) and schema (each system retains its own model through translation layers). + +**Interface Definitions** — In hexagonal architecture (Cockburn, 2005), the domain layer defines Protocol interfaces (ports) that specify what operations the domain needs. Infrastructure adapters implement these ports. The domain never imports from infrastructure — the dependency arrow always points inward (infrastructure → application → domain). Interface definitions must specify: method signatures, parameter types, return types, error types, and preconditions. 
+ +## Content + +### API Contract Specification + +Every API endpoint must document: + +| Element | Content | +|---|---| +| Path | `/{resource}/{id}` with path parameters | +| Method | GET, POST, PUT, DELETE | +| Request schema | Fields, types, required/optional, validation rules | +| Response schema | Fields, types, status codes | +| Error responses | Error code, message, retry guidance | +| Authentication | Required auth mechanism | +| Versioning | Header or URL-based version strategy | + +### Event Contract Specification + +Every event must document: + +| Element | Content | +|---|---| +| Event type | Past-tense domain event name (e.g., `OrderPlaced`) | +| Payload schema | Fields, types, required/optional | +| Ordering | None, per-sender FIFO, or causal | +| Delivery | At-most-once, at-least-once, or exactly-once | +| Error handling | Dead letter channel, retry policy, circuit breaker | +| Produced by | Module/context that emits the event | +| Consumed by | Module/context that handles the event | + +### Interface Definition Specification + +Every domain port must document: + +| Element | Content | +|---|---| +| Port name | Domain operation name (e.g., `PaymentGateway`) | +| Methods | Method signatures with parameter and return types | +| Errors | Domain error types the method can raise | +| Preconditions | Conditions that must hold before calling | +| Implementations | Which infrastructure adapter implements this port | + +## Related + +- [[architecture/technical-design]] +- [[architecture/quality-attributes]] +- [[domain-modeling/context-mapping]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/quality-attributes.md b/smith/data/.opencode/knowledge/architecture/quality-attributes.md new file mode 100644 index 0000000..79e62ed --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/quality-attributes.md @@ -0,0 +1,60 @@ +--- +domain: architecture +tags: [quality-attributes, architectural-styles, trade-offs, ATAM] +last-updated: 
2026-04-29 +--- + +# Quality Attributes + +## Key Takeaways + +- Quality attributes — not functional requirements — drive architectural decisions (Bass, Clements & Kazman, 2021). +- Six architecturally significant quality attribute categories: Performance, Availability, Security, Modifiability, Reliability, and Usability (Bass et al., 2021). +- Architectural style selection must be justified against quality attribute priorities, not personal preference — each style optimises for different attributes. +- Quality attributes often conflict — optimising for Performance may harm Modifiability; the utility tree method (ATAM) forces explicit prioritisation with business value justification. + +## Concepts + +**Quality Attributes as Architectural Drivers** (Bass et al., 2021) — Quality attributes are measurable properties of a system's architecture, distinct from functional requirements (what the system does). Performance, Availability, and Security constrain the architecture; Modifiability, Reliability, and Usability shape its flexibility. Each quality attribute produces concrete architectural tactics that directly affect module structure, dependency direction, and communication patterns. + +**Quality Attribute Conflicts** — Performance (fast response, low latency) often conflicts with Modifiability (abstraction layers, indirection). Security (encryption, validation) often conflicts with Performance (overhead). Availability (redundancy, failover) often conflicts with cost constraints. The architect must prioritise which attributes matter most for the business and make trade-offs explicitly, documented as ADRs. + +**ATAM Utility Tree** — The Architecture Tradeoff Analysis Method provides a structured way to prioritise quality attributes: stakeholders rank attribute scenarios by business value (High/Medium/Low) and by architectural difficulty (High/Medium/Low). The intersection produces a prioritised set of scenarios that the architecture must address first. 
This prevents architects from over-engineering for low-value attributes or under-engineering for high-value ones. + +**Architectural Tactics** — Each quality attribute has a set of design tactics that directly address it: Performance uses resource arbitration, concurrency, and caching; Availability uses redundancy, fault detection, and recovery; Modifiability uses encapsulation, substitution, and binding time. Tactics are the building blocks that architects combine into architectural styles. + +## Content + +### Quality Attribute Taxonomy + +| Category | Definition | Key Tactics | +|---|---|---| +| Performance | Response time and throughput under load | Caching, concurrency, resource pooling, load balancing | +| Availability | System uptime and fault tolerance | Redundancy, failover, circuit breaker, health checks | +| Security | Protection against unauthorised access and data breaches | Authentication, authorisation, encryption, audit logging | +| Modifiability | Ease of changing the system without side effects | Encapsulation, substitution, dependency inversion, binding time | +| Reliability | Correct operation over time under stated conditions | Input validation, checksums, transaction boundaries, retry | +| Usability | Ease of use for end users | Consistent UI patterns, clear error messages, progressive disclosure | + +### Architectural Styles and Quality Attributes + +| Style | Optimises For | Trades Off | +|---|---|---| +| Monolith | Simplicity, fast time-to-market, low-latency intra-module calls | Independent deployment, team autonomy | +| Microservices | Independent deployment, team autonomy, fault isolation | Operational complexity, inter-service latency | +| Event-driven | Loose coupling, async processing, scalability | Eventual consistency, debugging complexity | +| Serverless | Cost optimisation (pay-per-use), auto-scaling | Cold starts, vendor lock-in, debugging difficulty | +| Hexagonal | Testability, domain isolation, delivery-mechanism independence 
| Indirection overhead, architectural discipline required | + +### Quality Attributes in Architecture Documents + +When documenting quality attributes in `technical_design.md`: +- Each attribute must link to an architectural decision that addresses it +- Each architectural decision must link to an ADR +- Priority order must be explicit (which attribute wins when they conflict) + +## Related + +- [[architecture/technical-design]] +- [[architecture/adr]] +- [[architecture/assessment]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/reconciliation.md b/smith/data/.opencode/knowledge/architecture/reconciliation.md new file mode 100644 index 0000000..e25865a --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/reconciliation.md @@ -0,0 +1,57 @@ +--- +domain: architecture +tags: [reconciliation, cross-document-consistency, adversarial-review, verification] +last-updated: 2026-04-29 +--- + +# Reconciliation + +## Key Takeaways + +- Reconciliation is an adversarial cross-document consistency check — the reviewer actively seeks inconsistencies, not confirms consistency (Tetlock, 1985). +- Five cross-document consistency checks verify alignment: system↔glossary, system↔feature, ADRs↔feature, glossary↔feature, product_definition↔scope. +- Every inconsistency is a signal that either the architecture or the requirements need updating — the reviewer does not decide which side changes, only that a mismatch exists. +- Reconciliation gates prevent forward progress until all five checks pass; this is the last chance to catch misalignment before implementation begins. + +## Concepts + +**Adversarial Reconciliation** — The reviewer's default hypothesis is that inconsistencies exist. Leveraging accountability to an unknown audience (Tetlock, 1985), the reviewer actively searches for mismatches rather than confirming alignment. 
This adversarial stance produces more rigorous verification than cooperative review because it prevents confirmation bias — the tendency to see what we expect rather than what is actually there. + +**Five Cross-Document Consistency Checks** — Each check compares two documents and verifies that their models, terms, and requirements align. A mismatch in any check is a hard blocker: the architecture must be corrected, or the requirements must be revised, before implementation can proceed. + +**Reconciliation Gate** — The reconciliation gate sits between architecture review and implementation. It is the last point where misalignment can be caught cheaply. After this gate, code is written against the architecture, and fixing misalignment becomes exponentially more expensive. + +## Content + +### The Five Checks + +| # | Check | Verify | Mismatch Signal | +|---|---|---|---| +| 1 | system ↔ glossary | Every glossary term matches how it is used in system.md Domain Model | A term defined in the glossary is used with a different meaning in the domain model | +| 2 | system ↔ feature | Every entity, action, and relationship in system.md Domain Model matches feature requirements | An entity appears in the domain model but not in any feature, or vice versa | +| 3 | ADRs ↔ feature | Every ADR aligns with feature requirements; each ADR references specific `@id` criteria | An ADR contradicts a feature requirement, or a feature requirement has no ADR addressing it | +| 4 | glossary ↔ feature | Every domain term in the feature file matches its glossary definition | A term used in the feature has no glossary entry, or the glossary definition contradicts the feature's usage | +| 5 | product_definition ↔ scope | Scope in the product definition stays within the stated boundaries (what_is, what_is_not, out_of_scope) | A feature requirement exceeds the product definition's stated scope | + +### Mismatch Resolution + +When a mismatch is found: + +1. 
**Record the mismatch**: Which two documents, which specific items, and how they disagree. +2. **Determine which side changes** (this decision belongs to the document owner, not the reviewer — the reviewer only flags the mismatch): If the architecture is wrong, update system.md, technical_design.md, or the ADR. If the requirements are wrong, update the feature file or product definition. +3. **Update both documents**: Ensure the correction is reflected in all affected documents. +4. **Re-run the affected check**: Verify the mismatch is resolved. + +### Reviewer Stance Declaration + +Before performing reconciliation, the reviewer declares: + +- Adversarial stance: "I will actively search for inconsistencies, not confirm consistency." +- Boundary check: "I will verify every cross-document relationship, not just the ones that seem obvious." +- Semantic read: "I will read for meaning, not just surface-level keyword matching." + +## Related + +- [[architecture/adr]] +- [[architecture/assessment]] +- [[requirements/pre-mortem]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/architecture/technical-design.md b/smith/data/.opencode/knowledge/architecture/technical-design.md new file mode 100644 index 0000000..eb4869c --- /dev/null +++ b/smith/data/.opencode/knowledge/architecture/technical-design.md @@ -0,0 +1,61 @@ +--- +domain: architecture +tags: [technical-design, architectural-styles, c4-diagrams, hexagonal-architecture, module-structure] +last-updated: 2026-04-29 +--- + +# Technical Design + +## Key Takeaways + +- Architectural style must be selected based on quality attributes and deployment constraints — not personal preference. +- C4 diagrams provide four levels of abstraction (Brown, 2018): Context (system in environment), Container (deployable units), Component (modules within containers), Code (classes and functions). +- Module structure follows separation of concerns — domain logic must not depend on infrastructure (Cockburn, 2005; Evans, 2003). 
+- API contracts, event contracts, and interface definitions are the boundaries between modules — design them before implementation. + +## Concepts + +**Architectural Styles** — Common styles and when to choose them: Monolith (single deployment, simple ops, low latency between modules), Microservices (independent deployment, team autonomy, high operational complexity), Event-driven (loose coupling, eventual consistency, async workflows), Serverless (pay-per-use, auto-scaling, cold starts), Hexagonal/Ports & Adapters (testability, domain isolation, delivery-mechanism independence). + +**C4 Diagrams** (Brown, 2018) — Four levels of architectural visualization: Context (actors and external systems), Container (deployable units and their tech stacks), Component (internal modules and their responsibilities), Code (individual classes — rarely needed). Always start with Context, then Container. Component diagrams are optional. Code diagrams are rarely necessary. + +**Module Structure** — Organize by bounded context first (Evans, 2003), then by layer (domain, application, infrastructure). Domain layer has zero infrastructure imports. Application layer orchestrates use cases. Infrastructure layer implements external concerns. The dependency arrow always points inward: infrastructure → application → domain (Clean Architecture — Martin, 2012; Hexagonal Architecture — Cockburn, 2005). + +**Contract-First Design** — Define the boundaries before the implementation: API contracts (request/response shapes, error codes, authentication), Event contracts (event names, payload schemas, ordering guarantees), Interface definitions (Protocol/abstract classes that the domain defines and infrastructure implements). 
+ +## Content + +### Architectural Style Selection + +| Quality Attribute Priority | Recommended Style | +|---|---| +| Simplicity, fast time-to-market | Monolith | +| Team autonomy, independent scaling | Microservices | +| Loose coupling, async workflows | Event-driven | +| Cost optimization, variable load | Serverless | +| Testability, domain isolation | Hexagonal | + +Hybrid approaches are valid: a monolith with hexagonal internals, or microservices with event-driven communication between them. + +### C4 Diagram Guidelines + +- Context diagram: always include — shows the system boundary and external actors +- Container diagram: always include — shows deployable units and tech choices +- Component diagram: include when module structure is non-trivial +- Code diagram: only for complex algorithms or critical paths + +### Module Structure Template + +``` +feature_name/ + domain/ # Business logic, zero infrastructure imports + application/ # Use case orchestration + infrastructure/ # External concerns (DB, HTTP, queues) + api/ # Delivery mechanism (routes, serializers) +``` + +## Related + +- [[architecture/assessment]] +- [[architecture/quality-attributes]] +- [[architecture/contract-design]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/design/color-systems.md b/smith/data/.opencode/knowledge/design/color-systems.md new file mode 100644 index 0000000..7c0ecd7 --- /dev/null +++ b/smith/data/.opencode/knowledge/design/color-systems.md @@ -0,0 +1,126 @@ +--- +domain: design +tags: [color, palette, wcag, contrast, accessibility, hue-semantics, saturation, value, colour-harmony] +last-updated: 2026-04-30 +--- + +# Color Systems for Project Branding + +## Key Takeaways + +- Design monochrome-first; add colour only after the shape holds identity in pure black on white (Rand, 1985; Kare, 1984). +- Use 1–2 colours maximum in a logo mark; three or more create reproduction problems and visual noise at small sizes. 
+- Hue is one axis; saturation (vivid vs muted) and value (light vs dark) are independent levers that carry as much meaning. A desaturated blue signals "corporate"; a saturated blue signals "tech/digital." +- Every text–background pair must meet WCAG 2.1 SC 1.4.3 minimum contrast: 4.5:1 for normal text, 3:1 for large text (W3C, 2018). +- Verify colours on actual backgrounds, not in isolation — simultaneous contrast shifts perceived hue (Itten, 1961; Albers, 1963). +- Define each brand colour with: hex value, RGB, dark-mode counterpart, and WCAG contrast ratio against primary backgrounds. +- Choose colour harmony type based on emotional effect: analogous for calm, complementary for vibrancy, split-complementary for balanced contrast, triadic for energy. + +## Concepts + +**Monochrome-First Process**: Design the entire mark in black on white, then white on black. If it does not work in monochrome, colour will not save it. Only after the shape holds identity in one colour should a second colour be introduced — and only as an accent, never carrying meaning that must be read. + +**Hue, Saturation, and Value as Independent Axes**: Hue (which colour) is one dimension. Saturation (how vivid vs muted) and value (how light vs dark) carry as much meaning as hue and are independent levers. A single hue can express different personalities by varying saturation and value: +- High saturation + medium value: "digital, energetic, modern" (e.g., #3B82F6) +- Low saturation + medium value: "corporate, professional, subdued" (e.g., #6B7280) +- High saturation + dark value: "premium, deep, authoritative" (e.g., #1E3A5F) +- Low saturation + light value: "calm, approachable, subtle" (e.g., #DBEAFE) + +Choose personality → hue → saturation/value, not hue first then wonder why it doesn't feel right. + +**Hue Semantics**: Colours carry cultural associations. Blue signals trust and stability (most common in tech). Green signals growth and nature. Red signals energy and urgency. 
Orange signals creativity and enthusiasm. Purple signals innovation and premium quality. Yellow signals optimism and warmth. Choose a primary hue that reinforces the project's personality adjectives, not one that clashes with them. + +**Saturation–Value Personality Map**: The same hue communicates different personalities depending on saturation and value: + +| Personality | Hue | Saturation | Value | Example hex | +|-------------|-----|-----------|-------|------------| +| Precise, technical | Blue | Medium | Dark | #1E3A5F | +| Trustworthy, stable | Blue | Medium | Medium | #3B82F6 | +| Bold, disruptive | Red | High | Medium | #DC2626 | +| Warm, creative | Orange | High | Medium | #F97316 | +| Calm, approachable | Green | Low–Medium | Light–Medium | #86EFAC | +| Premium, innovative | Purple | Medium | Dark | #7C3AED | + +**Colour Harmony Types**: Choose the harmony type based on the emotional effect you want: + +| Harmony type | Wheel relationship | Angle | Effect | Use when | +|-------------|-------------------|-------|--------|----------| +| Complementary | Opposite on wheel | 180° | Maximum contrast, vibrant tension | Accent needs to pop against primary | +| Split-complementary | Complement ± 30° | 150° + 30° | Balanced contrast, less harsh than direct complement | Most versatile for branding | +| Analogous | Adjacent on wheel | ± 30° | Calm, harmonious, unified | Personality is "calm, cohesive, subtle" | +| Triadic | Evenly spaced 3 | 120° | Energetic, diverse, playful | Need 3 distinct brand colours | +| Tetradic (square) | Evenly spaced 4 | 90° | Complex, rich, hard to balance | Only with experienced colour sense | + +For 2-colour brand marks, use complementary or split-complementary. Analogous palettes lack enough contrast for accent visibility. Triadic requires 3 colours, violating the 2-colour maximum rule. + +**Itten's Seven Contrast Types**: Each contrast type is a design tool that produces a different emotional effect (Itten, 1961). 
The three most useful for branding: + +1. **Light-Dark contrast**: Black on white. Maximum clarity. Foundation of WCAG accessibility. Use for text–background pairs where legibility is paramount. +2. **Complementary contrast**: Opposite hues placed together (red–green, blue–orange). Maximum visual tension and vibrancy. Risk: at similar saturation, complements vibrate uncomfortably. Mitigation: vary the value (one lighter/darker) or desaturate one. +3. **Saturation contrast**: A vivid colour next to a muted one. The vivid colour appears to glow. This is the primary tool for accent hierarchy — a saturated accent on a desaturated primary draws the eye precisely without requiring hue contrast. + +The other four (cold-warm, simultaneous, hue, extension) are documented in the research (see `docs/research/design/visual/itten_1961.md`) and are useful for advanced palette refinement. + +**Complementary Palette Construction**: A brand palette has 5 roles: (1) primary — the dominant colour; (2) accent — a contrasting highlight; (3) background — the surface colour; (4) text-primary — the main text colour; (5) text-secondary — muted text. Primary and accent are typically complementary or split-complementary. Background and text colours must achieve ≥4.5:1 contrast (WCAG AA). + +**WCAG Contrast Calculation**: Relative luminance L = 0.2126R + 0.7152G + 0.0722B (after gamma linearisation). Contrast ratio = (L_lighter + 0.05) / (L_darker + 0.05). Ratio ranges from 1:1 to 21:1. Normal text requires ≥4.5:1 (AA) or ≥7:1 (AAA). Large text (≥18pt or ≥14pt bold) requires ≥3:1 (AA) or ≥4.5:1 (AAA). + +**Dark-Mode Counterparts**: For each light-theme colour, define a dark-theme counterpart that maintains the same relative visual weight and contrast ratio. Do not simply invert (white-on-black is harsh). Use off-white (#e0e0e0) on dark backgrounds instead of pure white, and adjust accent saturation for dark contexts. 
+ +**Simultaneous Contrast**: A neutral grey on a red background appears greenish; on a green background it appears reddish (Itten, 1961). Always test brand colours against both light and dark backgrounds before finalising. Adjust hex values to compensate for the perceptual shift, not the theoretical value. + +## Content + +### Hue Semantics Table + +| Hue | Signal | Common In | Avoid If | +|-----|--------|-----------|----------| +| Blue | Trust, stability, professionalism | Tech, finance, enterprise | Project personality is "bold, disruptive" | +| Green | Growth, nature, health | Environment, fintech, health | Project personality is "precise, minimal" | +| Red | Energy, urgency, danger | News, entertainment, alerts | Project personality is "calm, reliable" | +| Orange | Creativity, enthusiasm, warmth | Creative tools, education | Project personality is "serious, formal" | +| Purple | Innovation, premium, luxury | Design tools, premium SaaS | Project personality is "accessible, simple" | +| Yellow | Optimism, warmth, caution | Children's products, warnings | Used as small accent only (low contrast on white) | +| Teal | Balance, sophistication | Health tech, lifestyle | Combined with similar-value greens | +| Grey | Neutrality, professionalism | Enterprise, documentation | Used as primary (no personality signal) | + +### Complementary Pair Examples + +| Primary | Accent | Relationship | Contrast on White | +|---------|--------|-------------|-------------------| +| #1a1a2e (dark navy) | #e94560 (warm red) | Split-complementary | Navy 15.7:1, Red 4.7:1 | +| #2d5016 (forest green) | #c9a84c (antique gold) | Split-complementary | Green 7.2:1, Gold 3.1:1 (large text only) | +| #0f3460 (mid blue) | #e94560 (warm red) | Triadic | Blue 11.4:1, Red 4.7:1 | +| #3b2410 (deep brown) | #7baabf (steel blue) | Complementary | Brown 14.2:1, Blue 3.7:1 | + +### Visual Weight Proportions (Itten's Contrast of Extension) + +When balancing colour areas in a composition, visual weight 
depends on inherent brightness: + +| Colour pair | Visual weight ratio (area for equal perceived weight) | +|------------|------------------------------------------------------| +| Yellow : Violet | 1 : 3 | +| Orange : Blue | 1 : 2 | +| Red : Green | 1 : 1 | +| Yellow : Orange | 1 : 1.5 | +| Light grey : Dark navy | 1 : 2 | + +A thin gold line on a navy field reads as "balanced" because the yellow's visual weight per unit area is 3× the violet's. + +### WCAG Contrast Verification Checklist + +For each colour in the palette, verify contrast ratio against: + +1. **Primary on background** — must be ≥4.5:1 for normal text +2. **Accent on background** — must be ≥3:1 for large text or ≥4.5:1 if carrying meaning +3. **Secondary on background** — must be ≥3:1 minimum +4. **Dark-mode primary on dark background** — must be ≥4.5:1 +5. **Dark-mode accent on dark background** — must be ≥3:1 +6. **Logo mark on white** — must be clearly legible (no numeric threshold, but test by squinting) +7. **Logo mark on dark** — must be clearly legible in dark mode variant + +## Related + +- [[design/project-assets]] +- [[design/identity-design]] +- [[design/visual-harmony]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/design/identity-design.md b/smith/data/.opencode/knowledge/design/identity-design.md new file mode 100644 index 0000000..be68c49 --- /dev/null +++ b/smith/data/.opencode/knowledge/design/identity-design.md @@ -0,0 +1,36 @@ +--- +domain: design +tags: [brand, identity, naming, interview, personality, logo-type] +last-updated: 2026-04-30 +--- + +# Identity Design + +## Key Takeaways + +- Brand identity has five components: name, tagline, personality (3 adjectives), visual mark, and wording rules. All are captured in `docs/branding.md`. +- Choose logo type based on project recognition and visual metaphor: combination mark for new brands, abstract mark for established names, pictogram for strong metaphors, letterform for compact marks. 
+- Release naming convention lives in `docs/branding.md` under the Release Naming section — it is part of the brand identity, not separate from it. +- Wording rules (words to avoid, words to prefer) are brand identity constraints, not style preferences — they prevent the brand voice from drifting. +- The interview for brand identity uses [[requirements/interview-techniques#key-takeaways]] but focuses on personality, visual metaphor, and wording rather than requirements. + +## Concepts + +**Brand Identity Components**: A project's brand identity is the combination of (1) name — the project identifier; (2) tagline — one sentence describing what the project does; (3) personality — 3 adjectives that define tone and visual style; (4) visual mark — logo, banner, colour palette; (5) wording rules — words to avoid and prefer. These are captured in `docs/branding.md` per the template at `.templates/docs/branding/branding.md.template`. + +**Logo Type Selection**: Four logo types are appropriate for open-source projects. (1) Combination mark (symbol + wordmark) — best for new brands where the name is not yet widely known. (2) Abstract mark — best for established names that need a unique symbol. (3) Pictogram — best when the project name suggests a strong visual metaphor (e.g., Docker = whale). (4) Letterform/monogram — best for projects with long names needing a compact avatar mark. Choose based on: is the name well-known? (no → combination mark). Does the name suggest a metaphor? (yes → pictogram). Is the primary context small? (yes → letterform). + +**Personality Adjectives**: Three adjectives define the brand personality. They drive every design and writing decision: colour choices (warm vs cool, saturated vs muted), logo style (geometric vs organic, bold vs delicate), and wording (direct vs friendly, technical vs approachable). Examples: "precise, calm, reliable" → cool blues, geometric shapes, direct wording. 
"Bold, fast, disruptive" → warm reds, angular shapes, punchy wording. + +**Release Naming Convention**: Stored in `docs/branding.md` under the Release Naming section. Convention format (e.g., `adjective-greek-figure`), theme (e.g., Greek antiquity), rationale, and excluded words. This is part of brand identity because release names are public-facing communications that reinforce (or contradict) the project's personality. See [[software-craft/versioning#key-takeaways]] for the versioning scheme. + +**Wording Rules**: Two lists: words to avoid and words to prefer. These prevent brand voice drift across releases, documentation, and README. Example: avoid "easy, simple, just" (these are subjective and often false); prefer "minimal, precise, production-ready" (these are verifiable). Wording rules are identity constraints, not stylistic preferences — they define what the project sounds like. + +**Brand Interview Structure**: The interview for brand identity is structured in three phases: (1) personality — what 3 adjectives describe the project? what must it NOT convey? where will the logo appear most? (2) visual metaphor — does the project name suggest a visual? what are 5 peer/competitor logos? how should yours differ? (3) wording — what words should the project avoid? what words should it prefer? what is the tagline? See [[requirements/interview-techniques#concepts]] for interview techniques (CIT for specific examples, Laddering for climbing from surface preferences to real constraints). 
+ +## Related + +- [[design/color-systems]] +- [[design/project-assets]] +- [[design/visual-harmony]] +- [[requirements/interview-techniques]] \ No newline at end of file diff --git a/smith/data/.opencode/knowledge/design/project-assets.md b/smith/data/.opencode/knowledge/design/project-assets.md new file mode 100644 index 0000000..3cf0dfb --- /dev/null +++ b/smith/data/.opencode/knowledge/design/project-assets.md @@ -0,0 +1,76 @@ +--- +domain: design +tags: [logo, banner, favicon, svg, dark-mode, assets, delivery, progressive-simplification] +last-updated: 2026-04-30 +--- + +# Project Asset Design + +## Key Takeaways + +- Design favicon-first: if the mark cannot hold identity at 16×16, it is not strong enough (Kare, 1984). +- Design monochrome-first: if it does not work in pure black on white, colour will not save it (Rand, 1985). +- Progressive simplification: each size tier gets its own optimised version, not a scaled-down copy (Hicks, 2011). +- Pass the 5-second test, blur test, monochrome test, scalability test, and proximity test before finalising (Airey, 2010). +- SVG assets must use presentation attributes (not CSS classes), be self-contained (no external references), and be optimised with SVGO. +- Provide dark-mode variants: embedded `@media (prefers-color-scheme: dark)` in SVGs, or separate files. +- Minimum favicon delivery set: favicon.ico, icon.svg, apple-touch-icon.png, icon-192.png, icon-512.png. +- Social preview image: 1280×640px minimum, critical content centred in 60–70% of frame. + +## Concepts + +**Favicon-First Design**: Design at the smallest target size (16×16 or 32×32) first, then scale up and add detail. Kare designed all original Macintosh icons on a 32×32 grid because "every pixel must carry meaning." If a mark cannot be recognised at favicon size, it relies on detail that will vanish in real usage. The favicon version is not a simplification of a larger design — it is the core identity, and the larger versions are elaborations of it. 
+ +**Monochrome-First Process**: The mark must work in a single colour on a single background before any colour is applied. Rand tested his logos by blurring them (Gaussian blur 3–5px) to verify the silhouette held. If the blurred mark is still identifiable, the shape is strong. If not, it relies on detail that will fail at small sizes, in print, or on dark backgrounds. + +**Progressive Simplification**: Each size tier gets its own optimised version: Master (512px, full detail), Standard (180px, remove thin strokes, simplify curves), Small (32px, only core silhouette, strokes→fills), Tiny (16px, single bold shape, often hand-redrawn). Do not simply scale a 512px icon to 16px — it produces a muddy, unrecognisable mark. + +**Evaluation Checklist**: (1) 5-second test — show for 5 seconds, remove, ask "what did you see?"; (2) blur test — Gaussian blur 3–5px, silhouette must remain identifiable; (3) monochrome test — pure black on white, pure white on black; (4) scalability test — legible at 16px and 500px; (5) proximity test — distinguishable from 5 competitor logos; (6) "one thing" test — there should be one dominant feature. + +**SVG Construction Rules**: Use presentation attributes (`fill="#1a1a2e"`) not CSS classes (GitHub strips inline `