From 3d9cd7a43c5616b658944f250cb4c27594803493 Mon Sep 17 00:00:00 2001
From: Eli Fine <eli88fine@gmail.com>
Date: Tue, 31 Mar 2026 19:21:25 +0000
Subject: [PATCH 1/2] copier

---
 .claude/commands/red.md                     | 15 +++++++++++++++
 .copier-answers.yml                         |  2 +-
 .devcontainer/devcontainer.json             |  2 +-
 .devcontainer/manual-setup-deps.py          | 14 ++++++++++----
 .github/workflows/ci.yaml                   |  1 +
 AGENTS.md                                   |  1 +
 template/.claude/commands/red.md            | 15 +++++++++++++++
 template/.devcontainer/manual-setup-deps.py | 14 ++++++++++----
 template/.github/workflows/ci.yaml.jinja    |  1 +
 template/AGENTS.md                          |  1 +
 10 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/.claude/commands/red.md b/.claude/commands/red.md
index e6120877..01dc7124 100644
--- a/.claude/commands/red.md
+++ b/.claude/commands/red.md
@@ -94,6 +94,21 @@ This phase is **not part of the regular TDD workflow** and must only be applied
 - Once sufficient understanding is achieved, all spike code is discarded, and normal TDD resumes starting from the **Red Phase**.
 - A Spike is justified only when it is impossible to define a meaningful failing test due to technical uncertainty or unknown system behavior.
 
+### If a New Test Passes Immediately
+
+If a newly written test passes without any implementation change, do not assume it is correct. Verify it actually exercises the intended behavior:
+
+1. Identify the implementation line most likely responsible for the pass
+2. Temporarily remove that line
+3. Run the **full test suite** (not just the new test)
+
+Then interpret the result:
+
+- **Only the new test fails** — the line was never driven by a prior test. This is accidental over-implementation: leave the line deleted and proceed to the green phase, where the now-failing new test drives its proper reintroduction.
+- **Other existing tests also fail** — the line was already legitimately required by prior work. The new test is valid regression coverage. Restore the line; the test is confirmed correct as written.
+
+In both cases, while the line is still removed, confirm that the new test fails for the expected reason (the intended assertion, not a syntax or import error) before restoring the line or proceeding.
+
 ### General Information
 
 - Sometimes the test output shows as no tests have been run when a new test is failing due to a missing import or constructor. In such cases, allow the agent to create simple stubs. Ask them if they forgot to create a stub if they are stuck.
diff --git a/.copier-answers.yml b/.copier-answers.yml
index 255dfba7..dd692b23 100644
--- a/.copier-answers.yml
+++ b/.copier-answers.yml
@@ -1,5 +1,5 @@
 # Changes here will be overwritten by Copier
-_commit: v0.0.109
+_commit: v0.0.109-4-g8c4c457
 _src_path: gh:LabAutomationAndScreening/copier-base-template.git
 description: Copier template for creating Python libraries and executables
 install_claude_cli: true
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index c2d91ede..0c63929f 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -65,5 +65,5 @@
   "initializeCommand": "sh .devcontainer/initialize-command.sh",
   "onCreateCommand": "sh .devcontainer/on-create-command.sh",
   "postStartCommand": "sh .devcontainer/post-start-command.sh"
-  // Devcontainer context hash (do not manually edit this, it's managed by a pre-commit hook): d77a3ff3 # spellchecker:disable-line
+  // Devcontainer context hash (do not manually edit this, it's managed by a pre-commit hook): 087a93d5 # spellchecker:disable-line
 }
diff --git a/.devcontainer/manual-setup-deps.py b/.devcontainer/manual-setup-deps.py
index 53e59e1b..19a1c15e 100644
--- a/.devcontainer/manual-setup-deps.py
+++ b/.devcontainer/manual-setup-deps.py
@@ -11,6 +11,7 @@
 
 REPO_ROOT_DIR = Path(__file__).parent.parent.resolve()
 ENVS_CONFIG = REPO_ROOT_DIR / ".devcontainer" / "envs.json"
+PULUMI_CLI_INSTALL_SCRIPT = REPO_ROOT_DIR / ".devcontainer" / "install-pulumi-cli.sh"
 UV_PYTHON_ALREADY_CONFIGURED = "UV_PYTHON" in os.environ
 parser = argparse.ArgumentParser(description="Manual setup for dependencies in the repo")
 _ = parser.add_argument(
@@ -140,10 +141,15 @@ def main():
                 and env.lock_file.exists()
                 and '"pulumi"' in env.lock_file.read_text()
             ):
-                _ = subprocess.run(
-                    ["sh", str(REPO_ROOT_DIR / ".devcontainer" / "install-pulumi-cli.sh"), str(env.lock_file)],
-                    check=True,
-                )
+                if not PULUMI_CLI_INSTALL_SCRIPT.exists():
+                    print(
+                        f"Pulumi CLI install script not found at {PULUMI_CLI_INSTALL_SCRIPT}, skipping Pulumi CLI installation"
+                    )
+                else:
+                    _ = subprocess.run(
+                        ["sh", str(PULUMI_CLI_INSTALL_SCRIPT), str(env.lock_file)],
+                        check=True,
+                    )
         elif env.package_manager == PackageManager.PNPM:
             pnpm_command = ["pnpm", "install", "--dir", str(env.path)]
             if env_check_lock:
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index aee9c2d9..969f23eb 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -23,6 +23,7 @@ jobs:
 
   check-skip-duplicate:
     runs-on: ubuntu-24.04
+    timeout-minutes: 2
     outputs:
       should-run: ${{ steps.check.outputs.should-run }}
     steps:
diff --git a/AGENTS.md b/AGENTS.md
index 89ac5111..29308581 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -23,6 +23,7 @@ This project is a Copier template used to generate other copier templates. It is
 - Avoid magic values in comparisons in tests in all languages (like ruff rule PLR2004 specifies)
 - Prefer using random values in tests rather than arbitrary ones (e.g. the faker library, uuids, random.randint) when possible. For enums, pick randomly rather than hardcoding one value.
 - Avoid loops in tests — assert each item explicitly so failures pinpoint the exact element. When verifying a condition across all items in a collection, collect the violations into a list and assert it's empty (e.g., assert [x for x in items if bad_condition(x)] == []).
+- When a test's final assertion is an absence (e.g., element is `null`, list is empty, modal is closed), include a prior presence assertion confirming the expected state existed before the action that removed it. A test whose only assertion is an absence check can pass vacuously if setup silently failed.
 - When asserting a mock or spy was called with specific arguments, always constrain as tightly as possible. In order of preference: (1) assert called exactly once with those args (`assert_called_once_with` in Python, `toHaveBeenCalledExactlyOnceWith` in Vitest/Jest); (2) if multiple calls are expected, assert the total call count and use a positional or last-call assertion (`nthCalledWith`, `lastCalledWith` / `assert_has_calls` with `call_args_list[n]`); (3) plain "called with at any point" (`toHaveBeenCalledWith`, `assert_called_with`) is a last resort only when neither the call count nor the call order can reasonably be constrained.
 
 ### Python Testing
diff --git a/template/.claude/commands/red.md b/template/.claude/commands/red.md
index e6120877..01dc7124 100644
--- a/template/.claude/commands/red.md
+++ b/template/.claude/commands/red.md
@@ -94,6 +94,21 @@ This phase is **not part of the regular TDD workflow** and must only be applied
 - Once sufficient understanding is achieved, all spike code is discarded, and normal TDD resumes starting from the **Red Phase**.
 - A Spike is justified only when it is impossible to define a meaningful failing test due to technical uncertainty or unknown system behavior.
 
+### If a New Test Passes Immediately
+
+If a newly written test passes without any implementation change, do not assume it is correct. Verify it actually exercises the intended behavior:
+
+1. Identify the implementation line most likely responsible for the pass
+2. Temporarily remove that line
+3. Run the **full test suite** (not just the new test)
+
+Then interpret the result:
+
+- **Only the new test fails** — the line was never driven by a prior test. This is accidental over-implementation: leave the line deleted and proceed to the green phase, where the now-failing new test drives its proper reintroduction.
+- **Other existing tests also fail** — the line was already legitimately required by prior work. The new test is valid regression coverage. Restore the line; the test is confirmed correct as written.
+
+In both cases, while the line is still removed, confirm that the new test fails for the expected reason (the intended assertion, not a syntax or import error) before restoring the line or proceeding.
+
 ### General Information
 
 - Sometimes the test output shows as no tests have been run when a new test is failing due to a missing import or constructor. In such cases, allow the agent to create simple stubs. Ask them if they forgot to create a stub if they are stuck.
diff --git a/template/.devcontainer/manual-setup-deps.py b/template/.devcontainer/manual-setup-deps.py
index 53e59e1b..19a1c15e 100644
--- a/template/.devcontainer/manual-setup-deps.py
+++ b/template/.devcontainer/manual-setup-deps.py
@@ -11,6 +11,7 @@
 
 REPO_ROOT_DIR = Path(__file__).parent.parent.resolve()
 ENVS_CONFIG = REPO_ROOT_DIR / ".devcontainer" / "envs.json"
+PULUMI_CLI_INSTALL_SCRIPT = REPO_ROOT_DIR / ".devcontainer" / "install-pulumi-cli.sh"
 UV_PYTHON_ALREADY_CONFIGURED = "UV_PYTHON" in os.environ
 parser = argparse.ArgumentParser(description="Manual setup for dependencies in the repo")
 _ = parser.add_argument(
@@ -140,10 +141,15 @@ def main():
                 and env.lock_file.exists()
                 and '"pulumi"' in env.lock_file.read_text()
             ):
-                _ = subprocess.run(
-                    ["sh", str(REPO_ROOT_DIR / ".devcontainer" / "install-pulumi-cli.sh"), str(env.lock_file)],
-                    check=True,
-                )
+                if not PULUMI_CLI_INSTALL_SCRIPT.exists():
+                    print(
+                        f"Pulumi CLI install script not found at {PULUMI_CLI_INSTALL_SCRIPT}, skipping Pulumi CLI installation"
+                    )
+                else:
+                    _ = subprocess.run(
+                        ["sh", str(PULUMI_CLI_INSTALL_SCRIPT), str(env.lock_file)],
+                        check=True,
+                    )
         elif env.package_manager == PackageManager.PNPM:
             pnpm_command = ["pnpm", "install", "--dir", str(env.path)]
             if env_check_lock:
diff --git a/template/.github/workflows/ci.yaml.jinja b/template/.github/workflows/ci.yaml.jinja
index 9d611dc5..8ad54cbd 100644
--- a/template/.github/workflows/ci.yaml.jinja
+++ b/template/.github/workflows/ci.yaml.jinja
@@ -23,6 +23,7 @@ jobs:
 
   check-skip-duplicate:
     runs-on: {% endraw %}{{ gha_linux_runner }}{% raw %}
+    timeout-minutes: {% endraw %}{{ gha_short_timeout_minutes }}{% raw %}
     permissions:
       contents: read
       pull-requests: read # needed to check if PR exists for current branch
diff --git a/template/AGENTS.md b/template/AGENTS.md
index d6ed3c36..ac6c8f1f 100644
--- a/template/AGENTS.md
+++ b/template/AGENTS.md
@@ -23,6 +23,7 @@ This project is a Python library.
 - Avoid magic values in comparisons in tests in all languages (like ruff rule PLR2004 specifies)
 - Prefer using random values in tests rather than arbitrary ones (e.g. the faker library, uuids, random.randint) when possible. For enums, pick randomly rather than hardcoding one value.
 - Avoid loops in tests — assert each item explicitly so failures pinpoint the exact element. When verifying a condition across all items in a collection, collect the violations into a list and assert it's empty (e.g., assert [x for x in items if bad_condition(x)] == []).
+- When a test's final assertion is an absence (e.g., element is `null`, list is empty, modal is closed), include a prior presence assertion confirming the expected state existed before the action that removed it. A test whose only assertion is an absence check can pass vacuously if setup silently failed.
 - When asserting a mock or spy was called with specific arguments, always constrain as tightly as possible. In order of preference: (1) assert called exactly once with those args (`assert_called_once_with` in Python, `toHaveBeenCalledExactlyOnceWith` in Vitest/Jest); (2) if multiple calls are expected, assert the total call count and use a positional or last-call assertion (`nthCalledWith`, `lastCalledWith` / `assert_has_calls` with `call_args_list[n]`); (3) plain "called with at any point" (`toHaveBeenCalledWith`, `assert_called_with`) is a last resort only when neither the call count nor the call order can reasonably be constrained.
 
 ### Python Testing

From 3b46ec1f8eb561be8d0f880f2dee7a94265fd2a2 Mon Sep 17 00:00:00 2001
From: Eli Fine <eli88fine@gmail.com>
Date: Tue, 31 Mar 2026 20:13:49 +0000
Subject: [PATCH 2/2] tag

---
 .copier-answers.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.copier-answers.yml b/.copier-answers.yml
index dd692b23..a1554af1 100644
--- a/.copier-answers.yml
+++ b/.copier-answers.yml
@@ -1,5 +1,5 @@
 # Changes here will be overwritten by Copier
-_commit: v0.0.109-4-g8c4c457
+_commit: v0.0.110
 _src_path: gh:LabAutomationAndScreening/copier-base-template.git
 description: Copier template for creating Python libraries and executables
 install_claude_cli: true