From 6e5718f8ce7a5f2d43106c055f2a9d825a8c996b Mon Sep 17 00:00:00 2001 From: Jordan Padams Date: Thu, 2 Jul 2026 15:31:55 -0700 Subject: [PATCH 1/3] Standardize secrets detection to use script-based approach Replace inline slim-detect-secrets hook and duplicated --exclude-files args with scripts/detect_secrets_baseline.sh, which centralizes all scan logic and reads per-repo exclusions from .detect-secrets-ignore. Adds detect-secrets~=1.5.0 to dev extras so it installs with the venv. Closes #145 Co-Authored-By: Claude Sonnet 4.6 --- .detect-secrets-ignore | 11 +++ .github/workflows/secrets-detection.yaml | 63 +++---------- .pre-commit-config.yaml | 26 ++---- CLAUDE.md | 106 ++++++++++++++++++++++ README.md | 31 ++----- docs/wiki-detect-secrets.md | 90 +++++++++++++++++++ scripts/detect_secrets_baseline.sh | 108 +++++++++++++++++++++++ setup.cfg | 1 + 8 files changed, 342 insertions(+), 94 deletions(-) create mode 100644 .detect-secrets-ignore create mode 100644 CLAUDE.md create mode 100644 docs/wiki-detect-secrets.md create mode 100755 scripts/detect_secrets_baseline.sh diff --git a/.detect-secrets-ignore b/.detect-secrets-ignore new file mode 100644 index 0000000..904a107 --- /dev/null +++ b/.detect-secrets-ignore @@ -0,0 +1,11 @@ +# Per-repo secret detection exclusions for your_package_name. +# One regex per line; lines starting with # are comments. +# These supplement the global exclusions in scripts/detect_secrets_baseline.sh. 
+ +# Test fixture directories that may contain sample/fake credentials +.*/test/data/.* +.*/tests/data/.* + +# Documentation files contain placeholder/example credentials only +README\.md +CHANGELOG\.md diff --git a/.github/workflows/secrets-detection.yaml b/.github/workflows/secrets-detection.yaml index f10a872..25413f8 100644 --- a/.github/workflows/secrets-detection.yaml +++ b/.github/workflows/secrets-detection.yaml @@ -16,63 +16,22 @@ jobs: name: Checkout code uses: actions/checkout@v6 - - name: Install necessary packages - run: | - pip install git+https://github.com/NASA-AMMOS/slim-detect-secrets.git@exp - pip install jq + name: Install detect-secrets + run: pip install detect-secrets~=1.5.0 - - name: Create an initial .secrets.baseline if .secrets.baseline does not exist + name: Verify .secrets.baseline exists run: | if [ ! -f .secrets.baseline ]; then - # This generated baseline file will only be temporarily available on the GitHub side and will not appear in the user's local files. - # Scanning an empty folder to generate an initial .secrets.baseline without secrets in the results. - echo "⚠️ No existing .secrets.baseline file detected. Creating a new blank baseline file." - mkdir empty-dir - detect-secrets scan empty-dir > .secrets.baseline - echo "✅ Blank .secrets.baseline file created successfully." - rm -r empty-dir - else - echo "✅ Existing .secrets.baseline file detected. No new baseline file will be created." + echo "❌ No .secrets.baseline found." >&2 + echo "" >&2 + echo "Run the following locally, audit the results, then commit the baseline:" >&2 + echo " scripts/detect_secrets_baseline.sh scan" >&2 + echo " scripts/detect_secrets_baseline.sh audit" >&2 + exit 1 fi + echo "✅ .secrets.baseline found." 
- name: Scan repository for secrets - run: | - # scripts to scan repository for new secrets - - # backup the list of known secrets - cp .secrets.baseline .secrets.new - - # find the secrets in the repository - detect-secrets scan --disable-plugin AbsolutePathDetectorExperimental --baseline .secrets.new \ - --exclude-files '\.secrets..*' \ - --exclude-files '\.git.*' \ - --exclude-files '\.mypy_cache' \ - --exclude-files '\.pytest_cache' \ - --exclude-files '\.tox' \ - --exclude-files '\.venv' \ - --exclude-files 'venv' \ - --exclude-files 'dist' \ - --exclude-files 'build' \ - --exclude-files '.*\.egg-info' \ - --exclude-files '\.pre-commit-config\.yaml' - - # if there is any difference between the known and newly detected secrets, break the build - # Function to compare secrets without listing them - compare_secrets() { diff <(jq -r '.results | keys[] as $key | "\($key),\(.[$key] | .[] | .hashed_secret)"' "$1" | sort) <(jq -r '.results | keys[] as $key | "\($key),\(.[$key] | .[] | .hashed_secret)"' "$2" | sort) >/dev/null; } - - # Check if there's any difference between the known and newly detected secrets - if ! compare_secrets .secrets.baseline .secrets.new; then - echo "⚠️ Attention Required! ⚠️" >&2 - echo "New secrets have been detected in your recent commit. Due to security concerns, we cannot display detailed information here and we cannot proceed until this issue is resolved." >&2 - echo "" >&2 - echo "Please follow the steps below on your local machine to reveal and handle the secrets:" >&2 - echo "" >&2 - echo "1️⃣ Run the 'detect-secrets' tool on your local machine. This tool will identify and clean up the secrets. You can find detailed instructions at this link: https://nasa-ammos.github.io/slim/continuous-testing/starter-kits/#detect-secrets" >&2 - echo "" >&2 - echo "2️⃣ After cleaning up the secrets, commit your changes and re-push your update to the repository." 
>&2 - echo "" >&2 - echo "Your efforts to maintain the security of our codebase are greatly appreciated!" >&2 - exit 1 - fi + run: scripts/detect_secrets_baseline.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9bcd298..1f9b7d5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,24 +56,10 @@ repos: stages: [push] pass_filenames: false -- repo: https://github.com/NASA-AMMOS/slim-detect-secrets - # using commit id for now, will change to tag when official version is released - rev: 91e097ad4559ae6ab785c883dc5ed989202c7fbe +- repo: local hooks: - - id: detect-secrets - args: - - '--disable-plugin' - - 'AbsolutePathDetectorExperimental' - - '--baseline' - - '.secrets.baseline' - - --exclude-files '\.secrets..*' - - --exclude-files '\.git.*' - - --exclude-files '\.mypy_cache' - - --exclude-files '\.pytest_cache' - - --exclude-files '\.tox' - - --exclude-files '\.venv' - - --exclude-files 'venv' - - --exclude-files 'dist' - - --exclude-files 'build' - - --exclude-files '.*\.egg-info' - - --exclude-files '\.pre-commit-config\.yaml' + - id: detect-secrets + name: detect-secrets + entry: scripts/detect_secrets_baseline.sh + language: script + pass_filenames: false diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..fa746c3 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,106 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Purpose + +This is NASA-PDS's template repository for new Python projects. When working in a repo created from this template, the placeholder `your_package_name` must be replaced with the actual module name throughout `setup.cfg`, `src/pds/`, `tests/`, and other files. 
+ +## Commands + +### Setup + +```bash +python -m venv venv +source venv/bin/activate +pip install --editable '.[dev]' +``` + +Or via tox: + +```bash +tox --devenv venv -e dev +``` + +### Testing + +```bash +pytest # run all tests +pytest tests/path/test_foo.py # run a single test file +pytest -k "test_name" # run a single test by name +ptw # watch mode +``` + +Tests run in parallel by default (`--numprocesses auto`) with coverage reporting to XML and terminal. + +### Linting + +```bash +tox -e lint # run all linters (flake8, mypy, pre-commit hooks) +flake8 src # flake8 only +mypy src # type-checking only +``` + +### Full build (tests + lint + docs) + +```bash +tox +``` + +### Documentation + +```bash +sphinx-build docs/source docs/build +# output at docs/build/index.html +``` + +### Build package + +```bash +pip install build +python -m build . +``` + +### Secrets detection + +```bash +scripts/detect_secrets_baseline.sh scan # regenerate .secrets.baseline +scripts/detect_secrets_baseline.sh audit # interactively review/classify detected secrets +scripts/detect_secrets_baseline.sh # check for new secrets vs baseline (run by pre-commit) +``` + +Per-repo file exclusions go in `.detect-secrets-ignore` (one regex per line). Global exclusions (`.git`, `venv`, `dist`, etc.) are baked into the script. + +## Architecture + +### Package layout + +Source lives under `src/pds/your_package_name/` using a [PEP 420 namespace package](https://peps.python.org/pep-0420/) — `src/pds/__init__.py` is intentionally minimal (no `__path__` manipulation) to support the `pds.*` namespace shared across multiple PDS Python packages. + +Version is read at import time from `src/pds/your_package_name/VERSION.txt` via `importlib.resources`, not hardcoded. + +Entry points (CLI scripts) are declared in `setup.cfg` under `[options.entry_points] console_scripts`. + +### Tests + +Tests go in `tests/pds/your_package_name/` mirroring the source tree. The `[tool:pytest]` section in `setup.cfg` configures coverage to report on the `pds` namespace. 
+ +### CI/CD + +Two standard GitHub Actions workflows drive releases via [NASA-PDS/roundup-action](https://github.com/NASA-PDS/roundup-action): + +- **`unstable-cicd.yaml`** — triggers on push to `main`; publishes a SNAPSHOT release to Test PyPI +- **`stable-cicd.yaml`** — triggers on push to `release/` branches; publishes stable releases to PyPI + +Required repository secrets: `ADMIN_GITHUB_TOKEN`, `TEST_PYPI_USERNAME`, `TEST_PYPI_PASSWORD`, `SONAR_TOKEN`. + +### Code style + +- **flake8** enforces PEP8 + docstrings (Google convention) + bugbear; max line length 120 +- **mypy** enforces type annotations across `src/` +- **black** is configured (`pyproject.toml`) but disabled in pre-commit due to conflict with `reorder-python-imports` +- Pre-commit hooks run mypy + flake8 on commit; pytest runs on push + +### Logging + +Use `logging.getLogger(__name__)` — never `print()` for runtime output. diff --git a/README.md b/README.md index 47b8e93..ac7eda3 100644 --- a/README.md +++ b/README.md @@ -72,29 +72,18 @@ Install in editable mode and with extra developer dependencies into your virtual Make a baseline for any secrets (email addresses, passwords, API keys, etc.) in the repository: - detect-secrets scan . \ - --all-files \ - --exclude-files '\.secrets..*' \ - --exclude-files '\.git.*' \ - --exclude-files '\.mypy_cache' \ - --exclude-files '\.pytest_cache' \ - --exclude-files '\.tox' \ - --exclude-files '\.venv' \ - --exclude-files 'venv' \ - --exclude-files 'dist' \ - --exclude-files 'build' \ - --exclude-files '.*\.egg-info' \ - --exclude-files '\.pre-commit-config\.yaml' \ - > .secrets.baseline - -Review the secrets to determine which should be allowed and which are false positives: - - detect-secrets audit .secrets.baseline - -Please remove any secrets that should not be seen by the public. 
You can then add the baseline file to the commit: + scripts/detect_secrets_baseline.sh scan + +Review and classify each detected secret (mark as `is_secret: true/false`): + + scripts/detect_secrets_baseline.sh audit + +Commit the baseline: git add .secrets.baseline +To exclude additional files specific to this repo from scanning, add regex patterns (one per line) to `.detect-secrets-ignore`. Global exclusions (`.git`, `venv`, `dist`, etc.) are already handled by the script. + Then, configure the `pre-commit` hooks: pre-commit install @@ -104,8 +93,6 @@ Then, configure the `pre-commit` hooks: These hooks then will check for any future commits that might contain secrets. They also check code formatting, PEP8 compliance, type hints, etc. -👉 **Note:** A one time setup is required both to support `detect-secrets` and in your global Git configuration. See [the wiki entry on Secrets](https://github.com/NASA-PDS/nasa-pds.github.io/wiki/Git-and-Github-Guide#detect-secrets) to learn how. - ### Packaging diff --git a/docs/wiki-detect-secrets.md b/docs/wiki-detect-secrets.md new file mode 100644 index 0000000..fb935c1 --- /dev/null +++ b/docs/wiki-detect-secrets.md @@ -0,0 +1,90 @@ +## Detect Secrets + +### Intro + +The Planetary Data System's Engineering Node uses [detect-secrets](https://github.com/Yelp/detect-secrets) to help prevent committing sensitive information—passwords, API keys, tokens, hostnames, email addresses, and high-entropy strings—to a repository. Unlike a simple pattern matcher, it uses entropy analysis to surface randomized strings that may be credentials. + +Detect Secrets is already integrated into the [Python template repository](https://github.com/NASA-PDS/template-repo-python/) and the [Java template repository](https://github.com/NASA-PDS/template-repo-java/), so creating new repositories from those templates gives you the support for Detect Secrets out of the box. 
+ +**However**, the `detect-secrets` command-line tool must be installed in order to support pre-commit hooks and to create or update the `.secrets.baseline` file for your repository. `scripts/detect_secrets_baseline.sh` looks for it in `venv/bin` and then `.venv/bin` at the repository root before falling back to your shell's `PATH`; in the Python template, installing the `dev` extras (`pip install --editable '.[dev]'`) into one of those virtual environments provides it. Otherwise, install it manually as described below and make it available on your `PATH`. + +### Installation + +The tool is written in Python, so you'll also need Python. We recommend a dedicated virtual environment: + +```bash +python3 -m venv ~/Tools/detect-secrets +~/Tools/detect-secrets/bin/pip install detect-secrets~=1.5.0 +``` + +Then add `~/Tools/detect-secrets/bin` to your `PATH`. + +### How it works in PDS repos + +Each PDS Python repository ships two files that together drive secrets scanning: + +| File | Purpose | +|---|---| +| `scripts/detect_secrets_baseline.sh` | Single source of truth for scan arguments and check logic. Run it directly or via pre-commit. | +| `.detect-secrets-ignore` | Per-repo file exclusions (one regex per line, `#` for comments). Supplements the global exclusions baked into the script. | + +The pre-commit hook runs `scripts/detect_secrets_baseline.sh` (no arguments) on every commit. The GitHub Actions workflow `secrets-detection.yaml` runs the same script on every push and pull request to `main`. + +### Create or Update the Secrets Baseline + +Each repo must have a `.secrets.baseline` file at the root. This catalog tells detect-secrets which findings are known false positives (example API keys, email addresses we intentionally include, etc.). 
+ +**Scan and generate a new baseline:** + +```bash +scripts/detect_secrets_baseline.sh scan +``` + +**Review and classify each finding** (mark each as `is_secret: true` or `is_secret: false`): + +```bash +scripts/detect_secrets_baseline.sh audit +``` + +**Commit the baseline:** + +```bash +git add .secrets.baseline +``` + +### Excluding files + +To exclude additional files from scanning in a specific repo, add regex patterns to `.detect-secrets-ignore`, one per line: + +``` +# Example: ignore test fixture directories +.*/tests/data/.* + +# Example: ignore generated documentation +docs/build/.* +``` + +Global exclusions (`.git`, `venv`, `dist`, `build`, `*.egg-info`, etc.) are already handled by the script and do not need to be repeated here. + +### Check for new secrets manually + +```bash +scripts/detect_secrets_baseline.sh +``` + +This is the same check the pre-commit hook runs. It will: +1. Fail if any entries in `.secrets.baseline` have not been audited (no `is_secret` field). +2. Fail if any new secrets are detected that are not already in the baseline. + +### Configuring pre-commit hooks + +After generating and committing your baseline, install the pre-commit hooks: + +```bash +pre-commit install +pre-commit install -t pre-push +pre-commit install -t prepare-commit-msg +pre-commit install -t commit-msg +``` + +From that point on, the detect-secrets check runs automatically on every `git commit`. diff --git a/scripts/detect_secrets_baseline.sh b/scripts/detect_secrets_baseline.sh new file mode 100755 index 0000000..17b43e6 --- /dev/null +++ b/scripts/detect_secrets_baseline.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# Single source of truth for detect-secrets arguments. +# Per-repo exclusions go in .detect-secrets-ignore (one regex per line, # for comments). 
+#
+# Usage:
+#   scripts/detect_secrets_baseline.sh scan    # Regenerate .secrets.baseline
+#   scripts/detect_secrets_baseline.sh audit   # Interactively audit .secrets.baseline
+#   scripts/detect_secrets_baseline.sh         # Check for new secrets vs baseline
+#
+# Exit status: 0 on success; non-zero when the baseline is missing, contains
+# unaudited entries, differs from a fresh scan, or when any command fails.
+set -e
+
+# Every path used below is relative to the repository root, so run from there
+# no matter which directory the caller was in (this script lives in <root>/scripts).
+cd "$(dirname "${BASH_SOURCE[0]}")/.."
+
+# Prefer venv's detect-secrets over system install
+if [ -f "venv/bin/detect-secrets" ]; then
+    DETECT_SECRETS="venv/bin/detect-secrets"
+elif [ -f ".venv/bin/detect-secrets" ]; then
+    DETECT_SECRETS=".venv/bin/detect-secrets"
+else
+    DETECT_SECRETS="detect-secrets"
+fi
+
+# Global excludes applied in every repo
+GLOBAL_EXCLUDES=(
+    '\.secrets\..*'
+    '\.git.*'
+    '\.pre-commit-config\.yaml'
+    '\.mypy_cache'
+    '\.pytest_cache'
+    '\.tox'
+    '\.venv'
+    'venv'
+    'dist'
+    'build'
+    '.*\.egg-info'
+    'scripts/detect_secrets_baseline\.sh'
+)
+
+# Expand each pattern into its own "--exclude-files <regex>" argument pair.
+EXCLUDE_ARGS=()
+for pat in "${GLOBAL_EXCLUDES[@]}"; do
+    EXCLUDE_ARGS+=(--exclude-files "$pat")
+done
+
+# Per-repo excludes from .detect-secrets-ignore (one regex per line, # comments ok)
+if [ -f .detect-secrets-ignore ]; then
+    # "|| [ -n "$line" ]" keeps a final line that has no trailing newline.
+    while IFS= read -r line || [ -n "$line" ]; do
+        line="${line%$'\r'}"                          # tolerate CRLF line endings
+        [[ "$line" =~ ^[[:space:]]*# ]] && continue   # comment line
+        [[ "$line" =~ ^[[:space:]]*$ ]] && continue   # blank line (spaces and/or tabs)
+        EXCLUDE_ARGS+=(--exclude-files "$line")
+    done < .detect-secrets-ignore
+fi
+
+# Print one "filename,hashed_secret" line per finding in baseline file $1, sorted.
+list_secrets() {
+    python3 -c "
+import json, sys
+with open(sys.argv[1]) as f: data = json.load(f)
+lines = [f\"{k},{s['hashed_secret']}\" for k, v in data.get('results', {}).items() for s in v]
+print('\n'.join(sorted(lines)))
+" "$1"
+}
+
+# Succeed (exit 0) only if baseline files $1 and $2 list exactly the same
+# (filename, hashed_secret) pairs. The diff output is discarded so that no
+# finding is ever written to the terminal or CI log.
+compare_secrets() {
+    diff <(list_secrets "$1") <(list_secrets "$2") >/dev/null
+}
+
+if [ "$1" = "scan" ]; then
+    # Scan into a temporary file and move it into place only on success: a bare
+    # "> .secrets.baseline" truncates the committed baseline before the scan
+    # starts and leaves it empty if detect-secrets is missing or fails.
+    # The temporary name is matched by the '\.secrets\..*' global exclude.
+    trap 'rm -f .secrets.baseline.tmp' EXIT
+    $DETECT_SECRETS scan "${EXCLUDE_ARGS[@]}" > .secrets.baseline.tmp
+    mv .secrets.baseline.tmp .secrets.baseline
+    echo "Updated .secrets.baseline"
+    echo "Next step: run 'scripts/detect_secrets_baseline.sh audit' to review and classify detected secrets."
+elif [ "$1" = "audit" ]; then
+    $DETECT_SECRETS audit .secrets.baseline
+else
+    # Without a baseline the checks below would die with a Python traceback.
+    if [ ! -f .secrets.baseline ]; then
+        echo "No .secrets.baseline found. Run 'scripts/detect_secrets_baseline.sh scan', then 'scripts/detect_secrets_baseline.sh audit', and commit the result." >&2
+        exit 1
+    fi
+
+    # Remove the scratch copy on every exit path, including a scan that aborts under set -e.
+    trap 'rm -f .secrets.new' EXIT
+
+    # Check 1: Fail if any secrets in the baseline have not been audited
+    # (an entry counts as unaudited when it has no 'is_secret' key).
+    unaudited=$(python3 -c "
+import json, sys
+with open('.secrets.baseline') as f: data = json.load(f)
+count = sum(1 for v in data.get('results', {}).values() for s in v if 'is_secret' not in s)
+print(count)
+")
+    if [ "$unaudited" -gt 0 ]; then
+        echo "⚠️ Attention Required! ⚠️" >&2
+        echo "$unaudited secret(s) in .secrets.baseline have not been audited." >&2
+        echo "Run 'scripts/detect_secrets_baseline.sh audit' to review and classify each detected secret." >&2
+        exit 1
+    fi
+
+    # Check 2: Fail if any new secrets are detected that are not in the baseline.
+    # The scan runs against a scratch copy so the committed baseline is never modified.
+    # NOTE(review): compare_secrets fails on ANY difference, so a baseline entry
+    # that the fresh scan drops would also fail here -- presumably intended to
+    # keep the baseline current; confirm.
+    cp .secrets.baseline .secrets.new
+    $DETECT_SECRETS scan "${EXCLUDE_ARGS[@]}" --baseline .secrets.new
+
+    if ! compare_secrets .secrets.baseline .secrets.new; then
+        echo "⚠️ Attention Required! ⚠️" >&2
+        echo "New secrets have been detected in your recent commit. Due to security concerns, we cannot display detailed information here and we cannot proceed until this issue is resolved." >&2
+        echo "" >&2
+        echo "Please follow the steps below on your local machine to reveal and handle the secrets:" >&2
+        echo "" >&2
+        echo "1️⃣ Run the 'detect-secrets' tool on your local machine. This tool will identify and clean up the secrets. You can find detailed instructions at this link: https://nasa-ammos.github.io/slim/continuous-testing/starter-kits/#detect-secrets" >&2
+        echo "" >&2
+        echo "2️⃣ After cleaning up the secrets, commit your changes and re-push your update to the repository." >&2
+        echo "" >&2
+        echo "Your efforts to maintain the security of our codebase are greatly appreciated!" 
>&2 + rm -f .secrets.new + exit 1 + fi + + rm -f .secrets.new +fi diff --git a/setup.cfg b/setup.cfg index 193a729..4069364 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,6 +70,7 @@ dev = pytest-watch~=4.2.0 pytest-xdist~=3.3.1 pre-commit~=3.3.3 + detect-secrets~=1.5.0 sphinx~=8.2.3 sphinx-rtd-theme~=3.0.2 sphinxcontrib-googleanalytics~=0.5 From f0b90346614f739be4e7f1870c34530c52be8b12 Mon Sep 17 00:00:00 2001 From: Jordan Padams Date: Thu, 2 Jul 2026 15:34:45 -0700 Subject: [PATCH 2/3] Add template notice to CLAUDE.md Remind users who create repos from this template to update CLAUDE.md for their specific project before using it with Claude Code. Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index fa746c3..53238f6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -2,6 +2,8 @@ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. +> **Template repo notice:** This file was generated for `template-repo-python`. If you created a new repo from this template, update this file to reflect your project's actual package name, commands, and architecture before using it. + ## Purpose This is NASA-PDS's template repository for new Python projects. When working in a repo created from this template, the placeholder `your_package_name` must be replaced with the actual module name throughout `setup.cfg`, `src/pds/`, `tests/`, and other files. 
From fc4d754df93a57cdd101d319c1fde0ec7bfe681a Mon Sep 17 00:00:00 2001 From: Jordan Padams Date: Thu, 2 Jul 2026 17:22:34 -0700 Subject: [PATCH 3/3] Update secrets baseline --- .secrets.baseline | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 650a7cd..32cf433 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1,18 +1,12 @@ { - "version": "1.4.0", + "version": "1.5.0", "plugins_used": [ - { - "name": "AbsolutePathDetectorExperimental" - }, { "name": "ArtifactoryDetector" }, { "name": "AWSKeyDetector" }, - { - "name": "AWSSensitiveInfoDetectorExperimental" - }, { "name": "AzureStorageKeyDetector" }, @@ -30,10 +24,10 @@ "name": "DiscordBotTokenDetector" }, { - "name": "EmailAddressDetector" + "name": "GitHubTokenDetector" }, { - "name": "GitHubTokenDetector" + "name": "GitLabTokenDetector" }, { "name": "HexHighEntropyString", @@ -61,9 +55,15 @@ { "name": "NpmDetector" }, + { + "name": "OpenAIDetector" + }, { "name": "PrivateKeyDetector" }, + { + "name": "PypiTokenDetector" + }, { "name": "SendGridDetector" }, @@ -79,6 +79,9 @@ { "name": "StripeDetector" }, + { + "name": "TelegramBotTokenDetector" + }, { "name": "TwilioKeyDetector" } @@ -125,7 +128,7 @@ { "path": "detect_secrets.filters.regex.should_exclude_file", "pattern": [ - "\\.secrets..*", + "\\.secrets\\..*", "\\.git.*", "\\.pre-commit-config\\.yaml", "\\.mypy_cache", @@ -135,21 +138,26 @@ "venv", "dist", "build", - ".*\\.egg-info" + ".*\\.egg-info", + "scripts/detect_secrets_baseline\\.sh", + ".*/test/data/.*", + ".*/tests/data/.*", + "README\\.md", + "CHANGELOG\\.md" ] } ], "results": { - "setup.cfg": [ + "CLAUDE.md": [ { - "type": "Email Address", - "filename": "setup.cfg", - "hashed_secret": "3a6d7aa49a8e4a2fe32a5cd0e53da9cb96bd8d29", + "type": "Secret Keyword", + "filename": "CLAUDE.md", + "hashed_secret": "17af698d29702c0d4121498fa6ed3c8346944b42", "is_verified": false, - "line_number": 22, + 
"line_number": 97, "is_secret": false } ] }, - "generated_at": "2024-10-02T16:13:34Z" + "generated_at": "2026-07-03T00:22:23Z" }