From b0dbcf2d240e0705706ebb24eb90f686b139bb46 Mon Sep 17 00:00:00 2001 From: Mike McCarty Date: Tue, 3 Feb 2026 17:40:24 -0500 Subject: [PATCH] added a hook for checking secrets with coderabbit gitleaks --- .pre-commit-hooks.yaml | 7 + pyproject.toml | 1 + .../coderabbit_gitleaks.py | 161 +++++++++ .../test_coderabbit_gitleaks.py | 329 ++++++++++++++++++ tests/test_pre_commit.py | 6 + 5 files changed, 504 insertions(+) create mode 100644 src/rapids_pre_commit_hooks/coderabbit_gitleaks.py create mode 100644 tests/rapids_pre_commit_hooks/test_coderabbit_gitleaks.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 013cce9..92a0642 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -19,6 +19,13 @@ (?x) ^[.]github/CODEOWNERS$ args: [--fix] +- id: verify-coderabbit-gitleaks + name: verify-coderabbit-gitleaks + description: check for secrets using CodeRabbit CLI Gitleaks + entry: verify-coderabbit-gitleaks + language: python + pass_filenames: false + always_run: true - id: verify-conda-yes name: pass -y/--yes to conda description: make sure that all calls to conda pass -y/--yes diff --git a/pyproject.toml b/pyproject.toml index 35784ac..7f16280 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ alpha-spec = [ [project.scripts] verify-alpha-spec = "rapids_pre_commit_hooks.alpha_spec:main" verify-codeowners = "rapids_pre_commit_hooks.codeowners:main" +verify-coderabbit-gitleaks = "rapids_pre_commit_hooks.coderabbit_gitleaks:main" verify-conda-yes = "rapids_pre_commit_hooks.shell.verify_conda_yes:main" verify-copyright = "rapids_pre_commit_hooks.copyright:main" verify-hardcoded-version = "rapids_pre_commit_hooks.hardcoded_version:main" diff --git a/src/rapids_pre_commit_hooks/coderabbit_gitleaks.py b/src/rapids_pre_commit_hooks/coderabbit_gitleaks.py new file mode 100644 index 0000000..6548006 --- /dev/null +++ b/src/rapids_pre_commit_hooks/coderabbit_gitleaks.py @@ -0,0 +1,161 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import json +import os +import subprocess +import sys +from typing import TYPE_CHECKING + +from rich.console import Console +from rich.markup import escape + +if TYPE_CHECKING: + from typing import Any + + +def run_coderabbit_gitleaks(files: list[str]) -> int: + """Run CodeRabbit CLI Gitleaks scan and check for secrets. + + Args: + files: List of files to scan (passed from pre-commit) + + Returns: + Exit code: 0 if no secrets found, 1 if secrets found or error occurred + """ + console = Console(highlight=False) + + # Allow skipping if CodeRabbit is not installed (for testing/CI) + skip_if_missing = os.getenv("SKIP_CODERABBIT_IF_MISSING", "false").lower() in ( + "true", + "1", + "yes", + ) + + # Check if coderabbit CLI is installed + try: + result = subprocess.run( + ["coderabbit", "--version"], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + if skip_if_missing: + console.print( + "[bold yellow]Warning:[/bold yellow] CodeRabbit CLI is " + "not installed - skipping secret scan" + ) + return 0 + console.print( + "[bold red]Error:[/bold red] CodeRabbit CLI is not installed " + "or not in PATH" + ) + console.print( + "Install it from: https://docs.coderabbit.ai/cli#installation" + ) + return 1 + except FileNotFoundError: + if skip_if_missing: + console.print( + "[bold yellow]Warning:[/bold yellow] CodeRabbit CLI is not " + "installed - skipping secret scan" + ) + return 0 + console.print( + "[bold red]Error:[/bold red] CodeRabbit CLI is not installed" + ) + console.print( + "Install it from: https://docs.coderabbit.ai/cli#installation" + ) + return 1 + + # Run gitleaks scan + console.print("Running CodeRabbit Gitleaks scan...") + try: + result = subprocess.run( + ["coderabbit", "gitleaks", "--format", "json"], + capture_output=True, + text=True, + check=False, + ) + except Exception as e: + console.print(f"[bold red]Error running CodeRabbit:[/bold red] {e}") + return 1 + + # Parse the output + has_secrets = False + if result.returncode != 0: + # Gitleaks returns non-zero when secrets are found + has_secrets = True + + # Try to parse JSON output for detailed information + if result.stdout: + try: + output_data: "Any" = json.loads(result.stdout) + if isinstance(output_data, list) and len(output_data) > 0: + has_secrets = True + console.print( + f"\n[bold red]Found {len(output_data)} potential " + f"secret(s):[/bold red]\n" + ) + for i, finding in enumerate(output_data, 1): + file_path = finding.get("File", "unknown") + line = finding.get("StartLine", "?") + rule_id = finding.get("RuleID", "unknown") + match_text = finding.get("Match", "") + + console.print( + f"{i}. [bold]{escape(file_path)}:{line}[/bold]" + ) + console.print(f" Rule: {escape(rule_id)}") + if match_text: + # Truncate long matches + if len(match_text) > 80: + match_text = match_text[:77] + "..." + console.print(f" Match: {escape(match_text)}") + console.print() + except json.JSONDecodeError: + # If JSON parsing fails, check stderr for error messages + if result.stderr: + console.print( + "[bold yellow]Warning:[/bold yellow] " + "Could not parse Gitleaks output" + ) + console.print(result.stderr) + + if has_secrets: + console.print( + "[bold red]CodeRabbit Gitleaks found potential secrets in your " + "code.[/bold red]" + ) + console.print( + "Please review and remove any sensitive information before " + "committing." + ) + return 1 + + console.print("[bold green]No secrets detected.[/bold green]") + return 0 + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run CodeRabbit CLI Gitleaks scan to detect secrets" + ) + parser.add_argument( + "files", + nargs="*", + metavar="file", + help="files to check (passed by pre-commit)", + ) + args = parser.parse_args() + + # Run the scan - CodeRabbit Gitleaks scans the entire repository + # regardless of which files are passed + sys.exit(run_coderabbit_gitleaks(args.files)) + + +if __name__ == "__main__": + main() diff --git a/tests/rapids_pre_commit_hooks/test_coderabbit_gitleaks.py b/tests/rapids_pre_commit_hooks/test_coderabbit_gitleaks.py new file mode 100644 index 0000000..a3f5cb8 --- /dev/null +++ b/tests/rapids_pre_commit_hooks/test_coderabbit_gitleaks.py @@ -0,0 +1,329 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +import json +from unittest.mock import MagicMock, patch + +from rapids_pre_commit_hooks.coderabbit_gitleaks import ( + run_coderabbit_gitleaks, +) + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_coderabbit_not_installed(mock_run): + """Test handling when CodeRabbit CLI is not installed.""" + # Simulate coderabbit command not found + mock_run.side_effect = FileNotFoundError() + + exit_code = run_coderabbit_gitleaks([]) + assert exit_code == 1 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_coderabbit_version_check_fails(mock_run): + """Test handling when CodeRabbit version check fails.""" + # Simulate version check returning non-zero + mock_result = MagicMock() + mock_result.returncode = 1 + mock_result.stdout = "" + mock_result.stderr = "command not found" + mock_run.return_value = mock_result + + exit_code = run_coderabbit_gitleaks([]) + assert exit_code == 1 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_no_secrets_found(mock_run): + """Test successful scan with no secrets found.""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan (no secrets) + scan_result = MagicMock() + scan_result.returncode = 0 + scan_result.stdout = "[]" + scan_result.stderr = "" + + mock_run.side_effect = [version_result, scan_result] + + exit_code = run_coderabbit_gitleaks(["file1.py", "file2.py"]) + assert exit_code == 0 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_secrets_found_with_json_output(mock_run): + """Test scan with secrets found and valid JSON output.""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan (secrets found) + secrets = [ + { + "File": "config.py", + "StartLine": 10, + "RuleID": "generic-api-key", + "Match": "api_key = 'sk_test_1234567890'", + }, + { + "File": "credentials.json", + "StartLine": 5, + "RuleID": "aws-access-token", + "Match": "AKIAIOSFODNN7EXAMPLE", + }, + ] + scan_result = MagicMock() + scan_result.returncode = 1 # Non-zero indicates secrets found + scan_result.stdout = json.dumps(secrets) + scan_result.stderr = "" + + mock_run.side_effect = [version_result, scan_result] + + exit_code = run_coderabbit_gitleaks(["file1.py"]) + assert exit_code == 1 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_secrets_found_no_json_output(mock_run): + """Test scan with secrets found but no parseable JSON output.""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan (secrets found, no JSON) + scan_result = MagicMock() + scan_result.returncode = 1 # Non-zero indicates secrets found + scan_result.stdout = "" + scan_result.stderr = "Error: secrets detected" + + mock_run.side_effect = [version_result, scan_result] + + exit_code = run_coderabbit_gitleaks([]) + assert exit_code == 1 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_secrets_with_long_match_text(mock_run): + """Test that long match text is properly truncated.""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan with very long match + long_match = "x" * 100 + secrets = [ + { + "File": "test.py", + "StartLine": 1, + "RuleID": "generic-api-key", + "Match": long_match, + } + ] + scan_result = MagicMock() + scan_result.returncode = 1 + scan_result.stdout = json.dumps(secrets) + scan_result.stderr = "" + + mock_run.side_effect = [version_result, scan_result] + + exit_code = run_coderabbit_gitleaks(["test.py"]) + assert exit_code == 1 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_gitleaks_scan_exception(mock_run): + """Test handling of exceptions during gitleaks scan.""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan raises exception + mock_run.side_effect = [ + version_result, + Exception("Unexpected error"), + ] + + exit_code = run_coderabbit_gitleaks([]) + assert exit_code == 1 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_invalid_json_output(mock_run): + """Test handling of invalid JSON output from gitleaks.""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan with invalid JSON + scan_result = MagicMock() + scan_result.returncode = 1 + scan_result.stdout = "not valid json {[" + scan_result.stderr = "some error occurred" + + mock_run.side_effect = [version_result, scan_result] + + exit_code = run_coderabbit_gitleaks([]) + assert exit_code == 1 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_empty_secrets_list(mock_run): + """Test scan with empty secrets list (no secrets).""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan (empty list) + scan_result = MagicMock() + scan_result.returncode = 0 + scan_result.stdout = "[]" + scan_result.stderr = "" + + mock_run.side_effect = [version_result, scan_result] + + exit_code = run_coderabbit_gitleaks(["file1.py"]) + assert exit_code == 0 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_secrets_with_minimal_fields(mock_run): + """Test secrets output with minimal required fields.""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan with minimal fields + secrets = [ + { + "File": "secret.txt", + # Missing StartLine and RuleID + } + ] + scan_result = MagicMock() + scan_result.returncode = 1 + scan_result.stdout = json.dumps(secrets) + scan_result.stderr = "" + + mock_run.side_effect = [version_result, scan_result] + + exit_code = run_coderabbit_gitleaks(["secret.txt"]) + assert exit_code == 1 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_multiple_secrets_in_same_file(mock_run): + """Test multiple secrets detected in the same file.""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan with multiple secrets + secrets = [ + { + "File": "config.py", + "StartLine": 10, + "RuleID": "generic-api-key", + "Match": "api_key = 'secret1'", + }, + { + "File": "config.py", + "StartLine": 20, + "RuleID": "generic-secret", + "Match": "password = 'secret2'", + }, + { + "File": "other.py", + "StartLine": 5, + "RuleID": "aws-access-token", + "Match": "aws_token", + }, + ] + scan_result = MagicMock() + scan_result.returncode = 1 + scan_result.stdout = json.dumps(secrets) + scan_result.stderr = "" + + mock_run.side_effect = [version_result, scan_result] + + exit_code = run_coderabbit_gitleaks(["config.py", "other.py"]) + assert exit_code == 1 + + +@patch.dict( + "os.environ", {"SKIP_CODERABBIT_IF_MISSING": "true"}, clear=False +) +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_skip_when_not_installed_and_env_var_set(mock_run): + """Test that hook passes with warning when skip flag is set.""" + # Simulate coderabbit command not found + mock_run.side_effect = FileNotFoundError() + + exit_code = run_coderabbit_gitleaks([]) + assert exit_code == 0 + + +@patch.dict( + "os.environ", {"SKIP_CODERABBIT_IF_MISSING": "1"}, clear=False +) +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_skip_with_numeric_env_var(mock_run): + """Test that hook passes with warning when skip flag is '1'.""" + # Simulate coderabbit command not found + mock_run.side_effect = FileNotFoundError() + + exit_code = run_coderabbit_gitleaks([]) + assert exit_code == 0 + + +@patch.dict( + "os.environ", {"SKIP_CODERABBIT_IF_MISSING": "false"}, clear=False +) +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_no_skip_when_env_var_is_false(mock_run): + """Test that hook fails when skip flag is explicitly false.""" + # Simulate coderabbit command not found + mock_run.side_effect = FileNotFoundError() + + exit_code = run_coderabbit_gitleaks([]) + assert exit_code == 1 + + +@patch("rapids_pre_commit_hooks.coderabbit_gitleaks.subprocess.run") +def test_no_files_passed(mock_run): + """Test running scan with no files (scans entire repository).""" + # First call: version check (success) + version_result = MagicMock() + version_result.returncode = 0 + version_result.stdout = "coderabbit version 1.0.0" + + # Second call: gitleaks scan (no secrets) + scan_result = MagicMock() + scan_result.returncode = 0 + scan_result.stdout = "[]" + scan_result.stderr = "" + + mock_run.side_effect = [version_result, scan_result] + + exit_code = run_coderabbit_gitleaks([]) + assert exit_code == 0 + + # Verify that gitleaks was called without file arguments + assert mock_run.call_count == 2 + gitleaks_call = mock_run.call_args_list[1] + assert gitleaks_call[0][0] == [ + "coderabbit", + "gitleaks", + "--format", + "json", + ] diff --git a/tests/test_pre_commit.py b/tests/test_pre_commit.py index f533348..d6cce8c 100644 --- a/tests/test_pre_commit.py +++ b/tests/test_pre_commit.py @@ -122,6 +122,11 @@ def list_files(top): example_dir = os.path.join(EXAMPLES_DIR, hook_name, expected_status) main_dir = os.path.join(example_dir, "main") + + # Skip test if example directory doesn't exist + if not os.path.exists(main_dir): + pytest.skip(f"No {expected_status} example for {hook_name}") + shutil.copytree(main_dir, git_repo.working_tree_dir, dirs_exist_ok=True) try: @@ -200,5 +205,6 @@ def list_files(top): "RAPIDS_COPYRIGHT_FORCE_SPDX": "0", "TARGET_BRANCH": "main", "RAPIDS_TEST_YEAR": "2024", + "SKIP_CODERABBIT_IF_MISSING": "true", }, )