Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions .github/workflows/linting.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
---
# Lint + auto-format pipeline.
# Job 1 (lint): runs formatters/linters per language in a matrix; any changes
# the formatters make are discarded with the runner, so only the hard checks
# (flake8 error-class selection, clippy -D warnings) can fail this job.
# Job 2 (commit): re-runs all formatters on one checkout and auto-commits the
# combined result back to the branch.
name: Linting

on:
  push:
    branches:
      - main
      - feat/ci-pipeline-4
  pull_request:
    branches:
      - main

# contents: write is required so the commit job can push the auto-format
# commit; checks: write allows check-run annotations.
permissions:
  checks: write
  contents: write

jobs:
  lint:
    name: Lint ${{ matrix.language }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Each entry runs the full step list; the per-step `if` guards on
        # matrix.setup select which steps actually execute.
        include:
          - language: Python
            setup: python
          - language: Rust
            setup: rust
    steps:
      - name: Check out Git repository
        uses: actions/checkout@v4

      # Python Setup
      - name: Set up Python
        if: matrix.setup == 'python'
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Python dependencies
        if: matrix.setup == 'python'
        run: |
          python -m pip install --upgrade pip
          pip install black flake8 isort

      # NOTE(review): black/isort reformat the runner's checkout only — the
      # result is never committed from this job (the commit job redoes it),
      # so these two steps cannot fail the build on formatting drift.
      - name: Auto-format with black
        if: matrix.setup == 'python'
        run: black .

      - name: Sort imports with isort
        if: matrix.setup == 'python'
        run: isort .

      # E9/F63/F7/F82 restricts flake8 to syntax errors and undefined names,
      # i.e. only hard failures — style issues are left to black/isort.
      - name: Lint with flake8
        if: matrix.setup == 'python'
        run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics

      # Rust Setup
      - name: Install Rust Toolchain
        if: matrix.setup == 'rust'
        uses: dtolnay/rust-toolchain@stable
        with:
          components: clippy, rustfmt

      # The Rust crate lives in ./priors, hence the working-directory.
      - name: Auto-format with rustfmt
        if: matrix.setup == 'rust'
        run: cargo fmt --all
        working-directory: ./priors

      # --fix applies machine-applicable suggestions; remaining warnings are
      # promoted to errors by -D warnings and fail the job.
      - name: Run clippy with auto-fix
        if: matrix.setup == 'rust'
        run: cargo clippy --all-targets --all-features --fix --allow-dirty -- -D warnings
        working-directory: ./priors

  commit:
    name: Commit all changes
    needs: lint
    runs-on: ubuntu-latest
    steps:
      # github.head_ref is only set on pull_request events; on push it is
      # empty and checkout falls back to the pushed ref, which is the
      # intended branch in both cases.
      - name: Check out Git repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.head_ref }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install black flake8 isort

      - name: Install Rust Toolchain
        uses: dtolnay/rust-toolchain@stable
        with:
          components: clippy, rustfmt

      # Re-run every formatter on a single checkout so one commit captures
      # all changes. --allow-staged is needed here (unlike the lint job)
      # because earlier fixes in this script may already be staged.
      - name: Run all linters
        run: |
          black .
          isort .
          cd priors
          cargo fmt --all
          cargo clippy --all-targets --all-features --fix --allow-dirty --allow-staged -- -D warnings

      # NOTE(review): pushing will fail for pull requests from forks — the
      # default GITHUB_TOKEN has no write access to the fork's branch.
      # skip_dirty_check: false is the action's default (commit only when
      # the working tree is dirty); kept explicit for clarity.
      - name: Commit changes
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          commit_message: "style: auto-format with linters"
          commit_user_name: "github-actions[bot]"
          commit_user_email: "github-actions[bot]@users.noreply.github.com"
          skip_dirty_check: false
134 changes: 92 additions & 42 deletions benchmark.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,31 @@
#!/usr/bin/env python3

import os
import time

import numpy as np
import psutil
import os

try:
import priors

HAS_PRIORS = True
except ImportError:
print("❌ priors not installed")
exit(1)

try:
from mlxtend.frequent_patterns import fpgrowth as mlxtend_fpgrowth
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth as mlxtend_fpgrowth

HAS_MLXTEND = True
except ImportError:
HAS_MLXTEND = False
print("⚠️ mlxtend not installed")

try:
from efficient_apriori import apriori as efficient_apriori

HAS_EFFICIENT_APRIORI = True
except ImportError:
HAS_EFFICIENT_APRIORI = False
Expand All @@ -33,7 +37,7 @@ def get_memory_mb():


def generate_data(num_tx, num_items, avg_size, density):
print(f" Generating {num_tx:,} × {num_items} transactions...", end='', flush=True)
print(f" Generating {num_tx:,} × {num_items} transactions...", end="", flush=True)
np.random.seed(42)
data = np.zeros((num_tx, num_items), dtype=np.int32)

Expand All @@ -48,7 +52,7 @@ def generate_data(num_tx, num_items, avg_size, density):


def benchmark(name, func, data, min_sup):
print(f" {name:25s}", end='', flush=True)
print(f" {name:25s}", end="", flush=True)
mem_start = get_memory_mb()

try:
Expand Down Expand Up @@ -83,20 +87,22 @@ def test_priors_lazy(data, sup):
chunk_size = 5000

for i in range(0, data.shape[0], chunk_size):
priors.lazy_count_pass(pid, data[i:i+chunk_size])
priors.lazy_count_pass(pid, data[i : i + chunk_size])

priors.lazy_finalize_counts(pid, sup)

for i in range(0, data.shape[0], chunk_size):
priors.lazy_build_pass(pid, data[i:i+chunk_size])
priors.lazy_build_pass(pid, data[i : i + chunk_size])

result = priors.lazy_mine_patterns(pid, sup)
priors.lazy_cleanup(pid)
return result


def test_mlxtend(data, sup):
df = pd.DataFrame(data.astype(bool), columns=[f"i{i}" for i in range(data.shape[1])])
df = pd.DataFrame(
data.astype(bool), columns=[f"i{i}" for i in range(data.shape[1])]
)
return mlxtend_fpgrowth(df, min_support=sup, use_colnames=True)


Expand All @@ -106,73 +112,117 @@ def test_efficient_apriori(data, sup):


configs = [
{'name': '10K × 50', 'tx': 10_000, 'items': 50, 'size': 20, 'dens': 0.7, 'sup': 0.02},
{'name': '30K × 80', 'tx': 30_000, 'items': 80, 'size': 35, 'dens': 0.75, 'sup': 0.01},
{'name': '60K × 100', 'tx': 60_000, 'items': 100, 'size': 50, 'dens': 0.8, 'sup': 0.008},
{'name': '100K × 120', 'tx': 100_000, 'items': 120, 'size': 60, 'dens': 0.85, 'sup': 0.005},
{
"name": "10K × 50",
"tx": 10_000,
"items": 50,
"size": 20,
"dens": 0.7,
"sup": 0.02,
},
{
"name": "30K × 80",
"tx": 30_000,
"items": 80,
"size": 35,
"dens": 0.75,
"sup": 0.01,
},
{
"name": "60K × 100",
"tx": 60_000,
"items": 100,
"size": 50,
"dens": 0.8,
"sup": 0.008,
},
{
"name": "100K × 120",
"tx": 100_000,
"items": 120,
"size": 60,
"dens": 0.85,
"sup": 0.005,
},
]

print("\n" + "="*80)
print("\n" + "=" * 80)
print("⚡ FP-Growth Benchmark")
print("="*80)
print(f"System RAM: {psutil.virtual_memory().total/1024**3:.1f}GB | Available: {psutil.virtual_memory().available/1024**3:.1f}GB")
print("="*80)
print("=" * 80)
print(
f"System RAM: {psutil.virtual_memory().total/1024**3:.1f}GB | Available: {psutil.virtual_memory().available/1024**3:.1f}GB"
)
print("=" * 80)

results = []

for cfg in configs:
print(f"\n📊 {cfg['name']} (density={cfg['dens']}, support={cfg['sup']})")
print("-" * 80)

data = generate_data(cfg['tx'], cfg['items'], cfg['size'], cfg['dens'])
data = generate_data(cfg["tx"], cfg["items"], cfg["size"], cfg["dens"])

t1, p1, m1 = benchmark('priors (regular)', test_priors_regular, data, cfg['sup'])
t2, p2, m2 = benchmark('priors (lazy)', test_priors_lazy, data, cfg['sup'])
t1, p1, m1 = benchmark("priors (regular)", test_priors_regular, data, cfg["sup"])
t2, p2, m2 = benchmark("priors (lazy)", test_priors_lazy, data, cfg["sup"])

if HAS_MLXTEND:
t3, p3, m3 = benchmark('mlxtend', test_mlxtend, data, cfg['sup'])
t3, p3, m3 = benchmark("mlxtend", test_mlxtend, data, cfg["sup"])
else:
t3, p3, m3 = None, None, None

if HAS_EFFICIENT_APRIORI:
t4, p4, m4 = benchmark('efficient-apriori', test_efficient_apriori, data, cfg['sup'])
t4, p4, m4 = benchmark(
"efficient-apriori", test_efficient_apriori, data, cfg["sup"]
)
else:
t4, p4, m4 = None, None, None

if t1 and t2:
print(f"\n 💡 Lazy vs Regular: {((t2/t1-1)*100):+.1f}% time | {((1-m2/m1)*100):+.1f}% memory savings")
print(
f"\n 💡 Lazy vs Regular: {((t2/t1-1)*100):+.1f}% time | {((1-m2/m1)*100):+.1f}% memory savings"
)

if t1 and t3:
print(f" 💡 priors vs mlxtend: {(t3/t1):.1f}x faster")

if t1 and t4:
print(f" 💡 priors vs efficient-apriori: {(t4/t1):.1f}x faster")

results.append({
'dataset': cfg['name'],
'priors_time': t1,
'lazy_time': t2,
'mlxtend_time': t3,
'efficient_time': t4,
'patterns': p1,
})

print("\n" + "="*80)
results.append(
{
"dataset": cfg["name"],
"priors_time": t1,
"lazy_time": t2,
"mlxtend_time": t3,
"efficient_time": t4,
"patterns": p1,
}
)

print("\n" + "=" * 80)
print("📈 Summary")
print("="*80)
print("=" * 80)

for r in results:
print(f"\n{r['dataset']}:")
print(f" Patterns: {r['patterns']:,}" if r['patterns'] else " Patterns: N/A")
if r['priors_time']:
print(f" Patterns: {r['patterns']:,}" if r["patterns"] else " Patterns: N/A")
if r["priors_time"]:
print(f" priors (regular): {r['priors_time']:.3f}s")
if r['lazy_time']:
if r["lazy_time"]:
print(f" priors (lazy): {r['lazy_time']:.3f}s")
if r['mlxtend_time']:
print(f" mlxtend: {r['mlxtend_time']:.3f}s ({r['mlxtend_time']/r['priors_time']:.1f}x slower)" if r['priors_time'] else f" mlxtend: {r['mlxtend_time']:.3f}s")
if r['efficient_time']:
print(f" efficient-apriori: {r['efficient_time']:.3f}s ({r['efficient_time']/r['priors_time']:.1f}x slower)" if r['priors_time'] else f" efficient-apriori: {r['efficient_time']:.3f}s")

print("\n" + "="*80)
if r["mlxtend_time"]:
print(
f" mlxtend: {r['mlxtend_time']:.3f}s ({r['mlxtend_time']/r['priors_time']:.1f}x slower)"
if r["priors_time"]
else f" mlxtend: {r['mlxtend_time']:.3f}s"
)
if r["efficient_time"]:
print(
f" efficient-apriori: {r['efficient_time']:.3f}s ({r['efficient_time']/r['priors_time']:.1f}x slower)"
if r["priors_time"]
else f" efficient-apriori: {r['efficient_time']:.3f}s"
)

print("\n" + "=" * 80)
print("✓ Benchmark Complete")
print("="*80)
print("=" * 80)
Loading
Loading