Commit f284527
first commit (0 parents)

5 files changed

Lines changed: 235 additions & 0 deletions

File tree

.github/workflows/deploy.yml
README.md
flows/preprocess/flow.py
flows/train/flow.py
obproject.toml

.github/workflows/deploy.yml

Lines changed: 64 additions & 0 deletions
name: Deploy Project
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

env:
  GH_HEAD_REF: ${{ github.head_ref }}
  GH_REF: ${{ github.ref_name }}

permissions:
  id-token: write
  contents: read
  pull-requests: write

jobs:
  deploy:
    name: Deploy Project
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python3 -m pip install -U requests
          python3 -m pip install outerbounds pyyaml
          python3 -m pip install -U ob-project-utils

      - name: Configure Outerbounds
        run: |
          PROJECT_NAME=$(yq .project obproject.toml)
          DEFAULT_CICD_USER="${PROJECT_NAME//_/-}-cicd"
          PLATFORM=$(yq .platform obproject.toml)
          CICD_USER=$(yq ".cicd_user // \"$DEFAULT_CICD_USER\"" obproject.toml)
          PERIMETER="default"
          echo "🏗️ Deployment target:"
          echo "  Platform: $PLATFORM"
          echo "  CI/CD User: $CICD_USER"
          echo "  Perimeter: $PERIMETER"
          outerbounds service-principal-configure \
            --name "$CICD_USER" \
            --deployment-domain "$PLATFORM" \
            --perimeter "$PERIMETER" \
            --github-actions

      - name: Deploy Project
        env:
          COMMIT_URL: "https://github.com/${{ github.repository }}/commit/"
          CI_URL: "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMMENTS_URL: ${{ github.event.pull_request.comments_url }}
          PYTHONUNBUFFERED: 1
        run: obproject-deploy

README.md

Lines changed: 69 additions & 0 deletions
# Flow Chaining Example

Minimal example showing how to chain flows using `@trigger_on_finish`.

## Structure

```
flow-chaining-example/
  obproject.toml
  flows/
    preprocess/flow.py   # Runs first, processes datasets in parallel
    train/flow.py        # Triggered when preprocess finishes
```

## How It Works

1. **PreprocessFlow** runs with a `datasets` parameter (comma-separated paths)
2. Uses `foreach` to process each dataset in parallel
3. Stores results in the `self.processed_paths` artifact
4. **TrainFlow** has `@trigger_on_finish(flow="PreprocessFlow")`
5. When PreprocessFlow completes, TrainFlow starts automatically
6. TrainFlow accesses data via `current.trigger.run.data.processed_paths`

## Testing Locally

```bash
# Test PreprocessFlow standalone
cd flows/preprocess
python flow.py run --datasets "path1,path2,path3"

# Test TrainFlow standalone (without trigger)
cd flows/train
python flow.py run --learning_rate 0.05 --n_estimators 200
```
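The artifact values those test runs produce follow directly from PreprocessFlow's split-and-suffix logic; a minimal plain-Python sketch (helper names are hypothetical, the transformations mirror the flow's steps):

```python
def split_datasets(datasets: str) -> list[str]:
    # PreprocessFlow.start: comma-separated parameter -> foreach list
    return datasets.split(",")


def processed_path(dataset_path: str) -> str:
    # PreprocessFlow.preprocess: each branch derives its output path
    return f"{dataset_path}_processed"


# What self.processed_paths holds after the join step, for the test command above
processed = [processed_path(p) for p in split_datasets("path1,path2,path3")]
# processed == ["path1_processed", "path2_processed", "path3_processed"]
```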
## Deploy to Outerbounds

```bash
obproject-deploy
```

After deploy:

1. Run PreprocessFlow from the UI or CLI
2. TrainFlow will trigger automatically when it finishes
3. TrainFlow parameters (`learning_rate`, `n_estimators`) use deploy-time defaults

## Passing Parameters at Runtime

TrainFlow parameters are set at **deploy time** via the flow definition defaults.
To change them per run, either:

1. **Redeploy** with different defaults
2. **Use artifacts** instead of Parameters for runtime values:
   - PreprocessFlow stores config in an artifact
   - TrainFlow reads it via `current.trigger.run.data.config`
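The artifact option can be sketched with a plain-Python stand-in for `current.trigger.run.data` (`SimpleNamespace` is a hypothetical mock here; the real object is Metaflow's artifact namespace, and the `config` artifact would be set by PreprocessFlow):

```python
from types import SimpleNamespace

# Hypothetical stand-in for current.trigger.run.data in TrainFlow.
# PreprocessFlow would set self.config = {...} in one of its steps.
parent_data = SimpleNamespace(
    processed_paths=["s3://bucket/data1_processed", "s3://bucket/data2_processed"],
    config={"learning_rate": 0.05, "n_estimators": 200},
)

# TrainFlow reads runtime values from the artifact instead of Parameters,
# falling back to deploy-time defaults when a key is absent.
learning_rate = parent_data.config.get("learning_rate", 0.01)
n_estimators = parent_data.config.get("n_estimators", 100)
# learning_rate == 0.05, n_estimators == 200
```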
## Key Pattern

```python
# In TrainFlow
@trigger_on_finish(flow="PreprocessFlow")
class TrainFlow(ProjectFlow):

    @step
    def start(self):
        if current.trigger:
            # Access parent flow's artifacts
            data = current.trigger.run.data.processed_paths
```

flows/preprocess/flow.py

Lines changed: 49 additions & 0 deletions
"""
Preprocess Flow: Processes multiple datasets in parallel.
When finished, automatically triggers TrainFlow.
"""

from metaflow import step, Parameter, current
from obproject import ProjectFlow


class PreprocessFlow(ProjectFlow):
    """
    Preprocesses datasets. Runs multiple configs in parallel via foreach.
    """

    datasets = Parameter(
        "datasets",
        default="s3://bucket/data1,s3://bucket/data2",
        help="Comma-separated dataset paths",
    )

    @step
    def start(self):
        self.dataset_list = self.datasets.split(",")
        print(f"Processing {len(self.dataset_list)} datasets: {self.dataset_list}")
        self.next(self.preprocess, foreach="dataset_list")

    @step
    def preprocess(self):
        """Process each dataset (A1, A2, etc.)"""
        self.dataset_path = self.input
        self.output_path = f"{self.dataset_path}_processed"
        print(f"Preprocessed: {self.dataset_path} -> {self.output_path}")
        self.next(self.join)

    @step
    def join(self, inputs):
        """Merge results from parallel preprocessing"""
        self.processed_paths = [inp.output_path for inp in inputs]
        print(f"All preprocessed: {self.processed_paths}")
        self.next(self.end)

    @step
    def end(self):
        # These artifacts are accessible to TrainFlow via current.trigger.run.data
        print(f"Preprocess complete. Outputs: {self.processed_paths}")


if __name__ == "__main__":
    PreprocessFlow()

flows/train/flow.py

Lines changed: 50 additions & 0 deletions
"""
Train Flow: Triggered automatically when PreprocessFlow finishes.
Accesses PreprocessFlow's outputs via current.trigger.run.data
"""

from metaflow import step, Parameter, current, trigger_on_finish
from obproject import ProjectFlow


@trigger_on_finish(flow="PreprocessFlow")
class TrainFlow(ProjectFlow):
    """
    Training flow. Triggered automatically when PreprocessFlow completes.
    """

    learning_rate = Parameter("learning_rate", default=0.01, type=float)
    n_estimators = Parameter("n_estimators", default=100, type=int)

    @step
    def start(self):
        # Access data from the triggering flow
        if current.trigger:
            trigger_run = current.trigger.run
            self.input_paths = trigger_run.data.processed_paths
            print(f"Triggered by: {trigger_run.pathspec}")
            print(f"Input paths from PreprocessFlow: {self.input_paths}")
        else:
            # Standalone run - use defaults for testing
            self.input_paths = ["test/path1_processed", "test/path2_processed"]
            print(f"Standalone run. Using test paths: {self.input_paths}")

        self.next(self.train)

    @step
    def train(self):
        """Train model using preprocessed data"""
        print(f"Training with lr={self.learning_rate}, n_estimators={self.n_estimators}")
        print(f"Using data: {self.input_paths}")
        self.model_path = "s3://bucket/models/trained_model"
        self.metrics = {"accuracy": 0.95, "f1": 0.93}
        self.next(self.end)

    @step
    def end(self):
        print(f"Training complete. Model: {self.model_path}")
        print(f"Metrics: {self.metrics}")


if __name__ == "__main__":
    TrainFlow()

obproject.toml

Lines changed: 3 additions & 0 deletions
project = "flow-chaining-demo"
title = "Flow Chaining Example"
platform = "dev-yellow.outerbounds.xyz"
