Skip to content

Commit 8716cb0

Browse files
anth-volk and claude committed
Checkpoint prerequisite files for cross-container --script mode
download_private_prerequisites.py downloads files (puf_2015.csv, demographics_2015.csv, soi.csv, np2023_d5_mid.csv, policy_data.db) to the container's local filesystem, which is discarded when the container exits. In --script mode, each script runs in a separate container, so subsequent scripts could not find the prerequisites.

Fix: save the prerequisite files to the checkpoint volume after download, and restore them before running any other script.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent da8dddd commit 8716cb0

1 file changed

Lines changed: 19 additions & 1 deletion

File tree

modal_app/data_build.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,17 @@
9898
"small_enhanced_cps": "policyengine_us_data/datasets/cps/small_enhanced_cps.py",
9999
}
100100

101+
# Files downloaded by download_private_prerequisites.py that must be
102+
# checkpointed so subsequent --script calls in separate containers
103+
# can access them.
104+
PREREQUISITE_FILES = [
105+
"policyengine_us_data/storage/puf_2015.csv",
106+
"policyengine_us_data/storage/demographics_2015.csv",
107+
"policyengine_us_data/storage/soi.csv",
108+
"policyengine_us_data/storage/np2023_d5_mid.csv",
109+
"policyengine_us_data/storage/calibration/policy_data.db",
110+
]
111+
101112

102113
def setup_gcp_credentials():
103114
"""Write GCP credentials JSON to a temp file for google.auth.default()."""
@@ -700,7 +711,10 @@ def run_single_script(
700711
# Handle download_prerequisites specially (no SCRIPT_OUTPUTS entry)
701712
if script_name == "download_prerequisites":
702713
run_script(script_path)
703-
checkpoint_volume.commit()
714+
# Checkpoint prerequisite files so subsequent containers can
715+
# restore them.
716+
for prereq in PREREQUISITE_FILES:
717+
save_checkpoint(branch, prereq, checkpoint_volume)
704718
return f"Completed {script_name}"
705719

706720
output_files = SCRIPT_OUTPUTS.get(script_path)
@@ -710,6 +724,10 @@ def run_single_script(
710724
f"Valid names: {', '.join(SCRIPT_SHORT_NAMES.keys())}"
711725
)
712726

727+
# Restore prerequisite files from checkpoint volume
728+
for prereq in PREREQUISITE_FILES:
729+
restore_from_checkpoint(branch, prereq)
730+
713731
# Restore any existing checkpoints for dependencies
714732
for dep_path, dep_outputs in SCRIPT_OUTPUTS.items():
715733
if dep_path == script_path:

0 commit comments

Comments
 (0)