diff --git a/.gitattributes b/.gitattributes index 51b0677..743a5c6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ raw/** filter=lfs diff=lfs merge=lfs -text +staging/** filter=lfs diff=lfs merge=lfs -text diff --git a/.typos.toml b/.typos.toml index a8ad854..7699225 100644 --- a/.typos.toml +++ b/.typos.toml @@ -5,3 +5,8 @@ extend-exclude = [ # Auto-generated by Flower "resources" ] + +[default] + +[default.extend-words] +vas = "vas" diff --git a/justfile b/justfile index dfe00d4..f85dfc8 100644 --- a/justfile +++ b/justfile @@ -86,3 +86,7 @@ download-data-dict: # Download data from REDCap download-data: uv run python scripts/redcap_data.py + +# Stage raw data +stage-data: + uv run python scripts/stage_data.py diff --git a/scripts/stage_data.py b/scripts/stage_data.py new file mode 100644 index 0000000..a646a27 --- /dev/null +++ b/scripts/stage_data.py @@ -0,0 +1,140 @@ +import re +from datetime import datetime +from operator import itemgetter +from pathlib import Path +from typing import cast + +import polars as pl +import seedcase_soil as so + +VAS_TIME_FIELD_PATTERN = re.compile( + r"^vas_(?P.+?)(_fasted)?_(?P