diff --git a/datapackage.json b/datapackage.json index 6504081..640f8eb 100644 --- a/datapackage.json +++ b/datapackage.json @@ -23778,10 +23778,10 @@ } }, { - "name": "sefnc_baseline_v4", - "path": "resources/sefnc_baseline_v4/data.parquet", - "title": "sefnc_baseline_v4", - "description": "sefnc_baseline_v4", + "name": "sefnc", + "path": "resources/sefnc/data.parquet", + "title": "Self-efficacy for nutrition change", + "description": "Self-efficacy for nutrition change measurements self-reported by participants across study weeks.", "schema": { "fields": [ { @@ -23813,722 +23813,29 @@ ] }, { - "name": "sefnc_when_tired", - "title": "sefnc_when_tired", - "type": "string", - "description": "Participant's confidence in following their diet when feeling tired. Self-reported by participant. Baseline (V4).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_during_crisis", - "title": "sefnc_during_crisis", - "type": "string", - "description": "Participant's confidence in following their diet during or after a personal crisis. Baseline (V4).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_when_depressed", - "title": "sefnc_when_depressed", - "type": "string", - "description": "Participant's confidence in following their diet when feeling depressed. Self-reported by participant. Baseline (V4).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_when_anxious", - "title": "sefnc_when_anxious", - "type": "string", - "description": "Participant's confidence in following their diet when feeling anxious. Self-reported by participant. Baseline (V4).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_on_vacation", - "title": "sefnc_on_vacation", - "type": "string", - "description": "Participant's confidence in following their diet while on vacation. Self-reported by participant. Repeated at baseline (V4), week 12 (V6) and week 52 (V10).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_when_busy", - "title": "sefnc_when_busy", - "type": "string", - "description": "Participant's confidence in following their diet when they have a heavy workload. Self-reported by participant. Repeated at baseline (V4), week 12 (V6) and week 52 (V10).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_when_missed_goals", - "title": "sefnc_when_missed_goals", - "type": "string", - "description": "Participant's confidence in following their diet when they have not met their dietary goals. Self-reported by participant. Baseline (V4).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_no_support", - "title": "sefnc_no_support", - "type": "string", - "description": "Participant's confidence in following their diet when lacking support from family or friends. Baseline (V4).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_busy_schedule", - "title": "sefnc_busy_schedule", - "type": "string", - "description": "Participant's confidence in following their diet when their schedule is busy or resource- and/or time-demanding. Self-reported by participant. Baseline (V4).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - } - ], - "primaryKey": [ - "event" - ] - } - }, - { - "name": "sefnc_week12_v6", - "path": "resources/sefnc_week12_v6/data.parquet", - "title": "sefnc_week12_v6", - "description": "sefnc_week12_v6", - "schema": { - "fields": [ - { - "name": "event", - "title": "The unique name of the event", - "type": "string", - "description": "The unique name identifying the event when the form was filled in.", - "constraints": { - "required": true - } - }, - { - "name": "center", - "title": "Research center", - "type": "string", - "description": "The research center where the data item was recorded.", - "constraints": { - "required": true, - "enum": [ - "Copenhagen", - "Aarhus", - "Odense" - ] - }, - "categories": [ - "Copenhagen", - "Aarhus", - "Odense" - ] - }, - { - "name": "sefnc_when_tired_v6", - "title": "sefnc_when_tired_v6", - "type": "string", - "description": "Participant's confidence in following their diet when feeling tired. Self-reported by participant. Week 12 (V6).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_during_crisis_v6", - "title": "sefnc_during_crisis_v6", - "type": "string", - "description": "Participant's confidence in following their diet during or after a personal crisis. Self-reported by participant. Week 12 (V6).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_when_depressed_v6", - "title": "sefnc_when_depressed_v6", - "type": "string", - "description": "Participant's confidence in following their diet when feeling depressed. Self-reported by participant. Week 12 (V6).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_when_anxious_v6", - "title": "sefnc_when_anxious_v6", - "type": "string", - "description": "Participant's confidence in following their diet when feeling anxious. Self-reported by participant. Week 12 (V6).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_on_vacation_v6", - "title": "sefnc_on_vacation_v6", - "type": "string", - "description": "Participant's confidence in following their diet while on vacation. Self-reported by participant. Week 12 (V6).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_when_busy_v6", - "title": "sefnc_when_busy_v6", - "type": "string", - "description": "Participant's confidence in following their diet when they have a heavy workload. Self-reported by participant. Week 12 (V6).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_when_missed_goals_v6", - "title": "sefnc_when_missed_goals_v6", - "type": "string", - "description": "Participant's confidence in following their diet when they have not met their dietary goals. Self-reported by participant. Week 12 (V6).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_no_support_v6", - "title": "sefnc_no_support_v6", - "type": "string", - "description": "Participant's confidence in following their diet when lacking support from family or friends. Self-reported by participant. Week 12 (V6).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - { - "name": "sefnc_ubusy_schedule_v6", - "title": "sefnc_ubusy_schedule_v6", - "type": "string", - "description": "Participant's confidence in following their diet when their schedule is busy or resource- and/or time-demanding. Self-reported by participant. Week 12 (V6).", - "constraints": { - "required": true, - "enum": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - }, - "categories": [ - "0%", - "10%", - "20%", - "30%", - "40%", - "50%", - "60%", - "70%", - "80%", - "90%", - "100%" - ] - } - ], - "primaryKey": [ - "event" - ] - } - }, - { - "name": "selfefficacy_for_nutrition_change_sefnc_week_52", - "path": "resources/selfefficacy_for_nutrition_change_sefnc_week_52/data.parquet", - "title": "selfefficacy_for_nutrition_change_sefnc_week_52", - "description": "selfefficacy_for_nutrition_change_sefnc_week_52", - "schema": { - "fields": [ - { - "name": "event", - "title": "The unique name of the event", - "type": "string", - "description": "The unique name identifying the event when the form was filled in.", - "constraints": { - "required": true - } - }, - { - "name": "center", - "title": "Research center", - "type": "string", - "description": "The research center where the data item was recorded.", + "name": "week", + "title": "Week", + "type": "integer", + "description": "The study week when the SEFNC measurement was recorded.", "constraints": { "required": true, "enum": [ - "Copenhagen", - "Aarhus", - "Odense" + 0, + 12, + 52 ] }, "categories": [ - "Copenhagen", - "Aarhus", - "Odense" + 0, + 12, + 52 ] }, { - "name": "sefnc_when_tired_v10", - "title": "sefnc_when_tired_v10", + "name": "when_tired", + "title": "when_tired", "type": "string", - "description": "Participant's confidence in following their diet when feeling tired. Self-reported by participant. Week 52 (10).", + "description": "Participant's confidence in following their diet when feeling tired.", "constraints": { "required": true, "enum": [ @@ -24560,10 +23867,10 @@ ] }, { - "name": "sefnc_during_crisis_v10", - "title": "sefnc_during_crisis_v10", + "name": "during_crisis", + "title": "during_crisis", "type": "string", - "description": "Participant's confidence in following their diet during or after a personal crisis. Self-reported by participant. Week 52 (10).", + "description": "Participant's confidence in following their diet during or after a personal crisis.", "constraints": { "required": true, "enum": [ @@ -24595,10 +23902,10 @@ ] }, { - "name": "sefnc_when_depressed_v10", - "title": "sefnc_when_depressed_v10", + "name": "when_depressed", + "title": "when_depressed", "type": "string", - "description": "Participant's confidence in following their diet when feeling depressed. Self-reported by participant. Week 52 (10).", + "description": "Participant's confidence in following their diet when feeling depressed.", "constraints": { "required": true, "enum": [ @@ -24630,10 +23937,10 @@ ] }, { - "name": "sefnc_when_anxious_v10", - "title": "sefnc_when_anxious_v10", + "name": "when_anxious", + "title": "when_anxious", "type": "string", - "description": "Participant's confidence in following their diet when feeling anxious. Self-reported by participant. Week 52 (10).", + "description": "Participant's confidence in following their diet when feeling anxious.", "constraints": { "required": true, "enum": [ @@ -24665,10 +23972,10 @@ ] }, { - "name": "sefnc_on_vacation_v10", - "title": "sefnc_on_vacation_v10", + "name": "on_vacation", + "title": "on_vacation", "type": "string", - "description": "Participant's confidence in following their diet while on vacation. Self-reported by participant. Week 52 (10).", + "description": "Participant's confidence in following their diet while on vacation.", "constraints": { "required": true, "enum": [ @@ -24700,10 +24007,10 @@ ] }, { - "name": "sefnc_when_busy_v10", - "title": "sefnc_when_busy_v10", + "name": "when_busy", + "title": "when_busy", "type": "string", - "description": "Participant's confidence in following their diet when they have a heavy workload. Self-reported by participant. Week 52 (10).", + "description": "Participant's confidence in following their diet when they have a heavy workload.", "constraints": { "required": true, "enum": [ @@ -24735,10 +24042,10 @@ ] }, { - "name": "sefnc_when_missed_goals_v10", - "title": "sefnc_when_missed_goals_v10", + "name": "when_missed_goals", + "title": "when_missed_goals", "type": "string", - "description": "Participant's confidence in following their diet when they have not met their dietary goals. Self-reported by participant. Week 52 (10).", + "description": "Participant's confidence in following their diet when they have not met their dietary goals.", "constraints": { "required": true, "enum": [ @@ -24770,10 +24077,10 @@ ] }, { - "name": "sefnc_no_support_v10", - "title": "sefnc_no_support_v10", + "name": "no_support", + "title": "no_support", "type": "string", - "description": "Participant's confidence in following their diet when lacking support from family or friends. Self-reported by participant. Week 52 (10).", + "description": "Participant's confidence in following their diet when lacking support from family or friends.", "constraints": { "required": true, "enum": [ @@ -24805,10 +24112,10 @@ ] }, { - "name": "sefnc_ubusy_schedule_v10", - "title": "sefnc_ubusy_schedule_v10", + "name": "busy_schedule", + "title": "busy_schedule", "type": "string", - "description": "Participant's confidence in following their diet when their schedule is busy or resource- and/or time-demanding. Self-reported by participant. Week 52 (10).", + "description": "Participant's confidence in following their diet when their schedule is busy or resource- and/or time-demanding.", "constraints": { "required": true, "enum": [ @@ -24841,7 +24148,8 @@ } ], "primaryKey": [ - "event" + "event", + "week" ] } }, diff --git a/scripts/redcap_dict_to_properties.py b/scripts/redcap_dict_to_properties.py index 672e30a..a9d6b13 100644 --- a/scripts/redcap_dict_to_properties.py +++ b/scripts/redcap_dict_to_properties.py @@ -13,6 +13,13 @@ VAS_TIMEPOINTS = [-10, 30, 60, 90, 120, 180, 240] VAS_TIME_FORM_PATTERN = re.compile(r"^vas_(minus10|(30|60|90|120|180|240)_?min)$") VAS_TIME_FIELD_PATTERN = re.compile(r"(_fasted)?_(minus10|30|60|90|120|180|240)min$") +SEFNC_WEEKS = [0, 12, 52] +SEFNC_FORM_WEEKS = { + "sefnc_baseline_v4": 0, + "sefnc_week12_v6": 12, + "selfefficacy_for_nutrition_change_sefnc_week_52": 52, +} +SEFNC_WEEK_FIELD_PATTERN = re.compile(r"_v(6|10)$") def _map(x: Iterable[In], fn: Callable[[In], Out]) -> list[Out]: @@ -38,6 +45,7 @@ def dictionary_to_properties( redcap_fields: list[dict[str, str]], ) -> list[sp.ResourceProperties]: """Converts REDCap data dictionary to Data Package resources.""" + redcap_fields = _join_sefnc_week_resources(redcap_fields) redcap_fields = _join_vas_time_resources(redcap_fields) sorted_by_form = sorted(redcap_fields, key=lambda field: field["form_name"]) grouped_by_form = groupby(sorted_by_form, key=lambda field: field["form_name"]) @@ -109,6 +117,87 @@ def _remove_vas_time_from_annotation(annotation: str) -> str: ).strip() +def _join_sefnc_week_resources( + redcap_fields: list[dict[str, str]], +) -> list[dict[str, str]]: + """Combines SEFNC week-specific forms into one resource schema.""" + return _deduplicate_sefnc_fields( + _map(redcap_fields, _normalise_sefnc_week_resource_field) + ) + + +def _normalise_sefnc_week_resource_field(field: dict[str, str]) -> dict[str, str]: + if not _is_sefnc_week_resource_field(field): + return field + + return { + **field, + "field_name": _normalise_sefnc_field_name(field["field_name"]), + "form_name": "sefnc", + "field_annotation": _remove_sefnc_week_from_annotation( + field["field_annotation"] + ), + } + + +def _normalise_sefnc_field_name(field_name: str) -> str: + field_name = SEFNC_WEEK_FIELD_PATTERN.sub("", field_name).replace( + "sefnc_ubusy_schedule", "sefnc_busy_schedule" + ) + return re.sub(r"^sefnc_", "", field_name) + + +def _is_sefnc_week_resource_field(field: dict[str, str]) -> bool: + return field["form_name"] in SEFNC_FORM_WEEKS + + +def _deduplicate_sefnc_fields(fields: list[dict[str, str]]) -> list[dict[str, str]]: + deduplicated_fields, _ = reduce( + _append_if_new_sefnc_field, + fields, + ([], set()), + ) + return deduplicated_fields + + +def _append_if_new_sefnc_field( + result: tuple[list[dict[str, str]], set[str]], field: dict[str, str] +) -> tuple[list[dict[str, str]], set[str]]: + fields, seen_sefnc_fields = result + field_name = field["field_name"] + + if field["form_name"] != "sefnc": + return fields + [field], seen_sefnc_fields + + if field_name in seen_sefnc_fields: + return result + + return (fields + [field], seen_sefnc_fields.union({field_name})) + + +def _remove_sefnc_week_from_annotation(annotation: str) -> str: + annotation = re.sub( + r"\s+Repeated at baseline \(V4\), week 12 \(V6\) and week 52 \(V10\)\.?", + "", + annotation, + flags=re.IGNORECASE, + ) + + annotation = re.sub( + r"\s+(Baseline|Week 12|Week 52)\s*\((V4|V6|V?10)\)\.?", + "", + annotation, + flags=re.IGNORECASE, + ) + + return re.sub( + r"\s*Self-reported by participant\.?", + "", + annotation, + flags=re.IGNORECASE, + ).strip() + + def _form_to_resource( form_name: str, fields: list[dict[str, str]] ) -> sp.ResourceProperties: @@ -153,6 +242,21 @@ def _form_to_resource( default_fields.append(time_field) primary_key.append("minutes_from_meal") + if form_name == "sefnc": + week_field = sp.FieldProperties( + name="week", + title="Week", + type="integer", + description="The study week when the SEFNC measurement was recorded.", + categories=SEFNC_WEEKS, + constraints=sp.ConstraintsProperties( + required=True, + enum=SEFNC_WEEKS, + ), + ) + default_fields.append(week_field) + primary_key.append("week") + # Discard fields displayed for information only form_redcap_fields = _filter( fields, lambda field: field["field_type"] not in ["descriptive", "checkbox"] @@ -199,6 +303,9 @@ def _get_resource_title(form_name: str) -> str: if form_name == "vas": return "Visual analogue scale measurements" + if form_name == "sefnc": + return "Self-efficacy for nutrition change" + return form_name @@ -209,6 +316,12 @@ def _get_resource_description(form_name: str) -> str: "relative to the meal." ) + if form_name == "sefnc": + return ( + "Self-efficacy for nutrition change measurements self-reported by " + "participants across study weeks." + ) + return form_name