From 4f90ee56614b8a523c64b374f3e441c37bc7b8f1 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 20 Nov 2025 10:25:53 -0500 Subject: [PATCH 1/5] work in progress --- src/schema/schema_validators.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index c5710f57..ab7da44c 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -674,6 +674,25 @@ def validate_sample_category(property_key, normalized_entity_type, request, exis if new_data_dict[property_key] != sample_category: raise ValueError(f"The case of sample_category '{new_data_dict[property_key]}'" f" must be specified as '{sample_category}'.") + +""" +Validate the provided value of Dataset.direct_ancestor on create via POST and update via PUT + +Parameters +---------- +property_key : str + The target property key +normalized_entity_type : str + The normalized entity type the property belongs to (e.g. Dataset) +request: Flask request object + The instance of Flask request passed in from application request +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + The json data in request body, already after the regular validations +""" +def validate_ancestor_type(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): + """ Validate the provided value of Publication.publication_date is in the correct format against ISO 8601 Format: From 1806c2bce071731f13f350f514eca6fa6fe67426 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 20 Nov 2025 10:26:55 -0500 Subject: [PATCH 2/5] work in progress --- src/schema/provenance_schema.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index fb0debe1..37192954 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -454,10 +454,12 @@ ENTITIES: type: list before_property_create_validators: - validate_no_duplicates_in_list 
+ - validate_ancestor_type before_property_update_validators: - validate_no_duplicates_in_list - validate_not_invalid_creation_action - validate_id_not_in_direct_ancestor + - validate_ancestor_type transient: true exposed: false indexed: false From 4d42de74849a7baf83dc94c7602d1ce0aab21ebc Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 20 Nov 2025 12:09:02 -0500 Subject: [PATCH 3/5] added a new validator to check whether dataset direct ancestors are only datasets, publications, and samples. Organ samples not allowed --- src/schema/schema_validators.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index ab7da44c..e176421d 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -11,6 +11,7 @@ from schema import schema_neo4j_queries from schema.schema_constants import SchemaConstants from hubmap_commons import hm_auth +from app_neo4j_queries import get_entities_by_uuid logger = logging.getLogger(__name__) @@ -692,6 +693,25 @@ def validate_sample_category(property_key, normalized_entity_type, request, exis The json data in request body, already after the regular validations """ def validate_ancestor_type(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): + allowed_ancestor_types = ["dataset", "sample"] + direct_ancestor_uuids = new_data_dict[property_key] + ancestors = get_entities_by_uuid(schema_manager.get_neo4j_driver_instance(), direct_ancestor_uuids, fields=["entity_type", "sample_category", "uuid"]) + invalid_uuids = [] + for ancestor in ancestors: + ancestor_uuid = ancestor['uuid'] + ancestor_type = ancestor['entity_type'] + ancestor_type_superclass = schema_manager.get_entity_superclass(ancestor_type) + ancestor_type_superclass = ancestor_type_superclass.lower() if ancestor_type_superclass is not None else None + if not any(t in allowed_ancestor_types for t in (ancestor_type.lower(), 
ancestor_type_superclass)): + invalid_uuids.append(ancestor_uuid) + continue + if ancestor_type.lower() == "sample": + sample_category = ancestor.get('sample_category') + if sample_category and sample_category.lower() == 'organ': + invalid_uuids.append(ancestor_uuid) + continue + if invalid_uuids: + raise ValueError(f"Invalid direct_ancestor_uuid(s). Allowed entity_types are: {', '.join(allowed_ancestor_types)} and their subclasses. For samples, 'organ' is not allowed.") """ From 8c04820a4bace047db0fa4418953045273844487 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Fri, 21 Nov 2025 16:17:55 -0500 Subject: [PATCH 4/5] added new schema_manager function to return subclasses to go along with the existing one to return superclasses. Reworked validator to do all validation inside a single neo4j query rather than iterating through a python loop. --- src/schema/schema_manager.py | 28 ++++++++++++++++++++++++++++ src/schema/schema_neo4j_queries.py | 29 +++++++++++++++++++++++++++++ src/schema/schema_validators.py | 24 +++++++----------------- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 5808c468..afca2757 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -226,6 +226,34 @@ def get_entity_superclass(normalized_entity_class): return normalized_superclass +""" +Get the direct subclasses (if any) of the given entity class + +Parameters +---------- +normalized_entity_class : str + The normalized target entity class + +Returns +------- +list of str + The normalized names of the entity classes whose superclass is the given class. 
Empty list if there are none +""" +def get_entity_subclasses(normalized_entity_class): + subclasses = [] + all_entity_types = get_all_entity_types() + + if normalized_entity_class not in all_entity_types: + raise ValueError(f"Unrecognized entity class: {normalized_entity_class}") + + for name, data in _schema["ENTITIES"].items(): + superclass = data.get("superclass") + if superclass and normalize_entity_type(superclass) == normalized_entity_class: + subclasses.append(normalize_entity_type(name)) + + return subclasses + + """ Determine if the Entity type with 'entity_type' is an instance of 'entity_class'. Use this function if you already have the Entity type. Use `entity_instanceof(uuid, class)` diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 096837db..b7fdc2fe 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -1798,6 +1798,35 @@ def create_activity_tx(tx, activity_data_dict): return node +def validate_direct_ancestors(neo4j_driver, entity_uuids, allowed_types, disallowed_property_values=None): + disallowed_rules_list = disallowed_property_values + query = """ + UNWIND $uuids AS uid + OPTIONAL MATCH (n) WHERE n.uuid = uid + WITH uid, n, + CASE + WHEN n IS NULL THEN false + ELSE any(l IN labels(n) WHERE l IN $allowed_labels) + END AS label_ok, + $disallowed AS rules + WITH uid, label_ok, + any(rule IN rules WHERE + n IS NOT NULL + AND n[rule.property] IS NOT NULL + AND n[rule.property] = rule.value + ) AS has_forbidden_prop + WHERE NOT label_ok OR has_forbidden_prop + RETURN DISTINCT uid AS invalid_uuid + """ + with neo4j_driver.session() as session: + result = session.run(query, + uuids=entity_uuids, + allowed_labels=allowed_types, + disallowed=disallowed_rules_list) + + return [record["invalid_uuid"] for record in result] + + """ Build the property key-value pairs to be used in the Cypher clause for node creation/update diff --git a/src/schema/schema_validators.py 
b/src/schema/schema_validators.py index e176421d..0c59f2cd 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -693,25 +693,15 @@ def validate_sample_category(property_key, normalized_entity_type, request, exis The json data in request body, already after the regular validations """ def validate_ancestor_type(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): - allowed_ancestor_types = ["dataset", "sample"] + allowed_ancestor_types = ["Dataset", "Sample"] + for allowed_ancestor in list(allowed_ancestor_types): + subclasses = schema_manager.get_entity_subclasses(schema_manager.normalize_entity_type(allowed_ancestor)) + allowed_ancestor_types.extend(subclasses) direct_ancestor_uuids = new_data_dict[property_key] - ancestors = get_entities_by_uuid(schema_manager.get_neo4j_driver_instance(), direct_ancestor_uuids, fields=["entity_type", "sample_category", "uuid"]) - invalid_uuids = [] - for ancestor in ancestors: - ancestor_uuid = ancestor['uuid'] - ancestor_type = ancestor['entity_type'] - ancestor_type_superclass = schema_manager.get_entity_superclass(ancestor_type) - ancestor_type_superclass = ancestor_type_superclass.lower() if ancestor_type_superclass is not None else None - if not any(t in allowed_ancestor_types for t in (ancestor_type.lower(), ancestor_type_superclass)): - invalid_uuids.append(ancestor_uuid) - continue - if ancestor_type.lower() == "sample": - sample_category = ancestor.get('sample_category') - if sample_category and sample_category.lower() == 'organ': - invalid_uuids.append(ancestor_uuid) - continue + disallowed_properties = [{"property": "sample_category", "value": "organ"}] + invalid_uuids = schema_neo4j_queries.validate_direct_ancestors(schema_manager.get_neo4j_driver_instance(), direct_ancestor_uuids, allowed_ancestor_types, disallowed_properties) if invalid_uuids: - raise ValueError(f"Invalid direct_ancestor_uuid(s). 
Allowed entity_types are: {', '.join(allowed_ancestor_types)} and their subclasses. For samples, 'organ' is not allowed.") + raise ValueError(f"Invalid or not-found direct_ancestor_uuid(s). Allowed entity_types are: {', '.join(allowed_ancestor_types)}. For samples, 'organ' is not allowed. Invalid uuids: {', '.join(invalid_uuids)}") """ From 3dbe4b811f66f4c15fb368600296f6db2f9bb619 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Mon, 24 Nov 2025 09:17:51 -0500 Subject: [PATCH 5/5] removed unused import app_neo4j_queries.get_entities_by_uuid --- src/schema/schema_validators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index 0c59f2cd..078f319a 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -11,7 +11,6 @@ from schema import schema_neo4j_queries from schema.schema_constants import SchemaConstants from hubmap_commons import hm_auth -from app_neo4j_queries import get_entities_by_uuid logger = logging.getLogger(__name__)