From 25d717ea7ffe97314b3b97a2e317bcde7e36e0b6 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 20 Nov 2025 08:31:14 -0500 Subject: [PATCH 1/5] added collection_associated_publication read trigger. Updated delete cache to also delete the associated publication. removed deprecation comments involving uses_data relationship --- src/app.py | 6 ++-- src/schema/provenance_schema.yaml | 12 ++++++-- src/schema/schema_neo4j_queries.py | 45 +++++++++++++++++++++++------- src/schema/schema_triggers.py | 39 ++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 16 deletions(-) diff --git a/src/app.py b/src/app.py index 0322b702..f0873eb7 100644 --- a/src/app.py +++ b/src/app.py @@ -5383,6 +5383,7 @@ def delete_cache(entity_uuid, entity_type): collection_uuids = [] dataset_upload_dict = {} publication_collection_dict = {} + collection_publication_uuid = [] # Determine the associated cache keys based on the entity type # For Donor/Datasets/Sample/Publication, delete the cache of all the descendants @@ -5392,6 +5393,7 @@ def delete_cache(entity_uuid, entity_type): # For Collection/Epicollection, delete the cache for each of its associated datasets (via [:IN_COLLECTION]) if schema_manager.entity_type_instanceof(entity_type, 'Collection'): collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') + collection_publication_uuid = schema_neo4j_queries.get_collection_associated_publication(neo4j_driver_instance, entity_uuid)['uuid'] # For Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD]) if entity_type == 'Upload': @@ -5403,13 +5405,11 @@ def delete_cache(entity_uuid, entity_type): dataset_upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) # For Publication, also delete cache of the associated collection - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou if entity_type == 'Publication': publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) # We only use uuid in the cache key acorss all the cache types - uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids + uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids + collection_publication_uuid # Add to the list if the target dataset has linked upload if dataset_upload_dict: diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index fb0debe1..7f84b571 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -308,6 +308,15 @@ ENTITIES: indexed: true description: "The displayname of globus group which the user who created this entity is a member of" before_create_trigger: set_group_name #same as group_uuid, except set group_name + associated_publication: + type: json_string #dict + generated: true + indexed: true + transient: true + description: "A JSON containing the UUID, HuBMAP_ID, and Title for the associated publication" + on_read_trigger: get_collection_associated_publication + on_index_trigger: get_collection_associated_publication + ############################################# Dataset ############################################# @@ -786,9 +795,6 @@ ENTITIES: type: string indexed: true description: 'A DOI pointing to an Organ Mapping Antibody Panel relevant to this publication' - - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou associated_collection: type: json_string # dict generated: true diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 096837db..8ee77093 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -839,8 +839,6 @@ def get_parent_activity_uuid_from_entity(neo4j_driver, entity_uuid): the uuid of the associated collection """ def link_publication_to_associated_collection(neo4j_driver, entity_uuid, associated_collection_uuid): - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou try: with neo4j_driver.session() as session: tx = session.begin_transaction() @@ -1109,9 +1107,6 @@ def get_next_revision_uuids(neo4j_driver, uuid): """ def get_collection_associated_datasets(neo4j_driver, uuid, property_key = None): results = [] - - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou if property_key: query = (f"MATCH (e:Entity)-[:IN_COLLECTION|:USES_DATA]->(c:Collection) " f"WHERE c.uuid = '{uuid}' " @@ -1210,9 +1205,6 @@ def get_dataset_collections(neo4j_driver, uuid, property_key = None, properties_ """ def get_publication_associated_collection(neo4j_driver, uuid): result = {} - - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou query = (f"MATCH (p:Publication)-[:USES_DATA]->(c:Collection) " f"WHERE p.uuid = '{uuid}' " f"RETURN c as {record_field_name}") @@ -1229,6 +1221,41 @@ def get_publication_associated_collection(neo4j_driver, uuid): return result + +""" +Get the associated collection for a given publication + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +uuid : str + The uuid of publication +property_key : str + A target property key for result filtering + +Returns +------- +dict + A dictionary representation of the chosen values +""" +def get_collection_associated_publication(neo4j_driver, uuid): + result = {} + query = (f"MATCH (p:Publication)-[:USES_DATA]->(c:Collection) " + f"WHERE c.uuid = '{uuid}' " + f"RETURN {{uuid: p.uuid, hubmap_id: p.hubmap_id, title: p.title}} AS publication") + + logger.info("=====get_collection_associated_publication() query======") + logger.debug(query) + + with neo4j_driver.session() as session: + record = session.run(query).single() + if record: + result = record["publication"] + return result + + + """ Get the associated Upload for a given dataset @@ -2057,8 +2084,6 @@ def delete_ancestor_linkages_tx(neo4j_driver, entity_uuid, ancestor_uuids): The uuid to target publication """ def _delete_publication_associated_collection_linkages_tx(tx, uuid): - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou query = (f"MATCH (p:Publication)-[r:USES_DATA]->(c:Collection) " f"WHERE p.uuid = '{uuid}' " f"DELETE r") diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index a23321f9..1e6f3d5d 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -830,6 +830,45 @@ def get_publication_associated_collection(property_key, normalized_type, request return property_key, schema_manager.normalize_entity_result_for_response(collection_dict) +""" +TriggerTypeEnum.ON_READ + +Trigger event method of getting the associated publication for this collection + +Parameters +---------- +property_key : str + The target property key +normalized_type : str + One of the types defined in the schema yaml: Dataset +request_args: ImmutableMultiDict + The Flask request.args passed in from application request +user_token: str + The user's globus nexus token +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + A merged dictionary that contains all possible input data to be used + +Returns +------- +str: The target property key +dict: A dictionary representation of the associated collection with all the normalized information +""" +def get_collection_associated_publication(property_key, normalized_type, request_args, user_token, existing_data_dict, new_data_dict): + if 'uuid' not in existing_data_dict: + raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_associated_publication()' trigger method.") + + logger.info(f"Executing 'get_collection_associated_publication()' trigger method on uuid: {existing_data_dict['uuid']}") + + collection_dict = schema_neo4j_queries.get_collection_associated_publication(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) + + # Get rid of the entity node properties that are not defined in the yaml schema + # as well as the ones defined as `exposed: false` in the yaml schema + return property_key, collection_dict + + + """ TriggerTypeEnum.ON_READ From 654fa37a61575212d042b7bdf58642e77564e195 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 20 Nov 2025 10:31:31 -0500 Subject: [PATCH 2/5] updated the entity-api-spec yaml for the new associated_publication field --- entity-api-spec.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index f84dcddb..f443b094 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -937,6 +937,10 @@ components: type: string readOnly: true description: 'The email address of the person or process authenticated when creating the object.' + associated_publication: + type: object + description: 'The publication associated with the given collection' + readOnly: true created_by_user_sub: type: string readOnly: true From f06137fdab2eb46ec2da6fc3bc28ed523f45893f Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 20 Nov 2025 12:19:17 -0500 Subject: [PATCH 3/5] renamed collection_dict as publication_dict. updated returns section in the function documentation to reflect its returning a publication rather than a collection --- src/schema/schema_triggers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 1e6f3d5d..811eed0d 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -853,7 +853,7 @@ def get_publication_associated_collection(property_key, normalized_type, request Returns ------- str: The target property key -dict: A dictionary representation of the associated collection with all the normalized information +dict: A dictionary representation of the associated publication with all the normalized information """ def get_collection_associated_publication(property_key, normalized_type, request_args, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: @@ -861,11 +861,11 @@ def get_collection_associated_publication(property_key, normalized_type, request logger.info(f"Executing 'get_collection_associated_publication()' trigger method on uuid: {existing_data_dict['uuid']}") - collection_dict = schema_neo4j_queries.get_collection_associated_publication(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) + publication_dict = schema_neo4j_queries.get_collection_associated_publication(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) # Get rid of the entity node properties that are not defined in the yaml schema # as well as the ones defined as `exposed: false` in the yaml schema - return property_key, collection_dict + return property_key, publication_dict From e36fe172f5510b24938fdd3f857ea806f8809131 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 20 Nov 2025 14:02:44 -0500 Subject: [PATCH 4/5] added brackets to collection_publication_uuid in delete_cache to fix a bug --- src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index f0873eb7..5bf8f659 100644 --- a/src/app.py +++ b/src/app.py @@ -5409,7 +5409,7 @@ def delete_cache(entity_uuid, entity_type): publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) # We only use uuid in the cache key acorss all the cache types - uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids + collection_publication_uuid + uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids + [collection_publication_uuid] # Add to the list if the target dataset has linked upload if dataset_upload_dict: From 5b6c92ee33f85c64c56dfb8733a58d0ea12b15a7 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Fri, 21 Nov 2025 14:27:55 -0500 Subject: [PATCH 5/5] reverted delete cache to remove the the unecessary collection associated publication --- src/app.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/app.py b/src/app.py index 5bf8f659..9ef3b201 100644 --- a/src/app.py +++ b/src/app.py @@ -5383,7 +5383,6 @@ def delete_cache(entity_uuid, entity_type): collection_uuids = [] dataset_upload_dict = {} publication_collection_dict = {} - collection_publication_uuid = [] # Determine the associated cache keys based on the entity type # For Donor/Datasets/Sample/Publication, delete the cache of all the descendants @@ -5393,7 +5392,6 @@ def delete_cache(entity_uuid, entity_type): # For Collection/Epicollection, delete the cache for each of its associated datasets (via [:IN_COLLECTION]) if schema_manager.entity_type_instanceof(entity_type, 'Collection'): collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - collection_publication_uuid = schema_neo4j_queries.get_collection_associated_publication(neo4j_driver_instance, entity_uuid)['uuid'] # For Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD]) if entity_type == 'Upload': @@ -5409,7 +5407,7 @@ def delete_cache(entity_uuid, entity_type): publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) # We only use uuid in the cache key acorss all the cache types - uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids + [collection_publication_uuid] + uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids # Add to the list if the target dataset has linked upload if dataset_upload_dict: