From 8d4d1ba5d3ca1bf3870b0dddd8b0797dadc6c942 Mon Sep 17 00:00:00 2001 From: Derek Furst Date: Tue, 5 May 2026 00:24:42 -0400 Subject: [PATCH 1/7] wip, adding new supporting endpoints --- src/app.py | 172 +++++++++++++++++++++++++++++++ src/app_neo4j_queries.py | 194 +++++++++++++++++++++++++++++++++++ src/schema/schema_manager.py | 29 ++++++ 3 files changed, 395 insertions(+) diff --git a/src/app.py b/src/app.py index ab832ddc..612bb688 100644 --- a/src/app.py +++ b/src/app.py @@ -1114,6 +1114,178 @@ def get_entities_by_type(entity_type): # Response with the final result return jsonify(final_result) +@app.route('/entities//ancestor-info', methods=['GET']) +def get_ancestor_info(uuid): + validate_token_if_auth_header_exists(request) + include_fields = None + if bool(request.args): + included = request.args.get('include') + if included: + include_fields = [ + f.strip().strip("'").strip('"') + for f in included.split(',') + if f.strip() + ] + valid_fields = set(schema_manager.get_persistent_fields()) + invalid = [f for f in include_fields if f not in valid_fields] + if invalid: + return bad_request_error(f"Invalid include fields: {invalid}") + result = app_neo4j_queries.get_ancestors_trimmed(neo4j_driver_instance, uuid, included_fields=include_fields) + if result is None: + return not_found_error(f"Entity {uuid} not found") + return jsonify(result) + + +@app.route('/entities//descendant-info', methods=['GET']) +def get_descendant_info(uuid): + validate_token_if_auth_header_exists(request) + include_fields = None + if bool(request.args): + included = request.args.get('include') + if included: + include_fields = [ + f.strip().strip("'").strip('"') + for f in included.split(',') + if f.strip() + ] + valid_fields = set(schema_manager.get_persistent_fields()) + invalid = [f for f in include_fields if f not in valid_fields] + if invalid: + return bad_request_error(f"Invalid include fields: {invalid}") + result = 
app_neo4j_queries.get_descendants_trimmed(neo4j_driver_instance, uuid, included_fields=include_fields) + if result is None: + return not_found_error(f"Entity {uuid} not found") + return jsonify(result) + +@app.route('/entities//parent-info', methods=['GET']) +def get_parent_info(uuid): + validate_token_if_auth_header_exists(request) + included_fields = None + if bool(request.args): + included = request.args.get('include') + if included: + included_fields = [ + f.strip().strip("'").strip('"') + for f in included.split(',') + if f.strip() + ] + valid_fields = set(schema_manager.get_persistent_fields()) + invalid = [f for f in included_fields if f not in valid_fields] + if invalid: + return bad_request_error(f"Invalid include fields: {invalid}") + result = app_neo4j_queries.get_parent_info(neo4j_driver_instance, uuid, included_fields=included_fields) + if result is None: + return not_found_error(f"Entity {uuid} not found") + return jsonify(result) + + +@app.route('/entities//child-info', methods=['GET']) +def get_child_info(uuid): + validate_token_if_auth_header_exists(request) + included_fields = None + if bool(request.args): + included = request.args.get('include') + if included: + included_fields = [ + f.strip().strip("'").strip('"') + for f in included.split(',') + if f.strip() + ] + valid_fields = set(schema_manager.get_persistent_fields()) + invalid = [f for f in included_fields if f not in valid_fields] + if invalid: + return bad_request_error(f"Invalid include fields: {invalid}") + result = app_neo4j_queries.get_child_info(neo4j_driver_instance, uuid, included_fields=included_fields) + if result is None: + return not_found_error(f"Entity {uuid} not found") + return jsonify(result) + + +@app.route('/entities//donor-info', methods=['GET']) +def get_donor_info(uuid): + validate_token_if_auth_header_exists(request) + result = app_neo4j_queries.get_donor_info(neo4j_driver_instance, uuid) + if result is None: + return not_found_error(f"Entity {uuid} not found") + 
return jsonify(result) + +@app.route('/entities//origin-info', methods=['GET']) +def get_origin_samples(uuid): + validate_token_if_auth_header_exists(request) + result = app_neo4j_queries.get_origin_samples(neo4j_driver_instance, uuid) + if result is None: + return not_found_error(f"Entity {uuid} not found") + return jsonify(result) + + +@app.route('/entities//source-info', methods=['GET']) +def get_source_samples(uuid): + validate_token_if_auth_header_exists(request) + result = app_neo4j_queries.get_source_samples(neo4j_driver_instance, uuid) + if result is None: + return not_found_error(f"Entity {uuid} not found") + return jsonify(result) + +""" +Retrieve processed dataset documents associated with a collection or upload + +Parameters +---------- +uuid : str + The UUID of the target entity (Collection, Epicollection, or Upload) + +Returns +------- +json + A JSON object mapping dataset UUIDs to their processed document representations. + Each dataset is enriched via the trigger pipeline (ON_INDEX), normalized for response, + and stripped of selected large or unnecessary fields (e.g., ingest_metadata, metadata, files). + Returns a 404 error if the entity is not found. +""" +@app.route('/entities//dataset-documents', methods=['GET']) +def get_dataset_documents(uuid): + validate_token_if_auth_header_exists(request) + token = get_internal_token() + excluded_fields = None + if bool(request.args): + excluded = request.args.get('exclude') + if excluded: + excluded_fields = [ + f.strip().strip("'").strip('"') + for f in excluded.split(',') + if f.strip() + ] + + # This is a validation step. Because we're allowing excluded fields to be passed from search-api, + # we want to minimally at least make sure these are real property names before using them for + # querying neo4j. 
+ valid_fields = set(schema_manager.get_persistent_fields()) + invalid = [f for f in excluded_fields if f not in valid_fields] + if invalid: + return bad_request_error(f"Invalid excluded fields: {invalid}") + + entity_record = app_neo4j_queries.get_dataset_documents_raw(neo4j_driver_instance, uuid, excluded_fields=excluded_fields) + if entity_record is None: + return not_found_error(f"Entity {uuid} not found") + + result = {} + for dataset_uuid, entity_dict in entity_record.items(): + try: + complete = schema_manager.remove_none_values({**entity_dict}) + final = schema_manager.normalize_document_result_for_response(entity_dict=complete) + for field in ['ingest_metadata', 'metadata', 'files']: + final.pop(field, None) + result[dataset_uuid] = final + except Exception as e: + logger.error(f"Failed to process document for {dataset_uuid}: {e}") + continue + + resp_body = json.dumps(result).encode('utf-8') + try_resp = try_stash_response_body(resp_body) + if try_resp is not None: + return try_resp + return jsonify(result) + """ Create an entity of the target type in neo4j diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 2975c908..df7d3d90 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -12,6 +12,12 @@ # The filed name of the single result record record_field_name = 'result' +TRIMMED_ENTITY_FIELDS = [ + 'uuid', 'hubmap_id', 'entity_type', 'dataset_type', 'rui_location', + 'group_uuid', 'group_name', 'last_modified_timestamp', + 'created_by_user_displayname', 'thumbnail_file', 'sample_category', + 'organ', 'data_access_level', 'status' +] #################################################################################################### ## Directly called by app.py @@ -99,6 +105,194 @@ def get_entities_by_type(neo4j_driver, entity_type, property_key = None): return results +def get_ancestors_trimmed(neo4j_driver, uuid, included_fields): + with neo4j_driver.session() as session: + record = session.run(""" + MATCH (e:Entity 
{uuid: $uuid}) + OPTIONAL MATCH (e)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(a:Entity) + WHERE a.entity_type <> 'Lab' + WITH apoc.coll.toSet(COLLECT(a)) AS ancestors + RETURN [a IN ancestors | a { %s }] AS result + """ % ', '.join(f'.{f}' for f in included_fields), + uuid=uuid).single() + if record is None: + return None + return [dict(a) for a in (record['result'] or [])] + + +def get_descendants_trimmed(neo4j_driver, uuid, included_fields): + with neo4j_driver.session() as session: + record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + OPTIONAL MATCH (e)-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]->(d:Entity) + WITH apoc.coll.toSet(COLLECT(d)) AS descendants + RETURN [d IN descendants | d { %s }] AS result + """ % ', '.join(f'.{f}' for f in included_fields), + uuid=uuid).single() + if record is None: + return None + return [dict(d) for d in (record['result'] or [])] + + +def get_parent_info(neo4j_driver, uuid, included_fields=None): + with neo4j_driver.session() as session: + projection = 'properties(p)' + if included_fields: + projection = 'p { %s }' % ', '.join(f'.{f}' for f in included_fields) + + record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + OPTIONAL MATCH (e)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(p:Entity) + WHERE p.entity_type <> 'Lab' + WITH apoc.coll.toSet(COLLECT(p)) AS parents + RETURN [p IN parents | %s] AS result + """ % projection, uuid=uuid).single() + + if record is None: + return None + return [dict(p) for p in (record['result'] or [])] + + +def get_child_info(neo4j_driver, uuid, included_fields=None): + with neo4j_driver.session() as session: + projection = 'properties(c)' + if included_fields: + projection = 'c { %s }' % ', '.join(f'.{f}' for f in included_fields) + + record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + OPTIONAL MATCH (e)-[:ACTIVITY_INPUT]->(:Activity)-[:ACTIVITY_OUTPUT]->(c:Entity) + WITH apoc.coll.toSet(COLLECT(c)) AS children + RETURN [c IN children | %s] AS result + """ % projection, 
uuid=uuid).single() + + if record is None: + return None + return [dict(c) for c in (record['result'] or [])] + +def get_donor_info(neo4j_driver, uuid): + with neo4j_driver.session() as session: + entity_record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + RETURN e.uuid AS uuid + """, uuid=uuid).single() + if entity_record is None: + return None + + record = session.run(""" + MATCH (e:Entity {uuid: $uuid})<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor) + WITH COLLECT(DISTINCT d) AS donors + RETURN [d IN donors | properties(d)] AS donors + """, uuid=uuid).single() + + return [dict(d) for d in (record['donors'] or [])] + +def get_origin_samples(neo4j_driver, uuid): + with neo4j_driver.session() as session: + entity_record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + RETURN e.uuid AS uuid + """, uuid=uuid).single() + if entity_record is None: + return None + + record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + OPTIONAL MATCH (e)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Entity) + WHERE s.entity_type = 'Sample' + AND s.sample_category IS NOT NULL + AND toLower(s.sample_category) = 'organ' + AND s.organ IS NOT NULL + AND trim(s.organ) <> '' + RETURN apoc.coll.toSet(COLLECT(properties(s))) AS origin_samples + """, uuid=uuid).single() + + return [dict(s) for s in (record['origin_samples'] or [])] + + +def get_source_samples(neo4j_driver, uuid): + with neo4j_driver.session() as session: + entity_record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + RETURN e.uuid AS uuid + """, uuid=uuid).single() + if entity_record is None: + return None + + record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + MATCH (e)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Entity {entity_type: 'Sample'}) + WHERE NOT EXISTS { + MATCH (s)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(closer:Entity {entity_type: 'Sample'}) + MATCH (closer)-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]->(e) + } + RETURN apoc.coll.toSet(COLLECT(properties(s))) AS 
source_samples + """, uuid=uuid).single() + + return [dict(s) for s in (record['source_samples'] or [])] + + +""" +Retrieve dataset documents associated with a collection or upload + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The Neo4j database connection pool +uuid : str + The UUID of the target entity (Collection, Epicollection, or Upload) + +Returns +------- +dict + A dictionary mapping dataset UUIDs to their node properties for all datasets + directly linked to the given entity via the appropriate relationship + (IN_COLLECTION or IN_UPLOAD). Returns an empty dictionary if no datasets + are found, or None if the input UUID does not correspond to a supported + entity type. +""" +def get_dataset_documents_raw(neo4j_driver, uuid, excluded_fields=None): + if excluded_fields is None: + excluded_fields = [] + + with neo4j_driver.session() as session: + entity_record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + RETURN e.entity_type AS entity_type + """, uuid=uuid).single() + + if not entity_record: + return None + + entity_type = entity_record["entity_type"] + + if entity_type in ['Collection', 'Epicollection']: + relationship = 'IN_COLLECTION' + root_label = 'Collection' + elif entity_type == 'Upload': + relationship = 'IN_UPLOAD' + root_label = 'Upload' + else: + return None + + projection = "d { .* }" + + if excluded_fields: + null_projection = ", ".join(f"{field}: NULL" for field in excluded_fields) + projection = f"d {{ .*, {null_projection} }}" + + record = session.run(""" + MATCH (root:%s {uuid: $uuid})<-[:%s]-(d:Dataset) + RETURN apoc.map.fromPairs(COLLECT([d.uuid, %s])) AS result + """ % (root_label, relationship, projection), uuid=uuid).single() + + if not record or not record["result"]: + return {} + + return {uuid: dict(props) for uuid, props in record["result"].items()} + + + """ Determine if given dataset has componet children diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 075b5fea..a7e55595 
100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -577,6 +577,35 @@ def get_exclusion_types(normalized_entity_type, flat_list): return triggered_top_props_to_skip, neo4j_top_props_to_skip, neo4j_nested_props_to_skip + +""" +Get all non-transient properties across all entity types + +Parameters +---------- +None + +Returns +------- +list + A list of property names defined in the provenance schema where transient != True +""" +def get_persistent_fields(): + global _schema + + persistent_fields = set() + + for entity in _schema['ENTITIES'].values(): + properties = entity.get('properties', {}) + + for field, props in properties.items(): + if not props.get('transient', False): + persistent_fields.add(field) + + return list(persistent_fields) + + + """ Generating triggered data based on the target events and methods From 1699613fa8be47a0512e36be26c7c26c02899d7a Mon Sep 17 00:00:00 2001 From: Derek Furst Date: Fri, 8 May 2026 11:38:53 -0400 Subject: [PATCH 2/7] changed endpoint paths to better match search-api's call_entity function. 
Tweaked some app neo4j queries for performance --- src/app.py | 57 +++++++++++++++++++++------------------- src/app_neo4j_queries.py | 25 +++++++++++------- 2 files changed, 45 insertions(+), 37 deletions(-) diff --git a/src/app.py b/src/app.py index 612bb688..ea0d65af 100644 --- a/src/app.py +++ b/src/app.py @@ -1114,7 +1114,7 @@ def get_entities_by_type(entity_type): # Response with the final result return jsonify(final_result) -@app.route('/entities//ancestor-info', methods=['GET']) +@app.route('/ancestor-info/', methods=['GET']) def get_ancestor_info(uuid): validate_token_if_auth_header_exists(request) include_fields = None @@ -1133,10 +1133,13 @@ def get_ancestor_info(uuid): result = app_neo4j_queries.get_ancestors_trimmed(neo4j_driver_instance, uuid, included_fields=include_fields) if result is None: return not_found_error(f"Entity {uuid} not found") - return jsonify(result) + cleaned_result = [schema_manager.remove_none_values(entity) for entity in result] + complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result] + ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] + return jsonify(ordered_response) -@app.route('/entities//descendant-info', methods=['GET']) +@app.route('/descendant-info/', methods=['GET']) def get_descendant_info(uuid): validate_token_if_auth_header_exists(request) include_fields = None @@ -1155,9 +1158,12 @@ def get_descendant_info(uuid): result = app_neo4j_queries.get_descendants_trimmed(neo4j_driver_instance, uuid, included_fields=include_fields) if result is None: return not_found_error(f"Entity {uuid} not found") - return jsonify(result) + cleaned_result = [schema_manager.remove_none_values(entity) for entity in result] + complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result] + ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] + return jsonify(ordered_response) 
-@app.route('/entities//parent-info', methods=['GET']) +@app.route('/parent-info/', methods=['GET']) def get_parent_info(uuid): validate_token_if_auth_header_exists(request) included_fields = None @@ -1176,10 +1182,13 @@ def get_parent_info(uuid): result = app_neo4j_queries.get_parent_info(neo4j_driver_instance, uuid, included_fields=included_fields) if result is None: return not_found_error(f"Entity {uuid} not found") - return jsonify(result) + cleaned_result = [schema_manager.remove_none_values(entity) for entity in result] + complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result] + ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] + return jsonify(ordered_response) -@app.route('/entities//child-info', methods=['GET']) +@app.route('/child-info/', methods=['GET']) def get_child_info(uuid): validate_token_if_auth_header_exists(request) included_fields = None @@ -1198,27 +1207,12 @@ def get_child_info(uuid): result = app_neo4j_queries.get_child_info(neo4j_driver_instance, uuid, included_fields=included_fields) if result is None: return not_found_error(f"Entity {uuid} not found") - return jsonify(result) - - -@app.route('/entities//donor-info', methods=['GET']) -def get_donor_info(uuid): - validate_token_if_auth_header_exists(request) - result = app_neo4j_queries.get_donor_info(neo4j_driver_instance, uuid) - if result is None: - return not_found_error(f"Entity {uuid} not found") - return jsonify(result) + cleaned_result = [schema_manager.remove_none_values(entity) for entity in result] + complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result] + ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] + return jsonify(ordered_response) -@app.route('/entities//origin-info', methods=['GET']) -def get_origin_samples(uuid): - validate_token_if_auth_header_exists(request) - result = 
app_neo4j_queries.get_origin_samples(neo4j_driver_instance, uuid) - if result is None: - return not_found_error(f"Entity {uuid} not found") - return jsonify(result) - - -@app.route('/entities//source-info', methods=['GET']) +@app.route('/source-info/', methods=['GET']) def get_source_samples(uuid): validate_token_if_auth_header_exists(request) result = app_neo4j_queries.get_source_samples(neo4j_driver_instance, uuid) @@ -1226,6 +1220,15 @@ def get_source_samples(uuid): return not_found_error(f"Entity {uuid} not found") return jsonify(result) +def alphabetize_dict_recursive(obj): + if isinstance(obj, dict): + return {k: alphabetize_dict_recursive(obj[k]) for k in sorted(obj.keys())} + elif isinstance(obj, list): + return [alphabetize_dict_recursive(item) for item in obj] + else: + return obj + + """ Retrieve processed dataset documents associated with a collection or upload diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index df7d3d90..340cdc00 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -198,9 +198,8 @@ def get_origin_samples(neo4j_driver, uuid): record = session.run(""" MATCH (e:Entity {uuid: $uuid}) - OPTIONAL MATCH (e)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Entity) - WHERE s.entity_type = 'Sample' - AND s.sample_category IS NOT NULL + OPTIONAL MATCH (e)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample) + WHERE s.sample_category IS NOT NULL AND toLower(s.sample_category) = 'organ' AND s.organ IS NOT NULL AND trim(s.organ) <> '' @@ -220,18 +219,24 @@ def get_source_samples(neo4j_driver, uuid): return None record = session.run(""" - MATCH (e:Entity {uuid: $uuid}) - MATCH (e)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Entity {entity_type: 'Sample'}) - WHERE NOT EXISTS { - MATCH (s)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(closer:Entity {entity_type: 'Sample'}) - MATCH (closer)-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]->(e) - } + MATCH (e:Dataset {uuid: $uuid}) + CALL apoc.path.expandConfig(e, 
{ + relationshipFilter: " Date: Fri, 8 May 2026 18:07:00 -0400 Subject: [PATCH 3/7] tweaked reindex resource endpoints to be more consistent --- src/app.py | 24 ++++++++++++------------ src/app_neo4j_queries.py | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/app.py b/src/app.py index ea0d65af..be7180cc 100644 --- a/src/app.py +++ b/src/app.py @@ -1114,8 +1114,8 @@ def get_entities_by_type(entity_type): # Response with the final result return jsonify(final_result) -@app.route('/ancestor-info/', methods=['GET']) -def get_ancestor_info(uuid): +@app.route('/ancestors-info/', methods=['GET']) +def get_ancestors_info(uuid): validate_token_if_auth_header_exists(request) include_fields = None if bool(request.args): @@ -1139,8 +1139,8 @@ def get_ancestor_info(uuid): return jsonify(ordered_response) -@app.route('/descendant-info/', methods=['GET']) -def get_descendant_info(uuid): +@app.route('/descendants-info/', methods=['GET']) +def get_descendants_info(uuid): validate_token_if_auth_header_exists(request) include_fields = None if bool(request.args): @@ -1163,8 +1163,8 @@ def get_descendant_info(uuid): ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] return jsonify(ordered_response) -@app.route('/parent-info/', methods=['GET']) -def get_parent_info(uuid): +@app.route('/parents-info/', methods=['GET']) +def get_parents_info(uuid): validate_token_if_auth_header_exists(request) included_fields = None if bool(request.args): @@ -1179,7 +1179,7 @@ def get_parent_info(uuid): invalid = [f for f in included_fields if f not in valid_fields] if invalid: return bad_request_error(f"Invalid include fields: {invalid}") - result = app_neo4j_queries.get_parent_info(neo4j_driver_instance, uuid, included_fields=included_fields) + result = app_neo4j_queries.get_parents_info(neo4j_driver_instance, uuid, included_fields=included_fields) if result is None: return not_found_error(f"Entity {uuid} not found") cleaned_result = 
[schema_manager.remove_none_values(entity) for entity in result] @@ -1188,8 +1188,8 @@ def get_parent_info(uuid): return jsonify(ordered_response) -@app.route('/child-info/', methods=['GET']) -def get_child_info(uuid): +@app.route('/children-info/', methods=['GET']) +def get_children_info(uuid): validate_token_if_auth_header_exists(request) included_fields = None if bool(request.args): @@ -1204,7 +1204,7 @@ def get_child_info(uuid): invalid = [f for f in included_fields if f not in valid_fields] if invalid: return bad_request_error(f"Invalid include fields: {invalid}") - result = app_neo4j_queries.get_child_info(neo4j_driver_instance, uuid, included_fields=included_fields) + result = app_neo4j_queries.get_children_info(neo4j_driver_instance, uuid, included_fields=included_fields) if result is None: return not_found_error(f"Entity {uuid} not found") cleaned_result = [schema_manager.remove_none_values(entity) for entity in result] @@ -1212,8 +1212,8 @@ def get_child_info(uuid): ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] return jsonify(ordered_response) -@app.route('/source-info/', methods=['GET']) -def get_source_samples(uuid): +@app.route('/sources-info/', methods=['GET']) +def get_sources_info(uuid): validate_token_if_auth_header_exists(request) result = app_neo4j_queries.get_source_samples(neo4j_driver_instance, uuid) if result is None: diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 340cdc00..18dd65b9 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -134,7 +134,7 @@ def get_descendants_trimmed(neo4j_driver, uuid, included_fields): return [dict(d) for d in (record['result'] or [])] -def get_parent_info(neo4j_driver, uuid, included_fields=None): +def get_parents_info(neo4j_driver, uuid, included_fields=None): with neo4j_driver.session() as session: projection = 'properties(p)' if included_fields: @@ -153,7 +153,7 @@ def get_parent_info(neo4j_driver, uuid, included_fields=None): return 
[dict(p) for p in (record['result'] or [])] -def get_child_info(neo4j_driver, uuid, included_fields=None): +def get_children_info(neo4j_driver, uuid, included_fields=None): with neo4j_driver.session() as session: projection = 'properties(c)' if included_fields: From 5d36ee4554bda8b8ce3df38203f218e0bcae7661 Mon Sep 17 00:00:00 2001 From: Derek Furst Date: Thu, 14 May 2026 15:36:44 -0400 Subject: [PATCH 4/7] tweaked some of the new reindex endpoints to align with the changes on search-api --- src/app.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index be7180cc..54aa57f2 100644 --- a/src/app.py +++ b/src/app.py @@ -1218,7 +1218,32 @@ def get_sources_info(uuid): result = app_neo4j_queries.get_source_samples(neo4j_driver_instance, uuid) if result is None: return not_found_error(f"Entity {uuid} not found") - return jsonify(result) + cleaned_result = [schema_manager.remove_none_values(entity) for entity in result] + complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result] + ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] + return jsonify(ordered_response) + +@app.route('/origins-info/', methods=['GET']) +def get_origin_info(uuid): + validate_token_if_auth_header_exists(request) + result = app_neo4j_queries.get_origin_samples(neo4j_driver_instance, uuid) + if result is None: + return not_found_error(f"Entity {uuid} not found") + cleaned_result = [schema_manager.remove_none_values(entity) for entity in result] + complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result] + ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] + return jsonify(ordered_response) + +@app.route('/donors-info/', methods=['GET']) +def get_donors_info(uuid): + validate_token_if_auth_header_exists(request) + result = app_neo4j_queries.get_donor_info(neo4j_driver_instance, uuid) + 
if result is None: + return not_found_error(f"Entity {uuid} not found") + cleaned_result = [schema_manager.remove_none_values(entity) for entity in result] + complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result] + ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] + return jsonify(ordered_response) def alphabetize_dict_recursive(obj): if isinstance(obj, dict): From e358c10e89a06c78c272179925a9ca764b8dcd85 Mon Sep 17 00:00:00 2001 From: Derek Furst Date: Tue, 19 May 2026 15:39:21 -0400 Subject: [PATCH 5/7] added error handling for when the ancestors-info or descendants-info are hit without included fields. This differs from children and parents which default to the full data because those endpoints are considerably less expensive --- src/app.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/app.py b/src/app.py index 54aa57f2..c48a6cb2 100644 --- a/src/app.py +++ b/src/app.py @@ -1130,6 +1130,10 @@ def get_ancestors_info(uuid): invalid = [f for f in include_fields if f not in valid_fields] if invalid: return bad_request_error(f"Invalid include fields: {invalid}") + else: + return bad_request_error(f"Missing required parameter: 'include'. Must include a list of properties to be returned.") + else: + return bad_request_error(f"Missing required parameter: 'include'. Must include a list of properties to be returned.") result = app_neo4j_queries.get_ancestors_trimmed(neo4j_driver_instance, uuid, included_fields=include_fields) if result is None: return not_found_error(f"Entity {uuid} not found") @@ -1155,6 +1159,10 @@ def get_descendants_info(uuid): invalid = [f for f in include_fields if f not in valid_fields] if invalid: return bad_request_error(f"Invalid include fields: {invalid}") + else: + return bad_request_error(f"Missing required parameter: 'include'. 
Must include a list of properties to be returned.") + else: + return bad_request_error(f"Missing required parameter: 'include'. Must include a list of properties to be returned.") result = app_neo4j_queries.get_descendants_trimmed(neo4j_driver_instance, uuid, included_fields=include_fields) if result is None: return not_found_error(f"Entity {uuid} not found") From f796b844e73bf0392e4590adec844377f128ab74 Mon Sep 17 00:00:00 2001 From: Derek Furst Date: Tue, 19 May 2026 17:34:46 -0400 Subject: [PATCH 6/7] added comment descriptions for each of the new endpoints created for the new reindex procedure --- src/app.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/app.py b/src/app.py index c48a6cb2..66cef3dd 100644 --- a/src/app.py +++ b/src/app.py @@ -1114,6 +1114,24 @@ def get_entities_by_type(entity_type): # Response with the final result return jsonify(final_result) + +""" +Retrieve the document info needed for a given entity's ancestors. Result filtering for this +endpoint is required and is given by the required parameter 'include'. +For example /ancestors-info?include=uuid,status,entity_type + + +Parameters +---------- +include : str + A comma delimited string of all the properties to be retrieved by the endpoint + +Returns +------- +json + A list of dicts where each dict contains the requested fields for the given ancestor. +""" + @app.route('/ancestors-info/', methods=['GET']) def get_ancestors_info(uuid): validate_token_if_auth_header_exists(request) @@ -1143,6 +1161,22 @@ def get_ancestors_info(uuid): return jsonify(ordered_response) +""" +Retrieve the document info needed for a given entity's descendant. Result filtering for this +endpoint is required and is given by the required parameter 'include'. 
+For example /descendants-info?include=uuid,status,entity_type + + +Parameters +---------- +include : str + A comma delimited string of all the properties to be retrieved by the endpoint + +Returns +------- +json + A list of dicts where each dict contains the requested fields for the given descendant. +""" @app.route('/descendants-info/', methods=['GET']) def get_descendants_info(uuid): validate_token_if_auth_header_exists(request) @@ -1171,6 +1205,23 @@ def get_descendants_info(uuid): ordered_response = [alphabetize_dict_recursive(entity) for entity in complete] return jsonify(ordered_response) + +""" +Retrieve the document info needed for a given entity's parents (immediate ancestors). Result filtering for this +endpoint is allowed and is given by the optional parameter 'include'. +For example /parents-info?include=uuid,status,entity_type + + +Parameters +---------- +include : str + A comma delimited string of all the properties to be retrieved by the endpoint + +Returns +------- +json + A list of dicts where each dict contains the requested fields for the given parent. +""" @app.route('/parents-info/', methods=['GET']) def get_parents_info(uuid): validate_token_if_auth_header_exists(request) @@ -1196,6 +1247,22 @@ def get_parents_info(uuid): return jsonify(ordered_response) +""" +Retrieve the document info needed for a given entity's children (immediate descendants). Result filtering for this +endpoint is allowed and is given by the optional parameter 'include'. +For example /children-info?include=uuid,status,entity_type + + +Parameters +---------- +include : str + A comma delimited string of all the properties to be retrieved by the endpoint + +Returns +------- +json + A list of dicts where each dict contains the requested fields for the given child. 
+""" @app.route('/children-info/', methods=['GET']) def get_children_info(uuid): validate_token_if_auth_header_exists(request) From 701bb0094ba896658704828facd9e1d9801c65fa Mon Sep 17 00:00:00 2001 From: Derek Furst Date: Thu, 21 May 2026 16:39:19 -0400 Subject: [PATCH 7/7] changed get_dataset_documents to use an included fields model instead of excluded like the rest --- src/app.py | 33 +++++++++++++++++++-------------- src/app_neo4j_queries.py | 18 ++++++------------ 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/src/app.py b/src/app.py index 66cef3dd..60a5f960 100644 --- a/src/app.py +++ b/src/app.py @@ -1349,25 +1349,30 @@ def alphabetize_dict_recursive(obj): def get_dataset_documents(uuid): validate_token_if_auth_header_exists(request) token = get_internal_token() - excluded_fields = None + include_fields = None if bool(request.args): - excluded = request.args.get('exclude') - if excluded: - excluded_fields = [ + included = request.args.get('include') + if included: + include_fields = [ f.strip().strip("'").strip('"') - for f in excluded.split(',') + for f in included.split(',') if f.strip() ] + # Validation step to ensure fields are real property names + valid_fields = set(schema_manager.get_persistent_fields()) + invalid = [f for f in include_fields if f not in valid_fields] + if invalid: + return bad_request_error(f"Invalid include fields: {invalid}") + else: + return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.") + else: + return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.") - # This is a validation step. Because we're allowing excluded fields to be passed from search-api, - # we want to minimally at least make sure these are real property names before using them for - # querying neo4j. 
- valid_fields = set(schema_manager.get_persistent_fields()) - invalid = [f for f in excluded_fields if f not in valid_fields] - if invalid: - return bad_request_error(f"Invalid excluded fields: {invalid}") - - entity_record = app_neo4j_queries.get_dataset_documents_raw(neo4j_driver_instance, uuid, excluded_fields=excluded_fields) + entity_record = app_neo4j_queries.get_dataset_documents_raw( + neo4j_driver_instance, + uuid, + included_fields=include_fields + ) if entity_record is None: return not_found_error(f"Entity {uuid} not found") diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 18dd65b9..33782290 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -256,10 +256,7 @@ def get_source_samples(neo4j_driver, uuid): are found, or None if the input UUID does not correspond to a supported entity type. """ -def get_dataset_documents_raw(neo4j_driver, uuid, excluded_fields=None): - if excluded_fields is None: - excluded_fields = [] - +def get_dataset_documents_raw(neo4j_driver, uuid, included_fields): with neo4j_driver.session() as session: entity_record = session.run(""" MATCH (e:Entity {uuid: $uuid}) @@ -279,22 +276,19 @@ def get_dataset_documents_raw(neo4j_driver, uuid, excluded_fields=None): root_label = 'Upload' else: return None - - projection = "d { .* }" - if excluded_fields: - null_projection = ", ".join(f"{field}: NULL" for field in excluded_fields) - projection = f"d {{ .*, {null_projection} }}" record = session.run(""" MATCH (root:%s {uuid: $uuid})<-[:%s]-(d:Dataset) - RETURN apoc.map.fromPairs(COLLECT([d.uuid, %s])) AS result - """ % (root_label, relationship, projection), uuid=uuid).single() + WITH apoc.coll.toSet(COLLECT(d)) AS datasets + RETURN [d IN datasets | d { %s }] AS result + """ % (root_label, relationship, ', '.join(f'.{f}' for f in included_fields)), + uuid=uuid).single() if not record or not record["result"]: return {} - return {uuid: dict(props) for uuid, props in record["result"].items()} + return 
{d['uuid']: dict(d) for d in record["result"]}