diff --git a/src/app.py b/src/app.py
index ab832ddc..60a5f960 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1114,6 +1114,286 @@ def get_entities_by_type(entity_type):
     # Response with the final result
     return jsonify(final_result)
 
+
+"""
+Retrieve the document info needed for a given entity's ancestors. Result filtering for this
+endpoint is required and is given by the required parameter 'include'.
+For example /ancestors-info/<uuid>?include=uuid,status,entity_type
+
+A bad request error is returned when 'include' is missing, names no property, or names a
+property that is not one of the schema's persistent fields. A not found error is returned
+when the query layer reports no entity for the given uuid.
+
+
+Parameters
+----------
+uuid : str
+    The uuid of the target entity, taken from the URL path
+include : str
+    A comma delimited string of all the properties to be retrieved by the endpoint
+
+Returns
+-------
+json
+    A list of dicts where each dict contains the requested fields for the given ancestor.
+"""
+
+@app.route('/ancestors-info/<uuid>', methods=['GET'])
+def get_ancestors_info(uuid):
+    validate_token_if_auth_header_exists(request)
+
+    # 'include' is mandatory here: an absent or blank value is rejected up front
+    included = request.args.get('include')
+    if not included:
+        return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.")
+
+    # Tolerate surrounding whitespace and quotes, e.g. include='uuid', "status"
+    include_fields = [
+        f.strip().strip("'").strip('"')
+        for f in included.split(',')
+        if f.strip()
+    ]
+    # A value made up only of separators (e.g. include=,) names no property at all
+    if not include_fields:
+        return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.")
+
+    # The names end up in the query's projection, so only known persistent fields are accepted
+    valid_fields = set(schema_manager.get_persistent_fields())
+    invalid = [f for f in include_fields if f not in valid_fields]
+    if invalid:
+        return bad_request_error(f"Invalid include fields: {invalid}")
+
+    result = app_neo4j_queries.get_ancestors_trimmed(neo4j_driver_instance, uuid, included_fields=include_fields)
+    if result is None:
+        return not_found_error(f"Entity {uuid} not found")
+
+    cleaned_result = [schema_manager.remove_none_values(entity) for entity in result]
+    complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result]
+    # Keys are sorted so the response layout is stable
+    ordered_response = [alphabetize_dict_recursive(entity) for entity in complete]
+    return jsonify(ordered_response)
+
+
+"""
+Retrieve the document info needed for a given entity's descendants. Result filtering for this
+endpoint is required and is given by the required parameter 'include'.
+For example /descendants-info/<uuid>?include=uuid,status,entity_type
+
+A bad request error is returned when 'include' is missing, names no property, or names a
+property that is not one of the schema's persistent fields. A not found error is returned
+when the query layer reports no entity for the given uuid.
+
+
+Parameters
+----------
+uuid : str
+    The uuid of the target entity, taken from the URL path
+include : str
+    A comma delimited string of all the properties to be retrieved by the endpoint
+
+Returns
+-------
+json
+    A list of dicts where each dict contains the requested fields for the given descendant.
+"""
+@app.route('/descendants-info/<uuid>', methods=['GET'])
+def get_descendants_info(uuid):
+    validate_token_if_auth_header_exists(request)
+
+    # 'include' is mandatory here: an absent or blank value is rejected up front
+    included = request.args.get('include')
+    if not included:
+        return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.")
+
+    # Tolerate surrounding whitespace and quotes, e.g. include='uuid', "status"
+    include_fields = [
+        f.strip().strip("'").strip('"')
+        for f in included.split(',')
+        if f.strip()
+    ]
+    # A value made up only of separators (e.g. include=,) names no property at all
+    if not include_fields:
+        return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.")
+
+    # The names end up in the query's projection, so only known persistent fields are accepted
+    valid_fields = set(schema_manager.get_persistent_fields())
+    invalid = [f for f in include_fields if f not in valid_fields]
+    if invalid:
+        return bad_request_error(f"Invalid include fields: {invalid}")
+
+    result = app_neo4j_queries.get_descendants_trimmed(neo4j_driver_instance, uuid, included_fields=include_fields)
+    if result is None:
+        return not_found_error(f"Entity {uuid} not found")
+
+    cleaned_result = [schema_manager.remove_none_values(entity) for entity in result]
+    complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result]
+    # Keys are sorted so the response layout is stable
+    ordered_response = [alphabetize_dict_recursive(entity) for entity in complete]
+    return jsonify(ordered_response)
+
+
+"""
+Retrieve the document info needed for a given entity's parents (immediate ancestors). Result filtering for this
+endpoint is allowed and is given by the optional parameter 'include'.
+For example /parents-info/<uuid>?include=uuid,status,entity_type
+
+
+Parameters
+----------
+include : str
+    A comma delimited string of all the properties to be retrieved by the endpoint
+
+Returns
+-------
+json
+    A list of dicts where each dict contains the requested fields for the given parent.
+"""
+@app.route('/parents-info/<uuid>', methods=['GET'])
+def get_parents_info(uuid):
+    # Returns the immediate parents of the entity identified by the <uuid> path variable.
+    # 'include' is optional: when it is absent no field list is passed on and the query
+    # layer returns every property of each parent.
+    validate_token_if_auth_header_exists(request)
+
+    included_fields = None
+    included = request.args.get('include')
+    if included:
+        # Tolerate surrounding whitespace and quotes, e.g. include='uuid', "status"
+        included_fields = [
+            f.strip().strip("'").strip('"')
+            for f in included.split(',')
+            if f.strip()
+        ]
+        # The names end up in the query's projection, so only known persistent fields are accepted
+        valid_fields = set(schema_manager.get_persistent_fields())
+        invalid = [f for f in included_fields if f not in valid_fields]
+        if invalid:
+            return bad_request_error(f"Invalid include fields: {invalid}")
+
+    result = app_neo4j_queries.get_parents_info(neo4j_driver_instance, uuid, included_fields=included_fields)
+    if result is None:
+        return not_found_error(f"Entity {uuid} not found")
+
+    cleaned_result = [schema_manager.remove_none_values(entity) for entity in result]
+    complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result]
+    # Keys are sorted so the response layout is stable
+    ordered_response = [alphabetize_dict_recursive(entity) for entity in complete]
+    return jsonify(ordered_response)
+
+
+"""
+Retrieve the document info needed for a given entity's children (immediate descendants). Result filtering for this
+endpoint is allowed and is given by the optional parameter 'include'.
+For example /children-info/<uuid>?include=uuid,status,entity_type
+
+
+Parameters
+----------
+uuid : str
+    The uuid of the target entity, taken from the URL path
+include : str
+    A comma delimited string of all the properties to be retrieved by the endpoint
+
+Returns
+-------
+json
+    A list of dicts where each dict contains the requested fields for the given child.
+"""
+@app.route('/children-info/<uuid>', methods=['GET'])
+def get_children_info(uuid):
+    # Returns the immediate children of the entity identified by the <uuid> path variable.
+    # 'include' is optional: when it is absent no field list is passed on and the query
+    # layer returns every property of each child.
+    validate_token_if_auth_header_exists(request)
+
+    included_fields = None
+    included = request.args.get('include')
+    if included:
+        # Tolerate surrounding whitespace and quotes, e.g. include='uuid', "status"
+        included_fields = [
+            f.strip().strip("'").strip('"')
+            for f in included.split(',')
+            if f.strip()
+        ]
+        # The names end up in the query's projection, so only known persistent fields are accepted
+        valid_fields = set(schema_manager.get_persistent_fields())
+        invalid = [f for f in included_fields if f not in valid_fields]
+        if invalid:
+            return bad_request_error(f"Invalid include fields: {invalid}")
+
+    result = app_neo4j_queries.get_children_info(neo4j_driver_instance, uuid, included_fields=included_fields)
+    if result is None:
+        return not_found_error(f"Entity {uuid} not found")
+
+    cleaned_result = [schema_manager.remove_none_values(entity) for entity in result]
+    complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result]
+    ordered_response = [alphabetize_dict_recursive(entity) for entity in complete]
+    return jsonify(ordered_response)
+
+
+"""
+Retrieve the documents returned by app_neo4j_queries.get_source_samples() for the given entity.
+
+Parameters
+----------
+uuid : str
+    The uuid of the target entity, taken from the URL path
+
+Returns
+-------
+json
+    A list of dicts, one per returned sample, with None values removed and keys sorted.
+    A not found error is returned when the query layer returns None.
+"""
+@app.route('/sources-info/<uuid>', methods=['GET'])
+def get_sources_info(uuid):
+    validate_token_if_auth_header_exists(request)
+    result = app_neo4j_queries.get_source_samples(neo4j_driver_instance, uuid)
+    if result is None:
+        return not_found_error(f"Entity {uuid} not found")
+    cleaned_result = [schema_manager.remove_none_values(entity) for entity in result]
+    complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result]
+    ordered_response = [alphabetize_dict_recursive(entity) for entity in complete]
+    return jsonify(ordered_response)
+
+
+"""
+Retrieve the documents returned by app_neo4j_queries.get_origin_samples() for the given entity.
+
+Parameters
+----------
+uuid : str
+    The uuid of the target entity, taken from the URL path
+
+Returns
+-------
+json
+    A list of dicts, one per returned sample, with None values removed and keys sorted.
+    A not found error is returned when the query layer returns None.
+"""
+@app.route('/origins-info/<uuid>', methods=['GET'])
+def get_origin_info(uuid):
+    validate_token_if_auth_header_exists(request)
+    result = app_neo4j_queries.get_origin_samples(neo4j_driver_instance, uuid)
+    if result is None:
+        return not_found_error(f"Entity {uuid} not found")
+    cleaned_result = [schema_manager.remove_none_values(entity) for entity in result]
+    complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result]
+    ordered_response = [alphabetize_dict_recursive(entity) for entity in complete]
+    return jsonify(ordered_response)
+
+
+"""
+Retrieve the documents returned by app_neo4j_queries.get_donor_info() for the given entity.
+
+Parameters
+----------
+uuid : str
+    The uuid of the target entity, taken from the URL path
+
+Returns
+-------
+json
+    A list of dicts, one per returned donor, with None values removed and keys sorted.
+    A not found error is returned when the query layer returns None.
+"""
+@app.route('/donors-info/<uuid>', methods=['GET'])
+def get_donors_info(uuid):
+    validate_token_if_auth_header_exists(request)
+    result = app_neo4j_queries.get_donor_info(neo4j_driver_instance, uuid)
+    if result is None:
+        return not_found_error(f"Entity {uuid} not found")
+    cleaned_result = [schema_manager.remove_none_values(entity) for entity in result]
+    complete = [schema_manager.normalize_document_result_for_response(entity) for entity in cleaned_result]
+    ordered_response = [alphabetize_dict_recursive(entity) for entity in complete]
+    return jsonify(ordered_response)
+
+
+"""
+Return a copy of the given structure in which every dict, at any depth, has its keys in sorted order.
+
+Parameters
+----------
+obj : any
+    A dict, a list, or any other value. Dicts and lists are rebuilt recursively;
+    every other value is returned as is.
+
+Returns
+-------
+any
+    The rebuilt dict or list, or the original value when it is neither.
+"""
+def alphabetize_dict_recursive(obj):
+    if isinstance(obj, dict):
+        # Dicts keep insertion order, so inserting in sorted key order yields a sorted dict
+        return {k: alphabetize_dict_recursive(obj[k]) for k in sorted(obj)}
+    if isinstance(obj, list):
+        # List order is kept; only the dicts nested inside are re-ordered
+        return [alphabetize_dict_recursive(item) for item in obj]
+    return obj
+
+
+"""
+Retrieve processed dataset documents associated with a collection or upload
+For example /entities/<uuid>/dataset-documents?include=uuid,status,entity_type
+
+Parameters
+----------
+uuid : str
+    The UUID of the target entity (Collection, Epicollection, or Upload)
+include : str
+    A comma delimited string of all the properties to be retrieved by the endpoint (required)
+
+Returns
+-------
+json
+    A JSON object mapping dataset UUIDs to their processed document representations.
+    Each dataset is enriched via the trigger pipeline (ON_INDEX), normalized for response,
+    and stripped of selected large or unnecessary fields (e.g., ingest_metadata, metadata, files).
+    Returns a 404 error if the entity is not found.
+"""
+@app.route('/entities/<uuid>/dataset-documents', methods=['GET'])
+def get_dataset_documents(uuid):
+    validate_token_if_auth_header_exists(request)
+    # NOTE(review): the returned token is not used anywhere below - confirm whether this
+    # call is still needed (e.g. for a side effect) before removing it
+    token = get_internal_token()
+
+    # 'include' is mandatory here: an absent or blank value is rejected up front
+    included = request.args.get('include')
+    if not included:
+        return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.")
+
+    # Tolerate surrounding whitespace and quotes, e.g. include='uuid', "status"
+    include_fields = [
+        f.strip().strip("'").strip('"')
+        for f in included.split(',')
+        if f.strip()
+    ]
+    # A value made up only of separators (e.g. include=,) names no property at all
+    if not include_fields:
+        return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.")
+
+    # Validation step to ensure fields are real property names
+    valid_fields = set(schema_manager.get_persistent_fields())
+    invalid = [f for f in include_fields if f not in valid_fields]
+    if invalid:
+        return bad_request_error(f"Invalid include fields: {invalid}")
+
+    entity_record = app_neo4j_queries.get_dataset_documents_raw(
+        neo4j_driver_instance,
+        uuid,
+        included_fields=include_fields
+    )
+    if entity_record is None:
+        return not_found_error(f"Entity {uuid} not found")
+
+    result = {}
+    for dataset_uuid, entity_dict in entity_record.items():
+        try:
+            # Work on a shallow copy so the record returned by the query layer is left untouched
+            complete = schema_manager.remove_none_values({**entity_dict})
+            final = schema_manager.normalize_document_result_for_response(entity_dict=complete)
+            # These fields are dropped even when they were explicitly requested via 'include'
+            for field in ['ingest_metadata', 'metadata', 'files']:
+                final.pop(field, None)
+            result[dataset_uuid] = final
+        except Exception as e:
+            # Best effort: one dataset that fails to process is logged (with traceback)
+            # and left out of the response instead of failing the whole request
+            logger.exception(f"Failed to process document for {dataset_uuid}: {e}")
+
+    # Give try_stash_response_body() the serialized body first; when it returns a
+    # response, that response is sent instead of the inline JSON
+    resp_body = json.dumps(result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+    return jsonify(result)
+
 
 """
 Create an entity of the target type in neo4j
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index 2975c908..33782290 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -12,6 +12,12 @@
 
 # The filed name of the
single result record
 record_field_name = 'result'
+TRIMMED_ENTITY_FIELDS = [
+    'uuid', 'hubmap_id', 'entity_type', 'dataset_type', 'rui_location',
+    'group_uuid', 'group_name', 'last_modified_timestamp',
+    'created_by_user_displayname', 'thumbnail_file', 'sample_category',
+    'organ', 'data_access_level', 'status'
+]
 
 ####################################################################################################
 ## Directly called by app.py
@@ -99,6 +105,193 @@ def get_entities_by_type(neo4j_driver, entity_type, property_key = None):
     return results
 
 
+def _field_projection(variable, included_fields):
+    """
+    Build a Cypher map projection such as `a { .uuid, .status }`.
+
+    The property names are spliced into the query text rather than passed as query
+    parameters, so each one must be a plain identifier.
+
+    Parameters
+    ----------
+    variable : str
+        The Cypher variable the projection is applied to
+    included_fields : list
+        The property names to project
+
+    Returns
+    -------
+    str
+        The map projection text
+
+    Raises
+    ------
+    ValueError
+        If any of the names is not a plain identifier
+    """
+    unsafe = [f for f in included_fields if not (isinstance(f, str) and f.isidentifier())]
+    if unsafe:
+        raise ValueError(f"Invalid property names for projection: {unsafe}")
+    return '%s { %s }' % (variable, ', '.join(f'.{f}' for f in included_fields))
+
+
+def get_ancestors_trimmed(neo4j_driver, uuid, included_fields):
+    """
+    Get the requested properties of every ancestor of the given entity, Lab entities excluded.
+
+    Parameters
+    ----------
+    neo4j_driver : neo4j.Driver object
+        The neo4j database connection pool
+    uuid : str
+        The uuid of the target entity
+    included_fields : list
+        The property names to return for each ancestor
+
+    Returns
+    -------
+    list or None
+        One dict per distinct ancestor (empty when there are none), or None when no
+        Entity node has the given uuid
+    """
+    # `e` is kept as a grouping key on purpose: an aggregation without grouping keys
+    # yields one row even when the first MATCH finds nothing, which would make a
+    # missing entity indistinguishable from an entity without ancestors
+    query = """
+        MATCH (e:Entity {uuid: $uuid})
+        OPTIONAL MATCH (e)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(a:Entity)
+        WHERE a.entity_type <> 'Lab'
+        WITH e, apoc.coll.toSet(COLLECT(a)) AS ancestors
+        RETURN [a IN ancestors | %s] AS result
+    """ % _field_projection('a', included_fields)
+
+    with neo4j_driver.session() as session:
+        record = session.run(query, uuid=uuid).single()
+        if record is None:
+            return None
+        return [dict(a) for a in (record['result'] or [])]
+
+
+def get_descendants_trimmed(neo4j_driver, uuid, included_fields):
+    """
+    Get the requested properties of every descendant of the given entity.
+
+    Parameters
+    ----------
+    neo4j_driver : neo4j.Driver object
+        The neo4j database connection pool
+    uuid : str
+        The uuid of the target entity
+    included_fields : list
+        The property names to return for each descendant
+
+    Returns
+    -------
+    list or None
+        One dict per distinct descendant (empty when there are none), or None when no
+        Entity node has the given uuid
+    """
+    # `e` is a grouping key so that a missing entity yields no row at all (see get_ancestors_trimmed)
+    query = """
+        MATCH (e:Entity {uuid: $uuid})
+        OPTIONAL MATCH (e)-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]->(d:Entity)
+        WITH e, apoc.coll.toSet(COLLECT(d)) AS descendants
+        RETURN [d IN descendants | %s] AS result
+    """ % _field_projection('d', included_fields)
+
+    with neo4j_driver.session() as session:
+        record = session.run(query, uuid=uuid).single()
+        if record is None:
+            return None
+        return [dict(d) for d in (record['result'] or [])]
+
+
+def get_parents_info(neo4j_driver, uuid, included_fields=None):
+    """
+    Get the immediate parents of the given entity, Lab entities excluded.
+
+    Parameters
+    ----------
+    neo4j_driver : neo4j.Driver object
+        The neo4j database connection pool
+    uuid : str
+        The uuid of the target entity
+    included_fields : list
+        Optional property names to return for each parent; every property is returned
+        when this is None or empty
+
+    Returns
+    -------
+    list or None
+        One dict per distinct parent (empty when there are none), or None when no
+        Entity node has the given uuid
+    """
+    projection = 'properties(p)'
+    if included_fields:
+        projection = _field_projection('p', included_fields)
+
+    # `e` is a grouping key so that a missing entity yields no row at all
+    query = """
+        MATCH (e:Entity {uuid: $uuid})
+        OPTIONAL MATCH (e)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(p:Entity)
+        WHERE p.entity_type <> 'Lab'
+        WITH e, apoc.coll.toSet(COLLECT(p)) AS parents
+        RETURN [p IN parents | %s] AS result
+    """ % projection
+
+    with neo4j_driver.session() as session:
+        record = session.run(query, uuid=uuid).single()
+        if record is None:
+            return None
+        return [dict(p) for p in (record['result'] or [])]
+
+
+def get_children_info(neo4j_driver, uuid, included_fields=None):
+    """
+    Get the immediate children of the given entity.
+
+    Parameters
+    ----------
+    neo4j_driver : neo4j.Driver object
+        The neo4j database connection pool
+    uuid : str
+        The uuid of the target entity
+    included_fields : list
+        Optional property names to return for each child; every property is returned
+        when this is None or empty
+
+    Returns
+    -------
+    list or None
+        One dict per distinct child (empty when there are none), or None when no
+        Entity node has the given uuid
+    """
+    projection = 'properties(c)'
+    if included_fields:
+        projection = _field_projection('c', included_fields)
+
+    # `e` is a grouping key so that a missing entity yields no row at all
+    query = """
+        MATCH (e:Entity {uuid: $uuid})
+        OPTIONAL MATCH (e)-[:ACTIVITY_INPUT]->(:Activity)-[:ACTIVITY_OUTPUT]->(c:Entity)
+        WITH e, apoc.coll.toSet(COLLECT(c)) AS children
+        RETURN [c IN children | %s] AS result
+    """ % projection
+
+    with neo4j_driver.session() as session:
+        record = session.run(query, uuid=uuid).single()
+        if record is None:
+            return None
+        return [dict(c) for c in (record['result'] or [])]
+
+
+def get_donor_info(neo4j_driver, uuid):
+    """
+    Get every property of each distinct Donor found among the ancestors of the given entity.
+
+    Parameters
+    ----------
+    neo4j_driver : neo4j.Driver object
+        The neo4j database connection pool
+    uuid : str
+        The uuid of the target entity
+
+    Returns
+    -------
+    list or None
+        One dict per donor (empty when there are none), or None when no Entity node
+        has the given uuid
+    """
+    with neo4j_driver.session() as session:
+        # Existence is checked on its own so "no such entity" can be told apart from "no donors"
+        entity_record = session.run("""
+            MATCH (e:Entity {uuid: $uuid})
+            RETURN e.uuid AS uuid
+        """, uuid=uuid).single()
+        if entity_record is None:
+            return None
+
+        record = session.run("""
+            MATCH (e:Entity {uuid: $uuid})<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor)
+            WITH COLLECT(DISTINCT d) AS donors
+            RETURN [d IN donors | properties(d)] AS donors
+        """, uuid=uuid).single()
+
+        return [dict(d) for d in (record['donors'] or [])]
+
+def get_origin_samples(neo4j_driver, uuid):
+    with neo4j_driver.session() as session:
+        entity_record = session.run("""
+            MATCH (e:Entity {uuid: $uuid})
+            RETURN e.uuid AS uuid
+        """, uuid=uuid).single()
+        if entity_record is None:
+            return None
+
+        record = session.run("""
+            MATCH (e:Entity {uuid: $uuid})
+            OPTIONAL MATCH (e)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample)
+            WHERE s.sample_category IS NOT NULL
+              AND toLower(s.sample_category) = 'organ'
+              AND s.organ IS NOT NULL
+              AND trim(s.organ) <> ''
+            RETURN apoc.coll.toSet(COLLECT(properties(s))) AS origin_samples
+        """, uuid=uuid).single()
+
+        return [dict(s) for s in (record['origin_samples'] or
[])] + + +def get_source_samples(neo4j_driver, uuid): + with neo4j_driver.session() as session: + entity_record = session.run(""" + MATCH (e:Entity {uuid: $uuid}) + RETURN e.uuid AS uuid + """, uuid=uuid).single() + if entity_record is None: + return None + + record = session.run(""" + MATCH (e:Dataset {uuid: $uuid}) + CALL apoc.path.expandConfig(e, { + relationshipFilter: "