From 701bb0094ba896658704828facd9e1d9801c65fa Mon Sep 17 00:00:00 2001 From: Derek Furst Date: Thu, 21 May 2026 16:39:19 -0400 Subject: [PATCH] changed get_dataset_documents to use an included fields model instead of excluded like the rest --- src/app.py | 33 +++++++++++++++++++-------------- src/app_neo4j_queries.py | 18 ++++++------------ 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/src/app.py b/src/app.py index 66cef3dd..60a5f960 100644 --- a/src/app.py +++ b/src/app.py @@ -1349,25 +1349,30 @@ def alphabetize_dict_recursive(obj): def get_dataset_documents(uuid): validate_token_if_auth_header_exists(request) token = get_internal_token() - excluded_fields = None + include_fields = None if bool(request.args): - excluded = request.args.get('exclude') - if excluded: - excluded_fields = [ + included = request.args.get('include') + if included: + include_fields = [ f.strip().strip("'").strip('"') - for f in excluded.split(',') + for f in included.split(',') if f.strip() ] + # Validation step to ensure fields are real property names + valid_fields = set(schema_manager.get_persistent_fields()) + invalid = [f for f in include_fields if f not in valid_fields] + if invalid: + return bad_request_error(f"Invalid include fields: {invalid}") + else: + return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.") + else: + return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.") - # This is a validation step. Because we're allowing excluded fields to be passed from search-api, - # we want to minimally at least make sure these are real property names before using them for - # querying neo4j. - valid_fields = set(schema_manager.get_persistent_fields()) - invalid = [f for f in excluded_fields if f not in valid_fields] - if invalid: - return bad_request_error(f"Invalid excluded fields: {invalid}") - - entity_record = app_neo4j_queries.get_dataset_documents_raw(neo4j_driver_instance, uuid, excluded_fields=excluded_fields) + entity_record = app_neo4j_queries.get_dataset_documents_raw( + neo4j_driver_instance, + uuid, + included_fields=include_fields + ) if entity_record is None: return not_found_error(f"Entity {uuid} not found") diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 18dd65b9..33782290 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -256,10 +256,7 @@ def get_source_samples(neo4j_driver, uuid): are found, or None if the input UUID does not correspond to a supported entity type. """ -def get_dataset_documents_raw(neo4j_driver, uuid, excluded_fields=None): - if excluded_fields is None: - excluded_fields = [] - +def get_dataset_documents_raw(neo4j_driver, uuid, included_fields): with neo4j_driver.session() as session: entity_record = session.run(""" MATCH (e:Entity {uuid: $uuid}) @@ -279,22 +276,19 @@ def get_dataset_documents_raw(neo4j_driver, uuid, excluded_fields=None): root_label = 'Upload' else: return None - - projection = "d { .* }" - if excluded_fields: - null_projection = ", ".join(f"{field}: NULL" for field in excluded_fields) - projection = f"d {{ .*, {null_projection} }}" record = session.run(""" MATCH (root:%s {uuid: $uuid})<-[:%s]-(d:Dataset) - RETURN apoc.map.fromPairs(COLLECT([d.uuid, %s])) AS result - """ % (root_label, relationship, projection), uuid=uuid).single() + WITH apoc.coll.toSet(COLLECT(d)) AS datasets + RETURN [d IN datasets | d { %s }] AS result + """ % (root_label, relationship, ', '.join(f'.{f}' for f in included_fields)), + uuid=uuid).single() if not record or not record["result"]: return {} - return {uuid: dict(props) for uuid, props in record["result"].items()} + return {d['uuid']: dict(d) for d in record["result"]}