From 701bb0094ba896658704828facd9e1d9801c65fa Mon Sep 17 00:00:00 2001
From: Derek Furst <drf57@pitt.edu>
Date: Thu, 21 May 2026 16:39:19 -0400
Subject: [PATCH] Change get_dataset_documents to use an included-fields model
 instead of an excluded-fields model like the rest

---
 src/app.py               | 33 +++++++++++++++++++--------------
 src/app_neo4j_queries.py | 18 ++++++------------
 2 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/src/app.py b/src/app.py
index 66cef3dd..60a5f960 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1349,25 +1349,30 @@ def alphabetize_dict_recursive(obj):
 def get_dataset_documents(uuid):
     validate_token_if_auth_header_exists(request)
     token = get_internal_token()
-    excluded_fields = None
+    include_fields = None
     if bool(request.args):
-        excluded = request.args.get('exclude')
-        if excluded:
-            excluded_fields = [
+        included = request.args.get('include')
+        if included:
+            include_fields = [
                 f.strip().strip("'").strip('"')
-                for f in excluded.split(',')
+                for f in included.split(',')
                 if f.strip()
             ]
+            # Validation step to ensure fields are real property names
+            valid_fields = set(schema_manager.get_persistent_fields())
+            invalid = [f for f in include_fields if f not in valid_fields]
+            if invalid:
+                return bad_request_error(f"Invalid include fields: {invalid}")
+        else:
+            return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.")
+    else:
+        return bad_request_error("Missing required parameter: 'include'. Must include a list of properties to be returned.")
 
-        # This is a validation step. Because we're allowing excluded fields to be passed from search-api,
-        # we want to minimally at least make sure these are real property names before using them for 
-        # querying neo4j. 
-        valid_fields = set(schema_manager.get_persistent_fields())
-        invalid = [f for f in excluded_fields if f not in valid_fields]
-        if invalid:
-            return bad_request_error(f"Invalid excluded fields: {invalid}")
-
-    entity_record = app_neo4j_queries.get_dataset_documents_raw(neo4j_driver_instance, uuid, excluded_fields=excluded_fields)
+    entity_record = app_neo4j_queries.get_dataset_documents_raw(
+        neo4j_driver_instance, 
+        uuid, 
+        included_fields=include_fields
+    )
     if entity_record is None:
         return not_found_error(f"Entity {uuid} not found")
 
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index 18dd65b9..33782290 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -256,10 +256,7 @@ def get_source_samples(neo4j_driver, uuid):
     are found, or None if the input UUID does not correspond to a supported
     entity type.
 """
-def get_dataset_documents_raw(neo4j_driver, uuid, excluded_fields=None):
-    if excluded_fields is None:
-        excluded_fields = []
-
+def get_dataset_documents_raw(neo4j_driver, uuid, included_fields):
     with neo4j_driver.session() as session:
         entity_record = session.run("""
             MATCH (e:Entity {uuid: $uuid})
@@ -279,22 +276,19 @@ def get_dataset_documents_raw(neo4j_driver, uuid, excluded_fields=None):
             root_label = 'Upload'
         else:
             return None
-        
-        projection = "d { .* }"
 
-        if excluded_fields:
-            null_projection = ", ".join(f"{field}: NULL" for field in excluded_fields)
-            projection = f"d {{ .*, {null_projection} }}"
 
         record = session.run("""
             MATCH (root:%s {uuid: $uuid})<-[:%s]-(d:Dataset)
-            RETURN apoc.map.fromPairs(COLLECT([d.uuid, %s])) AS result
-        """ % (root_label, relationship, projection), uuid=uuid).single()
+            WITH apoc.coll.toSet(COLLECT(d)) AS datasets
+            RETURN [d IN datasets | d { %s }] AS result
+        """ % (root_label, relationship, ', '.join(f'.{f}' for f in included_fields)),
+        uuid=uuid).single()
 
         if not record or not record["result"]:
             return {}
 
-        return {uuid: dict(props) for uuid, props in record["result"].items()}
+        return {d['uuid']: dict(d) for d in record["result"]}