@@ -50,6 +50,9 @@ def trim_to_size(input_list, length):
5050 else :
5151 return input_list
5252
53+ KPS_THAT_RETURN_PREFERRED_NODE_CURIES = {'infores:retriever' }  # KPs exempt from the "may not return preferred CURIEs" warning logged after a KP answers an edge query
54+ KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES = {'infores:rtx-kg2' }  # KPs accepted for single-node queries; any other requested KP is rejected with error_code "InvalidKP"
55+
5356class ARAXExpander :
5457
5558 def __init__ (self ):
@@ -1101,22 +1104,9 @@ async def expand_edge_async(
11011104
11021105 # Do some post-processing (deduplicate nodes, remove self-edges..)
11031106 # KG2c and retriever are already deduplicated and use canonical predicates
1104- if kp_to_use != 'infores:rtx-kg2' and kp_to_use != 'infores:retriever' :
1105- qg_org_kg = eu .check_for_canonical_predicates (qg_org_kg , kp_to_use , log )
1106- qg_org_kg ,\
1107- dropped_edge_counts = self ._deduplicate_nodes (qg_org_kg ,
1108- kp_to_use ,
1109- log )
1110- for qedge_key , count in dropped_edge_counts .items ():
1111- if count > 0 :
1112- # update query plan here
1113- done_str = log .query_plan ['qedge_keys' ][qedge_key ][kp_to_use ]['description' ]
1114- log .update_query_plan (qedge_key ,
1115- kp_to_use ,
1116- "Warning" ,
1117- done_str + "; "
1118- f"{ count } edges dropped due "
1119- "to node reference failure" )
1107+ if kp_to_use not in KPS_THAT_RETURN_PREFERRED_NODE_CURIES :
1108+ log .warning (f"{ kp_to_use } : this KP may not return preferred CURIEs; please check, and if it does return only preferred CURIEs, add to the Expand whitelist" )
1109+
11201110 if any (edges for edges in qg_org_kg .edges_by_qg_id .values ()): # Make sure the KP actually returned something
11211111 qg_org_kg = self ._remove_self_edges (qg_org_kg , kp_to_use , log )
11221112
@@ -1136,6 +1126,8 @@ def _expand_node(qnode_key: str,
11361126 # This function expands a single node using the specified knowledge provider (for now only KG2 is supported)
11371127 log .debug (f"Expanding node { qnode_key } using { kps_to_use } " )
11381128 qnode = query_graph .nodes [qnode_key ]
1129+ if qnode .ids :
1130+ qnode .ids = eu .get_canonical_curies_list (qnode .ids , log )
11391131 single_node_qg = QueryGraph (nodes = {qnode_key : qnode }, edges = {})
11401132 answer_kg = QGOrganizedKnowledgeGraph ()
11411133 if log .status != 'OK' :
@@ -1145,18 +1137,21 @@ def _expand_node(qnode_key: str,
11451137 return answer_kg
11461138
11471139 # Answer the query using the proper KP (only our own KP answers single-node queries for now)
1148- if kps_to_use == ["infores:rtx-kg2" ]:
1149- kp_querier = TRAPIQuerier (response_object = log ,
1150- kp_name = kps_to_use [0 ],
1151- user_specified_kp = user_specified_kp ,
1152- kp_timeout = kp_timeout )
1153- answer_kg = kp_querier .answer_single_node_query (single_node_qg )
1154- log .info (f"Query for node { qnode_key } returned results ({ eu .get_printable_counts_by_qg_id (answer_kg )} )" )
1155- return answer_kg
1156- else :
1157- log .error ("Only infores:rtx-kg2 can answer single-node queries currently" , error_code = "InvalidKP" )
1140+ kps_to_use_that_cannot_handle_single_node_queries = set (kps_to_use ) - KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES
1141+ if kps_to_use_that_cannot_handle_single_node_queries :
1142+ log .error ("these KPs cannot answer single-node queries: "
1143+ f"{ kps_to_use_that_cannot_handle_single_node_queries } " ,
1144+ error_code = "InvalidKP" )
11581145 return answer_kg
11591146
1147+ kp_querier = TRAPIQuerier (response_object = log ,
1148+ kp_name = next (iter (KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES )),
1149+ user_specified_kp = user_specified_kp ,
1150+ kp_timeout = kp_timeout )
1151+ answer_kg = kp_querier .answer_single_node_query (single_node_qg )
1152+ log .info (f"Query for node { qnode_key } returned results ({ eu .get_printable_counts_by_qg_id (answer_kg )} )" )
1153+ return answer_kg
1154+
11601155 def _get_query_graph_for_edge (self , qedge_key : str , full_qg : QueryGraph , overarching_kg : QGOrganizedKnowledgeGraph , log : ARAXResponse ) -> QueryGraph :
11611156 # This function creates a query graph for the specified qedge, updating its qnodes' curies as needed
11621157 edge_qg = QueryGraph (nodes = {}, edges = {})
@@ -1205,79 +1200,6 @@ def _get_query_graph_for_edge(self, qedge_key: str, full_qg: QueryGraph, overarc
12051200 f"{ qedge .predicates if qedge .predicates else '' } -({ output_qnode_key } :{ output_qnode .categories } { output_curie_summary } )" )
12061201 return edge_qg
12071202
1208- @staticmethod
1209- def _deduplicate_nodes (  # Re-keys nodes by their preferred CURIE (merging nodes that share one) and rewrites edge endpoints to match
1210- answer_kg : QGOrganizedKnowledgeGraph ,  # KG returned by the KP; its node and edge objects are mutated in place and reused in the result
1211- kp_name : str ,  # used only as a prefix in log messages
1212- log : ARAXResponse
1213- ) -> tuple [QGOrganizedKnowledgeGraph , dict [str , int ]]:  # (deduplicated KG, per-qedge drop counter)
1214- log .debug (f"{ kp_name } : Deduplicating nodes" )
1215- deduplicated_kg = QGOrganizedKnowledgeGraph (nodes = {qnode_key : {} for qnode_key in answer_kg .nodes_by_qg_id },
1216- edges = {qedge_key : {} for qedge_key in answer_kg .edges_by_qg_id })
1217- deduplicated_kg .unbound_edges = answer_kg .unbound_edges  # shared by reference; unbound edges are not remapped by the edge loop below
1218- curie_mappings = {}  # original node key -> preferred CURIE, across bound and unbound nodes
1219-
1220- # First deduplicate the bound nodes
1221- for qnode_key , nodes in {** answer_kg .nodes_by_qg_id , UNBOUND_NODES_KEY : answer_kg .unbound_nodes }.items ():  # unbound nodes are processed in the same pass under UNBOUND_NODES_KEY
1222- # Load preferred curie info from NodeSynonymizer
1223- log .debug (f"{ kp_name } : Getting preferred curies for { qnode_key } nodes returned in this step" )
1224- canonicalized_nodes = eu .get_canonical_curies_dict (list (nodes ), log ) if nodes else {}
1225- if log .status != 'OK' :
1226- return deduplicated_kg  # NOTE(review): returns a bare KG, not the (kg, counts) tuple the annotation declares; a caller unpacking two values would fail on this path
1227-
1228- for node_key in nodes :
1229- # Figure out the preferred curie/name for this node
1230- node = nodes .get (node_key )
1231- canonicalized_node = canonicalized_nodes .get (node_key )
1232- if canonicalized_node :
1233- preferred_curie = canonicalized_node .get ('preferred_curie' , node_key )
1234- preferred_name = canonicalized_node .get ('preferred_name' , node .name )
1235- preferred_type = canonicalized_node .get ('preferred_type' )
1236- preferred_categories = eu .convert_to_list (preferred_type ) if preferred_type else node .categories
1237- curie_mappings [node_key ] = preferred_curie
1238- else :
1239- # Means the NodeSynonymizer didn't recognize this curie
1240- preferred_curie = node_key
1241- preferred_name = node .name
1242- preferred_categories = node .categories
1243- curie_mappings [node_key ] = preferred_curie
1244-
1245- # Add this node into our deduplicated KG as necessary
1246- if qnode_key != UNBOUND_NODES_KEY :
1247- if preferred_curie not in deduplicated_kg .nodes_by_qg_id [qnode_key ]:  # first node seen for a preferred CURIE wins; later duplicates are discarded
1248- node_key = preferred_curie
1249- node .name = preferred_name
1250- node .categories = preferred_categories
1251- deduplicated_kg .add_node (node_key , node , qnode_key )
1252- else : # this is an unbound node
1253- if preferred_curie not in deduplicated_kg .unbound_nodes :
1254- node .name = preferred_name
1255- node .categories = preferred_categories
1256- deduplicated_kg .unbound_nodes [preferred_curie ] = node
1257-
1258- # Then update the edges to reflect changes made to the nodes
1259- dropped_edge_count = {}
1260- for qedge_key , edges in answer_kg .edges_by_qg_id .items ():
1261- dropped_edge_count [qedge_key ] = 0
1262- for edge_key , edge in edges .items ():
1263- drop_edge = False
1264- if edge .subject not in curie_mappings :  # the edge references a node key that was not among the returned nodes
1265- log .warning (f"{ kp_name } : edge subject not in curie mappings; qedge key: { qedge_key } ; subject ID: { edge .subject } " )
1266- drop_edge = True
1267- dropped_edge_count [qedge_key ] += 1
1268- else :
1269- edge .subject = curie_mappings .get (edge .subject )
1270- if edge .object not in curie_mappings :
1271- log .warning (f"{ kp_name } : edge object not in curie mappings; qedge key: { qedge_key } ; object ID: { edge .object } " )
1272- drop_edge = True
1273- dropped_edge_count [qedge_key ] += 1  # incremented per missing endpoint, so an edge missing both subject and object adds 2
1274- else :
1275- edge .object = curie_mappings .get (edge .object )
1276- if not drop_edge :
1277- deduplicated_kg .add_edge (edge_key , edge , qedge_key )
1278- log .debug (f"{ kp_name } : After deduplication, answer KG counts are: { eu .get_printable_counts_by_qg_id (deduplicated_kg )} " )
1279- return deduplicated_kg , dropped_edge_count
1280-
12811203 @staticmethod
12821204 def _extract_query_subgraph (qedge_keys_to_expand : list [str ], query_graph : QueryGraph , log : ARAXResponse ) -> QueryGraph :
12831205 # This function extracts a sub-query graph containing the provided qedge IDs from a larger query graph
0 commit comments