Skip to content

Commit 079db18

Browse files
authored
Merge pull request #2713 from RTXteam/issue-2712
Fix unit tests broken by the transition to TRAPI 1.6.0 in kp_info_cacher (which means Expand uses Retriever)
2 parents 7b5ed40 + 647a924 commit 079db18

32 files changed

Lines changed: 9839 additions & 530 deletions

code/ARAX/ARAXQuery/ARAX_expander.py

Lines changed: 21 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ def trim_to_size(input_list, length):
5050
else:
5151
return input_list
5252

53+
KPS_THAT_RETURN_PREFERRED_NODE_CURIES = {'infores:retriever'}
54+
KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES = {'infores:rtx-kg2'}
55+
5356
class ARAXExpander:
5457

5558
def __init__(self):
@@ -1101,22 +1104,9 @@ async def expand_edge_async(
11011104

11021105
# Do some post-processing (deduplicate nodes, remove self-edges..)
11031106
# KG2c and retriever are already deduplicated and use canonical predicates
1104-
if kp_to_use != 'infores:rtx-kg2' and kp_to_use != 'infores:retriever':
1105-
qg_org_kg = eu.check_for_canonical_predicates(qg_org_kg, kp_to_use, log)
1106-
qg_org_kg,\
1107-
dropped_edge_counts = self._deduplicate_nodes(qg_org_kg,
1108-
kp_to_use,
1109-
log)
1110-
for qedge_key, count in dropped_edge_counts.items():
1111-
if count > 0:
1112-
# update query plan here
1113-
done_str = log.query_plan['qedge_keys'][qedge_key][kp_to_use]['description']
1114-
log.update_query_plan(qedge_key,
1115-
kp_to_use,
1116-
"Warning",
1117-
done_str + "; "
1118-
f"{count} edges dropped due "
1119-
"to node reference failure")
1107+
if kp_to_use not in KPS_THAT_RETURN_PREFERRED_NODE_CURIES:
1108+
log.warning(f"{kp_to_use}: this KP may not return preferred CURIEs; please check, and if it does return only preferred CURIEs, add to the Expand whitelist")
1109+
11201110
if any(edges for edges in qg_org_kg.edges_by_qg_id.values()): # Make sure the KP actually returned something
11211111
qg_org_kg = self._remove_self_edges(qg_org_kg, kp_to_use, log)
11221112

@@ -1136,6 +1126,8 @@ def _expand_node(qnode_key: str,
11361126
# This function expands a single node using the specified knowledge provider (for now only KG2 is supported)
11371127
log.debug(f"Expanding node {qnode_key} using {kps_to_use}")
11381128
qnode = query_graph.nodes[qnode_key]
1129+
if qnode.ids:
1130+
qnode.ids = eu.get_canonical_curies_list(qnode.ids, log)
11391131
single_node_qg = QueryGraph(nodes={qnode_key: qnode}, edges={})
11401132
answer_kg = QGOrganizedKnowledgeGraph()
11411133
if log.status != 'OK':
@@ -1145,18 +1137,21 @@ def _expand_node(qnode_key: str,
11451137
return answer_kg
11461138

11471139
# Answer the query using the proper KP (only our own KP answers single-node queries for now)
1148-
if kps_to_use == ["infores:rtx-kg2"]:
1149-
kp_querier = TRAPIQuerier(response_object=log,
1150-
kp_name=kps_to_use[0],
1151-
user_specified_kp=user_specified_kp,
1152-
kp_timeout=kp_timeout)
1153-
answer_kg = kp_querier.answer_single_node_query(single_node_qg)
1154-
log.info(f"Query for node {qnode_key} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})")
1155-
return answer_kg
1156-
else:
1157-
log.error("Only infores:rtx-kg2 can answer single-node queries currently", error_code="InvalidKP")
1140+
kps_to_use_that_cannot_handle_single_node_queries = set(kps_to_use) - KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES
1141+
if kps_to_use_that_cannot_handle_single_node_queries:
1142+
log.error("these KPs cannot answer single-node queries: "
1143+
f"{kps_to_use_that_cannot_handle_single_node_queries}",
1144+
error_code="InvalidKP")
11581145
return answer_kg
11591146

1147+
kp_querier = TRAPIQuerier(response_object=log,
1148+
kp_name=next(iter(KP_THAT_CAN_HANDLE_SINGLE_NODE_QUERIES)),
1149+
user_specified_kp=user_specified_kp,
1150+
kp_timeout=kp_timeout)
1151+
answer_kg = kp_querier.answer_single_node_query(single_node_qg)
1152+
log.info(f"Query for node {qnode_key} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})")
1153+
return answer_kg
1154+
11601155
def _get_query_graph_for_edge(self, qedge_key: str, full_qg: QueryGraph, overarching_kg: QGOrganizedKnowledgeGraph, log: ARAXResponse) -> QueryGraph:
11611156
# This function creates a query graph for the specified qedge, updating its qnodes' curies as needed
11621157
edge_qg = QueryGraph(nodes={}, edges={})
@@ -1205,79 +1200,6 @@ def _get_query_graph_for_edge(self, qedge_key: str, full_qg: QueryGraph, overarc
12051200
f"{qedge.predicates if qedge.predicates else ''}-({output_qnode_key}:{output_qnode.categories}{output_curie_summary})")
12061201
return edge_qg
12071202

1208-
@staticmethod
1209-
def _deduplicate_nodes(
1210-
answer_kg: QGOrganizedKnowledgeGraph,
1211-
kp_name: str,
1212-
log: ARAXResponse
1213-
) -> tuple[QGOrganizedKnowledgeGraph, dict[str, int]]:
1214-
log.debug(f"{kp_name}: Deduplicating nodes")
1215-
deduplicated_kg = QGOrganizedKnowledgeGraph(nodes={qnode_key: {} for qnode_key in answer_kg.nodes_by_qg_id},
1216-
edges={qedge_key: {} for qedge_key in answer_kg.edges_by_qg_id})
1217-
deduplicated_kg.unbound_edges = answer_kg.unbound_edges
1218-
curie_mappings = {}
1219-
1220-
# First deduplicate the bound nodes
1221-
for qnode_key, nodes in {**answer_kg.nodes_by_qg_id, UNBOUND_NODES_KEY: answer_kg.unbound_nodes}.items():
1222-
# Load preferred curie info from NodeSynonymizer
1223-
log.debug(f"{kp_name}: Getting preferred curies for {qnode_key} nodes returned in this step")
1224-
canonicalized_nodes = eu.get_canonical_curies_dict(list(nodes), log) if nodes else {}
1225-
if log.status != 'OK':
1226-
return deduplicated_kg
1227-
1228-
for node_key in nodes:
1229-
# Figure out the preferred curie/name for this node
1230-
node = nodes.get(node_key)
1231-
canonicalized_node = canonicalized_nodes.get(node_key)
1232-
if canonicalized_node:
1233-
preferred_curie = canonicalized_node.get('preferred_curie', node_key)
1234-
preferred_name = canonicalized_node.get('preferred_name', node.name)
1235-
preferred_type = canonicalized_node.get('preferred_type')
1236-
preferred_categories = eu.convert_to_list(preferred_type) if preferred_type else node.categories
1237-
curie_mappings[node_key] = preferred_curie
1238-
else:
1239-
# Means the NodeSynonymizer didn't recognize this curie
1240-
preferred_curie = node_key
1241-
preferred_name = node.name
1242-
preferred_categories = node.categories
1243-
curie_mappings[node_key] = preferred_curie
1244-
1245-
# Add this node into our deduplicated KG as necessary
1246-
if qnode_key != UNBOUND_NODES_KEY:
1247-
if preferred_curie not in deduplicated_kg.nodes_by_qg_id[qnode_key]:
1248-
node_key = preferred_curie
1249-
node.name = preferred_name
1250-
node.categories = preferred_categories
1251-
deduplicated_kg.add_node(node_key, node, qnode_key)
1252-
else: # this is an unbound node
1253-
if preferred_curie not in deduplicated_kg.unbound_nodes:
1254-
node.name = preferred_name
1255-
node.categories = preferred_categories
1256-
deduplicated_kg.unbound_nodes[preferred_curie] = node
1257-
1258-
# Then update the edges to reflect changes made to the nodes
1259-
dropped_edge_count = {}
1260-
for qedge_key, edges in answer_kg.edges_by_qg_id.items():
1261-
dropped_edge_count[qedge_key] = 0
1262-
for edge_key, edge in edges.items():
1263-
drop_edge = False
1264-
if edge.subject not in curie_mappings:
1265-
log.warning(f"{kp_name}: edge subject not in curie mappings; qedge key: {qedge_key}; subject ID: {edge.subject}")
1266-
drop_edge = True
1267-
dropped_edge_count[qedge_key] += 1
1268-
else:
1269-
edge.subject = curie_mappings.get(edge.subject)
1270-
if edge.object not in curie_mappings:
1271-
log.warning(f"{kp_name}: edge object not in curie mappings; qedge key: {qedge_key}; object ID: {edge.object}")
1272-
drop_edge = True
1273-
dropped_edge_count[qedge_key] += 1
1274-
else:
1275-
edge.object = curie_mappings.get(edge.object)
1276-
if not drop_edge:
1277-
deduplicated_kg.add_edge(edge_key, edge, qedge_key)
1278-
log.debug(f"{kp_name}: After deduplication, answer KG counts are: {eu.get_printable_counts_by_qg_id(deduplicated_kg)}")
1279-
return deduplicated_kg, dropped_edge_count
1280-
12811203
@staticmethod
12821204
def _extract_query_subgraph(qedge_keys_to_expand: list[str], query_graph: QueryGraph, log: ARAXResponse) -> QueryGraph:
12831205
# This function extracts a sub-query graph containing the provided qedge IDs from a larger query graph

code/ARAX/ARAXQuery/ARAX_filter_kg.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -575,13 +575,24 @@ def __remove_edges_by_predicate(self, describe=False):
575575
"""
576576
message = self.message
577577
parameters = self.parameters
578+
kg = message.knowledge_graph
578579
# make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
579580
if message and parameters and hasattr(message, 'query_graph') and hasattr(message.query_graph, 'edges'):
580581
allowable_parameters = {'action': {'remove_edges_by_predicate'},
581582
'edge_predicate': set([x.predicate for x in self.message.knowledge_graph.edges.values()]),
582583
'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
583-
'qnode_keys': set([t for x in self.message.knowledge_graph.nodes.values() if x.qnode_keys is not None for t in x.qnode_keys]),
584-
'qedge_keys': set([t for x in self.message.knowledge_graph.edges.values() if x.qedge_keys is not None for t in x.qedge_keys])
584+
'qnode_keys': {
585+
qnode_key
586+
for node in kg.nodes.values()
587+
for qnode_key in (getattr(node, "qnode_keys", None) or [])
588+
},
589+
'qedge_keys': {
590+
qedge_key
591+
for edge in kg.edges.values()
592+
for qedge_key in (getattr(edge, "qedge_keys", None) or [])
593+
}
594+
# 'qnode_keys': set([t for x in self.message.knowledge_graph.nodes.values() if x.qnode_keys is not None for t in x.qnode_keys]),
595+
# 'qedge_keys': set([t for x in self.message.knowledge_graph.edges.values() if x.qedge_keys is not None for t in x.qedge_keys])
585596
}
586597
else:
587598
allowable_parameters = {'action': {'remove_edges_by_predicate'},
@@ -849,6 +860,7 @@ def __remove_edges_by_std_dev(self, describe=False):
849860
:return:
850861
"""
851862
message = self.message
863+
kg = message.knowledge_graph
852864
parameters = self.parameters
853865
# make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
854866
if message and parameters and hasattr(message, 'knowledge_graph') and hasattr(message.knowledge_graph, 'edges'):
@@ -868,8 +880,16 @@ def __remove_edges_by_std_dev(self, describe=False):
868880
'threshold': {float()},
869881
'top': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
870882
'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
871-
'qnode_keys':set([t for x in self.message.knowledge_graph.nodes.values() if x.qnode_keys is not None for t in x.qnode_keys]),
872-
'qedge_keys': set([t for x in self.message.knowledge_graph.edges.values() if x.qedge_keys is not None for t in x.qedge_keys])
883+
'qnode_keys': {
884+
qnode_key
885+
for node in kg.nodes.values()
886+
for qnode_key in (getattr(node, "qnode_keys", None) or [])
887+
},
888+
'qedge_keys': {
889+
qedge_key
890+
for edge in kg.edges.values()
891+
for qedge_key in (getattr(edge, "qedge_keys", None) or [])
892+
}
873893
}
874894
else:
875895
allowable_parameters = {'action': {'remove_edges_by_std_dev'},

code/ARAX/ARAXQuery/Expand/kp_info_cacher.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def load_kp_info_caches(self, log: ARAXResponse):
142142
log.error(f"Unable to load KP info caches: {e}")
143143

144144
# The caches MUST be up to date at this point, so we just load them
145-
log.debug("Loading cached Smart API amd meta map info")
145+
log.debug("Loading cached Smart API and meta map info")
146146
with open(self.smart_api_and_meta_map_cache, "rb") as cache:
147147
cache = pickle.load(cache)
148148
smart_api_info = cache['smart_api_cache']

0 commit comments

Comments
 (0)