11import uuid
2+ from typing import Dict , List , Any
23
34import pandas as pd
45from pandas import DataFrame
56from py2neo import Graph # type: ignore
6- from typing import List , Dict , Any
77from src .argument_parser import logger
88
# Bolt endpoint used to build the module-level Neo4j handle below.
# NOTE(review): the endpoint and the credentials are hard-coded; presumably
# this targets a local development database — confirm before deploying.
uri: str = "bolt://localhost:7687"
# Graph handle constructed when this module is imported.
graph: Graph = Graph(uri, auth=("neo4j", "test"))
1111
1212
13- def create_reaction_id_map (decomposed_uid_mapping , reaction_ids , best_matches ):
13+ def _get_reactome_id_from_hash (decomposed_uid_mapping : pd .DataFrame , hash_value : str ) -> int :
14+ """Extract reactome_id for a given hash from decomposed_uid_mapping."""
15+ return decomposed_uid_mapping .loc [
16+ decomposed_uid_mapping ["uid" ] == hash_value , "reactome_id"
17+ ].values [0 ]
18+
19+
20+ def create_reaction_id_map (
21+ decomposed_uid_mapping : pd .DataFrame ,
22+ reaction_ids : List [int ],
23+ best_matches : pd .DataFrame
24+ ) -> pd .DataFrame :
25+ """Create a mapping between reaction UIDs, reactome IDs, and input/output hashes."""
26+
1427 reaction_id_map_column_types = {
1528 "uid" : str ,
1629 "reactome_id" : pd .Int64Dtype (),
1730 "input_hash" : str ,
1831 "output_hash" : str ,
1932 }
20- reaction_id_map = pd .DataFrame (columns = reaction_id_map_column_types .keys ()).astype (
21- reaction_id_map_column_types
22- )
23-
24- rows = []
33+
2534 print ("Checking best_matches contents:" )
26-
27- for index , match in best_matches .iterrows ():
35+
36+ rows = []
37+ for _ , match in best_matches .iterrows ():
2838 incomming_hash = match ["incomming" ]
2939 outgoing_hash = match ["outgoing" ]
30- reactome_id = decomposed_uid_mapping .loc [
31- decomposed_uid_mapping ["uid" ] == incomming_hash , "reactome_id"
32- ].values [0 ]
40+ reactome_id = _get_reactome_id_from_hash (decomposed_uid_mapping , incomming_hash )
41+
3342 row = {
3443 "uid" : str (uuid .uuid4 ()),
3544 "reactome_id" : int (reactome_id ),
@@ -39,102 +48,131 @@ def create_reaction_id_map(decomposed_uid_mapping, reaction_ids, best_matches):
3948 print ("row" )
4049 print (row )
4150 rows .append (row )
42-
43- new_rows_df = pd .DataFrame (rows )
44- reaction_id_map = pd .concat ([reaction_id_map , new_rows_df ], ignore_index = True )
45-
51+
52+ reaction_id_map = pd .DataFrame (rows ).astype (reaction_id_map_column_types )
53+
4654 return reaction_id_map
4755
4856
def create_uid_reaction_connections(
    reaction_id_map: pd.DataFrame,
    best_matches: pd.DataFrame,
    decomposed_uid_mapping: pd.DataFrame,
) -> pd.DataFrame:
    """Create connections between reaction UIDs based on best matches.

    For every row of ``best_matches`` the "incomming" and "outgoing" hashes
    are resolved to reactome IDs through ``decomposed_uid_mapping`` and then
    to reaction UIDs through ``reaction_id_map``. A connection is recorded
    only when both ends resolve to a UID.

    Args:
        reaction_id_map: DataFrame with "reactome_id" and "uid" columns.
        best_matches: DataFrame with "incomming" and "outgoing" hash columns.
        decomposed_uid_mapping: DataFrame with "uid" and "reactome_id" columns.

    Returns:
        DataFrame with the columns "preceding_uid" and "following_uid", one
        row per resolved pair. Both columns are present even when no pair
        resolves, so callers can select them without checking for emptiness.

    Raises:
        IndexError: If a hash has no matching row in ``decomposed_uid_mapping``.
    """
    connection_columns = ["preceding_uid", "following_uid"]

    # If a reactome ID appears in several rows, the last row's UID wins.
    reactome_id_to_uid_mapping = dict(
        zip(reaction_id_map["reactome_id"], reaction_id_map["uid"])
    )

    uid_reaction_connections_data = []

    for _, match in best_matches.iterrows():
        incomming_hash = match["incomming"]
        outgoing_hash = match["outgoing"]

        # Get reactome IDs for both hashes
        preceding_reaction_id = _get_reactome_id_from_hash(decomposed_uid_mapping, incomming_hash)
        following_reaction_id = _get_reactome_id_from_hash(decomposed_uid_mapping, outgoing_hash)

        # Get corresponding UIDs
        preceding_uid = reactome_id_to_uid_mapping.get(preceding_reaction_id)
        following_uid = reactome_id_to_uid_mapping.get(following_reaction_id)

        # Only add connection if both UIDs exist
        if preceding_uid is not None and following_uid is not None:
            uid_reaction_connections_data.append({
                "preceding_uid": preceding_uid,
                "following_uid": following_uid,
            })

    # Passing the column names keeps the schema stable for an empty result.
    return pd.DataFrame(uid_reaction_connections_data, columns=connection_columns)
90+
91+
92+ def _execute_regulator_query (
93+ graph : Graph ,
94+ query : str ,
95+ reaction_uuid : str ,
96+ function_name : str
97+ ) -> List [Dict [str , Any ]]:
98+ """Execute a regulator query and return processed results."""
99+ try :
100+ result = graph .run (query )
101+ regulators = []
102+
103+ for record in result :
104+ regulator_uuid = str (uuid .uuid4 ())
105+ regulators .append ({
106+ "reaction" : reaction_uuid ,
107+ "PhysicalEntity" : regulator_uuid ,
108+ "edge_type" : "regulator" ,
109+ "uuid" : regulator_uuid ,
110+ "reaction_uuid" : reaction_uuid ,
111+ })
112+
113+ return regulators
114+
115+ except Exception as e :
116+ logger .error (f"Error in { function_name } " , exc_info = True )
117+ raise e
76118
77119
def get_catalysts_for_reaction(reaction_id_map: DataFrame, graph: Graph) -> DataFrame:
    """Query the graph for the catalysts of every reaction in ``reaction_id_map``.

    Args:
        reaction_id_map: DataFrame with "reactome_id" and "uid" columns; one
            query is issued per row.
        graph: Object whose ``run(query).data()`` yields a list of dicts.

    Returns:
        DataFrame with the columns "reaction_id", "catalyst_id", "edge_type",
        "uuid" and "reaction_uuid". "uuid" is freshly generated per record and
        "reaction_uuid" is the "uid" of the originating row.

    Raises:
        Exception: Any error raised while querying is logged and re-raised.
    """
    output_columns = ["reaction_id", "catalyst_id", "edge_type", "uuid", "reaction_uuid"]
    collected = []

    for _, mapping_row in reaction_id_map.iterrows():
        reaction_id = mapping_row["reactome_id"]
        reaction_uuid = mapping_row["uid"]

        cypher = (
            f"MATCH (reaction:ReactionLikeEvent{{dbId: {reaction_id} }})-[:catalystActivity]->(catalystActivity:CatalystActivity)-[:physicalEntity]->(catalyst:PhysicalEntity) "
            "RETURN reaction.dbId AS reaction_id, catalyst.dbId AS catalyst_id, 'catalyst' AS edge_type"
        )

        try:
            records = graph.run(cypher).data()
            # Tag every record with its own UUID and the owning reaction's UID.
            for record in records:
                record.update(uuid=str(uuid.uuid4()), reaction_uuid=reaction_uuid)
            collected += records
        except Exception as err:
            logger.error("Error in get_catalysts_for_reaction", exc_info=True)
            raise err

    return pd.DataFrame(collected, columns=output_columns)
102148
103149
104150def get_positive_regulators_for_reaction (
105- reaction_id_mapping : DataFrame , graph : Graph
151+ reaction_id_mapping : DataFrame ,
152+ graph : Graph
106153) -> DataFrame :
154+ """Get positive regulators for reactions using Neo4j graph queries."""
107155 regulators_list = []
108-
156+
109157 for _ , row in reaction_id_mapping .iterrows ():
110158 reaction_id = row ["reactome_id" ]
111159 reaction_uuid = row ["uid" ]
160+
112161 if pd .isna (reaction_uuid ):
113162 logger .error (f"No UUID found for reaction ID { reaction_id } " )
114163 continue
115-
164+
116165 query = (
117166 f"MATCH (reaction)-[:regulatedBy]->(regulator:PositiveRegulation)-[:regulator]->(pe:PhysicalEntity) "
118167 f"WHERE reaction.dbId = { reaction_id } "
119168 "RETURN reaction.dbId as reaction, pe.dbId as PhysicalEntity"
120169 )
121- try :
122- result = graph .run (query )
123- for record in result :
124- regulator_uuid = str (uuid .uuid4 ()) # Generate UUID for each entity
125- regulators_list .append (
126- {
127- "reaction" : reaction_uuid ,
128- "PhysicalEntity" : regulator_uuid ,
129- "edge_type" : "regulator" ,
130- "uuid" : regulator_uuid ,
131- "reaction_uuid" : reaction_uuid ,
132- }
133- )
134- except Exception as e :
135- logger .error ("Error in get_positive_regulators_for_reaction" , exc_info = True )
136- raise e
137-
170+
171+ regulators = _execute_regulator_query (
172+ graph , query , reaction_uuid , "get_positive_regulators_for_reaction"
173+ )
174+ regulators_list .extend (regulators )
175+
138176 return pd .DataFrame (
139177 regulators_list ,
140178 columns = ["reaction" , "PhysicalEntity" , "edge_type" , "uuid" , "reaction_uuid" ],
@@ -143,47 +181,38 @@ def get_positive_regulators_for_reaction(
143181
144182
def get_negative_regulators_for_reaction(
    reaction_id_mapping: DataFrame,
    graph: Graph,
) -> DataFrame:
    """Collect negative-regulator entries for every reaction in the mapping.

    Rows whose "uid" is missing are logged and skipped. For every other row a
    NegativeRegulation query is built for its "reactome_id" and handed to
    ``_execute_regulator_query``.

    Args:
        reaction_id_mapping: DataFrame with "reactome_id" and "uid" columns.
        graph: Graph handle forwarded to ``_execute_regulator_query``.

    Returns:
        DataFrame with the columns "reaction", "PhysicalEntity", "edge_type",
        "uuid" and "reaction_uuid".
    """
    output_columns = ["reaction", "PhysicalEntity", "edge_type", "uuid", "reaction_uuid"]
    collected = []

    for _, mapping_row in reaction_id_mapping.iterrows():
        reaction_id = mapping_row["reactome_id"]
        reaction_uuid = mapping_row["uid"]

        # Guard clause: a reaction without a UID cannot be linked to anything.
        if pd.isna(reaction_uuid):
            logger.error(f"No UUID found for reaction ID {reaction_id} ")
            continue

        cypher = (
            "MATCH (reaction)-[:regulatedBy]->(regulator:NegativeRegulation)-[:regulator]->(pe:PhysicalEntity) "
            f"WHERE reaction.dbId = {reaction_id} "
            "RETURN reaction.dbId as reaction, pe.dbId as PhysicalEntity"
        )

        collected += _execute_regulator_query(
            graph, cypher, reaction_uuid, "get_negative_regulators_for_reaction"
        )

    return pd.DataFrame(collected, columns=output_columns)
185215
186-
187216def _get_non_null_values (df : pd .DataFrame , column : str ) -> List [Any ]:
188217 """Extract non-null values from a DataFrame column."""
189218 return [value for value in df [column ].tolist () if pd .notna (value )]
@@ -401,7 +430,6 @@ def create_pathway_logic_network(
401430 pathway_logic_network_data ,
402431 )
403432
404- # Note: Using empty strings to maintain original functionality
405433 and_or = ""
406434 edge_type = ""
407435 append_regulators (
0 commit comments