Skip to content

Commit e9488be

Browse files
authored
Merge pull request #34 from reactome/refactored_logic_network_generator.py
Improved cypher query functions and reaction_id map
2 parents b68c803 + adaff8b commit e9488be

1 file changed

Lines changed: 107 additions & 79 deletions

File tree

src/logic_network_generator.py

Lines changed: 107 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,44 @@
11
import uuid
2+
from typing import Dict, List, Any
23

34
import pandas as pd
45
from pandas import DataFrame
56
from py2neo import Graph # type: ignore
6-
from typing import List, Dict, Any
77
from src.argument_parser import logger
88

99
# Bolt URI of the Neo4j server; used below to build the module-level `graph` handle.
uri: str = "bolt://localhost:7687"
1010
# Module-level py2neo Graph handle, created from `uri` when this module is imported.
# NOTE(review): the auth tuple (presumably user/password) is hard-coded here —
# consider loading it from configuration or the environment; confirm with the owners.
graph: Graph = Graph(uri, auth=("neo4j", "test"))
1111

1212

13-
def create_reaction_id_map(decomposed_uid_mapping, reaction_ids, best_matches):
13+
def _get_reactome_id_from_hash(decomposed_uid_mapping: pd.DataFrame, hash_value: str) -> int:
    """Return the ``reactome_id`` of the first row whose ``uid`` equals *hash_value*.

    Args:
        decomposed_uid_mapping: Frame with at least ``uid`` and ``reactome_id`` columns.
        hash_value: Value to look up in the ``uid`` column.

    Returns:
        The ``reactome_id`` cell of the first matching row, returned as stored
        (no type coercion is applied).

    Raises:
        IndexError: If no row has ``uid == hash_value``.
    """
    matches = decomposed_uid_mapping.loc[
        decomposed_uid_mapping["uid"] == hash_value, "reactome_id"
    ]
    if matches.empty:
        # Same exception type a bare ``.values[0]`` raises on an empty selection,
        # but the message now names the hash that could not be resolved.
        raise IndexError(
            f"No reactome_id found for uid {hash_value!r} in decomposed_uid_mapping"
        )
    return matches.values[0]
18+
19+
20+
def create_reaction_id_map(
21+
decomposed_uid_mapping: pd.DataFrame,
22+
reaction_ids: List[int],
23+
best_matches: pd.DataFrame
24+
) -> pd.DataFrame:
25+
"""Create a mapping between reaction UIDs, reactome IDs, and input/output hashes."""
26+
1427
reaction_id_map_column_types = {
1528
"uid": str,
1629
"reactome_id": pd.Int64Dtype(),
1730
"input_hash": str,
1831
"output_hash": str,
1932
}
20-
reaction_id_map = pd.DataFrame(columns=reaction_id_map_column_types.keys()).astype(
21-
reaction_id_map_column_types
22-
)
23-
24-
rows = []
33+
2534
print("Checking best_matches contents:")
26-
27-
for index, match in best_matches.iterrows():
35+
36+
rows = []
37+
for _, match in best_matches.iterrows():
2838
incomming_hash = match["incomming"]
2939
outgoing_hash = match["outgoing"]
30-
reactome_id = decomposed_uid_mapping.loc[
31-
decomposed_uid_mapping["uid"] == incomming_hash, "reactome_id"
32-
].values[0]
40+
reactome_id = _get_reactome_id_from_hash(decomposed_uid_mapping, incomming_hash)
41+
3342
row = {
3443
"uid": str(uuid.uuid4()),
3544
"reactome_id": int(reactome_id),
@@ -39,102 +48,131 @@ def create_reaction_id_map(decomposed_uid_mapping, reaction_ids, best_matches):
3948
print("row")
4049
print(row)
4150
rows.append(row)
42-
43-
new_rows_df = pd.DataFrame(rows)
44-
reaction_id_map = pd.concat([reaction_id_map, new_rows_df], ignore_index=True)
45-
51+
52+
reaction_id_map = pd.DataFrame(rows).astype(reaction_id_map_column_types)
53+
4654
return reaction_id_map
4755

4856

4957
def create_uid_reaction_connections(
    reaction_id_map: pd.DataFrame,
    best_matches: pd.DataFrame,
    decomposed_uid_mapping: pd.DataFrame
) -> pd.DataFrame:
    """Create connections between reaction UIDs based on best matches.

    For every row of *best_matches*, the ``incomming`` and ``outgoing`` hashes
    are resolved to reactome IDs through *decomposed_uid_mapping*, and those
    IDs are then translated to UIDs taken from *reaction_id_map*.

    Args:
        reaction_id_map: Frame with ``reactome_id`` and ``uid`` columns. When
            several rows share a ``reactome_id``, the last row's ``uid`` is the
            one used (the lookup is a plain ``dict`` built from the columns).
        best_matches: Frame with ``incomming`` and ``outgoing`` hash columns.
        decomposed_uid_mapping: Frame passed to ``_get_reactome_id_from_hash``
            to resolve each hash.

    Returns:
        A frame with columns ``preceding_uid`` and ``following_uid``, one row
        per match for which both UIDs were found. The two columns are present
        even when no connection was produced.

    Raises:
        Whatever ``_get_reactome_id_from_hash`` raises when a hash cannot be
        resolved.
    """
    reactome_id_to_uid_mapping = dict(
        zip(reaction_id_map["reactome_id"], reaction_id_map["uid"])
    )

    uid_reaction_connections_data = []

    for _, match in best_matches.iterrows():
        incomming_hash = match["incomming"]
        outgoing_hash = match["outgoing"]

        # Resolve both hashes to reactome IDs.
        preceding_reaction_id = _get_reactome_id_from_hash(decomposed_uid_mapping, incomming_hash)
        following_reaction_id = _get_reactome_id_from_hash(decomposed_uid_mapping, outgoing_hash)

        # Translate the reactome IDs to the UIDs assigned in reaction_id_map.
        preceding_uid = reactome_id_to_uid_mapping.get(preceding_reaction_id)
        following_uid = reactome_id_to_uid_mapping.get(following_reaction_id)

        # Only record the connection when both ends are known.
        if preceding_uid is not None and following_uid is not None:
            uid_reaction_connections_data.append({
                "preceding_uid": preceding_uid,
                "following_uid": following_uid,
            })

    # Explicit columns keep the schema stable when the list is empty; without
    # them an empty result would be a frame with no columns at all.
    return pd.DataFrame(
        uid_reaction_connections_data,
        columns=["preceding_uid", "following_uid"],
    )
90+
91+
92+
def _execute_regulator_query(
    graph: Graph,
    query: str,
    reaction_uuid: str,
    function_name: str
) -> List[Dict[str, Any]]:
    """Run *query* and build one regulator edge record per returned row.

    The values of the returned rows are not read: each row only causes one
    record to be emitted, in which a freshly generated UUID is stored under
    both ``PhysicalEntity`` and ``uuid``.

    Args:
        graph: Object whose ``run(query)`` yields the result rows.
        query: Cypher text passed to ``graph.run`` unchanged.
        reaction_uuid: Stored under ``reaction`` and ``reaction_uuid`` in every record.
        function_name: Name of the calling function; used only in the error log line.

    Returns:
        A list of dicts with keys ``reaction``, ``PhysicalEntity``,
        ``edge_type`` (always ``"regulator"``), ``uuid`` and ``reaction_uuid``.

    Raises:
        Exception: Any error raised while running or consuming the query is
            logged with its traceback and re-raised unchanged.
    """
    try:
        regulators = []
        # The result is consumed inside the ``try`` so that errors raised while
        # iterating it are logged as well, not only errors from ``run`` itself.
        for _ in graph.run(query):
            regulator_uuid = str(uuid.uuid4())
            regulators.append({
                "reaction": reaction_uuid,
                "PhysicalEntity": regulator_uuid,
                "edge_type": "regulator",
                "uuid": regulator_uuid,
                "reaction_uuid": reaction_uuid,
            })
        return regulators
    except Exception:
        # ``logger.exception`` logs at ERROR level with the traceback attached;
        # the bare ``raise`` re-raises the active exception unchanged.
        logger.exception("Error in %s", function_name)
        raise
76118

77119

78120
def get_catalysts_for_reaction(reaction_id_map: DataFrame, graph: Graph) -> DataFrame:
    """Get catalysts for reactions using Neo4j graph queries.

    One query is run per row of *reaction_id_map*; every returned record is
    tagged with a new UUID and with the ``uid`` of the row that produced it.

    Args:
        reaction_id_map: Frame with ``reactome_id`` and ``uid`` columns.
        graph: py2neo graph handle used to run the Cypher query.

    Returns:
        A frame with columns ``reaction_id``, ``catalyst_id``, ``edge_type``,
        ``uuid`` and ``reaction_uuid`` (present even when nothing was found).

    Raises:
        Exception: Any error raised while preparing or running a query is
            logged with its traceback and re-raised unchanged.
    """
    # The query text is the same for every row; the reaction id is supplied as
    # a Cypher parameter instead of being interpolated into the string.
    query = (
        "MATCH (reaction:ReactionLikeEvent{dbId: $reaction_id})-[:catalystActivity]->(catalystActivity:CatalystActivity)-[:physicalEntity]->(catalyst:PhysicalEntity) "
        "RETURN reaction.dbId AS reaction_id, catalyst.dbId AS catalyst_id, 'catalyst' AS edge_type"
    )

    catalyst_list = []

    for _, row in reaction_id_map.iterrows():
        reaction_uuid = row["uid"]

        try:
            # Coerce to a built-in int so the driver receives a plain integer
            # rather than a numpy/pandas scalar.
            parameters = {"reaction_id": int(row["reactome_id"])}
            data = graph.run(query, parameters).data()
        except Exception:
            logger.exception("Error in get_catalysts_for_reaction")
            raise

        for item in data:
            item["uuid"] = str(uuid.uuid4())
            item["reaction_uuid"] = reaction_uuid
        catalyst_list.extend(data)

    return pd.DataFrame(
        catalyst_list,
        columns=["reaction_id", "catalyst_id", "edge_type", "uuid", "reaction_uuid"],
    )
102148

103149

104150
def get_positive_regulators_for_reaction(
105-
reaction_id_mapping: DataFrame, graph: Graph
151+
reaction_id_mapping: DataFrame,
152+
graph: Graph
106153
) -> DataFrame:
154+
"""Get positive regulators for reactions using Neo4j graph queries."""
107155
regulators_list = []
108-
156+
109157
for _, row in reaction_id_mapping.iterrows():
110158
reaction_id = row["reactome_id"]
111159
reaction_uuid = row["uid"]
160+
112161
if pd.isna(reaction_uuid):
113162
logger.error(f"No UUID found for reaction ID {reaction_id}")
114163
continue
115-
164+
116165
query = (
117166
f"MATCH (reaction)-[:regulatedBy]->(regulator:PositiveRegulation)-[:regulator]->(pe:PhysicalEntity) "
118167
f"WHERE reaction.dbId = {reaction_id} "
119168
"RETURN reaction.dbId as reaction, pe.dbId as PhysicalEntity"
120169
)
121-
try:
122-
result = graph.run(query)
123-
for record in result:
124-
regulator_uuid = str(uuid.uuid4()) # Generate UUID for each entity
125-
regulators_list.append(
126-
{
127-
"reaction": reaction_uuid,
128-
"PhysicalEntity": regulator_uuid,
129-
"edge_type": "regulator",
130-
"uuid": regulator_uuid,
131-
"reaction_uuid": reaction_uuid,
132-
}
133-
)
134-
except Exception as e:
135-
logger.error("Error in get_positive_regulators_for_reaction", exc_info=True)
136-
raise e
137-
170+
171+
regulators = _execute_regulator_query(
172+
graph, query, reaction_uuid, "get_positive_regulators_for_reaction"
173+
)
174+
regulators_list.extend(regulators)
175+
138176
return pd.DataFrame(
139177
regulators_list,
140178
columns=["reaction", "PhysicalEntity", "edge_type", "uuid", "reaction_uuid"],
@@ -143,47 +181,38 @@ def get_positive_regulators_for_reaction(
143181

144182

145183
def get_negative_regulators_for_reaction(
    reaction_id_mapping: DataFrame,
    graph: Graph
) -> DataFrame:
    """Collect negative-regulator edge records for every mapped reaction.

    Each row of *reaction_id_mapping* supplies a ``reactome_id`` and a ``uid``.
    Rows whose ``uid`` is missing are logged and skipped; for the others a
    ``NegativeRegulation`` Cypher query is handed to
    ``_execute_regulator_query`` and its records are accumulated.

    Returns:
        A frame with columns ``reaction``, ``PhysicalEntity``, ``edge_type``,
        ``uuid`` and ``reaction_uuid``.
    """
    output_columns = ["reaction", "PhysicalEntity", "edge_type", "uuid", "reaction_uuid"]
    collected = []

    for _, mapping_row in reaction_id_mapping.iterrows():
        reaction_id = mapping_row["reactome_id"]
        reaction_uuid = mapping_row["uid"]

        if not pd.isna(reaction_uuid):
            match_clause = "MATCH (reaction)-[:regulatedBy]->(regulator:NegativeRegulation)-[:regulator]->(pe:PhysicalEntity) "
            where_clause = f"WHERE reaction.dbId = {reaction_id} "
            return_clause = "RETURN reaction.dbId as reaction, pe.dbId as PhysicalEntity"
            cypher = match_clause + where_clause + return_clause

            collected += _execute_regulator_query(
                graph, cypher, reaction_uuid, "get_negative_regulators_for_reaction"
            )
        else:
            # No UID to attach the regulators to: report it and move on.
            logger.error(f"No UUID found for reaction ID {reaction_id}")

    return pd.DataFrame(collected, columns=output_columns, index=None)
184214

185215

186-
187216
def _get_non_null_values(df: pd.DataFrame, column: str) -> List[Any]:
    """Extract non-null values from a DataFrame column.

    Args:
        df: Frame to read from.
        column: Name of the column whose values are returned.

    Returns:
        The column's values, in order, with missing entries removed.
    """
    # ``dropna`` decides missingness per cell, so cells that hold list-like
    # objects are kept instead of being truth-tested element by element.
    return df[column].dropna().tolist()
@@ -401,7 +430,6 @@ def create_pathway_logic_network(
401430
pathway_logic_network_data,
402431
)
403432

404-
# Note: Using empty strings to maintain original functionality
405433
and_or = ""
406434
edge_type = ""
407435
append_regulators(

0 commit comments

Comments
 (0)