Skip to content

Commit 25cc203

Browse files
authored
Merge pull request #24 from VACLab/issue-8-caching-bugfix
fix caching issue #8
2 parents 2be49ee + 746940f commit 25cc203

3 files changed

Lines changed: 38 additions & 18 deletions

File tree

biasanalyzer/cohort.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,10 @@ def get_concept_stats(self, concept_type='condition_occurrence', filter_count=0,
6363
vocab=vocab,
6464
print_concept_hierarchy=print_concept_hierarchy)
6565
return (cohort_stats,
66-
ConceptHierarchy.build_concept_hierarchy_from_results(self.cohort_id, cohort_stats[concept_type]))
66+
ConceptHierarchy.build_concept_hierarchy_from_results(self.cohort_id, concept_type,
67+
cohort_stats[concept_type],
68+
filter_count=filter_count,
69+
vocab=vocab))
6770

6871

6972
def __del__(self):
@@ -162,7 +165,10 @@ def get_cohorts_concept_stats(self, cohorts: List[int],
162165
filter_count=filter_count,
163166
vocab=vocab)
164167
for c in cohorts]
165-
hierarchies = [ConceptHierarchy.build_concept_hierarchy_from_results(c, c_stats.get(concept_type, []))
168+
hierarchies = [ConceptHierarchy.build_concept_hierarchy_from_results(c, concept_type,
169+
c_stats.get(concept_type, []),
170+
filter_count=filter_count,
171+
vocab=vocab)
166172
for c, c_stats in zip(cohorts, cohort_concept_stats)]
167173
return reduce(lambda h1, h2: h1.union(h2), hierarchies).to_dict()
168174

biasanalyzer/concept.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,20 @@ def _normalize_identifier(identifier: str) -> str:
8686
return "+".join(parts)
8787

8888
@classmethod
89-
def build_concept_hierarchy_from_results(cls, cohort_id: int, results: List[dict]):
89+
def build_concept_hierarchy_from_results(cls, cohort_id: int, concept_type: str, results: List[dict],
90+
filter_count=0, vocab=None):
9091
"""
9192
build concept hierarchy tree managed by networkx from list of dicts returned from the concept prevalence SQL
92-
with cache management
93+
with cache management. cohort_id, concept_type, filter_count, and vocab are used for caching to uniquely identify
94+
a cached concept hierarchy for the specified cohort_id, concept_type, filter_count, and vocab.
9395
:param results: list of dicts from prevalence SQL
9496
:param cohort_id: cohort id to get concept hierarchy for
97+
:param concept_type: concept_type to get concept hierarchy for
98+
:param filter_count: filter_count to get concept hierarchy for; the default value 0 means no filtering
99+
:param vocab: vocab to get concept hierarchy for with default value None meaning default vocab will be used
95100
:return: ConceptHierarchy object
96101
"""
97-
identifer = str(cohort_id)
102+
identifer = f"{cohort_id}-{concept_type}-{filter_count}-{vocab}"
98103
if identifer in cls._graph_cache:
99104
return cls._graph_cache[identifer]
100105

@@ -117,7 +122,7 @@ def build_concept_hierarchy_from_results(cls, cohort_id: int, results: List[dict
117122
graph = nx.DiGraph()
118123
# add nodes with metadata + metrics
119124
for cid, meta in node_metadata.items():
120-
graph.add_node(cid, **meta, metrics={identifer: metrics_by_concept[cid]})
125+
graph.add_node(cid, **meta, metrics={str(cohort_id): metrics_by_concept[cid]})
121126

122127
# add parent-child edges
123128
for row in results:

tests/query_based/test_hierarchical_prevalence.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,24 @@ def test_identifier_normalization_and_cache():
6262
assert ConceptHierarchy._normalize_identifier("1+2+2") == "1+2"
6363

6464
# fake minimal results to build hierarchy
65-
results = [
65+
results1 = [
6666
{"ancestor_concept_id": 1, "descendant_concept_id": 1,
6767
"concept_name": "Diabetes", "concept_code": "DIA",
6868
"count_in_cohort": 5, "prevalence": 0.5}
6969
]
70-
h1 = ConceptHierarchy.build_concept_hierarchy_from_results(1, results)
71-
h2 = ConceptHierarchy.build_concept_hierarchy_from_results(1, results)
72-
assert h1 is h2 # cache reuse
73-
assert h1.identifier == "1"
70+
results2 = [
71+
{"ancestor_concept_id": 1, "descendant_concept_id": 1,
72+
"concept_name": "Diabetes2", "concept_code": "DIA",
73+
"count_in_cohort": 15, "prevalence": 0.15}
74+
]
75+
h1 = ConceptHierarchy.build_concept_hierarchy_from_results(1, 'condition_occurrence', results1)
76+
h2 = ConceptHierarchy.build_concept_hierarchy_from_results(1, 'condition_occurrence', results2)
77+
assert h1 is h2 # cache reuse even though results2 is different from results1
78+
assert h1.identifier == "1-condition_occurrence-0-None"
79+
h2 = ConceptHierarchy.build_concept_hierarchy_from_results(1, 'drug_exposure', results2)
80+
assert not h1 is h2 # cache is not used since drug_exposure concept_type is different than the cached
81+
# condition_occurrence
82+
assert h2.identifier == "1-drug_exposure-0-None"
7483

7584
def test_union_and_cache_behavior():
7685
ConceptHierarchy.clear_cache()
@@ -85,14 +94,14 @@ def test_union_and_cache_behavior():
8594
"count_in_cohort": 3, "prevalence": 0.3}
8695
]
8796

88-
h1 = ConceptHierarchy.build_concept_hierarchy_from_results(1, results1)
89-
h2 = ConceptHierarchy.build_concept_hierarchy_from_results(2, results2)
90-
assert "1" in ConceptHierarchy._graph_cache
91-
assert "2" in ConceptHierarchy._graph_cache
97+
h1 = ConceptHierarchy.build_concept_hierarchy_from_results(1, 'condition_occurrence', results1)
98+
h2 = ConceptHierarchy.build_concept_hierarchy_from_results(2, 'condition_occurrence', results2)
99+
assert "1-condition_occurrence-0-None" in ConceptHierarchy._graph_cache
100+
assert "2-condition_occurrence-0-None" in ConceptHierarchy._graph_cache
92101
h12 = h1.union(h2)
93102
h21 = h2.union(h1)
94-
assert h12.identifier == "1+2"
95-
assert h21.identifier == "1+2"
103+
assert h12.identifier == "1-condition_occurrence-0-None+2-condition_occurrence-0-None"
104+
assert h21.identifier == "1-condition_occurrence-0-None+2-condition_occurrence-0-None"
96105
assert h12 is h21
97106

98107
def test_traversal_and_serialization():
@@ -105,7 +114,7 @@ def test_traversal_and_serialization():
105114
"concept_name": "Child", "concept_code": "C",
106115
"count_in_cohort": 2, "prevalence": 0.2}
107116
]
108-
h = ConceptHierarchy.build_concept_hierarchy_from_results(1, results)
117+
h = ConceptHierarchy.build_concept_hierarchy_from_results(1, 'condition_occurrence', results)
109118

110119
# roots
111120
roots = h.get_root_nodes()

0 commit comments

Comments
 (0)