-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathsnomed_usage_data.rs
More file actions
331 lines (287 loc) · 13.8 KB
/
snomed_usage_data.rs
File metadata and controls
331 lines (287 loc) · 13.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
//! This file contains the snomed usage data struct and its implementation
//! It contains functionality for downloading and parsing SNOMED usage data files from NHS Digital. See https://digital.nhs.uk/data-and-information/publications/statistical/mi-snomed-code-usage-in-primary-care.
//!
//! Components of the file:
//!
//! SNOMED_Concept_ID:
//! SNOMED concepts which have been added to a patient record in a general practice system during the reporting period.
//!
//! Description:
//! The fully specified name associated with the SNOMED_Concept_ID on the final day of the reporting period (31 July).
//!
//! Usage:
//! The number of times that the SNOMED_Concept_ID was added into any patient record within the reporting period, rounded to the nearest 10. Usage of 1 to 4 is displayed as *. SNOMED concepts with no code usage are not included.
//! Important notes:
//! - Data prior to 2019 was originally submitted mostly in READ V2 or CTV3, but in the usage files, these codes have been mapped to corresponding SNOMED codes using final 2020 version of the mapping tables published by NHS England.
//! - The usage does not show how many patients had each code added to their record - each addition regardless of whether it is the same patient increments the count by 1. Therefore it is not possible to infer the number of individual patients with a particular code.
//! - For the 2011-12 to 2017-18 data, it is stated that "Current maximum value is approximately 250,000,000" - no such maximum is stated for the 2018-19 onwards data.
//!
//! Active_at_Start:
//! Active status of the SNOMED_Concept_ID on the first day of the reporting period. This is taken from the most recent UK clinical extension, or associated International extension, which was published up to the start of the reporting year (1 August).
//! 1 = SNOMED concept was published and was active.
//! 0 = SNOMED concept was either not yet available or was inactive.
//!
//! Active_at_End:
//! Active status of the SNOMED_Concept_ID on the last day of the reporting period. This is taken from the most recent UK clinical extension, or associated International extension, which was published up to the end of the reporting year (31 July).
//! 1 = SNOMED concept was published and was active.
//! 0 = SNOMED concept was either not yet available or was inactive.
use std::fs;
// Internal imports
use crate::errors::CodeListBuilderError;
// External imports
use csv;
use reqwest;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// One row of a SNOMED usage data file.
///
/// # Fields
/// * `snomed_concept_id` - The SNOMED concept id, kept as the text read from the file
/// * `description` - The description associated with the concept
/// * `usage` - The usage count, kept as text because a count of 1-4 is denoted by a `*`;
///   counts above 4 are denoted by a number rounded to the nearest 10
/// * `active_at_start` - Whether the concept was active at the start of the usage period
/// * `active_at_end` - Whether the concept was active at the end of the usage period
// `Eq` is derived alongside `PartialEq` because every field has total equality.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SnomedUsageDataEntry {
    pub snomed_concept_id: String,
    pub description: String,
    pub usage: String,
    pub active_at_start: bool,
    pub active_at_end: bool,
}

/// All usage data entries for a single usage year.
///
/// # Fields
/// * `usage_data` - The parsed entries, in the order they were read
/// * `usage_year` - The usage year the entries were requested for
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SnomedUsageData {
    pub usage_data: Vec<SnomedUsageDataEntry>,
    pub usage_year: String,
}
impl SnomedUsageData {
    /// Number of tab-separated columns every data row must contain.
    const EXPECTED_COLUMNS: usize = 5;

    /// Download and parse the SNOMED usage data for one usage year.
    ///
    /// The download URL is looked up under `usage_year` in a JSON object that maps
    /// usage years to URLs.
    ///
    /// # Arguments
    /// * `usage_year` - The usage year; used as the lookup key in the config and stored
    ///   on the returned value
    /// * `config_path` - Optional path to a JSON config file. When `None`, the config
    ///   embedded at compile time from `config/snomed_usage_config.json` is used
    ///
    /// # Returns
    /// Self or an error if the download fails
    ///
    /// # Errors
    /// Returns an error if the config file cannot be read or deserialised, if
    /// `usage_year` has no entry in the config, if the request fails or answers with a
    /// non-success status, or if the response body is rejected by
    /// [`SnomedUsageData::parse_from_string`].
    pub async fn download_usage(
        usage_year: &str,
        config_path: Option<String>,
    ) -> Result<Self, CodeListBuilderError> {
        // NOTE(review): `fs::read_to_string` blocks inside an async fn; this is only a
        // one-off read of the config file before the request is made.
        let config: HashMap<String, String> = match config_path {
            Some(path) => serde_json::from_str(&fs::read_to_string(path)?)?,
            // The embedded config is parsed in place; no owned copy is needed.
            None => serde_json::from_str(include_str!("config/snomed_usage_config.json"))?,
        };

        let url = config
            .get(usage_year)
            .ok_or_else(|| CodeListBuilderError::url_not_found(usage_year))?;

        let response = reqwest::get(url).await?;
        if !response.status().is_success() {
            let status = response.status().to_string();
            // Best effort: if the error body cannot be read, report an empty body.
            let body = response.text().await.unwrap_or_default();
            return Err(CodeListBuilderError::http_error_code(status, body));
        }

        let body = response.text().await?;
        let usage_data = Self::parse_from_string(&body)?;
        Ok(Self { usage_data, usage_year: usage_year.to_string() })
    }

    /// Parse snomed usage data from a string
    ///
    /// The input is read as tab-separated text whose first line is a header row. The
    /// header row is skipped and every following row must contain exactly five
    /// non-empty fields: concept id, description, usage, active-at-start and
    /// active-at-end. The two active flags must be `0` or `1`.
    ///
    /// Row and column numbers in error messages are 1-based; rows are counted from the
    /// first data row (the header is not counted).
    ///
    /// # Arguments
    /// * `data` - The data to parse
    ///
    /// # Returns
    /// * The parsed usage data or an error
    ///
    /// # Errors
    /// Returns an error if the CSV reader rejects a row, if a row does not have exactly
    /// five columns, if any field is empty after trimming whitespace, or if an active
    /// flag is neither `0` nor `1`.
    pub fn parse_from_string(
        data: &str,
    ) -> Result<Vec<SnomedUsageDataEntry>, CodeListBuilderError> {
        let mut rdr = csv::ReaderBuilder::new()
            .has_headers(true)
            .delimiter(b'\t')
            .from_reader(data.as_bytes());

        let mut usage_data = Vec::new();
        for (row_idx, result) in rdr.records().enumerate() {
            let record = result?;
            let row = row_idx + 1;

            if record.len() != Self::EXPECTED_COLUMNS {
                return Err(CodeListBuilderError::invalid_usage_data(format!(
                    "Invalid number of columns in record ({}) at row {}",
                    record.len(),
                    row
                )));
            }

            if let Some(col_idx) = record.iter().position(|field| field.trim().is_empty()) {
                // Report the column 1-based so that it matches the 1-based row number.
                return Err(CodeListBuilderError::invalid_usage_data(format!(
                    "Empty value found in record at row {}, column {}",
                    row,
                    col_idx + 1
                )));
            }

            usage_data.push(SnomedUsageDataEntry {
                snomed_concept_id: record[0].to_string(),
                description: record[1].to_string(),
                usage: record[2].to_string(),
                active_at_start: Self::parse_active_flag(&record[3], "Active_at_Start", row)?,
                active_at_end: Self::parse_active_flag(&record[4], "Active_at_End", row)?,
            });
        }
        Ok(usage_data)
    }

    /// Convert a `0`/`1` active flag into a `bool`.
    ///
    /// Surrounding whitespace is ignored. Any other value is reported as invalid usage
    /// data instead of being silently read as `false`.
    fn parse_active_flag(
        value: &str,
        column: &str,
        row: usize,
    ) -> Result<bool, CodeListBuilderError> {
        match value.trim() {
            "1" => Ok(true),
            "0" => Ok(false),
            other => Err(CodeListBuilderError::invalid_usage_data(format!(
                "Invalid {} value '{}' at row {} (expected 0 or 1)",
                column, other, row
            ))),
        }
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `SnomedUsageData::parse_from_string`.
    //!
    //! Fixtures spell out field separators as `\t` and row separators as `\n` so that
    //! the delimiters are visible and cannot be lost to whitespace conversion. A
    //! trailing `\` continues the literal on the next line and drops that line's
    //! leading indentation.
    use super::*;
    use crate::errors::CodeListBuilderError;

    const LONG_TEST_DATA: &str = "\
        SNOMED_Concept_ID\tDescription\tUsage\tActive_at_Start\tActive_at_End\n\
        279991000000102\tShort message service text message sent to patient (procedure)\t122292090\t1\t1\n\
        163030003\tOn examination - Systolic blood pressure reading (finding)\t59227180\t1\t1\n\
        163031004\tOn examination - Diastolic blood pressure reading (finding)\t59184050\t1\t1\n\
        163020007\tOn examination - blood pressure reading (finding)\t37837700\t1\t1\n\
        1000731000000107\tSerum creatinine level (observable entity)\t33211250\t1\t1\n\
        1000661000000107\tSerum sodium level (observable entity)\t31630420\t1\t1\n\
        1000651000000109\tSerum potassium level (observable entity)\t31542470\t1\t1\n\
        162763007\tOn examination - weight (finding)\t30836800\t1\t1\n\
        1022431000000105\tHaemoglobin estimation (observable entity)\t29864410\t1\t1\n\
        4468401000001106\tTriptorelin 3.75mg injection (pdr for recon)+solvent prefilled syringe (product)\t80\t0\t0";

    const SINGLE_ENTRY_TEST_DATA: &str = "\
        SNOMED_Concept_ID\tDescription\tUsage\tActive_at_Start\tActive_at_End\n\
        279991000000102\tShort message service text message sent to patient (procedure)\t122292090\t1\t1";

    /// A header plus one row yields exactly one fully populated entry.
    #[test]
    fn test_parse_from_string_single_entry() -> Result<(), CodeListBuilderError> {
        let entries = SnomedUsageData::parse_from_string(SINGLE_ENTRY_TEST_DATA)?;
        assert_eq!(entries.len(), 1);

        let entry = &entries[0];
        assert_eq!(entry.snomed_concept_id, "279991000000102");
        assert_eq!(
            entry.description,
            "Short message service text message sent to patient (procedure)"
        );
        assert_eq!(entry.usage, "122292090");
        assert!(entry.active_at_start);
        assert!(entry.active_at_end);
        Ok(())
    }

    /// Every row of the long fixture is parsed, in order, with the right field values.
    #[test]
    fn test_parse_from_string_multiple_entries() -> Result<(), CodeListBuilderError> {
        // (concept id, description, usage, active at start, active at end)
        let expected: [(&str, &str, &str, bool, bool); 10] = [
            (
                "279991000000102",
                "Short message service text message sent to patient (procedure)",
                "122292090",
                true,
                true,
            ),
            (
                "163030003",
                "On examination - Systolic blood pressure reading (finding)",
                "59227180",
                true,
                true,
            ),
            (
                "163031004",
                "On examination - Diastolic blood pressure reading (finding)",
                "59184050",
                true,
                true,
            ),
            (
                "163020007",
                "On examination - blood pressure reading (finding)",
                "37837700",
                true,
                true,
            ),
            (
                "1000731000000107",
                "Serum creatinine level (observable entity)",
                "33211250",
                true,
                true,
            ),
            ("1000661000000107", "Serum sodium level (observable entity)", "31630420", true, true),
            (
                "1000651000000109",
                "Serum potassium level (observable entity)",
                "31542470",
                true,
                true,
            ),
            ("162763007", "On examination - weight (finding)", "30836800", true, true),
            (
                "1022431000000105",
                "Haemoglobin estimation (observable entity)",
                "29864410",
                true,
                true,
            ),
            (
                "4468401000001106",
                "Triptorelin 3.75mg injection (pdr for recon)+solvent prefilled syringe (product)",
                "80",
                false,
                false,
            ),
        ];

        let entries = SnomedUsageData::parse_from_string(LONG_TEST_DATA)?;
        assert_eq!(entries.len(), expected.len());

        for (idx, (entry, (id, description, usage, at_start, at_end))) in
            entries.iter().zip(expected).enumerate()
        {
            assert_eq!(entry.snomed_concept_id, id, "concept id of entry {}", idx);
            assert_eq!(entry.description, description, "description of entry {}", idx);
            assert_eq!(entry.usage, usage, "usage of entry {}", idx);
            assert_eq!(entry.active_at_start, at_start, "active_at_start of entry {}", idx);
            assert_eq!(entry.active_at_end, at_end, "active_at_end of entry {}", idx);
        }
        Ok(())
    }

    /// A header without any data rows parses to an empty list.
    #[test]
    fn test_parse_from_string_empty_data() -> Result<(), CodeListBuilderError> {
        let test_data = "SNOMED_Concept_ID\tDescription\tUsage\tActive_at_Start\tActive_at_End";
        let entries = SnomedUsageData::parse_from_string(test_data)?;
        assert_eq!(entries.len(), 0);
        Ok(())
    }

    /// A suppressed usage count (`*`) is kept verbatim as text.
    #[test]
    fn test_parse_from_string_suppressed_usage() -> Result<(), CodeListBuilderError> {
        let test_data = "\
            SNOMED_Concept_ID\tDescription\tUsage\tActive_at_Start\tActive_at_End\n\
            162763007\tOn examination - weight (finding)\t*\t1\t0";
        let entries = SnomedUsageData::parse_from_string(test_data)?;
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].usage, "*");
        assert!(entries[0].active_at_start);
        assert!(!entries[0].active_at_end);
        Ok(())
    }

    /// Four columns in both header and row are rejected with the column count.
    #[test]
    fn test_parse_from_string_column_count_too_small() -> Result<(), CodeListBuilderError> {
        let test_data = "\
            SNOMED_Concept_ID\tDescription\tUsage\tActive_at_Start\n\
            279991000000102\tShort message service text message sent to patient (procedure)\t122292090\t1";
        let error = SnomedUsageData::parse_from_string(test_data).unwrap_err();
        let error_string = error.to_string();
        assert_eq!(
            &error_string,
            "Invalid usage data: Invalid number of columns in record (4) at row 1"
        );
        Ok(())
    }

    /// Six columns in both header and row are rejected with the column count.
    #[test]
    fn test_parse_from_string_column_count_too_big() -> Result<(), CodeListBuilderError> {
        let test_data = "\
            SNOMED_Concept_ID\tDescription\tUsage\tActive_at_Start\tActive_at_End\tActive_at_End\n\
            279991000000102\tShort message service text message sent to patient (procedure)\t122292090\t1\t1\t1";
        let error = SnomedUsageData::parse_from_string(test_data).unwrap_err();
        let error_string = error.to_string();
        assert_eq!(
            &error_string,
            "Invalid usage data: Invalid number of columns in record (6) at row 1"
        );
        Ok(())
    }

    /// Rows whose length differs from the header are rejected by the CSV reader itself.
    #[test]
    fn test_parse_from_string_unequal_column_count() -> Result<(), CodeListBuilderError> {
        let test_data = "\
            SNOMED_Concept_ID\tDescription\tUsage\tActive_at_Start\tActive_at_End\n\
            279991000000102\tShort message service text message sent to patient (procedure)\t122292090\t1\n\
            163030003\tOn examination - Systolic blood pressure reading (finding)\t59227180\t1\n\
            163031004\tOn examination - Diastolic blood pressure reading (finding)\t59184050\t1\t1\n\
            163020007\tOn examination - blood pressure reading (finding)\t37837700\t1\t1\n\
            1000731000000107\tSerum creatinine level (observable entity)\t33211250\t1\n\
            1000661000000107\tSerum sodium level (observable entity)\t31630420\t1\t1";
        let error = SnomedUsageData::parse_from_string(test_data).unwrap_err();
        let error_string = error.to_string();
        assert!(error_string.contains("CSV error:"));
        Ok(())
    }

    /// A row with an empty field is rejected and the error names the offending row.
    #[test]
    fn test_parse_from_string_empty_value() -> Result<(), CodeListBuilderError> {
        let test_data = "\
            SNOMED_Concept_ID\tDescription\tUsage\tActive_at_Start\tActive_at_End\n\
            279991000000102\t\t122292090\t1\t1";
        let error = SnomedUsageData::parse_from_string(test_data).unwrap_err();
        let error_string = error.to_string();
        assert!(error_string.contains("Empty value found in record at row 1"));
        Ok(())
    }
}