Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions rust/codelist-rs/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pub enum CodeListType {
ICD10,
SNOMED,
OPCS,
CTV3,
}

impl CodeListType {
Expand Down Expand Up @@ -56,6 +57,7 @@ impl FromStr for CodeListType {
"icd10" => Ok(CodeListType::ICD10),
"snomed" => Ok(CodeListType::SNOMED),
"opcs" => Ok(CodeListType::OPCS),
"ctv3" => Ok(CodeListType::CTV3),
invalid_string => Err(CodeListError::invalid_code_list_type(invalid_string)),
}
}
Expand All @@ -71,6 +73,7 @@ impl fmt::Display for CodeListType {
CodeListType::ICD10 => "ICD10",
CodeListType::SNOMED => "SNOMED",
CodeListType::OPCS => "OPCS",
CodeListType::CTV3 => "CTV3",
};
write!(f, "{s}")
}
Expand All @@ -85,6 +88,7 @@ mod tests {
assert!(matches!(CodeListType::from_str("icd10"), Ok(CodeListType::ICD10)));
assert!(matches!(CodeListType::from_str("snomed"), Ok(CodeListType::SNOMED)));
assert!(matches!(CodeListType::from_str("opcs"), Ok(CodeListType::OPCS)));
assert!(matches!(CodeListType::from_str("ctv3"), Ok(CodeListType::CTV3)));
assert!(matches!(CodeListType::from_str("invalid"),
Err(CodeListError::InvalidCodeListType { name }) if name == "invalid"));
}
Expand All @@ -94,12 +98,14 @@ mod tests {
assert!(matches!(CodeListType::from_str("ICD10"), Ok(CodeListType::ICD10)));
assert!(matches!(CodeListType::from_str("SNOMED"), Ok(CodeListType::SNOMED)));
assert!(matches!(CodeListType::from_str("OPCS"), Ok(CodeListType::OPCS)));
assert!(matches!(CodeListType::from_str("CTV3"), Ok(CodeListType::CTV3)));
}

#[test]
fn test_to_string() {
    // Each variant paired with the text its `Display` impl is expected to produce.
    let expectations = [
        (CodeListType::ICD10, "ICD10"),
        (CodeListType::SNOMED, "SNOMED"),
        (CodeListType::OPCS, "OPCS"),
        (CodeListType::CTV3, "CTV3"),
    ];

    for (variant, rendered) in expectations {
        assert_eq!(variant.to_string(), rendered);
    }
}
}
233 changes: 233 additions & 0 deletions rust/codelist-validator-rs/src/ctv3_validator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
//! CTV3 validator for validating CTV3 codes in a codelist
//!
//! Validation Rules
//! 1. The code must be exactly 5 characters in length.
//! 2. Only alphanumeric characters (a-z, A-Z, 0-9) and dots (.) are allowed.
//! 3. The code starts with 0-5 alphanumeric characters followed by dots to pad to 5 characters.
Comment on lines +1 to +6
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent documentation. Thanks for the effort here. It is always nice to have this right off the bat.

use std::sync::LazyLock;

use codelist_rs::codelist::CodeList;
use regex::Regex;

use crate::{errors::CodeListValidatorError, validator::CodeValidator};

/// Validator for CTV3 codes; wraps a borrowed [`CodeList`] whose entries and
/// codelist type are read when validating.
pub struct Ctv3Validator<'a>(pub &'a CodeList);

/// Pattern for a well-formed CTV3 code, compiled lazily on first use.
///
/// Accepted shapes (always five characters in total):
/// - five alphanumerics,
/// - four, three, two or one alphanumerics followed by dots padding to five,
/// - five dots.
static REGEX: LazyLock<Regex> = LazyLock::new(|| {
// The pattern is a fixed literal, so failing to compile it would be a programming error.
Regex::new(r"^(?:[a-zA-Z0-9]{5}|[a-zA-Z0-9]{4}\.|[a-zA-Z0-9]{3}\.\.|[a-zA-Z0-9]{2}\.\.\.|[a-zA-Z0-9]{1}\.\.\.\.|\.{5})$").expect("Unable to create regex")
});

impl CodeValidator for Ctv3Validator<'_> {
    /// Validates a single CTV3 code.
    ///
    /// The code must be exactly 5 characters long and match [`REGEX`].
    /// Length is counted in characters (Unicode scalar values), not bytes, so
    /// the "greater/less than 5 characters" messages stay accurate for input
    /// containing multi-byte characters. For ASCII input this is identical to
    /// a byte-length check.
    ///
    /// # Errors
    /// - An invalid-code-length error when the code has more or fewer than 5
    ///   characters.
    /// - An invalid-code-contents error when a 5-character code does not match
    ///   the expected format.
    fn validate_code(&self, code: &str) -> Result<(), CodeListValidatorError> {
        // `str::len` would count bytes; count characters to match the messages.
        let length_problem = match code.chars().count().cmp(&5) {
            std::cmp::Ordering::Greater => Some("Code is greater than 5 characters in length"),
            std::cmp::Ordering::Less => Some("Code is less than 5 characters in length"),
            std::cmp::Ordering::Equal => None,
        };

        if let Some(reason) = length_problem {
            return Err(CodeListValidatorError::invalid_code_length(
                code,
                reason,
                self.0.codelist_type.to_string(),
            ));
        }

        if !REGEX.is_match(code) {
            return Err(CodeListValidatorError::invalid_code_contents(
                code,
                "Code does not match the expected format",
                self.0.codelist_type.to_string(),
            ));
        }

        Ok(())
    }

    /// Validates every code in the wrapped codelist.
    ///
    /// All entries are checked; failures are collected rather than stopping at
    /// the first invalid code.
    ///
    /// # Errors
    /// Returns an invalid-codelist error carrying one message per invalid code
    /// when at least one entry fails [`Self::validate_code`].
    fn validate_all_code(&self) -> Result<(), CodeListValidatorError> {
        let reasons: Vec<String> = self
            .0
            .entries
            .iter()
            .filter_map(|(code, _)| self.validate_code(code).err())
            .map(|err| err.to_string())
            .collect();

        if reasons.is_empty() {
            Ok(())
        } else {
            Err(CodeListValidatorError::invalid_codelist(reasons))
        }
    }
}

#[cfg(test)]
mod tests {
use codelist_rs::{
codelist::CodeList,
errors::CodeListError,
metadata::{
categorisation_and_usage::CategorisationAndUsage, metadata_source::Source,
provenance::Provenance, purpose_and_context::PurposeAndContext,
validation_and_review::ValidationAndReview, Metadata,
},
types::CodeListType,
};

use super::*;
use crate::validator::Validator;

// Helper function to create test metadata
fn create_test_metadata() -> Metadata {
Metadata::new(
Provenance::new(Source::ManuallyCreated, None),
CategorisationAndUsage::new(None, None, None),
PurposeAndContext::new(None, None, None),
ValidationAndReview::new(None, None, None, None, None),
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can simplify this with the changes from #68 that @oylenshpeegul added. We should be able to do

Metadata::default(),

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes will change this now, thanks!

)
}

// Helper function to create a CTV3 test codelist named "test_codelist" using
// the test metadata and `None` for the final constructor argument. No entries
// are added here; each test adds the codes it needs. Returning `Result` lets
// tests call it with `?`.
fn create_test_codelist() -> Result<CodeList, CodeListError> {
let codelist = CodeList::new(
"test_codelist".to_string(),
CodeListType::CTV3,
create_test_metadata(),
None,
);
Ok(codelist)
}

#[test]
fn test_validate_codelist_with_valid_code() -> Result<(), CodeListError> {
    let mut codelist = create_test_codelist()?;
    // Propagate a failed insert instead of discarding it: if the entry were
    // silently not added, the validation below would run on a codelist without
    // this code and the test could pass without checking anything.
    codelist.add_entry("A9f..".to_string(), None, None)?;
    assert!(codelist.validate_codes().is_ok());
    Ok(())
}

#[test]
fn test_validate_code_with_invalid_code_length_too_long() -> Result<(), CodeListError> {
    let codelist = create_test_codelist()?;
    // Ten characters: well over the five allowed for CTV3.
    let message = Ctv3Validator(&codelist)
        .validate_code("A009000000")
        .unwrap_err()
        .to_string();
    assert_eq!(message, "Code A009000000 is an invalid length for type CTV3. Reason: Code is greater than 5 characters in length");
    Ok(())
}

#[test]
fn test_validate_code_with_invalid_code_length_too_short() -> Result<(), CodeListError> {
    let codelist = create_test_codelist()?;
    // Three characters: fewer than the five required for CTV3.
    let message = Ctv3Validator(&codelist)
        .validate_code("Af.")
        .unwrap_err()
        .to_string();
    assert_eq!(message, "Code Af. is an invalid length for type CTV3. Reason: Code is less than 5 characters in length");
    Ok(())
}

#[test]
fn test_validate_invalid_code_dot_first_character() -> Result<(), CodeListError> {
    let codelist = create_test_codelist()?;
    // Correct length, but a dot precedes the alphanumeric characters.
    let message = Ctv3Validator(&codelist)
        .validate_code(".a009")
        .unwrap_err()
        .to_string();
    assert_eq!(message, "Code .a009 contents is invalid for type CTV3. Reason: Code does not match the expected format");
    Ok(())
}

#[test]
fn test_validate_invalid_code_dot_middle_character_between_letters() -> Result<(), CodeListError>
{
let codelist = create_test_codelist()?;
let validator = Ctv3Validator(&codelist);
let code = "10a.f";
Comment on lines +126 to +130
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great set of tests.

let error = validator.validate_code(code).unwrap_err().to_string();
assert_eq!(error, "Code 10a.f contents is invalid for type CTV3. Reason: Code does not match the expected format");
Ok(())
}

#[test]
fn test_validate_invalid_code_invalid_characters() -> Result<(), CodeListError> {
    let codelist = create_test_codelist()?;
    // Correct length, but `!` is neither alphanumeric nor a dot.
    let message = Ctv3Validator(&codelist)
        .validate_code("Af!!!")
        .unwrap_err()
        .to_string();
    assert_eq!(message, "Code Af!!! contents is invalid for type CTV3. Reason: Code does not match the expected format");
    Ok(())
}

#[test]
fn test_validate_codelist_with_valid_codes() -> Result<(), CodeListError> {
    let mut codelist = create_test_codelist()?;

    // One code for each accepted shape: zero to five alphanumerics, dot-padded to five.
    let valid_codes = ["Af918", "ABb..", "alkif", "F....", "bn89.", "Me...", "99999", "....."];
    for code in valid_codes {
        codelist.add_entry(code.to_string(), None, None)?;
    }

    assert!(codelist.validate_codes().is_ok());
    Ok(())
}

#[test]
fn test_validate_codelist_with_all_invalid_codes() -> Result<(), CodeListError> {
    let mut codelist = create_test_codelist()?;

    // Every code here is invalid: wrong length or wrong contents.
    let invalid_codes = ["A00900000", "10", "a.9jb", "..9jJ", "A00A", "*unf.", "..j..", "9874ji"];
    for code in invalid_codes {
        codelist.add_entry(code.to_string(), None, None)?;
    }

    let error = codelist.validate_codes().unwrap_err();
    let rendered = error.to_string();

    assert!(rendered.contains("Some codes in the list are invalid. Details:"));
    assert!(rendered.contains("Code A00900000 is an invalid length for type CTV3. Reason: Code is greater than 5 characters in length"));
    assert!(rendered.contains("Code 10 is an invalid length for type CTV3. Reason: Code is less than 5 characters in length"));
    assert!(rendered.contains("Code a.9jb contents is invalid for type CTV3. Reason: Code does not match the expected format"));
    assert!(rendered.contains("Code ..9jJ contents is invalid for type CTV3. Reason: Code does not match the expected format"));
    assert!(rendered.contains("Code A00A is an invalid length for type CTV3. Reason: Code is less than 5 characters in length"));
    assert!(rendered.contains("Code *unf. contents is invalid for type CTV3. Reason: Code does not match the expected format"));
    assert!(rendered.contains("Code ..j.. contents is invalid for type CTV3. Reason: Code does not match the expected format"));
    assert!(rendered.contains("Code 9874ji is an invalid length for type CTV3. Reason: Code is greater than 5 characters in length"));

    // One collected reason per invalid entry.
    assert!(
        matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 8)
    );
    Ok(())
}

#[test]
fn test_validate_codelist_with_mixed_invalid_and_valid_codes() -> Result<(), CodeListError> {
    let mut codelist = create_test_codelist()?;

    // The first four codes are valid; the last four are not.
    let mixed_codes = ["A54..", "1009.", "jk90L", "LK...", "N40", "A00.l", "Q90.....", "A..9k"];
    for code in mixed_codes {
        codelist.add_entry(code.to_string(), None, None)?;
    }

    let error = codelist.validate_codes().unwrap_err();
    let rendered = error.to_string();

    assert!(rendered.contains("Some codes in the list are invalid. Details:"));
    assert!(rendered.contains("Code N40 is an invalid length for type CTV3. Reason: Code is less than 5 characters in length"));
    assert!(rendered.contains("Code A00.l contents is invalid for type CTV3. Reason: Code does not match the expected format"));
    assert!(rendered.contains("Code Q90..... is an invalid length for type CTV3. Reason: Code is greater than 5 characters in length"));
    assert!(rendered.contains("Code A..9k contents is invalid for type CTV3. Reason: Code does not match the expected format"));

    // Only the invalid entries contribute a reason.
    assert!(
        matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 4)
    );
    Ok(())
}
}
20 changes: 20 additions & 0 deletions rust/codelist-validator-rs/src/icd10_validator.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
//! ICD10 validator for validating ICD10 codes in a codelist
//!
//! Validation Rules
//! 1. The code must be 7 characters or less.
//! 2. The first character must be a letter.
//! 3. The second and third characters must be numbers.
//! 4. The fourth character must be a dot, a number, or X.
//! 5. If the fourth character is a dot, there must be at least 1 number after the dot.
//! 6. If the fourth character is an X, there are no further characters.
//! 7. The fifth to seventh characters must be numbers if present.
use std::sync::LazyLock;

use codelist_rs::codelist::CodeList;
Expand Down Expand Up @@ -179,6 +189,16 @@ mod tests {
Ok(())
}

#[test]
fn test_validate_invalid_code_lowercase_letter() -> Result<(), CodeListError> {
    let codelist = create_test_codelist()?;
    // Same shape as a valid code except for the lowercase leading letter.
    let message = IcdValidator(&codelist)
        .validate_code("a54")
        .unwrap_err()
        .to_string();
    assert_eq!(message, "Code a54 contents is invalid for type ICD10. Reason: Code does not match the expected format");
    Ok(())
}

#[test]
fn test_validate_codelist_with_valid_codes() -> Result<(), CodeListError> {
let mut codelist = create_test_codelist()?;
Expand Down
1 change: 1 addition & 0 deletions rust/codelist-validator-rs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
extern crate core;

pub mod ctv3_validator;
pub mod errors;
pub mod icd10_validator;
pub mod opcs_validator;
Expand Down
8 changes: 8 additions & 0 deletions rust/codelist-validator-rs/src/opcs_validator.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
//! OPCS validator for validating OPCS codes in a codelist
//!
//! Validation Rules
//! 1. The code must be 3-5 characters long.
//! 2. The first character must be a letter.
//! 3. The second and third characters must be numbers.
//! 4. If there is a fourth character and it is a dot, there must be a number after the dot.
Comment on lines +1 to +7
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really appreciate this.

//! 5. The fifth character, if present, is a number.
use std::sync::LazyLock;

use codelist_rs::codelist::CodeList;
Expand Down
6 changes: 5 additions & 1 deletion rust/codelist-validator-rs/src/snomed_validator.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
// SNOMED validator for validating SNOMED codes in a codelist
//! SNOMED validator for validating SNOMED codes in a codelist
//!
//! Validation Rules
//! 1. The code consists of numbers only.
//! 2. The code must be between 6 and 18 numbers in length.
use codelist_rs::codelist::CodeList;

use crate::{errors::CodeListValidatorError, validator::CodeValidator};
Expand Down
9 changes: 5 additions & 4 deletions rust/codelist-validator-rs/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
use codelist_rs::{codelist::CodeList, types::CodeListType};

use crate::{
errors::CodeListValidatorError, icd10_validator::IcdValidator, opcs_validator::OpcsValidator,
snomed_validator::SnomedValidator,
ctv3_validator::Ctv3Validator, errors::CodeListValidatorError, icd10_validator::IcdValidator,
opcs_validator::OpcsValidator, snomed_validator::SnomedValidator,
};

/// Validator trait for validating a codelist.
///
/// `validate_code`: validates a single OPCS code
/// `validate_all_code`: validates all OPCS codes in the codelist
/// `validate_code`: validates a single code
/// `validate_all_code`: validates all codes in the codelist
pub(crate) trait CodeValidator {
fn validate_code(&self, code: &str) -> Result<(), CodeListValidatorError>; // for 1 code
fn validate_all_code(&self) -> Result<(), CodeListValidatorError>;
Expand All @@ -26,6 +26,7 @@ impl Validator for CodeList {
CodeListType::ICD10 => IcdValidator(self).validate_all_code(),
CodeListType::SNOMED => SnomedValidator(self).validate_all_code(),
CodeListType::OPCS => OpcsValidator(self).validate_all_code(),
CodeListType::CTV3 => Ctv3Validator(self).validate_all_code(),
}
}
}
Loading