Skip to content

Commit d3678f2

Browse files
committed
fix: ensure that data contract errors can be assigned an error level
1 parent c1c5f3e commit d3678f2

9 files changed

Lines changed: 56 additions & 48 deletions

File tree

.tool-versions

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
python 3.11.14
2-
poetry 2.3.3
1+
python 3.12.12
2+
poetry 2.4.1
33
java liberica-1.8.0

poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/dve/core_engine/message.py

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ class DataContractErrorDetail(BaseModel):
3636
"""Define custom error codes for validation issues raised during the data contract phase"""
3737

3838
error_code: str
39+
error_level: Optional[FailureType] = "record"
40+
is_informational: Optional[bool] = False
3941
error_message: Optional[str] = None
4042
reporting_entity: Optional[str] = None
4143

@@ -247,26 +249,14 @@ def from_pydantic_error(
247249
messages: Messages = []
248250
for error_dict in error.errors():
249251
error_type = error_dict["type"]
252+
# TODO - review for pydantic v2: check how it handles null vs. not-provided values
250253
if "none.not_allowed" in error_type or "value_error.missing" in error_type:
251254
category = "Blank"
252255
else:
253256
category = "Bad value"
254-
error_code = error_type
255-
if "." in error_code:
256-
error_code = error_code.split(".", 1)[-1]
257-
258-
if error_code in INTEGRITY_ERROR_CODES:
259-
failure_type: FailureType = "integrity"
260-
elif error_code in SUBMISSION_ERROR_CODES:
261-
failure_type = "submission"
262-
else:
263-
failure_type = "record"
264-
257+
265258
error_field = ".".join([idx for idx in error_dict["loc"] if not isinstance(idx, int)])
266-
267-
is_informational = False
268-
if error_code.endswith("warning"):
269-
is_informational = True
259+
270260
error_detail: DataContractErrorDetail = error_details.get( # type: ignore
271261
error_field, DEFAULT_ERROR_DETAIL
272262
).get(category)
@@ -276,8 +266,8 @@ def from_pydantic_error(
276266
entity=error_detail.reporting_entity or entity,
277267
original_entity=entity,
278268
record=record,
279-
failure_type=failure_type,
280-
is_informational=is_informational,
269+
failure_type=error_detail.error_level,
270+
is_informational=error_detail.is_informational,
281271
error_type=error_type,
282272
error_location=error_dict["loc"], # type: ignore
283273
error_message=error_detail.template_message(record, error_dict["loc"]),

tests/features/movies.feature

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,18 @@ Feature: Pipeline tests using the movies dataset
1919
Then the movies entity is stored as a parquet after the file_transformation phase
2020
And the latest audit record for the submission is marked with processing status data_contract
2121
When I run the data contract phase
22-
Then there are 3 record rejections from the data_contract phase
22+
Then there is 1 submission rejection from the data_contract phase
23+
And there are 3 record rejections from the data_contract phase
2324
And there are errors with the following details and associated error_count from the data_contract phase
24-
| Entity | ErrorCode | ErrorMessage | RecordIndex | error_count |
25-
| movies | BLANKYEAR | year not provided | 2 | 1 |
26-
| movies_rename_test | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 | 1 |
27-
| movies | DODGYDATE | date_joined value is not valid: daft_date | 1 | 1 |
25+
| Entity | ErrorCode | ErrorMessage | RecordIndex | error_count |
26+
| movies | BLANKYEAR | year not provided | 2 | 1 |
27+
| movies_rename_test | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 | 1 |
28+
| movies | DODGYDATE | date_joined value is not valid: daft_date | 1 | 1 |
29+
| movies | BLANKTITLE | title should not be blank | 4 | 1 |
2830
And the movies entity is stored as a parquet after the data_contract phase
2931
And the latest audit record for the submission is marked with processing status business_rules
3032
When I run the business rules phase
31-
Then The rules restrict "movies" to 2 qualifying records
33+
Then The rules restrict "movies" to 3 qualifying records
3234
And there are errors with the following details and associated error_count from the business_rules phase
3335
| ErrorCode | ErrorMessage | RecordIndex | error_count |
3436
| LIMITED_RATINGS | Movie has too few ratings ([6.5]) | 4 | 1 |
@@ -37,10 +39,11 @@ Feature: Pipeline tests using the movies dataset
3739
When I run the error report phase
3840
Then An error report is produced
3941
And The statistics entry for the submission shows the following information
40-
| parameter | value |
41-
| record_count | 5 |
42-
| number_record_rejections | 4 |
43-
| number_warnings | 1 |
42+
| parameter | value |
43+
| record_count | 5 |
44+
| number_submission_rejections | 1 |
45+
| number_record_rejections | 3 |
46+
| number_warnings | 2 |
4447
And the error aggregates are persisted
4548

4649
Scenario: Validate and filter movies (duckdb)
@@ -55,16 +58,18 @@ Feature: Pipeline tests using the movies dataset
5558
Then the movies entity is stored as a parquet after the file_transformation phase
5659
And the latest audit record for the submission is marked with processing status data_contract
5760
When I run the data contract phase
58-
Then there are 3 record rejections from the data_contract phase
61+
Then there is 1 submission rejection from the data_contract phase
62+
And there are 3 record rejections from the data_contract phase
5963
And there are errors with the following details and associated error_count from the data_contract phase
6064
| Entity | ErrorCode | ErrorMessage | RecordIndex | error_count |
6165
| movies | BLANKYEAR | year not provided | 2 | 1 |
6266
| movies_rename_test | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 | 1 |
6367
| movies | DODGYDATE | date_joined value is not valid: daft_date | 1 | 1 |
68+
| movies | BLANKTITLE | title should not be blank | 4 | 1 |
6469
And the movies entity is stored as a parquet after the data_contract phase
6570
And the latest audit record for the submission is marked with processing status business_rules
6671
When I run the business rules phase
67-
Then The rules restrict "movies" to 2 qualifying records
72+
Then The rules restrict "movies" to 3 qualifying records
6873
And there are errors with the following details and associated error_count from the business_rules phase
6974
| ErrorCode | ErrorMessage | RecordIndex | error_count |
7075
| LIMITED_RATINGS | Movie has too few ratings ([6.5]) | 4 | 1 |
@@ -73,9 +78,10 @@ Feature: Pipeline tests using the movies dataset
7378
When I run the error report phase
7479
Then An error report is produced
7580
And The statistics entry for the submission shows the following information
76-
| parameter | value |
77-
| record_count | 5 |
78-
| number_record_rejections | 4 |
79-
| number_warnings | 1 |
81+
| parameter | value |
82+
| record_count | 5 |
83+
| number_submission_rejections | 1 |
84+
| number_record_rejections | 3 |
85+
| number_warnings | 2 |
8086
And the error aggregates are persisted
8187

tests/features/planets.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Feature: Pipeline tests using the planets dataset
1818
And the latest audit record for the submission is marked with processing status data_contract
1919
When I run the data contract phase
2020
Then there is 1 record rejection from the data_contract phase
21+
And there are no submission rejections from the data_contract phase
2122
And the planets entity is stored as a parquet after the data_contract phase
2223
And the latest audit record for the submission is marked with processing status business_rules
2324
When I run the business rules phase

tests/features/steps/steps_pipeline.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,13 +155,13 @@ def create_error_report(context: Context):
155155

156156

157157

158-
@then("there are {expected_num_errors:d} record rejections from the {service} phase")
159-
@then("there is {expected_num_errors:d} record rejection from the {service} phase")
160-
@then("there are no record rejections from the {service} phase")
161-
def get_record_rejects_from_service(context: Context, service: str, expected_num_errors: int = 0):
158+
@then("there are {expected_num_errors:d} {error_type} rejections from the {service} phase")
159+
@then("there is {expected_num_errors:d} {error_type} rejection from the {service} phase")
160+
@then("there are no {error_type} rejections from the {service} phase")
161+
def get_record_rejects_from_service(context: Context, service: str, error_type: str, expected_num_errors: int = 0):
162162
processing_path = ctxt.get_processing_location(context)
163163
message_df = load_errors_from_service(processing_path, service)
164-
num_rejections = message_df.filter(pl.col("FailureType").eq("record")).shape[0]
164+
num_rejections = message_df.filter(pl.col("FailureType").eq(error_type)).shape[0]
165165
assert num_rejections == expected_num_errors, f"Got {num_rejections} actual rejections"
166166

167167

tests/test_core_engine/test_message.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,11 @@ class TestModel(BaseModel):
183183

184184
custom_error_details: str = """
185185
{"idx": {"Blank": {"error_code": "IDBLANKERRCODE",
186-
"error_message": "idx is a mandatory field"},
186+
"error_message": "idx is a mandatory field",
187+
"is_informational": true},
187188
"Bad value": {"error_code": "IDDODGYVALCODE",
188-
"error_message": "idx value is dodgy: {{idx}}"}},
189+
"error_message": "idx value is dodgy: {{idx}}",
190+
"error_level": "submission"}},
189191
"date_field": {"Bad value": {"error_code": "DATEDODGYVALCODE",
190192
"error_message": "date_field value is dodgy: idx: {{idx}}, date_field: {{date_field}}"}}}
191193
"""
@@ -216,10 +218,16 @@ class TestModel(BaseModel):
216218
assert len(msgs_bad) == 3
217219
assert msgs_bad[0].error_code == error_details.get("date_field").get("Bad value").error_code
218220
assert msgs_bad[0].error_message == error_details.get("date_field").get("Bad value").template_message(_bad_value_data)
221+
assert msgs_bad[0].failure_type == "record"
222+
assert not msgs_bad[0].is_informational
219223
assert msgs_bad[1].error_code == error_details.get("idx").get("Bad value").error_code
220224
assert msgs_bad[1].error_message == error_details.get("idx").get("Bad value").template_message(_bad_value_data)
225+
assert msgs_bad[1].failure_type == "submission"
226+
assert not msgs_bad[1].is_informational
221227
assert msgs_bad[2].error_code == bad_val_default.error_code
222228
assert msgs_bad[2].error_message == bad_val_default.error_message
229+
assert msgs_bad[2].failure_type == "record"
230+
assert not msgs_bad[2].is_informational
223231

224232
msgs_blank = FeedbackMessage.from_pydantic_error(entity="test_entity",
225233
record = _blank_value_data,
@@ -232,6 +240,7 @@ class TestModel(BaseModel):
232240
assert len(msgs_blank) == 2
233241
assert msgs_blank[0].error_code == error_details.get("idx").get("Blank").error_code
234242
assert msgs_blank[0].error_message == error_details.get("idx").get("Blank").template_message(_blank_value_data)
243+
assert msgs_blank[0].is_informational
235244
assert msgs_blank[1].error_code == blank_default.error_code
236245
assert msgs_blank[1].error_message == blank_default.error_message
237246

@@ -281,4 +290,5 @@ class TestModel(BaseModel):
281290
msg = msg[0]
282291
assert msg.error_code == "DATEDODGYVALCODE"
283292
assert msg.error_message == "date_field value is dodgy: a_field: test, date_field: Barry"
293+
284294

tests/testdata/movies/movies.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
]
3333
},
3434
{
35-
"title": "One with a cat and a dog",
3635
"year": 2020,
3736
"genre": ["Fantasy", "Family"],
3837
"duration_minutes": 110,

tests/testdata/movies/movies_contract_error_details.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
"title": {
33
"Blank": {
44
"error_code": "BLANKTITLE",
5-
"error_message": "title should not be blank"
5+
"error_message": "title should not be blank",
6+
"error_level": "submission"
67
}
78
},
89
"year": {
910
"Blank": {
1011
"error_code": "BLANKYEAR",
11-
"error_message": "year not provided"
12+
"error_message": "year not provided",
13+
"is_informational": true
1214
},
1315
"Bad value": {
1416
"error_code": "DODGYYEAR",

0 commit comments

Comments
 (0)