-- Create errors table for dead letter queue
--
-- Usage:
--   export PROJECT_ID=your-project
--   export DATASET=events
--   cat create_errors_table.sql | sed "s/{PROJECT_ID}/$PROJECT_ID/g" | sed "s/{DATASET}/$DATASET/g" | bq query --use_legacy_sql=false

-- Dead letter queue: every event that fails validation or processing lands
-- here with its full original payload so it can be debugged and replayed.
-- Partitioned by ingest date (30-day expiry) and clustered on the two
-- columns most debugging queries filter by.
CREATE TABLE IF NOT EXISTS `{PROJECT_ID}.{DATASET}.errors` (
  error_id STRING NOT NULL,
  timestamp TIMESTAMP NOT NULL,
  error_type STRING NOT NULL,       -- validation_error | processing_error
  error_message STRING NOT NULL,
  error_details JSON,               -- e.g. exception_type / stack_trace for processing errors
  stream STRING NOT NULL,
  original_payload JSON NOT NULL,   -- verbatim failed event, for replay
  retry_count INT64 DEFAULT 0,
  retry_after TIMESTAMP,
  date DATE NOT NULL
)
PARTITION BY date
CLUSTER BY error_type, stream
OPTIONS(
  description="Dead letter queue for failed events. All events that fail validation or processing are stored here with full context for debugging.",
  partition_expiration_days=30
);
-- Metadata table recording which error files have already been loaded,
-- so the loader can skip files it has seen and avoid duplicate rows.
CREATE TABLE IF NOT EXISTS `{PROJECT_ID}.{DATASET}._loaded_error_files` (
  file_path STRING NOT NULL,
  -- Fix: BigQuery's column grammar requires DEFAULT before NOT NULL
  -- (type [DEFAULT expr] [NOT NULL]); the reversed order is a syntax error.
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP() NOT NULL,
  row_count INT64,
  error_type STRING
)
PARTITION BY DATE(loaded_at)
OPTIONS(
  description="Metadata tracking which error files have been loaded to prevent duplicates"
);

-- Example queries for debugging

-- Find validation errors in last 24 hours
-- SELECT
--   error_type,
--   error_message,
--   stream,
--   COUNT(*) AS count
-- FROM `{PROJECT_ID}.{DATASET}.errors`
-- WHERE date >= CURRENT_DATE() - 1
--   AND error_type = 'validation_error'
-- GROUP BY error_type, error_message, stream
-- ORDER BY count DESC;

-- Find processing errors with stack traces
-- SELECT
--   timestamp,
--   error_message,
--   JSON_EXTRACT_SCALAR(error_details, '$.exception_type') AS exception,
--   JSON_EXTRACT_SCALAR(error_details, '$.stack_trace') AS stack_trace,
--   original_payload
-- FROM `{PROJECT_ID}.{DATASET}.errors`
-- WHERE error_type = 'processing_error'
-- ORDER BY timestamp DESC
-- LIMIT 10;

-- Error rate by stream
-- SELECT
--   stream,
--   error_type,
--   COUNT(*) AS error_count,
--   ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (PARTITION BY stream), 2) AS pct
-- FROM `{PROJECT_ID}.{DATASET}.errors`
-- WHERE date >= CURRENT_DATE() - 7
-- GROUP BY stream, error_type
-- ORDER BY stream, error_count DESC;