Skip to content

Commit c8d2271

Browse files
Optimize parsing, filtering, and rendering performance (#27)
## Summary

This PR reduces allocation and CPU overhead across parsing, filtering, query execution, and table rendering in `cirup_core`.

Highlights:

- Replace regex-backed text filter compilation and matching with a purpose-built matcher for the supported pattern subset.
- Replace `prettytable-rs` table rendering with a manual ASCII formatter while preserving the existing output layout.
- Remove `dot_json`-based JSON flattening in favor of direct recursive flattening.
- Replace the RESTEXT regex parser with a manual parser and direct writer.
- Reduce RESX escaping and parsing allocations by writing escapes directly into output buffers and iterating parsed nodes directly.
- Reuse fast in-memory query execution for rusqlite-backed queries and avoid extra row cloning when reading DB results.
- Preallocate file reads and avoid redundant UTF-8 output copies when writing resource files.

## Benchmark Highlights

- Query filter compilation (`5,000` key patterns + `5,000` value patterns): `~1.251s -> ~2.451ms`
- Filter pass on `50,000` resources with key and value filters: `~2.57ms -> ~1.71ms`
- Repeated resource table rendering (`20,000` iterations): `~102.6ms -> ~13.3ms`
- Repeated report table rendering (`20,000` iterations): `~193.1ms -> ~26.5ms`

## Validation

- `cargo fmt --all`
- `cargo test --workspace`
- `cargo clippy --workspace --all-targets`
1 parent 9b760e6 commit c8d2271

11 files changed

Lines changed: 1231 additions & 720 deletions

File tree

Cargo.lock

Lines changed: 20 additions & 262 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cirup_core/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,13 @@ turso-rust = ["dep:tokio", "dep:turso", "dep:libsql"]
1010
rusqlite-c = ["dep:rusqlite"]
1111

1212
[dependencies]
13-
regex = "1.0"
1413
serde = { version = "1.0", features = ["derive"] }
1514
xml-rs = "0.8.0"
1615
dot_json = "0.2.0"
1716
lazy_static = "1.0.0"
18-
prettytable-rs = "^0.10"
1917
log = "0.4"
2018
sha2 = "0.10"
19+
unicode-width = "0.2"
2120

2221
[dependencies.uuid]
2322
version = "0.6"

cirup_core/src/file.rs

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ use std::path::Path;
44

55
use std::collections::HashMap;
66
use std::sync::Mutex;
7+
#[cfg(test)]
8+
use std::time::Instant;
79

810
use sha2::{Digest, Sha256};
911

@@ -50,8 +52,14 @@ pub(crate) fn load_string_from_file(filename: &str) -> Result<String, Box<dyn Er
5052
if let Some(text) = vfile_get(filename) {
5153
return Ok(text);
5254
}
55+
5356
let mut file = fs::File::open(filename)?;
54-
let mut text = String::new();
57+
let capacity = file
58+
.metadata()
59+
.ok()
60+
.and_then(|metadata| usize::try_from(metadata.len()).ok())
61+
.unwrap_or(0);
62+
let mut text = String::with_capacity(capacity);
5563
file.read_to_string(&mut text)?;
5664
Ok(text)
5765
}
@@ -74,13 +82,14 @@ fn should_write_output(output_hash: [u8; 32], existing_bytes: Option<&[u8]>, tou
7482
output_hash != sha256_hash(existing_bytes)
7583
}
7684

77-
fn encode_utf8(text: &str, output_encoding: OutputEncoding) -> Vec<u8> {
85+
fn encode_utf8_owned(text: String, output_encoding: OutputEncoding) -> Vec<u8> {
7886
match output_encoding {
79-
OutputEncoding::Utf8NoBom => text.as_bytes().to_vec(),
87+
OutputEncoding::Utf8NoBom => text.into_bytes(),
8088
OutputEncoding::Utf8Bom => {
89+
let text = text.into_bytes();
8190
let mut output = Vec::with_capacity(UTF8_BOM.len() + text.len());
8291
output.extend_from_slice(&UTF8_BOM);
83-
output.extend_from_slice(text.as_bytes());
92+
output.extend_from_slice(&text);
8493
output
8594
}
8695
}
@@ -95,17 +104,17 @@ fn output_bytes_for_format(
95104
FormatType::Json => {
96105
let file_format = JsonFileFormat {};
97106
let text = file_format.write_to_str(resources);
98-
encode_utf8(&text, output_encoding)
107+
encode_utf8_owned(text, output_encoding)
99108
}
100109
FormatType::Resx => {
101110
let file_format = ResxFileFormat {};
102111
let text = file_format.write_to_str(resources);
103-
encode_utf8(&text, output_encoding)
112+
encode_utf8_owned(text, output_encoding)
104113
}
105114
FormatType::Restext => {
106115
let file_format = RestextFileFormat {};
107116
let text = file_format.write_to_str(resources);
108-
encode_utf8(&text, output_encoding)
117+
encode_utf8_owned(text, output_encoding)
109118
}
110119
FormatType::Unknown => Vec::new(),
111120
}
@@ -355,3 +364,31 @@ fn would_save_resource_file_reports_true_for_missing_output() {
355364

356365
assert!(would_write);
357366
}
367+
368+
#[test]
369+
#[ignore = "benchmark: run manually with --ignored --nocapture"]
370+
#[allow(clippy::print_stdout)]
371+
fn benchmark_output_bytes_for_format_large_input() {
372+
let resources = (0..50_000usize)
373+
.map(|index| Resource::new(&format!("group{index}.key{}", index % 13), &format!("value{index}")))
374+
.collect::<Vec<_>>();
375+
376+
let started = Instant::now();
377+
let utf8_no_bom = output_bytes_for_format(FormatType::Json, &resources, OutputEncoding::Utf8NoBom);
378+
let utf8_no_bom_elapsed = started.elapsed();
379+
380+
let started = Instant::now();
381+
let utf8_bom = output_bytes_for_format(FormatType::Json, &resources, OutputEncoding::Utf8Bom);
382+
let utf8_bom_elapsed = started.elapsed();
383+
384+
assert!(utf8_no_bom.len() < utf8_bom.len());
385+
386+
println!(
387+
"output-bytes benchmark: resources={} utf8_no_bom_bytes={} utf8_bom_bytes={} no_bom={:?} bom={:?}",
388+
resources.len(),
389+
utf8_no_bom.len(),
390+
utf8_bom.len(),
391+
utf8_no_bom_elapsed,
392+
utf8_bom_elapsed
393+
);
394+
}

cirup_core/src/json.rs

Lines changed: 73 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
extern crate dot_json;
21
extern crate serde;
32
extern crate serde_json;
43

5-
use dot_json::value_to_dot;
64
use serde::Serialize;
75
use serde_json::{Map, Value};
6+
#[cfg(test)]
7+
use std::time::Instant;
88

99
use crate::Resource;
1010
use crate::file::FileFormat;
@@ -14,23 +14,37 @@ use std::error::Error;
1414
pub(crate) struct JsonFileFormat {}
1515

1616
fn json_dot_insert(root_map: &mut Map<String, Value>, name: &str, value: &str) {
17-
if let Some(dot_index) = name.find('.') {
18-
let root_path = &name[0..dot_index];
19-
let child_path = &name[dot_index + 1..name.len()];
17+
if let Some((root_path, child_path)) = name.split_once('.') {
18+
let child_value = root_map
19+
.entry(root_path.to_owned())
20+
.or_insert_with(|| Value::Object(Map::new()));
2021

21-
if !root_map.contains_key(root_path) {
22-
let child_map: Map<String, Value> = Map::new();
23-
root_map.insert(root_path.to_owned(), Value::Object(child_map));
24-
}
25-
26-
if let Some(Value::Object(child_map)) = root_map.get_mut(root_path) {
22+
if let Value::Object(child_map) = child_value {
2723
json_dot_insert(child_map, child_path, value);
2824
}
2925
} else {
3026
root_map.insert(name.to_owned(), Value::String(value.to_owned()));
3127
}
3228
}
3329

30+
fn flatten_json_value(value: &Value, path: &mut String, resources: &mut Vec<Resource>) {
31+
match value {
32+
Value::Object(object) => {
33+
for (key, child_value) in object {
34+
let prefix_len = path.len();
35+
if prefix_len > 0 {
36+
path.push('.');
37+
}
38+
path.push_str(key);
39+
flatten_json_value(child_value, path, resources);
40+
path.truncate(prefix_len);
41+
}
42+
}
43+
Value::String(text) => resources.push(Resource::new(path, text)),
44+
_ => {}
45+
}
46+
}
47+
3448
fn json_to_string_pretty(value: &Map<String, Value>) -> String {
3549
let writer = Vec::new();
3650
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
@@ -47,17 +61,18 @@ impl FileFormat for JsonFileFormat {
4761
fn parse_from_str(&self, text: &str) -> Result<Vec<Resource>, Box<dyn Error>> {
4862
let mut resources: Vec<Resource> = Vec::new();
4963
let root_value: Value = serde_json::from_str(text)?;
50-
let root_value_dot = value_to_dot(&root_value);
51-
let root_object_dot = match root_value_dot.as_object() {
64+
let root_object = match root_value.as_object() {
5265
Some(object) => object,
53-
None => Err("json dot value is not an object")?,
66+
None => Err("json value is not an object")?,
5467
};
55-
for (key, value) in root_object_dot.iter() {
56-
if let Some(value) = value.as_str() {
57-
let resource = Resource::new(key.as_str(), value);
58-
resources.push(resource);
59-
}
68+
69+
let mut path = String::new();
70+
for (key, value) in root_object {
71+
path.clear();
72+
path.push_str(key);
73+
flatten_json_value(value, &mut path, &mut resources);
6074
}
75+
6176
Ok(resources)
6277
}
6378

@@ -161,3 +176,42 @@ fn test_json_write() {
161176
//println!("{}", expected_text);
162177
assert_eq!(actual_text, expected_text);
163178
}
179+
180+
#[test]
181+
#[ignore = "benchmark: run manually with --ignored --nocapture"]
182+
#[allow(clippy::print_stdout)]
183+
fn benchmark_json_parse_and_write_large_input() {
184+
let file_format = JsonFileFormat {};
185+
let repetitions = 5_000usize;
186+
let mut resources = Vec::with_capacity(repetitions * 6);
187+
188+
for index in 0..repetitions {
189+
let prefix = format!("group{index}");
190+
resources.push(Resource::new(&format!("{prefix}.lblBoat"), "I'm on a boat."));
191+
resources.push(Resource::new(&format!("{prefix}.lblYolo"), "You only live once"));
192+
resources.push(Resource::new(&format!("{prefix}.lblDogs"), "Who let the dogs out?"));
193+
resources.push(Resource::new(&format!("{prefix}.language.en"), "English"));
194+
resources.push(Resource::new(&format!("{prefix}.language.fr"), "French"));
195+
resources.push(Resource::new(&format!("{prefix}.very.deep.object"), "value"));
196+
}
197+
198+
let started = Instant::now();
199+
let written = file_format.write_to_str(&resources);
200+
let write_elapsed = started.elapsed();
201+
202+
let started = Instant::now();
203+
let reparsed = file_format
204+
.parse_from_str(&written)
205+
.unwrap_or_else(|e| panic!("json benchmark parse failed: {}", e));
206+
let parse_elapsed = started.elapsed();
207+
208+
assert_eq!(reparsed.len(), resources.len());
209+
210+
println!(
211+
"json benchmark: resources={} bytes={} write={:?} parse={:?}",
212+
resources.len(),
213+
written.len(),
214+
write_elapsed,
215+
parse_elapsed
216+
);
217+
}

cirup_core/src/lib.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
extern crate regex;
21
extern crate treexml;
32
extern crate uuid;
43

@@ -10,9 +9,6 @@ extern crate serde_json;
109
#[macro_use]
1110
extern crate log;
1211

13-
#[macro_use]
14-
extern crate prettytable;
15-
1612
#[macro_use]
1713
extern crate lazy_static;
1814

0 commit comments

Comments
 (0)