Skip to content

Commit b8cc5e8

Browse files
committed
src: use simdutf for two-byte string utf8
conversion in utf8 value
1 parent ee41790 commit b8cc5e8

File tree

2 files changed

+37
-5
lines changed

2 files changed

+37
-5
lines changed

benchmark/util/utf8-value.js

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
'use strict';
2+
3+
const common = require('../common.js');
4+
5+
// Hands `main` and the option lists below to common.createBenchmark.
// NOTE(review): presumably `main` is invoked once per (type, n) combination —
// confirm against benchmark/common.js.
const bench = common.createBenchmark(main, {
6+
// Each value is a key of the `urls` table; `main` uses it to pick the input.
type: ['ascii', 'two_bytes', 'three_bytes', 'mixed'],
7+
// Loop iteration count used by `main` (5e6 = 5,000,000).
n: [5e6],
8+
});
9+
10+
// Input URL strings, keyed by the `type` option. Each entry differs in how
// many bytes its non-ASCII characters need when encoded as UTF-8.
const urls = {
11+
// ASCII characters only (one byte each in UTF-8).
ascii: 'https://example.com/path/to/resource?query=value&foo=bar',
12+
// ASCII plus Turkish letters (ü, ç, ğ, ı), which take two bytes each in UTF-8.
two_bytes: 'https://example.com/yol/türkçe/içerik?sağlık=değer',
13+
// ASCII plus CJK characters, which take three bytes each in UTF-8.
three_bytes: 'https://example.com/路径/资源?查询=值&名称=数据',
14+
// ASCII, two-byte and three-byte characters in the same string.
mixed: 'https://example.com/hello/世界/path?name=değer&key=数据',
15+
};
16+
17+
// Benchmark body: calls URL.canParse() `n` times on the URL selected by `type`.
//   n    - number of loop iterations; also passed to bench.end().
//   type - key into `urls` selecting the input string.
// NOTE(review): presumably URL.canParse is used because its string argument is
// converted through Utf8Value on the native side — confirm.
function main({ n, type }) {
18+
// Look the input up once, before timing starts, so only the calls are measured.
const str = urls[type];
19+
20+
bench.start();
21+
for (let i = 0; i < n; i++) {
22+
// The boolean result is intentionally discarded; only the call cost matters.
URL.canParse(str);
23+
}
24+
// Stop the measurement, passing the iteration count.
bench.end(n);
25+
}

src/util.cc

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,20 @@ static void MakeUtf8String(Isolate* isolate,
121121
return;
122122
}
123123

124-
// Add +1 for null termination.
125-
size_t storage = (3 * value_length) + 1;
124+
auto const_char16 = reinterpret_cast<const char16_t*>(value_view.data16());
125+
size_t storage = static_cast<size_t>(value_length) * 3 + 1;
126126
target->AllocateSufficientStorage(storage);
127127

128-
size_t length = string->WriteUtf8V2(
129-
isolate, target->out(), storage, String::WriteFlags::kReplaceInvalidUtf8);
130-
target->SetLengthAndZeroTerminate(length);
128+
size_t actual_length =
129+
simdutf::convert_utf16_to_utf8(const_char16, value_length, target->out());
130+
if (actual_length == 0 && value_length > 0) {
131+
actual_length =
132+
string->WriteUtf8V2(isolate,
133+
target->out(),
134+
storage,
135+
String::WriteFlags::kReplaceInvalidUtf8);
136+
}
137+
target->SetLengthAndZeroTerminate(actual_length);
131138
}
132139

133140
Utf8Value::Utf8Value(Isolate* isolate, Local<Value> value) {

0 commit comments

Comments
 (0)