@@ -761,9 +761,38 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
761761void SlowByteLengthUtf8 (const FunctionCallbackInfo<Value>& args) {
762762 CHECK (args[0 ]->IsString ());
763763
764- // Fast case: avoid StringBytes on UTF8 string. Jump to v8.
765- size_t result = args[0 ].As <String>()->Utf8LengthV2 (args.GetIsolate ());
766- args.GetReturnValue ().Set (static_cast <uint64_t >(result));
764+ Isolate* isolate = args.GetIsolate ();
765+ Local<String> source = args[0 ].As <String>();
766+
767+ static constexpr int kSmallStringThreshold = 128 ;
768+ if (source->Length () <= kSmallStringThreshold ) {
769+ size_t result = source->Utf8LengthV2 (isolate);
770+ args.GetReturnValue ().Set (static_cast <uint64_t >(result));
771+ return ;
772+ }
773+
774+ String::ValueView view (isolate, source);
775+ size_t length = view.length ();
776+ size_t utf8_length;
777+
778+ if (view.is_one_byte ()) {
779+ auto data = reinterpret_cast <const char *>(view.data8 ());
780+ simdutf::result result = simdutf::validate_ascii_with_errors (data, length);
781+ if (result.error == simdutf::SUCCESS) {
782+ utf8_length = length; // Pure ASCII, length stays the same
783+ } else {
784+ utf8_length = simdutf::utf8_length_from_latin1 (data, length);
785+ }
786+ } else {
787+ auto data = reinterpret_cast <const char16_t *>(view.data16 ());
788+ if (simdutf::validate_utf16 (data, length)) {
789+ utf8_length = simdutf::utf8_length_from_utf16 (data, length);
790+ } else {
791+ utf8_length = source->Utf8LengthV2 (isolate);
792+ }
793+ }
794+
795+ args.GetReturnValue ().Set (static_cast <uint64_t >(utf8_length));
767796}
768797
769798uint32_t FastByteLengthUtf8 (
@@ -776,49 +805,31 @@ uint32_t FastByteLengthUtf8(
776805 CHECK (sourceValue->IsString ());
777806 Local<String> sourceStr = sourceValue.As <String>();
778807
779- if (!sourceStr->IsExternalOneByte ()) {
808+ // For short inputs, use V8's path - function call overhead not worth it
809+ static constexpr int kSmallStringThreshold = 128 ;
810+ if (sourceStr->Length () <= kSmallStringThreshold ) {
780811 return sourceStr->Utf8LengthV2 (isolate);
781812 }
782- auto source = sourceStr->GetExternalOneByteStringResource ();
783- // For short inputs, the function call overhead to simdutf is maybe
784- // not worth it, reserve simdutf for long strings.
785- if (source->length () > 128 ) {
786- return simdutf::utf8_length_from_latin1 (source->data (), source->length ());
787- }
788-
789- uint32_t length = source->length ();
790- const auto input = reinterpret_cast <const uint8_t *>(source->data ());
791-
792- uint32_t answer = length;
793- uint32_t i = 0 ;
794813
795- auto pop = [](uint64_t v) {
796- return static_cast <size_t >(((v >> 7 ) & UINT64_C (0x0101010101010101 )) *
797- UINT64_C (0x0101010101010101 ) >>
798- 56 );
799- };
814+ // For large strings, use simdutf with String::ValueView for direct access
815+ // This is ~6x faster for large strings
816+ String::ValueView view (isolate, sourceStr);
817+ size_t length = view.length ();
800818
801- for (; i + 32 <= length; i += 32 ) {
802- uint64_t v;
803- memcpy (&v, input + i, 8 );
804- answer += pop (v);
805- memcpy (&v, input + i + 8 , 8 );
806- answer += pop (v);
807- memcpy (&v, input + i + 16 , 8 );
808- answer += pop (v);
809- memcpy (&v, input + i + 24 , 8 );
810- answer += pop (v);
811- }
812- for (; i + 8 <= length; i += 8 ) {
813- uint64_t v;
814- memcpy (&v, input + i, 8 );
815- answer += pop (v);
816- }
817- for (; i + 1 <= length; i += 1 ) {
818- answer += input[i] >> 7 ;
819+ if (view.is_one_byte ()) {
820+ auto data = reinterpret_cast <const char *>(view.data8 ());
821+ simdutf::result result = simdutf::validate_ascii_with_errors (data, length);
822+ if (result.error == simdutf::SUCCESS) {
823+ return length; // Pure ASCII, length stays the same
824+ }
825+ return simdutf::utf8_length_from_latin1 (data, length);
819826 }
820827
821- return answer;
828+ auto data = reinterpret_cast <const char16_t *>(view.data16 ());
829+ if (simdutf::validate_utf16 (data, length)) {
830+ return simdutf::utf8_length_from_utf16 (data, length);
831+ }
832+ return sourceStr->Utf8LengthV2 (isolate);
822833}
823834
824835static CFunction fast_byte_length_utf8 (CFunction::Make(FastByteLengthUtf8));
0 commit comments