66
77//! Streams of tendrils.
88
9- use crate :: fmt;
9+ use crate :: utf8_decode:: { decode_utf8, DecodeError , REPLACEMENT_CHARACTER } ;
10+ use crate :: { fmt, IncompleteUtf8 } ;
1011use crate :: { Atomicity , NonAtomic , Tendril } ;
1112
1213use std:: borrow:: Cow ;
@@ -15,7 +16,6 @@ use std::io;
1516use std:: marker:: PhantomData ;
1617use std:: path:: Path ;
1718
18- use crate :: utf8;
1919#[ cfg( feature = "encoding_rs" ) ]
2020use encoding_rs:: { self , DecoderResult } ;
2121
@@ -124,7 +124,7 @@ where
124124 A : Atomicity ,
125125{
126126 pub inner_sink : Sink ,
127- incomplete : Option < utf8 :: Incomplete > ,
127+ incomplete : Option < IncompleteUtf8 > ,
128128 marker : PhantomData < A > ,
129129}
130130
@@ -150,64 +150,71 @@ where
150150 A : Atomicity ,
151151{
152152 #[ inline]
153- fn process ( & mut self , mut t : Tendril < fmt:: Bytes , A > ) {
153+ fn process ( & mut self , mut bytes : Tendril < fmt:: Bytes , A > ) {
154154 // FIXME: remove take() and map() when non-lexical borrows are stable.
155155 if let Some ( mut incomplete) = self . incomplete . take ( ) {
156- let resume_at = incomplete. try_complete ( & t) . map ( |( result, rest) | {
157- match result {
158- Ok ( s) => self . inner_sink . process ( Tendril :: from_slice ( s) ) ,
159- Err ( _) => {
160- self . inner_sink . error ( "invalid byte sequence" . into ( ) ) ;
161- self . inner_sink
162- . process ( Tendril :: from_slice ( utf8:: REPLACEMENT_CHARACTER ) ) ;
163- } ,
164- }
165- t. len ( ) - rest. len ( )
166- } ) ;
156+ let resume_at = incomplete
157+ . try_to_complete_codepoint ( & bytes)
158+ . map ( |( result, rest) | {
159+ match result {
160+ Ok ( decoded_string) => {
161+ self . inner_sink . process ( Tendril :: from_slice ( decoded_string) )
162+ } ,
163+ Err ( _) => {
164+ self . inner_sink . error ( "invalid byte sequence" . into ( ) ) ;
165+ self . inner_sink
166+ . process ( Tendril :: from_slice ( REPLACEMENT_CHARACTER ) ) ;
167+ } ,
168+ }
169+ bytes. len ( ) - rest. len ( )
170+ } ) ;
167171 match resume_at {
168172 None => {
169173 self . incomplete = Some ( incomplete) ;
170174 return ;
171175 } ,
172- Some ( resume_at) => t . pop_front ( resume_at as u32 ) ,
176+ Some ( resume_at) => bytes . pop_front ( resume_at as u32 ) ,
173177 }
174178 }
175- while !t . is_empty ( ) {
176- let unborrowed_result = match utf8 :: decode ( & t ) {
179+ while !bytes . is_empty ( ) {
180+ let unborrowed_result = match decode_utf8 ( & bytes ) {
177181 Ok ( s) => {
178- debug_assert ! ( s. as_ptr( ) == t . as_ptr( ) ) ;
179- debug_assert ! ( s. len( ) == t . len( ) ) ;
182+ debug_assert ! ( s. as_ptr( ) == bytes . as_ptr( ) ) ;
183+ debug_assert ! ( s. len( ) == bytes . len( ) ) ;
180184 Ok ( ( ) )
181185 } ,
182- Err ( utf8 :: DecodeError :: Invalid {
186+ Err ( DecodeError :: Invalid {
183187 valid_prefix,
184188 invalid_sequence,
185189 ..
186190 } ) => {
187- debug_assert ! ( valid_prefix. as_ptr( ) == t . as_ptr( ) ) ;
188- debug_assert ! ( valid_prefix. len( ) <= t . len( ) ) ;
191+ debug_assert ! ( valid_prefix. as_ptr( ) == bytes . as_ptr( ) ) ;
192+ debug_assert ! ( valid_prefix. len( ) <= bytes . len( ) ) ;
189193 Err ( (
190194 valid_prefix. len ( ) ,
191195 Err ( valid_prefix. len ( ) + invalid_sequence. len ( ) ) ,
192196 ) )
193197 } ,
194- Err ( utf8 :: DecodeError :: Incomplete {
198+ Err ( DecodeError :: Incomplete {
195199 valid_prefix,
196200 incomplete_suffix,
197201 } ) => {
198- debug_assert ! ( valid_prefix. as_ptr( ) == t . as_ptr( ) ) ;
199- debug_assert ! ( valid_prefix. len( ) <= t . len( ) ) ;
202+ debug_assert ! ( valid_prefix. as_ptr( ) == bytes . as_ptr( ) ) ;
203+ debug_assert ! ( valid_prefix. len( ) <= bytes . len( ) ) ;
200204 Err ( ( valid_prefix. len ( ) , Ok ( incomplete_suffix) ) )
201205 } ,
202206 } ;
203207 match unborrowed_result {
204208 Ok ( ( ) ) => {
205- unsafe { self . inner_sink . process ( t. reinterpret_without_validating ( ) ) }
209+ unsafe {
210+ self . inner_sink
211+ . process ( bytes. reinterpret_without_validating ( ) )
212+ }
206213 return ;
207214 } ,
208215 Err ( ( valid_len, and_then) ) => {
209216 if valid_len > 0 {
210- let subtendril = t . subtendril ( 0 , valid_len as u32 ) ;
217+ let subtendril = bytes . subtendril ( 0 , valid_len as u32 ) ;
211218 unsafe {
212219 self . inner_sink
213220 . process ( subtendril. reinterpret_without_validating ( ) )
@@ -221,8 +228,8 @@ where
221228 Err ( offset) => {
222229 self . inner_sink . error ( "invalid byte sequence" . into ( ) ) ;
223230 self . inner_sink
224- . process ( Tendril :: from_slice ( utf8 :: REPLACEMENT_CHARACTER ) ) ;
225- t . pop_front ( offset as u32 ) ;
231+ . process ( Tendril :: from_slice ( REPLACEMENT_CHARACTER ) ) ;
232+ bytes . pop_front ( offset as u32 ) ;
226233 } ,
227234 }
228235 } ,
@@ -243,7 +250,7 @@ where
243250 self . inner_sink
244251 . error ( "incomplete byte sequence at end of stream" . into ( ) ) ;
245252 self . inner_sink
246- . process ( Tendril :: from_slice ( utf8 :: REPLACEMENT_CHARACTER ) ) ;
253+ . process ( Tendril :: from_slice ( REPLACEMENT_CHARACTER ) ) ;
247254 }
248255 self . inner_sink . finish ( )
249256 }
@@ -380,7 +387,7 @@ where
380387
381388#[ cfg( feature = "encoding_rs" ) ]
382389fn decode_to_sink < Sink , A > (
383- mut t : Tendril < fmt:: Bytes , A > ,
390+ mut input : Tendril < fmt:: Bytes , A > ,
384391 decoder : & mut encoding_rs:: Decoder ,
385392 sink : & mut Sink ,
386393 last : bool ,
@@ -391,13 +398,13 @@ fn decode_to_sink<Sink, A>(
391398 loop {
392399 let mut out = <Tendril < fmt:: Bytes , A > >:: new ( ) ;
393400 let max_len = decoder
394- . max_utf8_buffer_length_without_replacement ( t . len ( ) )
401+ . max_utf8_buffer_length_without_replacement ( input . len ( ) )
395402 . unwrap_or ( 8192 ) ;
396403 unsafe {
397- out. push_uninitialized ( std :: cmp :: min ( max_len as u32 , 8192 ) ) ;
404+ out. push_uninitialized ( max_len . min ( 8192 ) as u32 ) ;
398405 }
399406 let ( result, bytes_read, bytes_written) =
400- decoder. decode_to_utf8_without_replacement ( & t , & mut out, last) ;
407+ decoder. decode_to_utf8_without_replacement ( & input , & mut out, last) ;
401408 if bytes_written > 0 {
402409 sink. process ( unsafe {
403410 out. subtendril ( 0 , bytes_written as u32 )
@@ -409,11 +416,11 @@ fn decode_to_sink<Sink, A>(
409416 DecoderResult :: OutputFull => { } ,
410417 DecoderResult :: Malformed ( _, _) => {
411418 sink. error ( Cow :: Borrowed ( "invalid sequence" ) ) ;
412- sink. process ( " \u{FFFD} " . into ( ) ) ;
419+ sink. process ( Tendril :: from_slice ( REPLACEMENT_CHARACTER ) ) ;
413420 } ,
414421 }
415- t . pop_front ( bytes_read as u32 ) ;
416- if t . is_empty ( ) {
422+ input . pop_front ( bytes_read as u32 ) ;
423+ if input . is_empty ( ) {
417424 return ;
418425 }
419426 }
0 commit comments