@@ -44,6 +44,7 @@ module Streamly.Internal.Data.Unicode.Stream
4444 , decodeUtf8ArraysD
4545 , decodeUtf8ArraysLenientD
4646 , foldUtf8With
47+ , escapeFoldUtf8With
4748
4849 -- * Transformation
4950 , stripStart
@@ -759,9 +760,13 @@ oDecode table state codep byte = do
759760data Fp m s = FreshPoint ! CodePoint ! DecodeState (m s )
760761
761762{-# INLINE_NORMAL foldUtf8WithE #-}
762- foldUtf8WithE :: Monad m => CodingFailureMode -> Fold m Char container -> Fold m Word8 container
763+ foldUtf8WithE
764+ :: Monad m
765+ => CodingFailureMode
766+ -> Fold m Char container
767+ -> Fold m Word8 container
763768foldUtf8WithE cfm (FL. Fold arrayStep arrayInit arrayExtract) =
764- let A. Array p _ _ = utf8d
769+ let A. Array p _ = utf8d
765770 ! ptr = (unsafeForeignPtrToPtr p)
766771 in FL. Fold (step' ptr) (return $ FreshPoint 0 0 arrayInit) extract
767772 where
@@ -823,6 +828,111 @@ foldUtf8WithE cfm (FL.Fold arrayStep arrayInit arrayExtract) =
823828 aSt <- arrayState
824829 if statePtr /= 0 then inputUnderflow aSt else arrayExtract aSt
825830
831+ data EscapeState m s
832+ = EscapeState (m s )
833+ | NotEscapeState ! CodePoint ! DecodeState (m s )
834+
835+ {-# INLINE_NORMAL escapeFoldUtf8WithE #-}
836+ escapeFoldUtf8WithE
837+ :: Monad m
838+ => CodingFailureMode
839+ -> Word8
840+ -> (Word8 -> Maybe Word8 )
841+ -> Fold m Char container
842+ -> Fold m Word8 container
843+ escapeFoldUtf8WithE cfm escape trans (FL. Fold arrayStep arrayInit arrayExtract) =
844+ let A. Array p _ = utf8d
845+ ! ptr = (unsafeForeignPtrToPtr p)
846+ in FL. Fold (step' ptr) (return $ NotEscapeState 0 0 arrayInit) extract
847+ where
848+ {-# INLINE transliterateOrError #-}
849+ transliterateOrError e arrayState =
850+ case cfm of
851+ ErrorOnCodingFailure -> error e
852+ TransliterateCodingFailure ->
853+ return $ NotEscapeState 0 0 (arrayStep arrayState replacementChar)
854+
855+ {-# INLINE transliterateOrError1 #-}
856+ transliterateOrError1 table statePtr codepointPtr e arrayState x =
857+ case cfm of
858+ ErrorOnCodingFailure -> error e
859+ TransliterateCodingFailure -> do
860+ aS <- arrayState
861+ aSt <- arrayStep aS replacementChar
862+ if x <= 0x7f
863+ then return $ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
864+ else
865+ let (Tuple' sv cp) = oDecode table statePtr codepointPtr x
866+ in
867+ case sv of
868+ 12 -> transliterateOrError e aSt
869+ 0 -> return $ NotEscapeState cp sv (arrayStep aSt (unsafeChr cp))
870+ _ -> return $ NotEscapeState cp sv (return aSt)
871+
872+ step' _ (NotEscapeState _ _ arrayState) c
873+ | c == escape = return $ EscapeState arrayState
874+
875+ step' table (NotEscapeState codepointPtr statePtr arrayState) x =
876+ if statePtr == 0 && x <= 0x7f
877+ then do
878+ aSt <- arrayState
879+ return $ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
880+ else
881+ let (Tuple' sv cp) = oDecode table statePtr codepointPtr x
882+ in
883+ case sv of
884+ 12 ->
885+ transliterateOrError1
886+ table
887+ statePtr
888+ codepointPtr
889+ " Streamly.Streams.StreamD.escapeFoldUtf8With: Invalid UTF8 codepoint encountered"
890+ arrayState
891+ x
892+ 0 -> do
893+ aSt <- arrayState
894+ return $ NotEscapeState cp sv (arrayStep aSt (unsafeChr cp))
895+ _ -> return $ NotEscapeState cp sv arrayState
896+
897+ step' _ (EscapeState arrayState) c = do
898+ aSt <- arrayState
899+ case trans c of
900+ Just x ->
901+ return $
902+ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
903+ Nothing ->
904+ transliterateOrError
905+ " Streamly.Streams.StreamD.escapeFoldUtf8With: Invalid UTF8 codepoint encountered"
906+ aSt
907+
908+ {-# INLINE inputUnderflow #-}
909+ inputUnderflow arrayState =
910+ case cfm of
911+ ErrorOnCodingFailure ->
912+ error " Streamly.Internal.Data.Stream.StreamD.escapeFoldUtf8With: Input Underflow"
913+ TransliterateCodingFailure -> do
914+ aSt <- arrayStep arrayState replacementChar
915+ arrayExtract aSt
916+
917+ extract (NotEscapeState _ statePtr arrayState) = do
918+ aSt <- arrayState
919+ if statePtr /= 0
920+ then inputUnderflow aSt
921+ else arrayExtract aSt
922+
923+ extract (EscapeState arrayState) = do
924+ aSt <- arrayState
925+ inputUnderflow aSt
926+
826927{-# INLINE foldUtf8With #-}
827928foldUtf8With :: Monad m => Fold m Char container -> Fold m Word8 container
828929foldUtf8With = foldUtf8WithE ErrorOnCodingFailure
930+
931+ {-# INLINE escapeFoldUtf8With #-}
932+ escapeFoldUtf8With ::
933+ Monad m
934+ => Word8
935+ -> (Word8 -> Maybe Word8 )
936+ -> Fold m Char container
937+ -> Fold m Word8 container
938+ escapeFoldUtf8With = escapeFoldUtf8WithE ErrorOnCodingFailure
0 commit comments