Skip to content

Commit 411c04d

Browse files
author
pranaysashank
committed
Properly escape characters in JSON strings.
1 parent b6a077f commit 411c04d

4 files changed

Lines changed: 136 additions & 12 deletions

File tree

src/Streamly/Internal/Data/Json/Stream.hs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import Streamly.Internal.Data.Array (Array)
3131
import Streamly.Internal.Data.Fold.Types (Fold(..))
3232
import Streamly.Internal.Data.Strict (Tuple' (..))
3333
import qualified Streamly.Internal.Data.Parser as PR
34+
import qualified Streamly.Internal.Data.Parser.ParserK.Types as K
3435
import qualified Streamly.Internal.Data.Parser.ParserD as P
3536
import qualified Streamly.Internal.Data.Array as A
3637
import qualified Streamly.Internal.Data.Fold as IFL
@@ -67,6 +68,22 @@ instance Enum a => Hashable (A.Array a) where
6768
hashWithSalt salt arr = fromIntegral $ runIdentity $
6869
IUF.fold A.read (IFL.rollingHashWithSalt $ fromIntegral salt) arr
6970

71+
{-# INLINE jsonEscapes #-}
72+
jsonEscapes :: Word8 -> Maybe Word8
73+
jsonEscapes 34 = Just 34 -- \" -> "
74+
jsonEscapes 92 = Just 92 -- \\ -> \
75+
jsonEscapes 47 = Just 47 -- \/ -> /
76+
jsonEscapes 98 = Just 8 -- \b -> BS
77+
jsonEscapes 102 = Just 12 -- \f -> FF
78+
jsonEscapes 110 = Just 10 -- \n -> LF
79+
jsonEscapes 114 = Just 13 -- \r -> CR
80+
jsonEscapes 116 = Just 9 -- \t -> TAB
81+
jsonEscapes _ = Nothing
82+
83+
{-# INLINE escapeFoldUtf8With #-}
84+
escapeFoldUtf8With :: Monad m => Fold m Char container -> Fold m Word8 container
85+
escapeFoldUtf8With = Uni.escapeFoldUtf8With 92 jsonEscapes
86+
7087
type JsonString = Array Char
7188

7289
type JsonArray = Array Value
@@ -168,13 +185,13 @@ parseJsonString = do
168185
w <- P.peek
169186
case w of
170187
DOUBLE_QUOTE -> skip 1 >> return s
171-
BACKSLASH -> (fmap (s <>) escapeParseJsonString) <* skip 1
188+
BACKSLASH -> fmap (s <>) escapeParseJsonString
172189
_ -> do
173190
P.die $ [(chr . fromIntegral) w] ++ " : String without end."
174191

175192
{-# INLINE escapeParseJsonString #-}
176193
escapeParseJsonString :: MonadCatch m => Parser m Word8 JsonString
177-
escapeParseJsonString = P.scan startState go (Uni.foldUtf8With A.unsafeWrite)
194+
escapeParseJsonString = P.scan startState go (escapeFoldUtf8With A.unsafeWrite)
178195
where
179196
startState = False
180197
go s a
@@ -236,15 +253,15 @@ parseJsonArray = do
236253
{-# INLINE parseJsonEOF #-}
237254
parseJsonEOF :: MonadCatch m => PR.Parser m Word8 Value
238255
parseJsonEOF =
239-
P.toParserK $ do
256+
K.toParserK $ do
240257
v <- parseJsonValue
241258
skipSpace
242259
P.eof
243260
return v
244261

245262
{-# INLINE parseJson #-}
246263
parseJson :: MonadCatch m => PR.Parser m Word8 Value
247-
parseJson = P.toParserK $ parseJsonValue
264+
parseJson = K.toParserK $ parseJsonValue
248265

249266
{-
250267

src/Streamly/Internal/Data/Parser.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ dieM = K.toParserK . D.dieM
293293

294294
{-# INLINE peekMaybe #-}
295295
peekMaybe :: MonadCatch m => Parser m a (Maybe a)
296-
peekMaybe = D.toParserK D.peekMaybe
296+
peekMaybe = K.toParserK D.peekMaybe
297297

298298
-------------------------------------------------------------------------------
299299
-- Failing Parsers
@@ -500,7 +500,7 @@ sliceSepByP _cond = undefined -- K.toParserK . D.sliceSepByP cond
500500
--
501501
{-# INLINE scan #-}
502502
scan :: MonadCatch m => s -> (s -> a -> Maybe s) -> Fold m a b -> Parser m a b
503-
scan s f fl = D.toParserK $ D.scan s f fl
503+
scan s f fl = K.toParserK $ D.scan s f fl
504504

505505
-- | @sepBy fl p sep@ collects zero or more stream elements separated by @sep@.
506506
--
@@ -518,7 +518,7 @@ sepBy :: MonadCatch m
518518
-> Parser m a b
519519
-> Parser m a sep
520520
-> Parser m a c
521-
sepBy fl pa = D.toParserK . D.sepBy fl (D.fromParserK pa) . D.fromParserK
521+
sepBy fl pa = K.toParserK . D.sepBy fl (K.fromParserK pa) . K.fromParserK
522522

523523
-- Note: Keep this consistent with S.splitOn. In fact we should eliminate
524524
-- S.splitOn in favor of the parser.

src/Streamly/Internal/Data/Parser/ParserD.hs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,6 @@ import Prelude
166166

167167
import Streamly.Internal.Data.Fold.Types (Fold(..))
168168

169-
import qualified Streamly.Internal.Data.Parser.ParserK.Types as K
170-
import qualified Streamly.Internal.Data.Zipper as Z
171-
172169
import Fusion.Plugin.Types (Fuse(..))
173170
import Streamly.Internal.Data.Parser.ParserD.Tee
174171
import Streamly.Internal.Data.Parser.ParserD.Types

src/Streamly/Internal/Data/Unicode/Stream.hs

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ module Streamly.Internal.Data.Unicode.Stream
4444
, decodeUtf8ArraysD
4545
, decodeUtf8ArraysLenientD
4646
, foldUtf8With
47+
, escapeFoldUtf8With
4748

4849
-- * Transformation
4950
, stripStart
@@ -759,9 +760,13 @@ oDecode table state codep byte = do
759760
data Fp m s = FreshPoint !CodePoint !DecodeState (m s)
760761

761762
{-# INLINE_NORMAL foldUtf8WithE #-}
762-
foldUtf8WithE :: Monad m => CodingFailureMode -> Fold m Char container -> Fold m Word8 container
763+
foldUtf8WithE
764+
:: Monad m
765+
=> CodingFailureMode
766+
-> Fold m Char container
767+
-> Fold m Word8 container
763768
foldUtf8WithE cfm (FL.Fold arrayStep arrayInit arrayExtract) =
764-
let A.Array p _ _ = utf8d
769+
let A.Array p _ = utf8d
765770
!ptr = (unsafeForeignPtrToPtr p)
766771
in FL.Fold (step' ptr) (return $ FreshPoint 0 0 arrayInit) extract
767772
where
@@ -823,6 +828,111 @@ foldUtf8WithE cfm (FL.Fold arrayStep arrayInit arrayExtract) =
823828
aSt <- arrayState
824829
if statePtr /= 0 then inputUnderflow aSt else arrayExtract aSt
825830

831+
data EscapeState m s
832+
= EscapeState (m s)
833+
| NotEscapeState !CodePoint !DecodeState (m s)
834+
835+
{-# INLINE_NORMAL escapeFoldUtf8WithE #-}
836+
escapeFoldUtf8WithE
837+
:: Monad m
838+
=> CodingFailureMode
839+
-> Word8
840+
-> (Word8 -> Maybe Word8)
841+
-> Fold m Char container
842+
-> Fold m Word8 container
843+
escapeFoldUtf8WithE cfm escape trans (FL.Fold arrayStep arrayInit arrayExtract) =
844+
let A.Array p _ = utf8d
845+
!ptr = (unsafeForeignPtrToPtr p)
846+
in FL.Fold (step' ptr) (return $ NotEscapeState 0 0 arrayInit) extract
847+
where
848+
{-# INLINE transliterateOrError #-}
849+
transliterateOrError e arrayState =
850+
case cfm of
851+
ErrorOnCodingFailure -> error e
852+
TransliterateCodingFailure ->
853+
return $ NotEscapeState 0 0 (arrayStep arrayState replacementChar)
854+
855+
{-# INLINE transliterateOrError1 #-}
856+
transliterateOrError1 table statePtr codepointPtr e arrayState x =
857+
case cfm of
858+
ErrorOnCodingFailure -> error e
859+
TransliterateCodingFailure -> do
860+
aS <- arrayState
861+
aSt <- arrayStep aS replacementChar
862+
if x <= 0x7f
863+
then return $ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
864+
else
865+
let (Tuple' sv cp) = oDecode table statePtr codepointPtr x
866+
in
867+
case sv of
868+
12 -> transliterateOrError e aSt
869+
0 -> return $ NotEscapeState cp sv (arrayStep aSt (unsafeChr cp))
870+
_ -> return $ NotEscapeState cp sv (return aSt)
871+
872+
step' _ (NotEscapeState _ _ arrayState) c
873+
| c == escape = return $ EscapeState arrayState
874+
875+
step' table (NotEscapeState codepointPtr statePtr arrayState) x =
876+
if statePtr == 0 && x <= 0x7f
877+
then do
878+
aSt <- arrayState
879+
return $ NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
880+
else
881+
let (Tuple' sv cp) = oDecode table statePtr codepointPtr x
882+
in
883+
case sv of
884+
12 ->
885+
transliterateOrError1
886+
table
887+
statePtr
888+
codepointPtr
889+
"Streamly.Streams.StreamD.escapeFoldUtf8With: Invalid UTF8 codepoint encountered"
890+
arrayState
891+
x
892+
0 -> do
893+
aSt <- arrayState
894+
return $ NotEscapeState cp sv (arrayStep aSt (unsafeChr cp))
895+
_ -> return $ NotEscapeState cp sv arrayState
896+
897+
step' _ (EscapeState arrayState) c = do
898+
aSt <- arrayState
899+
case trans c of
900+
Just x ->
901+
return $
902+
NotEscapeState 0 0 (arrayStep aSt (unsafeChr (fromIntegral x)))
903+
Nothing ->
904+
transliterateOrError
905+
"Streamly.Streams.StreamD.escapeFoldUtf8With: Invalid UTF8 codepoint encountered"
906+
aSt
907+
908+
{-# INLINE inputUnderflow #-}
909+
inputUnderflow arrayState =
910+
case cfm of
911+
ErrorOnCodingFailure ->
912+
error "Streamly.Internal.Data.Stream.StreamD.escapeFoldUtf8With: Input Underflow"
913+
TransliterateCodingFailure -> do
914+
aSt <- arrayStep arrayState replacementChar
915+
arrayExtract aSt
916+
917+
extract (NotEscapeState _ statePtr arrayState) = do
918+
aSt <- arrayState
919+
if statePtr /= 0
920+
then inputUnderflow aSt
921+
else arrayExtract aSt
922+
923+
extract (EscapeState arrayState) = do
924+
aSt <- arrayState
925+
inputUnderflow aSt
926+
826927
{-# INLINE foldUtf8With #-}
827928
foldUtf8With :: Monad m => Fold m Char container -> Fold m Word8 container
828929
foldUtf8With = foldUtf8WithE ErrorOnCodingFailure
930+
931+
{-# INLINE escapeFoldUtf8With #-}
932+
escapeFoldUtf8With ::
933+
Monad m
934+
=> Word8
935+
-> (Word8 -> Maybe Word8)
936+
-> Fold m Char container
937+
-> Fold m Word8 container
938+
escapeFoldUtf8With = escapeFoldUtf8WithE ErrorOnCodingFailure

0 commit comments

Comments
 (0)