thrift/lib/hs/Thrift/Protocol/JSONUtils.hs (124 lines of code) (raw):

-- -- Licensed to the Apache Software Foundation (ASF) under one -- or more contributor license agreements. See the NOTICE file -- distributed with this work for additional information -- regarding copyright ownership. The ASF licenses this file -- to you under the Apache License, Version 2.0 (the -- "License"); you may not use this file except in compliance -- with the License. You may obtain a copy of the License at -- -- http://www.apache.org/licenses/LICENSE-2.0 -- -- Unless required by applicable law or agreed to in writing, -- software distributed under the License is distributed on an -- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -- KIND, either express or implied. See the License for the -- specific language governing permissions and limitations -- under the License. -- {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} module Thrift.Protocol.JSONUtils ( escapedString , escapedChar , escape , lexeme , notChar8 , between ) where import Control.Applicative import Data.Attoparsec.ByteString as P import Data.Attoparsec.ByteString.Char8 as PC import Data.Bits import Data.ByteString.Builder as B import Data.ByteString.Internal (c2w, w2c) import Data.Char as C #if __GLASGOW_HASKELL__ < 804 import Data.Monoid #endif import Data.Word import qualified Data.ByteString.Lazy as LBS escapedString :: Parser LBS.ByteString escapedString = PC.char8 '"' *> (LBS.pack <$> P.many' (escapedChar <|> notChar8 '"')) <* PC.char8 '"' escapedChar :: Parser Word8 escapedChar = PC.char8 '\\' *> (c2w <$> choice [ '\0' <$ PC.char '0' , '\a' <$ PC.char 'a' , '\b' <$ PC.char 'b' , '\f' <$ PC.char 'f' , '\n' <$ PC.char 'n' , '\r' <$ PC.char 'r' , '\t' <$ PC.char 't' , '\v' <$ PC.char 'v' , '\"' <$ PC.char '"' , '\'' <$ PC.char '\'' , '\\' <$ PC.char '\\' , '/' <$ PC.char '/' , PC.string "u00" *> hexdigits ]) where -- The cpp implementation only accepts escaped characters of the form -- "\u00XX" where the X's are hex digits, so we will do the same here hexdigits :: Parser Char hexdigits = do msB <- hexdigit =<< anyWord8 lsB <- hexdigit =<< anyWord8 return $ C.chr $ (msB `shiftL` 4) .|. lsB hexdigit n | n >= w_0 && n <= w_9 = return $ fromIntegral $ n - w_0 | n >= w_a && n <= w_f = return $ fromIntegral $ n - w_a + 10 | n >= w_A && n <= w_F = return $ fromIntegral $ n - w_A + 10 | otherwise = fail $ "not a hex digit: " ++ show n w_0 = fromIntegral $ ord '0' w_9 = fromIntegral $ ord '9' w_a = fromIntegral $ ord 'a' w_f = fromIntegral $ ord 'f' w_A = fromIntegral $ ord 'A' w_F = fromIntegral $ ord 'F' escape :: LBS.ByteString -> Builder escape = LBS.foldl' escapeChar mempty where escapeChar b w = b <> case w2c w of '\0' -> "\\0" '\b' -> "\\b" '\f' -> "\\f" '\n' -> "\\n" '\r' -> "\\r" '\t' -> "\\t" '\"' -> "\\\"" '\\' -> "\\\\" '\SOH' -> "\\u0001" '\STX' -> "\\u0002" '\ETX' -> "\\u0003" '\EOT' -> "\\u0004" '\ENQ' -> "\\u0005" '\ACK' -> "\\u0006" '\BEL' -> "\\u0007" '\VT' -> "\\u000b" '\SO' -> "\\u000e" '\SI' -> "\\u000f" '\DLE' -> "\\u0010" '\DC1' -> "\\u0011" '\DC2' -> "\\u0012" '\DC3' -> "\\u0013" '\DC4' -> "\\u0014" '\NAK' -> "\\u0015" '\SYN' -> "\\u0016" '\ETB' -> "\\u0017" '\CAN' -> "\\u0018" '\EM' -> "\\u0019" '\SUB' -> "\\u001a" '\ESC' -> "\\u001b" '\FS' -> "\\u001c" '\GS' -> "\\u001d" '\RS' -> "\\u001e" '\US' -> "\\u001f" '\DEL' -> "\\u007f" _ -> B.word8 w lexeme :: Parser a -> Parser a lexeme = (<* skipSpace) notChar8 :: Char -> Parser Word8 notChar8 c = P.satisfy (/= c2w c) between :: Char -> Char -> Parser a -> Parser a between a b p = lexeme (PC.char8 a) *> lexeme p <* lexeme (PC.char8 b)