module Agda.Syntax.Parser.StringLiterals
( litString, litChar
) where
import Data.Char
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Parser.LookAhead
import Agda.Syntax.Position
import Agda.Syntax.Literal
import Agda.Utils.Tuple ( (-*-) )
litString :: LexAction Token
litString :: LexAction Token
litString = Char -> (Interval -> String -> Parser Token) -> LexAction Token
forall tok.
Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken '"' (\i :: Interval
i s :: String
s ->
Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Literal -> Token
TokLiteral (Literal -> Token) -> Literal -> Token
forall a b. (a -> b) -> a -> b
$ Range -> String -> Literal
LitString (Interval -> Range
forall t. HasRange t => t -> Range
getRange Interval
i) String
s)
litChar :: LexAction Token
litChar :: LexAction Token
litChar = Char -> (Interval -> String -> Parser Token) -> LexAction Token
forall tok.
Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken '\'' ((Interval -> String -> Parser Token) -> LexAction Token)
-> (Interval -> String -> Parser Token) -> LexAction Token
forall a b. (a -> b) -> a -> b
$ \i :: Interval
i s :: String
s ->
do case String
s of
[c :: Char
c] -> Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Literal -> Token
TokLiteral (Literal -> Token) -> Literal -> Token
forall a b. (a -> b) -> a -> b
$ Range -> Char -> Literal
LitChar (Interval -> Range
forall t. HasRange t => t -> Range
getRange Interval
i) Char
c
_ -> String -> Parser Token
forall a. String -> Parser a
lexError
"character literal must contain a single character"
litError :: String -> LookAhead a
litError :: String -> LookAhead a
litError msg :: String
msg =
do LookAhead ()
sync
Parser a -> LookAhead a
forall a. Parser a -> LookAhead a
liftP (Parser a -> LookAhead a) -> Parser a -> LookAhead a
forall a b. (a -> b) -> a -> b
$ String -> Parser a
forall a. String -> Parser a
lexError (String -> Parser a) -> String -> Parser a
forall a b. (a -> b) -> a -> b
$
"Lexical error in string or character literal: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
msg
stringToken :: Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken :: Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken del :: Char
del mkTok :: Interval -> String -> Parser tok
mkTok inp :: PreviousInput
inp inp' :: PreviousInput
inp' n :: TokenLength
n =
do PositionWithoutFile -> Parser ()
setLastPos (PositionWithoutFile -> PositionWithoutFile
forall a. Position' a -> Position' a
backupPos (PositionWithoutFile -> PositionWithoutFile)
-> PositionWithoutFile -> PositionWithoutFile
forall a b. (a -> b) -> a -> b
$ PreviousInput -> PositionWithoutFile
lexPos PreviousInput
inp')
PreviousInput -> Parser ()
setLexInput PreviousInput
inp'
String
tok <- (forall b. String -> LookAhead b)
-> LookAhead String -> Parser String
forall a.
(forall b. String -> LookAhead b) -> LookAhead a -> Parser a
runLookAhead forall b. String -> LookAhead b
litError (LookAhead String -> Parser String)
-> LookAhead String -> Parser String
forall a b. (a -> b) -> a -> b
$ Char -> String -> LookAhead String
lexString Char
del ""
Interval
i <- Parser Interval
getParseInterval
Interval -> String -> Parser tok
mkTok Interval
i String
tok
lexString :: Char -> String -> LookAhead String
lexString :: Char -> String -> LookAhead String
lexString del :: Char
del s :: String
s =
do Char
c <- LookAhead Char
nextChar
case Char
c of
c :: Char
c | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
del -> LookAhead ()
sync LookAhead () -> LookAhead String -> LookAhead String
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> LookAhead String
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> String
forall a. [a] -> [a]
reverse String
s)
'\\' ->
do Char
c' <- LookAhead Char
nextChar
case Char
c' of
'&' -> LookAhead ()
sync LookAhead () -> LookAhead String -> LookAhead String
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> String -> LookAhead String
lexString Char
del String
s
c :: Char
c | Char -> Bool
isSpace Char
c -> LookAhead ()
sync LookAhead () -> LookAhead String -> LookAhead String
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> String -> LookAhead String
lexStringGap Char
del String
s
_ -> LookAhead String
normalChar
_ -> LookAhead String
normalChar
where
normalChar :: LookAhead String
normalChar =
do LookAhead ()
rollback
Char
c <- LookAhead Char
lexChar
Char -> String -> LookAhead String
lexString Char
del (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
s)
lexStringGap :: Char -> String -> LookAhead String
lexStringGap :: Char -> String -> LookAhead String
lexStringGap del :: Char
del s :: String
s =
do Char
c <- LookAhead Char
eatNextChar
case Char
c of
'\\' -> Char -> String -> LookAhead String
lexString Char
del String
s
c :: Char
c | Char -> Bool
isSpace Char
c -> Char -> String -> LookAhead String
lexStringGap Char
del String
s
_ -> String -> LookAhead String
forall b. String -> LookAhead b
lookAheadError "non-space in string gap"
lexChar :: LookAhead Char
lexChar :: LookAhead Char
lexChar =
do Char
c <- LookAhead Char
eatNextChar
case Char
c of
'\\' -> LookAhead Char
lexEscape
_ -> Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
lexEscape :: LookAhead Char
lexEscape :: LookAhead Char
lexEscape =
do Char
c <- LookAhead Char
eatNextChar
case Char
c of
'^' -> do Char
c <- LookAhead Char
eatNextChar
if Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '@' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '_'
then Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return (TokenLength -> Char
chr (Char -> TokenLength
ord Char
c TokenLength -> TokenLength -> TokenLength
forall a. Num a => a -> a -> a
- Char -> TokenLength
ord '@'))
else String -> LookAhead Char
forall b. String -> LookAhead b
lookAheadError "invalid control character"
'x' -> (Char -> Bool)
-> TokenLength -> (Char -> TokenLength) -> LookAhead Char
readNum Char -> Bool
isHexDigit 16 Char -> TokenLength
digitToInt
'o' -> (Char -> Bool)
-> TokenLength -> (Char -> TokenLength) -> LookAhead Char
readNum Char -> Bool
isOctDigit 8 Char -> TokenLength
digitToInt
x :: Char
x | Char -> Bool
isDigit Char
x
-> (Char -> Bool)
-> TokenLength
-> (Char -> TokenLength)
-> TokenLength
-> LookAhead Char
readNumAcc Char -> Bool
isDigit 10 Char -> TokenLength
digitToInt (Char -> TokenLength
digitToInt Char
x)
c :: Char
c ->
do Char
esc <- Char
-> [(String, LookAhead Char)] -> LookAhead Char -> LookAhead Char
forall a.
Char -> [(String, LookAhead a)] -> LookAhead a -> LookAhead a
match' Char
c (((String, Char) -> (String, LookAhead Char))
-> [(String, Char)] -> [(String, LookAhead Char)]
forall a b. (a -> b) -> [a] -> [b]
map (String -> String
forall a. a -> a
id (String -> String)
-> (Char -> LookAhead Char)
-> (String, Char)
-> (String, LookAhead Char)
forall a c b d. (a -> c) -> (b -> d) -> (a, b) -> (c, d)
-*- Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return) [(String, Char)]
sillyEscapeChars)
(String -> LookAhead Char
forall b. String -> LookAhead b
lookAheadError "bad escape code")
LookAhead ()
sync
Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
esc
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> LookAhead Char
readNum :: (Char -> Bool)
-> TokenLength -> (Char -> TokenLength) -> LookAhead Char
readNum isDigit :: Char -> Bool
isDigit base :: TokenLength
base conv :: Char -> TokenLength
conv =
do Char
c <- LookAhead Char
eatNextChar
if Char -> Bool
isDigit Char
c
then (Char -> Bool)
-> TokenLength
-> (Char -> TokenLength)
-> TokenLength
-> LookAhead Char
readNumAcc Char -> Bool
isDigit TokenLength
base Char -> TokenLength
conv (Char -> TokenLength
conv Char
c)
else String -> LookAhead Char
forall b. String -> LookAhead b
lookAheadError "non-digit in numeral"
readNumAcc :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> LookAhead Char
readNumAcc :: (Char -> Bool)
-> TokenLength
-> (Char -> TokenLength)
-> TokenLength
-> LookAhead Char
readNumAcc isDigit :: Char -> Bool
isDigit base :: TokenLength
base conv :: Char -> TokenLength
conv i :: TokenLength
i = TokenLength -> LookAhead Char
scan TokenLength
i
where
scan :: TokenLength -> LookAhead Char
scan i :: TokenLength
i =
do PreviousInput
inp <- LookAhead PreviousInput
getInput
Char
c <- LookAhead Char
nextChar
case Char
c of
c :: Char
c | Char -> Bool
isDigit Char
c -> TokenLength -> LookAhead Char
scan (TokenLength
iTokenLength -> TokenLength -> TokenLength
forall a. Num a => a -> a -> a
*TokenLength
base TokenLength -> TokenLength -> TokenLength
forall a. Num a => a -> a -> a
+ Char -> TokenLength
conv Char
c)
_ ->
do PreviousInput -> LookAhead ()
setInput PreviousInput
inp
LookAhead ()
sync
if TokenLength
i TokenLength -> TokenLength -> Bool
forall a. Ord a => a -> a -> Bool
>= Char -> TokenLength
ord Char
forall a. Bounded a => a
minBound Bool -> Bool -> Bool
&& TokenLength
i TokenLength -> TokenLength -> Bool
forall a. Ord a => a -> a -> Bool
<= Char -> TokenLength
ord Char
forall a. Bounded a => a
maxBound
then Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return (TokenLength -> Char
chr TokenLength
i)
else String -> LookAhead Char
forall b. String -> LookAhead b
lookAheadError "character literal out of bounds"
sillyEscapeChars :: [(String, Char)]
sillyEscapeChars :: [(String, Char)]
sillyEscapeChars =
[ ("a", '\a')
, ("b", '\b')
, ("f", '\f')
, ("n", '\n')
, ("r", '\r')
, ("t", '\t')
, ("v", '\v')
, ("\\", '\\')
, ("\"", '\"')
, ("'", '\'')
, ("NUL", '\NUL')
, ("SOH", '\SOH')
, ("STX", '\STX')
, ("ETX", '\ETX')
, ("EOT", '\EOT')
, ("ENQ", '\ENQ')
, ("ACK", '\ACK')
, ("BEL", '\BEL')
, ("BS", '\BS')
, ("HT", '\HT')
, ("LF", '\LF')
, ("VT", '\VT')
, ("FF", '\FF')
, ("CR", '\CR')
, ("SO", '\SO')
, ("SI", '\SI')
, ("DLE", '\DLE')
, ("DC1", '\DC1')
, ("DC2", '\DC2')
, ("DC3", '\DC3')
, ("DC4", '\DC4')
, ("NAK", '\NAK')
, ("SYN", '\SYN')
, ("ETB", '\ETB')
, ("CAN", '\CAN')
, ("EM", '\EM')
, ("SUB", '\SUB')
, ("ESC", '\ESC')
, ("FS", '\FS')
, ("GS", '\GS')
, ("RS", '\RS')
, ("US", '\US')
, ("SP", '\SP')
, ("DEL", '\DEL')
]