{-# LANGUAGE BangPatterns #-}
-- | This module is separate from the Lexer.x input to Alex
-- to segregate the automatically generated code from the
-- hand written code. The automatically generated code
-- causes lots of warnings which mask the interesting warnings.
module Config.LexerUtils
  (
  -- * Alex wrapper
    AlexInput
  , alexGetByte

  -- * Lexer modes
  , LexerMode(..)
  , startString
  , nestMode
  , endMode

  -- * Token builders
  , token
  , token_
  , section
  , number

  -- * Final actions
  , untermString
  , eofAction
  , errorAction
  ) where

import           Data.Char (GeneralCategory(..), generalCategory, isAscii, isSpace, ord)
import           Data.Text (Text)
import           Data.Word (Word8)
import qualified Data.Text as Text

import           Config.Tokens
import qualified Config.NumberParser

------------------------------------------------------------------------
-- Custom Alex wrapper - these functions are used by generated code
------------------------------------------------------------------------

-- | The generated code expects the lexer input type to be named 'AlexInput'
type AlexInput = Located Text

-- | Get the next characteristic byte from the input source.
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (Located Position
p Text
cs)
  = do (Char
c,Text
cs') <- Text -> Maybe (Char, Text)
Text.uncons Text
cs
       let !b :: Word8
b = Char -> Word8
byteForChar Char
c
           !inp :: AlexInput
inp = Position -> Text -> AlexInput
forall a. Position -> a -> Located a
Located (Position -> Char -> Position
move Position
p Char
c) Text
cs'
       (Word8, AlexInput) -> Maybe (Word8, AlexInput)
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8
b, AlexInput
inp)

------------------------------------------------------------------------

-- | Advance the position according to the kind of character lexed.
move :: Position -> Char -> Position
move :: Position -> Char -> Position
move (Position Int
ix Int
line Int
column) Char
c =
  case Char
c of
    Char
'\t' -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
line (((Int
column Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
8) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    Char
'\n' -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
1
    Char
_    -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
line (Int
column Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

-- | Action to perform upon end of file. Produce errors if EOF was unexpected.
eofAction :: Position -> LexerMode -> [Located Token]
eofAction :: Position -> LexerMode -> [Located Token]
eofAction Position
eofPosn LexerMode
st =
  case LexerMode
st of
    InComment       Position
posn LexerMode
_     -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermComment)]
    InCommentString Position
posn LexerMode
_     -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermComment)]
    InString        Position
posn Text
_     -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermString)]
    LexerMode
InNormal                   -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located (Position -> Position
park Position
eofPosn) Token
EOF]

-- | Terminate the line if needed and move the cursor to column 0 to ensure
-- that it terminates any top-level block.
park :: Position -> Position
park :: Position -> Position
park Position
pos
  | Position -> Int
posColumn Position
pos Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = Position
pos { posColumn :: Int
posColumn = Int
0 }
  | Bool
otherwise          = Position
pos { posColumn :: Int
posColumn = Int
0, posLine :: Int
posLine = Position -> Int
posLine Position
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 }

-- | Action to perform when lexer gets stuck. Emits an error.
errorAction :: AlexInput -> [Located Token]
errorAction :: AlexInput -> [Located Token]
errorAction AlexInput
inp = [(Text -> Token) -> AlexInput -> Located Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Error -> Token
Error (Error -> Token) -> (Text -> Error) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Error
NoMatch (Char -> Error) -> (Text -> Char) -> Text -> Error
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Char
Text.head) AlexInput
inp]

------------------------------------------------------------------------
-- Lexer Modes
------------------------------------------------------------------------

-- | The lexer can be in any of four modes which determine which rules
-- are active.
data LexerMode
  = InNormal
  | InComment       !Position !LexerMode -- ^ Start of comment and return mode
  | InCommentString !Position !LexerMode -- ^ Start of string and return mode
  | InString        !Position !Text      -- ^ Start of string and input text

-- | Type of actions used by lexer upon matching a rule
type Action =
  Int                          {- ^ match length                       -} ->
  Located Text                 {- ^ current input                      -} ->
  LexerMode                    {- ^ lexer mode                         -} ->
  (LexerMode, [Located Token]) {- ^ updated lexer mode, emitted tokens -}

-- | Helper function for building an 'Action' using the lexeme
token :: (Text -> Token) -> Action
token :: (Text -> Token) -> Action
token Text -> Token
f Int
len AlexInput
match LexerMode
st = (LexerMode
st, [(Text -> Token) -> AlexInput -> Located Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text -> Token
f (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
Text.take Int
len) AlexInput
match])

-- | Helper function for building an 'Action' where the lexeme is unused.
token_ :: Token -> Action
token_ :: Token -> Action
token_ = (Text -> Token) -> Action
token ((Text -> Token) -> Action)
-> (Token -> Text -> Token) -> Token -> Action
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Token -> Text -> Token
forall a b. a -> b -> a
const

------------------------------------------------------------------------
-- Alternative modes
------------------------------------------------------------------------

-- | Used to enter one of the nested modes
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode Position -> LexerMode -> LexerMode
f Int
_ AlexInput
match LexerMode
st = (Position -> LexerMode -> LexerMode
f (AlexInput -> Position
forall a. Located a -> Position
locPosition AlexInput
match) LexerMode
st, [])

-- | Enter the string literal lexer
startString :: Action
startString :: Action
startString Int
_ (Located Position
posn Text
text) LexerMode
_ = (Position -> Text -> LexerMode
InString Position
posn Text
text, [])

-- | Successfully terminate the current mode and emit tokens as needed
endMode :: Action
endMode :: Action
endMode Int
len (Located Position
endPosn Text
_) LexerMode
mode =
  case LexerMode
mode of
    LexerMode
InNormal                 -> (LexerMode
InNormal, [])
    InCommentString Position
_ LexerMode
st     -> (LexerMode
st, [])
    InComment       Position
_ LexerMode
st     -> (LexerMode
st, [])
    InString Position
startPosn Text
input ->
      let n :: Int
n = Position -> Int
posIndex Position
endPosn Int -> Int -> Int
forall a. Num a => a -> a -> a
- Position -> Int
posIndex Position
startPosn Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
          badEscape :: Error
badEscape = Text -> Error
BadEscape (String -> Text
Text.pack String
"out of range")
      in case ReadS String
forall a. Read a => ReadS a
reads (Text -> String
Text.unpack (Int -> Text -> Text
Text.take Int
n Text
input)) of
           [(String
s,String
"")] -> (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
startPosn (Text -> Token
String (String -> Text
Text.pack String
s))])
           [(String, String)]
_        -> (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
startPosn (Error -> Token
Error Error
badEscape)])

-- | Action for unterminated string constant
untermString :: Action
untermString :: Action
untermString Int
_ AlexInput
_ = \(InString Position
posn Text
_) ->
  (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermString)])

------------------------------------------------------------------------
-- Token builders
------------------------------------------------------------------------

-- | Construct a 'Number' token from a token using a
-- given base. This function expect the token to be
-- legal for the given base. This is checked by Alex.
number ::
  Text {- ^ sign-prefix-digits -} ->
  Token
number :: Text -> Token
number = Number -> Token
Number (Number -> Token) -> (Text -> Number) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Number
Config.NumberParser.number
       (String -> Number) -> (Text -> String) -> Text -> Number
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Text.unpack (Text -> String) -> (Text -> Text) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
Text.toUpper (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
Text.filter (Char
'_' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=)

-- | Process a section heading token
section :: Text -> Token
section :: Text -> Token
section = Text -> Token
Section (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
Text.dropWhileEnd Char -> Bool
isSpace (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
Text.init

------------------------------------------------------------------------
-- Embed all of unicode, kind of, in a single byte!
------------------------------------------------------------------------

-- | Alex is driven by looking up elements in a 128 element array.
-- This function maps each ASCII character to its ASCII encoding
-- and it maps non-ASCII code-points to a character class (0-6)
byteForChar :: Char -> Word8
byteForChar :: Char -> Word8
byteForChar Char
c
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\6' = Word8
non_graphic
  | Char -> Bool
isAscii Char
c = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c)
  | Bool
otherwise = case Char -> GeneralCategory
generalCategory Char
c of
                  GeneralCategory
LowercaseLetter       -> Word8
lower
                  GeneralCategory
OtherLetter           -> Word8
lower
                  GeneralCategory
UppercaseLetter       -> Word8
upper
                  GeneralCategory
TitlecaseLetter       -> Word8
upper
                  GeneralCategory
DecimalNumber         -> Word8
digit
                  GeneralCategory
OtherNumber           -> Word8
digit
                  GeneralCategory
ConnectorPunctuation  -> Word8
symbol
                  GeneralCategory
DashPunctuation       -> Word8
symbol
                  GeneralCategory
OtherPunctuation      -> Word8
symbol
                  GeneralCategory
MathSymbol            -> Word8
symbol
                  GeneralCategory
CurrencySymbol        -> Word8
symbol
                  GeneralCategory
ModifierSymbol        -> Word8
symbol
                  GeneralCategory
OtherSymbol           -> Word8
symbol
                  GeneralCategory
Space                 -> Word8
space
                  GeneralCategory
ModifierLetter        -> Word8
other
                  GeneralCategory
NonSpacingMark        -> Word8
other
                  GeneralCategory
SpacingCombiningMark  -> Word8
other
                  GeneralCategory
EnclosingMark         -> Word8
other
                  GeneralCategory
LetterNumber          -> Word8
other
                  GeneralCategory
OpenPunctuation       -> Word8
other
                  GeneralCategory
ClosePunctuation      -> Word8
other
                  GeneralCategory
InitialQuote          -> Word8
other
                  GeneralCategory
FinalQuote            -> Word8
other
                  GeneralCategory
_                     -> Word8
non_graphic
  where
  non_graphic :: Word8
non_graphic     = Word8
0
  upper :: Word8
upper           = Word8
1
  lower :: Word8
lower           = Word8
2
  digit :: Word8
digit           = Word8
3
  symbol :: Word8
symbol          = Word8
4
  space :: Word8
space           = Word8
5
  other :: Word8
other           = Word8
6