{-# LANGUAGE ScopedTypeVariables #-}
module Hledger.Utils.Regex (
Regexp
,Replacement
,regexMatches
,regexMatchesCI
,regexReplace
,regexReplaceCI
,regexReplaceMemo
,regexReplaceCIMemo
,regexReplaceBy
,regexReplaceByCI
)
where
import Data.Array
import Data.Char
import Data.List (foldl')
import Data.MemoUgly (memo)
import Text.Regex.TDFA (
Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt,
makeRegexOpts, AllMatches(getAllMatches), match, (=~), MatchText
)
import Hledger.Utils.UTF8IOCompat (error')
type Regexp = String
type Replacement = String
toRegex :: Regexp -> Regex
toRegex :: Regexp -> Regex
toRegex = (Regexp -> Regex) -> Regexp -> Regex
forall a b. Ord a => (a -> b) -> a -> b
memo (CompOption -> ExecOption -> Regexp -> Regex
forall regex compOpt execOpt source.
RegexMaker regex compOpt execOpt source =>
compOpt -> execOpt -> source -> regex
makeRegexOpts CompOption
compOpt ExecOption
execOpt)
toRegexCI :: Regexp -> Regex
toRegexCI :: Regexp -> Regex
toRegexCI = (Regexp -> Regex) -> Regexp -> Regex
forall a b. Ord a => (a -> b) -> a -> b
memo (CompOption -> ExecOption -> Regexp -> Regex
forall regex compOpt execOpt source.
RegexMaker regex compOpt execOpt source =>
compOpt -> execOpt -> source -> regex
makeRegexOpts CompOption
compOpt{caseSensitive :: Bool
caseSensitive=Bool
False} ExecOption
execOpt)
compOpt :: CompOption
compOpt :: CompOption
compOpt = CompOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
defaultCompOpt
execOpt :: ExecOption
execOpt :: ExecOption
execOpt = ExecOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
defaultExecOpt
regexMatches :: Regexp -> String -> Bool
regexMatches :: Regexp -> Regexp -> Bool
regexMatches = (Regexp -> Regexp -> Bool) -> Regexp -> Regexp -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Regexp -> Regexp -> Bool
forall source source1 target.
(RegexMaker Regex CompOption ExecOption source,
RegexContext Regex source1 target) =>
source1 -> source -> target
(=~)
regexMatchesCI :: Regexp -> String -> Bool
regexMatchesCI :: Regexp -> Regexp -> Bool
regexMatchesCI r :: Regexp
r = Regex -> Regexp -> Bool
forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match (Regexp -> Regex
toRegexCI Regexp
r)
regexReplaceBy :: Regexp -> (String -> String) -> String -> String
regexReplaceBy :: Regexp -> (Regexp -> Regexp) -> Regexp -> Regexp
regexReplaceBy r :: Regexp
r = Regex -> (Regexp -> Regexp) -> Regexp -> Regexp
replaceAllBy (Regexp -> Regex
toRegex Regexp
r)
regexReplaceByCI :: Regexp -> (String -> String) -> String -> String
regexReplaceByCI :: Regexp -> (Regexp -> Regexp) -> Regexp -> Regexp
regexReplaceByCI r :: Regexp
r = Regex -> (Regexp -> Regexp) -> Regexp -> Regexp
replaceAllBy (Regexp -> Regex
toRegexCI Regexp
r)
regexReplace :: Regexp -> Replacement -> String -> String
regexReplace :: Regexp -> Regexp -> Regexp -> Regexp
regexReplace re :: Regexp
re = Regex -> Regexp -> Regexp -> Regexp
replaceRegex (Regexp -> Regex
toRegex Regexp
re)
regexReplaceCI :: Regexp -> Replacement -> String -> String
regexReplaceCI :: Regexp -> Regexp -> Regexp -> Regexp
regexReplaceCI re :: Regexp
re = Regex -> Regexp -> Regexp -> Regexp
replaceRegex (Regexp -> Regex
toRegexCI Regexp
re)
regexReplaceMemo :: Regexp -> Replacement -> String -> String
regexReplaceMemo :: Regexp -> Regexp -> Regexp -> Regexp
regexReplaceMemo re :: Regexp
re repl :: Regexp
repl = (Regexp -> Regexp) -> Regexp -> Regexp
forall a b. Ord a => (a -> b) -> a -> b
memo (Regexp -> Regexp -> Regexp -> Regexp
regexReplace Regexp
re Regexp
repl)
regexReplaceCIMemo :: Regexp -> Replacement -> String -> String
regexReplaceCIMemo :: Regexp -> Regexp -> Regexp -> Regexp
regexReplaceCIMemo re :: Regexp
re repl :: Regexp
repl = (Regexp -> Regexp) -> Regexp -> Regexp
forall a b. Ord a => (a -> b) -> a -> b
memo (Regexp -> Regexp -> Regexp -> Regexp
regexReplaceCI Regexp
re Regexp
repl)
replaceRegex :: Regex -> Replacement -> String -> String
replaceRegex :: Regex -> Regexp -> Regexp -> Regexp
replaceRegex re :: Regex
re repl :: Regexp
repl s :: Regexp
s = (Regexp -> MatchText Regexp -> Regexp)
-> Regexp -> [MatchText Regexp] -> Regexp
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (Regexp -> Regexp -> MatchText Regexp -> Regexp
replaceMatch Regexp
repl) Regexp
s ([MatchText Regexp] -> [MatchText Regexp]
forall a. [a] -> [a]
reverse ([MatchText Regexp] -> [MatchText Regexp])
-> [MatchText Regexp] -> [MatchText Regexp]
forall a b. (a -> b) -> a -> b
$ Regex -> Regexp -> [MatchText Regexp]
forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match Regex
re Regexp
s :: [MatchText String])
replaceMatch :: Replacement -> String -> MatchText String -> String
replaceMatch :: Regexp -> Regexp -> MatchText Regexp -> Regexp
replaceMatch replpat :: Regexp
replpat s :: Regexp
s matchgroups :: MatchText Regexp
matchgroups = Regexp
pre Regexp -> Regexp -> Regexp
forall a. [a] -> [a] -> [a]
++ Regexp
repl Regexp -> Regexp -> Regexp
forall a. [a] -> [a] -> [a]
++ Regexp
post
where
((_,(off :: MatchOffset
off,len :: MatchOffset
len)):_) = MatchText Regexp -> [(Regexp, (MatchOffset, MatchOffset))]
forall i e. Array i e -> [e]
elems MatchText Regexp
matchgroups
(pre :: Regexp
pre, post' :: Regexp
post') = MatchOffset -> Regexp -> (Regexp, Regexp)
forall a. MatchOffset -> [a] -> ([a], [a])
splitAt MatchOffset
off Regexp
s
post :: Regexp
post = MatchOffset -> Regexp -> Regexp
forall a. MatchOffset -> [a] -> [a]
drop MatchOffset
len Regexp
post'
repl :: Regexp
repl = Regex -> (Regexp -> Regexp) -> Regexp -> Regexp
replaceAllBy (Regexp -> Regex
toRegex "\\\\[0-9]+") (MatchText Regexp -> Regexp -> Regexp
replaceBackReference MatchText Regexp
matchgroups) Regexp
replpat
replaceBackReference :: MatchText String -> String -> String
replaceBackReference :: MatchText Regexp -> Regexp -> Regexp
replaceBackReference grps :: MatchText Regexp
grps ('\\':s :: Regexp
s@(_:_)) | (Char -> Bool) -> Regexp -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit Regexp
s =
case Regexp -> MatchOffset
forall a. Read a => Regexp -> a
read Regexp
s of n :: MatchOffset
n | MatchOffset
n MatchOffset -> [MatchOffset] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` MatchText Regexp -> [MatchOffset]
forall i e. Ix i => Array i e -> [i]
indices MatchText Regexp
grps -> (Regexp, (MatchOffset, MatchOffset)) -> Regexp
forall a b. (a, b) -> a
fst (MatchText Regexp
grps MatchText Regexp
-> MatchOffset -> (Regexp, (MatchOffset, MatchOffset))
forall i e. Ix i => Array i e -> i -> e
! MatchOffset
n)
_ -> Regexp -> Regexp
forall a. Regexp -> a
error' (Regexp -> Regexp) -> Regexp -> Regexp
forall a b. (a -> b) -> a -> b
$ "no match group exists for backreference \"\\"Regexp -> Regexp -> Regexp
forall a. [a] -> [a] -> [a]
++Regexp
sRegexp -> Regexp -> Regexp
forall a. [a] -> [a] -> [a]
++"\""
replaceBackReference _ s :: Regexp
s = Regexp -> Regexp
forall a. Regexp -> a
error' (Regexp -> Regexp) -> Regexp -> Regexp
forall a b. (a -> b) -> a -> b
$ "replaceBackReference called on non-numeric-backreference \""Regexp -> Regexp -> Regexp
forall a. [a] -> [a] -> [a]
++Regexp
sRegexp -> Regexp -> Regexp
forall a. [a] -> [a] -> [a]
++"\", shouldn't happen"
replaceAllBy :: Regex -> (String -> String) -> String -> String
replaceAllBy :: Regex -> (Regexp -> Regexp) -> Regexp -> Regexp
replaceAllBy re :: Regex
re f :: Regexp -> Regexp
f s :: Regexp
s = Regexp -> Regexp
start Regexp
end
where
(_, end :: Regexp
end, start :: Regexp -> Regexp
start) = ((MatchOffset, Regexp, Regexp -> Regexp)
-> (MatchOffset, MatchOffset)
-> (MatchOffset, Regexp, Regexp -> Regexp))
-> (MatchOffset, Regexp, Regexp -> Regexp)
-> [(MatchOffset, MatchOffset)]
-> (MatchOffset, Regexp, Regexp -> Regexp)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (MatchOffset, Regexp, Regexp -> Regexp)
-> (MatchOffset, MatchOffset)
-> (MatchOffset, Regexp, Regexp -> Regexp)
forall c.
(MatchOffset, Regexp, Regexp -> c)
-> (MatchOffset, MatchOffset) -> (MatchOffset, Regexp, Regexp -> c)
go (0, Regexp
s, Regexp -> Regexp
forall a. a -> a
id) ([(MatchOffset, MatchOffset)]
-> (MatchOffset, Regexp, Regexp -> Regexp))
-> [(MatchOffset, MatchOffset)]
-> (MatchOffset, Regexp, Regexp -> Regexp)
forall a b. (a -> b) -> a -> b
$ (AllMatches [] (MatchOffset, MatchOffset)
-> [(MatchOffset, MatchOffset)]
forall (f :: * -> *) b. AllMatches f b -> f b
getAllMatches (AllMatches [] (MatchOffset, MatchOffset)
-> [(MatchOffset, MatchOffset)])
-> AllMatches [] (MatchOffset, MatchOffset)
-> [(MatchOffset, MatchOffset)]
forall a b. (a -> b) -> a -> b
$ Regex -> Regexp -> AllMatches [] (MatchOffset, MatchOffset)
forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match Regex
re Regexp
s :: [(Int, Int)])
go :: (MatchOffset, Regexp, Regexp -> c)
-> (MatchOffset, MatchOffset) -> (MatchOffset, Regexp, Regexp -> c)
go (ind :: MatchOffset
ind,read :: Regexp
read,write :: Regexp -> c
write) (off :: MatchOffset
off,len :: MatchOffset
len) =
let (skip :: Regexp
skip, start :: Regexp
start) = MatchOffset -> Regexp -> (Regexp, Regexp)
forall a. MatchOffset -> [a] -> ([a], [a])
splitAt (MatchOffset
off MatchOffset -> MatchOffset -> MatchOffset
forall a. Num a => a -> a -> a
- MatchOffset
ind) Regexp
read
(matched :: Regexp
matched, remaining :: Regexp
remaining) = MatchOffset -> Regexp -> (Regexp, Regexp)
forall a. MatchOffset -> [a] -> ([a], [a])
splitAt MatchOffset
len Regexp
start
in (MatchOffset
off MatchOffset -> MatchOffset -> MatchOffset
forall a. Num a => a -> a -> a
+ MatchOffset
len, Regexp
remaining, Regexp -> c
write (Regexp -> c) -> (Regexp -> Regexp) -> Regexp -> c
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Regexp
skipRegexp -> Regexp -> Regexp
forall a. [a] -> [a] -> [a]
++) (Regexp -> Regexp) -> (Regexp -> Regexp) -> Regexp -> Regexp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Regexp -> Regexp
f Regexp
matched Regexp -> Regexp -> Regexp
forall a. [a] -> [a] -> [a]
++))