Duckling/Numeral/TE/Rules.hs (144 lines of code) (raw):
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoRebindableSyntax #-}
module Duckling.Numeral.TE.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Regex.Types
import Duckling.Types
teluguMap :: HashMap Char Char
teluguMap = HashMap.fromList
[ ( '౦', '0' )
, ( '౧', '1' )
, ( '౨', '2' )
, ( '౩', '3' )
, ( '౪', '4' )
, ( '౫', '5' )
, ( '౬', '6' )
, ( '౭', '7' )
, ( '౮', '8' )
, ( '౯', '9' )
]
teluguToArab :: Char -> Char
teluguToArab c = HashMap.lookupDefault c c teluguMap
ruleTelugu :: Rule
ruleTelugu = Rule
{ name = "telugu forms"
, pattern =
[ regex "([౦౧౨౩౪౫౬౭౮౯]{1,18})"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
toInteger <$> parseInt (Text.map teluguToArab match) >>= integer
_ -> Nothing
}
zeroToNineMap :: HashMap Text Integer
zeroToNineMap = HashMap.fromList
[ ( "సున్న", 0 )
, ( "ఒకటి", 1 )
, ( "రెండు", 2 )
, ( "మూడు", 3 )
, ( "నాలుగు", 4 )
, ( "ఐదు", 5 )
, ( "ఆరు", 6 )
, ( "ఏడు", 7 )
, ( "ఎనిమిది", 8 )
, ( "తొమ్మిది", 9 )
]
ruleZeroToNine :: Rule
ruleZeroToNine = Rule
{ name = "integer (0..9)"
, pattern =
[ regex "(సున్న|ఒకటి|రెండు|మూడు|నాలుగు|ఐదు|ఆరు|ఏడు|ఎనిమిది|తొమ్మిది)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) zeroToNineMap >>= integer
_ -> Nothing
}
tenToNineteenMap :: HashMap Text Integer
tenToNineteenMap = HashMap.fromList
[ ( "పది", 10 )
, ( "పదకొండు", 11 )
, ( "పన్నెండు", 12 )
, ( "పదమూడు", 13 )
, ( "పద్నాల్గు", 14 )
, ( "పదిహేను", 15 )
, ( "పదహారు", 16 )
, ( "పదిహేడు", 17 )
, ( "పద్దెనిమిది", 18 )
, ( "పంతొమ్మిది", 19 )
]
ruleTenToNineteen :: Rule
ruleTenToNineteen = Rule
{ name = "integer (10..19)"
, pattern =
[ regex "(పదకొండు|పన్నెండు|పదమూడు|పద్నాల్గు|పదిహేను|పదహారు|పదిహేడు|పద్దెనిమిది|పంతొమ్మిది|పది)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) tenToNineteenMap >>= integer
_ -> Nothing
}
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList
[ ( "ఇరవై", 20 )
, ( "ముప్పై", 30 )
, ( "నలబై", 40 )
, ( "యాబై", 50 )
, ( "అరవై", 60 )
, ( "డెబ్బై", 70 )
, ( "ఎనబై", 80 )
, ( "తొంబై", 90 )
]
ruleTens :: Rule
ruleTens = Rule
{ name = "integer (20..90)"
, pattern =
[ regex "(ఇరవై|ముప్పై|నలబై|యాబై|అరవై|డెబ్బై|ఎనబై|తొంబై)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup match tensMap >>= integer
_ -> Nothing
}
hundredsMap :: HashMap Text Integer
hundredsMap = HashMap.fromList
[ ( "వంద", 100 )
, ( "వెయ్యి", 1000 )
, ( "లక్ష", 100000 )
, ( "కోటి", 10000000 )
]
rulehundreds :: Rule
rulehundreds = Rule
{ name = "integer (100,1000,100000,10000000)"
, pattern =
[ regex "(వంద|వెయ్యి|లక్ష|కోటి)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) hundredsMap >>= integer
_ -> Nothing
}
rules :: [Rule]
rules =
[ ruleTelugu
, ruleZeroToNine
, ruleTenToNineteen
, ruleTens
, rulehundreds
]