Duckling/Numeral/ES/Rules.hs (326 lines of code) (raw):
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
module Duckling.Numeral.ES.Rules (rules) where
import qualified Data.HashMap.Strict as HashMap
import Data.Maybe
import Data.String
import qualified Data.Text as Text
import Prelude
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData(..))
import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Regex.Types
import Duckling.Types
zeroToFifteenMap :: HashMap.HashMap Text.Text Integer
zeroToFifteenMap =
HashMap.fromList
[ ("zero", 0)
, ("cero", 0)
, ("un", 1)
, ("una", 1)
, ("uno", 1)
, ("dos", 2)
, ("trés", 3)
, ("tres", 3)
, ("cuatro", 4)
, ("cinco", 5)
, ("seis", 6)
, ("séis", 6)
, ("siete", 7)
, ("ocho", 8)
, ("nueve", 9)
, ("diez", 10)
, ("dies", 10)
, ("once", 11)
, ("doce", 12)
, ("trece", 13)
, ("catorce", 14)
, ("quince", 15)
]
ruleNumeralZeroToFifteen :: Rule
ruleNumeralZeroToFifteen = Rule
{ name = "number (0..15)"
, pattern =
[ regex
"((c|z)ero|un(o|a)?|dos|tr(é|e)s|cuatro|cinco|s(e|é)is|siete|ocho|nueve|die(z|s)|once|doce|trece|catorce|quince)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match : _)) : _) ->
HashMap.lookup (Text.toLower match) zeroToFifteenMap >>= integer
_ -> Nothing
}
ruleBelowTenWithTwoDigits :: Rule
ruleBelowTenWithTwoDigits = Rule
{
name = "integer (0-9) with two digits"
, pattern =
[ regex "((c|z)ero)|0"
, Predicate $ numberBetween 1 10
]
, prod = \case
(
_:
Token Numeral NumeralData { TNumeral.value = v }:
_
) -> double v
_ -> Nothing
}
sixteenToTwentyNineMap :: HashMap.HashMap Text.Text Integer
sixteenToTwentyNineMap =
HashMap.fromList
[ ("dieciseis", 16)
, ("diesiséis", 16)
, ("diesiseis", 16)
, ("dieciséis", 16)
, ("diecisiete", 17)
, ("dieciocho", 18)
, ("diecinueve", 19)
, ("veintiuno", 21)
, ("veintiuna", 21)
, ("veintidos", 22)
, ("veintidós", 22)
, ("veintitrés", 23)
, ("veintitres", 23)
, ("veinticuatro", 24)
, ("veinticinco", 25)
, ("veintiséis", 26)
, ("veintiseis", 26)
, ("veintisiete", 27)
, ("veintiocho", 28)
, ("veintinueve", 29)
]
ruleNumeralSixteenToTwentyNine :: Rule
ruleNumeralSixteenToTwentyNine = Rule
{ name = "number (16..19 21..29)"
, pattern =
[ regex
"(die(c|s)is(é|e)is|diecisiete|dieciocho|diecinueve|veintiun(o|a)|veintid(o|ó)s|veintitr(é|e)s|veinticuatro|veinticinco|veintis(é|e)is|veintisiete|veintiocho|veintinueve|treinta)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match : _)) : _) ->
HashMap.lookup (Text.toLower match) sixteenToTwentyNineMap >>= integer
_ -> Nothing
}
ruleNumeralSixteenToNineteenWithDiez :: Rule
ruleNumeralSixteenToNineteenWithDiez = Rule
{ name = "number (16..19, two words)"
, pattern =
[ numberWith TNumeral.value (== 10)
, regex "y"
, Predicate $ numberBetween 6 10
]
, prod = \case
(_ : _ : Token Numeral NumeralData { TNumeral.value = v } : _) ->
double $ 10 + v
_ -> Nothing
}
byTensMap :: HashMap.HashMap Text.Text Integer
byTensMap =
HashMap.fromList
[ ("veinte", 20)
, ("treinta", 30)
, ("cuarenta", 40)
, ("cincuenta", 50)
, ("sesenta", 60)
, ("setenta", 70)
, ("ochenta", 80)
, ("noventa", 90)
]
ruleNumeralTwentyToNinetyTens :: Rule
ruleNumeralTwentyToNinetyTens = Rule
{ name = "number (20..90)"
, pattern =
[ regex
"(veinte|treinta|cuarenta|cincuenta|sesenta|setenta|ochenta|noventa)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match : _)) : _) ->
HashMap.lookup (Text.toLower match) byTensMap >>= integer
_ -> Nothing
}
ruleNumeralTwentyOneToNinetyNine :: Rule
ruleNumeralTwentyOneToNinetyNine = Rule
{ name = "number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)"
, pattern =
[ oneOf [20, 30 .. 90]
, regex "y"
, Predicate $ numberBetween 1 10
]
, prod = \case
(Token Numeral NumeralData { TNumeral.value = v1 } : _ : Token Numeral NumeralData { TNumeral.value = v2 } : _) ->
double $ v1 + v2
_ -> Nothing
}
bigNumbersMap :: HashMap.HashMap Text.Text Integer
bigNumbersMap =
HashMap.fromList
[ ("cien", 100)
, ("cientos", 100)
, ("ciento", 100)
, ("doscientos", 200)
, ("trescientos", 300)
, ("cuatrocientos", 400)
, ("quinientos", 500)
, ("seiscientos", 600)
, ("setecientos", 700)
, ("ochocientos", 800)
, ("novecientos", 900)
, ("mil", 1000)
, ("millon", 1000000)
, ("millón", 1000000)
, ("un millon", 1000000)
, ("un millón", 1000000)
, ("millones", 1000000)
-- Note: billion and larger is ambiguous becaouse of long vs short scale
]
ruleBigNumeral :: Rule
ruleBigNumeral = Rule
{ name = "big number 100 to 1K"
, pattern =
[ regex
"(cien(to|tos)?|doscientos|trescientos|cuatrocientos|quinientos|seiscientos|setecientos|ochocientos|novecientos|(un )?mill(o|ó)n)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match : _)) : _) ->
HashMap.lookup (Text.toLower match) bigNumbersMap >>= integer
_ -> Nothing
}
ruleBigNumeralMultipliable :: Rule
ruleBigNumeralMultipliable = Rule
{ name = "1K or 1M in multipliable form"
, pattern =
[ regex
"(mil(lones)?)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match : _)) : _) ->
HashMap.lookup (Text.toLower match) bigNumbersMap >>= integer >>= withMultipliable
_ -> Nothing
}
ruleTwoPartHundreds :: Rule
ruleTwoPartHundreds = Rule
{ name = "2..9 cientos"
, pattern =
[ Predicate $ numberBetween 2 10
, regex "cientos"
]
, prod = \case
(Token Numeral NumeralData { TNumeral.value = v1 } : _ : _) ->
double $ 100 * v1
_ -> Nothing
}
ruleNumeralHundredsAndSmaller :: Rule
ruleNumeralHundredsAndSmaller = Rule
{ name = "<hundreds> 0..99"
, pattern =
[ numberWith TNumeral.value (TNumeral.isMultiple 100)
, Predicate $ numberBetween 0 100
]
, prod = \case
(Token Numeral NumeralData { TNumeral.value = v1 } : Token Numeral NumeralData { TNumeral.value = v2 } : _)
| v1 > 0 && v1 < 1000 -> double $ v1 + v2
_ -> Nothing
}
ruleNumeralMultiply :: Rule
ruleNumeralMultiply = Rule
{ name = "2..999 <multipliable>"
, pattern =
[ Predicate $ numberBetween 2 1000
, Predicate isMultipliable
]
, prod = \case
(Token Numeral NumeralData { TNumeral.value = v1 } : Token Numeral NumeralData { TNumeral.value = v2 } : _) ->
double $ v1 * v2
_ -> Nothing
}
ruleNumeralThousandsAnd :: Rule
ruleNumeralThousandsAnd = Rule
{ name = "<thousands> 0..999"
, pattern =
[ numberWith TNumeral.value (TNumeral.isMultiple 1000)
, Predicate $ numberBetween 0 999
]
, prod = \case
(Token Numeral NumeralData { TNumeral.value = v1 } : Token Numeral NumeralData { TNumeral.value = v2 } : _)
| 0 < v1 && v1 < 1000000 -> double $ v1 + v2
_ -> Nothing
}
ruleNumeralMillionsAnd :: Rule
ruleNumeralMillionsAnd = Rule
{ name = "<millions> 0..999999"
, pattern =
[ numberWith TNumeral.value (TNumeral.isMultiple 1000000)
, Predicate $ numberBetween 0 999999
]
, prod = \case
(Token Numeral NumeralData { TNumeral.value = v1 } : Token Numeral NumeralData { TNumeral.value = v2 } : _)
| 0 < v1 -> double $ v1 + v2
_ -> Nothing
}
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
{ name = "number dot number"
, pattern = [dimension Numeral, regex "(co(n|ma)|punto)", Predicate $ not . hasGrain]
, prod = \case
(Token Numeral NumeralData { TNumeral.value = v1 } : _ : Token Numeral NumeralData { TNumeral.value = v2 } : _) ->
double $ v1 + decimalsToDouble v2
_ -> Nothing
}
ruleLeadingDotNumeral :: Rule
ruleLeadingDotNumeral = Rule
{ name = "dot number"
, pattern = [regex "coma|punto", Predicate $ not . hasGrain]
, prod = \case
(_:Token Numeral NumeralData{TNumeral.value = v}:_) ->
double $ decimalsToDouble v
_ -> Nothing
}
ruleNumeralsSuffixesKMG :: Rule
ruleNumeralsSuffixesKMG = Rule
{ name = "numbers suffixes (K, M, G)"
, pattern = [dimension Numeral, regex "([kmg])(?=[\\W\\$€]|$)"]
, prod = \case
(Token Numeral NumeralData { TNumeral.value = v } : Token RegexMatch (GroupMatch (match : _)) : _) ->
case Text.toLower match of
"k" -> double $ v * 1e3
"m" -> double $ v * 1e6
"g" -> double $ v * 1e9
_ -> Nothing
_ -> Nothing
}
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
{ name = "numbers prefix with -, negative or minus"
, pattern = [regex "-|menos|negativ(o|a)", Predicate isPositive]
, prod = \case
(_ : Token Numeral NumeralData { TNumeral.value = v } : _) ->
double $ v * (-1)
_ -> Nothing
}
rules :: [Rule]
rules =
[ ruleNumeralZeroToFifteen
, ruleBelowTenWithTwoDigits
, ruleNumeralSixteenToTwentyNine
, ruleNumeralSixteenToNineteenWithDiez
, ruleNumeralTwentyToNinetyTens
, ruleNumeralTwentyOneToNinetyNine
, ruleBigNumeral
, ruleBigNumeralMultipliable
, ruleNumeralMultiply
, ruleNumeralThousandsAnd
, ruleNumeralMillionsAnd
, ruleTwoPartHundreds
, ruleNumeralHundredsAndSmaller
, ruleNumeralDotNumeral
, ruleLeadingDotNumeral
, ruleNumeralsSuffixesKMG
, ruleNumeralsPrefixWithNegativeOrMinus
]