Duckling/Time/JA/Rules.hs (591 lines of code) (raw):
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Time.JA.Rules
( rules
) where
import Control.Monad (guard)
import Prelude
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Duration.Types (DurationData (..))
import Duckling.Numeral.Helpers (isNatural, parseInt)
import Duckling.Regex.Types
import Duckling.Time.Helpers
import Duckling.Types
import qualified Duckling.Duration.Types as TDuration
import qualified Duckling.Time.Types as TTime
import qualified Duckling.TimeGrain.Types as TG
ruleHHMM :: Rule
ruleHHMM = Rule
{ name = "hh:mm"
, pattern = [regex "((?:[01]?\\d)|(?:2[0-3]))[::. ]([0-5]\\d)"]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (hh:mm:_)):_) -> do
h <- parseInt hh
m <- parseInt mm
tt $ hourMinute (h /= 0 && h < 12) h m
_ -> Nothing
}
ruleHHMMKanji :: Rule
ruleHHMMKanji = Rule
{ name = "hh時mm分"
, pattern = [regex "((?:[01]?\\d)|(?:2[0-3]))時([0-5]\\d)分"]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (hh:mm:_)):_) -> do
h <- parseInt hh
m <- parseInt mm
tt $ hourMinute (h /= 0 && h < 12) h m
_ -> Nothing
}
ruleHHMMKanjiNumeral :: Rule
ruleHHMMKanjiNumeral = Rule
{ name = "hh時mm分"
, pattern =
[ Predicate isNatural
, regex "時"
, Predicate isNatural
, regex "分"
]
, prod = \tokens -> case tokens of
(hh:_:mm:_) -> do
h <- getIntValue hh
m <- getIntValue mm
guard (h > 0 && h < 25 && m < 60)
tt $ hourMinute (h /= 0 && h < 12) h m
_ -> Nothing
}
ruleTODLatent :: Rule
ruleTODLatent = Rule
{ name = "time-of-day (latent)"
, pattern =
[ Predicate $ isIntegerBetween 0 23
]
, prod = \tokens -> case tokens of
(token:_) -> do
n <- getIntValue token
tt $ mkLatent $ hour (n < 13) n
_ -> Nothing
}
ruleHHOClock :: Rule
ruleHHOClock = Rule
{ name = "<time-of-day> o'clock"
, pattern =
[ Predicate isATimeOfDay
, regex "時(ちょうど|きっかり|ぴったり)"
]
, prod = \tokens -> case tokens of
(Token Time td:_) -> tt $ notLatent td
_ -> Nothing
}
ruleTODAMPM :: Rule
ruleTODAMPM = Rule
{ name = "<time-of-day> am|pm"
, pattern =
[ Predicate isATimeOfDay
, regex "([ap])(\\s|\\.)?m?\\.?"
]
, prod = \tokens -> case tokens of
(Token Time td:Token RegexMatch (GroupMatch (ap:_)):_) ->
tt $ timeOfDayAMPM (Text.toLower ap == "a") td
_ -> Nothing
}
ruleTODAMPMKanji :: Rule
ruleTODAMPMKanji = Rule
{ name = "午後|午前 <time-of-day>"
, pattern =
[ regex "午(前|後)"
, Predicate isATimeOfDay
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (ap:_)):Token Time td:_) ->
tt $ timeOfDayAMPM (ap == "前") td
_ -> Nothing
}
ruleAtTOD :: Rule
ruleAtTOD = Rule
{ name = "at <time-of-day>"
, pattern =
[ Predicate isATimeOfDay
, regex "に|で|の"
]
, prod = \tokens -> case tokens of
(Token Time td:_) -> tt $ notLatent td
_ -> Nothing
}
ruleMMDD :: Rule
ruleMMDD = Rule
{ name = "mm/dd"
, pattern =
[ regex "(1[0-2]|0?[1-9])\\s?[//]\\s?(3[01]|[12]\\d|0?[1-9])"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (mm:dd:_)):_) -> do
m <- parseInt mm
d <- parseInt dd
tt $ monthDay m d
_ -> Nothing
}
ruleYYYYMM :: Rule
ruleYYYYMM = Rule
{ name = "yyyy/mm"
, pattern =
[ regex "(\\d{4})\\s*[//.]\\s*(1[0-2]|0?[1-9])"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (yy:mm:_)):_) -> do
y <- parseInt yy
m <- parseInt mm
tt $ yearMonth y m
_ -> Nothing
}
ruleYYYYMMDD :: Rule
ruleYYYYMMDD = Rule
{ name = "yyyy/mm/dd"
, pattern =
[ regex "(\\d{2,4})\\s*[//.]\\s*(0?[1-9]|1[0-2])\\s*[//.]\\s*(3[01]|[12]\\d|0?[1-9])"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (yy:mm:dd:_)):_) -> do
y <- parseInt yy
m <- parseInt mm
d <- parseInt dd
tt $ yearMonthDay y m d
_ -> Nothing
}
ruleMonthDOM :: Rule
ruleMonthDOM = Rule
{ name = "<named-month> <day-of-month>"
, pattern =
[ Predicate isAMonth
, Predicate isDOMValue
, regex "日"
]
, prod = \tokens -> case tokens of
(Token Time td:token:_) -> Token Time <$> intersectDOM td token
_ -> Nothing
}
ruleYearMonthDOM :: Rule
ruleYearMonthDOM = Rule
{ name = "<year> <named-month> <day-of-month>"
, pattern =
[ regex "(\\d{2,4})年"
, Predicate isAMonth
, Predicate isDOMValue
, regex "日"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):Token Time td:token:_) -> do
intVal <- parseInt match
dom <- intersectDOM td token
Token Time <$> intersect dom (year intVal)
_ -> Nothing
}
ruleImperialYearMonthDOM :: Rule
ruleImperialYearMonthDOM = Rule
{ name = "imperial <year> <named-month> <day-of-month>"
, pattern =
[ Predicate $ isGrainOfTime TG.Year
, Predicate isAMonth
, Predicate isDOMValue
, regex "日"
]
, prod = \tokens -> case tokens of
(Token Time year:Token Time td:token:_) -> do
dom <- intersectDOM td token
Token Time <$> intersect dom year
_ -> Nothing
}
ruleYearMonth :: Rule
ruleYearMonth = Rule
{ name = "<year> <named-month>"
, pattern =
[ Predicate $ isGrainOfTime TG.Year
, Predicate isAMonth
]
, prod = \tokens -> case tokens of
(Token Time year:Token Time month:_) ->
Token Time . notLatent <$> intersect year month
_ -> Nothing
}
ruleOnADOW :: Rule
ruleOnADOW = Rule
{ name = "on <named-day>"
, pattern =
[ regex "(\\(|()?"
, Predicate isADayOfWeek
, regex "(\\)|))?(に|は|で)"
]
, prod = \tokens -> case tokens of
(_:Token Time td:_) -> tt $ notLatent td
_ -> Nothing
}
ruleAbsorbOnDay :: Rule
ruleAbsorbOnDay = Rule
{ name = "on <day>"
, pattern =
[ Predicate $ isGrainOfTime TG.Day
, regex "に|は|で"
]
, prod = \tokens -> case tokens of
(Token Time td:_) -> tt $ notLatent td
_ -> Nothing
}
ruleOnDay :: Rule
ruleOnDay = Rule
{ name = "on the <day-of-month>"
, pattern =
[ Predicate isDOMValue
, regex "日(に|は|で)"
]
, prod = \tokens -> case tokens of
(token:_) -> do
n <- getIntValue token
tt $ dayOfMonth n
_ -> Nothing
}
ruleInNamedMonth :: Rule
ruleInNamedMonth = Rule
{ name = "in <named-month>"
, pattern =
[ Predicate isAMonth
, regex "(の間|中)?(に|は|で)"
]
, prod = \tokens -> case tokens of
(td2:_) -> Just td2
_ -> Nothing
}
ruleYearLatent :: Rule
ruleYearLatent = Rule
{ name = "year (latent)"
, pattern =
[ Predicate $ isIntegerBetween 25 10000
, regex "年"
]
, prod = \tokens -> case tokens of
(token:_) -> do
n <- getIntValue token
tt $ mkLatent $ year n
_ -> Nothing
}
ruleImperialYearLatent :: Rule
ruleImperialYearLatent = Rule
{ name = "Imperial year (latent)"
, pattern =
[ regex "(明治|大正|昭和|平成|令和)"
, Predicate $ isIntegerBetween 0 65
, regex "年"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):token:_) -> do
n <- getIntValue token
ry <- case Text.toLower match of
"明治" -> Just 1868
"大正" -> Just 1912
"昭和" -> Just 1926
"平成" -> Just 1989
"令和" -> Just 2019
_ -> Nothing
tt $ mkLatent $ year (ry+n-1)
_ -> Nothing
}
ruleFirstImperialYearLatent :: Rule
ruleFirstImperialYearLatent = Rule
{ name = "First Imperial year (latent)"
, pattern =
[ regex "(明治|大正|昭和|平成|令和)元年"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
ry <- case Text.toLower match of
"明治" -> Just 1868
"大正" -> Just 1912
"昭和" -> Just 1926
"平成" -> Just 1989
"令和" -> Just 2019
_ -> Nothing
tt $ mkLatent $ year ry
_ -> Nothing
}
ruleInYear :: Rule
ruleInYear = Rule
{ name = "in <year>"
, pattern =
[ Predicate $ isGrainOfTime TG.Year
, regex "に|は|で"
]
, prod = \tokens -> case tokens of
(Token Time td:_) -> tt $ notLatent td
_ -> Nothing
}
ruleInMonth :: Rule
ruleInMonth = Rule
{ name = "in <month>"
, pattern =
[ Predicate $ isGrainOfTime TG.Month
, regex "に"
]
, prod = \tokens -> case tokens of
(Token Time td:_) -> tt $ notLatent td
_ -> Nothing
}
ruleInDuration :: Rule
ruleInDuration = Rule
{ name = "in <duration>"
, pattern =
[ dimension Duration
, regex "後|(間)?で|経ったら|経ってから|経過後に|経過してから|経過したら|過ぎに|過ぎたら"
]
, prod = \tokens -> case tokens of
(Token Duration dd:_) ->
tt $ inDuration dd
_ -> Nothing
}
ruleInTheLastDuration :: Rule
ruleInTheLastDuration = Rule
{ name = "in/for the last <duration>"
, pattern =
[ regex "この"
, Predicate $ isDurationGreaterThan TG.Hour
]
, prod = \tokens -> case tokens of
(_:Token Duration dd:_) ->
Token Time <$> interval TTime.Closed (durationBefore dd now) now
_ -> Nothing
}
ruleIntervalFromTime :: Rule
ruleIntervalFromTime = Rule
{ name = "from <time>"
, pattern =
[ dimension Time
, regex "(初め)?以(降|来)に?|~|よりも?後|から"
]
, prod = \tokens -> case tokens of
(Token Time td:_) -> tt $ withDirection TTime.After $ notLatent td
_ -> Nothing
}
ruleIntervalUntilTime :: Rule
ruleIntervalUntilTime = Rule
{ name = "until <time>"
, pattern =
[ dimension Time
, regex "までに?|よりも?前|以前"
]
, prod = \tokens -> case tokens of
(Token Time td:_) -> tt $ withDirection TTime.Before $ notLatent td
_ -> Nothing
}
ruleIntervalFromToTime :: Rule
ruleIntervalFromToTime = Rule
{ name = "from <time> to <time>"
, pattern =
[ dimension Time
, regex "(初め)?以(降|来)に?|~|よりも?後?|(初め|頭)?から|以降"
, dimension Time
, regex "末?まで|(より|以)前|の間|にかけて|いっぱい"
]
, prod = \tokens -> case tokens of
(Token Time td1:_:Token Time td2:_) ->
Token Time <$> interval TTime.Closed td1 td2
_ -> Nothing
}
ruleIntervalTimes :: Rule
ruleIntervalTimes = Rule
{ name = "<time> - <time>"
, pattern =
[ dimension Time
, regex "-|~|~|・|、|から"
, dimension Time
]
, prod = \tokens -> case tokens of
(Token Time td1:_:Token Time td2:_) ->
Token Time <$> interval TTime.Closed td1 td2
_ -> Nothing
}
ruleIntervalYear :: Rule
ruleIntervalYear = Rule
{ name = "<year> - <year>"
, pattern =
[ Predicate $ isIntegerBetween 1000 10000
, regex "-|~|~|・|、|から"
, Predicate $ isIntegerBetween 1000 10000
, regex "年"
]
, prod = \tokens -> case tokens of
(t1:_:t2:_) -> do
y1 <- getIntValue t1
y2 <- getIntValue t2
guard (y1 < y2)
Token Time <$> interval TTime.Closed (year y1) (year y2)
_ -> Nothing
}
ruleWeekend :: Rule
ruleWeekend = Rule
{ name = "week-end"
, pattern =
[ regex "週末|しゅうまつ"
]
, prod = \_ -> tt $ mkOkForThisNext weekend
}
ruleNow :: Rule
ruleNow = Rule
{ name = "now"
, pattern =
[ regex "今すぐ|いま|即|ただ(いま|ちに)"
]
, prod = \_ -> tt now
}
ruleThisTimeGrain :: Rule
ruleThisTimeGrain = Rule
{ name = "this|current <time-grain>"
, pattern =
[ regex "現在の|(今|こ(ん|の)|当|現)(1|一)?"
, dimension TimeGrain
]
, prod = \tokens -> case tokens of
( _:
Token TimeGrain grain:
_) -> tt $ cycleNth grain 0
_ -> Nothing
}
ruleNextTimeGrain :: Rule
ruleNextTimeGrain = Rule
{ name = "next <time-grain>"
, pattern =
[ regex "来"
, dimension TimeGrain
]
, prod = \tokens -> case tokens of
( _:
Token TimeGrain grain:
_) -> tt $ cycleNth grain 1
_ -> Nothing
}
ruleLastTimeGrain :: Rule
ruleLastTimeGrain = Rule
{ name = "last <time-grain>"
, pattern =
[ regex "(前|直近|最後|昨|先)の?(1|一)?"
, dimension TimeGrain
]
, prod = \tokens -> case tokens of
( _:
Token TimeGrain grain:
_) -> tt $ cycleNth grain $ - 1
_ -> Nothing
}
ruleDurationLastNext :: Rule
ruleDurationLastNext = Rule
{ name = "last|past|next <duration>"
, pattern =
[ regex "(過去|直在|次の)"
, dimension Duration
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
Token Duration DurationData{TDuration.grain, TDuration.value}:
_) -> case Text.toLower match of
"次の" -> tt $ cycleN True grain value
"過去" -> tt $ cycleN True grain (- value)
"直在" -> tt $ cycleN True grain (- value)
_ -> Nothing
_ -> Nothing
}
ruleInstants :: [Rule]
ruleInstants = mkRuleInstants
[ ("today" , TG.Day , 0 , "今日(1日)?|きょう|本日|ほんじつ" )
, ("tomorrow" , TG.Day , 1 , "明日|あした|あす|みょうにち" )
, ("yesterday" , TG.Day , -1 , "(前|昨)日|きのう|さくじつ" )
, ("next week" , TG.Week , 1 , "らいしゅう" )
, ("last week" , TG.Week , -1 , "せんしゅう" )
, ("a week after next", TG.Week , 2 , "再来週|さらいしゅう" )
, ("this month" , TG.Month , 0 , "このひと月" )
, ("this quarter" , TG.Quarter, 0 , "本四半期|今の四半期|現行四半期" )
, ("this year" , TG.Year , 0 , "本年" )
, ("last year" , TG.Year , -1 , "去年" )
]
ruleDaysOfWeek :: [Rule]
ruleDaysOfWeek = mkRuleDaysOfWeek
[ ( "Monday" , "月曜日?|げつようび" )
, ( "Tuesday" , "火曜日?|かようび" )
, ( "Wednesday", "水曜日?|すいようび" )
, ( "Thursday" , "木曜日?|もくようび" )
, ( "Friday" , "金曜日?|きんようび" )
, ( "Saturday" , "土曜日?|どようび" )
, ( "Sunday" , "日曜日?|にちようび" )
]
ruleDaysOfWeekLatent :: [Rule]
ruleDaysOfWeekLatent = mkRuleDaysOfWeekLatent
[ ( "Monday" , "月|げつよう" )
, ( "Tuesday" , "火|かよう" )
, ( "Wednesday", "水|すいよう" )
, ( "Thursday" , "木|もくよう" )
, ( "Friday" , "金|きんよう" )
, ( "Saturday" , "土|どよう" )
, ( "Sunday" , "日|にちよう" )
]
ruleMonths :: [Rule]
ruleMonths = mkRuleMonths
[ ( "January" , "(1|一|1)(月|がつ)|いちがつ" )
, ( "February" , "(2|二|2)(月|がつ)|にがつ" )
, ( "March" , "(3|三|3)(月|がつ)|さんがつ" )
, ( "April" , "(4|四|4)(月|がつ)|しがつ" )
, ( "May" , "(5|五|5)(月|がつ)|ごがつ" )
, ( "June" , "(6|六|6)(月|がつ)|ろくがつ" )
, ( "July" , "(7|七|7)(月|がつ)|しちがつ" )
, ( "August" , "(8|八|8)(月|がつ)|はちがつ" )
, ( "September", "(9|九|9)(月|がつ)|くがつ" )
, ( "October" , "(10|十|10)(月|がつ)|じゅうがつ" )
, ( "November" , "(11|十一|11)(月|がつ)|じゅういちがつ" )
, ( "December" , "(12|十二|12)(月|がつ)|じゅうにがつ" )
]
rules :: [Rule]
rules =
[ ruleHHMM
, ruleHHMMKanji
, ruleHHMMKanjiNumeral
, ruleTODLatent
, ruleHHOClock
, ruleTODAMPM
, ruleTODAMPMKanji
, ruleAtTOD
, ruleMMDD
, ruleYYYYMM
, ruleYYYYMMDD
, ruleYearLatent
, ruleImperialYearLatent
, ruleFirstImperialYearLatent
, ruleMonthDOM
, ruleYearMonth
, ruleYearMonthDOM
, ruleImperialYearMonthDOM
, ruleOnADOW
, ruleAbsorbOnDay
, ruleOnDay
, ruleInNamedMonth
, ruleInYear
, ruleInMonth
, ruleInDuration
, ruleInTheLastDuration
, ruleIntervalFromTime
, ruleIntervalUntilTime
, ruleIntervalFromToTime
, ruleIntervalTimes
, ruleIntervalYear
, ruleWeekend
, ruleNow
, ruleThisTimeGrain
, ruleNextTimeGrain
, ruleLastTimeGrain
, ruleDurationLastNext
]
++ ruleDaysOfWeek
++ ruleDaysOfWeekLatent
++ ruleMonths
++ ruleInstants