in hgext/pushlog/parsedatetime/__init__.py [0:0]
def __init__(self, localeID=None, usePyICU=True,
fallbackLocales=['en_US']):
self.localeID = localeID
self.fallbackLocales = fallbackLocales[:]
if 'en_US' not in self.fallbackLocales:
self.fallbackLocales.append('en_US')
# define non-locale specific constants
self.locale = None
self.usePyICU = usePyICU
# starting cache of leap years
# daysInMonth will add to this if during
# runtime it gets a request for a year not found
self._leapYears = list(range(1904, 2097, 4))
self.Second = 1
self.Minute = 60 # 60 * self.Second
self.Hour = 3600 # 60 * self.Minute
self.Day = 86400 # 24 * self.Hour
self.Week = 604800 # 7 * self.Day
self.Month = 2592000 # 30 * self.Day
self.Year = 31536000 # 365 * self.Day
self._DaysInMonthList = (31, 28, 31, 30, 31, 30,
31, 31, 30, 31, 30, 31)
self.rangeSep = '-'
self.BirthdayEpoch = 50
# When True the starting time for all relative calculations will come
# from the given SourceTime, otherwise it will be 9am
self.StartTimeFromSourceTime = False
# YearParseStyle controls how we parse "Jun 12", i.e. dates that do
# not have a year present. The default is to compare the date given
# to the current date, and if prior, then assume the next year.
# Setting this to 0 will prevent that.
self.YearParseStyle = 1
# DOWParseStyle controls how we parse "Tuesday"
# If the current day was Thursday and the text to parse is "Tuesday"
# then the following table shows how each style would be returned
# -1, 0, +1
#
# Current day marked as ***
#
# Sun Mon Tue Wed Thu Fri Sat
# week -1
# current -1,0 ***
# week +1 +1
#
# If the current day was Monday and the text to parse is "Tuesday"
# then the following table shows how each style would be returned
# -1, 0, +1
#
# Sun Mon Tue Wed Thu Fri Sat
# week -1 -1
# current *** 0,+1
# week +1
self.DOWParseStyle = 1
# CurrentDOWParseStyle controls how we parse "Friday"
# If the current day was Friday and the text to parse is "Friday"
# then the following table shows how each style would be returned
# True/False. This also depends on DOWParseStyle.
#
# Current day marked as ***
#
# DOWParseStyle = 0
# Sun Mon Tue Wed Thu Fri Sat
# week -1
# current T,F
# week +1
#
# DOWParseStyle = -1
# Sun Mon Tue Wed Thu Fri Sat
# week -1 F
# current T
# week +1
#
# DOWParseStyle = +1
#
# Sun Mon Tue Wed Thu Fri Sat
# week -1
# current T
# week +1 F
self.CurrentDOWParseStyle = False
if self.usePyICU:
self.locale = get_icu(self.localeID)
if self.locale.icu is None:
self.usePyICU = False
self.locale = None
if self.locale is None:
if self.localeID not in pdtLocales:
for localeId in range(0, len(self.fallbackLocales)):
self.localeID = self.fallbackLocales[localeId]
if self.localeID in pdtLocales:
break
self.locale = pdtLocales[self.localeID]
if self.locale is not None:
def _getLocaleDataAdjusted(localeData):
"""
If localeData is defined as ["mon|mnd", 'tu|tues'...] then this
function splits those definitions on |
"""
adjusted = []
for d in localeData:
if '|' in d:
adjusted += d.split("|")
else:
adjusted.append(d)
return adjusted
def re_join(g):
return '|'.join(re.escape(i) for i in g)
mths = _getLocaleDataAdjusted(self.locale.Months)
smths = _getLocaleDataAdjusted(self.locale.shortMonths)
swds = _getLocaleDataAdjusted(self.locale.shortWeekdays)
wds = _getLocaleDataAdjusted(self.locale.Weekdays)
# escape any regex special characters that may be found
self.locale.re_values['months'] = re_join(mths)
self.locale.re_values['shortmonths'] = re_join(smths)
self.locale.re_values['days'] = re_join(wds)
self.locale.re_values['shortdays'] = re_join(swds)
self.locale.re_values['dayoffsets'] = \
re_join(self.locale.dayOffsets)
self.locale.re_values['numbers'] = \
re_join(self.locale.numbers)
self.locale.re_values['decimal_mark'] = \
re.escape(self.locale.decimal_mark)
units = [unit for units in self.locale.units.values()
for unit in units] # flatten
units.sort(key=len, reverse=True) # longest first
self.locale.re_values['units'] = re_join(units)
self.locale.re_values['modifiers'] = re_join(self.locale.Modifiers)
self.locale.re_values['sources'] = re_join(self.locale.re_sources)
# For distinguishing numeric dates from times, look for timeSep
# and meridian, if specified in the locale
self.locale.re_values['timecomponents'] = \
re_join(self.locale.timeSep + self.locale.meridian)
# build weekday offsets - yes, it assumes the Weekday and
# shortWeekday lists are in the same order and Mon..Sun
# (Python style)
def _buildOffsets(offsetDict, localeData, indexStart):
o = indexStart
for key in localeData:
if '|' in key:
for k in key.split('|'):
offsetDict[k] = o
else:
offsetDict[key] = o
o += 1
_buildOffsets(self.locale.WeekdayOffsets,
self.locale.Weekdays, 0)
_buildOffsets(self.locale.WeekdayOffsets,
self.locale.shortWeekdays, 0)
# build month offsets - yes, it assumes the Months and shortMonths
# lists are in the same order and Jan..Dec
_buildOffsets(self.locale.MonthOffsets,
self.locale.Months, 1)
_buildOffsets(self.locale.MonthOffsets,
self.locale.shortMonths, 1)
_initSymbols(self)
# TODO: add code to parse the date formats and build the regexes up
# from sub-parts, find all hard-coded uses of date/time separators
# not being used in code, but kept in case others are manually
# utilizing this regex for their own purposes
self.RE_DATE4 = r'''(?P<date>
(
(
(?P<day>\d\d?)
(?P<suffix>{daysuffix})?
(,)?
(\s)*
)
(?P<mthname>
\b({months}|{shortmonths})\b
)\s*
(?P<year>\d\d
(\d\d)?
)?
)
)'''.format(**self.locale.re_values)
# still not completely sure of the behavior of the regex and
# whether it would be best to consume all possible irrelevant
# characters before the option groups (but within the {1,3} repetition
# group or inside of each option group, as it currently does
# however, right now, all tests are passing that were,
# including fixing the bug of matching a 4-digit year as ddyy
# when the day is absent from the string
self.RE_DATE3 = r'''(?P<date>
(?:
(?:^|\s+)
(?P<mthname>
{months}|{shortmonths}
)\b
|
(?:^|\s+)
(?P<day>[1-9]|[012]\d|3[01])
(?P<suffix>{daysuffix}|)\b
(?!\s*(?:{timecomponents}))
|
,?\s+
(?P<year>\d\d(?:\d\d|))\b
(?!\s*(?:{timecomponents}))
){{1,3}}
(?(mthname)|$-^)
)'''.format(**self.locale.re_values)
# not being used in code, but kept in case others are manually
# utilizing this regex for their own purposes
self.RE_MONTH = r'''(\s+|^)
(?P<month>
(
(?P<mthname>
\b({months}|{shortmonths})\b
)
(\s*
(?P<year>(\d{{4}}))
)?
)
)
(?=\s+|$|[^\w])'''.format(**self.locale.re_values)
self.RE_WEEKDAY = r'''\b
(?:
{days}|{shortdays}
)
\b'''.format(**self.locale.re_values)
self.RE_NUMBER = (r'(\b(?:{numbers})\b|\d+(?:{decimal_mark}\d+|))'
.format(**self.locale.re_values))
self.RE_SPECIAL = (r'(?P<special>^[{specials}]+)\s+'
.format(**self.locale.re_values))
self.RE_UNITS_ONLY = (r'''\b({units})\b'''
.format(**self.locale.re_values))
self.RE_UNITS = r'''\b(?P<qty>
-?
(?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})\b)\s*
(?P<units>{units})
)\b'''.format(**self.locale.re_values)
self.RE_QUNITS = r'''\b(?P<qty>
-?
(?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})\s+)\s*
(?P<qunits>{qunits})
)\b'''.format(**self.locale.re_values)
self.RE_MODIFIER = r'''\b(?:
{modifiers}
)\b'''.format(**self.locale.re_values)
self.RE_TIMEHMS = r'''([\s(\["'-]|^)
(?P<hours>\d\d?)
(?P<tsep>{timeseparator}|)
(?P<minutes>\d\d)
(?:(?P=tsep)
(?P<seconds>\d\d
(?:[\.,]\d+)?
)
)?\b'''.format(**self.locale.re_values)
self.RE_TIMEHMS2 = r'''([\s(\["'-]|^)
(?P<hours>\d\d?)
(?:
(?P<tsep>{timeseparator}|)
(?P<minutes>\d\d?)
(?:(?P=tsep)
(?P<seconds>\d\d?
(?:[\.,]\d+)?
)
)?
)?'''.format(**self.locale.re_values)
# 1, 2, and 3 here refer to the type of match date, time, or units
self.RE_NLP_PREFIX = r'''\b(?P<nlp_prefix>
(on)
(\s)+1
|
(at|in)
(\s)+2
|
(in)
(\s)+3
)'''
if 'meridian' in self.locale.re_values:
self.RE_TIMEHMS2 += (r'\s*(?P<meridian>{meridian})\b'
.format(**self.locale.re_values))
else:
self.RE_TIMEHMS2 += r'\b'
# Always support common . and - separators
dateSeps = ''.join(re.escape(s)
for s in self.locale.dateSep + ['-', '.'])
self.RE_DATE = r'''([\s(\["'-]|^)
(?P<date>
\d\d?[{0}]\d\d?(?:[{0}]\d\d(?:\d\d)?)?
|
\d{{4}}[{0}]\d\d?[{0}]\d\d?
)
\b'''.format(dateSeps)
self.RE_DATE2 = r'[{0}]'.format(dateSeps)
assert 'dayoffsets' in self.locale.re_values
self.RE_DAY = r'''\b
(?:
{dayoffsets}
)
\b'''.format(**self.locale.re_values)
self.RE_DAY2 = r'''(?P<day>\d\d?)
(?P<suffix>{daysuffix})?
'''.format(**self.locale.re_values)
self.RE_TIME = r'''\b
(?:
{sources}
)
\b'''.format(**self.locale.re_values)
self.RE_REMAINING = r'\s+'
# Regex for date/time ranges
self.RE_RTIMEHMS = r'''(\s*|^)
(\d\d?){timeseparator}
(\d\d)
({timeseparator}(\d\d))?
(\s*|$)'''.format(**self.locale.re_values)
self.RE_RTIMEHMS2 = (r'''(\s*|^)
(\d\d?)
({timeseparator}(\d\d?))?
({timeseparator}(\d\d?))?'''
.format(**self.locale.re_values))
if 'meridian' in self.locale.re_values:
self.RE_RTIMEHMS2 += (r'\s*({meridian})'
.format(**self.locale.re_values))
self.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps
self.RE_RDATE3 = r'''(
(
(
\b({months})\b
)\s*
(
(\d\d?)
(\s?|{daysuffix}|$)+
)?
(,\s*\d{{4}})?
)
)'''.format(**self.locale.re_values)
# "06/07/06 - 08/09/06"
self.DATERNG1 = (r'{0}\s*{rangeseparator}\s*{0}'
.format(self.RE_RDATE, **self.locale.re_values))
# "march 31 - june 1st, 2006"
self.DATERNG2 = (r'{0}\s*{rangeseparator}\s*{0}'
.format(self.RE_RDATE3, **self.locale.re_values))
# "march 1rd -13th"
self.DATERNG3 = (r'{0}\s*{rangeseparator}\s*(\d\d?)\s*(rd|st|nd|th)?'
.format(self.RE_RDATE3, **self.locale.re_values))
# "4:00:55 pm - 5:90:44 am", '4p-5p'
self.TIMERNG1 = (r'{0}\s*{rangeseparator}\s*{0}'
.format(self.RE_RTIMEHMS2, **self.locale.re_values))
self.TIMERNG2 = (r'{0}\s*{rangeseparator}\s*{0}'
.format(self.RE_RTIMEHMS, **self.locale.re_values))
# "4-5pm "
self.TIMERNG3 = (r'\d\d?\s*{rangeseparator}\s*{0}'
.format(self.RE_RTIMEHMS2, **self.locale.re_values))
# "4:30-5pm "
self.TIMERNG4 = (r'{0}\s*{rangeseparator}\s*{1}'
.format(self.RE_RTIMEHMS, self.RE_RTIMEHMS2,
**self.locale.re_values))
self.re_option = re.IGNORECASE + re.VERBOSE
self.cre_source = {'CRE_SPECIAL': self.RE_SPECIAL,
'CRE_NUMBER': self.RE_NUMBER,
'CRE_UNITS': self.RE_UNITS,
'CRE_UNITS_ONLY': self.RE_UNITS_ONLY,
'CRE_QUNITS': self.RE_QUNITS,
'CRE_MODIFIER': self.RE_MODIFIER,
'CRE_TIMEHMS': self.RE_TIMEHMS,
'CRE_TIMEHMS2': self.RE_TIMEHMS2,
'CRE_DATE': self.RE_DATE,
'CRE_DATE2': self.RE_DATE2,
'CRE_DATE3': self.RE_DATE3,
'CRE_DATE4': self.RE_DATE4,
'CRE_MONTH': self.RE_MONTH,
'CRE_WEEKDAY': self.RE_WEEKDAY,
'CRE_DAY': self.RE_DAY,
'CRE_DAY2': self.RE_DAY2,
'CRE_TIME': self.RE_TIME,
'CRE_REMAINING': self.RE_REMAINING,
'CRE_RTIMEHMS': self.RE_RTIMEHMS,
'CRE_RTIMEHMS2': self.RE_RTIMEHMS2,
'CRE_RDATE': self.RE_RDATE,
'CRE_RDATE3': self.RE_RDATE3,
'CRE_TIMERNG1': self.TIMERNG1,
'CRE_TIMERNG2': self.TIMERNG2,
'CRE_TIMERNG3': self.TIMERNG3,
'CRE_TIMERNG4': self.TIMERNG4,
'CRE_DATERNG1': self.DATERNG1,
'CRE_DATERNG2': self.DATERNG2,
'CRE_DATERNG3': self.DATERNG3,
'CRE_NLP_PREFIX': self.RE_NLP_PREFIX}
self.cre_keys = set(self.cre_source.keys())