in Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_date.py [0:0]
def number_with_month(self, source: str, reference: datetime) -> "List[Token]":
    """Collect date spans in *source* that are anchored on a day-of-month number.

    Every ordinal/integer extracted from ``source`` whose value is in 1..31 is
    tried against a series of patterns taken from ``self.config``:

    * a month name directly before the number (``month_end``),
    * 'for the 25th' (``for_the_regex``),
    * 'Thursday the 21st' (``week_day_and_day_of_month_regex``),
    * 'Monday 21' (``week_day_and_day_regex``),
    * '20th of next month' (``relative_month_regex``),
    * 'second Sunday' (``week_day_regex``),
    * 'twenty second of June' (``of_month``).

    Args:
        source: Text to scan.
        reference: Date whose month/year are used when checking that a weekday
            named in the text agrees with the day number.

    Returns:
        A list of ``Token(start, end)`` index pairs into ``source``.
    """
    # Imported locally, as in the original implementation.
    from .utilities import DateUtils

    ret: List[Token] = list()
    extract_results = self.config.ordinal_extractor.extract(source)
    extract_results.extend(self.config.integer_extractor.extract(source))

    for result in extract_results:
        num = int(self.config.number_parser.parse(result).value)

        # Only values that can be a day of the month are interesting.
        if num < 1 or num > 31:
            continue

        if result.start >= 0:
            front_string = source[0:result.start or 0]

            # Check that the extracted number is not part of a decimal number, time expression or currency
            # (e.g. '123.24', '12:24', '$12')
            if MatchingUtil.is_invalid_day_number_prefix(front_string):
                continue

            # Month name right before the number.
            month_match = regex.search(self.config.month_end, front_string)
            if month_match is not None:
                start_index = month_match.start()
                result_length = result.length if result.length else 0
                end_index = month_match.end() + result_length

                month_name = str(RegExpUtility.get_group(
                    month_match, Constants.MONTH_GROUP_NAME)).lower()
                # The helper returns the (possibly adjusted) start and end;
                # presumably it widens the span over an adjacent weekday/year.
                start_index, end_index = self.extend_with_week_day_and_year(
                    start_index, end_index, self.config.month_of_year[month_name],
                    num, source, reference)

                # Use BOTH adjusted bounds; previously the adjusted start was dropped.
                ret.append(Token(start_index, end_index))
                continue

            # handling cases like 'for the 25th'
            is_found = False
            for match_case in regex.finditer(self.config.for_the_regex, source):
                ordinal_num = RegExpUtility.get_group(
                    match_case, Constants.DAY_OF_MONTH)
                if ordinal_num == result.text:
                    # Exclude the trailing 'end' group from the reported span.
                    end_length = len(
                        RegExpUtility.get_group(match_case, TimeTypeConstants.END))
                    ret.append(Token(match_case.start(),
                                     match_case.end() - end_length))
                    is_found = True

            if is_found:
                continue

            # handling cases like 'Thursday the 21st', which both 'Thursday' and '21st' refer to a same date
            for match_case in regex.finditer(
                    self.config.week_day_and_day_of_month_regex, source):
                ordinal_num = RegExpUtility.get_group(
                    match_case, Constants.DAY_OF_MONTH)
                if ordinal_num != result.text:
                    continue

                # get week of day for the ordinal number which is regarded as a date of reference month
                date = DateUtils.safe_create_from_min_value(
                    reference.year, reference.month, num)
                num_week_day_str: str = calendar.day_name[date.weekday()].lower()

                # get week day from text directly, compare it with the weekday generated above
                # to see whether they refer to a same week day
                extracted_week_day_str = RegExpUtility.get_group(
                    match_case, 'weekday').lower()
                if (date != DateUtils.min_value and
                        self.config.day_of_week[num_week_day_str] ==
                        self.config.day_of_week[extracted_week_day_str]):
                    ret.append(Token(match_case.start(), match_case.end()))
                    is_found = True

            if is_found:
                continue

            # Handling cases like 'Monday 21', which both 'Monday' and '21' refer to the same date
            # The year of expected date can be different to the year of referenceDate.
            number_end = result.start + result.length
            for match_case in regex.finditer(self.config.week_day_and_day_regex, source):
                # The weekday+day match must end exactly where this number ends,
                # i.e. (number_end - match start) equals the match length.
                # The previous code compared that length with the match *start*.
                if number_end == match_case.end():
                    ret.append(Token(match_case.start(), match_case.end()))
                    is_found = True

            if is_found:
                continue

            # handling cases like '20th of next month'
            suffix_str: str = source[number_end:]
            # Whitespace between the number and what follows. Only LEADING
            # whitespace counts; using strip() here also counted trailing
            # whitespace and could push the token end past the matched text.
            space_len = len(suffix_str) - len(suffix_str.lstrip())

            match = regex.match(
                self.config.relative_month_regex, suffix_str.lower().strip())
            if match is not None and match.start() == 0:
                res_start = result.start
                res_end = res_start + result.length + space_len + len(match.group())

                # Check if prefix contains 'the', include it if any
                prefix = source[: res_start or 0]
                prefix_match = self.config.prefix_article_regex.match(prefix)
                if prefix_match:
                    res_start = prefix_match.start()

                ret.append(Token(res_start, res_end))

            # handling cases like 'second Sunday'
            match = regex.match(
                self.config.week_day_regex, suffix_str.strip())
            if (match is not None and match.start() == 0 and 1 <= num <= 5 and
                    result.type == NumberConstants.SYS_NUM_ORDINAL):
                week_day_str = RegExpUtility.get_group(
                    match, Constants.WEEKDAY_GROUP_NAME).lower()
                if week_day_str in self.config.day_of_week:
                    ret.append(
                        Token(result.start, number_end + space_len + len(match.group())))

        # For cases like "I'll go back twenty second of June"
        if result.start + result.length < len(source):
            after_string = source[result.start + result.length:]
            match = regex.match(self.config.of_month, after_string)
            if match is not None:
                start_index = result.start if result.start else 0
                result_length = result.length if result.length else 0
                end_index = (start_index + result_length) + len(match.group())

                # Fall back to the reference month's number (as a string key)
                # when the month group is empty.
                month_key = RegExpUtility.get_group(
                    match, Constants.MONTH_GROUP_NAME).lower() or str(reference.month)

                # Keep the adjusted bounds; the return value used to be thrown
                # away, so the token was never widened here.
                start_index, end_index = self.extend_with_week_day_and_year(
                    start_index, end_index, self.config.month_of_year[month_key],
                    num, source, reference)
                ret.append(Token(start_index, end_index))

    return ret