in Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_datetimeperiod.py [0:0]
def match_time_of_day(self, source: str, reference: datetime, date_extract_results: [ExtractResult] = None):
    """Collect token spans pairing a date with a time-of-day expression.

    The method works in three passes:

    1. Every match of ``config.specific_time_of_day_regex`` in ``source``
       becomes a token.
    2. For each already-extracted date, the text right after and right
       before the date is probed for a time-of-day expression
       (``config.period_time_of_day_with_date_regex``, then the AM/PM
       description regexes) and a token covering both is added.
    3. Every token found so far is extended with an adjacent time period
       reported by ``config.time_period_extractor``, before or after it.

    :param source: Text being scanned.
    :param reference: Reference date-time; not read by this method.
    :param date_extract_results: Date extraction results for ``source``.
        ``None`` (the default) is treated like an empty list.
    :return: List of ``Token`` objects; overlapping spans are not merged here.
    """
    tokens = [
        Token(match.start(), match.end())
        for match in regex.finditer(self.config.specific_time_of_day_regex, source)
    ]

    # Date followed by morning, afternoon or morning, afternoon followed by Date.
    # The truthiness test also covers the default ``None``, which previously
    # crashed on ``len(None)``.
    if not date_extract_results:
        return tokens

    for extracted_result in date_extract_results:
        date_end = extracted_result.start + extracted_result.length
        after_str = source[date_end:]
        match = regex.search(self.config.period_time_of_day_with_date_regex, after_str)

        if match:
            connector_str = after_str[0:match.start()]

            # For cases like "Friday afternoon between 1PM and 4PM" which "Friday afternoon" need to be
            # extracted first
            if not connector_str or connector_str.isspace():
                time_of_day = RegExpUtility.get_group(match, Constants.TIME_OF_DAY_GROUP_NAME)
                end = date_end + len(time_of_day) + match.start(Constants.TIME_OF_DAY_GROUP_NAME)
                tokens.append(Token(extracted_result.start, end))
                # NOTE(review): ``break`` leaves the loop over *all* date results, so
                # later dates are not examined once this case fires - confirm that
                # ``continue`` was not intended.
                break

            # Trim here is set to false as the Regex might catch white spaces before or after the text
            if RegExpUtility.is_exact_match(self.config.middle_pause_regex, connector_str, False):
                suffix = after_str[match.end():].strip()
                if regex.search(self.config.general_ending_regex, suffix):
                    tokens.append(Token(extracted_result.start, date_end + match.end()))

        # Fall back to the AM description, then to the PM description when the
        # current match is missing or does not start right after the date.
        if not match:
            match = regex.search(self.config.am_desc_regex, after_str)

        if not match or after_str[0:match.start()]:
            match = regex.search(self.config.pm_desc_regex, after_str)

        if match and not after_str[0:match.start()]:
            # NOTE(review): this uses ``extracted_result.end`` whereas the other
            # branches use ``start + length`` - confirm both denote the same offset.
            tokens.append(Token(extracted_result.start, extracted_result.end + match.end()))

        # Time-of-day expression located before the date.
        prefix_str = source[0: extracted_result.start]
        match = regex.search(self.config.period_time_of_day_with_date_regex, prefix_str)
        if match:
            connector_str = prefix_str[match.end():]
            if not connector_str or connector_str.isspace():
                # Only a non-empty, all-whitespace gap joins the two parts.
                mid_str = source[match.end(): extracted_result.start]
                if mid_str and mid_str.isspace():
                    tokens.append(Token(match.start(), date_end))
            # Trim here is set to false as the Regex might catch white spaces before or after the text
            elif RegExpUtility.is_exact_match(self.config.middle_pause_regex, connector_str, False):
                suffix = source[date_end:].strip(' ')
                # NOTE(review): ``Pattern.match`` only matches at the start of
                # ``suffix`` while the after-date branch uses ``regex.search`` -
                # confirm the difference is intentional.
                if self.config.general_ending_regex.match(suffix):
                    tokens.append(Token(match.start(), date_end))

    # Check whether there are adjacent time period strings, before or after.
    # Iterate over a snapshot because ``tokens`` grows inside the loop.
    for token in list(tokens):
        # Try to extract a time period in before-string
        if token.start > 0:
            before_str = source[0:token.start]
            if before_str:
                for time_period in self.config.time_period_extractor.extract(before_str):
                    period_end = time_period.start + time_period.length
                    mid_str = before_str[period_end:]
                    if (not mid_str or mid_str.isspace()) and not time_period.meta_data:
                        tokens.append(Token(time_period.start, period_end + len(mid_str) + token.length))

        # Try to extract a time period in after-string
        if token.end <= len(source):
            after_str = source[token.start + token.length:]
            if after_str:
                for time_period in self.config.time_period_extractor.extract(after_str):
                    mid_str = after_str[0:time_period.start]
                    if (not mid_str or mid_str.isspace()) and not time_period.meta_data:
                        tokens.append(Token(token.start, token.end + len(mid_str) + time_period.length))

    return tokens