in Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_datetime.py [0:0]
def merge_date_and_time(self, source: str, reference: datetime) -> List[Token]:
    """Pair neighbouring date and time mentions in ``source`` into tokens.

    Date results, time results and (when the CALENDAR option is set) bare
    numbers matched by ``number_as_time_regex`` are sorted by start offset.
    Each result is then paired with the next result that does not overlap
    it. A date/time, time/date or date/number pair is merged into one
    ``Token`` when the text between the two parts is an accepted connector.
    Finally each token is widened over a match of ``suffix_regex`` in the
    text after it and a match of ``common_date_prefix_regex`` in the text
    before it.

    Args:
        source: The text to scan.
        reference: Reference datetime forwarded to the date and time
            extractors and to ``extend_with_date_time_and_year``.

    Returns:
        The merged tokens; an empty list when ``source`` has no date, or
        has neither a time nor a number usable as a time.
    """
    tokens: List[Token] = []
    date_ers: List[ExtractResult] = self.config.date_point_extractor.extract(
        source, reference)
    if not date_ers:
        return tokens

    time_ers = self.config.time_point_extractor.extract(source, reference)
    # Collect *every* number candidate together with its position. The
    # previous ``.match(source)`` produced a single match-or-None, which
    # can neither be compared to 0 nor be measured with len().
    time_num_matches = list(self.config.number_as_time_regex.finditer(source))
    if not time_ers and not time_num_matches:
        return tokens

    # Work on our own list so the extractor's result is not mutated.
    extract_results = list(date_ers)
    extract_results.extend(time_ers)

    # handle cases which use numbers as time points
    # only enabled in CalendarMode
    if (self.config.options & DateTimeOptions.CALENDAR) != 0:
        for num_match in time_num_matches:
            node = ExtractResult()
            # Use the match's own offset: searching the source for the
            # matched text would find an earlier identical number instead.
            node.start = num_match.start()
            node.length = len(num_match.group())
            node.text = num_match.group()
            node.type = NumConstants.SYS_NUM_INTEGER
            extract_results.append(node)

    # sorted() is stable, so a time result keeps its place ahead of a
    # number node that starts at the same offset.
    extract_results = sorted(extract_results, key=lambda x: x.start)

    i = 0
    while i < len(extract_results) - 1:
        # Skip everything that overlaps the current result.
        j = i + 1
        while j < len(extract_results) and extract_results[i].overlap(extract_results[j]):
            j += 1
        if j >= len(extract_results):
            break

        first = extract_results[i]
        second = extract_results[j]
        # Type tags are compared by value; identity comparison only
        # works by accident of object sharing.
        second_is_number = second.type == NumConstants.SYS_NUM_INTEGER
        mergeable = (
            (first.type == Constants.SYS_DATETIME_DATE
             and second.type == Constants.SYS_DATETIME_TIME)
            or (first.type == Constants.SYS_DATETIME_TIME
                and second.type == Constants.SYS_DATETIME_DATE)
            or (first.type == Constants.SYS_DATETIME_DATE and second_is_number)
        )

        if mergeable:
            middle_begin = first.start + (first.length or 0)
            middle_end = second.start or 0
            if middle_begin > middle_end:
                i = j + 1
                continue

            middle_str = source[middle_begin:middle_end].strip()
            valid = False

            if second_is_number:
                # for cases like "tomorrow 3", "tomorrow at 3"
                match = self.config.date_number_connector_regex.search(middle_str)
                if not middle_str or match:
                    valid = True
            else:
                # for case like "3 pm or later on monday"
                match = self.config.suffix_after_regex.search(middle_str)
                if match:
                    # Keep only what follows the matched suffix.
                    middle_str = middle_str[match.end():].strip()
                # A suffix match that leaves nothing behind is rejected;
                # otherwise the remainder must be a connector.
                if not (match and not middle_str):
                    if self.config.is_connector_token(middle_str):
                        valid = True

            if valid:
                begin = first.start or 0
                end = (second.start or 0) + (second.length or 0)
                # NOTE(review): unpacking order (end first, then start) is
                # kept exactly as it was; the helper is not visible here,
                # so confirm it against the helper's return statement.
                end_index, start_index = self.extend_with_date_time_and_year(
                    begin, end, source, reference)
                tokens.append(Token(start_index, end_index))
                i = j + 1
                continue

        i = j

    # handle "in the afternoon" at the end of entity
    for idx, token in enumerate(tokens):
        after_str = source[token.end:]
        match = self.config.suffix_regex.search(after_str)
        if match:
            tokens[idx] = Token(token.start, token.end + len(match.group()))

    # handle "day" prefixes
    for idx, token in enumerate(tokens):
        before_str = source[:token.start]
        match = self.config.utility_configuration.common_date_prefix_regex.search(before_str)
        if match:
            tokens[idx] = Token(token.start - len(match.group()), token.end)

    return tokens