in libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java [184:320]
/**
 * Builds date tokens out of day-of-month numbers found in {@code text}.
 *
 * <p>Every ordinal and integer extracted from the text whose parsed value lies in 1..31 is
 * checked against the following patterns, in this order:
 * <ol>
 *   <li>a month name directly in front of the number, e.g. '(Monday,) Jan twenty two';</li>
 *   <li>'for the 25th';</li>
 *   <li>'Thursday the 21st', accepted only when the weekday in the text equals the weekday of
 *       that day in the reference month;</li>
 *   <li>'20th of next month';</li>
 *   <li>'second Sunday' (ordinals 1..5 only);</li>
 *   <li>a trailing month, e.g. 'twenty second of June'.</li>
 * </ol>
 * Patterns 1 to 3 stop further processing of the current number once they produce a token;
 * patterns 4 to 6 are all evaluated, so one number may yield several tokens.
 *
 * @param text the input text to scan
 * @param reference the reference date; supplies the year and month for the weekday check and
 *     the fallback month when a matched month name is not in the month map
 * @return the start/end tokens found, in discovery order; empty when nothing matched
 */
private Collection<Token> numberWithMonth(String text, LocalDateTime reference) {
    List<Token> tokens = new ArrayList<>();

    List<ExtractResult> ers = config.getOrdinalExtractor().extract(text);
    ers.addAll(config.getIntegerExtractor().extract(text));

    for (ExtractResult result : ers) {
        int num;
        try {
            ParseResult parseResult = config.getNumberParser().parse(result);
            // A missing parse result or value is treated like an unparsable number (0),
            // which the range check below rejects.
            Object value = parseResult == null ? null : parseResult.getValue();
            num = value == null ? 0 : Float.valueOf(value.toString()).intValue();
        } catch (NumberFormatException ignored) {
            // Not a usable number; 0 is outside 1..31 so the candidate is skipped below.
            num = 0;
        }

        if (num < 1 || num > 31) {
            continue;
        }

        int resultStart = result.getStart();

        if (resultStart >= 0) {
            // Handling cases like '(Monday,) Jan twenty two'
            String frontStr = text.substring(0, resultStart);
            Optional<Match> monthEndMatch =
                Arrays.stream(RegExpUtility.getMatches(config.getMonthEnd(), frontStr)).findFirst();
            if (monthEndMatch.isPresent()) {
                Match monthMatch = monthEndMatch.get();
                int startIndex = monthMatch.index;
                int endIndex = monthMatch.index + monthMatch.length + result.getLength();
                int month = config.getMonthOfYear().getOrDefault(
                    monthMatch.getGroup("month").value.toLowerCase(), reference.getMonthValue());
                Pair<Integer, Integer> startEnd =
                    extendWithWeekdayAndYear(startIndex, endIndex, month, num, text, reference);
                tokens.add(new Token(startEnd.getValue0(), startEnd.getValue1()));
                continue;
            }

            // Handling cases like 'for the 25th'
            boolean isFound = false;
            for (Match matchCase : RegExpUtility.getMatches(config.getForTheRegex(), text)) {
                if (matchCase == null) {
                    continue;
                }
                String ordinalNum = matchCase.getGroup("DayOfMonth").value;
                if (ordinalNum.equals(result.getText())) {
                    // The trailing 'end' group is matched but excluded from the token span.
                    int endLength = matchCase.getGroup("end").value.length();
                    tokens.add(new Token(matchCase.index, matchCase.index + matchCase.length - endLength));
                    isFound = true;
                }
            }
            if (isFound) {
                continue;
            }

            // Handling cases like 'Thursday the 21st', which both 'Thursday' and '21st' refer to a same date
            for (Match matchCase : RegExpUtility.getMatches(config.getWeekDayAndDayOfMonthRegex(), text)) {
                if (matchCase == null) {
                    continue;
                }
                String ordinalNum = matchCase.getGroup("DayOfMonth").value;
                if (!ordinalNum.equals(result.getText())) {
                    continue;
                }

                // Get week of day for the ordinal number which is regarded as a date of reference month
                LocalDateTime date =
                    DateUtil.safeCreateFromMinValue(reference.getYear(), reference.getMonthValue(), num);
                String numWeekDayStr = date.getDayOfWeek().toString().toLowerCase();

                // Get week day from text directly, compare it with the weekday generated above
                // to see whether they refer to the same week day
                String extractedWeekDayStr = matchCase.getGroup("weekday").value.toLowerCase();

                // Look up as Integer: unboxing a missing key straight into an int would throw.
                // NOTE(review): numWeekDayStr is the English enum name; confirm that every
                // language config's day-of-week map contains those keys.
                Integer numWeekDay = config.getDayOfWeek().get(numWeekDayStr);
                Integer extractedWeekDay = config.getDayOfWeek().get(extractedWeekDayStr);

                // Compare by value, not by reference: an identity check against
                // DateUtil.minValue() only works if that helper hands back one shared instance.
                // Presumably the minimum value marks a date that could not be created
                // (e.g. the 31st of a 30-day month) - confirm against DateUtil.
                boolean isValidDate = !date.equals(DateUtil.minValue());
                if (isValidDate && numWeekDay != null && numWeekDay.equals(extractedWeekDay)) {
                    tokens.add(new Token(matchCase.index, resultStart + result.getLength()));
                    isFound = true;
                }
            }
            if (isFound) {
                continue;
            }

            int resultEnd = resultStart + result.getLength();
            String suffixStr = text.substring(resultEnd);
            String trimmedSuffix = suffixStr.trim();
            // Whitespace trimmed from the suffix, which the token spans below add back in.
            int spaceLen = suffixStr.length() - trimmedSuffix.length();

            // Handling cases like '20th of next month'
            ConditionalMatch beginMatch =
                RegexExtension.matchBegin(config.getRelativeMonthRegex(), trimmedSuffix, true);
            if (beginMatch.getSuccess() && beginMatch.getMatch().get().index == 0) {
                int resStart = resultStart;
                int resEnd = resultEnd + spaceLen + beginMatch.getMatch().get().length;

                // Check if prefix contains 'the', include it if any
                String prefix = text.substring(0, resStart);
                Optional<Match> prefixMatch =
                    Arrays.stream(RegExpUtility.getMatches(config.getPrefixArticleRegex(), prefix)).findFirst();
                if (prefixMatch.isPresent()) {
                    resStart = prefixMatch.get().index;
                }

                tokens.add(new Token(resStart, resEnd));
            }

            // Handling cases like 'second Sunday'
            beginMatch = RegexExtension.matchBegin(config.getWeekDayRegex(), trimmedSuffix, true);
            if (beginMatch.getSuccess() && num >= 1 && num <= 5
                    && "builtin.num.ordinal".equals(result.getType())) {
                String weekDayStr = beginMatch.getMatch().get().getGroup("weekday").value.toLowerCase();
                if (config.getDayOfWeek().containsKey(weekDayStr)) {
                    tokens.add(new Token(resultStart, resultEnd + spaceLen + beginMatch.getMatch().get().length));
                }
            }
        }

        // For cases like "I'll go back twenty second of June"
        int afterIndex = resultStart + result.getLength();
        if (afterIndex < text.length()) {
            String afterStr = text.substring(afterIndex);
            Optional<Match> ofMonthMatch =
                Arrays.stream(RegExpUtility.getMatches(config.getOfMonth(), afterStr)).findFirst();
            if (ofMonthMatch.isPresent()) {
                Match monthMatch = ofMonthMatch.get();
                int startIndex = resultStart;
                // Only the match length is added; this assumes the pattern matches at the very
                // start of afterStr - TODO confirm against the OfMonth regex definition.
                int endIndex = afterIndex + monthMatch.length;
                int month = config.getMonthOfYear().getOrDefault(
                    monthMatch.getGroup("month").value.toLowerCase(), reference.getMonthValue());
                Pair<Integer, Integer> startEnd =
                    extendWithWeekdayAndYear(startIndex, endIndex, month, num, text, reference);
                tokens.add(new Token(startEnd.getValue0(), startEnd.getValue1()));
            }
        }
    }

    return tokens;
}