private Collection numberWithMonth()

in libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java [184:320]


    /**
     * Extracts date tokens that are anchored on a day-of-month number found in {@code text}.
     *
     * <p>Every ordinal and integer extracted from the text whose parsed value lies in 1..31 is
     * checked against the following patterns (examples taken from the cases this method handles):
     * <ul>
     *   <li>a month before the number: '(Monday,) Jan twenty two'</li>
     *   <li>'for the 25th'</li>
     *   <li>weekday plus day of month that agree with each other: 'Thursday the 21st'</li>
     *   <li>a relative month after the number: '20th of next month'</li>
     *   <li>an ordinal (1..5) followed by a weekday: 'second Sunday'</li>
     *   <li>a month after the number: 'twenty second of June'</li>
     * </ul>
     * The first three patterns are exclusive: once one of them produces a token, the remaining
     * patterns are not tried for that number.
     *
     * @param text      the text to scan
     * @param reference reference date; supplies the default month and, for the weekday check, the year and month
     * @return the matched spans as tokens (start inclusive, end exclusive offsets into {@code text});
     *         empty when nothing matches
     */
    private Collection<Token> numberWithMonth(String text, LocalDateTime reference) {
        List<Token> tokens = new ArrayList<>();

        // Copy so that the list handed back by the ordinal extractor is never mutated.
        List<ExtractResult> ers = new ArrayList<>(config.getOrdinalExtractor().extract(text));
        ers.addAll(config.getIntegerExtractor().extract(text));

        for (ExtractResult result : ers) {
            int num;
            try {
                ParseResult parseResult = config.getNumberParser().parse(result);
                Object value = parseResult == null ? null : parseResult.getValue();
                // A missing value is treated like an unparsable one: 0 fails the range check below.
                num = value == null ? 0 : Float.valueOf(value.toString()).intValue();
            } catch (NumberFormatException ignored) {
                // Not a usable number; 0 is outside 1..31 so the candidate is skipped.
                num = 0;
            }

            // Only values that can be a day of month are of interest.
            if (num < 1 || num > 31) {
                continue;
            }

            int resultStart = result.getStart();
            int resultEnd = resultStart + result.getLength();

            if (resultStart >= 0) {
                // Handling cases like '(Monday,) Jan twenty two'
                String frontStr = text.substring(0, resultStart);

                Optional<Match> match = Arrays.stream(RegExpUtility.getMatches(config.getMonthEnd(), frontStr)).findFirst();
                if (match.isPresent()) {
                    int startIndex = match.get().index;
                    // NOTE(review): this assumes the month match runs right up to the number - confirm against the MonthEnd regex.
                    int endIndex = match.get().index + match.get().length + result.getLength();

                    // Fall back to the reference month when the matched month text is not a known key.
                    int month = config.getMonthOfYear().getOrDefault(match.get().getGroup("month").value.toLowerCase(), reference.getMonthValue());

                    Pair<Integer, Integer> startEnd = extendWithWeekdayAndYear(startIndex, endIndex, month, num, text, reference);

                    tokens.add(new Token(startEnd.getValue0(), startEnd.getValue1()));
                    continue;
                }

                // Handling cases like 'for the 25th'
                Match[] matches = RegExpUtility.getMatches(config.getForTheRegex(), text);
                boolean isFound = false;

                for (Match matchCase : matches) {
                    if (matchCase != null) {
                        String ordinalNum = matchCase.getGroup("DayOfMonth").value;
                        if (ordinalNum.equals(result.getText())) {
                            // The "end" group is excluded from the token; an empty group contributes 0.
                            int endLength = matchCase.getGroup("end").value.length();

                            tokens.add(new Token(matchCase.index, matchCase.index + matchCase.length - endLength));
                            isFound = true;
                        }
                    }
                }

                if (isFound) {
                    continue;
                }

                // Handling cases like 'Thursday the 21st', which both 'Thursday' and '21st' refer to a same date
                matches = RegExpUtility.getMatches(config.getWeekDayAndDayOfMonthRegex(), text);
                isFound = false;
                for (Match matchCase : matches) {
                    if (matchCase != null) {
                        String ordinalNum = matchCase.getGroup("DayOfMonth").value;
                        if (ordinalNum.equals(result.getText())) {
                            // Get week of day for the ordinal number which is regarded as a date of reference month
                            LocalDateTime date = DateUtil.safeCreateFromMinValue(reference.getYear(), reference.getMonthValue(), num);
                            String numWeekDayStr = date.getDayOfWeek().toString().toLowerCase();

                            // Get week day from text directly, compare it with the weekday generated above
                            // to see whether they refer to the same week day
                            String extractedWeekDayStr = matchCase.getGroup("weekday").value.toLowerCase();

                            // Keep the looked-up values boxed: a key that is missing from the weekday map
                            // must mean "no match", not a NullPointerException from unboxing.
                            Integer numWeekDay = config.getDayOfWeek().get(numWeekDayStr);
                            Integer extractedWeekDay = config.getDayOfWeek().get(extractedWeekDayStr);
                            boolean sameWeekDay = numWeekDay != null && numWeekDay.equals(extractedWeekDay);

                            // Compare by value: '!=' on LocalDateTime only checks object identity and
                            // cannot reliably recognise the min-value sentinel.
                            if (!date.equals(DateUtil.minValue()) && sameWeekDay) {
                                tokens.add(new Token(matchCase.index, resultEnd));
                                isFound = true;
                            }
                        }
                    }
                }

                if (isFound) {
                    continue;
                }

                // Text following the number; shared by the two suffix-based cases below.
                String suffixStr = text.substring(resultEnd);
                String trimmedSuffix = suffixStr.trim();
                // Only the whitespace between the number and the match belongs to the token;
                // trailing whitespace of the text must not extend it.
                int spaceLen = leadingWhitespaceLength(suffixStr);

                // Handling cases like '20th of next month'
                ConditionalMatch beginMatch = RegexExtension.matchBegin(config.getRelativeMonthRegex(), trimmedSuffix, true);
                if (beginMatch.getSuccess() && beginMatch.getMatch().get().index == 0) {
                    int resStart = resultStart;
                    int resEnd = resultEnd + spaceLen + beginMatch.getMatch().get().length;

                    // Check if prefix contains 'the', include it if any
                    String prefix = text.substring(0, resStart);
                    Optional<Match> prefixMatch = Arrays.stream(RegExpUtility.getMatches(config.getPrefixArticleRegex(), prefix)).findFirst();
                    if (prefixMatch.isPresent()) {
                        resStart = prefixMatch.get().index;
                    }

                    tokens.add(new Token(resStart, resEnd));
                }

                // Handling cases like 'second Sunday'
                beginMatch = RegexExtension.matchBegin(config.getWeekDayRegex(), trimmedSuffix, true);
                if (beginMatch.getSuccess() && num >= 1 && num <= 5 && result.getType().equals("builtin.num.ordinal")) {
                    String weekDayStr = beginMatch.getMatch().get().getGroup("weekday").value.toLowerCase();
                    if (config.getDayOfWeek().containsKey(weekDayStr)) {
                        tokens.add(new Token(resultStart, resultEnd + spaceLen + beginMatch.getMatch().get().length));
                    }
                }
            }

            // For cases like "I'll go back twenty second of June"
            if (resultEnd < text.length()) {
                String afterStr = text.substring(resultEnd);

                Optional<Match> match = Arrays.stream(RegExpUtility.getMatches(config.getOfMonth(), afterStr)).findFirst();
                if (match.isPresent()) {
                    int startIndex = resultStart;
                    // NOTE(review): this assumes the match starts directly after the number - confirm against the OfMonth regex.
                    int endIndex = resultEnd + match.get().length;

                    // Fall back to the reference month when the matched month text is not a known key.
                    int month = config.getMonthOfYear().getOrDefault(match.get().getGroup("month").value.toLowerCase(), reference.getMonthValue());

                    Pair<Integer, Integer> startEnd = extendWithWeekdayAndYear(startIndex, endIndex, month, num, text, reference);
                    tokens.add(new Token(startEnd.getValue0(), startEnd.getValue1()));
                }
            }
        }

        return tokens;
    }

    /**
     * Counts the leading characters of {@code s} that {@link String#trim()} would strip
     * (code points up to and including U+0020).
     */
    private static int leadingWhitespaceLength(String s) {
        int count = 0;
        while (count < s.length() && s.charAt(count) <= ' ') {
            count++;
        }
        return count;
    }