public List extract()

in libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/sequence/extractors/BasePhoneNumberExtractor.java [76:200]


    /**
     * Extracts phone-number candidates from {@code text} and removes false positives.
     *
     * <p>Candidates come from {@code super.extract(text)}. Each candidate is dropped when any of
     * the following holds:
     * <ul>
     *   <li>it has fewer than 7 digits and its data tag is not {@code "ITPhoneNumber"};</li>
     *   <li>its whole text matches {@code SSN_FILTER_REGEX};</li>
     *   <li>it has exactly 16 digits and does not start with {@code "+"};</li>
     *   <li>it has exactly 15 digits and every space-separated group holds 3 or 4 digits;</li>
     *   <li>the character right after it is a forbidden suffix marker;</li>
     *   <li>the text before it matches the configured false-positive prefix regex;</li>
     *   <li>the character right before it is a boundary marker or a forbidden prefix marker,
     *       subject to the exceptions documented inline.</li>
     * </ul>
     * Finally, any candidate that lies entirely inside a match of
     * {@code BasePhoneNumbers.PhoneNumberMaskRegex} is removed.
     *
     * @param text the input text to scan; {@code null} yields an empty list
     * @return the surviving extraction results; a candidate preceded by an international dialing
     *         prefix is widened in place to include that prefix
     */
    public List<ExtractResult> extract(String text) {
        if (text == null || !PRE_CHECK_PHONE_NUMBER_REGEX.matcher(text).find()) {
            return new ArrayList<ExtractResult>();
        }

        List<ExtractResult> ers = super.extract(text);

        // Results are removed by index while iterating; every removal is paired with i-- so the
        // element that shifts into slot i is examined on the next pass.
        for (int i = 0; i < ers.size(); i++) {
            ExtractResult er = ers.get(i);
            String erText = er.getText();
            int digitCount = BasePhoneNumberExtractor.countDigits(erText);

            // Compare the tag by content, not by reference.
            boolean isItPhoneNumber = er.getData() != null && "ITPhoneNumber".equals(er.getData().toString());
            if ((digitCount < 7 && !isItPhoneNumber) || SSN_FILTER_REGEX.matcher(erText).matches()) {
                ers.remove(i);
                i--;
                continue;
            }

            if (digitCount == 16 && !erText.startsWith("+")) {
                ers.remove(i);
                i--;
                continue;
            }

            if (digitCount == 15) {
                // Drop the candidate when every space-separated group has exactly 3 or 4 digits.
                boolean onlyShortGroups = true;
                for (String numSpan : erText.split(" ")) {
                    int spanDigits = BasePhoneNumberExtractor.countDigits(numSpan);
                    if (spanDigits != 3 && spanDigits != 4) {
                        onlyShortGroups = false;
                        break;
                    }
                }

                if (onlyShortGroups) {
                    ers.remove(i);
                    i--;
                    continue;
                }
            }

            int start = er.getStart();
            int end = start + er.getLength();

            if (end < text.length()) {
                char following = text.charAt(end);
                if (BasePhoneNumbers.ForbiddenSuffixMarkers.contains(following)) {
                    ers.remove(i);
                    i--;
                    continue;
                }
            }

            if (start == 0) {
                // Nothing precedes the candidate, so no prefix checks apply.
                continue;
            }

            char ch = text.charAt(start - 1);
            // Everything before the character that immediately precedes the candidate.
            String front = text.substring(0, start - 1);

            if (this.config.getFalsePositivePrefixRegex() != null &&
                this.config.getFalsePositivePrefixRegex().matcher(front).find()) {
                ers.remove(i);
                i--;
                continue;
            }

            if (BasePhoneNumbers.BoundaryMarkers.contains(ch)) {
                if (SPECIAL_BOUNDARY_MARKERS.contains(ch) &&
                    BasePhoneNumberExtractor.checkFormattedPhoneNumber(erText) && start >= 2) {
                    char charGap = text.charAt(start - 2);
                    if (!Character.isDigit(charGap) && !Character.isWhitespace(charGap)) {
                        // Handle cases like "91a-677-0060": a lowercase letter before the marker
                        // rejects the candidate unless everything before that letter is a
                        // non-empty run of non-digit characters.
                        if (Character.isLowerCase(charGap) &&
                            !Pattern.matches("^[^0-9]+$", text.substring(0, start - 2))) {
                            ers.remove(i);
                            i--;
                        }

                        continue;
                    }

                    // check the international dialing prefix
                    if (INTERNATIONAL_DIALING_PREFIX_REGEX.matcher(front).find()) {
                        // Widen the result to cover the prefix plus the marker character.
                        int moveOffset = RegExpUtility.getMatches(INTERNATIONAL_DIALING_PREFIX_REGEX,
                                front)[0].length + 1;
                        er.setStart(start - moveOffset);
                        er.setLength(er.getLength() + moveOffset);
                        er.setText(text.substring(er.getStart(), er.getStart() + er.getLength()));
                        continue;
                    }
                }

                // Handle cases like "-1234567" and "-1234+5678"
                ers.remove(i);
                i--;
                // The result is gone; skip the remaining prefix check so the index is not
                // decremented a second time for the same element.
                continue;
            }

            if (this.config.getForbiddenPrefixMarkers().contains(ch)) {
                // Handle "tel:123456": a colon-like marker is allowed when the text before it
                // matches the configured colon-prefix regex.
                if (BasePhoneNumbers.ColonMarkers.contains(ch) &&
                    this.config.getColonPrefixCheckRegex().matcher(front).find()) {
                    continue;
                }

                ers.remove(i);
                i--;
            }
        }

        // filter hexadecimal address like 00 10 00 31 46 D9 E9 11
        Match[] maskMatchCollection = RegExpUtility.getMatches(Pattern.compile(BasePhoneNumbers.PhoneNumberMaskRegex),
                text);

        // Walk backwards so removing by index does not disturb the positions still to be visited.
        for (int index = ers.size() - 1; index >= 0; --index) {
            ExtractResult candidate = ers.get(index);
            int candidateStart = candidate.getStart();
            int candidateEnd = candidateStart + candidate.getLength();

            for (Match m : maskMatchCollection) {
                if (candidateStart >= m.index && candidateEnd <= m.index + m.length) {
                    ers.remove(index);
                    break;
                }
            }
        }

        return ers;
    }