in libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/sequence/extractors/BasePhoneNumberExtractor.java [76:200]
/**
 * Extracts phone-number candidates from {@code text} and discards the ones that
 * fail the digit-count, prefix/suffix and mask checks implemented below.
 *
 * <p>Candidates are produced by {@code super.extract(text)}. A candidate that is
 * preceded by an international dialing prefix is widened in place (start, length
 * and text are updated) so that the prefix becomes part of the result.
 *
 * @param text the input to scan
 * @return a new mutable list holding the surviving results, in the order
 *         {@code super.extract} produced them; empty when the pre-check regex
 *         finds nothing in {@code text}
 */
public List<ExtractResult> extract(String text) {
    if (!PRE_CHECK_PHONE_NUMBER_REGEX.matcher(text).find()) {
        return new ArrayList<ExtractResult>();
    }

    // Build a fresh list instead of removing from the candidate list while
    // indexing into it; that pattern made it possible to decrement the loop
    // index twice for a single candidate.
    List<ExtractResult> kept = new ArrayList<ExtractResult>();
    for (ExtractResult er : super.extract(text)) {
        if (isPlausiblePhoneNumber(er, text)) {
            kept.add(er);
        }
    }

    // filter hexadecimal address like 00 10 00 31 46 D9 E9 11
    Match[] maskMatchCollection = RegExpUtility.getMatches(Pattern.compile(BasePhoneNumbers.PhoneNumberMaskRegex),
        text);
    for (int index = kept.size() - 1; index >= 0; --index) {
        ExtractResult er = kept.get(index);
        int start = er.getStart();
        int end = start + er.getLength();
        for (Match m : maskMatchCollection) {
            // Drop results that lie entirely inside a mask match.
            if (start >= m.index && end <= m.index + m.length) {
                kept.remove(index);
                break;
            }
        }
    }
    return kept;
}

/**
 * Decides whether a single candidate should be kept.
 *
 * <p>Side effect: when the candidate directly follows an international dialing
 * prefix, {@code er} is widened to include that prefix before {@code true} is
 * returned.
 *
 * @param er the candidate produced by {@code super.extract}
 * @param text the full input the candidate was extracted from
 * @return {@code true} to keep the candidate, {@code false} to discard it
 */
private boolean isPlausiblePhoneNumber(ExtractResult er, String text) {
    String candidate = er.getText();
    int digitCount = BasePhoneNumberExtractor.countDigits(candidate);

    // Compare the tag by content; the previous reference comparison (!=) only
    // worked when both strings happened to be the same interned instance.
    // A result without data is treated as "not ITPhoneNumber".
    Object data = er.getData();
    boolean isItPhoneNumber = data != null && "ITPhoneNumber".equals(data.toString());
    if ((digitCount < 7 && !isItPhoneNumber)
        || Pattern.matches(SSN_FILTER_REGEX.toString(), candidate)) {
        return false;
    }

    // 16 digits are only accepted when the text starts with "+".
    if (digitCount == 16 && !candidate.startsWith("+")) {
        return false;
    }

    // 15 digits: discard when every space-separated group holds 3 or 4 digits.
    if (digitCount == 15) {
        boolean everyGroupHasThreeOrFourDigits = true;
        for (String numSpan : candidate.split(" ")) {
            int spanDigits = BasePhoneNumberExtractor.countDigits(numSpan);
            if (spanDigits != 3 && spanDigits != 4) {
                everyGroupHasThreeOrFourDigits = false;
                break;
            }
        }
        if (everyGroupHasThreeOrFourDigits) {
            return false;
        }
    }

    int start = er.getStart();
    int end = start + er.getLength();

    // Character immediately after the candidate.
    if (end < text.length() && BasePhoneNumbers.ForbiddenSuffixMarkers.contains(text.charAt(end))) {
        return false;
    }

    if (start == 0) {
        // Nothing precedes the candidate, so no prefix checks apply.
        return true;
    }

    char ch = text.charAt(start - 1);
    String front = text.substring(0, start - 1);

    if (this.config.getFalsePositivePrefixRegex() != null
        && this.config.getFalsePositivePrefixRegex().matcher(front).find()) {
        return false;
    }

    if (BasePhoneNumbers.BoundaryMarkers.contains(ch)) {
        if (SPECIAL_BOUNDARY_MARKERS.contains(ch)
            && BasePhoneNumberExtractor.checkFormattedPhoneNumber(candidate) && start >= 2) {
            char charGap = text.charAt(start - 2);
            if (!Character.isDigit(charGap) && !Character.isWhitespace(charGap)) {
                // check if the extracted string has a non-digit string before "-".
                boolean onlyNonDigitsBefore = Pattern.matches("^[^0-9]+$", text.substring(0, start - 2));
                // Handle cases like "91a-677-0060".
                return !(Character.isLowerCase(charGap) && !onlyNonDigitsBefore);
            }
            // check the international dialing prefix
            if (INTERNATIONAL_DIALING_PREFIX_REGEX.matcher(front).find()) {
                int moveOffset = RegExpUtility.getMatches(INTERNATIONAL_DIALING_PREFIX_REGEX,
                    front)[0].length + 1;
                er.setStart(start - moveOffset);
                er.setLength(er.getLength() + moveOffset);
                er.setText(text.substring(er.getStart(), er.getStart() + er.getLength()));
                return true;
            }
        }
        // Handle cases like "-1234567" and "-1234+5678".
        // Returning here also keeps the forbidden-prefix check below from
        // acting a second time on a candidate that is already discarded.
        return false;
    }

    if (this.config.getForbiddenPrefixMarkers().contains(ch)) {
        // Handle "tel:123456": a colon is tolerated only when the text before it
        // matches the configured colon-prefix regex.
        return BasePhoneNumbers.ColonMarkers.contains(ch)
            && this.config.getColonPrefixCheckRegex().matcher(front).find();
    }

    return true;
}