in ctakes-core/src/main/java/org/apache/ctakes/core/nlp/tokenizer/TokenizerPTB.java [542:608]
/**
 * Decides whether the characters of {@code text} in the range
 * {@code [currentPosition, nextNonTelephoneNumberChar)} are laid out like a
 * telephone number and, if so, reports how many characters that is.
 *
 * Layouts recognised directly by this method (digits separated by DASH):
 * <ul>
 *   <li>6 characters:  extension, e.g. 4-5555</li>
 *   <li>8 characters:  no area code, e.g. 555-1212</li>
 *   <li>12 characters: with area code, e.g. 507-555-1212</li>
 *   <li>14 characters: with leading 1, e.g. 1-507-555-1212</li>
 * </ul>
 * A 12-character candidate that does not fit the 507-555-1212 layout is handed
 * to {@code checkFormat2}, whose result is returned unchanged (presumably that
 * covers forms such as 02-2348-2192 from the guidelines -- confirm against
 * checkFormat2 itself).
 *
 * @param currentPosition index in {@code text} of the first candidate character
 * @param text the text being examined
 * @param nextNonTelephoneNumberChar index just past the last candidate character;
 *        a negative value is returned to the caller as-is
 * @return {@code nextNonTelephoneNumberChar} when it is negative; the candidate
 *         length when a layout above matches; the result of {@code checkFormat2}
 *         for a 12-character candidate that fails the first layout; otherwise -1
 */
private int lenIfIsTelephoneNumber(int currentPosition, String text, int nextNonTelephoneNumberChar) {
    if (nextNonTelephoneNumberChar < 0) {
        return nextNonTelephoneNumberChar;
    }

    final int len = nextNonTelephoneNumberChar - currentPosition;
    // Taken before the length dispatch on purpose: an invalid range fails here
    // no matter what the computed length is.
    final String candidate = text.substring(currentPosition, nextNonTelephoneNumberChar);

    // Layout template for this length: '#' marks a digit position, any other
    // template character marks a position that must hold DASH.
    final String layout;
    switch (len) {
        case 6:
            layout = "#-####";          // 4-5555
            break;
        case 8:
            layout = "###-####";        // 555-1212
            break;
        case 12:
            layout = "###-###-####";    // 507-555-1212 (first of two possible formats)
            break;
        case 14:
            layout = "#-###-###-####";  // 1-507-555-1212
            break;
        default:
            return -1;
    }

    // Walk left to right and stop at the first position that breaks the layout.
    for (int i = 0; i < len; i++) {
        final char actual = candidate.charAt(i);
        final boolean fits = (layout.charAt(i) == '#') ? isDigit(actual) : (actual == DASH);
        if (!fits) {
            // Only the 12-character case has a second format to fall back on.
            return (len == 12) ? checkFormat2(candidate) : -1;
        }
    }
    return len;
}