private List processTok()

in opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java [332:386]


  /**
   * Splits a single raw token into its sub-tokens.
   * <p>
   * Processing order:
   * <ol>
   * <li>A leading {@code «} is detached and emitted as its own token.</li>
   * <li>A single trailing {@code »}, {@code :}, {@code ,} or {@code !} is detached and
   * emitted as the last token.</li>
   * <li>If {@code splitHyphenatedTokens} is enabled and the remainder contains a hyphen
   * and matches {@code HYPHEN_PATTERN}, the captured parts and the hyphen are emitted
   * separately.</li>
   * <li>Otherwise, if something was stripped and the remainder is longer than one
   * character and does not match {@code ALPHANUMERIC_PATTERN}, the remainder is
   * processed recursively; else it is emitted unchanged.</li>
   * </ol>
   * Empty remainders are never emitted, so an empty {@code tok} or a token consisting
   * only of the detachable punctuation above yields just that punctuation (or nothing).
   *
   * @param tok the raw token to split; must not be {@code null}.
   * @return the resulting tokens in their original order, never {@code null}.
   */
  private List<String> processTok(String tok) {
    List<String> out = new ArrayList<>();
    if (tok.isEmpty()) {
      // Nothing to split; also avoids charAt(0) on an empty string.
      return out;
    }

    boolean tokAdded = false;
    String original = tok;
    List<String> suffix = new LinkedList<>();

    char first = tok.charAt(0);
    if (first == '«') {
      out.add(Character.toString(first));
      tok = tok.substring(1);
    }

    // The token may have consisted of the opening quote only, so re-check
    // before looking at its last character.
    if (!tok.isEmpty()) {
      char last = tok.charAt(tok.length() - 1);
      if (last == '»' || last == ':' || last == ',' || last == '!') {
        suffix.add(Character.toString(last));
        tok = tok.substring(0, tok.length() - 1);
      }
    }

    // let's split all hyphens
    if (this.splitHyphenatedTokens && tok.contains("-") && tok.length() > 1) {
      Matcher matcher = HYPHEN_PATTERN.matcher(tok);

      String firstTok = null;
      String hyphen = "-";
      String secondTok = null;
      String rest = null;

      if (matcher.matches()) {
        // Exactly one of the alternatives (groups 1, 3 or 6) is expected to have
        // matched; each one exposes its pieces through different capture groups.
        if (matcher.group(1) != null) {
          firstTok = matcher.group(2);
        } else if (matcher.group(3) != null) {
          secondTok = matcher.group(4);
          rest = matcher.group(5);
        } else if (matcher.group(6) != null) {
          firstTok = matcher.group(7);
          secondTok = matcher.group(8);
          rest = matcher.group(9);
        }

        addIfNotEmpty(firstTok, out);
        addIfNotEmpty(hyphen, out);
        addIfNotEmpty(secondTok, out);
        addIfNotEmpty(rest, out);
        tokAdded = true;
      }
    }

    if (!tokAdded) {
      if (!original.equals(tok) && tok.length() > 1
          && !ALPHANUMERIC_PATTERN.matcher(tok).matches()) {
        // Something was stripped and the remainder may still carry punctuation.
        out.addAll(processTok(tok));
      } else if (!tok.isEmpty()) {
        // Do not emit an empty token when only punctuation was present.
        out.add(tok);
      }
    }

    out.addAll(suffix);
    return out;
  }