public static List getStringMatchFeatures()

in opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java [240:316]


  /**
   * Collects string-match features describing how {@code mention} compares, on the
   * surface, with each mention already attached to {@code entity}.
   *
   * <p>For every entity mention, exactly one of the following is tried, in order:
   * <ul>
   *   <li>the non-null result of {@code getExactMatchFeature(entityMention, mention)};</li>
   *   <li>{@code "cmix"} when the entity mention is a coordinated noun phrase and
   *       {@code mention} is not;</li>
   *   <li>{@code "substring"} when both {@code stripNp} results are non-null and
   *       {@code isSubstring} accepts them.</li>
   * </ul>
   * Independently of the above, when the lower-cased head token texts are equal,
   * {@code "hds=<head>"} is added and the mention's modifier set is compared with the
   * entity mention's modifier set; each modifier of {@code mention} other than
   * {@code "the"} that is missing from the entity mention yields {@code "mmw=<modifier>"}.
   *
   * <p>The per-mention features are de-duplicated through a {@link HashSet}, so their
   * order in the result follows that set's iteration order. They are followed by the
   * summary features: {@code "sameHead"} plus exactly one of {@code "modsMatch"},
   * {@code "nonTheModsMatch"} or {@code "modsMisMatch"} if any head matched, and
   * {@code "titleMatch"} if the mention's head appeared in the modifier set built from
   * some entity mention's tokens and its {@code getNonDescriptorStart()} index.
   *
   * <p>NOTE(review): the modifier comparison is re-run for each same-head entity mention
   * until a full match is found, and every re-run resets both modifier flags. A later
   * mention can therefore replace an earlier "only 'the' differs" result with a
   * mismatch - confirm whether this is intended.
   *
   * @param mention the mention being considered for resolution
   * @param entity the discourse entity whose mentions are compared against {@code mention}
   * @return a new mutable list of feature strings; empty when nothing matched
   */
  public static List<String> getStringMatchFeatures(MentionContext mention, DiscourseEntity entity) {
    Parse[] mentionTokens = mention.getTokenParses();
    Set<String> mentionModifiers = constructModifierSet(mentionTokens, mention.getHeadTokenIndex());
    String mentionHead = mention.getHeadTokenText().toLowerCase();

    Set<String> perMentionFeatures = new HashSet<>();
    boolean headMatched = false;
    boolean allModifiersMatched = false;
    boolean nonTheModifiersMatched = false;
    boolean headIsTitleModifier = false;

    Iterator<MentionContext> candidates = entity.getMentions();
    while (candidates.hasNext()) {
      MentionContext candidate = candidates.next();

      // Whole-phrase comparison: exact match, else coordination mix, else substring.
      String exactMatch = getExactMatchFeature(candidate, mention);
      if (exactMatch != null) {
        perMentionFeatures.add(exactMatch);
      }
      else if (candidate.getParse().isCoordinatedNounPhrase()
          && !mention.getParse().isCoordinatedNounPhrase()) {
        perMentionFeatures.add("cmix");
      }
      else {
        String strippedMention = stripNp(mention);
        String strippedCandidate = stripNp(candidate);
        if (strippedMention != null && strippedCandidate != null
            && isSubstring(strippedMention, strippedCandidate)) {
          perMentionFeatures.add("substring");
        }
      }

      Parse[] candidateTokens = candidate.getTokenParses();
      int candidateHeadIndex = candidate.getHeadTokenIndex();
      String candidateHead = candidate.getHeadTokenText().toLowerCase();

      // Head-word comparison, followed by modifier comparison when heads agree.
      if (mentionHead.equals(candidateHead)) {
        headMatched = true;
        perMentionFeatures.add("hds=" + mentionHead);
        // Skip the modifier check once some earlier mention matched on every modifier.
        if (!(allModifiersMatched && nonTheModifiersMatched)) {
          // Assume a match, then knock the flags down for each missing modifier.
          allModifiersMatched = true;
          nonTheModifiersMatched = true;
          Set<String> candidateModifiers = constructModifierSet(candidateTokens, candidateHeadIndex);
          for (String modifier : mentionModifiers) {
            if (candidateModifiers.contains(modifier)) {
              continue;
            }
            allModifiersMatched = false;
            // A missing "the" only breaks the strict match, not the relaxed one.
            if (!modifier.equals("the")) {
              nonTheModifiersMatched = false;
              perMentionFeatures.add("mmw=" + modifier);
            }
          }
        }
      }

      // Does the mention's head occur among this entity mention's leading modifiers?
      Set<String> descriptorModifiers =
          constructModifierSet(candidateTokens, candidate.getNonDescriptorStart());
      headIsTitleModifier |= descriptorModifiers.contains(mentionHead);
    }

    // Per-mention features first, then the entity-level summary flags.
    List<String> features = new ArrayList<>(perMentionFeatures);
    if (headMatched) {
      features.add("sameHead");
      String modifierVerdict;
      if (allModifiersMatched) {
        modifierVerdict = "modsMatch";
      }
      else if (nonTheModifiersMatched) {
        modifierVerdict = "nonTheModsMatch";
      }
      else {
        modifierVerdict = "modsMisMatch";
      }
      features.add(modifierVerdict);
    }
    if (headIsTitleModifier) {
      features.add("titleMatch");
    }
    return features;
  }