in ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/UMLSFeatureExtractor.java [27:135]
/**
 * Extracts UMLS-derived features for a candidate pair of markables.
 *
 * <p>Features are only produced when both arguments are {@code Markable}s and a
 * nominal head node can be found for each of them; otherwise an empty list is
 * returned. For each head, the annotations covering it are looked up in the
 * per-document covering index and filtered by {@link #filterUmlsMentions}.
 * The features then describe:
 * <ul>
 *   <li>which of the two heads has at least one such covering mention
 *       ({@code Arg1NoCui_Arg2Cui}, {@code Arg1Cui_Arg2NoCui},
 *       {@code Arg1Arg2NoCui}, {@code Arg1Arg2BothCui}, {@code Arg1OrArg2NoCui});</li>
 *   <li>the simple class name of each mention, alone and paired;</li>
 *   <li>whether {@code alias(ent1, ent2)} holds for a mention pair ({@code UMLS_ALIAS});</li>
 *   <li>the TUIs of each mention, alone and paired, and whether any pair is equal
 *       ({@code Arg1Arg2TuiMatch}).</li>
 * </ul>
 * Features are emitted once per (mention1, mention2) combination, so the same
 * feature name may appear several times in the returned list.
 *
 * @param jCas the CAS holding the current document
 * @param arg1 the first element of the pair (feature names call it "Arg1")
 * @param arg2 the second element of the pair (feature names call it "Arg2")
 * @return the extracted features, possibly empty, never {@code null}
 * @throws AnalysisEngineProcessException declared by the signature; not thrown
 *         directly by this method
 */
public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
    IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
  List<Feature> feats = new ArrayList<>();

  // The covering index is built once per document and reused for every pair
  // in that document; rebuild it only when the document id changes.
  if (docId == null || !getDocId(jCas).equals(docId)) {
    docId = getDocId(jCas);
    coveringMap = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
  }

  if (!(arg1 instanceof Markable && arg2 instanceof Markable)) {
    return feats;
  }

  // Work from the nominal head of each markable rather than its full span.
  ConllDependencyNode head1 = DependencyUtility.getNominalHeadNode(jCas, arg1);
  ConllDependencyNode head2 = DependencyUtility.getNominalHeadNode(jCas, arg2);
  if (head1 == null || head2 == null) {
    return feats;
  }

  List<IdentifiedAnnotation> ents1 = filterUmlsMentions(coveringMap.get(head1));
  List<IdentifiedAnnotation> ents2 = filterUmlsMentions(coveringMap.get(head2));

  boolean arg1HasEnt = !ents1.isEmpty();
  boolean arg2HasEnt = !ents2.isEmpty();
  if (!arg1HasEnt && arg2HasEnt) {
    feats.add(new Feature("Arg1NoCui_Arg2Cui", true));
  } else if (arg1HasEnt && !arg2HasEnt) {
    feats.add(new Feature("Arg1Cui_Arg2NoCui", true));
  } else if (!arg1HasEnt && !arg2HasEnt) {
    feats.add(new Feature("Arg1Arg2NoCui", true));
  } else {
    feats.add(new Feature("Arg1Arg2BothCui", true));
  }
  // Direction-agnostic version of the two asymmetric cases above.
  if (arg1HasEnt != arg2HasEnt) {
    feats.add(new Feature("Arg1OrArg2NoCui", true));
  }

  for (IdentifiedAnnotation ent1 : ents1) {
    String a1SemType = ent1.getClass().getSimpleName();
    feats.add(new Feature("Arg1SemType" + a1SemType, true));
    HashSet<String> a1Tuis = collectTuis(ent1);

    for (IdentifiedAnnotation ent2 : ents2) {
      String a2SemType = ent2.getClass().getSimpleName();
      feats.add(new Feature("Arg2SemType" + a2SemType, true));

      if (alias(ent1, ent2)) {
        feats.add(new Feature("UMLS_ALIAS", true));
      }
      feats.add(new Feature("Arg1Arg2SemType" + a1SemType + "_" + a2SemType, true));

      HashSet<String> a2Tuis = collectTuis(ent2);
      for (String tui1 : a1Tuis) {
        feats.add(new Feature("Arg1Tui_" + tui1, true));
        for (String tui2 : a2Tuis) {
          // NOTE(review): the space after "_Arg2Tui_" looks accidental, but the
          // literal is kept unchanged because existing trained models may
          // depend on the exact feature name -- confirm before fixing.
          feats.add(new Feature("Arg1Tui_" + tui1 + "_Arg2Tui_ " + tui2, true));
          // Null-safe: a concept without a TUI must not abort extraction.
          if (tui1 != null && tui1.equals(tui2)) {
            feats.add(new Feature("Arg1Arg2TuiMatch", true));
          }
        }
      }
      for (String tui2 : a2Tuis) {
        feats.add(new Feature("Arg2Tui_" + tui2, true));
      }
    }
  }
  return feats;
}

/**
 * Returns the annotations that are {@code EntityMention}s (any subclass) or
 * strict subclasses of {@code EventMention}, in their original order.
 * Annotations whose runtime class is exactly {@code EventMention}, and
 * annotations of any other type, are dropped.
 *
 * @param covering annotations covering a head node; {@code null} is treated as empty
 * @return a new mutable list with the retained annotations
 */
private static List<IdentifiedAnnotation> filterUmlsMentions(
    Iterable<? extends IdentifiedAnnotation> covering) {
  List<IdentifiedAnnotation> kept = new ArrayList<>();
  if (covering == null) {
    return kept;
  }
  for (IdentifiedAnnotation ann : covering) {
    boolean isMention = ann instanceof EntityMention || ann instanceof EventMention;
    if (isMention && ann.getClass() != EventMention.class) {
      kept.add(ann);
    }
  }
  return kept;
}

/**
 * Collects the TUI of every {@code UmlsConcept} in the mention's ontology
 * concept array. Entries that are not {@code UmlsConcept}s are ignored.
 *
 * @param mention the annotation whose ontology concepts are inspected
 * @return the distinct TUIs; empty when the mention has no concept array
 */
private static HashSet<String> collectTuis(IdentifiedAnnotation mention) {
  HashSet<String> tuis = new HashSet<>();
  FSArray concepts = mention.getOntologyConceptArr();
  if (concepts == null) {
    return tuis;
  }
  for (int i = 0; i < concepts.size(); i++) {
    Object concept = concepts.get(i);
    if (concept instanceof UmlsConcept) {
      tuis.add(((UmlsConcept) concept).getTui());
    }
  }
  return tuis;
}