public List extract()

in ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java [35:181]


  /**
   * Builds boolean features comparing the ontology information attached to a
   * candidate mention with that attached to the members of a cluster.
   * <p>
   * The annotations considered for the mention and for every cluster member are
   * the {@link IdentifiedAnnotation}s covering the dependency node returned by
   * {@code cache} for that markable, restricted to {@code EntityMention} /
   * {@code EventMention} instances whose runtime class is not exactly
   * {@code EventMention}. From those the method emits:
   * <ul>
   *   <li>one of {@code ClusterNoCui_MentionCui}, {@code ClusterCui_MentionNoCui},
   *       {@code ClusterMentionNoCui}, {@code ClusterMentionBothCui}, plus
   *       {@code ClusterOrMentionNoCui} when exactly one side has entities;</li>
   *   <li>{@code ClusterSemType<Type>} for every cluster entity;</li>
   *   <li>{@code MentionClusterSemTypePair<ClusterType>_<MentionType>} for every
   *       cluster/mention entity pair;</li>
   *   <li>{@code UMLS_ALIAS} when {@code alias(...)} holds for some pair;</li>
   *   <li>{@code ClusterMentionTuiMatch} when some pair shares a TUI.</li>
   * </ul>
   * The covering index is rebuilt whenever the document id of {@code jCas}
   * differs from the one seen on the previous call.
   *
   * @param jCas    the CAS holding the document being processed
   * @param cluster the cluster whose members are compared against the mention
   * @param mention the candidate mention
   * @return one {@code Feature(name, true)} per feature that fired; empty when
   *         {@code cache} has no entry for {@code mention}
   * @throws AnalysisEngineProcessException declared by the extractor contract
   * @throws RuntimeException if no markable cache has been set on this extractor
   */
  public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
      IdentifiedAnnotation mention) throws AnalysisEngineProcessException {

    if(cache == null){
      throw new RuntimeException("This extractor requires a Markable cache.");
    }

    List<Feature> feats = new ArrayList<>();
    Set<String> trueFeats = new HashSet<>();

    // Re-index covering annotations only when we move on to a new document.
    if(docId == null || !getDocId(jCas).equals(docId)){
      docId = getDocId(jCas);
      coveringMap = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
    }

    ConllDependencyNode head = cache.get(mention);

    if(head != null){
      // Entities covering the mention's head node.
      List<IdentifiedAnnotation> mentionEnts = selectConceptEntities(head);

      // Union of the entities covering the head node of every cluster member.
      Set<IdentifiedAnnotation> clusterEnts = new HashSet<>();
      for(Markable member : new ListIterable<Markable>(cluster.getMembers())){
        clusterEnts.addAll(selectConceptEntities(cache.get(member)));
      }

      boolean clusterHasEnts = !clusterEnts.isEmpty();
      boolean mentionHasEnts = !mentionEnts.isEmpty();
      if(!clusterHasEnts && mentionHasEnts){
        trueFeats.add("ClusterNoCui_MentionCui");
      }else if(clusterHasEnts && !mentionHasEnts){
        trueFeats.add("ClusterCui_MentionNoCui");
      }else if(!clusterHasEnts && !mentionHasEnts){
        trueFeats.add("ClusterMentionNoCui");
      }else{
        trueFeats.add("ClusterMentionBothCui");
      }

      // Exactly one of the two sides has entities.
      if(clusterHasEnts != mentionHasEnts){
        trueFeats.add("ClusterOrMentionNoCui");
      }

      // The mention-side TUIs do not depend on the cluster entity, so compute
      // them once up front (kept parallel to mentionEnts by index).
      List<Set<String>> mentionTuis = new ArrayList<>(mentionEnts.size());
      for(IdentifiedAnnotation mentionEnt : mentionEnts){
        mentionTuis.add(collectTuis(mentionEnt));
      }

      // NOTE: hypernym/hyponym and graph-distance features were experimented
      // with here previously and are intentionally not emitted.
      for(IdentifiedAnnotation clusterEnt : clusterEnts){
        String clusterSemType = clusterEnt.getClass().getSimpleName();
        trueFeats.add("ClusterSemType" + clusterSemType);
        Set<String> clusterTuis = collectTuis(clusterEnt);

        for(int m = 0; m < mentionEnts.size(); m++){
          IdentifiedAnnotation mentionEnt = mentionEnts.get(m);
          String mentionSemType = mentionEnt.getClass().getSimpleName();

          if(alias(clusterEnt, mentionEnt)){
            trueFeats.add("UMLS_ALIAS");
          }

          trueFeats.add("MentionClusterSemTypePair" + clusterSemType + "_" + mentionSemType);

          // Any TUI shared between the two entities counts as a match.
          for(String tui : mentionTuis.get(m)){
            if(clusterTuis.contains(tui)){
              trueFeats.add("ClusterMentionTuiMatch");
              break;
            }
          }
        }
      }
    }

    for(String feat : trueFeats){
      feats.add(new Feature(feat, true));
    }
    return feats;
  }

  /**
   * Returns the annotations covering {@code node} (per {@code coveringMap})
   * that are EntityMention or EventMention instances, excluding those whose
   * runtime class is exactly EventMention.
   *
   * @param node the dependency node to look up; may be {@code null}
   * @return a new mutable list, empty when {@code node} is {@code null} or the
   *         index has no entry for it
   */
  private List<IdentifiedAnnotation> selectConceptEntities(ConllDependencyNode node){
    List<IdentifiedAnnotation> kept = new ArrayList<>();
    if(node == null){
      return kept;
    }
    Iterable<? extends IdentifiedAnnotation> covering = coveringMap.get(node);
    if(covering == null){
      return kept;
    }
    for(IdentifiedAnnotation ann : covering){
      boolean entityOrEvent = ann instanceof EntityMention || ann instanceof EventMention;
      if(entityOrEvent && ann.getClass() != EventMention.class){
        kept.add(ann);
      }
    }
    return kept;
  }

  /**
   * Collects the non-null TUIs of every UmlsConcept in the ontology concept
   * array of {@code entity}.
   *
   * @param entity the annotation whose ontology concepts are inspected
   * @return a new set of TUIs; empty when the annotation has no concept array
   */
  private Set<String> collectTuis(IdentifiedAnnotation entity){
    Set<String> tuis = new HashSet<>();
    FSArray concepts = entity.getOntologyConceptArr();
    if(concepts != null){
      for(int i = 0; i < concepts.size(); i++){
        if(concepts.get(i) instanceof UmlsConcept){
          String tui = ((UmlsConcept)concepts.get(i)).getTui();
          if(tui != null){
            tuis.add(tui);
          }
        }
      }
    }
    return tuis;
  }