public static HashMap extract()

in ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java [168:346]


	/**
	 * Builds the boolean feature map for one annotation.
	 * <p>
	 * Every key in {@code FeatureIndex} is first initialised to {@code false};
	 * the following features are then computed:
	 * <ul>
	 * <li>{@code IN_HIST_SECTION} - result of {@code isInHistSection} applied to
	 * the sentence that starts the line containing {@code arg}'s sentence. Only
	 * written when a covering sentence and a {@code DocumentAnnotation} exist
	 * and the covering sentence does not start at offset 0.</li>
	 * <li>{@code SUBSUMED_ANNOT} - an {@code EntityMention} or
	 * {@code EventMention} covers {@code arg} and has a different nominal head
	 * node.</li>
	 * <li>{@code SUBSUMED_CHUNK} - a nearby {@code Chunk} covers {@code arg} and
	 * has a different nominal head node.</li>
	 * <li>{@code POSTCOORD_NMOD} - the dependency relation of {@code arg}'s
	 * nominal head node is one of the listed modifier relations.</li>
	 * <li>{@code DISCUSSION_DEPPATH} - some node on the dependency path from the
	 * head node to the top satisfies {@code isDiscussionContext}.</li>
	 * </ul>
	 * The first three features are only evaluated when some sentence covers
	 * {@code arg}; the last two only when {@code arg} covers at least one
	 * {@code ConllDependencyNode}.
	 *
	 * @param jCas the CAS holding the sentences, mentions, chunks and dependency nodes
	 * @param arg  the annotation to extract features for
	 * @return a newly created, mutable map from feature name to value
	 */
	public static HashMap<String, Boolean> extract(JCas jCas,
			Annotation arg) {
		HashMap<String,Boolean> vfeat = new HashMap<String,Boolean>();
		for (String feat : FeatureIndex) {
			vfeat.put(feat, false);
		}

		// find the first sentence (in index iteration order) whose span covers arg
		Sentence sEntity = null;
		Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
		for (Sentence s : sentences) {
			if ( s.getBegin()<=arg.getBegin() && s.getEnd()>=arg.getEnd()) {
				sEntity = s;
				break;
			}
		}

		// take the first DocumentAnnotation, if there is one
		DocumentAnnotation docAnnot = null;
		Collection<DocumentAnnotation> docAnnots =
				JCasUtil.select(jCas, DocumentAnnotation.class);
		if (!docAnnots.isEmpty()) {
			docAnnot = docAnnots.iterator().next();
		}

		if (sEntity!=null) {

			// 1) is the line that holds this sentence a history section?
			// The sentence itself may not be preceded by a newline, in which
			// case we have to walk back to the preceding sentence that is.
			if (docAnnot != null) {
				String doctext = docAnnot.getCoveredText();

				if (sEntity.getBegin() > 0) {
					// sort the sentences
					// TODO: make it so you don't sort every time for same sentence.
					ArrayList<Sentence> sentList = new ArrayList<Sentence>(sentences);
					Collections.sort(sentList, new AnnotLocationComparator());

					Sentence lineStart = findLineInitialSentence(sentList, sEntity, doctext);
					boolean argInHistSection = isInHistSection(lineStart);

					// written even when false, exactly as the feature was before
					vfeat.put(IN_HIST_SECTION, argInHistSection);
				}
			}

			// 2) some other identified annotation subsumes this one?

			// Get all IdentifiedAnnotations covering the boundaries of the
			// annotation
			List<IdentifiedAnnotation> lsmentions = JCasUtil.selectCovering(jCas,
													IdentifiedAnnotation.class, arg.getBegin(),
													arg.getEnd());

			Collections.sort(lsmentions, new AnnotLocationComparator());

			// NB: arg is annotation input to this method. annot is current
			// lsmentions in loop
			for (IdentifiedAnnotation annot : lsmentions) {
				if ( annot.getBegin()>arg.getBegin()) {
					// annot starts after our arg; assuming the comparator orders
					// by begin offset, no later element can cover arg either
					break;
				}

				// here: annot begins at or before arg begins
				if ( annot.getEnd()<arg.getEnd()) {
					// annot ends before arg ends, so it does not cover arg
					continue;
				} else if ( !DependencyUtility.equalCoverage(
						DependencyUtility.getNominalHeadNode(jCas, annot),
						DependencyUtility.getNominalHeadNode(jCas, arg)) ) {
					// here: annot begins at or before arg and ends at or after
					// arg, i.e. arg falls within the bounds of annot, and the
					// two have different nominal head nodes.
					// now verify that annot is an EventMention or EntityMention
					if ((annot instanceof EntityMention) || (annot instanceof EventMention)) {
						vfeat.put(SUBSUMED_ANNOT, true);
						break; // no reason to keep checking
					}
				}
			}

			// 3) some chunk subsumes this?
			// NOTE(review): selectPreceding/selectFollowing presumably return only
			// chunks lying entirely before/after arg; if so, no chunk examined
			// here can cover arg and SUBSUMED_CHUNK is never set. Confirm against
			// the uimaFIT documentation before changing - left as is on purpose.
			List<Chunk> lschunks = JCasUtil.selectPreceding(jCas, Chunk.class, arg, 5);
			lschunks.addAll(JCasUtil.selectFollowing(jCas, Chunk.class, arg, 5));
			for (Chunk chunk : lschunks) {
				if ( chunk.getBegin()>arg.getBegin()) {
					break;
				}
				if ( chunk.getEnd()<arg.getEnd()) {
					continue;
				} else if ( !DependencyUtility.equalCoverage(
						DependencyUtility.getNominalHeadNode(jCas, chunk),
						DependencyUtility.getNominalHeadNode(jCas, arg)) ) {
					// the case that chunk is a superset of arg
					vfeat.put(SUBSUMED_CHUNK, true);
				}
			}
		}


		List<ConllDependencyNode> depnodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, arg);
		if (!depnodes.isEmpty()) {
			ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode(depnodes);

			// 1) check if the head node of the entity mention is really just part of a larger noun phrase
			if (depnode.getDeprel().matches("(NMOD|amod|nmod|det|predet|nn|poss|possessive|infmod|partmod|rcmod)")) {
				vfeat.put(POSTCOORD_NMOD, true);
			}

			// 4) search dependency paths for discussion context
			for (ConllDependencyNode dn : DependencyUtility.getPathToTop(jCas, depnode)) {
				if ( isDiscussionContext(dn) ) {
					vfeat.put(DISCUSSION_DEPPATH, true);
				}
			}
		}
		return vfeat;
	}

	/**
	 * Walks backwards from {@code start} through the sorted sentences until it
	 * reaches a sentence that is separated from its predecessor by a newline,
	 * or the first sentence in the list, and returns that sentence.
	 *
	 * @param sortedSentences all sentences, sorted by location
	 * @param start           the sentence to start walking back from
	 * @param doctext         the document text the sentence offsets refer to
	 * @return the sentence that begins the line {@code start} is on
	 */
	private static Sentence findLineInitialSentence(List<Sentence> sortedSentences,
			Sentence start, String doctext) {
		Sentence lineStart = start;
		int idx = sortedSentences.indexOf(start);
		while (idx > 0) {
			Sentence prev = sortedSentences.get(idx - 1);
			String tweenSents = substringOrEmpty(doctext, prev.getEnd(), lineStart.getBegin());
			if (tweenSents.indexOf("\n") != -1) {
				// there is a newline between lineStart and the prior sentence
				break;
			}
			// same line: keep walking back; when idx hits 0 the first
			// sentence of the document is the line start
			lineStart = prev;
			idx--;
		}
		return lineStart;
	}

	/**
	 * Returns {@code text.substring(begin, end)}, or the empty string when the
	 * range is not valid for {@code text} (e.g. overlapping sentences).
	 */
	private static String substringOrEmpty(String text, int begin, int end) {
		if (begin < 0 || end > text.length() || begin > end) {
			// an invalid gap is of no consequence: treat it as "no newline"
			return "";
		}
		return text.substring(begin, end);
	}