private List getLocationFeaturesFromMetaKeywords()

in ctakes-side-effect/src/main/java/org/apache/ctakes/sideeffect/ae/SESentenceFeatureAnnotator.java [125:269]


	/**
	 * Builds one location feature per entry of {@code metaKey} for the given sentence.
	 * <p>
	 * For each meta key, the keywords registered under it in {@code metaKeywords} are searched
	 * (case-insensitively, first occurrence only) in the sentence text. The first keyword that
	 * occurs as a standalone token decides the feature:
	 * <ul>
	 * <li>{@code "nul"} - no keyword of this meta key was found</li>
	 * <li>{@code "pre"} - a keyword was found and the meta key is {@code "SideEffectWord"}</li>
	 * <li>{@code "bet"} - keyword lies both between a PSE and a drug and between a drug and a PSE</li>
	 * <li>{@code "bpd"} - keyword lies after some PSE and before some drug</li>
	 * <li>{@code "bdp"} - keyword lies after some drug and before some PSE</li>
	 * <li>{@code "bap"} - keyword lies before some PSE and after some PSE</li>
	 * <li>{@code "bep"} - keyword lies before some PSE</li>
	 * <li>{@code "afp"} - keyword lies after some PSE</li>
	 * <li>{@code "any"} - none of the above</li>
	 * </ul>
	 *
	 * @param jcas CAS from which the identified annotations inside the sentence span are read
	 * @param ps   sentence whose covered text and offsets are analysed
	 * @return list of feature codes, one per meta key, in the iteration order of {@code metaKey}
	 */
	private List<String> getLocationFeaturesFromMetaKeywords(JCas jcas, PSESentence ps) {
		List<String> feature = new ArrayList<>();
		List<IdentifiedAnnotation> drug = new ArrayList<>();
		List<IdentifiedAnnotation> pse = new ArrayList<>();

		Iterator<?> neIter = FSUtil.getAnnotationsIteratorInSpan(jcas, IdentifiedAnnotation.type, ps.getBegin(),
																				 ps.getEnd() + 1);
		while (neIter.hasNext()) {
			IdentifiedAnnotation n = (IdentifiedAnnotation) neIter.next();
			final int typeId = n.getTypeID();
			// drug
			if (typeId == 1) {
				drug.add(n);
			}
			// signs/symptoms or disease/disorders
			if (typeId == 2 || typeId == 3) {
				pse.add(n);
			}
		}

		// The lower-cased sentence text does not depend on the keyword, so compute it once.
		final String pseSenText = ps.getCoveredText()
											 .toLowerCase();

		// for each metaKey
		for ( final String mk : metaKey ) {
			Set<String> kwSet = metaKeywords.get( mk );

			// Find the first keyword of this metaKey that occurs as a standalone token.
			String kw = "";
			int kwPos = -1;
			boolean foundKw = false;
			for ( final String candidate : kwSet ) {
				final int pos = pseSenText.indexOf( candidate );
				if ( pos == -1 ) {
					continue;
				}
				if ( isStandaloneKeywordAt( pseSenText, pos, candidate.length() ) ) {
					kw = candidate;
					kwPos = pos;
					foundKw = true;
					break;
				}
			}

			// if no keyword was found, record that and go to the next metaKey
			if ( !foundKw ) {
				feature.add( "nul" );
				continue;
			}

			// Translate the sentence-relative position into document offsets.
			final int kwBegin = kwPos + ps.getBegin();
			final int kwEnd = kwBegin + kw.length(); // index of end ch + 1

			// The keyword is "between A and B" when some A ends before it and some B begins after it.
			final boolean betweenPseAndDrug =
					anyAnnotationEndsBefore( pse, kwBegin ) && anyAnnotationBeginsAfter( drug, kwEnd );
			final boolean betweenDrugAndPse =
					anyAnnotationEndsBefore( drug, kwBegin ) && anyAnnotationBeginsAfter( pse, kwEnd );

			boolean beforePse = false;
			boolean afterPse = false;
			if ( !betweenPseAndDrug && !betweenDrugAndPse ) {
				beforePse = anyAnnotationBeginsAfter( pse, kwEnd );
				afterPse = anyAnnotationEndsBefore( pse, kwBegin );
			}

			if ( mk.equals( "SideEffectWord" ) ) {
				feature.add( "pre" );
			} else if ( betweenPseAndDrug && betweenDrugAndPse ) {
				feature.add( "bet" );
			} else if ( betweenPseAndDrug ) {
				feature.add( "bpd" );
			} else if ( betweenDrugAndPse ) {
				feature.add( "bdp" );
			} else if ( beforePse && afterPse ) {
				feature.add( "bap" );
			} else if ( beforePse ) {
				feature.add( "bep" );
			} else if ( afterPse ) {
				feature.add( "afp" );
			} else {
				feature.add( "any" );
			}
		}

		return feature;
	}

	/**
	 * Tells whether the keyword occurrence at {@code kwPos} is delimited by non-word characters.
	 * <p>
	 * The end-of-text case is tested first so that no character past the end of {@code text} is
	 * ever read; reading it unconditionally throws {@link StringIndexOutOfBoundsException} for a
	 * keyword that ends the sentence.
	 *
	 * @param text     lower-cased sentence text
	 * @param kwPos    index of the first character of the keyword in {@code text}
	 * @param kwLength length of the keyword
	 * @return true if the keyword ends the text, or is followed by a non-word character and is
	 * either at the start of the text or preceded by a non-word character
	 */
	private static boolean isStandaloneKeywordAt( final String text, final int kwPos, final int kwLength ) {
		final int kwA = kwPos + kwLength; // index of the 1st char after the keyword
		if ( text.length() <= kwA ) {
			// NOTE(review): a keyword that ends the text is accepted without checking the
			// preceding character, as the original condition did - confirm this is intended.
			return true;
		}
		if ( !text.substring( kwA, kwA + 1 ).matches( "\\W" ) ) {
			return false;
		}
		if ( kwPos == 0 ) {
			return true;
		}
		return text.substring( kwPos - 1, kwPos ).matches( "\\W" );
	}

	/**
	 * @param annotations annotations to inspect
	 * @param offset      document offset
	 * @return true if at least one annotation ends strictly before {@code offset}
	 */
	private static boolean anyAnnotationEndsBefore( final List<IdentifiedAnnotation> annotations,
																	final int offset ) {
		for ( final IdentifiedAnnotation annotation : annotations ) {
			if ( annotation.getEnd() < offset ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * @param annotations annotations to inspect
	 * @param offset      document offset
	 * @return true if at least one annotation begins strictly after {@code offset}
	 */
	private static boolean anyAnnotationBeginsAfter( final List<IdentifiedAnnotation> annotations,
																	 final int offset ) {
		for ( final IdentifiedAnnotation annotation : annotations ) {
			if ( annotation.getBegin() > offset ) {
				return true;
			}
		}
		return false;
	}