in ctakes-side-effect/src/main/java/org/apache/ctakes/sideeffect/ae/SESentenceFeatureAnnotator.java [125:269]
/**
 * Builds one location feature per meta keyword category for the given sentence.
 * <p>
 * For every entry of {@code metaKey} (in iteration order) the sentence text is lower-cased and searched for
 * the keywords of that category. The first keyword found as a whole word decides the feature:
 * <ul>
 * <li>{@code "nul"} - no keyword of the category occurs as a whole word</li>
 * <li>{@code "pre"} - a keyword was found and the category is {@code "SideEffectWord"}</li>
 * <li>{@code "bet"} - keyword lies both between a PSE and a later drug and between a drug and a later PSE</li>
 * <li>{@code "bpd"} - keyword lies after some PSE mention and before some drug mention</li>
 * <li>{@code "bdp"} - keyword lies after some drug mention and before some PSE mention</li>
 * <li>{@code "bap"} - keyword lies before some PSE mention and after another</li>
 * <li>{@code "bep"} - keyword lies before some PSE mention</li>
 * <li>{@code "afp"} - keyword lies after some PSE mention</li>
 * <li>{@code "any"} - keyword found but none of the above applies</li>
 * </ul>
 * "PSE" mentions are the annotations with type id 2 or 3 (signs/symptoms or disease/disorders); drug
 * mentions are the annotations with type id 1.
 *
 * @param jcas CAS used to look up the {@code IdentifiedAnnotation}s inside the sentence span
 * @param ps   sentence whose covered text and offsets are examined
 * @return one feature string per meta keyword category, in the iteration order of {@code metaKey}
 */
private List<String> getLocationFeaturesFromMetaKeywords(JCas jcas, PSESentence ps) {
    List<String> feature = new ArrayList<>();
    List<IdentifiedAnnotation> drug = new ArrayList<>();
    List<IdentifiedAnnotation> pse = new ArrayList<>();

    // Split the annotations of the sentence span into drug and PSE mentions.
    Iterator<?> neIter = FSUtil.getAnnotationsIteratorInSpan(jcas, IdentifiedAnnotation.type, ps.getBegin(),
            ps.getEnd() + 1);
    while (neIter.hasNext()) {
        IdentifiedAnnotation n = (IdentifiedAnnotation) neIter.next();
        final int typeId = n.getTypeID();
        if (typeId == 1) {
            // drug
            drug.add(n);
        } else if (typeId == 2 || typeId == 3) {
            // signs/symptoms or disease/disorders
            pse.add(n);
        }
    }

    // The sentence text does not change while keywords are scanned, so lower-case it once.
    // NOTE(review): a missing covered text is treated as empty text here - confirm this cannot happen upstream.
    final String coveredText = ps.getCoveredText();
    final String pseSenText = coveredText == null ? "" : coveredText.toLowerCase();

    // for each metaKey
    for (final String mk : metaKey) {
        Set<String> kwSet = metaKeywords.get(mk);

        // Find the first keyword of this category that occurs as a whole word.
        // NOTE(review): only the FIRST occurrence of each keyword is tested (as before); a keyword whose
        // first hit is embedded in a longer word is skipped even if a later hit is a whole word.
        String kw = "";
        int kwPos = -1;
        boolean foundKw = false;
        for (final String candidate : kwSet) {
            final int pos = pseSenText.indexOf(candidate);
            if (pos == -1) {
                continue;
            }
            if (isKeywordAtWordBoundary(pseSenText, pos, candidate.length())) {
                kw = candidate;
                kwPos = pos;
                foundKw = true;
                break;
            }
        }

        // no keyword of this category in the sentence
        if (!foundKw) {
            feature.add("nul");
            continue;
        }

        // For side effect words only the presence matters, not the location.
        if (mk.equals("SideEffectWord")) {
            feature.add("pre");
            continue;
        }

        // Convert the sentence-relative keyword position into document offsets.
        final int kwBegin = kwPos + ps.getBegin();
        final int kwEnd = kwBegin + kw.length(); // index of end ch + 1

        final boolean pseBeforeKw = anyMentionEndsBefore(pse, kwBegin);
        final boolean pseAfterKw = anyMentionBeginsAfter(pse, kwEnd);
        final boolean drugBeforeKw = anyMentionEndsBefore(drug, kwBegin);
        final boolean drugAfterKw = anyMentionBeginsAfter(drug, kwEnd);

        final boolean betweenPseAndDrug = pseBeforeKw && drugAfterKw;
        final boolean betweenDrugAndPse = drugBeforeKw && pseAfterKw;

        if (betweenPseAndDrug && betweenDrugAndPse) {
            feature.add("bet");
        } else if (betweenPseAndDrug) {
            feature.add("bpd");
        } else if (betweenDrugAndPse) {
            feature.add("bdp");
        } else if (pseAfterKw && pseBeforeKw) {
            // keyword is before one PSE and after another
            feature.add("bap");
        } else if (pseAfterKw) {
            // keyword is before a PSE
            feature.add("bep");
        } else if (pseBeforeKw) {
            // keyword is after a PSE
            feature.add("afp");
        } else {
            feature.add("any");
        }
    }
    return feature;
}

/**
 * Tells whether the span {@code [pos, pos + length)} of {@code text} is delimited by non-word characters.
 * The start and the end of the text count as boundaries, so a keyword that ends exactly at the end of the
 * text no longer triggers a {@code StringIndexOutOfBoundsException}.
 */
private static boolean isKeywordAtWordBoundary(final String text, final int pos, final int length) {
    final int after = pos + length;
    final boolean boundaryBefore = pos == 0 || isNonWordChar(text.charAt(pos - 1));
    final boolean boundaryAfter = after >= text.length() || isNonWordChar(text.charAt(after));
    return boundaryBefore && boundaryAfter;
}

/** Equivalent of the regex {@code \W} for a single char: anything other than {@code [a-zA-Z0-9_]}. */
private static boolean isNonWordChar(final char c) {
    final boolean wordChar = (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || c == '_';
    return !wordChar;
}

/** Returns true if at least one mention ends strictly before {@code offset}. */
private static boolean anyMentionEndsBefore(final List<IdentifiedAnnotation> mentions, final int offset) {
    for (final IdentifiedAnnotation mention : mentions) {
        if (offset > mention.getEnd()) {
            return true;
        }
    }
    return false;
}

/** Returns true if at least one mention begins strictly after {@code offset}. */
private static boolean anyMentionBeginsAfter(final List<IdentifiedAnnotation> mentions, final int offset) {
    for (final IdentifiedAnnotation mention : mentions) {
        if (offset < mention.getBegin()) {
            return true;
        }
    }
    return false;
}