in ctakes-drug-ner/src/main/java/org/apache/ctakes/drugner/ae/DrugMentionAnnotator.java [885:1092]
/**
 * Generates drug mentions for every window of text inside one segment.
 *
 * <p>Primary path: the unique {@code MedicationMention}s found in
 * {@code [seg.getBegin(), seg.getEnd() + 1]} (restricted to the drug and unknown
 * NE types) are collected, a table of window spans is built with
 * {@code getWindowSpan}, and {@code generateDrugMentionsAndAnnotations} is called
 * once for each window that contains at least one mention.
 *
 * <p>Fallback path: if an {@link ArrayIndexOutOfBoundsException} escapes the
 * primary path, the mentions are re-read and walked in list order. For each
 * mention a {@code [begin, end]} range is derived from newline tokens, the
 * position of the following mention and nearby "(", ")" or "/" punctuation;
 * {@code findDrugAttributesInRange} and {@code generateDrugMentionsAndAnnotations}
 * are then run on that range.
 *
 * <p>Exceptions thrown by {@code generateDrugMentionsAndAnnotations} are logged
 * and do not abort processing of the remaining windows.
 *
 * @param jcas          the CAS view whose annotations are read and passed to the helpers
 * @param seg           the segment whose begin/end offsets bound the search
 * @param narrativeType {@code true} to request one "narrative" window lookup per
 *                      mention; {@code false} to request a single "list" window
 *                      lookup over the whole segment range
 * @throws Exception if a helper called outside the inner try/catch blocks throws
 *                   (an {@code ArrayIndexOutOfBoundsException} raised in the
 *                   fallback path itself is not caught here either)
 */
private void generateDrugMentions(JCas jcas, Segment seg, boolean narrativeType) throws Exception
{
    // Note: end starts one past seg.getEnd(); the fallback path later resets it from seg.getEnd().
    int begin = seg.getBegin(), end = seg.getEnd() + 1;
    // nextNER / nextNERPosition are only used by the fallback path, where they are
    // reassigned on every iteration of the mention loop.
    MedicationMention nextNER = null;
    int nextNERPosition = 0;
    List uniqueNEs;
    List allNEs;
    int[] validNeTypes = { CONST.NE_TYPE_ID_DRUG, CONST.NE_TYPE_ID_UNKNOWN };
    try {
        uniqueNEs = findUniqueMentions( FSUtil.getAnnotationsInSpan(jcas, MedicationMention.type, begin, end, validNeTypes).toArray());
        // FIX ID: 3476114, ID: 3476113, and ID: 3476110
        // The window table is sized at three rows per unique mention; each row is {begin, end}.
        int globalArraySize = uniqueNEs.size()*3;
        int [][] windowSpans = new int [globalArraySize][2];
        int globalWindowSize = 0;
        if (narrativeType) {
            for (int neCount = 0; neCount < uniqueNEs.size() ; neCount ++ ) {
                boolean processedSpan = false;
                MedicationMention neNarrative = (MedicationMention) uniqueNEs.get(neCount);
                // Skip this mention when a window already recorded starts at the same offset.
                // The scan stops at the first row whose begin offset is 0, which is the
                // default value of rows that have not been filled yet.
                for (int spanCheck = 0 ; spanCheck < windowSpans.length && !processedSpan && windowSpans[spanCheck][0] != 0; spanCheck ++ ) {
                    if (windowSpans[spanCheck][0] == neNarrative.getBegin()) {
                        processedSpan = true;
                    }
                }
                if (!processedSpan) {
                    int [][] narrativeSpans = getWindowSpan(jcas, "narrative", MedicationMention.type, neNarrative.getBegin(), neNarrative.getEnd(), false, globalArraySize);
                    // If the appended rows exceed the table size, the resulting
                    // ArrayIndexOutOfBoundsException is caught by the outer handler,
                    // which switches to the fallback scan.
                    for (int elementCount = 0; elementCount < narrativeSpans.length; elementCount ++ ) {
                        windowSpans[globalWindowSize] = narrativeSpans[elementCount];
                        globalWindowSize++;
                    }
                }
            }
        } else if (uniqueNEs.size() > 0){ // don't bother finding spans if no ne in list
            windowSpans = getWindowSpan(jcas, "list", MedicationMention.type, begin, end, false, globalArraySize);
            // A begin offset of -1 in the first row is replaced by the whole search range.
            // NOTE(review): presumably -1 is getWindowSpan's "no window found" marker - confirm.
            if (windowSpans.length > 0 && windowSpans[0][0] == -1) {
                windowSpans[0][0] = begin;
                windowSpans[0][1] = end;
            }
        }
        // Every row of the table is visited, including rows that were never filled.
        for (int count= 0; count < windowSpans.length; count++) {
            List neTokenUpdatedList = getAnnotationsInSpan(jcas,
                    MedicationMention.type, windowSpans[count][0], windowSpans[count][1]);
            if (!neTokenUpdatedList.isEmpty())
            {
                List globalDrugNERList = new ArrayList();
                try
                {
                    generateDrugMentionsAndAnnotations(jcas, neTokenUpdatedList,
                            windowSpans[count][0], windowSpans[count][1], null, null, 0, globalDrugNERList);
                } catch (Exception e)
                {
                    // Best effort: a failure in one window must not stop the others.
                    // This handler also covers NumberFormatException and any
                    // ArrayIndexOutOfBoundsException raised inside the helper, so those
                    // never reach the outer catch. The exception itself is handed to the
                    // logger so that its type and stack trace are kept, not only the message.
                    LOGGER.info(e.getMessage(), e);
                }
                globalDrugNERList.clear();
            }
        }
    } catch (ArrayIndexOutOfBoundsException aioobe) {
        // Fallback scan. begin and end still hold the values assigned at the top of the
        // method because the primary path only reads them.
        allNEs =
                FSUtil.getAnnotationsInSpan(jcas, MedicationMention.type, begin, end, validNeTypes);
        uniqueNEs = findUniqueMentions(allNEs.toArray());
        // End offset of the most recently consumed newline token; starts at the segment end.
        int lastNL = seg.getEnd();
        boolean lastOne = false;
        // A single newline iterator is shared by all mentions; it only moves forward.
        Iterator newLineItr =
                FSUtil.getAnnotationsIteratorInSpan(jcas, NewlineToken.type, begin, end);
        for (int i = 0; i < uniqueNEs.size(); i++)
        {
            MedicationMention thisNER = (MedicationMention) uniqueNEs.get(i);
            boolean hasNext = false;
            if (uniqueNEs.size() > i + 1)
            {
                nextNER = (MedicationMention) uniqueNEs.get(i + 1);
                nextNERPosition = nextNER.getBegin();
                // nextNER was already dereferenced on the line above, so this check
                // cannot fail when it is reached.
                if (nextNER != null)
                {
                    hasNext = true;
                }
            } else if (!uniqueNEs.isEmpty())
            {
                // Last mention in the list: it stands in as its own "next" mention.
                nextNER = (MedicationMention) uniqueNEs.get(i);
                nextNERPosition = nextNER.getBegin();
                lastOne = true;
            }
            boolean foundLeftParen = false;
            // Look for ")" or "/" punctuation in the three offsets after this mention.
            boolean foundRightParen = findCoveredTextInSpan(jcas, PunctuationToken.type, thisNER.getEnd(), thisNER.getEnd()+3, (new String[]{")","/"}));
            if (hasNext && !lastOne)
            {
                end = nextNERPosition;
            } else
            {
                end = seg.getEnd();
            }
            // hasNLEnd is never set to false anywhere in this method.
            boolean hasNLEnd = true;
            boolean wrapItUp = false;
            while (hasNLEnd && !wrapItUp && end <= seg.getEnd()
                    && ((begin < end) || (!hasNext && begin <= end) || foundLeftParen))
            {
                if (begin == end) {
                    foundLeftParen = false;
                    end = end+1;
                }
                // Consume at most one newline token per pass of the while loop.
                NewlineToken nl = null;
                if (hasNLEnd && newLineItr.hasNext())
                {
                    nl = (NewlineToken) newLineItr.next();
                    hasNLEnd = true; // no-op: hasNLEnd is already true here
                }
                if ((!hasNext && begin <= end) || (nextNERPosition < end))
                {
                    wrapItUp = true;
                }
                boolean findNextNL = false;
                if (lastNL <= thisNER.getBegin())
                {
                    begin = thisNER.getBegin();
                }
                if ((nl != null) && (thisNER.getBegin() >= nl.getEnd()))
                {
                    // The newline just read ends at or before this mention: read another one.
                    findNextNL = true;
                } else if (nl != null)
                {
                    lastNL = nl.getEnd();
                }
                if (!hasNext)
                {
                    // No following mention: the range always runs to the end of the segment.
                    findNextNL = false;
                    end = seg.getEnd();
                }
                if (!findNextNL)
                {
                    if ((nextNER != null)
                            && (((nextNER.getCoveredText().compareToIgnoreCase(
                                    thisNER.getCoveredText()) == 0) || ((foundRightParen) || nextNER.getBegin() == thisNER.getEnd() + 2))))
                    {
                        // The next mention has the same text (ignoring case), or ")" / "/" was
                        // found after this mention, or the next mention begins two offsets
                        // after this one ends.
                        if (nl == null)
                        {
                            if (!hasNext)
                            {
                                end = seg.getEnd();
                            }
                        } else if (nextNER.getBegin() >= nl.getEnd() && hasNext)
                        {
                            end = nextNERPosition;
                        }
                    } else if (hasNLEnd && hasNext)
                    {
                        // Look for "(" or "/" punctuation around the start of the next mention.
                        foundLeftParen = findCoveredTextInSpan(jcas, PunctuationToken.type, nextNER.getBegin()-1, nextNER.getBegin()+1, (new String[]{"(","/"}));
                        // nextNERPosition is assigned from nextNER.getBegin() whenever nextNER
                        // is set, so both branches end the range where the next mention begins.
                        if (nl != null && nl.getEnd() > nextNER.getBegin()
                                && !foundLeftParen)
                        {
                            end = nextNERPosition;
                        } else
                        {
                            end = nextNER.getBegin();
                        }
                    } else if (hasNext)
                    {
                        end = nextNERPosition;
                    } else
                    {
                        end = seg.getEnd();
                    }
                    if (begin < end)
                    {
                        findDrugAttributesInRange(jcas, begin, end);
                        //TODO: need to fix - use the list above - uniqueNEs and subset that list instead of getting new list of annotations
                        List neTokenUpdatedList = getAnnotationsInSpan(jcas, MedicationMention.type, begin, end + 1);
                        //TODO: 10/28/2010 -- exception
                        // it seems that this can still happen triggered by either from FSM or a case where the array exceeds the length
                        if (!neTokenUpdatedList.isEmpty())
                        {
                            List globalDrugNERList = new ArrayList();
                            try
                            {
                                generateDrugMentionsAndAnnotations(jcas, neTokenUpdatedList, begin, end, null, null, 0, globalDrugNERList);
                            } catch (Exception e)
                            {
                                // Best effort, as in the primary path: log with the full
                                // exception (type and stack trace) and continue.
                                LOGGER.info(e.getMessage(), e);
                            }
                            globalDrugNERList.clear();
                        }
                    }
                    // The next range starts where this one ended.
                    begin = end;
                }
            }
        }
    }
}