scriptators/python/ae.py (50 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import re
import math
pattern = re.compile('Dave|David|(human\s+rights)', re.IGNORECASE)
# everything in the global namespace is eval'ed during initialization
# by the Pythonnator UIMA annotator
def initialize(annotContext):
global pattern
global source
global debug
global ac
ac = annotContext
source = ac.extractValue("SourceFile")
debug = ac.extractIntegerValue("DebugLevel")
if debug > 0:
print(source + ": initialized")
def typeSystemInit(ts):
global source
global debug
global ac
if debug > 10:
print(source + ": Type sytem init called")
global keywordtype
keywordtype =ts.getType('com.ibm.uima.examples.keyword')
if not keywordtype.isValid():
error = source + ": com.ibm.uima.examples.keyword is NOT found in type system!"
ac.logError(error)
raise Exception(error)
#
# the process method is passed two parameters, the CAS and
# the ResultsSpecification
def process(tcas, rs):
global keywordtype
global source
global debug
global ac
if debug > 10:
print(source + ": This is a process function")
ac.logMessage("process called")
text = tcas.getDocumentText()
index = tcas.getIndexRepository()
iterator = pattern.finditer(text)
annotCount = 0
for match in iterator:
fs = tcas.createAnnotation(keywordtype, match.start(), match.end())
index.addFS(fs)
annotCount += 1
if debug > 0:
print(source + ": created " + str(annotCount) + " annotations")
if debug > 20:
annots = 0
iterator = tcas.getAnnotationIndex(keywordtype).iterator()
while iterator.isValid():
annots += 1
iterator.moveToNext()
print(source + ": found " + str(annots) + " annotations")