scriptators/perl/ae.pl (61 lines of code) (raw):

#!/usr/bin/perl

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

use perltator;
use strict;
use warnings;

#
# Example keyword annotator script.
#
# The script defines three entry points -- initialize, typeSystemInit and
# process -- and shares state between them through package variables in
# main:: ($main::ac, $main::matchString, $main::thisScript, $main::debug,
# $main::keywordtype).
#

#
# initialize stores its single argument in $main::ac (presumably the
# annotator context -- it is used here for extractValue,
# extractIntegerValue and logError), sets the keyword pattern, and reads
# the "SourceFile" and "DebugLevel" settings.
#
sub initialize {
    $main::ac          = shift;
    $main::matchString = 'Dave|David|human\s+rights';
    $main::thisScript  = $main::ac->extractValue("SourceFile");
    $main::debug       = $main::ac->extractIntegerValue("DebugLevel");

    # A missing setting behaves exactly like an empty name / level 0 in the
    # checks below; normalising here only avoids "uninitialized" warnings.
    $main::thisScript = '' unless defined $main::thisScript;
    $main::debug      = 0  unless defined $main::debug;

    if ($main::debug > 0) {
        print "$main::thisScript: Using match string = $main::matchString \n";
    }
}

#
# typeSystemInit is passed the type system.  It looks up the keyword
# annotation type and caches it in $main::keywordtype; if the type is not
# valid the error is logged through $main::ac and stored in $@.
#
sub typeSystemInit {
    my $ts = shift;

    if ($main::debug > 10) {
        # Newline-terminated like every other debug message in this script.
        print "$main::thisScript: Type sytem init called\n";
    }

    my $keytype = "com.ibm.uima.examples.keyword";
    $main::keywordtype = $ts->getType($keytype);

    if (!$main::keywordtype->isValid()) {
        my $error = "$main::thisScript: $keytype is NOT found in type system!";
        $main::ac->logError($error);
        # set eval error to cause annotator to exit prematurely
        $@ = $error;
    }
}

#
# the process method is passed two parameters, the CAS and
# the ResultsSpecification
#
# Every case-insensitive match of $main::matchString in the document text
# becomes one annotation of type $main::keywordtype spanning the match,
# which is added to the index repository.  The ResultsSpecification is
# accepted but not used.
#
# Debug output: above level 10 the number of created annotations is
# printed; above level 20 the keyword annotation index is walked and
# counted; above level 30 each annotation's covered text is printed too.
#
sub process {
    my ($tcas, $rs) = @_;

    if ($main::debug > 10) {
        print "$main::thisScript: This is a process function\n";
    }

    my $text  = $tcas->getDocumentText();
    my $index = $tcas->getIndexRepository();

    # An undefined document text yields no matches, same as an empty one.
    $text = '' unless defined $text;

    my $annotCount = 0;
    while ($text =~ m/($main::matchString)/ig) {
        # @- / @+ hold the start / end offsets of capture group 1; read them
        # immediately, before any other regex can overwrite them.
        my ($begin, $end) = ($-[1], $+[1]);

        my $fs = $tcas->createAnnotation($main::keywordtype, $begin, $end);
        $index->addFS($fs);
        $annotCount++;
    }

    if ($main::debug > 10) {
        print "$main::thisScript: created $annotCount annotations\n";
    }

    if ($main::debug > 20) {
        my $annots   = 0;
        my $iterator = $tcas->getAnnotationIndex($main::keywordtype)->iterator();

        while ($iterator->isValid()) {
            $annots += 1;

            if ($main::debug > 30) {
                my $anno    = $iterator->get();
                my $covered = $anno->getCoveredText();

                # Texts longer than 40 characters are cut to their first 20
                # characters plus an ellipsis.
                if (length($covered) > 40) {
                    $covered = substr($covered, 0, 20) . "...";
                }
                # Collapse runs of newlines so the output stays on one line.
                $covered =~ s/\n+/ /g;

                print "Annotation type=", $main::keywordtype->getName(),
                    ": \"$covered\"\n";
            }

            $iterator->moveToNext();
        }

        print "$main::thisScript: found $annots annotations\n";
    }
}