public void process()

in OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java [120:213]


  /**
   * Sends the document text of the given CAS to the Calais service, extracts the
   * RDF XML payload from the service response, parses it with the configured SAX
   * parser and creates one annotation for every returned element whose type URL
   * has an entry in the type mapping.
   *
   * @param aCas the CAS whose document text is sent to the service and to which
   *          the created annotations are added
   * @throws AnalysisEngineProcessException if an I/O, SAX or parser configuration
   *           error occurs while calling the service or parsing its response
   */
  public void process(CAS aCas) throws AnalysisEngineProcessException {

    try {
      String documentText = aCas.getDocumentText();

      // open connection and send data
      InputStream serviceInputStream = callServiceOnText(documentText);

      // The result is an XML document whose text content is the RDF XML result,
      // so first get the RDF XML result out of the returned XML.
      String rdfXmlContent;
      String encoding;
      BufferedInputStream in = new BufferedInputStream(serviceInputStream);
      try {
        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
        // the response comes from a remote service: parse it with secure processing limits
        docBuilderFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
        Document feedDoc = docBuilder.parse(in);
        rdfXmlContent = feedDoc.getDocumentElement().getTextContent();
        encoding = feedDoc.getXmlEncoding();
      } finally {
        // always release the service stream, also when parsing fails
        in.close();
      }

      // getXmlEncoding() is null when the response carries no XML declaration;
      // fall back to the XML default encoding instead of failing in getBytes()
      if (encoding == null) {
        encoding = "UTF-8";
      }

      // create new InputStream for the RDF XML content
      BufferedInputStream rdfInput = new BufferedInputStream(new ByteArrayInputStream(
              rdfXmlContent.getBytes(encoding)));

      // create SAX handler
      HashMap<String, DescriptionElement> elements = new HashMap<String, DescriptionElement>();
      ArrayList<DescriptionElement> subjectMap = new ArrayList<DescriptionElement>();
      Offset offset = new Offset();
      RDFSaxHandler saxHandler = new RDFSaxHandler(elements, subjectMap, offset);

      // parse RDF XML content returned by the calais service
      this.saxParser.parse(rdfInput, saxHandler);

      // collect every position in the document text at which one of the
      // charsToReplace entries occurs; these positions drive the offset correction
      ArrayList<Integer> positionsList = new ArrayList<Integer>();
      for (int i = 0; i < this.charsToReplace.length; i++) {
        int index = documentText.indexOf(this.charsToReplace[i]);
        while (index > -1) {
          positionsList.add(index);
          index = documentText.indexOf(this.charsToReplace[i], index + 1);
        }
      }
      Integer[] positions = positionsList.toArray(new Integer[positionsList.size()]);
      Arrays.sort(positions);

      // analyze entities
      for (DescriptionElement element : subjectMap) {

        // retrieve subject URL, the subject URL must be equal to an about URL in the
        // elements map to get the type of the current element
        DescriptionElement typeElement = elements.get(element.getSubjectURL());
        if (typeElement == null) {
          // no description is known for this subject URL, so no type can be resolved
          continue;
        }

        // get current CAS type for the type URL; without a mapping no annotation is created
        Type currentType = this.typeMapping.get(typeElement.getTypeURL());
        if (currentType == null) {
          continue;
        }

        // compute the begin offset of the annotation span
        int begin = 0;
        if (element.getOffset() > 0) {
          begin = element.getOffset() - offset.getOffset() - 1;
        }

        // Make begin offset correction: shift begin by one for every collected
        // position that lies before it. The positions are sorted ascending, so the
        // first position that is not before begin ends the correction.
        // NOTE(review): a position exactly equal to begin does not shift it (strict
        // comparison) - confirm this matches how the text sent to the service is altered.
        for (int i = 0; i < positions.length; i++) {
          if (positions[i].intValue() >= begin) {
            break;
          }
          begin++;
        }

        int end = begin + element.getLength();
        // create annotation, empty spans are skipped
        if (end - begin > 0) {
          AnnotationFS annotFs = aCas.createAnnotation(currentType, begin, end);
          annotFs.setStringValue(this.calaisTypeFeat, element.getTypeURL().intern());
          aCas.addFsToIndexes(annotFs);
        }
      }
    } catch (IOException ex) {
      throw new AnalysisEngineProcessException(ex);
    } catch (SAXException ex) {
      throw new AnalysisEngineProcessException(ex);
    } catch (ParserConfigurationException ex) {
      throw new AnalysisEngineProcessException(ex);
    }

  }