public void runProcessingThread()

in uimaj-tools/src/main/java/org/apache/uima/tools/docanalyzer/DocumentAnalyzer.java [1262:1450]


  public void runProcessingThread(File inputDir, String inputFileFormat, Boolean lenient,
          File outputDir, File aeSpecifierFile, String xmlTag, String language, String encoding) {
    try {
      // create and configure collection reader that will read input docs
      CollectionReaderDescription collectionReaderDesc = FileSystemCollectionReader
              .getDescription();
      ConfigurationParameterSettings paramSettings = collectionReaderDesc.getMetaData()
              .getConfigurationParameterSettings();
      paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_INPUTDIR,
              inputDir.getAbsolutePath());
      paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_XCAS, inputFileFormat);
      paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_LENIENT,
              lenient ? "true" : "false");
      paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_LANGUAGE, language);
      paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_ENCODING, encoding);
      collectionReader = (FileSystemCollectionReader) UIMAFramework
              .produceCollectionReader(collectionReaderDesc);

      // show progress Monitor
      String progressMsg = "  Processing " + collectionReader.getNumberOfDocuments()
              + " Documents.";

      numDocs = collectionReader.getNumberOfDocuments();
      progressMonitor = new ProgressMonitor(DocumentAnalyzer.this, progressMsg, "", 0, numDocs + 2);
      String initial = "Initializing.... Please wait ";
      progressMonitor.setNote(initial);
      progressMonitor.setMillisToPopup(-1);
      progressMonitor.setMillisToDecideToPopup(-1);
      numDocsProcessed = 0;
      progressTimer.start();

      // set wait cursor
      setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));

      // Disable frame while processing:
      setEnabled(false);

      // create CPM instance that will drive processing
      mCPM = UIMAFramework.newCollectionProcessingManager();

      // read AE descriptor from file
      XMLInputSource in = new XMLInputSource(aeSpecifierFile);
      ResourceSpecifier aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

      // create and configure CAS consumer that will write the output (in
      // XMI format)
      CasConsumerDescription casConsumerDesc = XmiWriterCasConsumer.getDescription();
      ConfigurationParameterSettings consumerParamSettings = casConsumerDesc.getMetaData()
              .getConfigurationParameterSettings();
      consumerParamSettings.setParameterValue(XmiWriterCasConsumer.PARAM_OUTPUTDIR,
              outputDir.getAbsolutePath());
      // declare uima.cas.TOP as an input so that ResultSpec on user's AE will be set to produce all
      // types
      casConsumerDesc.getCasConsumerMetaData().getCapabilities()[0].addInputType("uima.cas.TOP",
              true);

      // if XML tag was specified, also create XmlDetagger annotator that handles this
      AnalysisEngineDescription xmlDetaggerDesc = null;
      if (xmlTag != null && xmlTag.length() > 0) {
        xmlDetaggerDesc = XmlDetagger.getDescription();
        ConfigurationParameterSettings xmlDetaggerParamSettings = xmlDetaggerDesc.getMetaData()
                .getConfigurationParameterSettings();
        xmlDetaggerParamSettings.setParameterValue(XmlDetagger.PARAM_TEXT_TAG, xmlTag);
        usingXmlDetagger = true;
      } else {
        usingXmlDetagger = false;
      }

      // create an aggregate AE that includes the XmlDetagger (if needed), followed by
      // th user's AE descriptor, followed by the XMI Writer CAS Consumer, using fixed flow.
      // We use an aggregate AE here, rather than just adding the CAS Consumer to the CPE, so
      // that we can support the user's AE being a CAS Multiplier and we can specify sofa mappings.
      AnalysisEngineDescription aggDesc = UIMAFramework.getResourceSpecifierFactory()
              .createAnalysisEngineDescription();
      aggDesc.setPrimitive(false);
      aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("UserAE", aeSpecifier);
      aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("XmiWriter", casConsumerDesc);
      FixedFlow flow = UIMAFramework.getResourceSpecifierFactory().createFixedFlow();

      if (xmlDetaggerDesc != null) {
        aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("XmlDetagger",
                xmlDetaggerDesc);
        flow.setFixedFlow(new String[] { "XmlDetagger", "UserAE", "XmiWriter" });

        // to run XmlDetagger we need sofa mappings
        // XmlDetagger's "xmlDocument" input sofa gets mapped to the default sofa
        SofaMapping sofaMapping1 = UIMAFramework.getResourceSpecifierFactory().createSofaMapping();
        sofaMapping1.setComponentKey("XmlDetagger");
        sofaMapping1.setComponentSofaName("xmlDocument");
        sofaMapping1.setAggregateSofaName(CAS.NAME_DEFAULT_SOFA);

        // for UserAE and XmiWriter, may default sofa to the "plainTextDocument" produced by the
        // XmlDetagger
        SofaMapping sofaMapping2 = UIMAFramework.getResourceSpecifierFactory().createSofaMapping();
        sofaMapping2.setComponentKey("UserAE");
        sofaMapping2.setAggregateSofaName("plainTextDocument");
        SofaMapping sofaMapping3 = UIMAFramework.getResourceSpecifierFactory().createSofaMapping();
        sofaMapping3.setComponentKey("XmiWriter");
        sofaMapping3.setAggregateSofaName("plainTextDocument");

        aggDesc.setSofaMappings(new SofaMapping[] { sofaMapping1, sofaMapping2, sofaMapping3 });
      } else {
        // no XML detagger needed in the aggregate in flow
        flow.setFixedFlow(new String[] { "UserAE", "XmiWriter" });
      }

      aggDesc.getAnalysisEngineMetaData().setName("DocumentAnalyzerAE");
      aggDesc.getAnalysisEngineMetaData().setFlowConstraints(flow);
      aggDesc.getAnalysisEngineMetaData().getOperationalProperties()
              .setMultipleDeploymentAllowed(false);
      progressMonitor.setProgress(1);

      // instantiate AE
      // keep this a local variable - so it doesn't hang on to the ae object
      // preventing gc (some ae objects are huge)
      AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(aggDesc);
      mCPM.setAnalysisEngine(ae);

      progressMonitor.setProgress(2);

      // register callback listener
      mCPM.addStatusCallbackListener(DocumentAnalyzer.this);

      // create a CAS, including all types from all components, that
      // we'll later use for deserializing XCASes
      List descriptorList = new ArrayList();
      descriptorList.add(collectionReaderDesc);
      descriptorList.add(ae.getMetaData());
      descriptorList.add(casConsumerDesc);
      cas = CasCreationUtils.createCas(descriptorList);
      currentTypeSystem = cas.getTypeSystem();

      // save AE output types for later use in configuring viewer
      if (aeSpecifier instanceof AnalysisEngineDescription) {
        ArrayList<String> outputTypeList = new ArrayList<>();
        Capability[] capabilities = ((AnalysisEngineDescription) aeSpecifier)
                .getAnalysisEngineMetaData().getCapabilities();
        for (int i = 0; i < capabilities.length; i++) {
          TypeOrFeature[] outputs = capabilities[i].getOutputs();
          for (int j = 0; j < outputs.length; j++) {
            if (outputs[j].isType()) {
              outputTypeList.add(outputs[j].getName());
              // also add subsumed types
              // UIMA-2565 - Clash btw. cas.Type and Window.Type on JDK 7
              org.apache.uima.cas.Type t = currentTypeSystem.getType(outputs[j].getName());
              if (t != null) {
                List<org.apache.uima.cas.Type> subsumedTypes = currentTypeSystem
                        .getProperlySubsumedTypes(t);
                Iterator<org.apache.uima.cas.Type> it = subsumedTypes.iterator();
                while (it.hasNext()) {
                  outputTypeList.add(it.next().getName());
                }
              }
            }
          }
        }
        // always allow viewing document annotation
        outputTypeList.add("uima.tcas.DocumentAnnotation");
        currentTaeOutputTypes = new String[outputTypeList.size()];
        outputTypeList.toArray(currentTaeOutputTypes);
      } else {
        currentTaeOutputTypes = null; // indicates all types should be
        // selected
      }

      // Process (in separate thread)
      mCPM.process(collectionReader);

      // if the user has already clicked cancel, call the
      // runner.terminate() immediately.
      if (progressMonitor.isCanceled()) {
        mCPM.stop();
        progressMonitor.close();
      }

    } catch (Throwable t) {
      // special check for using XML detagger with remotes, which will generate an error
      // since sofa mappings aren't supported for remotes
      if (usingXmlDetagger && (t instanceof UIMAException) && ((UIMAException) t).hasMessageKey(
              ResourceInitializationException.SOFA_MAPPING_NOT_SUPPORTED_FOR_REMOTE)) {
        displayError(
                "The XML detagging feature is not supported for remote Analysis Engines or for Aggregates containing remotes.  "
                        + "If you are running a remote Analysis Engine the \"XML Tag Containing Text\" field must be left blank.");
      } else {
        displayError(t);
      }
      aborted();
    }
  }