void processInputFiles()

in src/utils/runAECpp.cpp [506:623]


void processInputFiles(AnalysisEngine * pEngine) {
    TyErrorId utErrorId;          // Variable to store UIMACPP return codes
    ErrorInfo errorInfo;          // Variable to stored detailed error info

    int count = 0;

    stringstream str;
    str << endl << "ThreadId: " << apr_os_thread_current();
    str << " runAECpp: Processing started. Number of runs " << numruns 
                << " rdelay " << rdelay << " millis. " ;
    if (randomize)
        str << " Inputs processed in random order. ";
    
    cout << str.str() << endl;
    //uima::ResourceManager::getInstance().getLogger().logMessage(str.str() + " started: " );

    /* Get a new CAS */
    CAS* cas = pEngine->newCAS();
    if (cas == NULL) {
      cerr << "runAECpp: pEngine->newCAS() failed." << endl;
      exit (1);
    }

    /* initialize random seed: */
    srand( time(NULL) + (apr_time_now() % 10000) );

    for (int i=0; i < numruns; i++) {
      stringstream str;
      cout << endl << "ThreadId: " << apr_os_thread_current() << " runAECpp::processing start iteration: " << i << endl;
      //uima::ResourceManager::getInstance().getLogger().logMessage(str.str() );
      /* process input */
      util::DirectoryWalk dirwalker(in.c_str());
      if (dirwalker.isValid()) {
        cout << "ThreadId: " << apr_os_thread_current() << " runAECpp::processing all files in directory: " << in.c_str() << endl;
        util::Filename infile(in.c_str(),"FilenamePlaceHolder");
        if (!randomize) { 
          while (dirwalker.isValid()) {
          // Process all files or just the ones with matching suffix
            if ( dirwalker.isFile() ) {
              infile.setNewName(dirwalker.getNameWithoutPath());
              std::string afile(infile.getAsCString());
            
              stringstream str;
              if (count % 100 == 0 && count > 0)  {
                str << apr_time_now() << " ThreadId: " << apr_os_thread_current() <<  " numProcessed=" << count;
                cerr << str.str() << endl;  
                uima::ResourceManager::getInstance().getLogger().logMessage(str.str() );
              }
              //process the cas
              process(pEngine,cas,afile, out);

              //reset the cas
              cas->reset();
              if (rdelay > 0) {
                int howlong = rand() % rdelay;
                cout << "ThreadId: " << apr_os_thread_current() << " runAECpp::processing sleep for " << howlong << " millis " << endl;
                apr_sleep(howlong*1000);
              }
              count++;
            }
            //get the next input file in the directory
            dirwalker.setToNext();
          }
        } else {
          //construct a list of the input files.
          vector<std::string> filenames;
          while (dirwalker.isValid()) {
             // Process all files or just the ones with matching suffix
             if ( dirwalker.isFile() ) {
              infile.setNewName(dirwalker.getNameWithoutPath());
              filenames.push_back(infile.getAsCString());
             }
             //get the next input file in the directory
             dirwalker.setToNext();
          } 
         
          //how many to process in this run.
          int num = filenames.size();   
          for (int i=0; i < num; i++) {
            //select next file to be processed.   
            int index =   rand() % filenames.size();  //number between 1 and number of files
            
            stringstream str;
            if (count % 100 == 0 && count > 0)  {
              str << apr_time_now() << " ThreadId: " << apr_os_thread_current() <<  " runAECpp::processing numProcessed=" << count;
              cerr << str.str() << endl;  
              uima::ResourceManager::getInstance().getLogger().logMessage(str.str() );
            }

            string afile = filenames.at(index);
            //cout << "ThreadId: " << apr_os_thread_current() << "runAECpp::processing file " << index << " " << afile  << endl;
            //process 
            process(pEngine, cas, afile, out);
            cas->reset();

            //sleep for time specified by rdelay
            if (rdelay > 0) {
              int howlong = rand() % rdelay;
              cout << "ThreadId: " << apr_os_thread_current() << " runAECpp::processing sleep for " << howlong << " millis " << endl;
              apr_sleep(howlong*1000);
            }
            count++;
          }
        }
      } else {
        //process the cas
        process(pEngine,cas, in, out);
      }
      /* call collectionProcessComplete */
      utErrorId = pEngine->collectionProcessComplete();
    }
    /* Free annotator */
    utErrorId = pEngine->destroy();

    delete cas;
    delete pEngine;
    cout << "ThreadId: " << apr_os_thread_current()  << " runAECpp finished processing." << endl;
}