private void run()

in languagetool-wikipedia/src/main/java/org/languagetool/dev/dumpcheck/SentenceSourceChecker.java [152:304]


  /**
   * Checks the sentences from the input files given via option {@code f} and reports the
   * rule matches through a result handler, then prints summary statistics to stdout.
   *
   * <p>Rule selection, in the order applied by this method:
   * <ul>
   *   <li>rules that are {@code default='temp_off'} are always enabled;</li>
   *   <li>with option {@code rulesource}, only pattern rules whose source file ends with
   *       {@code "/" + rulesource} and which are not off by default stay enabled, every other
   *       rule is disabled;</li>
   *   <li>otherwise, with option {@code r} only the listed rule ids are enabled, else
   *       {@code disabledRules} is applied;</li>
   *   <li>categories from {@code also-enable-categories} are activated in addition;</li>
   *   <li>spelling rules are disabled unless option {@code spelling} or {@code r} is given.</li>
   * </ul>
   *
   * <p>Output goes to a CSV handler if option {@code csv} is set, else to a database handler if
   * {@code propFile} is not {@code null}, else to stdout. In {@code print-correct} mode the handler
   * is bypassed and only sentences without any match are printed.
   *
   * @param propFile database properties file, or {@code null} to not write to a database
   * @param disabledRules rule ids handed to {@code applyRuleDeactivation} when neither
   *                      {@code rulesource} nor {@code r} is given
   * @param langCode short code of the language to check
   * @param motherTongueCode short code of the user's mother tongue, or {@code null}
   * @param maxSentences sentence limit passed to the result handler; values &lt;= 0 are reported as "no limit"
   * @param maxErrors error limit passed to the result handler; values &lt;= 0 are reported as "no limit"
   * @param contextSize context size passed to the stdout handler
   * @param options parsed command line; also read for {@code languagemodel}, {@code remoterules},
   *                {@code filter}, {@code skip}, {@code nerUrl}, {@code verbose},
   *                {@code skip-exceptions} and {@code print-duration}
   * @throws IOException declared by this method; presumably raised while loading rules or
   *                     opening the sentence sources (not visible from here)
   * @throws RuntimeException if checking a sentence fails and {@code skip-exceptions} is not set
   */
  private void run(File propFile, Set<String> disabledRules, String langCode, String motherTongueCode,
                   int maxSentences, int maxErrors, int contextSize,
                   CommandLine options) throws IOException {
    long startTime = System.currentTimeMillis();
    String[] ruleIds = options.hasOption('r') ? options.getOptionValue('r').split(",") : null;
    String[] additionalCategoryIds = options.hasOption("also-enable-categories") ? options.getOptionValue("also-enable-categories").split(",") : null;
    String[] fileNames = options.getOptionValues('f');
    File languageModelDir = options.hasOption("languagemodel") ? new File(options.getOptionValue("languagemodel")) : null;
    File remoteRules = options.hasOption("remoterules") ? new File(options.getOptionValue("remoterules")) : null;
    Pattern filter = options.hasOption("filter") ? Pattern.compile(options.getOptionValue("filter")) : null;
    String ruleSource = options.hasOption("rulesource") ? options.getOptionValue("rulesource") : null;
    int sentencesToSkip = options.hasOption("skip") ? Integer.parseInt(options.getOptionValue("skip")) : 0;
    // These flags do not change during the run, so look them up once instead of once per sentence.
    boolean printCorrectOnly = options.hasOption("print-correct");
    boolean skipExceptions = options.hasOption("skip-exceptions");
    Language lang = Languages.getLanguageForShortCode(langCode);
    Language motherTongue = motherTongueCode != null ? Languages.getLanguageForShortCode(motherTongueCode) : null;
    GlobalConfig globalConfig = new GlobalConfig();
    System.out.println("Premium: " + Premium.isPremiumVersion());
    if (options.hasOption("nerUrl")) {
      System.out.println("Using NER service: " + options.getOptionValue("nerUrl"));
      globalConfig.setNERUrl(options.getOptionValue("nerUrl"));
    }
    if (printCorrectOnly) {
      System.out.println("In print-correct mode, will only print sentences for which no error is found.");
    }
    MultiThreadedJLanguageTool lt = new MultiThreadedJLanguageTool(lang, motherTongue, -1, globalConfig, null);
    lt.setCleanOverlappingMatches(false);
    if (languageModelDir != null) {
      lt.activateLanguageModelRules(languageModelDir);
    }
    int activatedBySource = 0;
    for (Rule rule : lt.getAllRules()) {
      if (rule.isDefaultTempOff()) {
        System.out.println("Activating " + rule.getFullId() + ", which is default='temp_off'");
        lt.enableRule(rule.getId());
      }
      if (ruleSource != null) {
        // With a rule source given, only pattern rules from that file that are on by default stay active.
        boolean enable = false;
        if (rule instanceof AbstractPatternRule) {
          String sourceFile = rule.getSourceFile();
          if (sourceFile != null && sourceFile.endsWith("/" + ruleSource) && !rule.isDefaultOff()) {
            enable = true;
            activatedBySource++;
          }
        }
        if (enable) {
          lt.enableRule(rule.getId());
        } else {
          lt.disableRule(rule.getId());
        }
      }
    }
    lt.activateRemoteRules(remoteRules);
    if (ruleSource == null) {
      if (ruleIds != null) {
        enableOnlySpecifiedRules(ruleIds, lt);
      } else {
        applyRuleDeactivation(lt, disabledRules);
      }
    } else {
      System.out.println("Activated " + activatedBySource + " rules from " + ruleSource);
    }
    if (filter != null) {
      System.out.println("*** NOTE: only sentences that match regular expression '" + filter + "' will be checked");
    }
    activateAdditionalCategories(additionalCategoryIds, lt);
    if (options.hasOption("spelling")) {
      System.out.println("Spelling rules active: yes (only if you're using a language code like en-US which comes with spelling)");
    } else if (ruleIds == null) {
      disableSpellingRules(lt);
      System.out.println("Spelling rules active: no");
    }
    System.out.println("Working on: " + StringUtils.join(fileNames, ", "));
    System.out.println("Sentence limit: " + (maxSentences > 0 ? maxSentences : "no limit"));
    System.out.println("Context size: " + contextSize);
    System.out.println("Error limit: " + (maxErrors > 0 ? maxErrors : "no limit"));
    System.out.println("Skip: " + sentencesToSkip);

    ResultHandler resultHandler = null;
    // Declared outside the try block so that the ignored-line count can still be read in 'finally'
    // when the loop is left early via a limit exception.
    MixingSentenceSource mixingSource = null;
    int ruleMatchCount = 0;
    int sentenceCount = 0;
    int skipCount = 0;
    boolean skipMessageShown = false;
    try {
      if (options.hasOption("csv"))  {
        resultHandler = new CSVHandler(maxSentences, maxErrors);
      } else if (propFile != null) {
        resultHandler = new DatabaseHandler(propFile, maxSentences, maxErrors);
      } else {
        resultHandler = new StdoutHandler(maxSentences, maxErrors, contextSize, options.hasOption("verbose"));
      }
      mixingSource = MixingSentenceSource.create(Arrays.asList(fileNames), lang, filter);
      while (mixingSource.hasNext()) {
        Sentence sentence = mixingSource.next();
        if (sentencesToSkip > 0 && skipCount < sentencesToSkip) {
          if (skipCount % 5000 == 0) {
            System.err.printf("%s sentences skipped...\n", NumberFormat.getNumberInstance(Locale.US).format(skipCount));
          }
          skipCount++;
          continue;
        } else if (sentencesToSkip > 0 && !skipMessageShown) {
          System.err.println("Done skipping " + sentencesToSkip + " sentences.");
          skipMessageShown = true;
        }
        try {
          AnnotatedText annotatedText = new AnnotatedTextBuilder().addText(sentence.getText()).build();
          CheckResults matches = lt.check2(annotatedText, true, JLanguageTool.ParagraphHandling.NORMAL, null,
            JLanguageTool.Mode.ALL, JLanguageTool.Level.PICKY, new HashSet<>(Arrays.asList(ToneTag.values())), null);
          if (printCorrectOnly) {
            if (matches.getRuleMatches().isEmpty()) {
              System.out.println(sentence.getText());
            }
          } else {
            resultHandler.handleResult(sentence, matches.getRuleMatches(), lang);
          }
          sentenceCount++;
          if (sentenceCount % 5000 == 0) {
            System.err.printf("%s sentences checked...\n", NumberFormat.getNumberInstance(Locale.US).format(sentenceCount));
          }
          ruleMatchCount += matches.getRuleMatches().size();
        } catch (DocumentLimitReachedException | ErrorLimitReachedException e) {
          // Limit exceptions end the whole run; they are handled by the outer catch.
          throw e;
        } catch (Exception e) {
          if (skipExceptions) {
            // Best effort mode: report the failure and go on with the next sentence.
            e.printStackTrace();
          } else {
            throw new RuntimeException("Check failed on sentence: " + StringUtils.abbreviate(sentence.getText(), 250), e);
          }
        }
      }
    } catch (DocumentLimitReachedException | ErrorLimitReachedException e) {
      System.out.println(getClass().getSimpleName() + ": " + e);
    } finally {
      lt.shutdown();
      if (resultHandler != null) {
        // Read the count here rather than after the loop, so it is also reported when a limit was reached.
        int ignoredCount = mixingSource != null ? mixingSource.getIgnoredCount() : 0;
        // Avoid printing 'NaN' when no sentence was checked at all.
        float matchesPerSentence = sentenceCount > 0 ? (float) ruleMatchCount / sentenceCount : 0f;
        // 'lang' is passed as an argument so that its text is never interpreted as a format string.
        System.out.printf(Locale.ENGLISH, "%s: %d total matches\n", lang, ruleMatchCount);
        System.out.printf(Locale.ENGLISH, "%s: %d total sentences considered\n", lang, sentenceCount);
        System.out.printf(Locale.ENGLISH, "%s: ø%.2f rule matches per sentence\n", lang, matchesPerSentence);
        System.out.printf(Locale.ENGLISH, "%s: %d input lines ignored (e.g. not between %d and %d chars or at least %d tokens)\n", lang, ignoredCount,
          SentenceSource.MIN_SENTENCE_LENGTH, SentenceSource.MAX_SENTENCE_LENGTH, SentenceSource.MIN_SENTENCE_TOKEN_COUNT);
        if (options.hasOption("print-duration")) {
          System.out.println("The analysis took " + (System.currentTimeMillis() - startTime) + "ms");
        }
        try {
          resultHandler.close();
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    }
  }