private static void run()

in languagetool-http-client/src/main/java/org/languagetool/remote/ArtificialErrorEval.java [319:591]


  /**
   * Runs one evaluation pass over the corpus file and writes the results.
   *
   * <p>The pass resets both rows of {@code results}, builds the check configuration from the
   * static settings, reads all lines of {@code corpusFilePath} and then runs every evaluation
   * mode whose flag is set (plain word pair, parallel corpus, double letters, diacritics,
   * inflected forms). Afterwards one report per direction is appended to
   * {@code verboseOutputFilename} and the totals are added to {@code accumulateResults}.
   *
   * @param printSummaryDetails if {@code true}, a tab-separated summary line per direction is
   *     additionally appended to {@code summaryOutputFilename}
   * @throws IOException if the corpus cannot be read, or if a called helper fails with an I/O error
   */
  private static void run(boolean printSummaryDetails) throws IOException {
    Arrays.fill(results[0], 0);
    Arrays.fill(results[1], 0);
    fakeRuleIDs[0] = "rules_" + words[0] + "->" + words[1]; // rules in one direction
    fakeRuleIDs[1] = "rules_" + words[1] + "->" + words[0]; // rules in the other direction
    CheckConfiguration config = buildCheckConfiguration();
    long start = System.currentTimeMillis();
    List<String> lines = Files.readAllLines(Paths.get(corpusFilePath));
    int ignoredLines = 0;

    if (!inflected && !isDoubleLetters && !isDiacritics && !isParallelCorpus) {
      evaluateWordPair(lines, config);
    }
    if (isParallelCorpus) {
      ignoredLines = evaluateParallelCorpus(lines, config);
    }
    if (isDoubleLetters) {
      evaluateDoubleLetters(lines, config);
    }
    if (isDiacritics) {
      evaluateDiacritics(lines, config);
    }
    if (inflected) {
      evaluateInflectedForms(lines, config);
    }

    // print results
    int oneOrTwo = unidirectional ? 1 : 2;
    for (int i = 0; i < oneOrTwo; i++) {
      reportResultsForDirection(i, start, ignoredLines, printSummaryDetails);
    }
    System.out.println(printTimeFromStart(start, ""));
    System.out.println("-------------------------------------");
  }

  /** Builds the remote check configuration from the static rule and credential settings. */
  private static CheckConfiguration buildCheckConfiguration() {
    CheckConfigurationBuilder cfgBuilder = new CheckConfigurationBuilder(langCode);
    if (enabledOnlyRules.isEmpty()) {
      cfgBuilder.disabledRuleIds("WHITESPACE_RULE");
      if (!disabledRules.isEmpty()) {
        // NOTE(review): if the builder's setter replaces (rather than extends) the list, this call
        // drops the WHITESPACE_RULE entry set just above - confirm against CheckConfigurationBuilder.
        cfgBuilder.disabledRuleIds(disabledRules);
      }
    } else {
      cfgBuilder.enabledRuleIds(enabledOnlyRules).enabledOnly();
    }
    if (!userName.isEmpty() && !apiKey.isEmpty()) {
      cfgBuilder.username(userName).apiKey(apiKey);
    }
    return cfgBuilder.build();
  }

  /** Resets the per-run line and sentence counters before a mode starts iterating the corpus. */
  private static void resetLineCounters() {
    countLine = 0;
    checkedSentences = 0;
  }

  /**
   * Starts processing of the next corpus line: clears the match cache and counts the line.
   *
   * @return {@code false} once the input-line limit or the checked-sentence limit is exceeded,
   *     i.e. when the calling loop has to stop
   */
  private static boolean beginNextLine() {
    cachedMatches = new HashMap<>();
    countLine++;
    return !(countLine > maxInputSentences || checkedSentences > maxCheckedSentences);
  }

  /**
   * Compiles the case-insensitive search pattern for one word of the pair. The word is used as a
   * regular expression as-is; {@code \b} anchors are added only if whole-word matching is enabled
   * and the word matches {@code pWordboundaries}.
   */
  private static Pattern compileSearchPattern(String word) {
    boolean anchor = pWordboundaries.matcher(word).matches() && wholeword;
    String regex = anchor ? "\\b" + word + "\\b" : word;
    return Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
  }

  /** Word-pair mode: analyzes every occurrence of either word in every corpus line. */
  private static void evaluateWordPair(List<String> lines, CheckConfiguration config) throws IOException {
    final Pattern[] patterns = { compileSearchPattern(words[0]), compileSearchPattern(words[1]) };
    resetLineCounters();
    for (String line : lines) {
      if (!beginNextLine()) {
        break;
      }
      for (int direction = 0; direction < 2; direction++) {
        if (words[direction].length() > 0) {
          Matcher m = patterns[direction].matcher(line);
          while (m.find()) {
            analyzeSentence(line, direction, m.start(), config);
          }
        }
      }
    }
  }

  /**
   * Parallel-corpus mode: every line is tab-separated and carries a correct and an incorrect
   * version of a sentence in the columns {@code columnCorrect} and {@code columnIncorrect}
   * (1-based). The differing part of the two sentences becomes the word pair.
   *
   * @return the number of lines ignored because both sentences were identical
   */
  private static int evaluateParallelCorpus(List<String> lines, CheckConfiguration config) throws IOException {
    int ignoredLines = 0;
    resetLineCounters();
    for (String line : lines) {
      if (!beginNextLine()) {
        break;
      }
      String[] parts = line.split("\t");
      // Both columns are read below, so the line is unusable if EITHER of them is missing.
      if (parts.length < columnCorrect || parts.length < columnIncorrect) {
        continue;
      }
      String correctSource = parts[columnCorrect - 1];
      String incorrectSource = parts[columnIncorrect - 1];
      words[0] = null;
      words[1] = null;
      String correctSentence = correctSource.replace("__", "");
      String incorrectSentence = incorrectSource.replace("__", "");
      if (correctSentence.equals(incorrectSentence)) {
        printSentenceOutput("IGNORED LINE: sentences are identical!", correctSource, 0, "");
        ignoredLines++;
        continue;
      }
      List<String> diffs = StringTools.getDifference(correctSentence, incorrectSentence);
      int posError = diffs.get(0).length();
      words[1] = diffs.get(1);
      words[0] = diffs.get(2);
      if (words[1] != null) {
        // words[0] may be null!
        // check FN
        analyzeSentence(correctSentence, 1, posError, config);
        // check FP in the correct sentence
        words[0] = words[1];
        words[1] = null;
        analyzeSentence(correctSentence, 0, posError, config);
      }
    }
    return ignoredLines;
  }

  /** Double-letter mode: introduces the error "nn -> n" for every run of a repeated ASCII letter. */
  private static void evaluateDoubleLetters(List<String> lines, CheckConfiguration config) throws IOException {
    fakeRuleIDs[0] = "rules_double_letters";
    resetLineCounters();
    final Pattern repeatedLetter = Pattern.compile("([a-zA-Z])\\1+");
    for (String line : lines) {
      if (!beginNextLine()) {
        break;
      }
      Matcher m = repeatedLetter.matcher(line);
      while (m.find()) {
        words[1] = m.group(0);
        words[0] = words[1].substring(0, 1);
        analyzeSentence(line, 1, m.start(), config);
      }
    }
  }

  /** Diacritics mode: checks missing diacritics for every token that contains some. */
  private static void evaluateDiacritics(List<String> lines, CheckConfiguration config) throws IOException {
    resetLineCounters();
    for (String line : lines) {
      if (!beginNextLine()) {
        break;
      }
      List<String> tokens = language.getWordTokenizer().tokenize(line);
      int pos = 0; // start offset of the current token, accumulated from the token lengths
      for (String token : tokens) {
        if (StringTools.hasDiacritics(token)) {
          words[1] = token;
          words[0] = StringTools.removeDiacritics(token);
          analyzeSentence(line, 1, pos, config);
        }
        pos += token.length();
      }
    }
  }

  /** Inflected mode: searches tokens by lemma and pairs them with a synthesized form of the other lemma. */
  private static void evaluateInflectedForms(List<String> lines, CheckConfiguration config) throws IOException {
    resetLineCounters();
    for (String line : lines) {
      if (!beginNextLine()) {
        break;
      }
      for (AnalyzedSentence analyzedSentence : localLt.analyzeText(line)) {
        for (AnalyzedTokenReadings token : analyzedSentence.getTokensWithoutWhitespace()) {
          analyzeInflectedToken(line, token, 0, config);
          analyzeInflectedToken(line, token, 1, config);
        }
      }
    }
  }

  /**
   * Analyzes one token in inflected mode for the given direction: if the token has the lemma
   * {@code lemmas[direction]}, the same POS tag is synthesized for the other lemma and the pair
   * is passed to {@code analyzeSentence}. Tokens for which no form can be synthesized are skipped.
   */
  private static void analyzeInflectedToken(String line, AnalyzedTokenReadings token, int direction,
      CheckConfiguration config) throws IOException {
    int other = 1 - direction;
    if (lemmas[direction].length() == 0 || !token.hasLemma(lemmas[direction])) {
      return;
    }
    AnalyzedToken reading = token.readingWithLemma(lemmas[direction]);
    AnalyzedToken replacement = new AnalyzedToken(reading.getToken(), reading.getPOSTag(), lemmas[other]);
    String[] synthesizedWords = synth.synthesize(replacement, replacement.getPOSTag());
    if (synthesizedWords == null || synthesizedWords.length == 0) {
      // no counterpart form exists for this POS tag, so there is nothing to swap in
      return;
    }
    words[direction] = token.getToken();
    words[other] = synthesizedWords[0];
    analyzeSentence(line, direction, token.getStartPos(), config);
  }

  /**
   * Computes precision/recall for one direction, appends the verbose report (and optionally the
   * summary line) to the output files and adds the counts to {@code accumulateResults}.
   *
   * @param i index of the direction in {@code results} / {@code fakeRuleIDs}
   * @param start start time of the run in milliseconds, used for the elapsed-time line
   * @param ignoredLines number of corpus lines that were ignored; reported only if positive
   * @param printSummaryDetails whether to append the tab-separated summary line as well
   */
  private static void reportResultsForDirection(int i, long start, int ignoredLines,
      boolean printSummaryDetails) throws IOException {
    // Resolve every counter once by its classification name instead of mixing magic indices.
    int tp = results[i][classifyTypes.indexOf("TP")];
    int fp = results[i][classifyTypes.indexOf("FP")];
    int tn = results[i][classifyTypes.indexOf("TN")];
    int fn = results[i][classifyTypes.indexOf("FN")];
    int tpns = results[i][classifyTypes.indexOf("TPns")];
    int tpws = results[i][classifyTypes.indexOf("TPws")];

    // Float division on purpose: an empty denominator yields NaN instead of an exception.
    float precision = tp / (float) (tp + fp);
    float recall = tp / (float) (tp + fn + tpns + tpws);
    // recall including empty suggestions
    float recall2 = (tp + tpns) / (float) (tp + fn + tpns + tpws);
    int errorsTotal = tp + fp + tn + fn + tpns + tpws;
    int nCorrectSentences = fp + tn;
    int nIncorrectSentences = tp + tpns + tpws + fn;
    int nSentences = nCorrectSentences + nIncorrectSentences;
    String precisionText = String.format(Locale.ROOT, "%.4f", precision);
    String recallText = String.format(Locale.ROOT, "%.4f", recall);

    StringBuilder resultsString = new StringBuilder();
    resultsString.append("-------------------------------------\n");
    resultsString.append("Results for " + fakeRuleIDs[i] + "\n");

    resultsString.append("Total sentences: " + String.valueOf(errorsTotal) + "\n");
    resultsString.append(formattedAbsoluteAndPercentage("\nCorrect sentences", nCorrectSentences, nSentences));
    resultsString.append(formattedAbsoluteAndPercentage("FP", fp, nCorrectSentences));
    resultsString.append(formattedAbsoluteAndPercentage("TN", tn, nCorrectSentences));

    resultsString.append(formattedAbsoluteAndPercentage("\nIncorrect sentences", nIncorrectSentences, nSentences));
    resultsString.append(formattedAbsoluteAndPercentage("TP (total)", tpns + tpws + tp, nIncorrectSentences));
    resultsString.append(formattedAbsoluteAndPercentage(" TP (expected suggestion)", tp, nIncorrectSentences));
    resultsString.append(formattedAbsoluteAndPercentage(" TPns (no suggestion)", tpns, nIncorrectSentences));
    resultsString.append(formattedAbsoluteAndPercentage(" TPws (wrong suggestion)", tpws, nIncorrectSentences));
    resultsString.append(formattedAbsoluteAndPercentage("FN", fn, nIncorrectSentences));

    resultsString.append("\nPrecision: " + precisionText + "\n");
    resultsString.append("Recall: " + recallText + "\n");
    resultsString.append("Recall (including empty suggestions): " + String.format(Locale.ROOT, "%.4f", recall2) + "\n");

    if (ignoredLines > 0) {
      resultsString.append("\nIgnored lines from source: " + ignoredLines + "\n");
    }

    resultsString.append(printTimeFromStart(start, ""));
    resultsString.append("\n" + printCurrentDateTime() + "\n");
    appendToFile(verboseOutputFilename, resultsString.toString());

    if (printSummaryDetails) {
      appendToFile(summaryOutputFilename, errorCategory + "\t" + fakeRuleIDs[i]
          + "\t" + errorsTotal + "\t" + precisionText + "\t" + recallText + "\t"
          + tp + "\t"
          + fp + "\t"
          + tn + "\t"
          + fn + "\t"
          + tpns + "\t"
          + tpws + "\t");
    }

    accumulateResults[0] += errorsTotal;
    accumulateResults[1] += tp;
    accumulateResults[2] += fp;
    accumulateResults[3] += tn;
    accumulateResults[4] += fn;
  }