public ParseTreeChunk generalizeTwoGroupedPhrasesRandomSelectHighestScore()

in opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcher.java [135:239]


  /**
   * Generalizes two phrases into their best-scoring common sub-phrase using a
   * randomized alignment.
   * <p>
   * The two chunks are walked in parallel {@code NUMBER_OF_ITERATIONS} times
   * from left to right and then {@code NUMBER_OF_ITERATIONS} times from right
   * to left. At every position where the words do not match, one of the two
   * cursors is advanced at random, so each pass may produce a different
   * alignment. Every candidate is scored with {@code parseTreeChunkListScorer}
   * and the first candidate reaching the highest score is kept.
   *
   * @param chunk1 the first phrase to generalize
   * @param chunk2 the second phrase to generalize
   * @return the highest-scoring generalization found, or {@code null} if no
   *         pass was executed or no candidate scored above {@code -1.0}
   */
  public ParseTreeChunk generalizeTwoGroupedPhrasesRandomSelectHighestScore(
      ParseTreeChunk chunk1, ParseTreeChunk chunk2) {
    List<String> pos1 = chunk1.getPOSs();
    List<String> pos2 = chunk2.getPOSs();
    List<String> lemmas1 = chunk1.getLemmas();
    List<String> lemmas2 = chunk2.getLemmas();

    // One generator for the whole call; it is shared by the forward and the
    // backward passes instead of being re-created for every compared word pair.
    Random rand = new Random();

    double globalScore = -1.0;
    ParseTreeChunk result = null;

    // All forward passes run first, then all backward passes. Because the
    // comparison below is strict, ties keep the earliest candidate.
    for (boolean forward : new boolean[] {true, false}) {
      for (int timesRun = 0; timesRun < NUMBER_OF_ITERATIONS; timesRun++) {
        ParseTreeChunk currResult = alignChunksWithRandomSkips(pos1, lemmas1,
            pos2, lemmas2, forward, rand);
        double score = parseTreeChunkListScorer.getScore(currResult);
        if (score > globalScore) {
          result = currResult;
          globalScore = score;
        }
      }
    }
    return result;
  }

  /**
   * Performs a single randomized alignment pass over two POS/lemma sequences
   * and returns the common sub-phrase it found.
   *
   * @param pos1 POS tags of the first phrase
   * @param lemmas1 lemmas of the first phrase
   * @param pos2 POS tags of the second phrase
   * @param lemmas2 lemmas of the second phrase
   * @param forward {@code true} to walk from the first word to the last,
   *        {@code false} to walk from the last word to the first
   * @param rand source of randomness used to pick which cursor to move on a
   *        mismatch
   * @return a new chunk holding the matched lemmas and POS tags in their
   *         original left-to-right order
   */
  private ParseTreeChunk alignChunksWithRandomSkips(List<String> pos1,
      List<String> lemmas1, List<String> pos2, List<String> lemmas2,
      boolean forward, Random rand) {
    List<String> commonPOS = new ArrayList<>();
    List<String> commonLemmas = new ArrayList<>();

    int step = forward ? 1 : -1;
    int k1 = forward ? 0 : pos1.size() - 1;
    int k2 = forward ? 0 : pos2.size() - 1;

    while (k1 >= 0 && k1 < pos1.size() && k2 >= 0 && k2 < pos2.size()) {
      String sim = posManager.similarPOS(pos1.get(k1), pos2.get(k2));

      // Only consult the lemma matcher when both lemmas actually exist. The
      // bounds check must come before the get() calls; a missing lemma is
      // treated as "no lemma information" and yields the "*" wildcard below.
      String lemmaMatch = null;
      if (k1 < lemmas1.size() && k2 < lemmas2.size()) {
        lemmaMatch = lemmaFormManager.matchLemmas(null, lemmas1.get(k1),
            lemmas2.get(k2), sim);
      }

      // A pair is accepted when the POS tags are similar and the lemma
      // matcher did not explicitly reject it.
      if (sim != null && !"fail".equals(lemmaMatch)) {
        commonPOS.add(pos1.get(k1));
        commonLemmas.add(lemmaMatch != null ? lemmaMatch : "*");
        k1 += step;
        k2 += step;
      } else if (rand.nextDouble() > 0.5) {
        // Mismatch: skip a word in one of the phrases, chosen at random.
        k1 += step;
      } else {
        k2 += step;
      }
    }

    if (!forward) {
      // The backward walk collected matches last-to-first; restore word order.
      Collections.reverse(commonLemmas);
      Collections.reverse(commonPOS);
    }
    return new ParseTreeChunk(commonLemmas, commonPOS, 0, 0);
  }