in opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcher.java [135:239]
/**
 * Generalizes two grouped phrases by repeated randomized alignment and keeps
 * the highest-scoring result.
 * <p>
 * The POS sequences of both chunks are walked in parallel. Where the two
 * current positions have a similar POS and their lemmas do not explicitly
 * fail to match, the position is added to the generalization; otherwise one
 * of the two cursors, chosen at random, is advanced. This is repeated
 * {@code NUMBER_OF_ITERATIONS} times scanning left to right and then
 * {@code NUMBER_OF_ITERATIONS} times scanning right to left. Each candidate
 * is scored with {@code parseTreeChunkListScorer}, and the first candidate
 * that reaches the highest score is returned.
 *
 * @param chunk1 the first phrase to generalize
 * @param chunk2 the second phrase to generalize
 * @return the best-scoring generalization, or {@code null} if no candidate
 *         scored above -1.0 (for example when {@code NUMBER_OF_ITERATIONS}
 *         is not positive)
 */
public ParseTreeChunk generalizeTwoGroupedPhrasesRandomSelectHighestScore(
    ParseTreeChunk chunk1, ParseTreeChunk chunk2) {
  List<String> pos1 = chunk1.getPOSs();
  List<String> pos2 = chunk2.getPOSs();
  // A single generator serves every alignment of this call; the previous code
  // allocated a new Random on each step of the forward scan and used
  // Math.random() for the backward scan.
  Random rand = new Random();
  double globalScore = -1.0;
  ParseTreeChunk result = null;
  // Direction 0 is the forward scan, direction 1 the backward scan. Forward
  // candidates are evaluated first, so they win ties (strict '>' below).
  for (int direction = 0; direction < 2; direction++) {
    boolean forward = direction == 0;
    for (int timesRun = 0; timesRun < NUMBER_OF_ITERATIONS; timesRun++) {
      ParseTreeChunk currResult = alignChunksWithRandomSkips(chunk1, chunk2,
          pos1, pos2, forward, rand);
      double score = parseTreeChunkListScorer.getScore(currResult);
      if (score > globalScore) {
        result = currResult;
        globalScore = score;
      }
    }
  }
  return result;
}

/**
 * Performs one randomized alignment of the two chunks in the given direction
 * and returns the resulting generalization with lemmas and POS tags in
 * left-to-right order.
 *
 * @param chunk1  the first phrase; its lemmas are read by position
 * @param chunk2  the second phrase; its lemmas are read by position
 * @param pos1    POS tags of {@code chunk1}
 * @param pos2    POS tags of {@code chunk2}
 * @param forward {@code true} to scan from the first position upwards,
 *                {@code false} to scan from the last position downwards
 * @param rand    source of the random choice of which cursor to advance when
 *                the current positions do not match
 * @return a new chunk built from the matched lemmas and POS tags
 */
private ParseTreeChunk alignChunksWithRandomSkips(ParseTreeChunk chunk1,
    ParseTreeChunk chunk2, List<String> pos1, List<String> pos2,
    boolean forward, Random rand) {
  List<String> commonPOS = new ArrayList<>();
  List<String> commonLemmas = new ArrayList<>();
  int step = forward ? 1 : -1;
  int k1 = forward ? 0 : pos1.size() - 1;
  int k2 = forward ? 0 : pos2.size() - 1;
  // Cursors only move in one direction, so checking both bounds is equivalent
  // to the direction-specific bound and keeps the loop direction-agnostic.
  while (k1 >= 0 && k1 < pos1.size() && k2 >= 0 && k2 < pos2.size()) {
    // first check if the same POS
    String sim = posManager.similarPOS(pos1.get(k1), pos2.get(k2));
    // Look the lemmas up only when both lists actually have this position.
    // A lemma list shorter than its POS list used to raise
    // IndexOutOfBoundsException here; a missing lemma now means "no lemma
    // information", which yields the "*" wildcard below.
    String lemmaMatch = null;
    if (k1 < chunk1.getLemmas().size() && k2 < chunk2.getLemmas().size()) {
      lemmaMatch = lemmaFormManager.matchLemmas(null,
          chunk1.getLemmas().get(k1), chunk2.getLemmas().get(k2), sim);
    }
    if (sim != null && !"fail".equals(lemmaMatch)) {
      // POS tags are compatible and the lemmas did not explicitly fail.
      commonPOS.add(pos1.get(k1));
      commonLemmas.add(lemmaMatch != null ? lemmaMatch : "*");
      k1 += step;
      k2 += step;
    } else if (rand.nextDouble() > 0.5) {
      // No match: skip one position in a randomly chosen chunk.
      k1 += step;
    } else {
      k2 += step;
    }
  }
  if (!forward) {
    // The backward scan collected matches right-to-left; restore word order.
    Collections.reverse(commonLemmas);
    Collections.reverse(commonPOS);
  }
  return new ParseTreeChunk(commonLemmas, commonPOS, 0, 0);
}