in opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java [283:364]
/**
 * Checks whether the first candidate sentence mined from a web page is acceptable and, if so,
 * combines it with the sentences that follow it into a scored {@link Fragment}.
 * <p>
 * The leading sentence is rejected when it is 50 characters or shorter, when it is four or more
 * times as long as the search snippet {@code fragment}, when the syntactic match reports no verb
 * or an imperative verb, when the combined syntactic / string-distance scores fall outside the
 * accepted range, or when {@code GeneratedSentenceProcessor.acceptableMinedSentence} returns
 * {@code null} for it.
 *
 * @param candidateSentences the mined sentences; element 0 is verified, the remaining elements
 *                           are concatenated (without a separator) and appended as follow-up text
 * @param item               the search hit; supplies the title used for matching and the source URL
 * @param fragment           the snippet fragment, used to bound the sentence length and stored in
 *                           the result
 * @param originalSentence   the sentence of the original text the mined sentence is compared with
 * @param sentsAll           further sentences tried as alternative match partners when the score
 *                           against {@code originalSentence} is below the relevance threshold
 * @return the accepted fragment, or {@code null} if the sentence is rejected or processing fails
 */
private Fragment verifyCandidateSentencesAndFormParagraph(
    String[] candidateSentences, HitBase item, String fragment, String originalSentence, List<String> sentsAll) {
  // Without a leading sentence or a snippet to size it against there is nothing to verify;
  // reject instead of failing with an index / null-pointer exception.
  if (candidateSentences == null || candidateSentences.length == 0 || fragment == null) {
    return null;
  }
  String pageSentence = candidateSentences[0];
  // resultant sentence must be longer than 50 characters and SHOULD NOT be
  // four or more times the size of the snippet fragment
  if (pageSentence == null || pageSentence.length() <= 50
      || (float) pageSentence.length() / (float) fragment.length() >= 4.0) {
    return null;
  }
  String title = item.getTitle();
  Fragment result = null;
  try {
    // get score from syntactic match between sentence in original text and mined sentence;
    // a missing title is left out rather than being concatenated as the text "null"
    String sentenceWithTitle = (title == null) ? pageSentence : pageSentence + " " + title;
    SentencePairMatchResult matchRes = sm.assessRelevance(sentenceWithTitle, originalSentence);
    List<List<ParseTreeChunk>> match = matchRes.getMatchResult();
    if (!matchRes.isVerbExists() || matchRes.isImperativeVerb()) {
      LOG.debug("Rejected Sentence : No verb OR Yes imperative verb :{}", pageSentence);
      return null;
    }
    double syntScore = parseTreeChunkListScorer.getParseTreeChunkListScore(match);
    LOG.debug("{} {}\n pre-processed sent = '{}'", parseTreeChunk.listToString(match), syntScore, pageSentence);
    if (syntScore < RELEVANCE_THRESHOLD) {
      // trying other sents: keep the highest score and the match that produced it
      List<List<ParseTreeChunk>> bestMatch = match;
      for (String currSent : sentsAll) {
        if (currSent.startsWith(originalSentence)) {
          continue;
        }
        List<List<ParseTreeChunk>> currMatch = sm.assessRelevance(currSent, pageSentence).getMatchResult();
        double syntScoreCurr = parseTreeChunkListScorer.getParseTreeChunkListScore(currMatch);
        if (syntScoreCurr > syntScore) {
          syntScore = syntScoreCurr;
          bestMatch = currMatch;
        }
      }
      if (syntScore > RELEVANCE_THRESHOLD) {
        LOG.debug("Got match with other sent: {} {}", parseTreeChunk.listToString(bestMatch), syntScore);
      }
    }
    double measScore = STRING_DISTANCE_MEASURER.measureStringDistance(originalSentence, pageSentence);
    // The sentence needs a good enough syntactic or string-distance score, while a string-distance
    // score of 0.8 or more rejects it. The minimum-length condition is already guaranteed by the
    // guard at the top of the method.
    if ((syntScore > RELEVANCE_THRESHOLD || measScore > 0.5) && measScore < 0.8) {
      String pageSentenceProc = GeneratedSentenceProcessor.acceptableMinedSentence(pageSentence);
      if (pageSentenceProc != null) {
        pageSentenceProc = GeneratedSentenceProcessor.processSentence(pageSentenceProc);
        // the sentences after the first one are joined as-is and cleaned as a single text
        StringBuilder followSent = new StringBuilder();
        for (int i = 1; i < candidateSentences.length; i++) {
          followSent.append(candidateSentences[i]);
        }
        String followSentProc = GeneratedSentenceProcessor.processSentence(followSent.toString());
        // check the processed text itself: wrapping a null in a StringBuilder would throw
        if (followSentProc != null) {
          pageSentenceProc += " " + followSentProc;
        }
        pageSentenceProc = Utils.convertToASCII(pageSentenceProc);
        // the score also grows with the length of the resulting text (one point per 50 characters)
        result = new Fragment(pageSentenceProc,
            syntScore + measScore + (double) pageSentenceProc.length() / 50.0);
        result.setSourceURL(item.getUrl());
        result.fragment = fragment;
        LOG.debug("Accepted sentence: {} | with title = {}", pageSentenceProc, title);
        LOG.debug("For fragment = {}", fragment);
      } else {
        LOG.debug("Rejected sentence due to wrong area at webpage: {}", pageSentence);
      }
    } else {
      LOG.debug("Rejected sentence due to low score: {}", pageSentence);
    }
  } catch (Throwable t) {
    // NOTE(review): the catch-all is kept so a failure while matching one sentence is logged and
    // the sentence is skipped; confirm whether Errors should really be swallowed here.
    LOG.error(t.getLocalizedMessage(), t);
  }
  return result;
}