private Fragment verifyCandidateSentencesAndFormParagraph()

in opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java [783:877]


	/**
	 * Decides whether the first candidate sentence is acceptable with respect to
	 * {@code originalSentence} and, if so, forms a {@link Fragment} from it followed by the
	 * remaining candidate sentences.
	 * <p>
	 * The first candidate is rejected when it is {@code null} or not longer than 50 characters,
	 * when its match against {@code originalSentence} is empty, or when the match result reports
	 * no verb or an imperative verb. Otherwise it is accepted when
	 * {@code (syntScore > RELEVANCE_THRESHOLD || measScore > 0.5) && measScore < 0.8} and
	 * {@code GeneratedSentenceProcessor.acceptableMinedSentence} does not return {@code null}.
	 *
	 * @param candidateSentences the sentence under test at index 0, followed by the sentences
	 *                           that are appended to it on acceptance
	 * @param item               the hit supplying the title used for matching and the source URL
	 *                           stored on the result
	 * @param fragment           stored unchanged on the resulting {@link Fragment}
	 * @param originalSentence   the sentence the candidate is matched and measured against
	 * @param sentsAll           other sentences to match the candidate against when its score
	 *                           is below {@code RELEVANCE_THRESHOLD}; may be {@code null}
	 * @return the formed fragment, or {@code null} when the candidate is rejected or processing
	 *         fails before a fragment is created
	 */
	private Fragment verifyCandidateSentencesAndFormParagraph(
					String[] candidateSentences, HitBase item, String fragment, String originalSentence, List<String> sentsAll) {
		// Without a first candidate there is nothing to verify.
		if (candidateSentences == null || candidateSentences.length == 0) {
			LOG.debug("Cannot accept an empty set of candidate sentences");
			return null;
		}

		String pageSentence = candidateSentences[0];

		// Only a minimum length is enforced here; the length is NOT compared with the size
		// of the snippet fragment.
		if (pageSentence == null || pageSentence.length() <= 50) {
			LOG.debug("Cannot accept the sentence = {} : it is null or not longer than 50 characters", pageSentence);
			return null;
		}

		// All sentences after the first one are concatenated as-is (no separator is inserted).
		StringBuilder followSent = new StringBuilder();
		for (int i = 1; i < candidateSentences.length; i++) {
			followSent.append(candidateSentences[i]);
		}
		String title = item.getTitle();

		Fragment result = null;
		try {
			// Score from the syntactic match between the mined sentence (plus title)
			// and the sentence in the original text.
			SentencePairMatchResult matchRes = sm.assessRelevance(pageSentence
							+ " " + title, originalSentence);
			List<List<ParseTreeChunk>> match = matchRes.getMatchResult();
			if (match == null || match.isEmpty()) {
				LOG.debug("Rejected Sentence : empty match {}", pageSentence);
				return null;
			}

			if (!matchRes.isVerbExists() || matchRes.isImperativeVerb()) {
				LOG.debug("Rejected Sentence : No verb OR Yes imperative verb: {}", pageSentence);
				return null;
			}

			double syntScore = parseTreeChunkListScorer.getParseTreeChunkListScore(match);
			LOG.debug("{} {}\n pre-processed sent = '{}'", parseTreeChunk.listToString(match), syntScore, pageSentence);

			try {
				if (sentsAll != null && syntScore < RELEVANCE_THRESHOLD) {
					// Score too low: try the other sentences and keep the best score found.
					// The best match is tracked separately so the log below reports the match
					// that actually produced the score, not merely the last one evaluated.
					List<List<ParseTreeChunk>> bestMatch = match;
					for (String currSent : sentsAll) {
						if (currSent.startsWith(originalSentence)) {
							continue;
						}
						List<List<ParseTreeChunk>> currMatch = sm.assessRelevance(currSent, pageSentence).getMatchResult();
						if (currMatch == null) {
							// getMatchResult() may return null (see the guard above); skip it
							// instead of letting one sentence abort the remaining comparisons.
							continue;
						}
						double syntScoreCurr = parseTreeChunkListScorer.getParseTreeChunkListScore(currMatch);
						if (syntScoreCurr > syntScore) {
							syntScore = syntScoreCurr;
							bestMatch = currMatch;
						}
					}
					if (syntScore > RELEVANCE_THRESHOLD) {
						LOG.debug("Got match with other sent: {} {}", parseTreeChunk.listToString(bestMatch), syntScore);
					}
				}
			} catch (Exception e) {
				// Best effort: keep the best score obtained so far.
				LOG.error(e.getLocalizedMessage(), e);
			}

			double measScore = STRING_DISTANCE_MEASURER.measureStringDistance(
							originalSentence, pageSentence);

			// The length test is always true here given the > 50 guard above.
			if ((syntScore > RELEVANCE_THRESHOLD || measScore > 0.5)
							&& measScore < 0.8 && pageSentence.length() > 40) {
				String pageSentenceProc = GeneratedSentenceProcessor
								.acceptableMinedSentence(pageSentence);
				if (pageSentenceProc != null) {
					pageSentenceProc = GeneratedSentenceProcessor
									.processSentence(pageSentenceProc);
					// Keep the processed text as a String: wrapping a null result in a
					// StringBuilder would throw a NullPointerException and make the null
					// check below meaningless.
					String followSentProc = GeneratedSentenceProcessor.processSentence(followSent.toString());
					if (followSentProc != null) {
						pageSentenceProc += " " + followSentProc;
					}

					pageSentenceProc = Utils.convertToASCII(pageSentenceProc);
					result = new Fragment(pageSentenceProc, syntScore + measScore
									+ (double) pageSentenceProc.length() / 50.0);
					result.setSourceURL(item.getUrl());
					result.fragment = fragment;

					LOG.debug("Accepted sentence: {} | with title = {}", pageSentenceProc, title);
					LOG.debug("For fragment = {}", fragment);
				} else {
					LOG.debug("Rejected sentence due to wrong area at webpage: {}", pageSentence);
				}
			} else {
				LOG.debug("Rejected sentence due to low score: {}", pageSentence);
			}
		} catch (Throwable t) {
			// Failures are logged and whatever result exists at that point is returned.
			LOG.error(t.getLocalizedMessage(), t);
		}

		return result;
	}