public static List buildSearchEngineQueryFromSentence()

in opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java [196:267]


	/**
	 * Builds a list of search-engine query strings from a sentence.
	 * <p>
	 * The sentence is parsed into grouped phrases and only the first group is used
	 * (NOTE(review): presumably the noun-phrase group, judging by the original
	 * variable name {@code nPhrases} - confirm against the parser). For each phrase
	 * the lemmas whose POS tag starts with {@code "N"} or {@code "J"} are kept and
	 * turned into a query of the form {@code " +word1 +word2 ..."}.
	 * <p>
	 * Phrases are filtered in two passes:
	 * <ol>
	 * <li>strict: 2 to 5 kept words, and phrases of 2 or 3 words are accepted only
	 * if no word equals its own lower-cased form;</li>
	 * <li>relaxed (only if the strict pass produced nothing): at least 2 kept
	 * words.</li>
	 * </ol>
	 * The result is passed through {@code removeDuplicatesFromQueries} and the
	 * original sentence is always appended as the last element.
	 *
	 * @param sentence the sentence to derive queries from
	 * @return the phrase-based queries followed by {@code sentence} itself
	 */
	public static List<String> buildSearchEngineQueryFromSentence(String sentence) {
		ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor.getInstance();

		// Guard against a missing or empty parse result so the caller still receives
		// the sentence-only fallback query instead of an exception from get(0).
		List<List<ParseTreeChunk>> groupedPhrases = pos.formGroupedPhrasesFromChunksForSentence(sentence);
		List<ParseTreeChunk> nPhrases = null;
		if (groupedPhrases != null && !groupedPhrases.isEmpty()) {
			nPhrases = groupedPhrases.get(0);
		}
		if (nPhrases == null) {
			nPhrases = new ArrayList<>();
		}

		List<String> queryArrayStr = collectPhraseQueries(nPhrases, true);
		if (queryArrayStr.isEmpty()) {
			// nothing survived the strict filter: release constraints down to
			// "at least 2 keywords"
			queryArrayStr = collectPhraseQueries(nPhrases, false);
		}

		queryArrayStr = removeDuplicatesFromQueries(queryArrayStr);
		queryArrayStr.add(sentence);

		return queryArrayStr;
	}

	/**
	 * Converts each phrase into a {@code " +w1 +w2 ..."} query, skipping phrases
	 * that do not pass the length / capitalization filter.
	 *
	 * @param phrases the phrases to convert
	 * @param strict  when {@code true}, additionally rejects phrases with more than
	 *                5 kept words and phrases of 2-3 kept words that contain a word
	 *                without any upper-case character
	 * @return a new mutable list of queries, in phrase order
	 */
	private static List<String> collectPhraseQueries(List<ParseTreeChunk> phrases, boolean strict) {
		List<String> queries = new ArrayList<>();
		for (ParseTreeChunk ch : phrases) {
			String keywords = joinNounAndAdjectiveLemmas(ch);
			// "".split(...) yields a single empty token, so an empty keyword string
			// is rejected by the "fewer than 2 words" check below.
			String[] words = keywords.split("\\s+");
			if (words.length < 2) {
				continue;
			}
			if (strict) {
				if (words.length > 5) {
					continue;
				}
				// if only two or three words, it has to be a person name, title or
				// geolocation: approximated by requiring an upper-case character
				// in every word
				if (words.length < 4 && !everyWordHasUpperCase(words)) {
					continue;
				}
			}
			// prefix every keyword with '+'
			queries.add(" +" + keywords.replace(" ", " +"));
		}
		return queries;
	}

	/**
	 * Joins, separated by single spaces, the lemmas of {@code ch} whose POS tag
	 * starts with {@code "N"} or {@code "J"}; the result is trimmed.
	 */
	private static String joinNounAndAdjectiveLemmas(ParseTreeChunk ch) {
		List<String> lemmas = ch.getLemmas();
		List<String> posTags = ch.getPOSs();
		StringBuilder joined = new StringBuilder();
		int size = lemmas.size();
		for (int i = 0; i < size; i++) {
			String tag = posTags.get(i);
			if (tag.startsWith("N") || tag.startsWith("J")) {
				joined.append(lemmas.get(i)).append(" ");
			}
		}
		return joined.toString().trim();
	}

	/**
	 * Returns {@code false} as soon as a word equals its own lower-cased form
	 * (i.e. contains no upper-case character), {@code true} otherwise.
	 */
	private static boolean everyWordHasUpperCase(String[] words) {
		for (String w : words) {
			if (w.toLowerCase().equals(w)) {
				return false;
			}
		}
		return true;
	}