private List classifySentence()

in opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java [120:212]


	/**
	 * Classifies a single sentence against the training index.
	 * <p>
	 * The sentence is parsed as an OR-query over the {@code "text"} field and
	 * run through {@code indexSearcher}. The score of every hit is added to the
	 * running total kept for the hit's {@code "class"} field value in
	 * {@code scoredClasses}. Classes whose total exceeds
	 * {@code MIN_TOTAL_SCORE_FOR_CATEGORY} are returned, capped at
	 * {@code MAX_CATEG_RESULTS}; when the first class outscores the second by
	 * more than {@code BEST_TO_NEX_BEST_RATIO}, only the first one is returned.
	 * <p>
	 * NOTE(review): {@code scoredClasses} is declared outside this method and is
	 * not reset here, so totals presumably carry over between calls - confirm
	 * against the caller.
	 *
	 * @param queryStr the sentence to classify; {@code null} or a string shorter
	 *                 than {@code MIN_CHARS_IN_QUERY} yields an empty result
	 * @return the selected class names in the order produced by
	 *         {@code ValueSortMap.sortMapByValue}; an empty list when the
	 *         sentence is too short, cannot be parsed, or the index search fails
	 */
	private List<String> classifySentence(String queryStr) {

		List<String> results = new ArrayList<>();
		// missing or too short of a query
		if (queryStr == null || queryStr.length() < MIN_CHARS_IN_QUERY) {
			return results;
		}

		Query query;
		// The analyzer is only needed while parsing; try-with-resources releases
		// it as soon as the query has been built.
		try (Analyzer std = new StandardAnalyzer()) {
			QueryParser parser = new QueryParser("text", std);
			parser.setDefaultOperator(QueryParser.Operator.OR);
			query = parser.parse(queryStr);
		} catch (ParseException e2) {
			// an unparsable sentence simply gets no categories
			LOGGER.debug("Cannot parse query '" + queryStr + "': " + e2);
			return results;
		}

		// Finds the top n hits for query.
		TopDocs hits;
		try {
			hits = indexSearcher
					.search(query, MAX_DOCS_TO_USE_FOR_CLASSIFY + 2);
		} catch (IOException e1) {
			LOGGER.error("problem searching index \n" + e1);
			// without hits there is nothing to score; bail out instead of
			// dereferencing a null TopDocs below
			return results;
		}
		LOGGER.debug("Found " + hits.totalHits + " hits for " + queryStr);

		int count = 0;
		for (ScoreDoc scoreDoc : hits.scoreDocs) {
			Document doc;
			try {
				doc = indexSearcher.doc(scoreDoc.doc);
			} catch (IOException e) {
				LOGGER.error("Problem searching training set for classif \n"
						+ e);
				// skip the unreadable document, keep scoring the rest
				continue;
			}
			String flag = doc.get("class");

			// accumulate the hit score into the total of its class
			Float scoreForClass = scoredClasses.get(flag);
			if (scoreForClass == null) {
				scoredClasses.put(flag, scoreDoc.score);
			} else {
				scoredClasses.put(flag, scoreForClass + scoreDoc.score);
			}

			LOGGER.debug(" <<categorized as>> " + flag + " | score="
					+ scoreDoc.score + " \n text =" + doc.get("text") + "\n");

			// The check runs after the current hit has been scored and before
			// the increment, so up to MAX_DOCS_TO_USE_FOR_CLASSIFY + 2 hits are
			// used - the same number requested from the searcher above.
			if (count > MAX_DOCS_TO_USE_FOR_CLASSIFY) {
				break;
			}
			count++;
		}

		try {
			// presumably sorts by descending total score (second argument
			// false) - TODO confirm against ValueSortMap
			scoredClasses = ValueSortMap.sortMapByValue(scoredClasses, false);
			List<String> resultsAll = new ArrayList<>(scoredClasses.keySet());
			List<String> resultsAboveThresh = new ArrayList<>();
			for (String key : resultsAll) {
				if (scoredClasses.get(key) > MIN_TOTAL_SCORE_FOR_CATEGORY) {
					resultsAboveThresh.add(key);
				} else {
					LOGGER.debug("Too low score of " + scoredClasses.get(key)
							+ " for category = " + key);
				}
			}

			// keep at most MAX_CATEG_RESULTS categories
			if (resultsAboveThresh.size() > MAX_CATEG_RESULTS) {
				results = resultsAboveThresh.subList(0, MAX_CATEG_RESULTS);
			} else {
				results = resultsAboveThresh;
			}
		} catch (Exception e) {
			LOGGER.error("Problem aggregating search results\n" + e);
		}

		if (results.size() < 2) {
			return results;
		}

		// if two categories, one is very high and another is relatively low:
		// the second best is much worse, so report only the best one
		if (scoredClasses.get(results.get(0))
				/ scoredClasses.get(results.get(1)) > BEST_TO_NEX_BEST_RATIO) {
			return results.subList(0, 1);
		}
		return results;

	}