in opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java [105:182]
/**
 * Re-scores the documents of {@code docList} by syntactic match between the
 * query text and the stored value of the queried field, and returns them as a
 * new {@link DocSlice}.
 *
 * <p>The query text is taken from the {@code q=} part of
 * {@code req.getParamString()}: a double-quoted phrase longer than five
 * characters is used verbatim, otherwise the {@code field:} prefix is stripped
 * from the field-based expression. In both cases {@code '+'} is turned into a
 * space.
 *
 * <p>The input list is returned unchanged when it is empty, when no
 * {@code =field:} prefix can be located, or when the extracted query text is
 * shorter than five characters or has fewer than three space-separated tokens.
 *
 * <p>Documents that have no stored value for the queried field are left out of
 * the result. The result order is whatever {@code PairComparable} imposes on
 * the (docId, score) pairs (presumably by score; confirm against that class).
 *
 * @param docList hits to re-score
 * @param req     current request; supplies the raw parameter string and the
 *                index reader
 * @param params  not used by this method
 * @return the re-scored slice, or {@code docList} itself when re-scoring is
 *         skipped
 */
public DocList filterResultsBySyntMatchReduceDocSet(DocList docList,
    SolrQueryRequest req, SolrParams params) {
  if (docList.size() < 1) {
    return docList; // nothing to re-score
  }

  String paramString = req.getParamString();
  if (paramString == null) {
    return docList;
  }

  // Isolate the "q=..." parameter; when several are present the last one wins,
  // when none is present the whole parameter string is used.
  String requestExpression = paramString;
  for (String part : paramString.split("&")) {
    if (part.startsWith("q=")) {
      requestExpression = part;
    }
  }

  // Name of the queried field: the text between '=' and the following ':'.
  // Checked before it is used to strip the prefix below.
  String fieldNameQuery = StringUtils.substringBetween(requestExpression, "=", ":");
  if (fieldNameQuery == null) {
    return docList;
  }

  // Prefer a phrase query (text inside double quotes) when it is long enough.
  String[] queryParts = requestExpression.split("\"");
  if (queryParts.length >= 2 && queryParts[1].length() > 5) {
    requestExpression = queryParts[1].replace('+', ' ');
  } else if (requestExpression.contains(":")) {
    // Still a field-based expression: drop "field:" and "q=". The field name
    // is removed literally (not as a regex), and runs of spaces are collapsed
    // so the token count below is not inflated by empty tokens.
    requestExpression = requestExpression
        .replace(fieldNameQuery + ":", "")
        .replace('+', ' ')
        .replaceAll(" {2,}", " ")
        .replace("q=", "");
  }

  // Too short to be worth a syntactic comparison.
  if (requestExpression.length() < 5 || requestExpression.split(" ").length < 3) {
    return docList;
  }

  // Obtained only after the cheap guards above have passed.
  ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor.getInstance();
  IndexReader indexReader = req.getSearcher().getIndexReader();
  List<Pair<Integer, Float>> docIdsScores = new ArrayList<>(docList.size());

  try {
    DocIterator iter = docList.iterator();
    while (iter.hasNext()) {
      int docId = iter.nextDoc();
      Document doc = indexReader.document(docId);
      // get text for event
      String answerText = doc.get(fieldNameQuery);
      if (answerText == null) {
        continue; // no stored value for this field: document is not scored
      }
      SentencePairMatchResult matchResult = pos.assessRelevance(requestExpression, answerText);
      float syntMatchScore = Double.valueOf(
          parseTreeChunkListScorer.getParseTreeChunkListScore(matchResult.getMatchResult()))
          .floatValue();
      System.out.println(" Matched query = '"+requestExpression + "' with answer = '"+answerText +"' | doc_id = '"+docId);
      System.out.println(" Match result = '"+matchResult.getMatchResult() + "' with score = '"+syntMatchScore +"';" );
      docIdsScores.add(new Pair<>(docId, syntMatchScore));
    }
  } catch (IOException e) {
    // Also covers CorruptIndexException. Best effort: keep whatever was
    // scored before the failure and report the problem.
    e.printStackTrace();
  }

  docIdsScores.sort(new PairComparable<>());

  // Unpack the sorted pairs straight into the primitive arrays DocSlice needs.
  int limit = docIdsScores.size();
  int[] sortedDocIds = new int[limit];
  float[] sortedScores = new float[limit];
  for (int i = 0; i < limit; i++) {
    Pair<Integer, Float> entry = docIdsScores.get(i);
    sortedDocIds[i] = entry.getFirst();
    sortedScores[i] = entry.getSecond();
  }
  System.out.println(java.util.Arrays.toString(sortedScores));

  float maxScore = docList.maxScore(); // do not change
  int start = 0;
  return new DocSlice(start, limit, sortedDocIds, sortedScores,
      limit, maxScore, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO);
}