in oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java [620:726]
/**
 * Computes the excerpts requested for a single search hit.
 *
 * <p>Each entry of {@code excerptFields} is a result column name. A column equal to
 * {@code REP_EXCERPT} asks for the node-level excerpt; for any other column the field name is
 * the text between the character following the {@code REP_EXCERPT} prefix and the column's last
 * character (presumably columns look like {@code rep:excerpt(name)} - confirm against callers).
 *
 * <p>Excerpts are first attempted with the postings highlighter on stored analyzed fields. If
 * that yields nothing (or fails, which is only logged at debug level), every stored full text or
 * analyzed field is run through the fallback highlighter instead. When a node-level excerpt was
 * requested, all excerpts collected so far are joined with {@code "..."} and assigned to each
 * node-level column.
 *
 * @param query         query whose matches are highlighted by the postings highlighter
 * @param excerptFields names of the excerpt columns requested by the caller
 * @param analyzer      analyzer used to tokenize stored values for the fallback highlighter
 * @param searcher      searcher giving access to the index reader holding the hit
 * @param doc           the hit; only its {@code doc} id is used
 * @param fieldInfos    consulted to check that positions and offsets are indexed
 * @return mutable map from column name to excerpt, restricted to keys present in
 *         {@code excerptFields}; columns for which no excerpt was found are absent
 * @throws IOException propagated from the index reader, the analyzer or the fallback highlighter
 */
private Map<String, String> getExcerpt(Query query, Set<String> excerptFields,
        Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc, FieldInfos fieldInfos)
        throws IOException {
    Set<String> excerptFieldNames = Sets.newHashSet();
    Map<String, String> fieldNameToColumnNameMap = Maps.newHashMap();
    Map<String, String> columnNameToExcerpts = Maps.newHashMap();
    Set<String> nodeExcerptColumns = Sets.newHashSet();

    // Split the requested columns into node-level excerpt columns and per-field excerpt columns.
    excerptFields.forEach(columnName -> {
        String fieldName;
        if (REP_EXCERPT.equals(columnName)) {
            fieldName = FulltextIndexConstants.EXCERPT_NODE_FIELD_NAME;
        } else {
            // drop the REP_EXCERPT prefix plus one delimiter character, and the trailing character
            fieldName = columnName.substring(REP_EXCERPT.length() + 1, columnName.length() - 1);
        }
        if (!FulltextIndexConstants.EXCERPT_NODE_FIELD_NAME.equals(fieldName)) {
            excerptFieldNames.add(fieldName);
            fieldNameToColumnNameMap.put(fieldName, columnName);
        } else {
            nodeExcerptColumns.add(columnName);
        }
    });
    final boolean requireNodeLevelExcerpt = !nodeExcerptColumns.isEmpty();

    int docID = doc.doc;
    // Load the hit's stored fields once; both the postings pass and the fallback pass read them.
    List<IndexableField> storedFields = searcher.getIndexReader().document(docID).getFields();

    List<String> names = new LinkedList<>();
    for (IndexableField field : storedFields) {
        String name = field.name();
        // postings highlighter can be used on analyzed fields with docs, freqs, positions and offsets stored.
        if (name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX) && fieldInfos.hasProx() && fieldInfos.hasOffsets()) {
            names.add(name);
        }
    }
    // NOTE(review): 'names' holds full stored field names (including ANALYZED_FIELD_PREFIX) while
    // excerptFieldNames and fieldNameToColumnNameMap are keyed by the name parsed from the column.
    // The fallback below strips the prefix before its lookup; this path does not. Behaviour is
    // kept as-is - confirm whether the asymmetry is intended.
    if (!requireNodeLevelExcerpt) {
        names.retainAll(excerptFieldNames);
    }

    if (!names.isEmpty()) {
        // one passage per highlighted field
        int[] maxPassages = new int[names.size()];
        Arrays.fill(maxPassages, 1);
        try {
            Map<String, String[]> stringMap = postingsHighlighter.highlightFields(
                    names.toArray(new String[names.size()]), query, searcher, new int[]{docID}, maxPassages);
            for (Map.Entry<String, String[]> entry : stringMap.entrySet()) {
                String value = Arrays.toString(entry.getValue());
                // keep only passages that actually contain a highlighted term
                if (value.contains("<b>")) {
                    String fieldName = entry.getKey();
                    // May be null when the field has no mapped column; such an entry still
                    // contributes to the node-level excerpt and is removed by the final retainAll.
                    String columnName = fieldNameToColumnNameMap.get(fieldName);
                    columnNameToExcerpts.put(columnName, value);
                }
            }
        } catch (Exception e) {
            // best effort: the fallback highlighter below takes over when nothing was collected
            LOG.debug("postings highlighting failed", e);
        }
    }

    // fallback if no excerpt could be retrieved using postings highlighter
    if (columnNameToExcerpts.isEmpty()) {
        for (IndexableField field : storedFields) {
            String name = field.name();
            // only full text or analyzed fields
            if (!name.startsWith(FieldNames.FULLTEXT) && !name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) {
                continue;
            }
            String text = field.stringValue();
            if (text == null) {
                // nothing to tokenize; skip instead of failing the whole excerpt computation
                continue;
            }
            TokenStream tokenStream = analyzer.tokenStream(name, text);
            try {
                TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 1);
                if (textFragments != null && textFragments.length > 0) {
                    for (TextFragment fragment : textFragments) {
                        String columnName = null;
                        if (name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) {
                            columnName = fieldNameToColumnNameMap.get(
                                    name.substring(FieldNames.ANALYZED_FIELD_PREFIX.length()));
                        }
                        if (columnName == null && requireNodeLevelExcerpt) {
                            // keyed by field name: only feeds the node-level excerpt joined below
                            columnName = name;
                        }
                        if (columnName != null) {
                            columnNameToExcerpts.put(columnName, fragment.toString());
                        }
                    }
                    // no per-field excerpt was requested, so the first field with fragments is enough
                    if (excerptFieldNames.isEmpty()) {
                        break;
                    }
                }
            } catch (InvalidTokenOffsetsException e) {
                LOG.error("highlighting failed", e);
            }
        }
    }

    if (requireNodeLevelExcerpt) {
        // the node-level excerpt is everything collected so far, joined with "..."
        String nodeExcerpt = Joiner.on("...").join(columnNameToExcerpts.values());
        nodeExcerptColumns.forEach(nodeExcerptColumnName -> columnNameToExcerpts.put(nodeExcerptColumnName, nodeExcerpt));
    }
    // drop helper entries (field-name or null keys) that are not requested columns
    columnNameToExcerpts.keySet().retainAll(excerptFields);
    return columnNameToExcerpts;
}