in jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java [256:443]
/**
 * Builds an excerpt of <code>text</code> in which the terms located by
 * <code>offsets</code> are wrapped in highlight markers.
 * <p>
 * The offsets are grouped into {@link FragmentInfo}s, the fragments are
 * sorted with {@link FragmentInfoScoreSorter}, the first
 * <code>maxFragments</code> of them are kept and re-sorted with
 * {@link FragmentInfoPositionSorter}, and the text is then read
 * sequentially to emit each fragment together with some surrounding
 * characters.
 * <p>
 * If <code>offsets</code> is <code>null</code> or empty, or the first
 * offset already ends beyond the end of <code>text</code>, the result of
 * <code>createDefaultExcerpt</code> is returned instead. Offsets following
 * the first one that ends beyond the end of <code>text</code> are ignored.
 * <p>
 * NOTE(review): <code>fragmentEnd</code> is only emitted inside the loop
 * when the gap to the next selected fragment is larger than
 * <code>surround * 2</code>; for smaller gaps a new
 * <code>fragmentStart</code> is written without closing the previous
 * fragment. Whether such gaps can occur depends on {@link FragmentInfo},
 * which is not visible here -- confirm this is intended.
 *
 * @param offsets       the term offsets to highlight; expected to be
 *                      ordered by position (TODO confirm against caller).
 * @param text          the text to create the excerpt from.
 * @param excerptStart  string written at the very beginning of the result.
 * @param excerptEnd    string written at the very end of the result.
 * @param fragmentStart string written before each fragment.
 * @param fragmentEnd   string written after a fragment.
 * @param hlStart       string written before each highlighted term.
 * @param hlEnd         string written after each highlighted term.
 * @param maxFragments  the maximum number of fragments to include.
 * @param surround      the number of characters read around a fragment;
 *                      <code>surround * 2</code> is passed to
 *                      {@link FragmentInfo} and to the default excerpt.
 * @return the excerpt with highlighted terms.
 * @throws IOException if reading from the text fails.
 */
protected String mergeFragments(TermVectorOffsetInfo[] offsets,
                                String text,
                                String excerptStart,
                                String excerptEnd,
                                String fragmentStart,
                                String fragmentEnd,
                                String hlStart,
                                String hlEnd,
                                int maxFragments,
                                int surround) throws IOException {
    if (offsets == null || offsets.length == 0) {
        // nothing to highlight
        return createDefaultExcerpt(text, excerptStart, excerptEnd,
                fragmentStart, fragmentEnd, surround * 2);
    }

    // group the offsets into fragments; whenever the current fragment
    // declines an offset it is closed and a new fragment is started
    List<FragmentInfo> fragmentInfoList = new ArrayList<FragmentInfo>();
    if (offsets[0].getEndOffset() <= text.length()) {
        FragmentInfo fi = new FragmentInfo(offsets[0], surround * 2);
        for (int i = 1; i < offsets.length; i++) {
            if (offsets[i].getEndOffset() > text.length()) {
                // offset points past the end of the text -> stop here
                break;
            }
            if (fi.add(offsets[i])) {
                continue;
            }
            fragmentInfoList.add(fi);
            fi = new FragmentInfo(offsets[i], surround * 2);
        }
        fragmentInfoList.add(fi);
    }
    if (fragmentInfoList.isEmpty()) {
        // nothing to highlight
        return createDefaultExcerpt(text, excerptStart, excerptEnd,
                fragmentStart, fragmentEnd, surround * 2);
    }

    // sort with score
    Collections.sort(fragmentInfoList, new FragmentInfoScoreSorter());
    // extract best fragments
    List<FragmentInfo> bestFragmentsList = new ArrayList<FragmentInfo>();
    int numBest = Math.min(fragmentInfoList.size(), maxFragments);
    for (int i = 0; i < numBest; i++) {
        bestFragmentsList.add(fragmentInfoList.get(i));
    }
    // re-sort with positions
    Collections.sort(bestFragmentsList, new FragmentInfoPositionSorter());

    // merge #maxFragments fragments; 'pos' always mirrors the number of
    // characters consumed from 'reader'
    StringReader reader = new StringReader(text);
    StringBuilder sb = new StringBuilder(excerptStart);
    int pos = 0;
    for (int i = 0; i < bestFragmentsList.size(); i++) {
        FragmentInfo fi = bestFragmentsList.get(i);
        fi.trim();
        int nextStart = fi.getStartOffset();
        int skip = nextStart - pos;
        if (skip > surround * 2) {
            skip -= surround;
            if (i > 0) {
                // end last fragment: read the trailing context and cut
                // it at its last whitespace
                char[] tail = new char[surround];
                reader.read(tail, 0, surround);
                pos += surround;
                sb.append(escape(new String(tail, 0,
                        lastWhitespaceIndex(tail, surround))));
                sb.append(fragmentEnd);
            }
        }

        if (skip >= surround) {
            if (i > 0) {
                skip -= surround;
            }
            // skip
            reader.skip((long) skip);
            pos += skip;
        }

        // start fragment: read the leading context up to the first term
        char[] cbuf = new char[nextStart - pos];
        int skippedChars = Math.max(cbuf.length - 1, 0);
        int firstWhitespace = skippedChars;
        reader.read(cbuf, 0, nextStart - pos);
        pos = nextStart;
        sb.append(fragmentStart);
        // find last period followed by whitespace; firstWhitespace ends
        // up as the left-most whitespace seen during the backward scan
        if (cbuf.length > 0) {
            for (; skippedChars >= 0; skippedChars--) {
                if (Character.isWhitespace(cbuf[skippedChars])) {
                    firstWhitespace = skippedChars;
                    if (skippedChars - 1 >= 0
                            && cbuf[skippedChars - 1] == '.') {
                        skippedChars++;
                        break;
                    }
                }
            }
        }
        boolean sentenceStart = true;
        if (skippedChars == -1) {
            // no sentence boundary found in the leading context
            if (pos == cbuf.length) {
                // this fragment is the start of the text -> skip none
                skippedChars = 0;
            } else {
                sentenceStart = false;
                skippedChars = firstWhitespace + 1;
            }
        }
        if (!sentenceStart) {
            sb.append("... ");
        }
        sb.append(escape(new String(cbuf, skippedChars, cbuf.length
                - skippedChars)));

        // iterate terms
        for (Iterator<TermVectorOffsetInfo> iter = fi.iterator(); iter.hasNext();) {
            TermVectorOffsetInfo ti = iter.next();
            nextStart = ti.getStartOffset();
            if (nextStart - pos > 0) {
                // text between the previous term and this one
                cbuf = new char[nextStart - pos];
                int charsRead = reader.read(cbuf, 0, nextStart - pos);
                pos = nextStart;
                sb.append(escape(new String(cbuf, 0, charsRead)));
            }
            sb.append(hlStart);
            nextStart = ti.getEndOffset();
            // print term
            cbuf = new char[nextStart - pos];
            reader.read(cbuf, 0, nextStart - pos);
            pos = nextStart;
            sb.append(escape(new String(cbuf)));
            sb.append(hlEnd);
        }
    }

    if (pos != 0) {
        // end fragment
        char[] cbuf = new char[surround];
        int charsRead = reader.read(cbuf, 0, surround);
        // probe one more character to find out whether the text ends here
        boolean atEnd = reader.read() == -1;
        if (charsRead >= 0) {
            // at the end of the text keep everything that was read,
            // otherwise cut the trailing context at its last whitespace
            int keep = atEnd ? charsRead : lastWhitespaceIndex(cbuf, surround);
            sb.append(escape(new String(cbuf, 0, keep)));
            if (!atEnd) {
                char lastChar = sb.charAt(sb.length() - 1);
                if (lastChar != '.' && lastChar != '!' && lastChar != '?') {
                    sb.append(" ...");
                }
            }
        }
        sb.append(fragmentEnd);
    }
    sb.append(excerptEnd);
    return sb.toString();
}

/**
 * Returns the index of the last whitespace character within the first
 * <code>len</code> characters of <code>cbuf</code>, or <code>0</code> if
 * there is none. The returned value is the number of leading characters
 * to keep when cutting the buffer in front of its last whitespace.
 *
 * @param cbuf the characters to scan.
 * @param len  the number of characters of <code>cbuf</code> to consider.
 * @return the index of the last whitespace or <code>0</code>.
 */
private static int lastWhitespaceIndex(char[] cbuf, int len) {
    for (int i = len - 1; i >= 0; i--) {
        if (Character.isWhitespace(cbuf[i])) {
            return i;
        }
    }
    return 0;
}