in lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java [453:676]
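/**
 * Selects zero or more merges from the eligible segments, which are expected to be sorted
 * by size in bytes, descending. Loops until the index is back within budget (for NATURAL
 * merges) or no scoring candidate remains; returns null when no merge was selected.
 */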
private MergeSpecification doFindMerges(
List<SegmentSizeAndDocs> sortedEligibleInfos,
final long maxMergedSegmentBytes,
final int mergeFactor,
final int allowedSegCount,
final int allowedDelCount,
final int allowedDocCount,
final MERGE_TYPE mergeType,
MergeContext mergeContext,
boolean maxMergeIsRunning)
throws IOException {
List<SegmentSizeAndDocs> sortedEligible = new ArrayList<>(sortedEligibleInfos);
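// Index sizes by SegmentCommitInfo so candidate scoring can look them up in O(1):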
Map<SegmentCommitInfo, SegmentSizeAndDocs> segInfosSizes = new HashMap<>();
for (SegmentSizeAndDocs segSizeDocs : sortedEligible) {
segInfosSizes.put(segSizeDocs.segInfo, segSizeDocs);
}
int originalSortedSize = sortedEligible.size();
if (verbose(mergeContext)) {
message("findMerges: " + originalSortedSize + " segments", mergeContext);
}
if (originalSortedSize == 0) {
return null;
}
final Set<SegmentCommitInfo> toBeMerged = new HashSet<>();
MergeSpecification spec = null;
// Cycle to possibly select more than one merge:
// The trigger point for total deleted documents in the index leads to a bunch of large
// segment merges at the same time. So only put one large merge in the list of merges per
// cycle; we'll pick up another merge next time around.
boolean haveOneLargeMerge = false;
while (true) {
// Remove ineligible segments: these are either already being merged or were already
// picked (by a prior iteration of this loop) for merging.
sortedEligible.removeIf(segSizeDocs -> toBeMerged.contains(segSizeDocs.segInfo));
if (verbose(mergeContext)) {
message(
" allowedSegmentCount="
+ allowedSegCount
+ " vs count="
+ originalSortedSize
+ " (eligible count="
+ sortedEligible.size()
+ ")",
mergeContext);
}
if (sortedEligible.isEmpty()) {
return spec;
}
final int remainingDelCount = sortedEligible.stream().mapToInt(c -> c.delCount).sum();
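// Natural merging stops once both the segment count and the remaining deleted docs are
// within budget; forced merge types keep cycling until no candidate is selected below.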
if (mergeType == MERGE_TYPE.NATURAL
&& sortedEligible.size() <= allowedSegCount
&& remainingDelCount <= allowedDelCount) {
return spec;
}
// OK we are over budget -- find best merge!
MergeScore bestScore = null;
List<SegmentCommitInfo> best = null;
boolean bestTooLarge = false;
long bestMergeBytes = 0;
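// Try each eligible segment as the head of a candidate merge. Because the list is sorted
// by size (descending), each start index packs progressively smaller segments behind it.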
for (int startIdx = 0; startIdx < sortedEligible.size(); startIdx++) {
final List<SegmentCommitInfo> candidate = new ArrayList<>();
boolean hitTooLarge = false;
long bytesThisMerge = 0;
long docCountThisMerge = 0;
for (int idx = startIdx;
idx < sortedEligible.size()
// We allow merging more than mergeFactor segments together if the merged segment
// would be less than the floor segment size. This is important because segments
// below the floor segment size are more aggressively merged by this policy, so we
// need to grow them as quickly as possible.
&& (candidate.size() < mergeFactor || bytesThisMerge < floorSegmentBytes)
&& bytesThisMerge < maxMergedSegmentBytes
&& (bytesThisMerge < floorSegmentBytes || docCountThisMerge <= allowedDocCount);
idx++) {
final SegmentSizeAndDocs segSizeDocs = sortedEligible.get(idx);
final long segBytes = segSizeDocs.sizeInBytes;
int segDocCount = segSizeDocs.maxDoc - segSizeDocs.delCount;
if (bytesThisMerge + segBytes > maxMergedSegmentBytes
|| (bytesThisMerge > floorSegmentBytes
&& docCountThisMerge + segDocCount > allowedDocCount)) {
// Only set hitTooLarge when reaching the maximum byte size, as this will create
// segments of the maximum size which will no longer be eligible for merging for a long
// time (until they accumulate enough deletes).
hitTooLarge |= bytesThisMerge + segBytes > maxMergedSegmentBytes;
// We should never have something coming in that _cannot_ be merged, so fall through and
// allow a singleton candidate: only skip this segment when we already have candidates.
if (candidate.size() > 0) {
// NOTE: we continue, so that we can try
// "packing" smaller segments into this merge
// to see if we can get closer to the max
// size; this in general is not perfect since
// this is really "bin packing" and we'd have
// to try different permutations.
continue;
}
}
candidate.add(segSizeDocs.segInfo);
bytesThisMerge += segBytes;
docCountThisMerge += segDocCount;
}
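// Illustrative walk-through (assumed numbers, not from any real index): with
// mergeFactor=10 and floorSegmentBytes=16MB, ten 1MB segments sum to 10MB, still below
// the floor, so the candidate keeps growing past mergeFactor segments. With
// maxMergedSegmentBytes=5GB and ~1GB segments, a segment that would push the total past
// 5GB is skipped and smaller segments are tried, to pack closer to the cap.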
// We should never see an empty candidate: we iterated over up to mergeFactor
// segments, and already pre-excluded the too-large segments:
assert candidate.size() > 0;
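// candidate.get(0) is the biggest segment of this candidate, since the eligible list is
// sorted by size, descending.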
SegmentSizeAndDocs maxCandidateSegmentSize = segInfosSizes.get(candidate.get(0));
if (hitTooLarge == false
&& mergeType == MERGE_TYPE.NATURAL
&& bytesThisMerge < maxCandidateSegmentSize.sizeInBytes * 1.5
&& maxCandidateSegmentSize.delCount
< maxCandidateSegmentSize.maxDoc * deletesPctAllowed / 100) {
// Ignore any merge where the resulting segment is not at least 50% larger than the
// biggest input segment. Otherwise we could run into pathological O(N^2) merging where
// merges keep rewriting the biggest input segment again and again into a segment that
// is barely bigger.
// The only exception we make is when the merge would reclaim lots of deletes in the
// biggest segment. This matters, for instance, when lots of documents get deleted at
// once without new segments of a similar size being introduced.
continue;
}
// A singleton merge with no deletes makes no sense. We can get here when forceMerge is
// looping around...
if (candidate.size() == 1 && maxCandidateSegmentSize.delCount == 0) {
continue;
}
// If we didn't find a too-large merge and have a list of candidates
// whose length is less than the merge factor, it means we are reaching
// the tail of the list of segments and will only find smaller merges.
// Stop here.
if (bestScore != null && hitTooLarge == false && candidate.size() < mergeFactor) {
break;
}
final MergeScore score = score(candidate, hitTooLarge, segInfosSizes);
if (verbose(mergeContext)) {
message(
" maybe="
+ segString(mergeContext, candidate)
+ " score="
+ score.getScore()
+ " "
+ score.getExplanation()
+ " tooLarge="
+ hitTooLarge
+ " size="
+ String.format(Locale.ROOT, "%.3f MB", bytesThisMerge / 1024. / 1024.),
mergeContext);
}
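// Lower scores are better. Also skip candidates that hit the max merged size while a
// max-sized merge is already running, to avoid stacking up huge concurrent merges.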
if ((bestScore == null || score.getScore() < bestScore.getScore())
&& (!hitTooLarge || !maxMergeIsRunning)) {
best = candidate;
bestScore = score;
bestTooLarge = hitTooLarge;
bestMergeBytes = bytesThisMerge;
}
}
if (best == null) {
return spec;
}
// The mergeType == FORCE_MERGE_DELETES case behaves as the code does currently and can
// create a large number of concurrent big merges. If we make findForcedDeletesMerges
// behave like findForcedMerges and cycle through, we should remove this.
if (haveOneLargeMerge == false
|| bestTooLarge == false
|| mergeType == MERGE_TYPE.FORCE_MERGE_DELETES) {
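// Remember that we already picked one max-sized merge in this call, so further
// too-large candidates are deferred to the next round (except for FORCE_MERGE_DELETES).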
haveOneLargeMerge |= bestTooLarge;
if (spec == null) {
spec = new MergeSpecification();
}
final OneMerge merge = new OneMerge(best);
spec.add(merge);
if (verbose(mergeContext)) {
message(
" add merge="
+ segString(mergeContext, merge.segments)
+ " size="
+ String.format(Locale.ROOT, "%.3f MB", bestMergeBytes / 1024. / 1024.)
+ " score="
+ String.format(Locale.ROOT, "%.3f", bestScore.getScore())
+ " "
+ bestScore.getExplanation()
+ (bestTooLarge ? " [max merge]" : ""),
mergeContext);
}
}
// Whether or not we're going to return this candidate in the spec, we need to remove it
// from consideration on the next loop iteration.
toBeMerged.addAll(best);
}
}
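// A minimal sketch of how this method is typically driven; the wrapper and helper names
// below are illustrative assumptions, not the actual callers in this file:
//
//   MergeSpecification spec =
//       doFindMerges(
//           sortSegmentsBySizeDescending(infos), // largest first, as this method expects
//           maxMergedSegmentBytes,
//           mergeFactor,
//           allowedSegCount,
//           allowedDelCount,
//           allowedDocCount,
//           MERGE_TYPE.NATURAL,
//           mergeContext,
//           maxMergeIsRunning);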