in lucene/core/src/java/org/apache/lucene/index/CheckIndex.java [617:947]
/**
 * Checks the index: attempts to read every commit point (segments_N file) in the directory,
 * then deeply checks each segment referenced by the most recent commit point, either
 * sequentially or concurrently.
 *
 * @param onlySegments if non-null, only segments whose names appear in this list are deeply
 *     checked, and the returned {@link Status} is marked {@code partial}
 * @param executorService if non-null, segment checks are submitted to this executor and run
 *     concurrently (results are still reported in order); if null, segments are checked
 *     sequentially on the calling thread
 * @return a {@link Status} instance describing what was checked and any corruption found
 * @throws IOException if there is a low-level I/O error reading the index
 */
public Status checkIndex(List<String> onlySegments, ExecutorService executorService)
    throws IOException {
  ensureOpen();
  long startNS = System.nanoTime();

  Status result = new Status();
  result.dir = dir;
  String[] files = dir.listAll();
  String lastSegmentsFile = SegmentInfos.getLastCommitSegmentsFileName(files);
  if (lastSegmentsFile == null) {
    throw new IndexNotFoundException(
        "no segments* file found in " + dir + ": files: " + Arrays.toString(files));
  }

  // https://github.com/apache/lucene/issues/7820: also attempt to open any older commit
  // points (segments_N), which will catch certain corruption like missing _N.si files
  // for segments not also referenced by the newest commit point (which was already
  // loaded, successfully, above). Note that we do not do a deeper check of segments
  // referenced ONLY by these older commit points, because such corruption would not
  // prevent a new IndexWriter from opening on the newest commit point. but it is still
  // corruption, e.g. a reader opened on those old commit points can hit corruption
  // exceptions which we (still) will not detect here. progress not perfection!
  SegmentInfos lastCommit = null;
  List<String> allSegmentsFiles = new ArrayList<>();
  for (String fileName : files) {
    if (fileName.startsWith(IndexFileNames.SEGMENTS)
        && fileName.equals(SegmentInfos.OLD_SEGMENTS_GEN) == false) {
      allSegmentsFiles.add(fileName);
    }
  }

  // Sort descending by generation so that we always attempt to read the last commit first. This
  // way if an index has a broken last commit AND a broken old commit, we report the last commit
  // error first:
  allSegmentsFiles.sort(
      (a, b) -> {
        long genA = SegmentInfos.generationFromSegmentsFileName(a);
        long genB = SegmentInfos.generationFromSegmentsFileName(b);
        // reversed natural sort (largest generation first):
        return -Long.compare(genA, genB);
      });

  for (String fileName : allSegmentsFiles) {
    boolean isLastCommit = fileName.equals(lastSegmentsFile);

    SegmentInfos infos;
    try {
      // Do not use SegmentInfos.read(Directory) since the spooky
      // retrying it does is not necessary here (we hold the write lock):
      // always open old indices if codecs are around
      infos = SegmentInfos.readCommit(dir, fileName, 0);
    } catch (Throwable t) {
      if (failFast) {
        throw IOUtils.rethrowAlways(t);
      }

      String message;
      if (isLastCommit) {
        message =
            "ERROR: could not read latest commit point from segments file \""
                + fileName
                + "\" in directory";
      } else {
        message =
            "ERROR: could not read old (not latest) commit point segments file \""
                + fileName
                + "\" in directory";
      }
      msg(infoStream, message);
      result.missingSegments = true;
      if (infoStream != null) {
        t.printStackTrace(infoStream);
      }
      return result;
    }

    if (isLastCommit) {
      // record the latest commit point: we will deeply check all segments referenced by it
      lastCommit = infos;
    }
  }

  // we know there is a lastSegmentsFileName, so we must've attempted to load it in the above
  // for loop. if it failed to load, we threw the exception (failFast == true) or we returned
  // the failure (failFast == false). so if we get here, we should always have a valid
  // lastCommit:
  assert lastCommit != null;

  if (lastCommit == null) {
    msg(infoStream, "ERROR: could not read any segments file in directory");
    result.missingSegments = true;
    return result;
  }

  if (infoStream != null) {
    int maxDoc = 0;
    int delCount = 0;
    for (SegmentCommitInfo info : lastCommit) {
      maxDoc += info.info.maxDoc();
      delCount += info.getDelCount();
    }
    infoStream.printf(
        Locale.ROOT,
        "%.2f%% total deletions; %d documents; %d deletions%n",
        100. * delCount / maxDoc,
        maxDoc,
        delCount);
  }

  // find the oldest and newest segment versions
  Version oldest = null;
  Version newest = null;
  String oldSegs = null;
  for (SegmentCommitInfo si : lastCommit) {
    Version version = si.info.getVersion();
    if (version == null) {
      // pre-3.1 segment
      oldSegs = "pre-3.1";
    } else {
      if (oldest == null || version.onOrAfter(oldest) == false) {
        oldest = version;
      }
      if (newest == null || version.onOrAfter(newest)) {
        newest = version;
      }
    }
  }

  final int numSegments = lastCommit.size();
  final String segmentsFileName = lastCommit.getSegmentsFileName();
  result.segmentsFileName = segmentsFileName;
  result.numSegments = numSegments;
  result.userData = lastCommit.getUserData();
  String userDataString;
  if (lastCommit.getUserData().size() > 0) {
    userDataString = " userData=" + lastCommit.getUserData();
  } else {
    userDataString = "";
  }

  String versionString = "";
  if (oldSegs != null) {
    if (newest != null) {
      versionString = "versions=[" + oldSegs + " .. " + newest + "]";
    } else {
      versionString = "version=" + oldSegs;
    }
  } else if (newest != null) { // implies oldest != null
    versionString =
        oldest.equals(newest)
            ? ("version=" + oldest)
            : ("versions=[" + oldest + " .. " + newest + "]");
  }

  msg(
      infoStream,
      "Segments file="
          + segmentsFileName
          + " numSegments="
          + numSegments
          + " "
          + versionString
          + " id="
          + StringHelper.idToString(lastCommit.getId())
          + userDataString);

  if (onlySegments != null) {
    result.partial = true;
    if (infoStream != null) {
      infoStream.print("\nChecking only these segments:");
      for (String s : onlySegments) {
        infoStream.print(" " + s);
      }
    }
    result.segmentsChecked.addAll(onlySegments);
    msg(infoStream, ":");
  }

  result.newSegments = lastCommit.clone();
  result.newSegments.clear();
  result.maxSegmentName = -1;

  // checks segments sequentially
  if (executorService == null) {
    for (int i = 0; i < numSegments; i++) {
      final SegmentCommitInfo info = lastCommit.info(i);
      updateMaxSegmentName(result, info);
      if (onlySegments != null && !onlySegments.contains(info.info.name)) {
        continue;
      }

      msg(
          infoStream,
          (1 + i)
              + " of "
              + numSegments
              + ": name="
              + info.info.name
              + " maxDoc="
              + info.info.maxDoc());
      Status.SegmentInfoStatus segmentInfoStatus = testSegment(lastCommit, info, infoStream);
      processSegmentInfoStatusResult(result, info, segmentInfoStatus);
    }
  } else {
    ByteArrayOutputStream[] outputs = new ByteArrayOutputStream[numSegments];
    @SuppressWarnings({"unchecked", "rawtypes"})
    CompletableFuture<Status.SegmentInfoStatus>[] futures = new CompletableFuture[numSegments];

    // checks segments concurrently
    List<SegmentCommitInfo> segmentCommitInfos = new ArrayList<>();
    for (SegmentCommitInfo sci : lastCommit) {
      segmentCommitInfos.add(sci);
    }

    // sort segmentCommitInfos by segment size, as smaller segment tends to finish faster, and
    // hence its output can be printed out faster
    segmentCommitInfos.sort(
        (info1, info2) -> {
          try {
            return Long.compare(info1.sizeInBytes(), info2.sizeInBytes());
          } catch (IOException e) {
            msg(
                infoStream,
                "ERROR: IOException occurred when comparing SegmentCommitInfo file sizes");
            if (infoStream != null) e.printStackTrace(infoStream);
            // treat un-sizable segments as equal; sorting here is only a scheduling heuristic
            return 0;
          }
        });

    // start larger segments earlier
    for (int i = numSegments - 1; i >= 0; i--) {
      final SegmentCommitInfo info = segmentCommitInfos.get(i);
      updateMaxSegmentName(result, info);
      if (onlySegments != null && !onlySegments.contains(info.info.name)) {
        continue;
      }

      SegmentInfos finalSis = lastCommit;

      // each segment writes to its own buffer so concurrent output does not interleave
      ByteArrayOutputStream output = new ByteArrayOutputStream();
      PrintStream stream = new PrintStream(output, true, UTF_8);
      msg(
          stream,
          (1 + i)
              + " of "
              + numSegments
              + ": name="
              + info.info.name
              + " maxDoc="
              + info.info.maxDoc());

      outputs[i] = output;
      futures[i] =
          runAsyncSegmentCheck(() -> testSegment(finalSis, info, stream), executorService);
    }

    for (int i = 0; i < numSegments; i++) {
      SegmentCommitInfo info = segmentCommitInfos.get(i);
      if (onlySegments != null && !onlySegments.contains(info.info.name)) {
        continue;
      }

      ByteArrayOutputStream output = outputs[i];

      // print segment results in order
      Status.SegmentInfoStatus segmentInfoStatus = null;
      try {
        segmentInfoStatus = futures[i].get();
      } catch (InterruptedException e) {
        // the segment test output should come before interrupted exception message that follows,
        // hence it's not emitted from finally clause
        msg(infoStream, output);
        msg(
            infoStream,
            "ERROR: Interrupted exception occurred when getting segment check result for segment "
                + info.info.name);
        if (infoStream != null) e.printStackTrace(infoStream);
        // restore the interrupt status so the caller (or pool) can observe the interruption
        Thread.currentThread().interrupt();
      } catch (ExecutionException e) {
        msg(infoStream, output.toString(UTF_8));

        // with failFast, testSegment rethrows instead of recording the failure in its status
        assert failFast;
        throw new CheckIndexException(
            "Segment " + info.info.name + " check failed.", e.getCause());
      }

      msg(infoStream, output);

      processSegmentInfoStatusResult(result, info, segmentInfoStatus);
    }
  }

  if (0 == result.numBadSegments) {
    result.clean = true;
  } else {
    msg(
        infoStream,
        "WARNING: "
            + result.numBadSegments
            + " broken segments (containing "
            + result.totLoseDocCount
            + " documents) detected");
  }

  // the commit's next-segment counter must exceed every segment name already in use, or
  // a future IndexWriter could collide with an existing segment:
  result.validCounter = result.maxSegmentName < lastCommit.counter;
  if (result.validCounter == false) {
    result.clean = false;
    result.newSegments.counter = result.maxSegmentName + 1;
    msg(
        infoStream,
        "ERROR: Next segment name counter "
            + lastCommit.counter
            + " is not greater than max segment name "
            + result.maxSegmentName);
  }

  if (result.clean) {
    msg(infoStream, "No problems were detected with this index.\n");
  }

  msg(
      infoStream,
      String.format(Locale.ROOT, "Took %.3f sec total.", nsToSec(System.nanoTime() - startNS)));

  return result;
}