in oak-lucene/src/main/java/org/apache/lucene/index/CheckIndex.java [395:688]
public Status checkIndex(List<String> onlySegments) throws IOException {
NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
SegmentInfos sis = new SegmentInfos();
Status result = new Status();
result.dir = dir;
try {
sis.read(dir);
} catch (Throwable t) {
msg(infoStream, "ERROR: could not read any segments file in directory");
result.missingSegments = true;
if (infoStream != null)
t.printStackTrace(infoStream);
return result;
}
// find the oldest and newest segment versions
String oldest = Integer.toString(Integer.MAX_VALUE), newest = Integer.toString(Integer.MIN_VALUE);
String oldSegs = null;
boolean foundNonNullVersion = false;
Comparator<String> versionComparator = StringHelper.getVersionComparator();
for (SegmentCommitInfo si : sis) {
String version = si.info.getVersion();
if (version == null) {
// pre-3.1 segment
oldSegs = "pre-3.1";
} else {
foundNonNullVersion = true;
if (versionComparator.compare(version, oldest) < 0) {
oldest = version;
}
if (versionComparator.compare(version, newest) > 0) {
newest = version;
}
}
}
final int numSegments = sis.size();
final String segmentsFileName = sis.getSegmentsFileName();
// note: we only read the format byte (required preamble) here!
IndexInput input = null;
try {
input = dir.openInput(segmentsFileName, IOContext.READONCE);
} catch (Throwable t) {
msg(infoStream, "ERROR: could not open segments file in directory");
if (infoStream != null)
t.printStackTrace(infoStream);
result.cantOpenSegments = true;
return result;
}
int format = 0;
try {
format = input.readInt();
} catch (Throwable t) {
msg(infoStream, "ERROR: could not read segment file version in directory");
if (infoStream != null)
t.printStackTrace(infoStream);
result.missingSegmentVersion = true;
return result;
} finally {
if (input != null)
input.close();
}
String sFormat = "";
boolean skip = false;
result.segmentsFileName = segmentsFileName;
result.numSegments = numSegments;
result.userData = sis.getUserData();
String userDataString;
if (sis.getUserData().size() > 0) {
userDataString = " userData=" + sis.getUserData();
} else {
userDataString = "";
}
String versionString = null;
if (oldSegs != null) {
if (foundNonNullVersion) {
versionString = "versions=[" + oldSegs + " .. " + newest + "]";
} else {
versionString = "version=" + oldSegs;
}
} else {
versionString = oldest.equals(newest) ? ( "version=" + oldest ) : ("versions=[" + oldest + " .. " + newest + "]");
}
msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
+ " " + versionString + " format=" + sFormat + userDataString);
if (onlySegments != null) {
result.partial = true;
if (infoStream != null) {
infoStream.print("\nChecking only these segments:");
for (String s : onlySegments) {
infoStream.print(" " + s);
}
}
result.segmentsChecked.addAll(onlySegments);
msg(infoStream, ":");
}
if (skip) {
msg(infoStream, "\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
result.toolOutOfDate = true;
return result;
}
result.newSegments = sis.clone();
result.newSegments.clear();
result.maxSegmentName = -1;
for(int i=0;i<numSegments;i++) {
final SegmentCommitInfo info = sis.info(i);
int segmentName = Integer.parseInt(info.info.name.substring(1), Character.MAX_RADIX);
if (segmentName > result.maxSegmentName) {
result.maxSegmentName = segmentName;
}
if (onlySegments != null && !onlySegments.contains(info.info.name)) {
continue;
}
Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
result.segmentInfos.add(segInfoStat);
msg(infoStream, " " + (1+i) + " of " + numSegments + ": name=" + info.info.name + " docCount=" + info.info.getDocCount());
segInfoStat.name = info.info.name;
segInfoStat.docCount = info.info.getDocCount();
final String version = info.info.getVersion();
if (info.info.getDocCount() <= 0 && version != null && versionComparator.compare(version, "4.5") >= 0) {
throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.getDocCount());
}
int toLoseDocCount = info.info.getDocCount();
AtomicReader reader = null;
try {
final Codec codec = info.info.getCodec();
msg(infoStream, " codec=" + codec);
segInfoStat.codec = codec;
msg(infoStream, " compound=" + info.info.getUseCompoundFile());
segInfoStat.compound = info.info.getUseCompoundFile();
msg(infoStream, " numFiles=" + info.files().size());
segInfoStat.numFiles = info.files().size();
segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
if (info.info.getAttribute(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY) == null) {
// don't print size in bytes if its a 3.0 segment with shared docstores
msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB));
}
Map<String,String> diagnostics = info.info.getDiagnostics();
segInfoStat.diagnostics = diagnostics;
if (diagnostics.size() > 0) {
msg(infoStream, " diagnostics = " + diagnostics);
}
if (!info.hasDeletions()) {
msg(infoStream, " no deletions");
segInfoStat.hasDeletions = false;
}
else{
msg(infoStream, " has deletions [delGen=" + info.getDelGen() + "]");
segInfoStat.hasDeletions = true;
segInfoStat.deletionsGen = info.getDelGen();
}
if (infoStream != null)
infoStream.print(" test: open reader.........");
reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
segInfoStat.openReaderPassed = true;
final int numDocs = reader.numDocs();
toLoseDocCount = numDocs;
if (reader.hasDeletions()) {
if (reader.numDocs() != info.info.getDocCount() - info.getDelCount()) {
throw new RuntimeException("delete count mismatch: info=" + (info.info.getDocCount() - info.getDelCount()) + " vs reader=" + reader.numDocs());
}
if ((info.info.getDocCount()-reader.numDocs()) > reader.maxDoc()) {
throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs del count=" + (info.info.getDocCount()-reader.numDocs()));
}
if (info.info.getDocCount() - numDocs != info.getDelCount()) {
throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.info.getDocCount() - numDocs));
}
Bits liveDocs = reader.getLiveDocs();
if (liveDocs == null) {
throw new RuntimeException("segment should have deletions, but liveDocs is null");
} else {
int numLive = 0;
for (int j = 0; j < liveDocs.length(); j++) {
if (liveDocs.get(j)) {
numLive++;
}
}
if (numLive != numDocs) {
throw new RuntimeException("liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive);
}
}
segInfoStat.numDeleted = info.info.getDocCount() - numDocs;
msg(infoStream, "OK [" + (segInfoStat.numDeleted) + " deleted docs]");
} else {
if (info.getDelCount() != 0) {
throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.info.getDocCount() - numDocs));
}
Bits liveDocs = reader.getLiveDocs();
if (liveDocs != null) {
// its ok for it to be non-null here, as long as none are set right?
for (int j = 0; j < liveDocs.length(); j++) {
if (!liveDocs.get(j)) {
throw new RuntimeException("liveDocs mismatch: info says no deletions but doc " + j + " is deleted.");
}
}
}
msg(infoStream, "OK");
}
if (reader.maxDoc() != info.info.getDocCount()) {
throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.info.getDocCount());
}
// Test getFieldInfos()
if (infoStream != null) {
infoStream.print(" test: fields..............");
}
FieldInfos fieldInfos = reader.getFieldInfos();
msg(infoStream, "OK [" + fieldInfos.size() + " fields]");
segInfoStat.numFields = fieldInfos.size();
// Test Field Norms
segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream);
// Test the Term Index
segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose);
// Test Stored Fields
segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream);
// Test Term Vectors
segInfoStat.termVectorStatus = testTermVectors(reader, infoStream, verbose, crossCheckTermVectors);
segInfoStat.docValuesStatus = testDocValues(reader, infoStream);
// Rethrow the first exception we encountered
// This will cause stats for failed segments to be incremented properly
if (segInfoStat.fieldNormStatus.error != null) {
throw new RuntimeException("Field Norm test failed");
} else if (segInfoStat.termIndexStatus.error != null) {
throw new RuntimeException("Term Index test failed");
} else if (segInfoStat.storedFieldStatus.error != null) {
throw new RuntimeException("Stored Field test failed");
} else if (segInfoStat.termVectorStatus.error != null) {
throw new RuntimeException("Term Vector test failed");
} else if (segInfoStat.docValuesStatus.error != null) {
throw new RuntimeException("DocValues test failed");
}
msg(infoStream, "");
} catch (Throwable t) {
msg(infoStream, "FAILED");
String comment;
comment = "fixIndex() would remove reference to this segment";
msg(infoStream, " WARNING: " + comment + "; full exception:");
if (infoStream != null)
t.printStackTrace(infoStream);
msg(infoStream, "");
result.totLoseDocCount += toLoseDocCount;
result.numBadSegments++;
continue;
} finally {
if (reader != null)
reader.close();
}
// Keeper
result.newSegments.add(info.clone());
}
if (0 == result.numBadSegments) {
result.clean = true;
} else
msg(infoStream, "WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
if ( ! (result.validCounter = (result.maxSegmentName < sis.counter))) {
result.clean = false;
result.newSegments.counter = result.maxSegmentName + 1;
msg(infoStream, "ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
}
if (result.clean) {
msg(infoStream, "No problems were detected with this index.\n");
}
return result;
}