in server/src/main/java/org/elasticsearch/index/store/Store.java [816:1159]
public record MetadataSnapshot(Map<String, StoreFileMetadata> fileMetadataMap, Map<String, String> commitUserData, long numDocs)
implements
Iterable<StoreFileMetadata>,
Writeable {
public static final MetadataSnapshot EMPTY = new MetadataSnapshot(emptyMap(), emptyMap(), 0L);
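/**
* Builds a {@link MetadataSnapshot} for the given commit, or for the latest commit in {@code directory} if {@code commit} is
* null, by reading the footer checksum of every file that the commit references. If reading fails with anything other than a
* known corruption-style exception, the segment info integrity is checked to distinguish an actually corrupt index from a
* transient failure.
*/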
static MetadataSnapshot loadFromIndexCommit(@Nullable IndexCommit commit, Directory directory, Logger logger) throws IOException {
final long numDocs;
final Map<String, StoreFileMetadata> metadataByFile = new HashMap<>();
final Map<String, String> commitUserData;
try {
final SegmentInfos segmentCommitInfos = readSegmentsInfo(commit, directory);
numDocs = Lucene.getNumDocs(segmentCommitInfos);
commitUserData = Map.copyOf(segmentCommitInfos.getUserData());
// we don't know which version was used to write these files, so we take the max version
Version maxVersion = segmentCommitInfos.getMinSegmentLuceneVersion();
for (SegmentCommitInfo info : segmentCommitInfos) {
final Version version = info.info.getVersion();
if (version == null) {
// versions have been written since Lucene 3.1, so we should have already hit IndexFormatTooOldException
throw new IllegalArgumentException("expected valid version value: " + info.info.toString());
}
if (version.onOrAfter(maxVersion)) {
maxVersion = version;
}
final BytesRef segmentInfoId = StoreFileMetadata.toWriterUuid(info.info.getId());
final BytesRef segmentCommitInfoId = StoreFileMetadata.toWriterUuid(info.getId());
for (String file : info.files()) {
checksumFromLuceneFile(
directory,
file,
metadataByFile,
logger,
version.toString(),
SEGMENT_INFO_EXTENSION.equals(IndexFileNames.getExtension(file)),
IndexFileNames.parseGeneration(file) == 0 ? segmentInfoId : segmentCommitInfoId
);
}
}
if (maxVersion == null) {
maxVersion = IndexVersions.MINIMUM_COMPATIBLE.luceneVersion();
}
final String segmentsFile = segmentCommitInfos.getSegmentsFileName();
checksumFromLuceneFile(
directory,
segmentsFile,
metadataByFile,
logger,
maxVersion.toString(),
true,
StoreFileMetadata.toWriterUuid(segmentCommitInfos.getId())
);
} catch (CorruptIndexException | IndexNotFoundException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
// we either know the index is corrupted or it's just not there
throw ex;
} catch (Exception ex) {
try {
// Lucene checks the checksum only after it tries to look up the codec etc.,
// so in that case we might get an IAE or similar exception although the index is really corrupt...
// TODO we should check the checksum in lucene if we hit an exception
logger.warn(
() -> format(
"failed to build store metadata. checking segment info integrity (with commit [%s])",
commit == null ? "no" : "yes"
),
ex
);
Lucene.checkSegmentInfoIntegrity(directory);
} catch (Exception inner) {
inner.addSuppressed(ex);
throw inner;
}
throw ex;
}
final var metadataSnapshot = new MetadataSnapshot(unmodifiableMap(metadataByFile), commitUserData, numDocs);
assert metadataSnapshot.fileMetadataMap.isEmpty() || metadataSnapshot.numSegmentFiles() == 1
: "numSegmentFiles: " + metadataSnapshot.numSegmentFiles();
return metadataSnapshot;
}
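/**
* Deserializes a {@link MetadataSnapshot} from the given stream, returning the shared {@link #EMPTY} instance if the stream
* contains no file metadata, no commit user data and a document count of zero.
*/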
public static MetadataSnapshot readFrom(StreamInput in) throws IOException {
final Map<String, StoreFileMetadata> metadata = in.readMapValues(StoreFileMetadata::new, StoreFileMetadata::name);
final var commitUserData = in.readMap(StreamInput::readString);
final var numDocs = in.readLong();
if (metadata.size() == 0 && commitUserData.size() == 0 && numDocs == 0) {
return MetadataSnapshot.EMPTY;
} else {
return new MetadataSnapshot(metadata, commitUserData, numDocs);
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeMapValues(fileMetadataMap);
out.writeMap(commitUserData, StreamOutput::writeString);
out.writeLong(numDocs);
}
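/**
* Returns the {@link IndexVersion} recorded in the commit user data under the {@code ES_VERSION} key, or {@code null} if the
* commit does not record one.
*/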
@Nullable
public IndexVersion getCommitVersion() {
String version = commitUserData.get(ES_VERSION);
return version == null ? null : Engine.readIndexVersion(version);
}
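/**
* Returns true for files whose complete contents are captured in the metadata "hash" field: the per-segment segment info
* files ({@code *.si}) and the per-commit segments file ({@code segments_N}).
*/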
public static boolean isReadAsHash(String file) {
return SEGMENT_INFO_EXTENSION.equals(IndexFileNames.getExtension(file)) || file.startsWith(IndexFileNames.SEGMENTS + "_");
}
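/**
* Reads the footer checksum of {@code file} from {@code directory} and records its metadata in {@code builder}. If
* {@code readFileAsHash} is set then the whole file is additionally read into the metadata "hash" field and its CRC32 is
* verified against the footer checksum.
*/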
private static void checksumFromLuceneFile(
Directory directory,
String file,
Map<String, StoreFileMetadata> builder,
Logger logger,
String version,
boolean readFileAsHash,
BytesRef writerUuid
) throws IOException {
// We select the read-once context carefully here since these constants, while equivalent, are
// checked by identity in the different directory implementations.
var context = file.startsWith(IndexFileNames.SEGMENTS) ? IOContext.READONCE : READONCE_CHECKSUM;
try (IndexInput in = directory.openInput(file, context)) {
final long length = in.length();
if (length < CodecUtil.footerLength()) {
// If the file isn't long enough to contain the footer then verifying it triggers an IAE, but really it's corrupted
throw new CorruptIndexException(
Strings.format(
"Cannot retrieve checksum from file: %s file length must be >= %d but was: %d",
file,
CodecUtil.footerLength(),
length
),
in
);
}
final BytesRef fileHash; // not really a "hash", it's either the exact contents of certain small files or it's empty
final long footerChecksum;
assert readFileAsHash == isReadAsHash(file) : file;
if (readFileAsHash) {
assert length <= ByteSizeUnit.MB.toIntBytes(1) : file + " has length " + length;
fileHash = new BytesRef(Math.toIntExact(length));
fileHash.length = fileHash.bytes.length;
in.readBytes(fileHash.bytes, fileHash.offset, fileHash.length);
final var crc32 = new CRC32();
crc32.update(fileHash.bytes, fileHash.offset, fileHash.length - 8);
final var computedChecksum = crc32.getValue();
footerChecksum = CodecUtil.retrieveChecksum(in);
if (computedChecksum != footerChecksum) {
throw new CorruptIndexException(
Strings.format("Checksum from footer=%d did not match computed checksum=%d", footerChecksum, computedChecksum),
in
);
}
} else {
fileHash = new BytesRef(BytesRef.EMPTY_BYTES);
footerChecksum = CodecUtil.retrieveChecksum(in);
}
builder.put(file, new StoreFileMetadata(file, length, digestToString(footerChecksum), version, fileHash, writerUuid));
} catch (Exception ex) {
logger.debug(() -> "Failed computing metadata for file [" + file + "]", ex);
throw ex;
}
}
@Override
public Iterator<StoreFileMetadata> iterator() {
return fileMetadataMap.values().iterator();
}
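/**
* Returns the metadata for the file with the given name, or {@code null} if this snapshot does not contain it.
*/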
public StoreFileMetadata get(String name) {
return fileMetadataMap.get(name);
}
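// extension of the per-segment Lucene segment info files (*.si)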
private static final String SEGMENT_INFO_EXTENSION = "si";
/**
* Returns a diff between the two snapshots that can be used for recovery. The given snapshot is treated as the
* recovery target and this snapshot as the source. The returned diff will hold a list of files that are:
* <ul>
* <li>identical: they exist in both snapshots and can be considered the same, i.e. they don't need to be recovered</li>
* <li>different: they exist in both snapshots but they are not identical</li>
* <li>missing: files that exist in the source but not in the target</li>
* </ul>
* <p>
* Individual files are compared by name, length, checksum and (if present) a UUID that was assigned when the file was originally
* written. The segment info ({@code *.si}) files and the segments file ({@code segments_N}) are also checked to be a byte-for-byte
* match.
* <p>
* Files are collected together into a group for each segment plus one group of "per-commit" ({@code segments_N}) files. Each
* per-segment group is subdivided into a nongenerational group (most of them) and a generational group (e.g. {@code *.liv},
* {@code *.fnm}, {@code *.dvm}, {@code *.dvd} that have been updated by subsequent commits).
* <p>
* For each segment, if any nongenerational files are different then the whole segment is considered to be different and will be
* recovered in full. If all the nongenerational files are the same but any generational files are different then all the
* generational files are considered to be different and will be recovered in full, but the nongenerational files are left alone.
* Finally, if any file is different then all the per-commit files are recovered too.
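* <p>
* A hypothetical usage sketch (the {@code sourceMetadata} and {@code targetMetadata} names are illustrative, and record-style
* accessors on {@link RecoveryDiff} are assumed):
* <pre>{@code
* RecoveryDiff diff = sourceMetadata.recoveryDiff(targetMetadata);
* List<StoreFileMetadata> toRecover = new ArrayList<>(diff.different());
* toRecover.addAll(diff.missing()); // files absent from the target must be copied in full
* }</pre>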
*/
public RecoveryDiff recoveryDiff(final MetadataSnapshot targetSnapshot) {
final List<StoreFileMetadata> perCommitSourceFiles = new ArrayList<>();
final Map<String, Tuple<List<StoreFileMetadata>, List<StoreFileMetadata>>> perSegmentSourceFiles = new HashMap<>();
// per segment, a tuple of <<non-generational files, generational files>>
for (StoreFileMetadata sourceFile : this) {
if (sourceFile.name().startsWith("_")) {
final String segmentId = IndexFileNames.parseSegmentName(sourceFile.name());
final boolean isGenerationalFile = IndexFileNames.parseGeneration(sourceFile.name()) > 0L;
final Tuple<List<StoreFileMetadata>, List<StoreFileMetadata>> perSegmentTuple = perSegmentSourceFiles.computeIfAbsent(
segmentId,
k -> Tuple.tuple(new ArrayList<>(), new ArrayList<>())
);
(isGenerationalFile ? perSegmentTuple.v2() : perSegmentTuple.v1()).add(sourceFile);
} else {
assert sourceFile.name().startsWith(IndexFileNames.SEGMENTS + "_") : "unexpected " + sourceFile;
perCommitSourceFiles.add(sourceFile);
}
}
final List<StoreFileMetadata> identical = new ArrayList<>();
final List<StoreFileMetadata> different = new ArrayList<>();
final List<StoreFileMetadata> missing = new ArrayList<>();
final List<StoreFileMetadata> tmpIdentical = new ArrayList<>(); // confirm whole group is identical before adding to 'identical'
final Predicate<List<StoreFileMetadata>> groupComparer = sourceGroup -> {
assert tmpIdentical.isEmpty() : "not cleaned up: " + tmpIdentical;
boolean groupIdentical = true;
for (StoreFileMetadata sourceFile : sourceGroup) {
final StoreFileMetadata targetFile = targetSnapshot.get(sourceFile.name());
if (targetFile == null) {
groupIdentical = false;
missing.add(sourceFile);
} else if (groupIdentical && targetFile.isSame(sourceFile)) {
tmpIdentical.add(sourceFile);
} else {
groupIdentical = false;
different.add(sourceFile);
}
}
if (groupIdentical) {
identical.addAll(tmpIdentical);
} else {
different.addAll(tmpIdentical);
}
tmpIdentical.clear();
return groupIdentical;
};
final Consumer<List<StoreFileMetadata>> allDifferent = sourceGroup -> {
for (StoreFileMetadata sourceFile : sourceGroup) {
final StoreFileMetadata targetFile = targetSnapshot.get(sourceFile.name());
if (targetFile == null) {
missing.add(sourceFile);
} else {
different.add(sourceFile);
}
}
};
boolean segmentsIdentical = true;
for (Tuple<List<StoreFileMetadata>, List<StoreFileMetadata>> segmentFiles : perSegmentSourceFiles.values()) {
final List<StoreFileMetadata> nonGenerationalFiles = segmentFiles.v1();
final List<StoreFileMetadata> generationalFiles = segmentFiles.v2();
if (groupComparer.test(nonGenerationalFiles)) {
// non-generational files are identical, now check the generational files
segmentsIdentical = groupComparer.test(generationalFiles) && segmentsIdentical;
} else {
// non-generational files were different, so consider the whole segment as different
segmentsIdentical = false;
allDifferent.accept(generationalFiles);
}
}
if (segmentsIdentical) {
// segments were the same, check the per-commit files
groupComparer.test(perCommitSourceFiles);
} else {
// at least one segment was different, so treat all the per-commit files as different too
allDifferent.accept(perCommitSourceFiles);
}
final RecoveryDiff recoveryDiff = new RecoveryDiff(
Collections.unmodifiableList(identical),
Collections.unmodifiableList(different),
Collections.unmodifiableList(missing)
);
assert recoveryDiff.size() == fileMetadataMap.size()
: "some files are missing: recoveryDiff is ["
+ recoveryDiff
+ "] comparing: ["
+ fileMetadataMap
+ "] to ["
+ targetSnapshot.fileMetadataMap
+ "]";
return recoveryDiff;
}
/**
* Returns the number of files in this snapshot
*/
public int size() {
return fileMetadataMap.size();
}
/**
* Returns the history UUID the store points at, or {@code null} if nonexistent.
*/
public String getHistoryUUID() {
return commitUserData.get(Engine.HISTORY_UUID_KEY);
}
/**
* Returns true iff this metadata contains the given file.
*/
public boolean contains(String existingFile) {
return fileMetadataMap.containsKey(existingFile);
}
/**
* Returns the segments file that this metadata snapshot represents or null if the snapshot is empty.
*/
public StoreFileMetadata getSegmentsFile() {
for (StoreFileMetadata file : this) {
if (file.name().startsWith(IndexFileNames.SEGMENTS)) {
return file;
}
}
assert fileMetadataMap.isEmpty();
return null;
}
private int numSegmentFiles() { // only for asserts
int count = 0;
for (StoreFileMetadata file : this) {
if (file.name().startsWith(IndexFileNames.SEGMENTS)) {
count++;
}
}
return count;
}
}