public record MetadataSnapshot()

in server/src/main/java/org/elasticsearch/index/store/Store.java [816:1159]


    public record MetadataSnapshot(Map<String, StoreFileMetadata> fileMetadataMap, Map<String, String> commitUserData, long numDocs)
        implements
            Iterable<StoreFileMetadata>,
            Writeable {

        public static final MetadataSnapshot EMPTY = new MetadataSnapshot(emptyMap(), emptyMap(), 0L);

        static MetadataSnapshot loadFromIndexCommit(@Nullable IndexCommit commit, Directory directory, Logger logger) throws IOException {
            final long numDocs;
            final Map<String, StoreFileMetadata> metadataByFile = new HashMap<>();
            final Map<String, String> commitUserData;
            try {
                final SegmentInfos segmentCommitInfos = readSegmentsInfo(commit, directory);
                numDocs = Lucene.getNumDocs(segmentCommitInfos);
                commitUserData = Map.copyOf(segmentCommitInfos.getUserData());
                // we don't know which version was used to write the commit, so we take the max segment
                // version: start from the min segment version (which may be null) and raise it as we scan
                Version maxVersion = segmentCommitInfos.getMinSegmentLuceneVersion();
                for (SegmentCommitInfo info : segmentCommitInfos) {
                    final Version version = info.info.getVersion();
                    if (version == null) {
                        // version is written since 3.1+: we should have already hit IndexFormatTooOld.
                        throw new IllegalArgumentException("expected valid version value: " + info.info.toString());
                    }
                    if (version.onOrAfter(maxVersion)) {
                        maxVersion = version;
                    }

                    final BytesRef segmentInfoId = StoreFileMetadata.toWriterUuid(info.info.getId());
                    final BytesRef segmentCommitInfoId = StoreFileMetadata.toWriterUuid(info.getId());
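                    // Nongenerational files (generation 0) are attributed to the IndexWriter that wrote the
                    // segment (the SegmentInfo id); generational files, e.g. live-docs or doc-values updates
                    // written by a later commit, are attributed to that commit (the SegmentCommitInfo id).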

                    for (String file : info.files()) {
                        checksumFromLuceneFile(
                            directory,
                            file,
                            metadataByFile,
                            logger,
                            version.toString(),
                            SEGMENT_INFO_EXTENSION.equals(IndexFileNames.getExtension(file)),
                            IndexFileNames.parseGeneration(file) == 0 ? segmentInfoId : segmentCommitInfoId
                        );
                    }
                }
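                // no per-segment version found (e.g. an empty commit): fall back to the oldest version we can read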
                if (maxVersion == null) {
                    maxVersion = IndexVersions.MINIMUM_COMPATIBLE.luceneVersion();
                }
                final String segmentsFile = segmentCommitInfos.getSegmentsFileName();
                checksumFromLuceneFile(
                    directory,
                    segmentsFile,
                    metadataByFile,
                    logger,
                    maxVersion.toString(),
                    true,
                    StoreFileMetadata.toWriterUuid(segmentCommitInfos.getId())
                );
            } catch (CorruptIndexException | IndexNotFoundException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
                // we either know the index is corrupted or it's just not there
                throw ex;
            } catch (Exception ex) {
                try {
                    // Lucene checks the checksum only after it tries to look up the codec etc., so in that
                    // case we might get just an IAE or similar exception even though the index is really corrupt...
                    // TODO we should check the checksum in lucene if we hit an exception
                    logger.warn(
                        () -> format(
                            "failed to build store metadata. checking segment info integrity (with commit [%s])",
                            commit == null ? "no" : "yes"
                        ),
                        ex
                    );
                    Lucene.checkSegmentInfoIntegrity(directory);
                } catch (Exception inner) {
                    inner.addSuppressed(ex);
                    throw inner;
                }
                throw ex;
            }
            final var metadataSnapshot = new MetadataSnapshot(unmodifiableMap(metadataByFile), commitUserData, numDocs);
            assert metadataSnapshot.fileMetadataMap.isEmpty() || metadataSnapshot.numSegmentFiles() == 1
                : "numSegmentFiles: " + metadataSnapshot.numSegmentFiles();
            return metadataSnapshot;
        }

        public static MetadataSnapshot readFrom(StreamInput in) throws IOException {
            final Map<String, StoreFileMetadata> metadata = in.readMapValues(StoreFileMetadata::new, StoreFileMetadata::name);
            final var commitUserData = in.readMap(StreamInput::readString);
            final var numDocs = in.readLong();

            if (metadata.size() == 0 && commitUserData.size() == 0 && numDocs == 0) {
                return MetadataSnapshot.EMPTY;
            } else {
                return new MetadataSnapshot(metadata, commitUserData, numDocs);
            }
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeMapValues(fileMetadataMap);
            out.writeMap(commitUserData, StreamOutput::writeString);
            out.writeLong(numDocs);
        }
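        // A minimal serialization round-trip sketch (illustrative only; BytesStreamOutput is the usual
        // in-memory StreamOutput):
        //
        //   BytesStreamOutput out = new BytesStreamOutput();
        //   snapshot.writeTo(out);
        //   MetadataSnapshot copy = MetadataSnapshot.readFrom(out.bytes().streamInput());
        //   assert copy.numDocs() == snapshot.numDocs();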

        @Nullable
        public IndexVersion getCommitVersion() {
            String version = commitUserData.get(ES_VERSION);
            return version == null ? null : Engine.readIndexVersion(version);
        }

        public static boolean isReadAsHash(String file) {
            return SEGMENT_INFO_EXTENSION.equals(IndexFileNames.getExtension(file)) || file.startsWith(IndexFileNames.SEGMENTS + "_");
        }
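        // For example, isReadAsHash("_0.si") and isReadAsHash("segments_5") return true (such small files
        // are captured byte-for-byte in the metadata "hash"), while isReadAsHash("_0.cfs") returns false.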

        private static void checksumFromLuceneFile(
            Directory directory,
            String file,
            Map<String, StoreFileMetadata> builder,
            Logger logger,
            String version,
            boolean readFileAsHash,
            BytesRef writerUuid
        ) throws IOException {
            // We select the read-once context carefully here since these constants, while equivalent, are
            // checked by identity in the different directory implementations.
            var context = file.startsWith(IndexFileNames.SEGMENTS) ? IOContext.READONCE : READONCE_CHECKSUM;
            try (IndexInput in = directory.openInput(file, context)) {
                final long length = in.length();
                if (length < CodecUtil.footerLength()) {
                    // If the file isn't long enough to contain the footer then verifying it triggers an IAE, but really it's corrupted
                    throw new CorruptIndexException(
                        Strings.format(
                            "Cannot retrieve checksum from file: %s file length must be >= %d but was: %d",
                            file,
                            CodecUtil.footerLength(),
                            length
                        ),
                        in
                    );
                }
                final BytesRef fileHash; // not really a "hash", it's either the exact contents of certain small files or it's empty
                final long footerChecksum;
                assert readFileAsHash == isReadAsHash(file) : file;
                if (readFileAsHash) {
                    assert length <= ByteSizeUnit.MB.toIntBytes(1) : file + " has length " + length;
                    fileHash = new BytesRef(Math.toIntExact(length));
                    fileHash.length = fileHash.bytes.length;
                    in.readBytes(fileHash.bytes, fileHash.offset, fileHash.length);
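                    // A Lucene codec footer is 16 bytes: a 4-byte magic, a 4-byte algorithm id, and the
                    // 8-byte checksum. The stored CRC32 covers every byte of the file except those trailing
                    // 8 checksum bytes, hence "length - 8" below.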
                    final var crc32 = new CRC32();
                    crc32.update(fileHash.bytes, fileHash.offset, fileHash.length - 8);
                    final var computedChecksum = crc32.getValue();
                    footerChecksum = CodecUtil.retrieveChecksum(in);
                    if (computedChecksum != footerChecksum) {
                        throw new CorruptIndexException(
                            Strings.format("Checksum from footer=%d did not match computed checksum=%d", footerChecksum, computedChecksum),
                            in
                        );
                    }
                } else {
                    fileHash = new BytesRef(BytesRef.EMPTY_BYTES);
                    footerChecksum = CodecUtil.retrieveChecksum(in);
                }
                builder.put(file, new StoreFileMetadata(file, length, digestToString(footerChecksum), version, fileHash, writerUuid));
            } catch (Exception ex) {
                logger.debug(() -> "Failed computing metadata for file [" + file + "]", ex);
                throw ex;
            }
        }

        @Override
        public Iterator<StoreFileMetadata> iterator() {
            return fileMetadataMap.values().iterator();
        }

        public StoreFileMetadata get(String name) {
            return fileMetadataMap.get(name);
        }

        private static final String SEGMENT_INFO_EXTENSION = "si";

        /**
         * Returns a diff between the two snapshots that can be used for recovery. The given snapshot is treated as the
         * recovery target and this snapshot as the source. The returned diff will hold a list of files that are:
         * <ul>
         * <li>identical: they exist in both snapshots and can be considered the same, i.e. they don't need to be recovered</li>
         * <li>different: they exist in both snapshots but they are not identical</li>
         * <li>missing: files that exist in the source but not in the target</li>
         * </ul>
         * <p>
         * Individual files are compared by name, length, checksum and (if present) a UUID that was assigned when the file was originally
         * written. The segment info ({@code *.si}) files and the segments file ({@code segments_N}) are also checked to be a byte-for-byte
         * match.
         * <p>
         * Files are collected together into a group for each segment plus one group of "per-commit" ({@code segments_N}) files. Each
         * per-segment group is subdivided into a nongenerational group (most of them) and a generational group (e.g. {@code *.liv},
         * {@code *.fnm}, {@code *.dvm}, {@code *.dvd} that have been updated by subsequent commits).
         * <p>
         * For each segment, if any nongenerational files are different then the whole segment is considered to be different and will be
         * recovered in full. If all the nongenerational files are the same but any generational files are different then all the
         * generational files are considered to be different and will be recovered in full, but the nongenerational files are left alone.
         * Finally, if any file is different then all the per-commit files are recovered too. An illustrative
         * walk-through follows the method body.
         */
        public RecoveryDiff recoveryDiff(final MetadataSnapshot targetSnapshot) {
            final List<StoreFileMetadata> perCommitSourceFiles = new ArrayList<>();
            final Map<String, Tuple<List<StoreFileMetadata>, List<StoreFileMetadata>>> perSegmentSourceFiles = new HashMap<>();
            // per segment, a tuple of <<non-generational files, generational files>>

            for (StoreFileMetadata sourceFile : this) {
                if (sourceFile.name().startsWith("_")) {
                    final String segmentId = IndexFileNames.parseSegmentName(sourceFile.name());
                    final boolean isGenerationalFile = IndexFileNames.parseGeneration(sourceFile.name()) > 0L;
                    final Tuple<List<StoreFileMetadata>, List<StoreFileMetadata>> perSegmentTuple = perSegmentSourceFiles.computeIfAbsent(
                        segmentId,
                        k -> Tuple.tuple(new ArrayList<>(), new ArrayList<>())
                    );
                    (isGenerationalFile ? perSegmentTuple.v2() : perSegmentTuple.v1()).add(sourceFile);
                } else {
                    assert sourceFile.name().startsWith(IndexFileNames.SEGMENTS + "_") : "unexpected " + sourceFile;
                    perCommitSourceFiles.add(sourceFile);
                }
            }
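            // e.g. "_0.si" and "_0.cfs" land in segment "_0"'s nongenerational list, "_0_1.liv"
            // (generation 1) in its generational list, and "segments_2" among the per-commit files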

            final List<StoreFileMetadata> identical = new ArrayList<>();
            final List<StoreFileMetadata> different = new ArrayList<>();
            final List<StoreFileMetadata> missing = new ArrayList<>();

            final List<StoreFileMetadata> tmpIdentical = new ArrayList<>(); // confirm whole group is identical before adding to 'identical'
            final Predicate<List<StoreFileMetadata>> groupComparer = sourceGroup -> {
                assert tmpIdentical.isEmpty() : "not cleaned up: " + tmpIdentical;
                boolean groupIdentical = true;
                for (StoreFileMetadata sourceFile : sourceGroup) {
                    final StoreFileMetadata targetFile = targetSnapshot.get(sourceFile.name());
                    if (targetFile == null) {
                        groupIdentical = false;
                        missing.add(sourceFile);
                    } else if (groupIdentical && targetFile.isSame(sourceFile)) {
                        tmpIdentical.add(sourceFile);
                    } else {
                        groupIdentical = false;
                        different.add(sourceFile);
                    }
                }
                if (groupIdentical) {
                    identical.addAll(tmpIdentical);
                } else {
                    different.addAll(tmpIdentical);
                }
                tmpIdentical.clear();
                return groupIdentical;
            };
            final Consumer<List<StoreFileMetadata>> allDifferent = sourceGroup -> {
                for (StoreFileMetadata sourceFile : sourceGroup) {
                    final StoreFileMetadata targetFile = targetSnapshot.get(sourceFile.name());
                    if (targetFile == null) {
                        missing.add(sourceFile);
                    } else {
                        different.add(sourceFile);
                    }
                }
            };

            boolean segmentsIdentical = true;

            for (Tuple<List<StoreFileMetadata>, List<StoreFileMetadata>> segmentFiles : perSegmentSourceFiles.values()) {
                final List<StoreFileMetadata> nonGenerationalFiles = segmentFiles.v1();
                final List<StoreFileMetadata> generationalFiles = segmentFiles.v2();

                if (groupComparer.test(nonGenerationalFiles)) {
                    // non-generational files are identical, now check the generational files
                    segmentsIdentical = groupComparer.test(generationalFiles) && segmentsIdentical;
                } else {
                    // non-generational files were different, so consider the whole segment as different
                    segmentsIdentical = false;
                    allDifferent.accept(generationalFiles);
                }
            }

            if (segmentsIdentical) {
                // segments were the same, check the per-commit files
                groupComparer.test(perCommitSourceFiles);
            } else {
                // at least one segment was different, so treat all the per-commit files as different too
                allDifferent.accept(perCommitSourceFiles);
            }

            final RecoveryDiff recoveryDiff = new RecoveryDiff(
                Collections.unmodifiableList(identical),
                Collections.unmodifiableList(different),
                Collections.unmodifiableList(missing)
            );
            assert recoveryDiff.size() == fileMetadataMap.size()
                : "some files are missing: recoveryDiff is ["
                    + recoveryDiff
                    + "] comparing: ["
                    + fileMetadataMap
                    + "] to ["
                    + targetSnapshot.fileMetadataMap
                    + "]";
            return recoveryDiff;
        }
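        // An illustrative walk-through with hypothetical file names: suppose the source holds segment _0
        // with nongenerational files [_0.si, _0.cfs], generational file _0_1.liv and per-commit file
        // segments_2, and the target contains the same files except that its _0.cfs fails isSame(). Then
        // the whole nongenerational group [_0.si, _0.cfs] is reported as different, _0_1.liv is recovered
        // in full alongside it, and segments_2 is treated as different because a segment changed; nothing
        // is reported as identical.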

        /**
         * Returns the number of files in this snapshot
         */
        public int size() {
            return fileMetadataMap.size();
        }

        /**
         * Returns the history UUID the store points at, or null if there is none.
         */
        public String getHistoryUUID() {
            return commitUserData.get(Engine.HISTORY_UUID_KEY);
        }

        /**
         * Returns true iff this metadata contains the given file.
         */
        public boolean contains(String existingFile) {
            return fileMetadataMap.containsKey(existingFile);
        }

        /**
         * Returns the segments file that this metadata snapshot represents or null if the snapshot is empty.
         */
        public StoreFileMetadata getSegmentsFile() {
            for (StoreFileMetadata file : this) {
                if (file.name().startsWith(IndexFileNames.SEGMENTS)) {
                    return file;
                }
            }
            assert fileMetadataMap.isEmpty();
            return null;
        }

        private int numSegmentFiles() { // only for asserts
            int count = 0;
            for (StoreFileMetadata file : this) {
                if (file.name().startsWith(IndexFileNames.SEGMENTS)) {
                    count++;
                }
            }
            return count;
        }

    }
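
A minimal usage sketch, assuming the enclosing Store class's getMetadata accessor and record-style
accessors on RecoveryDiff (hedged assumptions, not verified against the full file; "store" is a
hypothetical Store instance):

    Store.MetadataSnapshot source = store.getMetadata(null); // a null commit means the latest commit
    Store.RecoveryDiff diff = source.recoveryDiff(Store.MetadataSnapshot.EMPTY);
    // against an empty target every source file is "missing" and must be copied during recovery
    assert diff.identical().isEmpty() && diff.different().isEmpty();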