public Status checkIndex()

in lucene/core/src/java/org/apache/lucene/index/CheckIndex.java [617:947]


  /**
   * Checks the index in the directory: every segments_N commit file is opened (newest generation
   * first) to catch corruption such as missing _N.si files, and then all segments referenced by
   * the latest commit point are deeply checked, either sequentially or concurrently.
   *
   * @param onlySegments if non-null, only segments whose names appear in this list are deeply
   *     checked (the result is marked partial)
   * @param executorService if non-null, per-segment checks run concurrently on this executor,
   *     largest segments first, with output buffered and printed in sorted order; if null,
   *     segments are checked sequentially on the calling thread
   * @return a {@code Status} describing what was found; {@code clean} is true only when no broken
   *     segments were detected and the segment name counter is valid
   * @throws IOException if there is a low-level IO error, or (when {@code failFast} is set) if a
   *     segments file cannot be read
   */
  public Status checkIndex(List<String> onlySegments, ExecutorService executorService)
      throws IOException {
    ensureOpen();
    long startNS = System.nanoTime();

    Status result = new Status();
    result.dir = dir;
    String[] files = dir.listAll();
    String lastSegmentsFile = SegmentInfos.getLastCommitSegmentsFileName(files);
    if (lastSegmentsFile == null) {
      throw new IndexNotFoundException(
          "no segments* file found in " + dir + ": files: " + Arrays.toString(files));
    }

    // https://github.com/apache/lucene/issues/7820: also attempt to open any older commit
    // points (segments_N), which will catch certain corruption like missing _N.si files
    // for segments not also referenced by the newest commit point (which was already
    // loaded, successfully, above).  Note that we do not do a deeper check of segments
    // referenced ONLY by these older commit points, because such corruption would not
    // prevent a new IndexWriter from opening on the newest commit point.  but it is still
    // corruption, e.g. a reader opened on those old commit points can hit corruption
    // exceptions which we (still) will not detect here.  progress not perfection!

    SegmentInfos lastCommit = null;

    // Collect all real segments_N files, skipping the legacy segments.gen marker:
    List<String> allSegmentsFiles = new ArrayList<>();
    for (String fileName : files) {
      if (fileName.startsWith(IndexFileNames.SEGMENTS)
          && fileName.equals(SegmentInfos.OLD_SEGMENTS_GEN) == false) {
        allSegmentsFiles.add(fileName);
      }
    }

    // Sort descending by generation so that we always attempt to read the last commit first.  This
    // way if an index has a broken last commit AND a broken old commit, we report the last commit
    // error first:
    allSegmentsFiles.sort(
        (a, b) -> {
          long genA = SegmentInfos.generationFromSegmentsFileName(a);
          long genB = SegmentInfos.generationFromSegmentsFileName(b);

          // reversed natural sort (largest generation first):
          return -Long.compare(genA, genB);
        });

    for (String fileName : allSegmentsFiles) {

      boolean isLastCommit = fileName.equals(lastSegmentsFile);

      SegmentInfos infos;

      try {
        // Do not use SegmentInfos.read(Directory) since the spooky
        // retrying it does is not necessary here (we hold the write lock):
        // always open old indices if codecs are around
        infos = SegmentInfos.readCommit(dir, fileName, 0);
      } catch (Throwable t) {
        if (failFast) {
          throw IOUtils.rethrowAlways(t);
        }

        String message;

        if (isLastCommit) {
          message =
              "ERROR: could not read latest commit point from segments file \""
                  + fileName
                  + "\" in directory";
        } else {
          message =
              "ERROR: could not read old (not latest) commit point segments file \""
                  + fileName
                  + "\" in directory";
        }
        msg(infoStream, message);
        result.missingSegments = true;
        if (infoStream != null) {
          t.printStackTrace(infoStream);
        }
        return result;
      }

      if (isLastCommit) {
        // record the latest commit point: we will deeply check all segments referenced by it
        lastCommit = infos;
      }
    }

    // we know there is a lastSegmentsFileName, so we must've attempted to load it in the above for
    // loop.  if it failed to load, we threw the exception (failFast == true) or we returned the
    // failure (failFast == false).  so if we get here, we should always have a valid lastCommit:
    assert lastCommit != null;

    // Defensive re-check for builds running without assertions enabled:
    if (lastCommit == null) {
      msg(infoStream, "ERROR: could not read any segments file in directory");
      result.missingSegments = true;
      return result;
    }

    if (infoStream != null) {
      int maxDoc = 0;
      int delCount = 0;
      for (SegmentCommitInfo info : lastCommit) {
        maxDoc += info.info.maxDoc();
        delCount += info.getDelCount();
      }
      // NOTE(review): if the commit has zero documents (maxDoc == 0) this prints NaN% — harmless
      // but worth confirming whether an empty commit can reach here.
      infoStream.printf(
          Locale.ROOT,
          "%.2f%% total deletions; %d documents; %d deletions%n",
          100. * delCount / maxDoc,
          maxDoc,
          delCount);
    }

    // find the oldest and newest segment versions
    Version oldest = null;
    Version newest = null;
    String oldSegs = null;
    for (SegmentCommitInfo si : lastCommit) {
      Version version = si.info.getVersion();
      if (version == null) {
        // pre-3.1 segment
        oldSegs = "pre-3.1";
      } else {
        if (oldest == null || version.onOrAfter(oldest) == false) {
          oldest = version;
        }
        if (newest == null || version.onOrAfter(newest)) {
          newest = version;
        }
      }
    }

    final int numSegments = lastCommit.size();
    final String segmentsFileName = lastCommit.getSegmentsFileName();
    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.userData = lastCommit.getUserData();
    String userDataString;
    if (lastCommit.getUserData().size() > 0) {
      userDataString = " userData=" + lastCommit.getUserData();
    } else {
      userDataString = "";
    }

    String versionString = "";
    if (oldSegs != null) {
      if (newest != null) {
        versionString = "versions=[" + oldSegs + " .. " + newest + "]";
      } else {
        versionString = "version=" + oldSegs;
      }
    } else if (newest != null) { // implies oldest != null
      versionString =
          oldest.equals(newest)
              ? ("version=" + oldest)
              : ("versions=[" + oldest + " .. " + newest + "]");
    }

    msg(
        infoStream,
        "Segments file="
            + segmentsFileName
            + " numSegments="
            + numSegments
            + " "
            + versionString
            + " id="
            + StringHelper.idToString(lastCommit.getId())
            + userDataString);

    if (onlySegments != null) {
      result.partial = true;
      if (infoStream != null) {
        infoStream.print("\nChecking only these segments:");
        for (String s : onlySegments) {
          infoStream.print(" " + s);
        }
      }
      result.segmentsChecked.addAll(onlySegments);
      msg(infoStream, ":");
    }

    result.newSegments = lastCommit.clone();
    result.newSegments.clear();
    result.maxSegmentName = -1;

    // checks segments sequentially
    if (executorService == null) {
      for (int i = 0; i < numSegments; i++) {
        final SegmentCommitInfo info = lastCommit.info(i);
        updateMaxSegmentName(result, info);
        if (onlySegments != null && !onlySegments.contains(info.info.name)) {
          continue;
        }

        msg(
            infoStream,
            (1 + i)
                + " of "
                + numSegments
                + ": name="
                + info.info.name
                + " maxDoc="
                + info.info.maxDoc());
        Status.SegmentInfoStatus segmentInfoStatus = testSegment(lastCommit, info, infoStream);

        processSegmentInfoStatusResult(result, info, segmentInfoStatus);
      }
    } else {
      // Each concurrent segment check writes to its own buffer so results can be printed in a
      // deterministic order afterwards:
      ByteArrayOutputStream[] outputs = new ByteArrayOutputStream[numSegments];
      @SuppressWarnings({"unchecked", "rawtypes"})
      CompletableFuture<Status.SegmentInfoStatus>[] futures = new CompletableFuture[numSegments];

      // checks segments concurrently
      List<SegmentCommitInfo> segmentCommitInfos = new ArrayList<>();
      for (SegmentCommitInfo sci : lastCommit) {
        segmentCommitInfos.add(sci);
      }

      // sort segmentCommitInfos by segment size, as smaller segment tends to finish faster, and
      // hence its output can be printed out faster
      segmentCommitInfos.sort(
          (info1, info2) -> {
            try {
              return Long.compare(info1.sizeInBytes(), info2.sizeInBytes());
            } catch (IOException e) {
              msg(
                  infoStream,
                  "ERROR: IOException occurred when comparing SegmentCommitInfo file sizes");
              if (infoStream != null) e.printStackTrace(infoStream);
              // Treat incomparable entries as equal; the sort is only a scheduling heuristic:
              return 0;
            }
          });

      // start larger segments earlier
      for (int i = numSegments - 1; i >= 0; i--) {
        final SegmentCommitInfo info = segmentCommitInfos.get(i);
        updateMaxSegmentName(result, info);
        if (onlySegments != null && !onlySegments.contains(info.info.name)) {
          continue;
        }

        // effectively-final copy for capture by the lambda below:
        SegmentInfos finalSis = lastCommit;

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        PrintStream stream = new PrintStream(output, true, UTF_8);
        msg(
            stream,
            (1 + i)
                + " of "
                + numSegments
                + ": name="
                + info.info.name
                + " maxDoc="
                + info.info.maxDoc());

        outputs[i] = output;
        futures[i] =
            runAsyncSegmentCheck(() -> testSegment(finalSis, info, stream), executorService);
      }

      for (int i = 0; i < numSegments; i++) {
        SegmentCommitInfo info = segmentCommitInfos.get(i);
        if (onlySegments != null && !onlySegments.contains(info.info.name)) {
          continue;
        }

        ByteArrayOutputStream output = outputs[i];

        // print segment results in order
        Status.SegmentInfoStatus segmentInfoStatus = null;
        try {
          segmentInfoStatus = futures[i].get();
        } catch (InterruptedException e) {
          // the segment test output should come before interrupted exception message that follows,
          // hence it's not emitted from finally clause
          msg(infoStream, output);
          msg(
              infoStream,
              "ERROR: Interrupted exception occurred when getting segment check result for segment "
                  + info.info.name);
          if (infoStream != null) e.printStackTrace(infoStream);
          // Restore the interrupt flag so callers (and the executor) can observe the
          // interruption; swallowing it here would silently clear the thread's status:
          Thread.currentThread().interrupt();
          // NOTE(review): segmentInfoStatus stays null on this path and is still passed to
          // processSegmentInfoStatusResult below — confirm that method tolerates null.
        } catch (ExecutionException e) {
          msg(infoStream, output.toString(UTF_8));

          assert failFast;
          throw new CheckIndexException(
              "Segment " + info.info.name + " check failed.", e.getCause());
        }

        // NOTE(review): the buffer was filled through a UTF-8 PrintStream; presumably the
        // msg(infoStream, ByteArrayOutputStream) overload decodes it as UTF-8 (as the
        // ExecutionException path above does explicitly) — verify against that overload.
        msg(infoStream, output);

        processSegmentInfoStatusResult(result, info, segmentInfoStatus);
      }
    }

    if (0 == result.numBadSegments) {
      result.clean = true;
    } else {
      msg(
          infoStream,
          "WARNING: "
              + result.numBadSegments
              + " broken segments (containing "
              + result.totLoseDocCount
              + " documents) detected");
    }

    // The next segment name counter must be strictly greater than every segment name in use;
    // otherwise repair it in newSegments and mark the index not clean:
    result.validCounter = result.maxSegmentName < lastCommit.counter;
    if (result.validCounter == false) {
      result.clean = false;
      result.newSegments.counter = result.maxSegmentName + 1;
      msg(
          infoStream,
          "ERROR: Next segment name counter "
              + lastCommit.counter
              + " is not greater than max segment name "
              + result.maxSegmentName);
    }

    if (result.clean) {
      msg(infoStream, "No problems were detected with this index.\n");
    }

    msg(
        infoStream,
        String.format(Locale.ROOT, "Took %.3f sec total.", nsToSec(System.nanoTime() - startNS)));

    return result;
  }