async getPageDict()

in src/core/catalog.js [1163:1292]


  async getPageDict(pageIndex) {
    const nodesToVisit = [this.toplevelPagesDict];
    const visitedNodes = new RefSet();

    const pagesRef = this._catDict.getRaw("Pages");
    if (pagesRef instanceof Ref) {
      visitedNodes.put(pagesRef);
    }
    const xref = this.xref,
      pageKidsCountCache = this.pageKidsCountCache,
      pageIndexCache = this.pageIndexCache,
      pageDictCache = this.pageDictCache;
    let currentPageIndex = 0;

    while (nodesToVisit.length) {
      const currentNode = nodesToVisit.pop();

      if (currentNode instanceof Ref) {
        const count = pageKidsCountCache.get(currentNode);
        // Skip nodes where the page can't be.
        if (count >= 0 && currentPageIndex + count <= pageIndex) {
          currentPageIndex += count;
          continue;
        }
        // Prevent circular references in the /Pages tree.
        if (visitedNodes.has(currentNode)) {
          throw new FormatError("Pages tree contains circular reference.");
        }
        visitedNodes.put(currentNode);

        const obj = await (pageDictCache.get(currentNode) ||
          xref.fetchAsync(currentNode));
        if (obj instanceof Dict) {
          let type = obj.getRaw("Type");
          if (type instanceof Ref) {
            type = await xref.fetchAsync(type);
          }
          if (isName(type, "Page") || !obj.has("Kids")) {
            // Cache the Page reference, since it can *greatly* improve
            // performance by reducing redundant lookups in long documents
            // where all nodes are found at *one* level of the tree.
            if (!pageKidsCountCache.has(currentNode)) {
              pageKidsCountCache.put(currentNode, 1);
            }
            // Help improve performance of the `getPageIndex` method.
            if (!pageIndexCache.has(currentNode)) {
              pageIndexCache.put(currentNode, currentPageIndex);
            }

            if (currentPageIndex === pageIndex) {
              return [obj, currentNode];
            }
            currentPageIndex++;
            continue;
          }
        }
        nodesToVisit.push(obj);
        continue;
      }

      // Must be a child page dictionary.
      if (!(currentNode instanceof Dict)) {
        throw new FormatError(
          "Page dictionary kid reference points to wrong type of object."
        );
      }
      const { objId } = currentNode;

      let count = currentNode.getRaw("Count");
      if (count instanceof Ref) {
        count = await xref.fetchAsync(count);
      }
      if (Number.isInteger(count) && count >= 0) {
        // Cache the Kids count, since it can reduce redundant lookups in
        // documents where all nodes are found at *one* level of the tree.
        if (objId && !pageKidsCountCache.has(objId)) {
          pageKidsCountCache.put(objId, count);
        }

        // Skip nodes where the page can't be.
        if (currentPageIndex + count <= pageIndex) {
          currentPageIndex += count;
          continue;
        }
      }

      let kids = currentNode.getRaw("Kids");
      if (kids instanceof Ref) {
        kids = await xref.fetchAsync(kids);
      }
      if (!Array.isArray(kids)) {
        // Prevent errors in corrupt PDF documents that violate the
        // specification by *inlining* Page dicts directly in the Kids
        // array, rather than using indirect objects (fixes issue9540.pdf).
        let type = currentNode.getRaw("Type");
        if (type instanceof Ref) {
          type = await xref.fetchAsync(type);
        }
        if (isName(type, "Page") || !currentNode.has("Kids")) {
          if (currentPageIndex === pageIndex) {
            return [currentNode, null];
          }
          currentPageIndex++;
          continue;
        }

        throw new FormatError("Page dictionary kids object is not an array.");
      }

      // Always check all `Kids` nodes, to avoid getting stuck in an empty
      // node further down in the tree (see issue5644.pdf, issue8088.pdf),
      // and to ensure that we actually find the correct `Page` dict.
      for (let last = kids.length - 1; last >= 0; last--) {
        const lastKid = kids[last];
        nodesToVisit.push(lastKid);

        // Launch all requests in parallel so we don't wait for each one in turn
        // when looking for a page near the end, if all the pages are top level.
        if (
          currentNode === this.toplevelPagesDict &&
          lastKid instanceof Ref &&
          !pageDictCache.has(lastKid)
        ) {
          pageDictCache.put(lastKid, xref.fetchAsync(lastKid));
        }
      }
    }

    throw new Error(`Page index ${pageIndex} not found.`);
  }