in src/core/catalog.js [1163:1292]
/**
 * Locate the page dictionary for a given page by walking the /Pages tree
 * depth-first, using an explicit stack rather than recursion.
 *
 * Pages are counted from zero in document order: the counter starts at 0 and
 * the walk returns as soon as the counter equals `pageIndex` at a leaf.
 * Along the way the method fills `pageKidsCountCache` and `pageIndexCache`,
 * and prefetches the top-level kids into `pageDictCache`, so that later
 * lookups can skip whole subtrees or reuse in-flight fetches.
 *
 * @param {number} pageIndex - Zero-based index of the wanted page.
 * @returns {Promise<Array>} A two-element array `[pageDict, pageRef]`.
 *   `pageRef` is `null` when the page dictionary was inlined directly in a
 *   `Kids` array and therefore has no reference of its own.
 * @throws {FormatError} If the tree contains a circular reference, if a kid
 *   resolves to something that is not a dictionary, or if a non-leaf node has
 *   a `Kids` entry that is not an array.
 * @throws {Error} If the whole tree was walked without reaching `pageIndex`.
 *   Rejections from `xref.fetchAsync` are not caught here and propagate.
 */
async getPageDict(pageIndex) {
// Stack of nodes still to examine. Kids are pushed in reverse order (see the
// loop at the bottom), so `pop()` yields them in document order.
const nodesToVisit = [this.toplevelPagesDict];
const visitedNodes = new RefSet();
// Mark the root /Pages reference as visited up front, so that a kid pointing
// back at the root is reported as a circular reference.
const pagesRef = this._catDict.getRaw("Pages");
if (pagesRef instanceof Ref) {
visitedNodes.put(pagesRef);
}
const xref = this.xref,
pageKidsCountCache = this.pageKidsCountCache,
pageIndexCache = this.pageIndexCache,
pageDictCache = this.pageDictCache;
// Index of the next leaf page the walk will encounter.
let currentPageIndex = 0;
while (nodesToVisit.length) {
const currentNode = nodesToVisit.pop();
// Case 1: the node is an indirect reference that still has to be resolved.
if (currentNode instanceof Ref) {
// NOTE(review): presumably `get` returns `undefined` on a cache miss, in
// which case `count >= 0` is false and the node is fetched below - confirm.
const count = pageKidsCountCache.get(currentNode);
// Skip nodes where the page can't be.
if (count >= 0 && currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
continue;
}
// Prevent circular references in the /Pages tree.
if (visitedNodes.has(currentNode)) {
throw new FormatError("Pages tree contains circular reference.");
}
visitedNodes.put(currentNode);
// Reuse a fetch that was already started by the prefetch loop at the
// bottom of this method, otherwise start one now.
const obj = await (pageDictCache.get(currentNode) ||
xref.fetchAsync(currentNode));
if (obj instanceof Dict) {
// The /Type entry may itself be an indirect reference.
let type = obj.getRaw("Type");
if (type instanceof Ref) {
type = await xref.fetchAsync(type);
}
// A node is treated as a leaf page when it is explicitly typed /Page,
// or when it has no /Kids entry at all.
if (isName(type, "Page") || !obj.has("Kids")) {
// Cache the Page reference, since it can *greatly* improve
// performance by reducing redundant lookups in long documents
// where all nodes are found at *one* level of the tree.
if (!pageKidsCountCache.has(currentNode)) {
pageKidsCountCache.put(currentNode, 1);
}
// Help improve performance of the `getPageIndex` method.
if (!pageIndexCache.has(currentNode)) {
pageIndexCache.put(currentNode, currentPageIndex);
}
if (currentPageIndex === pageIndex) {
return [obj, currentNode];
}
currentPageIndex++;
continue;
}
}
// Not a leaf: push the resolved object back so the next iteration handles
// it as an intermediate node (or rejects it if it is not a dictionary).
nodesToVisit.push(obj);
continue;
}
// Case 2: the node is an already-resolved object.
// Must be a child page dictionary.
if (!(currentNode instanceof Dict)) {
throw new FormatError(
"Page dictionary kid reference points to wrong type of object."
);
}
// NOTE(review): `objId` is presumably set only for dictionaries loaded from
// an indirect object; it is used below as the count-cache key - confirm.
const { objId } = currentNode;
// The /Count entry may itself be an indirect reference.
let count = currentNode.getRaw("Count");
if (count instanceof Ref) {
count = await xref.fetchAsync(count);
}
// Only a non-negative integer /Count is trusted; anything else is ignored
// and the node's kids are examined instead.
if (Number.isInteger(count) && count >= 0) {
// Cache the Kids count, since it can reduce redundant lookups in
// documents where all nodes are found at *one* level of the tree.
if (objId && !pageKidsCountCache.has(objId)) {
pageKidsCountCache.put(objId, count);
}
// Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
continue;
}
}
// The /Kids entry may itself be an indirect reference.
let kids = currentNode.getRaw("Kids");
if (kids instanceof Ref) {
kids = await xref.fetchAsync(kids);
}
if (!Array.isArray(kids)) {
// Prevent errors in corrupt PDF documents that violate the
// specification by *inlining* Page dicts directly in the Kids
// array, rather than using indirect objects (fixes issue9540.pdf).
let type = currentNode.getRaw("Type");
if (type instanceof Ref) {
type = await xref.fetchAsync(type);
}
if (isName(type, "Page") || !currentNode.has("Kids")) {
if (currentPageIndex === pageIndex) {
// An inlined page dictionary has no reference, hence the `null`.
return [currentNode, null];
}
currentPageIndex++;
continue;
}
throw new FormatError("Page dictionary kids object is not an array.");
}
// Always check all `Kids` nodes, to avoid getting stuck in an empty
// node further down in the tree (see issue5644.pdf, issue8088.pdf),
// and to ensure that we actually find the correct `Page` dict.
// Iterating from the last kid to the first makes the first kid end up on
// top of the stack, so it is the next one popped.
for (let last = kids.length - 1; last >= 0; last--) {
const lastKid = kids[last];
nodesToVisit.push(lastKid);
// Launch all requests in parallel so we don't wait for each one in turn
// when looking for a page near the end, if all the pages are top level.
// The cache stores the pending promise itself, not the resolved value.
if (
currentNode === this.toplevelPagesDict &&
lastKid instanceof Ref &&
!pageDictCache.has(lastKid)
) {
pageDictCache.put(lastKid, xref.fetchAsync(lastKid));
}
}
}
// The stack ran dry without ever reaching the requested index.
throw new Error(`Page index ${pageIndex} not found.`);
}