in src/core/evaluator.js [3534:3671]
async extractDataStructures(dict, properties) {
const xref = this.xref;
let cidToGidBytes;
// 9.10.2
const toUnicodePromise = this.readToUnicode(properties.toUnicode);
if (properties.composite) {
// CIDSystemInfo helps to match CID to glyphs
const cidSystemInfo = dict.get("CIDSystemInfo");
if (cidSystemInfo instanceof Dict) {
properties.cidSystemInfo = {
registry: stringToPDFString(cidSystemInfo.get("Registry")),
ordering: stringToPDFString(cidSystemInfo.get("Ordering")),
supplement: cidSystemInfo.get("Supplement"),
};
}
try {
const cidToGidMap = dict.get("CIDToGIDMap");
if (cidToGidMap instanceof BaseStream) {
cidToGidBytes = cidToGidMap.getBytes();
}
} catch (ex) {
if (!this.options.ignoreErrors) {
throw ex;
}
warn(`extractDataStructures - ignoring CIDToGIDMap data: "${ex}".`);
}
}
// Based on 9.6.6 of the spec the encoding can come from multiple places
// and depends on the font type. The base encoding and differences are
// read here, but the encoding that is actually used is chosen during
// glyph mapping in the font.
// TODO: Loading the built in encoding in the font would allow the
// differences to be merged in here not require us to hold on to it.
const differences = [];
let baseEncodingName = null;
let encoding;
if (dict.has("Encoding")) {
encoding = dict.get("Encoding");
if (encoding instanceof Dict) {
baseEncodingName = encoding.get("BaseEncoding");
baseEncodingName =
baseEncodingName instanceof Name ? baseEncodingName.name : null;
// Load the differences between the base and original
if (encoding.has("Differences")) {
const diffEncoding = encoding.get("Differences");
let index = 0;
for (const entry of diffEncoding) {
const data = xref.fetchIfRef(entry);
if (typeof data === "number") {
index = data;
} else if (data instanceof Name) {
differences[index++] = data.name;
} else {
throw new FormatError(
`Invalid entry in 'Differences' array: ${data}`
);
}
}
}
} else if (encoding instanceof Name) {
baseEncodingName = encoding.name;
} else {
const msg = "Encoding is not a Name nor a Dict";
if (!this.options.ignoreErrors) {
throw new FormatError(msg);
}
warn(msg);
}
// According to table 114 if the encoding is a named encoding it must be
// one of these predefined encodings.
if (
baseEncodingName !== "MacRomanEncoding" &&
baseEncodingName !== "MacExpertEncoding" &&
baseEncodingName !== "WinAnsiEncoding"
) {
baseEncodingName = null;
}
}
const nonEmbeddedFont = !properties.file || properties.isInternalFont,
isSymbolsFontName = getSymbolsFonts()[properties.name];
// Ignore an incorrectly specified named encoding for non-embedded
// symbol fonts (fixes issue16464.pdf).
if (baseEncodingName && nonEmbeddedFont && isSymbolsFontName) {
baseEncodingName = null;
}
if (baseEncodingName) {
properties.defaultEncoding = getEncoding(baseEncodingName);
} else {
const isSymbolicFont = !!(properties.flags & FontFlags.Symbolic);
const isNonsymbolicFont = !!(properties.flags & FontFlags.Nonsymbolic);
// According to "Table 114" in section "9.6.6.1 General" (under
// "9.6.6 Character Encoding") of the PDF specification, a Nonsymbolic
// font should use the `StandardEncoding` if no encoding is specified.
encoding = StandardEncoding;
if (properties.type === "TrueType" && !isNonsymbolicFont) {
encoding = WinAnsiEncoding;
}
// The Symbolic attribute can be misused for regular fonts
// Heuristic: we have to check if the font is a standard one also
if (isSymbolicFont || isSymbolsFontName) {
encoding = MacRomanEncoding;
if (nonEmbeddedFont) {
if (/Symbol/i.test(properties.name)) {
encoding = SymbolSetEncoding;
} else if (/Dingbats/i.test(properties.name)) {
encoding = ZapfDingbatsEncoding;
} else if (/Wingdings/i.test(properties.name)) {
encoding = WinAnsiEncoding;
}
}
}
properties.defaultEncoding = encoding;
}
properties.differences = differences;
properties.baseEncodingName = baseEncodingName;
properties.hasEncoding = !!baseEncodingName || differences.length > 0;
properties.dict = dict;
properties.toUnicode = await toUnicodePromise;
const builtToUnicode = await this.buildToUnicode(properties);
properties.toUnicode = builtToUnicode;
if (cidToGidBytes) {
properties.cidToGidMap = this.readCidToGidMap(
cidToGidBytes,
builtToUnicode
);
}
return properties;
}