in src/core/fonts.js [1490:1798]
/**
 * Pick the preferred subtable of a font's `cmap` table and decode it into a
 * flat, de-duplicated list of character code -> glyph id mappings.
 *
 * Subtable formats 0, 2, 4, 6 and 12 are decoded; any other format produces
 * a warning and the "empty" result described below.
 *
 * @param {Object} cmap - Directory entry of the `cmap` table; only its
 *   `offset` is read. A falsy value yields the "empty" result.
 * @param {Object} file - Stream positioned over the font data. It is read
 *   through `pos`, `start`, `skip`, `getByte`, `getUint16`, `getInt32`,
 *   `peekByte` and `peekBytes`, and its `pos` is moved as a side effect.
 * @param {boolean} isSymbolicFont - Whether the font is symbolic; changes
 *   which (platformId, encodingId) subtable is preferred.
 * @param {boolean} hasEncoding - When truthy, a (3,1) subtable is allowed to
 *   replace a candidate that was found earlier in the table.
 * @returns {{platformId: number, encodingId: number,
 *            mappings: Array<{charCode: number, glyphId: number}>,
 *            hasShortCmap: boolean}}
 *   The ids of the chosen subtable and its mappings sorted by `charCode`
 *   (first entry wins on duplicates). `hasShortCmap` is only set for
 *   format 0. The "empty" result is
 *   `{ platformId: -1, encodingId: -1, mappings: [], hasShortCmap: false }`.
 */
function readCmapTable(cmap, file, isSymbolicFont, hasEncoding) {
  if (!cmap) {
    warn("No cmap table available.");
    return {
      platformId: -1,
      encodingId: -1,
      mappings: [],
      hasShortCmap: false,
    };
  }
  // Subtable offsets in the encoding records are relative to this position.
  const start = (file.start || 0) + cmap.offset;
  file.pos = start;

  file.skip(2); // version
  const numTables = file.getUint16();

  let potentialTable;
  let canBreak = false;
  // There's an order of preference in terms of which cmap subtable to
  // use:
  // - non-symbolic fonts the preference is a 3,1 table then a 1,0 table
  // - symbolic fonts the preference is a 3,0 table then a 1,0 table
  // The following takes advantage of the fact that the tables are sorted
  // to work.
  for (let i = 0; i < numTables; i++) {
    const platformId = file.getUint16();
    const encodingId = file.getUint16();
    const offset = file.getInt32() >>> 0;
    let useTable = false;

    // Sometimes there are multiple of the same type of table. Default
    // to choosing the first table and skip the rest.
    if (
      potentialTable?.platformId === platformId &&
      potentialTable?.encodingId === encodingId
    ) {
      continue;
    }

    if (
      platformId === 0 &&
      (encodingId === /* Unicode Default */ 0 ||
        encodingId === /* Unicode 1.1 */ 1 ||
        encodingId === /* Unicode BMP */ 3)
    ) {
      useTable = true;
      // Continue the loop since there still may be a higher priority
      // table.
    } else if (platformId === 1 && encodingId === 0) {
      useTable = true;
      // Continue the loop since there still may be a higher priority
      // table.
    } else if (
      platformId === 3 &&
      encodingId === 1 &&
      (hasEncoding || !potentialTable)
    ) {
      useTable = true;
      if (!isSymbolicFont) {
        canBreak = true;
      }
    } else if (isSymbolicFont && platformId === 3 && encodingId === 0) {
      useTable = true;

      // Only stop here if the records really are sorted: peek at the
      // platform id of the next record and keep scanning when it is lower
      // than the current one.
      let correctlySorted = true;
      if (i < numTables - 1) {
        const nextBytes = file.peekBytes(2);
        const nextPlatformId = int16(nextBytes[0], nextBytes[1]);
        if (nextPlatformId < platformId) {
          correctlySorted = false;
        }
      }
      if (correctlySorted) {
        canBreak = true;
      }
    }

    if (useTable) {
      potentialTable = {
        platformId,
        encodingId,
        offset,
      };
    }
    if (canBreak) {
      break;
    }
  }

  if (potentialTable) {
    file.pos = start + potentialTable.offset;
  }
  // `peekByte() === -1` means the chosen offset points past the end of the
  // available data.
  if (!potentialTable || file.peekByte() === -1) {
    warn("Could not find a preferred cmap table.");
    return {
      platformId: -1,
      encodingId: -1,
      mappings: [],
      hasShortCmap: false,
    };
  }

  const format = file.getUint16();
  let hasShortCmap = false;
  const mappings = [];

  // TODO(mack): refactor this cmap subtable reading logic out
  if (format === 0) {
    file.skip(2 + 2); // length + language

    // One glyph id byte per character code; zero entries are not mapped.
    for (let charCode = 0; charCode < 256; charCode++) {
      const index = file.getByte();
      if (!index) {
        continue;
      }
      mappings.push({
        charCode,
        glyphId: index,
      });
    }
    hasShortCmap = true;
  } else if (format === 2) {
    file.skip(2 + 2); // length + language

    const subHeaderKeys = [];
    let maxSubHeaderKey = 0;
    // Read subHeaderKeys. If subHeaderKeys[i] === 0, then i is a
    // single-byte character. Otherwise, i is the first byte of a
    // multi-byte character, and the value is 8*index into
    // subHeaders.
    for (let i = 0; i < 256; i++) {
      const subHeaderKey = file.getUint16() >> 3;
      subHeaderKeys.push(subHeaderKey);
      maxSubHeaderKey = Math.max(subHeaderKey, maxSubHeaderKey);
    }
    // Read subHeaders. The number of entries is determined
    // dynamically based on the subHeaderKeys found above.
    const subHeaders = [];
    for (let i = 0; i <= maxSubHeaderKey; i++) {
      subHeaders.push({
        firstCode: file.getUint16(),
        entryCount: file.getUint16(),
        idDelta: signedInt16(file.getByte(), file.getByte()),
        // `file.pos` is evaluated before the read, so the range offset is
        // resolved relative to the position of the offset field itself.
        idRangePos: file.pos + file.getUint16(),
      });
    }
    for (let i = 0; i < 256; i++) {
      if (subHeaderKeys[i] === 0) {
        // i is a single-byte code.
        file.pos = subHeaders[0].idRangePos + 2 * i;
        const glyphId = file.getUint16();
        mappings.push({
          charCode: i,
          glyphId,
        });
      } else {
        // i is the first byte of a two-byte code.
        const s = subHeaders[subHeaderKeys[i]];
        for (let j = 0; j < s.entryCount; j++) {
          const charCode = (i << 8) + j + s.firstCode;
          file.pos = s.idRangePos + 2 * j;
          let glyphId = file.getUint16();
          if (glyphId !== 0) {
            // `idDelta` is signed, so the sum can be negative. Mask instead
            // of using `%`, whose result keeps the sign of the dividend and
            // would yield a negative glyph id rather than wrapping around.
            glyphId = (glyphId + s.idDelta) & 0xffff;
          }
          mappings.push({
            charCode,
            glyphId,
          });
        }
      }
    }
  } else if (format === 4) {
    file.skip(2 + 2); // length + language

    // re-creating the table in format 4 since the encoding
    // might be changed
    const segCount = file.getUint16() >> 1;
    file.skip(6); // skipping range fields
    const segments = [];
    for (let segIndex = 0; segIndex < segCount; segIndex++) {
      segments.push({ end: file.getUint16() });
    }
    file.skip(2); // two bytes separate the end codes from the start codes
    for (let segIndex = 0; segIndex < segCount; segIndex++) {
      segments[segIndex].start = file.getUint16();
    }

    for (let segIndex = 0; segIndex < segCount; segIndex++) {
      segments[segIndex].delta = file.getUint16();
    }

    // Number of glyph id entries that have to be read after the range
    // offsets in order to cover every segment that uses them.
    let offsetsCount = 0;
    for (let segIndex = 0; segIndex < segCount; segIndex++) {
      const segment = segments[segIndex];
      const rangeOffset = file.getUint16();
      if (!rangeOffset) {
        // No range offset: glyph ids come from `charCode + delta` alone.
        segment.offsetIndex = -1;
        continue;
      }

      // Convert the byte offset (relative to this segment's own range
      // offset slot) into an index into the array that follows the
      // range offsets.
      const offsetIndex = (rangeOffset >> 1) - (segCount - segIndex);
      segment.offsetIndex = offsetIndex;
      offsetsCount = Math.max(
        offsetsCount,
        offsetIndex + segment.end - segment.start + 1
      );
    }

    const offsets = [];
    for (let j = 0; j < offsetsCount; j++) {
      offsets.push(file.getUint16());
    }

    for (const segment of segments) {
      const { start: segStart, end, delta, offsetIndex } = segment;

      for (let charCode = segStart; charCode <= end; charCode++) {
        if (charCode === 0xffff) {
          continue;
        }

        let glyphId =
          offsetIndex < 0 ? charCode : offsets[offsetIndex + charCode - segStart];
        glyphId = (glyphId + delta) & 0xffff;
        mappings.push({
          charCode,
          glyphId,
        });
      }
    }
  } else if (format === 6) {
    file.skip(2 + 2); // length + language

    // Format 6 is a dense two-byte mapping: `entryCount` consecutive glyph
    // ids for the character codes starting at `firstCode`. It reportedly
    // works on Linux but fails on Mac, so it is only decoded here and the
    // cmap table gets rewritten in a 3-1-4 style.
    const firstCode = file.getUint16();
    const entryCount = file.getUint16();

    for (let j = 0; j < entryCount; j++) {
      const glyphId = file.getUint16();
      const charCode = firstCode + j;

      mappings.push({
        charCode,
        glyphId,
      });
    }
  } else if (format === 12) {
    file.skip(2 + 4 + 4); // reserved + length + language

    // Each group maps a run of consecutive character codes onto a run of
    // consecutive glyph ids.
    const nGroups = file.getInt32() >>> 0;
    for (let j = 0; j < nGroups; j++) {
      const startCharCode = file.getInt32() >>> 0;
      const endCharCode = file.getInt32() >>> 0;
      let glyphCode = file.getInt32() >>> 0;

      for (
        let charCode = startCharCode;
        charCode <= endCharCode;
        charCode++
      ) {
        mappings.push({
          charCode,
          glyphId: glyphCode++,
        });
      }
    }
  } else {
    warn("cmap table has unsupported format: " + format);
    return {
      platformId: -1,
      encodingId: -1,
      mappings: [],
      hasShortCmap: false,
    };
  }

  // removing duplicate entries
  mappings.sort((a, b) => a.charCode - b.charCode);
  const finalMappings = [];
  const seenCharCodes = new Set();
  for (const map of mappings) {
    const { charCode } = map;
    if (seenCharCodes.has(charCode)) {
      continue;
    }
    seenCharCodes.add(charCode);
    finalMappings.push(map);
  }

  return {
    platformId: potentialTable.platformId,
    encodingId: potentialTable.encodingId,
    mappings: finalMappings,
    hasShortCmap,
  };
}