function readCmapTable()

in src/core/fonts.js [1490:1798]


    /**
     * Selects the preferred subtable of a font's `cmap` table and decodes it
     * into a flat list of character-code -> glyph-id mappings.
     *
     * Supported subtable formats are 0, 2, 4, 6 and 12; any other format is
     * reported through `warn` and yields the empty result.
     *
     * @param {Object} cmap - The cmap table entry; only its `offset` property
     *   is read. A falsy value yields the empty result.
     * @param {Object} file - The font data stream. It is read through `start`,
     *   `pos`, `skip`, `getByte`, `getUint16`, `getInt32`, `peekByte` and
     *   `peekBytes`; its position is modified by this function.
     * @param {boolean} isSymbolicFont - Whether a (3, 0) subtable should be
     *   preferred over a (3, 1) subtable.
     * @param {boolean} hasEncoding - When true, a (3, 1) subtable may replace
     *   an already selected lower-priority subtable.
     * @returns {{platformId: number, encodingId: number,
     *            mappings: Array<{charCode: number, glyphId: number}>,
     *            hasShortCmap: boolean}}
     *   The ids of the chosen subtable and its mappings, sorted by `charCode`
     *   with duplicate character codes removed (first entry wins).
     *   `hasShortCmap` is true only for format 0. When nothing usable is
     *   found both ids are -1 and `mappings` is empty.
     */
    function readCmapTable(cmap, file, isSymbolicFont, hasEncoding) {
      // Result returned whenever no usable subtable could be read.
      const emptyCmap = () => ({
        platformId: -1,
        encodingId: -1,
        mappings: [],
        hasShortCmap: false,
      });

      if (!cmap) {
        warn("No cmap table available.");
        return emptyCmap();
      }
      const start = (file.start || 0) + cmap.offset;
      file.pos = start;

      file.skip(2); // version
      const numTables = file.getUint16();

      let potentialTable;
      let canBreak = false;
      // There's an order of preference in terms of which cmap subtable to
      // use:
      // - non-symbolic fonts the preference is a 3,1 table then a 1,0 table
      // - symbolic fonts the preference is a 3,0 table then a 1,0 table
      // The following takes advantage of the fact that the tables are sorted
      // to work.
      for (let i = 0; i < numTables; i++) {
        const platformId = file.getUint16();
        const encodingId = file.getUint16();
        const offset = file.getInt32() >>> 0;
        let useTable = false;

        // Sometimes there are multiple of the same type of table. Default
        // to choosing the first table and skip the rest.
        if (
          potentialTable?.platformId === platformId &&
          potentialTable?.encodingId === encodingId
        ) {
          continue;
        }

        if (
          platformId === 0 &&
          (encodingId === /* Unicode Default */ 0 ||
            encodingId === /* Unicode 1.1 */ 1 ||
            encodingId === /* Unicode BMP */ 3)
        ) {
          useTable = true;
          // Continue the loop since there still may be a higher priority
          // table.
        } else if (platformId === 1 && encodingId === 0) {
          useTable = true;
          // Continue the loop since there still may be a higher priority
          // table.
        } else if (
          platformId === 3 &&
          encodingId === 1 &&
          (hasEncoding || !potentialTable)
        ) {
          useTable = true;
          if (!isSymbolicFont) {
            canBreak = true;
          }
        } else if (isSymbolicFont && platformId === 3 && encodingId === 0) {
          useTable = true;

          // Only stop here if the following record does not belong to a
          // lower platform id, i.e. if the records really are sorted.
          let correctlySorted = true;
          if (i < numTables - 1) {
            const nextBytes = file.peekBytes(2);
            const nextPlatformId = int16(nextBytes[0], nextBytes[1]);
            if (nextPlatformId < platformId) {
              correctlySorted = false;
            }
          }
          if (correctlySorted) {
            canBreak = true;
          }
        }

        if (useTable) {
          potentialTable = {
            platformId,
            encodingId,
            offset,
          };
        }
        if (canBreak) {
          break;
        }
      }

      if (potentialTable) {
        // Subtable offsets are relative to the beginning of the cmap table.
        file.pos = start + potentialTable.offset;
      }
      if (!potentialTable || file.peekByte() === -1) {
        warn("Could not find a preferred cmap table.");
        return emptyCmap();
      }

      const format = file.getUint16();
      let hasShortCmap = false;
      const mappings = [];

      // TODO(mack): refactor this cmap subtable reading logic out
      if (format === 0) {
        file.skip(2 + 2); // length + language

        // One glyph id byte per character code; zero entries are skipped.
        for (let charCode = 0; charCode < 256; charCode++) {
          const index = file.getByte();
          if (!index) {
            continue;
          }
          mappings.push({
            charCode,
            glyphId: index,
          });
        }
        hasShortCmap = true;
      } else if (format === 2) {
        file.skip(2 + 2); // length + language

        const subHeaderKeys = [];
        let maxSubHeaderKey = 0;
        // Read subHeaderKeys. If subHeaderKeys[i] === 0, then i is a
        // single-byte character. Otherwise, i is the first byte of a
        // multi-byte character, and the value is 8*index into
        // subHeaders.
        for (let i = 0; i < 256; i++) {
          const subHeaderKey = file.getUint16() >> 3;
          subHeaderKeys.push(subHeaderKey);
          maxSubHeaderKey = Math.max(subHeaderKey, maxSubHeaderKey);
        }
        // Read subHeaders. The number of entries is determined
        // dynamically based on the subHeaderKeys found above.
        const subHeaders = [];
        for (let i = 0; i <= maxSubHeaderKey; i++) {
          subHeaders.push({
            firstCode: file.getUint16(),
            entryCount: file.getUint16(),
            idDelta: signedInt16(file.getByte(), file.getByte()),
            // The range offset is relative to the position of its own field,
            // hence `file.pos` is sampled before the field is read.
            idRangePos: file.pos + file.getUint16(),
          });
        }
        for (let i = 0; i < 256; i++) {
          if (subHeaderKeys[i] === 0) {
            // i is a single-byte code.
            file.pos = subHeaders[0].idRangePos + 2 * i;
            const glyphId = file.getUint16();
            mappings.push({
              charCode: i,
              glyphId,
            });
          } else {
            // i is the first byte of a two-byte code.
            const s = subHeaders[subHeaderKeys[i]];
            for (let k = 0; k < s.entryCount; k++) {
              const charCode = (i << 8) + k + s.firstCode;
              file.pos = s.idRangePos + 2 * k;
              let glyphId = file.getUint16();
              if (glyphId !== 0) {
                glyphId = (glyphId + s.idDelta) % 65536;
              }
              mappings.push({
                charCode,
                glyphId,
              });
            }
          }
        }
      } else if (format === 4) {
        file.skip(2 + 2); // length + language

        // re-creating the table in format 4 since the encoding
        // might be changed
        const segCount = file.getUint16() >> 1;
        file.skip(6); // skipping range fields
        const segments = [];
        for (let segIndex = 0; segIndex < segCount; segIndex++) {
          segments.push({ end: file.getUint16() });
        }
        file.skip(2); // padding between the end codes and the start codes
        for (const segment of segments) {
          segment.start = file.getUint16();
        }
        for (const segment of segments) {
          segment.delta = file.getUint16();
        }

        // Convert every non-zero range offset into an index in the glyph id
        // array that follows, and find out how many of its entries are used.
        let offsetsCount = 0;
        for (let segIndex = 0; segIndex < segCount; segIndex++) {
          const segment = segments[segIndex];
          const rangeOffset = file.getUint16();
          if (!rangeOffset) {
            segment.offsetIndex = -1;
            continue;
          }

          const offsetIndex = (rangeOffset >> 1) - (segCount - segIndex);
          segment.offsetIndex = offsetIndex;
          offsetsCount = Math.max(
            offsetsCount,
            offsetIndex + segment.end - segment.start + 1
          );
        }

        const offsets = [];
        for (let k = 0; k < offsetsCount; k++) {
          offsets.push(file.getUint16());
        }

        for (const segment of segments) {
          const { start: segStart, end: segEnd, delta, offsetIndex } = segment;

          for (let charCode = segStart; charCode <= segEnd; charCode++) {
            if (charCode === 0xffff) {
              continue;
            }

            let glyphId;
            if (offsetIndex < 0) {
              // No glyph id array for this segment: the glyph id is the
              // character code shifted by the segment's delta.
              glyphId = (charCode + delta) & 0xffff;
            } else {
              glyphId = offsets[offsetIndex + charCode - segStart];
              // A zero entry in the glyph id array denotes the missing glyph
              // and must not have the delta applied (the format 2 branch
              // above follows the same rule).
              if (glyphId !== 0) {
                glyphId = (glyphId + delta) & 0xffff;
              }
            }
            mappings.push({
              charCode,
              glyphId,
            });
          }
        }
      } else if (format === 6) {
        file.skip(2 + 2); // length + language

        // Format 6 stores the glyph ids of `entryCount` consecutive character
        // codes, starting at `firstCode`, as one dense array. According to the
        // original note this layout works on Linux but seems to fail on Mac,
        // so the entries are expanded into individual mappings in order for
        // the cmap table to be rewritten in a 3-1-4 style.
        const firstCode = file.getUint16();
        const entryCount = file.getUint16();

        for (let k = 0; k < entryCount; k++) {
          const glyphId = file.getUint16();
          const charCode = firstCode + k;

          mappings.push({
            charCode,
            glyphId,
          });
        }
      } else if (format === 12) {
        file.skip(2 + 4 + 4); // reserved + length + language

        // Each group maps a run of consecutive character codes to a run of
        // consecutive glyph ids.
        // NOTE(review): the group ranges are taken from the font as-is, so
        // the number of generated mappings is bounded only by the font data.
        const nGroups = file.getInt32() >>> 0;
        for (let group = 0; group < nGroups; group++) {
          const startCharCode = file.getInt32() >>> 0;
          const endCharCode = file.getInt32() >>> 0;
          let glyphCode = file.getInt32() >>> 0;

          for (
            let charCode = startCharCode;
            charCode <= endCharCode;
            charCode++
          ) {
            mappings.push({
              charCode,
              glyphId: glyphCode++,
            });
          }
        }
      } else {
        warn(`cmap table has unsupported format: ${format}`);
        return emptyCmap();
      }

      // removing duplicate entries (the first mapping of a character code is
      // the one that is kept)
      mappings.sort((a, b) => a.charCode - b.charCode);
      const finalMappings = [];
      const seenCharCodes = new Set();
      for (const map of mappings) {
        const { charCode } = map;

        if (seenCharCodes.has(charCode)) {
          continue;
        }
        seenCharCodes.add(charCode);
        finalMappings.push(map);
      }

      return {
        platformId: potentialTable.platformId,
        encodingId: potentialTable.encodingId,
        mappings: finalMappings,
        hasShortCmap,
      };
    }