def write_type_data()

in util/generate_unicode_database.py [0:0]


def write_type_data(unicode, db, trace):
    """Write the Unicode character type tables and lookup functions to `db`.

    For every code point in CODE_POINTS that has a record in `unicode.table`,
    a TypeRecord (decimal, digit, flags, lower, title, upper) is built and
    deduplicated into a table of unique records.  The following is then
    emitted, in this order:

    * the unique records as `kTypeRecords`,
    * the split code point -> record index tables `kTypeIndex1` and
      `kTypeIndex2`, together with `kTypeIndexShift` / `kTypeIndexMask`,
    * the `kExtendedCase` array holding multi-character case mappings,
    * the C++ functions `numericValue`, `unicodeIsLinebreak` and
      `unicodeIsWhitespace`, each implemented as a `switch` over code points.

    Summary counts are printed to stdout before anything is written.

    Args:
        unicode: parsed Unicode data; `table`, `special_casing` and
            `case_folding` are read from it.
        db: output stream; written with `db.write` and handed to the
            array `dump` helpers.
        trace: forwarded unchanged to `splitbins` and the `dump` helpers.
    """
    # Entry 0 is an all-zero placeholder; code points without a record keep
    # index 0 and therefore resolve to it.
    dummy = TypeRecord(0, 0, 0, 0, 0, 0)
    table = [dummy]
    # The cache maps record -> table index.  Seed it with the placeholder so
    # that a real all-zero record reuses entry 0 instead of being appended to
    # the table a second time.
    cache = {dummy: 0}
    index = [0] * NUM_CODE_POINTS
    numeric = {}
    spaces = []
    linebreaks = []
    extended_cases = []

    for char in CODE_POINTS:
        record = unicode.table[char]
        if not record:
            continue

        flags, is_linebreak, is_space = _type_class_flags(char, record)
        if is_linebreak:
            linebreaks.append(char)
        if is_space:
            spaces.append(char)

        case_flags, lower, title, upper = _type_case_fields(
            unicode, char, record, extended_cases
        )
        numeric_flags, decimal, digit = _type_numeric_fields(char, record, numeric)
        flags |= case_flags | numeric_flags

        item = TypeRecord(decimal, digit, flags, lower, title, upper)
        # add entry to index and item tables
        i = cache.get(item)
        if i is None:
            cache[item] = i = len(table)
            table.append(item)
        index[char] = i

    print(len(table), "unique character type entries")
    print(sum(map(len, numeric.values())), "numeric code points")
    print(len(spaces), "whitespace code points")
    print(len(linebreaks), "linebreak code points")
    print(len(extended_cases), "extended case array")

    StructArray(
        "UnicodeTypeRecord",
        "kTypeRecords",
        table,
        "a list of unique character type descriptors",
    ).dump(db, trace)

    # split record index table
    index1, index2, shift = splitbins(index, trace)
    db.write(
        f"""
// type indices
static const int kTypeIndexShift = {shift};
static const int32_t kTypeIndexMask = (int32_t{{1}} << kTypeIndexShift) - 1;
"""
    )
    UIntArray("kTypeIndex1", index1).dump(db, trace)
    UIntArray("kTypeIndex2", index2).dump(db, trace)

    # extended case mappings
    CodePointArray("kExtendedCase", extended_cases).dump(db, trace)

    _write_numeric_value_function(db, numeric)
    _write_code_point_predicate(db, "unicodeIsLinebreak", linebreaks)
    _write_code_point_predicate(db, "unicodeIsWhitespace", spaces)


def _type_class_flags(char, record):
    """Return `(flags, is_linebreak, is_space)` for one code point.

    `flags` combines the classification masks derived from the general
    category (`record[2]`), the bidirectional class (`record[4]`) and the
    property collection (`record[16]`).  The two booleans tell the caller
    whether the code point belongs in the linebreak / whitespace lists.
    """
    category = record[2]
    bidirectional = record[4]
    properties = record[16]

    flags = 0
    # TODO(T55176519): delta = True
    if category in ("Lm", "Lt", "Lu", "Ll", "Lo"):
        flags |= ALPHA_MASK
    if category == "Lt":
        flags |= TITLE_MASK
    # The ASCII space is printable even though its category starts with "Z".
    if char == ord(" ") or category[0] not in ("C", "Z"):
        flags |= PRINTABLE_MASK

    # Properties that translate one-to-one into a flag bit.
    for name, mask in (
        ("Lowercase", LOWER_MASK),
        ("Uppercase", UPPER_MASK),
        ("XID_Start", XID_START_MASK),
        ("XID_Continue", XID_CONTINUE_MASK),
        ("Cased", CASED_MASK),
        ("Case_Ignorable", CASE_IGNORABLE_MASK),
    ):
        if name in properties:
            flags |= mask

    is_linebreak = "Line_Break" in properties or bidirectional == "B"
    if is_linebreak:
        flags |= LINEBREAK_MASK
    is_space = category == "Zs" or bidirectional in ("WS", "B", "S")
    if is_space:
        flags |= SPACE_MASK
    return flags, is_linebreak, is_space


def _type_case_fields(unicode, char, record, extended_cases):
    """Return `(flags, lower, title, upper)` describing the case mappings.

    When only simple one-to-one mappings apply, the fields are deltas relative
    to `char` (all zero when the three mappings coincide) and `flags` is 0.
    Otherwise the mappings are appended to `extended_cases`, the fields encode
    where they were stored, and `flags` is EXTENDED_CASE_MASK.
    """
    sc = unicode.special_casing.get(char)
    cf = unicode.case_folding.get(char, [char])

    # Simple mappings are hexadecimal code point strings; an empty field means
    # "maps to itself", except that title case falls back to upper case.
    upper = int(record[12], 16) if record[12] else char
    lower = int(record[13], 16) if record[13] else char
    title = int(record[14], 16) if record[14] else upper

    if sc is None and cf != [lower]:
        # The case-folded form differs from the lowercase one, so it has to be
        # stored explicitly alongside the simple mappings.
        sc = ([lower], [title], [upper])

    if sc is not None:
        # This happens either when some character maps to more than one
        # character in uppercase, lowercase, or titlecase or the casefolded
        # version of the character is different from the lowercase. The extra
        # characters are stored in a different array.
        lower, title, upper = _type_extended_case_fields(sc, cf, extended_cases)
        return EXTENDED_CASE_MASK, lower, title, upper

    if upper == lower == title:
        return 0, 0, 0, 0

    upper -= char
    lower -= char
    title -= char
    int32_max = 2147483647
    assert all(
        abs(delta) <= int32_max for delta in (upper, lower, title)
    ), f"case mapping delta for code point {char:#x} does not fit in int32"
    return 0, lower, title, upper


def _type_extended_case_fields(sc, cf, extended_cases):
    """Append the mappings to `extended_cases`; return `(lower, title, upper)`.

    `sc` holds the (lower, title, upper) code point sequences and `cf` the
    case-folded sequence.  Each returned field is the offset of its sequence
    in `extended_cases` OR-ed with the sequence length shifted left by 24.
    When the folded form differs from the lowercase one it is stored directly
    after the lowercase sequence and its length is added to `lower`, shifted
    left by 20.
    """
    lower_seq, title_seq, upper_seq = sc[0], sc[1], sc[2]

    lower = len(extended_cases) | (len(lower_seq) << 24)
    extended_cases.extend(lower_seq)
    if cf != lower_seq:
        lower |= len(cf) << 20
        extended_cases.extend(cf)

    upper = len(extended_cases) | (len(upper_seq) << 24)
    extended_cases.extend(upper_seq)

    # Title is probably equal to upper; share the stored sequence if so.
    if title_seq == upper_seq:
        title = upper
    else:
        title = len(extended_cases) | (len(title_seq) << 24)
        extended_cases.extend(title_seq)
    return lower, title, upper


def _type_numeric_fields(char, record, numeric):
    """Return `(flags, decimal, digit)` and register `char` in `numeric`.

    `record[6]` and `record[7]` supply the decimal and digit values (0 when
    the field is empty).  A non-empty `record[8]` is used as the key under
    which `char` is appended in the `numeric` dict.
    """
    flags = 0
    decimal = 0
    if record[6]:
        flags |= DECIMAL_MASK
        decimal = int(record[6])
    digit = 0
    if record[7]:
        flags |= DIGIT_MASK
        digit = int(record[7])
    if record[8]:
        flags |= NUMERIC_MASK
        numeric.setdefault(record[8], []).append(char)
    return flags, decimal, digit


def _write_case_labels(db, code_points):
    """Write one `case 0x...:` label per code point, in ascending order."""
    for code_point in sorted(code_points):
        db.write(f"\n    case {code_point:#08x}:")


def _write_numeric_value_function(db, numeric):
    """Write the C++ `numericValue` function for the collected numeric values.

    `numeric` maps a value string (an integer or a "numerator/denominator"
    fraction) to the code points carrying that value.
    """
    db.write(
        """
double numericValue(int32_t code_point) {
  switch (code_point) {"""
    )
    for value, code_points in sorted(numeric.items()):
        # Turn e.g. "1/2" into the floating point expression "1.0 / 2.0".
        expression = " / ".join(repr(float(part)) for part in value.split("/"))
        _write_case_labels(db, code_points)
        db.write(f"\n      return {expression};")
    db.write(
        """
    default:
      return -1.0;
  }
}
"""
    )


def _write_code_point_predicate(db, name, code_points):
    """Write a C++ `bool name(int32_t)` that is true for `code_points`."""
    db.write(
        f"""
bool {name}(int32_t code_point) {{
  switch (code_point) {{"""
    )
    _write_case_labels(db, code_points)
    db.write(
        """
      return true;
    default:
      return false;
  }
}
"""
    )