def write_database_records()

in util/generate_unicode_database.py [0:0]


def write_database_records(unicode, db, trace):  # noqa: C901
    """Emit the C++ Unicode database tables to the ``db`` output stream.

    Generates, in order: the deduplicated array of per-code-point property
    records, a two-level index into it, the NFC first/last reindex tables,
    the decomposition data and its two-level index, and the flattened NFC
    composition matrix with its index.

    Args:
        unicode: parsed Unicode data; provides ``table`` (a per-code-point
            list of UnicodeData.txt-style field lists, or None) and
            ``exclusions`` (composition-exclusion code points).
        db: writable text stream receiving the generated C++ source.
        trace: when true, print summary statistics and dump details.
    """
    # Record 0 is a dummy so that index 0 means "no data for this code
    # point"; `cache` maps a record to its position in `table` for dedup.
    dummy = DatabaseRecord(0, 0, 0, 0, False, 0)
    table = [dummy]
    cache = {dummy: 0}
    index = [0] * NUM_CODE_POINTS

    # Decomposition storage: `decomp_data` is a flat int array; entry 0 is a
    # sentinel meaning "no decomposition".  `decomp_index` maps each code
    # point to its offset into `decomp_data`.
    decomp_data = [0]
    decomp_prefix = [""]
    decomp_index = [0] * NUM_CODE_POINTS
    decomp_size = 0

    # NFC composition: (first, last, composed) triples, plus markers for
    # which code points ever appear as the first/last element of a pair.
    comp_pairs = []
    comp_first = [None] * NUM_CODE_POINTS
    comp_last = [None] * NUM_CODE_POINTS

    for char in CODE_POINTS:
        record = unicode.table[char]
        if record:
            # database properties
            # Field indices follow the UnicodeData.txt layout: [2] general
            # category, [3] canonical combining class, [4] bidi class,
            # [9] bidi-mirrored flag, [15] east asian width; [17] is an
            # extra project-computed field — presumably a numeric/extension
            # value, verify against the parser that builds unicode.table.
            item = DatabaseRecord(
                BIDIRECTIONAL_NAMES.index(record[4]),
                CATEGORY_NAMES.index(record[2]),
                int(record[3]),
                EASTASIANWIDTH_NAMES.index(record[15]),
                record[9] == "Y",
                record[17],
            )

            # Deduplicate identical property records via the cache.
            idx = cache.get(item)
            if idx is None:
                cache[item] = idx = len(table)
                table.append(item)
            index[char] = idx

            # decomposition data
            decomp_idx = 0
            if record[5]:
                decomp = record[5].split()
                # A leading "<tag>" token marks a compatibility
                # decomposition; its absence means canonical.
                prefix = decomp.pop(0) if decomp[0][0] == "<" else ""
                if len(decomp) > MAX_DECOMPOSITION:
                    raise Exception(
                        f"decomposition of code point {char:#x} is too large"
                    )

                # Intern the prefix string; its index must fit in one byte
                # because it is packed into the low 8 bits below.
                try:
                    idx = decomp_prefix.index(prefix)
                except ValueError:
                    idx = len(decomp_prefix)
                    decomp_prefix.append(prefix)
                assert idx < 256

                # Header word: prefix index in the low byte, decomposition
                # length in the high bits; followed by the code points.
                decomp = [idx + (len(decomp) << 8)] + [int(s, 16) for s in decomp]
                # A canonical (no prefix) two-character decomposition that
                # is not composition-excluded and whose first character has
                # combining class 0 is a primary-composite NFC pair.
                # (len == 3 counts the header word plus two characters.)
                if (
                    not idx
                    and len(decomp) == 3
                    and char not in unicode.exclusions
                    and unicode.table[decomp[1]][3] == "0"
                ):
                    _, l, r = decomp
                    comp_first[l] = 1
                    comp_last[r] = 1
                    comp_pairs.append((l, r, char))
                # NOTE(review): `decomp_data` holds ints while `decomp` is a
                # list, so this .index() always raises ValueError and the
                # dedup branch never fires — every decomposition is appended.
                # Same quirk exists in CPython's makeunicodedata.py; output
                # is correct, just not deduplicated.
                try:
                    decomp_idx = decomp_data.index(decomp)
                except ValueError:
                    decomp_idx = len(decomp_data)
                    decomp_data.extend(decomp)
                    decomp_size = decomp_size + len(decomp) * 2
            decomp_index[char] = decomp_idx

    # Renumber the NFC first/last character sets densely (0..total-1) and
    # collect the contiguous code-point ranges they cover, so the C++ side
    # can map a code point to its dense index with a small range table.
    first = last = 0
    comp_first_ranges = []
    comp_last_ranges = []
    prev_first = prev_last = None
    for ch in CODE_POINTS:
        if comp_first[ch] is not None:
            comp_first[ch] = first
            first += 1
            if prev_first is None:
                prev_first = (ch, ch)
            elif prev_first[1] + 1 == ch:
                prev_first = prev_first[0], ch
            else:
                comp_first_ranges.append(prev_first)
                prev_first = (ch, ch)
        if comp_last[ch] is not None:
            comp_last[ch] = last
            last += 1
            if prev_last is None:
                prev_last = (ch, ch)
            elif prev_last[1] + 1 == ch:
                prev_last = prev_last[0], ch
            else:
                comp_last_ranges.append(prev_last)
                prev_last = (ch, ch)
    # Flush the trailing open ranges.  NOTE(review): assumes at least one
    # NFC pair exists; with none, prev_first/prev_last would still be None
    # here and the Reindex comprehension below would fail.
    comp_first_ranges.append(prev_first)
    comp_last_ranges.append(prev_last)
    total_first = first
    total_last = last

    # Flattened total_first x total_last composition matrix: cell
    # [first][last] holds the composed code point (0 = no composition).
    comp_data = [0] * (total_first * total_last)
    for first, last, char in comp_pairs:
        first = comp_first[first]
        last = comp_last[last]
        comp_data[first * total_last + last] = char

    if trace:
        print(len(table), "unique properties")
        print(len(decomp_prefix), "unique decomposition prefixes")
        print(len(decomp_data), "unique decomposition entries:", end=" ")
        print(decomp_size, "bytes")
        print(total_first, "first characters in NFC")
        print(total_last, "last characters in NFC")
        print(len(comp_pairs), "NFC pairs")

    StructArray(
        "UnicodeDatabaseRecord",
        "kDatabaseRecords",
        table,
        "a list of unique database records",
    ).dump(db, trace)

    # split record index table
    # splitbins compresses the sparse per-code-point index into two smaller
    # arrays addressed via shift/mask arithmetic.
    index1, index2, shift = splitbins(index, trace)
    db.write(
        f"""
// type indices
static const int kDatabaseIndexShift = {shift};
static const int32_t kDatabaseIndexMask =
    (int32_t{{1}} << kDatabaseIndexShift) - 1;
"""
    )
    UIntArray("kDatabaseIndex1", index1).dump(db, trace)
    UIntArray("kDatabaseIndex2", index2).dump(db, trace)

    db.write(
        f"""
// Reindexing of NFC first and last characters
struct Reindex {{
  const int32_t start;
  const short count;
  const short index;
}};

static const int kTotalLast = {total_last};
"""
    )

    # Each Reindex entry covers one contiguous code-point range: its start,
    # its length minus one (end - start), and the dense index of its first
    # member; a (0, 0, 0) sentinel terminates each table.
    nfc_first = [
        Reindex(start, end - start, comp_first[start])
        for start, end in comp_first_ranges
    ] + [Reindex(0, 0, 0)]
    nfc_last = [
        Reindex(start, end - start, comp_last[start]) for start, end in comp_last_ranges
    ] + [Reindex(0, 0, 0)]
    StructArray("Reindex", "kNFCFirst", nfc_first).dump(db, trace)
    StructArray("Reindex", "kNFCLast", nfc_last).dump(db, trace)

    # split decomposition index table
    index1, index2, shift = splitbins(decomp_index, trace)

    db.write(
        f"""
// decomposition mappings
static const int kDecompShift = {shift};
static const int32_t kDecompMask = (int32_t{{1}} << kDecompShift) - 1;

const char* kDecompPrefix[] = {{
"""
    )
    for name in decomp_prefix:
        db.write(f'    "{name}",\n')
    db.write("};\n")

    CodePointArray("kDecompData", decomp_data).dump(db, trace)
    UIntArray("kDecompIndex1", index1).dump(db, trace)
    UIntArray("kDecompIndex2", index2).dump(db, trace)

    # The composition matrix itself is also splitbins-compressed.
    index1, index2, shift = splitbins(comp_data, trace)
    db.write(
        f"""
// NFC pairs
static const int kCompShift = {shift};
static const int32_t kCompMask = (int32_t{{1}} << kCompShift) - 1;
"""
    )
    UIntArray("kCompIndex", index1).dump(db, trace)
    UIntArray("kCompData", index2).dump(db, trace)