in internal/language/gen.go [715:847]
// writeLanguage emits the language-related constants and tables through the
// builder's write helpers: the constants "nonCanonicalUnd", the langConsts,
// "langPrivateStart", "langPrivateEnd", "lang", "langNoIndexOffset" and
// "altLangISO3", the bit vector "langNoIndex", the slices "altLangIndex" and
// "AliasTypes", and the sorted map "AliasMap".
//
// The statements below mutate several stringSet values (add, updateLater,
// freeze, index, slice) whose exact semantics are defined elsewhere; the order
// of these calls is presumably significant — confirm against stringSet before
// reordering anything.
func (b *builder) writeLanguage() {
// meta supplies the LanguageAlias list iterated below.
meta := b.supp.Metadata
// NOTE(review): "nonCanonicalUnd" is looked up with b.lang.index, whereas the
// other constants use b.langIndex; the difference between the two lookups is
// not visible in this function.
b.writeConst("nonCanonicalUnd", b.lang.index("und"))
b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
// Record the indices of "qaa" and "qtz" as the bounds of the private-use
// language range.
b.writeConst("langPrivateStart", b.langIndex("qaa"))
b.writeConst("langPrivateEnd", b.langIndex("qtz"))
// Get language codes that need to be mapped (overlong 3-letter codes,
// deprecated 2-letter codes, legacy and grandfathered tags.)
// langAliasMap collects each alias code together with its replacement;
// aliasTypeMap records which kind of alias (Macro, Legacy, Deprecated) it is.
langAliasMap := stringSet{}
aliasTypeMap := map[string]AliasType{}
// altLangISO3 collects the alternative ISO3 names that need to be mapped,
// i.e. 3-letter forms whose first letter differs from their 2-letter code
// (see the loop over lang.s below).
altLangISO3 := stringSet{}
// Add dummy start to avoid the use of index 0.
altLangISO3.add("---")
altLangISO3.updateLater("---", "aa")
// Work on a clone of b.lang; b.lang itself is still read further down
// (len(b.lang.s) and b.lang.index), presumably in its unmodified form.
lang := b.lang.clone()
for _, a := range meta.Alias.LanguageAlias {
// An alias without a replacement is treated as mapping to "und".
if a.Replacement == "" {
a.Replacement = "und"
}
// TODO: support mapping to tags
// Until then, keep only the part of the replacement before the first "_".
repl := strings.SplitN(a.Replacement, "_", 2)[0]
if a.Reason == "overlong" {
// Only a 3-letter type with a 2-letter replacement is recorded: the
// 3-letter form is registered on lang as the pending update of the
// 2-letter code. Other overlong aliases are ignored here.
if len(a.Replacement) == 2 && len(a.Type) == 3 {
lang.updateLater(a.Replacement, a.Type)
}
} else if len(a.Type) <= 3 {
// Aliases whose type is longer than 3 characters are not handled here.
switch a.Reason {
case "macrolanguage":
aliasTypeMap[a.Type] = Macro
case "deprecated":
// handled elsewhere
continue
case "bibliographic", "legacy":
// "no" is deliberately left out of the alias map.
if a.Type == "no" {
continue
}
aliasTypeMap[a.Type] = Legacy
default:
// Abort generation on an unknown reason so that a new alias category
// in the input data is noticed rather than silently dropped.
log.Fatalf("new %s alias: %s", a.Reason, a.Type)
}
langAliasMap.add(a.Type)
langAliasMap.updateLater(a.Type, repl)
}
}
// Manually add the mapping of "nb" (Norwegian) to its macro language.
// This can be removed if CLDR adopts this change.
langAliasMap.add("nb")
langAliasMap.updateLater("nb", "no")
aliasTypeMap["nb"] = Macro
for k, v := range b.registry {
// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
// Only registry entries of type "language" that carry both a deprecation
// marker and a preferred value qualify. The assignment to aliasTypeMap
// overwrites any alias type recorded above for the same code.
if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
langAliasMap.add(k)
langAliasMap.updateLater(k, v.preferred)
aliasTypeMap[k] = Deprecated
}
}
// Fix CLDR mappings.
// Hard-coded 2-letter code / 3-letter form pairs registered on lang.
lang.updateLater("tl", "tgl")
lang.updateLater("sh", "hbs")
lang.updateLater("mo", "mol")
lang.updateLater("no", "nor")
lang.updateLater("tw", "twi")
lang.updateLater("nb", "nob")
lang.updateLater("ak", "aka")
lang.updateLater("bh", "bih")
// Ensure that each 2-letter code is matched with a 3-letter code.
// NOTE(review): the first entry of lang.s is skipped; the reason is not
// visible in this function.
for _, v := range lang.s[1:] {
s, ok := lang.update[v]
if !ok {
// No direct 3-letter form: fall back to the 3-letter form of the code
// that v is aliased to, and adopt it for v. Entries with neither are
// left for the completion loop below to deal with.
if s, ok = lang.update[langAliasMap.update[v]]; !ok {
continue
}
lang.update[v] = s
}
// A 3-letter form that does not start with the same letter as v cannot
// be stored as a suffix of v, so it goes into altLangISO3, mapped back
// to v.
if v[0] != s[0] {
altLangISO3.add(s)
altLangISO3.updateLater(s, v)
}
}
// Complete canonicalized language tags.
// Each entry of lang.s gets a suffix appended in place; freeze is called
// first, presumably to finalize lang.s before it is indexed — confirm
// against stringSet.
lang.freeze()
for i, v := range lang.s {
// We can avoid these manual entries by using the IANA registry directly.
// Seems easier to update the list manually, as changes are rare.
// The panic in this loop will trigger if we miss an entry.
add := ""
if s, ok := lang.update[v]; ok {
if s[0] == v[0] {
// Same first letter: store only the remainder of the 3-letter form.
add = s[1:]
} else {
// Different first letter: store a zero byte followed by the index of
// the 3-letter form in altLangISO3.
// NOTE(review): the index is narrowed to a single byte, so more than
// 255 altLangISO3 entries would silently wrap.
add = string([]byte{0, byte(altLangISO3.index(s))})
}
} else if len(v) == 3 {
// A 3-letter code without a separate long form is padded with one
// zero byte.
add = "\x00"
} else {
log.Panicf("no data for long form of %q", v)
}
lang.s[i] += add
}
b.writeConst("lang", tag.Index(lang.join()))
// The offset is the number of entries in the original b.lang set, not in
// the clone modified above.
b.writeConst("langNoIndexOffset", len(b.lang.s))
// Bit vector over the space of all valid 3-letter language identifiers.
b.writeBitVector("langNoIndex", b.langNoIndex.slice())
// Build altLangIndex: for every altLangISO3 entry except the dummy, the
// index in b.lang of the 2-letter code it maps back to. Each altLangISO3
// entry is also extended in place with one byte holding the length of
// altLangIndex at that moment, i.e. the position its index is about to
// occupy (0 for the dummy).
// The i > 0 test assumes the dummy "---" is the first element returned by
// slice() — TODO confirm that slice() returns the entries sorted.
// NOTE(review): the position is narrowed to a single byte as well.
altLangIndex := []uint16{}
for i, s := range altLangISO3.slice() {
altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))})
if i > 0 {
idx := b.lang.index(altLangISO3.update[s])
altLangIndex = append(altLangIndex, uint16(idx))
}
}
b.writeConst("altLangISO3", tag.Index(altLangISO3.join()))
b.writeSlice("altLangIndex", altLangIndex)
b.writeSortedMap("AliasMap", &langAliasMap, b.langIndex)
// AliasTypes runs parallel to langAliasMap.s as it stands after
// writeSortedMap; codes without an entry in aliasTypeMap get the zero
// AliasType.
types := make([]AliasType, len(langAliasMap.s))
for i, s := range langAliasMap.s {
types[i] = aliasTypeMap[s]
}
b.writeSlice("AliasTypes", types)
}