func()

in language/gen.go [144:308]


// writeMatchData generates the language matching tables from the CLDR
// language match data of type "written_new". It writes, in this order:
//
//   - regionToGroups: per region index, a bit set with bit i set when the
//     region is listed in (or transitively contained in a region listed in)
//     the i-th match variable;
//   - paradigmLocales: one [language, region, region] triple per pair of
//     paradigm locales;
//   - the mutualIntelligibility, scriptIntelligibility and
//     regionIntelligibility type declarations;
//   - matchLang, matchScript and matchRegion: the languageMatch entries split
//     by the number of subtags they specify, each stably sorted by ascending
//     distance.
//
// Any input the generator cannot represent is reported with log.Fatal, which
// terminates the program.
func (b *builder) writeMatchData() {
	lm := b.supp.LanguageMatching.LanguageMatches
	cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
	if len(lm) == 0 {
		log.Fatal("no language match data of type written_new")
	}
	matches := lm[0]

	// regionHierarchy maps a containing region to all regions it directly
	// contains, merging multiple containment groups of the same type.
	regionHierarchy := map[string][]string{}
	for _, g := range b.supp.TerritoryContainment.Group {
		regions := strings.Split(g.Contains, " ")
		regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
	}
	// Regions start at 1, so the slice must be one larger than the number of
	// regions.
	regionToGroups := make([]uint8, language.NumRegions+1)

	// idToIndex maps a match variable id to its 1-based position.
	idToIndex := map[string]uint8{}
	for i, mv := range matches.MatchVariable {
		// At most seven variables are accepted, so only bits 0 through 6 of
		// a regionToGroups entry are ever set.
		if i > 6 {
			log.Fatalf("Too many groups: %d", i)
		}
		idToIndex[mv.Id] = uint8(i + 1)
		// TODO: also handle '-'
		for _, r := range strings.Split(mv.Value, "+") {
			// Breadth-first walk over r and everything it contains; todo
			// grows while it is being iterated.
			todo := []string{r}
			for k := 0; k < len(todo); k++ {
				r := todo[k]
				regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
				todo = append(todo, regionHierarchy[r]...)
			}
		}
	}
	b.w.WriteVar("regionToGroups", regionToGroups)

	// maps language id to in- and out-of-group region.
	if len(matches.ParadigmLocales) == 0 {
		log.Fatal("no paradigm locales in language match data")
	}
	paradigmLocales := [][3]uint16{}
	locales := strings.Split(matches.ParadigmLocales[0].Locales, " ")
	// The locales are consumed two at a time, so an odd count cannot be
	// represented.
	if len(locales)%2 != 0 {
		log.Fatalf("paradigm locales must come in pairs; got %d: %q", len(locales), locales)
	}
	for i := 0; i < len(locales); i += 2 {
		// x[0] is the language; x[1] and x[2] are the regions of the first
		// and second locale of the pair (left at zero when a locale has no
		// region). The language of the second locale overwrites the first.
		x := [3]uint16{}
		for j := 0; j < 2; j++ {
			pc := strings.SplitN(locales[i+j], "-", 2)
			x[0] = b.langIndex(pc[0])
			if len(pc) == 2 {
				x[1+j] = uint16(b.regionIndex(pc[1]))
			}
		}
		paradigmLocales = append(paradigmLocales, x)
	}
	b.w.WriteVar("paradigmLocales", paradigmLocales)

	b.w.WriteType(mutualIntelligibility{})
	b.w.WriteType(scriptIntelligibility{})
	b.w.WriteType(regionIntelligibility{})

	matchLang := []mutualIntelligibility{}
	matchScript := []scriptIntelligibility{}
	matchRegion := []regionIntelligibility{}
	// Convert the languageMatch entries in lists keyed by desired language.
	for _, m := range matches.LanguageMatch {
		// Different versions of CLDR use different separators.
		desired := strings.Replace(m.Desired, "-", "_", -1)
		supported := strings.Replace(m.Supported, "-", "_", -1)
		d := strings.Split(desired, "_")
		s := strings.Split(supported, "_")
		if len(d) != len(s) {
			log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
		}
		// The distance is stored in a uint8 below, so it must parse and must
		// not be negative; otherwise a bogus value would silently end up in
		// the generated tables.
		distance, err := strconv.ParseInt(m.Distance, 10, 8)
		if err != nil {
			log.Fatalf("invalid distance %q (desired=%q; supported=%q): %v", m.Distance, desired, supported, err)
		}
		if distance < 0 {
			log.Fatalf("negative distance %d (desired=%q; supported=%q)", distance, desired, supported)
		}
		switch len(d) {
		case 2:
			if desired == supported && desired == "*_*" {
				continue
			}
			// language-script pair.
			matchScript = append(matchScript, scriptIntelligibility{
				wantLang:   uint16(b.langIndex(d[0])),
				haveLang:   uint16(b.langIndex(s[0])),
				wantScript: uint8(b.scriptIndex(d[1])),
				haveScript: uint8(b.scriptIndex(s[1])),
				distance:   uint8(distance),
			})
			// A two-way match is stored as two entries, one per direction.
			if m.Oneway != "true" {
				matchScript = append(matchScript, scriptIntelligibility{
					wantLang:   uint16(b.langIndex(s[0])),
					haveLang:   uint16(b.langIndex(d[0])),
					wantScript: uint8(b.scriptIndex(s[1])),
					haveScript: uint8(b.scriptIndex(d[1])),
					distance:   uint8(distance),
				})
			}
		case 1:
			if desired == supported && desired == "*" {
				continue
			}
			if distance == 1 {
				// nb == no is already handled by macro mapping. Check there
				// really is only this case.
				if d[0] != "no" || s[0] != "nb" {
					log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
				}
				continue
			}
			// TODO: consider dropping oneway field and just doubling the entry.
			matchLang = append(matchLang, mutualIntelligibility{
				want:     uint16(b.langIndex(d[0])),
				have:     uint16(b.langIndex(s[0])),
				distance: uint8(distance),
				oneway:   m.Oneway == "true",
			})
		case 3:
			if desired == supported && desired == "*_*_*" {
				continue
			}
			if desired != supported {
				// This is now supported by CLDR, but only one case, which
				// should already be covered by paradigm locales. For instance,
				// test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
				// testdata/CLDRLocaleMatcherTest.txt tests this.
				if supported != "en_*_GB" {
					log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
				}
				continue
			}
			ri := regionIntelligibility{
				lang:     b.langIndex(d[0]),
				distance: uint8(distance),
			}
			if d[1] != "*" {
				ri.script = uint8(b.scriptIndex(d[1]))
			}
			// Encode the region subtag into ri.group. Any value that is
			// neither "*" nor a match variable leaves the group at zero.
			region := d[2]
			switch {
			case region == "*":
				ri.group = 0x80 // not contained in anything
			case strings.HasPrefix(region, "$"):
				if strings.HasPrefix(region, "$!") {
					// "$!name" is stored as the index of "$name" with the
					// 0x80 flag set.
					ri.group = 0x80
					region = "$" + region[len("$!"):]
				}
				index, ok := idToIndex[region]
				if !ok {
					// Without this check an undefined variable would be
					// silently encoded as index zero.
					log.Fatalf("unknown match variable %q: desired=%q; supported=%q", region, desired, supported)
				}
				ri.group |= index
			}
			matchRegion = append(matchRegion, ri)
		default:
			log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
		}
	}

	// Each table is stably sorted by distance so that entries with equal
	// distance keep the order in which they appear in the CLDR data.
	sort.SliceStable(matchLang, func(i, j int) bool {
		return matchLang[i].distance < matchLang[j].distance
	})
	b.w.WriteComment(`
		matchLang holds pairs of langIDs of base languages that are typically
		mutually intelligible. Each pair is associated with a confidence and
		whether the intelligibility goes one or both ways.`)
	b.w.WriteVar("matchLang", matchLang)

	b.w.WriteComment(`
		matchScript holds pairs of scriptIDs where readers of one script
		can typically also read the other. Each is associated with a confidence.`)
	sort.SliceStable(matchScript, func(i, j int) bool {
		return matchScript[i].distance < matchScript[j].distance
	})
	b.w.WriteVar("matchScript", matchScript)

	sort.SliceStable(matchRegion, func(i, j int) bool {
		return matchRegion[i].distance < matchRegion[j].distance
	})
	b.w.WriteVar("matchRegion", matchRegion)
}