generator.go (577 lines of code) (raw):

// Licensed to Elasticsearch B.V. under one or more contributor // license agreements. See the NOTICE file distributed with // this work for additional information regarding copyright // ownership. Elasticsearch B.V. licenses this file to you under // the Apache License, Version 2.0 (the "License"); you may // not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. //go:build ignore package main import ( "archive/zip" "bytes" "encoding/xml" "flag" "fmt" "go/format" "io" "log" "maps" "net/http" "os" "path" "path/filepath" "sort" "strconv" "strings" "text/template" "golang.org/x/text/language" ) var shortMonthNamesStd = []string{ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", } var longMonthNamesStd = []string{ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", } var shortDayNamesStd = []string{ "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", } var longDayNamesStd = []string{ "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", } var dayPeriodsStd = []string{ "AM", "PM", } var longDayNamesStdMap = map[string]string{ "sun": "Sunday", "mon": "Monday", "tue": "Tuesday", "wed": "Wednesday", "thu": "Thursday", "fri": "Friday", "sat": "Saturday", } var shortDayNamesStdMap = map[string]string{ "sun": "Sun", "mon": "Mon", "tue": "Tue", "wed": "Wed", "thu": "Thu", "fri": "Fri", "sat": "Sat", } var dayPeriodsStdMap = map[string]string{ "am": "AM", "pm": "PM", } var localesData = map[string]*cldrLocaleData{} func main() { cldrVersion := flag.Int("cldr", 45, "CLDR version") cldrZipFilePath := flag.String("file", "", "CLDR core.zip path") flag.Parse() data, err := readCLDRCoreFile(*cldrZipFilePath, *cldrVersion) if err != nil { log.Fatalf("failed to read CLDR zip: %v", err) } sortedLanguages := buildLanguageGraph(data).getSorted() var nonEmptyLanguages []string for _, tag := range sortedLanguages { localeLDML := data[tag] parsedTag := language.Make(tag) var localeCalendar cldrLocaleData if parsedTag.Parent() != language.Und { existing, ok := localesData[localeLDML.Parent] if ok { localeCalendar = existing.clone() } else { localeCalendar = cldrLocaleData{} } } else { localeCalendar = cldrLocaleData{} } gregorianCalendar := findGregorianCalendar(localeLDML.LDML) if gregorianCalendar != nil { err = fillLocaleData(tag, gregorianCalendar, &localeCalendar) if err != nil { log.Fatal(err) } } if !localeCalendar.isEmpty() { localesData[tag] = &localeCalendar nonEmptyLanguages = append(nonEmptyLanguages, tag) } else { log.Printf("skipped locale with empty dates: %s\n", tag) } } sort.Strings(nonEmptyLanguages) var tablesTmplDataItems []*tablesTmplDataItem for _, tag := range nonEmptyLanguages { localeCalendar := localesData[tag] if localeCalendar.isValid() { tablesTmplDataItems = append(tablesTmplDataItems, newTablesTmplDataItem(tag, localeCalendar)) } else { log.Printf("skipped invalid locale: %s\n", tag) } } err = writeTableGoFile(cldrVersion, tablesTmplDataItems) if err != nil { log.Fatal(err) } } func fillLocaleData(tag string, gregorianCalendar *Calendar, locale *cldrLocaleData) error { var err error if gregorianCalendar.Months != nil && gregorianCalendar.Months.MonthContext != nil { for _, monthContext := range gregorianCalendar.Months.MonthContext { if monthContext.Type != "format" { continue } for _, monthWidth := range monthContext.MonthWidth { if monthWidth.Type == "abbreviated" { locale.shortMonthNames, err = lookupMonthValue(locale.shortMonthNames, shortMonthNamesStd, monthWidth.Month) if err != nil { return fmt.Errorf("failed to read %s short month names %w", tag, err) } } else if monthWidth.Type == "wide" { locale.longMonthNames, err = lookupMonthValue(locale.longMonthNames, longMonthNamesStd, monthWidth.Month) if err != nil { return fmt.Errorf("failed to read %s long month names %w", tag, err) } } } } } if gregorianCalendar.Days != nil && gregorianCalendar.Days.DayContext != nil { for _, dayContext := range gregorianCalendar.Days.DayContext { if dayContext.Type != "format" { continue } for _, dayWidth := range dayContext.DayWidth { if dayWidth.Type == "abbreviated" { locale.shortDayNames = lookupDayValue(locale.shortDayNames, shortDayNamesStdMap, dayWidth.Day) } else if dayWidth.Type == "wide" { locale.longDayNames = lookupDayValue(locale.longDayNames, longDayNamesStdMap, dayWidth.Day) } } } } if gregorianCalendar.DayPeriods != nil && gregorianCalendar.DayPeriods.DayPeriodContext != nil { for _, periodContext := range gregorianCalendar.DayPeriods.DayPeriodContext { if periodContext.Type != "format" { continue } periods := map[string]string{} for _, periodWidth := range periodContext.DayPeriodWidth { if periodWidth.Type != "abbreviated" && periodWidth.Type != "narrow" { continue } for _, period := range periodWidth.DayPeriod { if p, ok := dayPeriodsStdMap[period.Type]; ok { // preference for non-variant periods if _, ok = periods[p]; ok && period.Alt == "variant" { continue } periods[p] = strings.ReplaceAll(period.CharData, "\u202F", "") } } if len(periods) == 2 { locale.dayPeriods = periods break } } } } return nil } func lookupMonthValue(curr map[string]string, stdTab []string, lookupTable []*MonthWidth) (map[string]string, error) { if curr == nil && len(lookupTable) == 0 { return nil, nil } val := make(map[string]string, 12) if curr != nil { maps.Copy(val, curr) } for _, month := range lookupTable { m, err := strconv.Atoi(month.Type) if err != nil { return nil, err } val[stdTab[m-1]] = month.CharData } return val, nil } func lookupDayValue(curr map[string]string, stdTab map[string]string, lookupTable []*Common) map[string]string { if curr == nil && len(lookupTable) == 0 { return nil } val := make(map[string]string, 7) if curr != nil { maps.Copy(val, curr) } for _, day := range lookupTable { d, ok := stdTab[day.Type] if !ok { continue } val[d] = day.CharData } return val } func findGregorianCalendar(lang *LDML) *Calendar { if lang == nil || lang.Dates == nil || lang.Dates.Calendars == nil || lang.Dates.Calendars.Calendar == nil { return nil } for _, calendar := range lang.Dates.Calendars.Calendar { if calendar.Type == "gregorian" { return calendar } } return nil } func readCLDRCoreFile(path string, version int) (map[string]*cldrLocaleModel, error) { cldrCoreZipFile, err := getCLDRCoreFile(path, version) if err != nil { return nil, err } defer cldrCoreZipFile.Close() zipFile, err := zip.OpenReader(cldrCoreZipFile.Name()) if err != nil { return nil, err } defer zipFile.Close() models := make(map[string]*cldrLocaleModel) for _, file := range zipFile.File { fileInfo := file.FileInfo() if strings.HasPrefix(file.Name, "common/main") && !fileInfo.IsDir() { if strings.HasSuffix(fileInfo.Name(), ".xml") { model := &LDML{} entry, err := file.Open() if err != nil { return nil, err } decoder := xml.NewDecoder(entry) if err = decoder.Decode(model); err != nil { return nil, err } tag := fileInfo.Name()[:len(fileInfo.Name())-4] parsedTag, err := language.Parse(tag) if err != nil { return nil, err } var parent string if parsedTag.Parent() != language.Und { parent = parsedTag.Parent().String() } models[parsedTag.String()] = &cldrLocaleModel{parent, model} } } } return models, nil } func getCLDRCoreFile(path string, version int) (*os.File, error) { var cldrCoreZipFile *os.File var err error if path != "" { cldrCoreZipFile, err = os.Open(path) if err != nil { return nil, fmt.Errorf("failed to open CLDR file: %w", err) } return cldrCoreZipFile, nil } cldrCoreZipFile, err = downloadCLDRCoreFile(version) if err != nil { return nil, fmt.Errorf("failed to download CLDR file: %w", err) } return os.Open(cldrCoreZipFile.Name()) } func downloadCLDRCoreFile(version int) (file *os.File, err error) { tmpFile, err := os.CreateTemp("", "cldr-core*.zip") if err != nil { return nil, err } defer tmpFile.Close() url := fmt.Sprintf("https://unicode.org/Public/cldr/%d/core.zip", version) resp, err := http.Get(url) //nolint:gosec,noctx //not unsafe if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("CLDR download failed with status %s", resp.Status) } _, err = io.Copy(tmpFile, resp.Body) if err != nil { return nil, err } return tmpFile, nil } type cldrLocaleData struct { longDayNames map[string]string shortDayNames map[string]string longMonthNames map[string]string shortMonthNames map[string]string dayPeriods map[string]string } func (g *cldrLocaleData) clone() cldrLocaleData { return cldrLocaleData{ longDayNames: maps.Clone(g.longDayNames), shortDayNames: maps.Clone(g.shortDayNames), longMonthNames: maps.Clone(g.longMonthNames), shortMonthNames: maps.Clone(g.shortMonthNames), dayPeriods: maps.Clone(g.dayPeriods), } } func (g *cldrLocaleData) isEmpty() bool { return g.shortDayNames == nil && g.longDayNames == nil && g.shortMonthNames == nil && g.longMonthNames == nil && g.dayPeriods == nil } func (g *cldrLocaleData) isValid() bool { if g.isEmpty() { return false } if g.shortDayNames != nil && len(g.shortDayNames) != 7 { return false } if g.longDayNames != nil && len(g.longDayNames) != 7 { return false } if g.shortMonthNames != nil && len(g.shortMonthNames) != 12 { return false } if g.longMonthNames != nil && len(g.longMonthNames) != 12 { return false } if g.dayPeriods != nil && len(g.dayPeriods) != 2 { return false } return true } type tablesTmplData struct { CLDRVersion *int Tables []*tablesTmplDataItem } type tablesTmplDataItem struct { Name string Language string ShortDaysNames []string LongDaysNames []string ShortMonthNames []string LongMonthNames []string DayPeriods []string } func newTablesTmplDataItem(tag string, data *cldrLocaleData) *tablesTmplDataItem { name := strings.ReplaceAll(tag, "-", "") name = strings.ToUpper(name[:1]) + name[1:] return &tablesTmplDataItem{ Name: name, Language: tag, ShortDaysNames: sortTableValues(data.shortDayNames, shortDayNamesStd), LongDaysNames: sortTableValues(data.longDayNames, longDayNamesStd), ShortMonthNames: sortTableValues(data.shortMonthNames, shortMonthNamesStd), LongMonthNames: sortTableValues(data.longMonthNames, longMonthNamesStd), DayPeriods: sortTableValues(data.dayPeriods, dayPeriodsStd), } } func sortTableValues(table map[string]string, keys []string) []string { if table == nil { return []string{} } sorted := make([]string, 0, len(keys)) for _, m := range keys { sorted = append(sorted, table[m]) } return sorted } func stringSliceValue(values []string) string { if values == nil { return "" } var sb strings.Builder for i, v := range values { sb.WriteString(fmt.Sprintf(`"%s"`, v)) if i+1 < len(values) { sb.WriteString(", ") } } return sb.String() } func writeTableGoFile(cldrVersion *int, tables []*tablesTmplDataItem) error { data := tablesTmplData{ CLDRVersion: cldrVersion, Tables: tables, } tablesTmpl := filepath.Join("templates", "tables.go.tmpl") tmpl, err := template.New(path.Base(tablesTmpl)). Funcs(template.FuncMap{"StringSliceValue": stringSliceValue}). ParseFiles(tablesTmpl) if err != nil { return fmt.Errorf("failed to parse tables.go template: %w", err) } tablesOutPath := filepath.Join("tables.go") tablesOutFile, err := os.OpenFile(tablesOutPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) if err != nil { return fmt.Errorf("failed to open file %s: %w", tablesOutPath, err) } defer tablesOutFile.Close() tmplBuffer := bytes.Buffer{} err = tmpl.Execute(&tmplBuffer, data) if err != nil { return fmt.Errorf("failed to execute tables.go template: %w", err) } formattedBuffer, err := format.Source(tmplBuffer.Bytes()) if err != nil { return fmt.Errorf("failed to format tables.go template output: %w", err) } _, err = tablesOutFile.Write(formattedBuffer) if err != nil { return fmt.Errorf("failed to write file %s: %w", tablesOutPath, err) } return nil } type cldrLocaleModel struct { Parent string *LDML } // cldrGraph is used to sort the languages tags considering its upper level dependencies, // ensuring that base languages are parsed first, so derivatives can copy the data. // E.g.: ["en", "en-001", "en-AU"] type cldrGraph struct { vertices []string edges map[string][]*cldrGraphEdge } type cldrGraphEdge struct { from string to string } func (c *cldrGraph) add(lang, parent string) { c.vertices = append(c.vertices, lang) if parent != "" { c.edges[lang] = append(c.edges[lang], &cldrGraphEdge{lang, parent}) } } func (c *cldrGraph) getSorted() []string { visited := map[string]bool{} var stack []string for _, node := range c.vertices { if _, ok := visited[node]; !ok { c.dfs(node, visited, &stack) } } return stack } func (c *cldrGraph) dfs(from string, visited map[string]bool, stack *[]string) { visited[from] = true edges, ok := c.edges[from] if ok { for _, edge := range edges { if _, ok = visited[edge.to]; !ok { c.dfs(edge.to, visited, stack) } } } *stack = append(*stack, from) } func buildLanguageGraph(models map[string]*cldrLocaleModel) *cldrGraph { graph := &cldrGraph{ vertices: []string{}, edges: make(map[string][]*cldrGraphEdge), } for tag, model := range models { graph.add(tag, model.Parent) } return graph } // Common holds several of the most common attributes and sub elements of an XML element. type Common struct { XMLName xml.Name Type string `xml:"type,attr,omitempty"` Reference string `xml:"reference,attr,omitempty"` Alt string `xml:"alt,attr,omitempty"` ValidSubLocales string `xml:"validSubLocales,attr,omitempty"` Draft string `xml:"draft,attr,omitempty"` hidden } type hidden struct { CharData string `xml:",chardata"` Alias *struct { Common Source string `xml:"source,attr"` Path string `xml:"path,attr"` } `xml:"alias"` Def *struct { Common Choice string `xml:"choice,attr,omitempty"` Type string `xml:"type,attr,omitempty"` } `xml:"default"` } // LDML is the top-level type for locale-specific data. type LDML struct { Common Dates *struct { Common Calendars *struct { Common Calendar []*Calendar `xml:"calendar"` } `xml:"calendars"` } `xml:"dates"` } // Calendar specifies the fields used for formatting and parsing dates and times. type Calendar struct { Common Months *struct { Common MonthContext []*struct { Common MonthWidth []*struct { Common Month []*MonthWidth `xml:"month"` } `xml:"monthWidth"` } `xml:"monthContext"` } `xml:"months"` Days *struct { Common DayContext []*struct { Common DayWidth []*struct { Common Day []*Common `xml:"day"` } `xml:"dayWidth"` } `xml:"dayContext"` } `xml:"days"` DayPeriods *struct { Common DayPeriodContext []*struct { Common DayPeriodWidth []*struct { Common DayPeriod []*Common `xml:"dayPeriod"` } `xml:"dayPeriodWidth"` } `xml:"dayPeriodContext"` } `xml:"dayPeriods"` } type MonthWidth = struct { Common Yeartype string `xml:"yeartype,attr"` }