cmd/buildmetadata/main.go (326 lines of code) (raw):

package main import ( "bytes" "compress/gzip" "encoding/base64" "encoding/binary" "fmt" "math" "os" "os/exec" "path/filepath" "sort" "strconv" "strings" "github.com/nyaruka/phonenumbers" "golang.org/x/exp/maps" "google.golang.org/protobuf/proto" ) func main() { if err := buildMetadata(); err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) } } func buildMetadata() error { fmt.Print("Cloning upstream repo... ") if err := cloneUpstreamRepo("https://github.com/google/libphonenumber.git"); err != nil { return err } fmt.Print("OK\nBuilding number metadata...") metadata, err := buildNumberMetadata("resources/PhoneNumberMetadata.xml", "NumberData", "metadata_bin.go", false) if err != nil { return err } fmt.Print("OK\nBuilding short number metadata...") _, err = buildNumberMetadata("resources/ShortNumberMetadata.xml", "ShortNumberData", "shortnumber_metadata_bin.go", true) if err != nil { return err } fmt.Print("OK\nBuilding region metadata...") if err := buildRegionMetadata(metadata, "RegionData", "countrycode_to_region_bin.go"); err != nil { return err } fmt.Print("OK\nBuilding timezone metadata...") if err := buildTimezoneMetadata("resources/timezones/map_data.txt", "TimezoneData", "prefix_to_timezone_bin.go"); err != nil { return err } fmt.Println("OK\nBuilding carrier prefix metadata...") if err := buildPrefixMetadata("resources/carrier", "CarrierData", "prefix_to_carriers_bin.go"); err != nil { return err } fmt.Println("Building geographic prefix metadata...") if err := buildPrefixMetadata("resources/geocoding", "GeocodingData", "prefix_to_geocodings_bin.go"); err != nil { return err } return nil } func cloneUpstreamRepo(url string) error { os.RemoveAll("_build") cmd := exec.Command("git", "clone", "--depth=1", url, "_build") if err := cmd.Run(); err != nil { return fmt.Errorf("error cloning upstream repo: %w", err) } return nil } func buildNumberMetadata(srcFile, varName, dstFile string, short bool) (*phonenumbers.PhoneMetadataCollection, error) { body, err := os.ReadFile("_build/" + srcFile) if err != nil { return nil, fmt.Errorf("error reading %s: %w", srcFile, err) } collection, err := phonenumbers.BuildPhoneMetadataCollection(body, false, false, short) if err != nil { return nil, fmt.Errorf("error parsing %s: %w", srcFile, err) } data, err := proto.Marshal(collection) if err != nil { return nil, fmt.Errorf("error marshaling metadata as protobuf: %w", err) } if err := os.WriteFile("gen/"+dstFile, generateBinFile(varName, data), os.FileMode(0664)); err != nil { return nil, fmt.Errorf("error writing %s: %w", dstFile, err) } return collection, nil } func buildRegionMetadata(metadata *phonenumbers.PhoneMetadataCollection, varName, dstFile string) error { regionMap := phonenumbers.BuildCountryCodeToRegionMap(metadata) // generate our map data data, err := renderMap(regionMap) if err != nil { return fmt.Errorf("error generating %s: %w", dstFile, err) } if err := os.WriteFile("gen/"+dstFile, generateBinFile(varName, data), os.FileMode(0664)); err != nil { return fmt.Errorf("error writing %s: %w", dstFile, err) } return nil } func buildTimezoneMetadata(srcFile, varName, dstFile string) error { body, err := os.ReadFile("_build/" + srcFile) if err != nil { return fmt.Errorf("error reading %s: %w", srcFile, err) } // build our map of prefix to timezones prefixMap := make(map[int][]string) for _, line := range strings.Split(string(body), "\n") { if strings.HasPrefix(line, "#") { continue } if strings.TrimSpace(line) == "" { continue } fields := strings.Split(line, "|") if len(fields) != 2 { return fmt.Errorf("invalid format in timezone file: %s", line) } zones := strings.Split(fields[1], "&") if len(zones) < 1 { return fmt.Errorf("invalid format in timezone file: %s", line) } // parse our prefix prefix, err := strconv.Atoi(fields[0]) if err != nil { return fmt.Errorf("invalid prefix in line: %s", line) } prefixMap[prefix] = zones } // generate our map data data, err := renderMap(prefixMap) if err != nil { return fmt.Errorf("error generating %s: %w", dstFile, err) } if err := os.WriteFile("gen/"+dstFile, generateBinFile(varName, data), os.FileMode(0664)); err != nil { return fmt.Errorf("error writing %s: %w", dstFile, err) } return nil } func buildPrefixMetadata(srcDir, varName, dstFile string) error { // get our top level language directories dirs, err := filepath.Glob(fmt.Sprintf("_build/%s/*", srcDir)) if err != nil { return err } // for each directory languageMappings := make(map[string]map[int]string) for _, dir := range dirs { fi, _ := os.Stat(dir) // only look at directories if !fi.IsDir() { continue } // build a map for that directory mappings, err := readMappingsForDir(dir) if err != nil { return fmt.Errorf("error reading mappings for %s: %w", dir, err) } // save it for our language languageMappings[filepath.Base(dir)] = mappings } output := bytes.Buffer{} output.WriteString("package gen\n\n") output.WriteString(fmt.Sprintf("var %s = map[string]string {\n", varName)) langs := maps.Keys(languageMappings) sort.Strings(langs) for _, lang := range langs { mappings := languageMappings[lang] // iterate through our map, creating our full set of values and prefixes prefixes := make([]int, 0, len(mappings)) seenValues := make(map[string]bool) values := make([]string, 0, 255) for prefix, value := range mappings { prefixes = append(prefixes, prefix) _, seen := seenValues[value] if !seen { values = append(values, value) seenValues[value] = true } } // make sure we won't overrun uint16s if len(values) > math.MaxUint16 { return fmt.Errorf("too many values to represent in uint16") } // need sorted prefixes for our diff writing to work sort.Ints(prefixes) // sorted values compress better sort.Strings(values) // build our reverse mapping from value to offset internMappings := make(map[string]uint16) for i, value := range values { internMappings[value] = uint16(i) } // write our map data := &bytes.Buffer{} // first write our values, as length of string and raw bytes joinedValues := strings.Join(values, "\n") if err = binary.Write(data, binary.LittleEndian, uint32(len(joinedValues))); err != nil { return err } if err = binary.Write(data, binary.LittleEndian, []byte(joinedValues)); err != nil { return err } // then then number of prefix / value pairs if err = binary.Write(data, binary.LittleEndian, uint32(len(prefixes))); err != nil { return err } // we write our prefix / value pairs as a varint of the difference of the previous prefix // and a uint16 of the value index last := 0 intBuf := make([]byte, 6) for _, prefix := range prefixes { value := mappings[prefix] valueIntern := internMappings[value] diff := prefix - last l := binary.PutUvarint(intBuf, uint64(diff)) if err = binary.Write(data, binary.LittleEndian, intBuf[:l]); err != nil { return err } if err = binary.Write(data, binary.LittleEndian, uint16(valueIntern)); err != nil { return err } last = prefix } var compressed bytes.Buffer w := gzip.NewWriter(&compressed) w.Write(data.Bytes()) w.Close() c := base64.StdEncoding.EncodeToString(compressed.Bytes()) output.WriteString("\t") output.WriteString(strconv.Quote(lang)) output.WriteString(": ") output.WriteString(strconv.Quote(c)) output.WriteString(",\n") } output.WriteString("}") if err := os.WriteFile("gen/"+dstFile, output.Bytes(), os.FileMode(0664)); err != nil { return fmt.Errorf("error writing %s: %w", dstFile, err) } return nil } func renderMap(prefixMap map[int][]string) ([]byte, error) { // build lists of our keys and values keys := make([]int, 0, len(prefixMap)) values := make([]string, 0, 255) seenValues := make(map[string]bool, 255) for k, vs := range prefixMap { keys = append(keys, k) for _, v := range vs { _, seen := seenValues[v] if !seen { seenValues[v] = true values = append(values, v) } } } sort.Strings(values) sort.Ints(keys) internMap := make(map[string]int, len(values)) for i, v := range values { internMap[v] = i } data := &bytes.Buffer{} // first write our values, as length of string and raw bytes joinedValues := strings.Join(values, "\n") if err := binary.Write(data, binary.LittleEndian, uint32(len(joinedValues))); err != nil { return nil, err } if err := binary.Write(data, binary.LittleEndian, []byte(joinedValues)); err != nil { return nil, err } // then the number of keys if err := binary.Write(data, binary.LittleEndian, uint32(len(keys))); err != nil { return nil, err } // we write our key / value pairs as a varint of the difference of the previous prefix // and a uint16 of the value index last := 0 intBuf := make([]byte, 6) for _, key := range keys { // first write our prefix diff := key - last l := binary.PutUvarint(intBuf, uint64(diff)) if err := binary.Write(data, binary.LittleEndian, intBuf[:l]); err != nil { return nil, err } // then our values values := prefixMap[key] // write our number of values if err := binary.Write(data, binary.LittleEndian, uint8(len(values))); err != nil { return nil, err } // then each value as the interned index for _, v := range values { valueIntern := internMap[v] if err := binary.Write(data, binary.LittleEndian, uint16(valueIntern)); err != nil { return nil, err } } last = key } return data.Bytes(), nil } // generates the file contents for a data file func generateBinFile(varName string, data []byte) []byte { var compressed bytes.Buffer w := gzip.NewWriter(&compressed) w.Write(data) w.Close() encoded := base64.StdEncoding.EncodeToString(compressed.Bytes()) // create our output output := &bytes.Buffer{} // write our header output.WriteString("package gen\n\nvar ") output.WriteString(varName) output.WriteString(" = ") output.WriteString(strconv.Quote(string(encoded))) output.WriteString("\n") return output.Bytes() } func readMappingsForDir(dir string) (map[int]string, error) { lang := filepath.Base(dir) mappings := make(map[int]string) files, err := filepath.Glob(dir + "/*.txt") if err != nil { return nil, err } for _, file := range files { body, err := os.ReadFile(file) if err != nil { return nil, err } for _, line := range strings.Split(string(body), "\n") { if strings.HasPrefix(line, "#") { continue } if strings.TrimSpace(line) == "" { continue } fields := strings.Split(line, "|") if len(fields) != 2 { continue } prefix := fields[0] prefixInt, err := strconv.Atoi(prefix) if err != nil || prefixInt < 0 { return nil, fmt.Errorf("error parsing line: %s", line) } value := strings.TrimSpace(fields[1]) if value == "" { continue } _, repeat := mappings[prefixInt] if repeat { return nil, fmt.Errorf("found repeated prefix on line: %s", line) } mappings[prefixInt] = fields[1] } } fmt.Printf(" > read %d mappings for %s\n", len(mappings), lang) return mappings, nil }