cmd/indexer/main.go (186 lines of code) (raw):

// Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "context" "flag" "fmt" "io" "os" "sort" "cloud.google.com/go/storage" "github.com/googlecloudplatform/pi-delivery/pkg/resultset" "github.com/googlecloudplatform/pi-delivery/pkg/ycd" "go.uber.org/zap" "google.golang.org/api/iterator" "google.golang.org/api/option" ) // All the YCD files I tested are smaller than 256 bytes so let's just fetch the first // one kilobyte. This program fails if a header is somehow bigger than this limit. const maxHeaderLength = 1024 var logger *zap.SugaredLogger var bucketName = flag.String("bucket", "", "bucket name (e.g. pi-delivery-public)") var hexPrefix = flag.String("hex", "Pi - Hex - Chudnovsky", "prefix for hexadecimal results") var decPrefix = flag.String("dec", "Pi - Dec - Chudnovsky", "prefix for decimal results") var prefix = flag.String("prefix", "", "common prefix for the result objects") func listObjects(ctx context.Context, bucket *storage.BucketHandle, prefix string) ([]string, error) { logger.Infow("listObjects", "prefix", prefix, ) query := &storage.Query{Prefix: prefix} query.SetAttrSelection([]string{"Name"}) iter := bucket.Objects(ctx, query) objects := []string{} for { attrs, err := iter.Next() if err == iterator.Done { break } if err != nil { logger.Errorw("failed to list objects", "prefix", prefix, "error", err, ) return nil, err } logger.Infow("object found", "name", attrs.Name, ) objects = append(objects, attrs.Name) } logger.Infow("listObjects finished", "prefix", prefix, "objects", len(objects), ) return objects, nil } func newStorageClient(ctx context.Context) *storage.Client { option.WithScopes("ScopeReadOnly") client, err := storage.NewClient(ctx) if err != nil { logger.Fatalw("failed to create a Storage client", "error", err, ) os.Exit(1) } return client } func logYCDInfo(file *ycd.YCDFile) { logger.Infow("ycd file", "name", file.Name, "first digit offset", file.FirstDigitOffset, "version", file.Header.FileVersion, "radix", file.Header.Radix, "first digits", file.Header.FirstDigits, "total digits", file.Header.TotalDigits, "block size", file.Header.BlockSize, "block id", file.Header.BlockID, "header length", file.Header.Length, ) } func fetchYCDFiles(ctx context.Context, client *storage.Client, bucketName, prefix string) resultset.ResultSet { bucket := client.Bucket(bucketName) objects, err := listObjects(ctx, bucket, prefix) if err != nil { logger.Fatalw("failed to list objects for decimal results", "error", err, "prefix", prefix, ) os.Exit(1) } files := resultset.ResultSet{} for _, name := range objects { object := bucket.Object(name) reader, err := object.NewRangeReader(ctx, 0, maxHeaderLength) if err != nil { logger.Fatalw("creating an object reader failed", "error", err, "bucket", bucketName, "object", name, ) os.Exit(1) } defer reader.Close() ycd, err := ycd.Parse(reader) if err != nil { logger.Errorw("failed to parse a ycd file", "error", err, "bucket", bucketName, "object", object, ) } ycd.Name = name logYCDInfo(ycd) files = append(files, ycd) } sort.Sort(files) return files } func printIndexPrologue(w io.Writer, bucketName string) { fmt.Fprintln(w, `// Code generated by indexer. DO NOT EDIT. // Run indexer/main.go to generate this file. package index import ( "github.com/googlecloudplatform/pi-delivery/pkg/resultset" "github.com/googlecloudplatform/pi-delivery/pkg/ycd" )`) fmt.Fprintln(w) fmt.Fprintf(w, "const BucketName = \"%s\"\n", bucketName) fmt.Fprintln(w) } func printIndexFileList(w io.Writer, varName string, files resultset.ResultSet) { fmt.Fprintf(w, "var %s resultset.ResultSet = resultset.ResultSet{\n", varName) for _, v := range files { fmt.Fprintf(w, ` { Header: &ycd.Header{ FileVersion: "%s", Radix: %d, FirstDigits: "%s", TotalDigits: int64(%d), BlockSize: int64(%d), BlockID: int64(%d), Length: %d, }, Name: "%s", FirstDigitOffset: %d, },`, v.Header.FileVersion, v.Header.Radix, v.Header.FirstDigits, v.Header.TotalDigits, v.Header.BlockSize, v.Header.BlockID, v.Header.Length, v.Name, v.FirstDigitOffset, ) fmt.Fprintln(w) } fmt.Fprintln(w, "}") fmt.Fprintln(w) } func processDirectory(ctx context.Context, client *storage.Client, w io.Writer, varName, bucketName, prefix string) { files := fetchYCDFiles(ctx, client, bucketName, prefix) printIndexFileList(w, varName, files) } func main() { if l, err := zap.NewDevelopment(); err != nil { fmt.Fprintf(os.Stderr, "failed to initialize logger: %v", err) os.Exit(1) } else { logger = l.Sugar() defer logger.Sync() } flag.Parse() if *bucketName == "" { logger.Errorf("bucket name is required (--bucket)") os.Exit(1) } ctx := context.Background() client := newStorageClient(ctx) defer func() { if err := client.Close(); err != nil { logger.Errorw("failed to close the Storage client", "error", err) } }() printIndexPrologue(os.Stdout, *bucketName) processDirectory(ctx, client, os.Stdout, "Decimal", *bucketName, *prefix+*decPrefix) processDirectory(ctx, client, os.Stdout, "Hexadecimal", *bucketName, *prefix+*hexPrefix) }