cmd/indexer/main.go (186 lines of code) (raw):
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"flag"
"fmt"
"io"
"os"
"sort"
"cloud.google.com/go/storage"
"github.com/googlecloudplatform/pi-delivery/pkg/resultset"
"github.com/googlecloudplatform/pi-delivery/pkg/ycd"
"go.uber.org/zap"
"google.golang.org/api/iterator"
"google.golang.org/api/option"
)
// maxHeaderLength is the number of leading bytes fetched from each object
// when reading its YCD header. Every YCD file header tested so far is smaller
// than 256 bytes, so fetching the first kilobyte leaves ample headroom.
// This program fails if a header is somehow bigger than this limit.
const maxHeaderLength = 1024
// logger is the package-wide structured logger. It is assigned in main
// before any other function in this file is called.
var logger *zap.SugaredLogger
// Command-line flags. They are registered on the default flag set at package
// initialization and hold their final values once main has called flag.Parse.
var (
	// bucketName is the bucket to index; main rejects an empty value.
	bucketName = flag.String("bucket", "", "bucket name (e.g. pi-delivery-public)")
	// hexPrefix is appended to prefix to locate the hexadecimal results.
	hexPrefix = flag.String("hex", "Pi - Hex - Chudnovsky", "prefix for hexadecimal results")
	// decPrefix is appended to prefix to locate the decimal results.
	decPrefix = flag.String("dec", "Pi - Dec - Chudnovsky", "prefix for decimal results")
	// prefix is prepended to both hexPrefix and decPrefix.
	prefix = flag.String("prefix", "", "common prefix for the result objects")
)
// listObjects collects the names of the objects in bucket whose names start
// with prefix, in the order the listing iterator yields them.
//
// Only the Name attribute is requested for each object. If the iterator
// reports an error other than iterator.Done, the error is logged and returned
// together with a nil slice.
func listObjects(ctx context.Context, bucket *storage.BucketHandle, prefix string) ([]string, error) {
	logger.Infow("listObjects", "prefix", prefix)

	q := &storage.Query{Prefix: prefix}
	q.SetAttrSelection([]string{"Name"})
	it := bucket.Objects(ctx, q)

	names := make([]string, 0)
	for {
		attrs, err := it.Next()
		if err != nil {
			// iterator.Done marks the normal end of the listing.
			if err == iterator.Done {
				logger.Infow("listObjects finished",
					"prefix", prefix,
					"objects", len(names),
				)
				return names, nil
			}
			logger.Errorw("failed to list objects",
				"prefix", prefix,
				"error", err,
			)
			return nil, err
		}

		logger.Infow("object found", "name", attrs.Name)
		names = append(names, attrs.Name)
	}
}
// newStorageClient creates a Cloud Storage client restricted to the read-only
// scope; this tool only lists objects and reads their headers.
//
// The option must be passed to storage.NewClient to take effect: calling
// option.WithScopes on its own merely builds a value that is then discarded.
//
// On failure the error is logged with Fatalw, which terminates the process,
// so a returned client is never nil.
func newStorageClient(ctx context.Context) *storage.Client {
	client, err := storage.NewClient(ctx, option.WithScopes(storage.ScopeReadOnly))
	if err != nil {
		logger.Fatalw("failed to create a Storage client",
			"error", err,
		)
	}
	return client
}
// logYCDInfo writes one info-level log entry describing file: its name, its
// first digit offset, and the fields of its header.
func logYCDInfo(file *ycd.YCDFile) {
	hdr := file.Header
	logger.Infow("ycd file",
		"name", file.Name,
		"first digit offset", file.FirstDigitOffset,
		"version", hdr.FileVersion,
		"radix", hdr.Radix,
		"first digits", hdr.FirstDigits,
		"total digits", hdr.TotalDigits,
		"block size", hdr.BlockSize,
		"block id", hdr.BlockID,
		"header length", hdr.Length,
	)
}
// fetchYCDFiles lists the objects under prefix in the named bucket, parses the
// YCD header of each one, and returns the parsed files as a sorted ResultSet.
//
// Only the first maxHeaderLength bytes of each object are requested. Each
// reader is closed as soon as its header has been parsed, so at most one
// reader is open at a time regardless of how many objects are listed.
//
// Failing to list the objects or to open a reader is fatal (Fatalw terminates
// the process). An object whose header cannot be parsed is logged at error
// level and left out of the result.
func fetchYCDFiles(ctx context.Context, client *storage.Client, bucketName, prefix string) resultset.ResultSet {
	bucket := client.Bucket(bucketName)
	objects, err := listObjects(ctx, bucket, prefix)
	if err != nil {
		logger.Fatalw("failed to list objects",
			"error", err,
			"bucket", bucketName,
			"prefix", prefix,
		)
	}

	files := make(resultset.ResultSet, 0, len(objects))
	for _, name := range objects {
		reader, err := bucket.Object(name).NewRangeReader(ctx, 0, maxHeaderLength)
		if err != nil {
			logger.Fatalw("creating an object reader failed",
				"error", err,
				"bucket", bucketName,
				"object", name,
			)
		}

		file, err := ycd.Parse(reader)
		// Close here rather than with defer: a deferred call inside the loop
		// would keep every reader open until the function returns.
		if cerr := reader.Close(); cerr != nil {
			logger.Warnw("failed to close an object reader",
				"error", cerr,
				"bucket", bucketName,
				"object", name,
			)
		}
		if err != nil {
			logger.Errorw("failed to parse a ycd file",
				"error", err,
				"bucket", bucketName,
				"object", name,
			)
			// The parse result is unusable on error; skip the object.
			continue
		}

		file.Name = name
		logYCDInfo(file)
		files = append(files, file)
	}

	sort.Sort(files)
	return files
}
// printIndexPrologue writes the beginning of the generated index source to w:
// the generated-code notice, the package clause, the import block, and a
// BucketName constant holding bucketName, each followed by a blank line.
func printIndexPrologue(w io.Writer, bucketName string) {
	const header = `// Code generated by indexer. DO NOT EDIT.
// Run indexer/main.go to generate this file.
package index
import (
"github.com/googlecloudplatform/pi-delivery/pkg/resultset"
"github.com/googlecloudplatform/pi-delivery/pkg/ycd"
)`

	fmt.Fprintln(w, header)
	fmt.Fprint(w, "\n")
	fmt.Fprintf(w, "const BucketName = \"%s\"\n", bucketName)
	fmt.Fprint(w, "\n")
}
// printIndexFileList writes to w a Go variable declaration named varName of
// type resultset.ResultSet whose composite literal has one element per entry
// in files, in the order given, followed by a blank line.
func printIndexFileList(w io.Writer, varName string, files resultset.ResultSet) {
	// entryFormat renders one element of the generated composite literal.
	const entryFormat = ` {
Header: &ycd.Header{
FileVersion: "%s",
Radix: %d,
FirstDigits: "%s",
TotalDigits: int64(%d),
BlockSize: int64(%d),
BlockID: int64(%d),
Length: %d,
},
Name: "%s",
FirstDigitOffset: %d,
},`

	fmt.Fprintf(w, "var %s resultset.ResultSet = resultset.ResultSet{\n", varName)
	for _, entry := range files {
		hdr := entry.Header
		fmt.Fprintf(w, entryFormat,
			hdr.FileVersion,
			hdr.Radix,
			hdr.FirstDigits,
			hdr.TotalDigits,
			hdr.BlockSize,
			hdr.BlockID,
			hdr.Length,
			entry.Name,
			entry.FirstDigitOffset,
		)
		fmt.Fprint(w, "\n")
	}
	fmt.Fprintln(w, "}")
	fmt.Fprint(w, "\n")
}
// processDirectory fetches the YCD files found under prefix in the named
// bucket and writes them to w as a Go variable declaration named varName.
func processDirectory(ctx context.Context, client *storage.Client, w io.Writer, varName, bucketName, prefix string) {
	printIndexFileList(w, varName, fetchYCDFiles(ctx, client, bucketName, prefix))
}
// main writes the Go source of the result index to standard output.
//
// It sets up the development logger, parses the command-line flags, and
// requires --bucket to be non-empty. It then prints the prologue followed by
// the "Decimal" and "Hexadecimal" result sets, each located under the common
// --prefix concatenated with --dec or --hex respectively.
//
// The process exits with status 1 if the logger cannot be created or the
// bucket name is missing.
func main() {
	l, err := zap.NewDevelopment()
	if err != nil {
		// The logger is unavailable here, so report directly on stderr.
		fmt.Fprintf(os.Stderr, "failed to initialize logger: %v\n", err)
		os.Exit(1)
	}
	logger = l.Sugar()
	// Best-effort flush on normal return; the Sync error is not actionable.
	defer logger.Sync()

	flag.Parse()
	if *bucketName == "" {
		logger.Error("bucket name is required (--bucket)")
		// os.Exit does not run deferred calls, so flush the logger first.
		logger.Sync()
		os.Exit(1)
	}

	ctx := context.Background()
	client := newStorageClient(ctx)
	defer func() {
		if err := client.Close(); err != nil {
			logger.Errorw("failed to close the Storage client",
				"error", err)
		}
	}()

	printIndexPrologue(os.Stdout, *bucketName)
	processDirectory(ctx, client, os.Stdout, "Decimal", *bucketName, *prefix+*decPrefix)
	processDirectory(ctx, client, os.Stdout, "Hexadecimal", *bucketName, *prefix+*hexPrefix)
}