in cmd/state_comparer/main.go [265:375]
// main compares the grid state of every test group found under opt.first
// against the same-named state under opt.second and logs a categorized summary
// of the pairs that differ.
//
// Flow:
//  1. Parse and validate flags, set the log level, and build the GCS clients.
//  2. Read the configuration; a read failure is logged but is not fatal.
//  3. For each file listed under opt.first, look up its test group, download
//     both grids, and call compare on them.
//  4. Log the differing pairs grouped by reason, then exit fatally if any
//     per-pair path or download errors were collected along the way.
func main() {
	opt := gatherOptions()
	if err := opt.validate(); err != nil {
		logrus.Fatalf("Invalid options %v: %v", opt, err)
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Trace is the more verbose level, so it wins when both flags are set;
	// checking debug first would silently discard a requested trace level.
	switch {
	case opt.trace:
		logrus.SetLevel(logrus.TraceLevel)
	case opt.debug:
		logrus.SetLevel(logrus.DebugLevel)
	}

	storageClient, err := gcs.ClientWithCreds(ctx, opt.creds)
	if err != nil {
		logrus.Fatalf("Failed to create storage client: %v", err)
	}
	defer storageClient.Close()
	client := gcs.NewClient(storageClient)

	// NOTE(review): on failure we continue with whatever cfg config.Read
	// returned; presumably every test group lookup below then misses and is
	// counted as "not found" — confirm this is the intended degradation.
	cfg, err := config.Read(ctx, opt.configPath.String(), storageClient)
	if err != nil {
		logrus.WithError(err).WithField("path", opt.configPath.String()).Error("Failed to read configuration, proceeding without config info.")
	}

	firstFiles, err := filenames(ctx, opt.first, client)
	if err != nil {
		logrus.Fatalf("Failed to list files in %q: %v", opt.first.String(), err)
	}

	// The destination prefix is the same for every file, so normalise its
	// trailing slash once instead of on every iteration.
	secondPrefix := opt.second.String()
	if !strings.HasSuffix(secondPrefix, "/") {
		secondPrefix += "/"
	}

	var (
		diffedMsgs, errorMsgs []string // diffedMsgs keeps processing order for the report.
		total, notFound       int
	)
	rowFirstDups := make(map[string]bool)  // Good; second deduplicates.
	rowSecondDups := make(map[string]bool) // Bad; second adds duplicates.
	colFirstDups := make(map[string]bool)  // Good; second deduplicates.
	colSecondDups := make(map[string]bool) // Bad; second adds duplicates.
	otherDiffed := make(map[string]bool)   // Bad; found unknown differences.

	for _, firstP := range firstFiles {
		tgName := filepath.Base(firstP)
		secondP := secondPrefix + tgName

		firstPath, err := gcs.NewPath(firstP)
		if err != nil {
			errorMsgs = append(errorMsgs, fmt.Sprintf("gcs.NewPath(%q): %v", firstP, err))
			continue
		}

		// Optionally skip processing some groups.
		tg := config.FindTestGroup(tgName, cfg)
		if tg == nil {
			logrus.Tracef("Did not find test group %q in config", tgName)
			notFound++
			continue
		}

		firstGrid, _, err := gcs.DownloadGrid(ctx, client, *firstPath)
		if err != nil {
			errorMsgs = append(errorMsgs, fmt.Sprintf("gcs.DownloadGrid(%q): %v", firstP, err))
			continue
		}

		secondPath, err := gcs.NewPath(secondP)
		if err != nil {
			errorMsgs = append(errorMsgs, fmt.Sprintf("gcs.NewPath(%q): %v", secondP, err))
			continue
		}
		secondGrid, _, err := gcs.DownloadGrid(ctx, client, *secondPath)
		if err != nil {
			errorMsgs = append(errorMsgs, fmt.Sprintf("gcs.DownloadGrid(%q): %v", secondP, err))
			continue
		}

		// Only pairs that were actually compared count towards the total.
		diffed, rowReasons, colReasons := compare(ctx, firstGrid, secondGrid, opt.diffRatioOK, tg.GetNumColumnsRecent())
		total++
		if !diffed {
			continue
		}

		msg := fmt.Sprintf("%q vs. %q", firstP, secondP)
		if opt.testGroupURL != "" {
			// Link to the group instead: the URL prefix plus everything after
			// the last "/" of the source path (the whole path if there is none).
			msg = opt.testGroupURL + firstP[strings.LastIndex(firstP, "/")+1:]
		}

		// Each pair lands in exactly one bucket; duplicates introduced by the
		// second state take priority over duplicates it removed.
		switch {
		case rowReasons.secondHasDuplicates:
			rowSecondDups[msg] = true
		case colReasons.secondHasDuplicates:
			colSecondDups[msg] = true
		case rowReasons.firstHasDuplicates:
			rowFirstDups[msg] = true
		case colReasons.firstHasDuplicates:
			colFirstDups[msg] = true
		default:
			otherDiffed[msg] = true
		}
		diffedMsgs = append(diffedMsgs, msg)
	}

	logrus.Infof("Found diffs for %d of %d pairs (%d not found):", len(diffedMsgs), total, notFound)

	// report logs one bucket. Map iteration order is randomized, so walk the
	// ordered diffedMsgs slice (which contains every bucketed message) and
	// filter by membership; this keeps the output stable between runs.
	report := func(diffs map[string]bool, name string) {
		logrus.Infof("found %d %q:", len(diffs), name)
		printed := make(map[string]bool, len(diffs))
		for _, msg := range diffedMsgs {
			if !diffs[msg] || printed[msg] {
				continue
			}
			printed[msg] = true
			logrus.Infof("\t* %s", msg)
		}
	}
	report(rowFirstDups, "✅ rows get deduplicated")
	report(colFirstDups, "✅ columns get deduplicated")
	report(rowSecondDups, "❌ rows get duplicated")
	report(colSecondDups, "❌ columns get duplicated")
	report(otherDiffed, "❌ other diffs")

	// Surface collected per-pair errors last so the full report is logged
	// before the fatal log terminates the process.
	if n := len(errorMsgs); n > 0 {
		logrus.WithField("count", n).WithField("errors", errorMsgs).Fatal("Errors when diffing directories.")
	}
}