in scripts/go/curconvert/curconvert.go [283:355]
// ParseCur downloads the CUR manifest JSON identified by c.sourceBucket and
// c.sourceObject and extracts the column definitions and report file keys
// from it.
//
// On success it:
//   - replaces c.skipCols with the set of zero-based manifest column indexes
//     whose sanitised name duplicates an earlier column;
//   - appends one "name=..., type=..., encoding=PLAIN_DICTIONARY" descriptor
//     to c.CurColumns for every non-duplicate column, using the type from
//     c.CurColumnTypes when present and "UTF8" otherwise;
//   - appends every entry of the manifest's "reportKeys" array to c.CurFiles.
//
// It returns an error if the downloader cannot be initialised, the download
// fails, the body is not valid JSON, or the manifest does not have the
// expected shape. On error none of the receiver fields above are modified.
func (c *CurConvert) ParseCur() error {
	// init S3 manager
	s3dl, err := c.initS3Downloader(c.sourceBucket, c.sourceArn, c.sourceExternalID)
	if err != nil {
		return err
	}

	// Download CUR manifest JSON
	buff := &aws.WriteAtBuffer{}
	_, err = s3dl.Download(buff, &s3.GetObjectInput{
		Bucket: aws.String(c.sourceBucket),
		Key:    aws.String(c.sourceObject),
	})
	if err != nil {
		return fmt.Errorf("failed to download manifest, bucket: %s, object: %s, error: %w", c.sourceBucket, c.sourceObject, err)
	}

	// Unmarshal JSON
	var manifest map[string]interface{}
	if err := json.Unmarshal(buff.Bytes(), &manifest); err != nil {
		return fmt.Errorf("failed to parse manifest, bucket: %s, object: %s, error: %w", c.sourceBucket, c.sourceObject, err)
	}

	// malformed builds the error returned when the manifest is valid JSON but
	// does not have the structure this method relies on.
	malformed := func(format string, args ...interface{}) error {
		return fmt.Errorf("malformed manifest, bucket: %s, object: %s, error: %s",
			c.sourceBucket, c.sourceObject, fmt.Sprintf(format, args...))
	}

	// sanitise keeps [a-z0-9/] and substitutes '_' for every other rune.
	// It is applied after lower-casing, so ASCII upper-case letters survive as
	// lower-case while anything else (including non-ASCII letters) becomes '_'.
	sanitise := func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '/':
			return r
		default:
			return '_'
		}
	}

	// Collect all column names from the manifest. Results are gathered in
	// locals and only published to the receiver once the whole manifest has
	// been validated, so a bad manifest never leaves partial state behind.
	rawCols, ok := manifest["columns"].([]interface{})
	if !ok {
		return malformed("\"columns\" is missing or not an array")
	}
	seen := make(map[string]bool, len(rawCols))
	skip := make(map[int]bool)
	columns := make([]string, 0, len(rawCols))
	for i, raw := range rawCols {
		col, ok := raw.(map[string]interface{})
		if !ok {
			return malformed("column %d is not an object", i)
		}
		category, ok := col["category"].(string)
		if !ok {
			return malformed("column %d has a missing or non-string \"category\"", i)
		}
		name, ok := col["name"].(string)
		if !ok {
			return malformed("column %d has a missing or non-string \"name\"", i)
		}

		// Convert the column name to allowed characters (lowercase) and
		// substitute '_' for any non-allowed character.
		columnName := strings.Map(sanitise, strings.ToLower(category+"/"+name))

		// Skip duplicate columns; distinct manifest names can collide after
		// sanitising, and the index is recorded so the column can be
		// identified as skipped later.
		if seen[columnName] {
			skip[i] = true
			continue
		}
		seen[columnName] = true

		// Check for type override
		colType, ok := c.CurColumnTypes[columnName]
		if !ok {
			colType = "UTF8"
		}
		columns = append(columns, "name="+columnName+", type="+colType+", encoding=PLAIN_DICTIONARY")
	}

	// Collect CSV CUR files
	rawKeys, ok := manifest["reportKeys"].([]interface{})
	if !ok {
		return malformed("\"reportKeys\" is missing or not an array")
	}
	files := make([]string, 0, len(rawKeys))
	for i, raw := range rawKeys {
		key, ok := raw.(string)
		if !ok {
			return malformed("reportKeys entry %d is not a string", i)
		}
		files = append(files, key)
	}

	// Publish results. CurColumns and CurFiles are appended to (not replaced)
	// to preserve the accumulation behaviour callers may already depend on.
	c.skipCols = skip
	c.CurColumns = append(c.CurColumns, columns...)
	c.CurFiles = append(c.CurFiles, files...)
	return nil
}