func()

in scripts/go/curconvert/curconvert.go [283:355]


// ParseCur downloads the CUR manifest JSON object c.sourceObject from
// c.sourceBucket, decodes it, and records its contents on the receiver:
//
//   - c.CurColumns receives one "name=..., type=..., encoding=PLAIN_DICTIONARY"
//     definition per unique column listed under the manifest's "columns" key.
//     The column name is "<category>/<name>", lowercased, with every character
//     other than a-z, 0-9 and '/' replaced by '_'. The type comes from
//     c.CurColumnTypes when an override exists for that name, otherwise "UTF8".
//   - c.skipCols is replaced with a map holding the manifest index of every
//     column whose sanitized name duplicates an earlier column.
//   - c.CurFiles receives every entry listed under the manifest's "reportKeys".
//
// It returns an error if the S3 downloader cannot be initialised, the download
// fails, the body is not valid JSON, or the manifest does not have the
// expected shape. The receiver is only modified once the whole manifest has
// been validated, so a failed call leaves it untouched.
func (c *CurConvert) ParseCur() error {

	// init S3 manager
	s3dl, err := c.initS3Downloader(c.sourceBucket, c.sourceArn, c.sourceExternalID)
	if err != nil {
		return err
	}

	// Download CUR manifest JSON
	buff := &aws.WriteAtBuffer{}
	_, err = s3dl.Download(buff, &s3.GetObjectInput{
		Bucket: aws.String(c.sourceBucket),
		Key:    aws.String(c.sourceObject),
	})
	if err != nil {
		return fmt.Errorf("failed to download manifest, bucket: %s, object: %s, error: %w", c.sourceBucket, c.sourceObject, err)
	}

	// Unmarshal JSON
	var j map[string]interface{}
	if err = json.Unmarshal(buff.Bytes(), &j); err != nil {
		return fmt.Errorf("failed to parse manifest, bucket: %s, object: %s, error: %w", c.sourceBucket, c.sourceObject, err)
	}

	// invalid reports a manifest that decoded as JSON but lacks the expected
	// structure. Checked type assertions are used throughout so that such a
	// manifest yields an error instead of a runtime panic.
	invalid := func(reason string) error {
		return fmt.Errorf("invalid manifest, bucket: %s, object: %s, error: %s", c.sourceBucket, c.sourceObject, reason)
	}

	cols, ok := j["columns"].([]interface{})
	if !ok {
		return invalid(`missing or malformed "columns" array`)
	}
	reportKeys, ok := j["reportKeys"].([]interface{})
	if !ok {
		return invalid(`missing or malformed "reportKeys" array`)
	}

	// sanitize keeps lowercase letters, digits and '/', and substitutes '_'
	// for any other character. Input is lowercased before it is applied.
	sanitize := func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '/':
			return r
		default:
			return '_'
		}
	}

	// Collect all column definitions from the manifest
	seen := make(map[string]bool, len(cols))
	skipCols := make(map[int]bool)
	curColumns := make([]string, 0, len(cols))
	for i, col := range cols {
		t, ok := col.(map[string]interface{})
		if !ok {
			return invalid(fmt.Sprintf("column %d is not an object", i))
		}
		category, ok := t["category"].(string)
		if !ok {
			return invalid(fmt.Sprintf(`column %d has a missing or non-string "category"`, i))
		}
		name, ok := t["name"].(string)
		if !ok {
			return invalid(fmt.Sprintf(`column %d has a missing or non-string "name"`, i))
		}

		// convert column names to allowed characters (lowercase) and substitute '_' for any non-allowed character
		columnName := strings.Map(sanitize, strings.ToLower(category+"/"+name))

		// Skip duplicate columns, remembering their index
		if seen[columnName] {
			skipCols[i] = true
			continue
		}
		seen[columnName] = true

		// Check for type over-ride
		colType, ok := c.CurColumnTypes[columnName]
		if !ok {
			colType = "UTF8"
		}

		curColumns = append(curColumns, "name="+columnName+", type="+colType+", encoding=PLAIN_DICTIONARY")
	}

	// Collect CSV CUR files
	curFiles := make([]string, 0, len(reportKeys))
	for i, key := range reportKeys {
		file, ok := key.(string)
		if !ok {
			return invalid(fmt.Sprintf("reportKeys entry %d is not a string", i))
		}
		curFiles = append(curFiles, file)
	}

	// Everything validated: commit the results to the receiver.
	c.skipCols = skipCols
	c.CurColumns = append(c.CurColumns, curColumns...)
	c.CurFiles = append(c.CurFiles, curFiles...)
	return nil
}