in go/adbc/driver/bigquery/connection.go [688:793]
// buildField converts a BigQuery field schema into the equivalent Arrow field.
//
// BigQuery-specific attributes (description, mode flags, type name, policy
// tags and, for some types, max length / collation) are stored as string
// key/value pairs in the Arrow field metadata. level is the nesting depth of
// schema: callers pass 0 for the outermost field and every nested RECORD
// member is built with level+1. DefaultValueExpression is recorded only when
// level is 0.
//
// The type mapping follows
// https://cloud.google.com/bigquery/docs/reference/storage#arrow_schema_details.
// A field whose mode is REPEATED is mapped to an Arrow list of its element
// type, whatever that element type is.
//
// An adbc.Error with StatusInvalidArgument is returned for BigQuery types
// that have no mapping yet. A failure to marshal the policy tags is returned
// unchanged.
func buildField(schema *bigquery.FieldSchema, level uint) (arrow.Field, error) {
	// Decimal parameters used when the schema carries no explicit precision
	// (Precision == 0), i.e. an unparameterised NUMERIC / BIGNUMERIC column.
	const (
		defaultNumericPrecision    = 38
		defaultNumericScale        = 9
		defaultBigNumericPrecision = 76
		defaultBigNumericScale     = 38
	)

	field := arrow.Field{Name: schema.Name, Nullable: !schema.Required}
	metadata := map[string]string{
		"Description": schema.Description,
		"Repeated":    strconv.FormatBool(schema.Repeated),
		"Required":    strconv.FormatBool(schema.Required),
		"Type":        string(schema.Type),
	}

	if schema.PolicyTags != nil {
		policyTagList, err := json.Marshal(schema.PolicyTags)
		if err != nil {
			return arrow.Field{}, err
		}
		metadata["PolicyTags"] = string(policyTagList)
	}

	// The switch resolves the element type only; REPEATED mode is applied
	// once, after the switch, so that it works for every element type.
	switch schema.Type {
	case bigquery.StringFieldType:
		metadata["MaxLength"] = strconv.FormatInt(schema.MaxLength, 10)
		metadata["Collation"] = schema.Collation
		field.Type = arrow.BinaryTypes.String
	case bigquery.BytesFieldType:
		metadata["MaxLength"] = strconv.FormatInt(schema.MaxLength, 10)
		field.Type = arrow.BinaryTypes.Binary
	case bigquery.IntegerFieldType:
		field.Type = arrow.PrimitiveTypes.Int64
	case bigquery.FloatFieldType:
		field.Type = arrow.PrimitiveTypes.Float64
	case bigquery.BooleanFieldType:
		field.Type = arrow.FixedWidthTypes.Boolean
	case bigquery.TimestampFieldType:
		// BigQuery TIMESTAMP has microsecond precision.
		field.Type = arrow.FixedWidthTypes.Timestamp_us
	case bigquery.RecordFieldType:
		// A RECORD is always a struct of its nested fields; a repeated
		// RECORD becomes a list of that struct below.
		nestedFields := make([]arrow.Field, len(schema.Schema))
		for i, nestedSchema := range schema.Schema {
			f, err := buildField(nestedSchema, level+1)
			if err != nil {
				return arrow.Field{}, err
			}
			nestedFields[i] = f
		}
		structType := arrow.StructOf(nestedFields...)
		if structType == nil {
			return arrow.Field{}, adbc.Error{
				Code: adbc.StatusInvalidArgument,
				Msg:  fmt.Sprintf("Cannot create a struct schema for record `%s`", schema.Name),
			}
		}
		field.Type = structType
	case bigquery.DateFieldType:
		field.Type = arrow.FixedWidthTypes.Date32
	case bigquery.TimeFieldType:
		field.Type = arrow.FixedWidthTypes.Time64us
	case bigquery.DateTimeFieldType:
		// NOTE(review): BigQuery DATETIME has no time zone, while
		// Timestamp_us presumably carries a UTC zone — confirm whether a
		// zone-less microsecond timestamp should be used here instead.
		field.Type = arrow.FixedWidthTypes.Timestamp_us
	case bigquery.NumericFieldType:
		precision, scale := schema.Precision, schema.Scale
		if precision == 0 {
			precision, scale = defaultNumericPrecision, defaultNumericScale
		}
		field.Type = &arrow.Decimal128Type{
			Precision: int32(precision),
			Scale:     int32(scale),
		}
	case bigquery.GeographyFieldType:
		// TODO: potentially we should consider using GeoArrow for this
		field.Type = arrow.BinaryTypes.String
	case bigquery.BigNumericFieldType:
		precision, scale := schema.Precision, schema.Scale
		if precision == 0 {
			precision, scale = defaultBigNumericPrecision, defaultBigNumericScale
		}
		field.Type = &arrow.Decimal256Type{
			Precision: int32(precision),
			Scale:     int32(scale),
		}
	case bigquery.JSONFieldType:
		field.Type = arrow.BinaryTypes.String
	default:
		// TODO: unsupported ones are:
		// - bigquery.IntervalFieldType
		// - bigquery.RangeFieldType
		return arrow.Field{}, adbc.Error{
			Code: adbc.StatusInvalidArgument,
			Msg:  fmt.Sprintf("Google SQL type `%s` is not supported yet", schema.Type),
		}
	}

	// REPEATED mode means the column is an array of the type resolved above.
	if schema.Repeated {
		field.Type = arrow.ListOf(field.Type)
	}

	if level == 0 {
		metadata["DefaultValueExpression"] = schema.DefaultValueExpression
	}
	field.Metadata = arrow.MetadataFrom(metadata)
	return field, nil
}