func buildField()

in go/adbc/driver/bigquery/connection.go [688:793]


// buildField converts a BigQuery field schema into the equivalent Arrow field.
//
// BigQuery attributes that have no direct Arrow counterpart (description,
// mode, original type name, policy tags, max length, collation) are kept as
// string key/value pairs in the Arrow field metadata.
//
// level is the nesting depth of the field: 0 for a top-level column, and one
// more for every enclosing RECORD. DefaultValueExpression is only recorded
// for level 0 fields.
//
// The type mapping follows
// https://cloud.google.com/bigquery/docs/reference/storage#arrow_schema_details.
// A field in REPEATED mode, whatever its element type, becomes an Arrow list
// of the element type; a RECORD becomes an Arrow struct of its nested fields.
//
// An error is returned when schema is nil, when the policy tags cannot be
// serialized to JSON, when a nested field fails to convert, or when the
// BigQuery type is not supported yet (INTERVAL and RANGE).
func buildField(schema *bigquery.FieldSchema, level uint) (arrow.Field, error) {
	if schema == nil {
		return arrow.Field{}, adbc.Error{
			Code: adbc.StatusInvalidArgument,
			Msg:  "Cannot build an Arrow field from a nil BigQuery field schema",
		}
	}

	field := arrow.Field{Name: schema.Name}
	metadata := make(map[string]string)
	metadata["Description"] = schema.Description
	metadata["Repeated"] = strconv.FormatBool(schema.Repeated)
	metadata["Required"] = strconv.FormatBool(schema.Required)
	field.Nullable = !schema.Required
	metadata["Type"] = string(schema.Type)

	if schema.PolicyTags != nil {
		policyTagList, err := json.Marshal(schema.PolicyTags)
		if err != nil {
			return arrow.Field{}, err
		}
		metadata["PolicyTags"] = string(policyTagList)
	}

	// Resolve the element type first; REPEATED mode is applied afterwards so
	// that it works uniformly for scalars and records.
	// https://cloud.google.com/bigquery/docs/reference/storage#arrow_schema_details
	switch schema.Type {
	case bigquery.StringFieldType:
		metadata["MaxLength"] = strconv.FormatInt(schema.MaxLength, 10)
		metadata["Collation"] = schema.Collation
		field.Type = arrow.BinaryTypes.String
	case bigquery.BytesFieldType:
		metadata["MaxLength"] = strconv.FormatInt(schema.MaxLength, 10)
		field.Type = arrow.BinaryTypes.Binary
	case bigquery.IntegerFieldType:
		field.Type = arrow.PrimitiveTypes.Int64
	case bigquery.FloatFieldType:
		field.Type = arrow.PrimitiveTypes.Float64
	case bigquery.BooleanFieldType:
		field.Type = arrow.FixedWidthTypes.Boolean
	case bigquery.TimestampFieldType:
		// TIMESTAMP is an absolute instant with microsecond precision (UTC).
		field.Type = arrow.FixedWidthTypes.Timestamp_us
	case bigquery.RecordFieldType:
		// A RECORD maps to an Arrow struct made of its nested fields.
		nestedFields := make([]arrow.Field, len(schema.Schema))
		for i, nestedSchema := range schema.Schema {
			f, err := buildField(nestedSchema, level+1)
			if err != nil {
				return arrow.Field{}, err
			}
			nestedFields[i] = f
		}
		structType := arrow.StructOf(nestedFields...)
		if structType == nil {
			return arrow.Field{}, adbc.Error{
				Code: adbc.StatusInvalidArgument,
				Msg:  fmt.Sprintf("Cannot create a struct schema for record `%s`", schema.Name),
			}
		}
		field.Type = structType
	case bigquery.DateFieldType:
		field.Type = arrow.FixedWidthTypes.Date32
	case bigquery.TimeFieldType:
		field.Type = arrow.FixedWidthTypes.Time64us
	case bigquery.DateTimeFieldType:
		// DATETIME is a civil (wall-clock) time: microsecond precision and,
		// unlike TIMESTAMP, no timezone attached.
		field.Type = &arrow.TimestampType{Unit: arrow.Microsecond}
	case bigquery.NumericFieldType:
		precision, scale := int32(schema.Precision), int32(schema.Scale)
		if precision == 0 {
			// Unparameterized NUMERIC: fall back to the type's defaults.
			precision, scale = 38, 9
		}
		field.Type = &arrow.Decimal128Type{
			Precision: precision,
			Scale:     scale,
		}
	case bigquery.GeographyFieldType:
		// TODO: potentially we should consider using GeoArrow for this
		field.Type = arrow.BinaryTypes.String
	case bigquery.BigNumericFieldType:
		precision, scale := int32(schema.Precision), int32(schema.Scale)
		if precision == 0 {
			// Unparameterized BIGNUMERIC: fall back to the type's defaults.
			precision, scale = 76, 38
		}
		field.Type = &arrow.Decimal256Type{
			Precision: precision,
			Scale:     scale,
		}
	case bigquery.JSONFieldType:
		field.Type = arrow.BinaryTypes.String
	default:
		// TODO: unsupported ones are:
		// - bigquery.IntervalFieldType
		// - bigquery.RangeFieldType
		return arrow.Field{}, adbc.Error{
			Code: adbc.StatusInvalidArgument,
			Msg:  fmt.Sprintf("Google SQL type `%s` is not supported yet", schema.Type),
		}
	}

	// A REPEATED field is an array of the element type resolved above.
	if schema.Repeated {
		field.Type = arrow.ListOf(field.Type)
	}

	if level == 0 {
		metadata["DefaultValueExpression"] = schema.DefaultValueExpression
	}
	field.Metadata = arrow.MetadataFrom(metadata)
	return field, nil
}