catalog/glue/schema.go (79 lines of code) (raw):
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package glue
import (
"fmt"
"strconv"
"strings"
"github.com/apache/iceberg-go"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/glue/types"
)
// schemaToGlueColumns builds one Glue column per field returned by
// schema.Fields(), preserving their order. The isCurrent flag is forwarded
// unchanged to fieldToGlueColumn for every field.
//
// The result is nil when the schema reports no fields.
func schemaToGlueColumns(schema *iceberg.Schema, isCurrent bool) []types.Column {
	// Intentionally not pre-sized: an empty schema must keep yielding a nil slice.
	var glueCols []types.Column

	fields := schema.Fields()
	for i := range fields {
		glueCols = append(glueCols, fieldToGlueColumn(fields[i], isCurrent))
	}

	return glueCols
}
// fieldToGlueColumn maps a single Iceberg nested field onto a Glue column.
//
// The column takes its name from field.Name, its comment from field.Doc and
// its type from icebergTypeToGlueType(field.Type). Three parameters are
// attached to the column:
//   - icebergFieldIDKey: the field ID in decimal
//   - icebergFieldOptionalKey: "true" when the field is not required
//   - icebergFieldCurrentKey: the isCurrent flag supplied by the caller
func fieldToGlueColumn(field iceberg.NestedField, isCurrent bool) types.Column {
	optional := !field.Required

	params := map[string]string{
		icebergFieldIDKey:       strconv.Itoa(field.ID),
		icebergFieldOptionalKey: strconv.FormatBool(optional),
		icebergFieldCurrentKey:  strconv.FormatBool(isCurrent),
	}

	return types.Column{
		Name:       aws.String(field.Name),
		Comment:    aws.String(field.Doc),
		Type:       aws.String(icebergTypeToGlueType(field.Type)),
		Parameters: params,
	}
}
// icebergTypeToGlueType returns the Glue type string for an Iceberg type.
//
// Primitive mapping:
//   - boolean -> "boolean", int32 -> "int", int64 -> "bigint"
//   - float32 -> "float", float64 -> "double"
//   - date -> "date"; timestamp and timestamptz -> "timestamp"
//   - string, uuid and time -> "string"
//   - binary -> "binary", fixed -> "binary(<len>)"
//   - decimal -> "decimal(<precision>,<scale>)"
//
// Nested types are rendered recursively:
//   - struct -> "struct<name1:type1,name2:type2,...>"
//   - list   -> "array<elementType>"
//   - map    -> "map<keyType,valueType>"
//
// Any type not listed above falls back to "string".
//
// Glue type reference: https://docs.aws.amazon.com/glue/latest/dg/glue-types.html#glue-types-catalog
// Hive date/time types: https://cwiki.apache.org/confluence/display/hive/languagemanual+types#LanguageManualTypes-Date/TimeTypes
// Apache Hive types: https://cwiki.apache.org/confluence/display/hive/languagemanual+types
func icebergTypeToGlueType(typ iceberg.Type) string {
	switch t := typ.(type) {
	// Nested types: recurse into the child types.
	case *iceberg.StructType:
		var sb strings.Builder
		sb.WriteString("struct<")
		for i, member := range t.Fields() {
			if i > 0 {
				sb.WriteByte(',')
			}
			sb.WriteString(member.Name)
			sb.WriteByte(':')
			sb.WriteString(icebergTypeToGlueType(member.Type))
		}
		sb.WriteByte('>')

		return sb.String()
	case *iceberg.ListType:
		elem := t.ElementField()

		return "array<" + icebergTypeToGlueType(elem.Type) + ">"
	case *iceberg.MapType:
		key, value := t.KeyField(), t.ValueField()
		keyType := icebergTypeToGlueType(key.Type)
		valueType := icebergTypeToGlueType(value.Type)

		return "map<" + keyType + "," + valueType + ">"

	// Parameterized primitives.
	case iceberg.DecimalType:
		return fmt.Sprintf("decimal(%d,%d)", t.Precision(), t.Scale())
	case iceberg.FixedType:
		return fmt.Sprintf("binary(%d)", t.Len())

	// Plain primitives.
	case iceberg.BooleanType:
		return "boolean"
	case iceberg.Int32Type:
		return "int"
	case iceberg.Int64Type:
		return "bigint"
	case iceberg.Float32Type:
		return "float"
	case iceberg.Float64Type:
		return "double"
	case iceberg.DateType:
		return "date"
	case iceberg.TimestampType, iceberg.TimestampTzType:
		return "timestamp"
	case iceberg.BinaryType:
		return "binary"
	case iceberg.StringType, iceberg.UUIDType, iceberg.TimeType:
		// UUID and time values are represented as strings.
		return "string"
	}

	// Any type without an explicit mapping is represented as a string.
	return "string"
}