in athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTypeUtils.java [72:145]
/**
 * Infers an Arrow {@link Field} definition for a value by inspecting its Java runtime type.
 *
 * <p>Supported mappings:
 * <ul>
 *   <li>{@code String} -> VARCHAR</li>
 *   <li>{@code byte[]} -> VARBINARY</li>
 *   <li>{@code Boolean} -> BIT</li>
 *   <li>{@code BigDecimal} -> DECIMAL(38, 9)</li>
 *   <li>{@code List} / {@code Set} -> LIST with a single child field describing the elements</li>
 *   <li>{@code Map} -> STRUCT with one child field per entry whose type could be inferred</li>
 * </ul>
 *
 * <p>Fallbacks: an empty collection, or a collection whose non-null elements are of more than one
 * class, gets a VARCHAR element type; a Map that yields no child fields is represented as VARCHAR.
 * Null elements inside a collection are ignored when determining the element type.
 *
 * @param key the name to give the inferred field
 * @param value the value to inspect; may be null
 * @return the inferred field, or {@code null} when {@code value} is null or is a non-empty
 *         collection containing only null elements (no type can be inferred in either case)
 * @throws RuntimeException if {@code value} (or a nested value) is of an unsupported type
 */
public static Field inferArrowField(String key, Object value)
{
    logger.debug("inferArrowField invoked for key {} of class {}", key,
            value != null ? value.getClass() : null);
    if (value == null) {
        return null;
    }

    if (value instanceof String) {
        return new Field(key, FieldType.nullable(Types.MinorType.VARCHAR.getType()), null);
    }
    else if (value instanceof byte[]) {
        return new Field(key, FieldType.nullable(Types.MinorType.VARBINARY.getType()), null);
    }
    else if (value instanceof Boolean) {
        return new Field(key, FieldType.nullable(Types.MinorType.BIT.getType()), null);
    }
    else if (value instanceof BigDecimal) {
        return new Field(key, FieldType.nullable(new ArrowType.Decimal(38, 9)), null);
    }
    else if (value instanceof List || value instanceof Set) {
        Collection<?> elements = (Collection<?>) value;
        Field child;
        if (elements.isEmpty()) {
            logger.warn("Automatic schema inference encountered empty List or Set {}. Unable to determine element types. Falling back to VARCHAR representation", key);
            child = inferArrowField("", "");
        }
        else {
            // The first non-null element is the representative; every other non-null element
            // must share its exact class. Null elements carry no type information and are
            // skipped rather than dereferenced.
            Object representative = null;
            boolean allElementsAreSameType = true;
            for (Object element : elements) {
                if (element == null) {
                    continue;
                }
                if (representative == null) {
                    representative = element;
                }
                else if (!representative.getClass().equals(element.getClass())) {
                    allElementsAreSameType = false;
                    break;
                }
            }

            if (allElementsAreSameType) {
                // When every element was null, representative is still null and the recursive
                // call returns null, which is handled by the guard below.
                child = inferArrowField(key + ".element", representative);
            }
            else {
                logger.warn("Automatic schema inference encountered List or Set {} containing multiple element types. Falling back to VARCHAR representation of elements", key);
                child = inferArrowField("", "");
            }
        }
        return child == null
                ? null
                : new Field(key, FieldType.nullable(Types.MinorType.LIST.getType()),
                        Collections.singletonList(child));
    }
    else if (value instanceof Map) {
        List<Field> children = new ArrayList<>();
        // keys are always Strings in DDB's case
        @SuppressWarnings("unchecked")
        Map<String, Object> doc = (Map<String, Object>) value;
        for (Map.Entry<String, Object> entry : doc.entrySet()) {
            // Entries whose type cannot be inferred (e.g. null values) are left out of the struct.
            Field child = inferArrowField(entry.getKey(), entry.getValue());
            if (child != null) {
                children.add(child);
            }
        }
        // Athena requires Structs to have child types and not be empty
        if (children.isEmpty()) {
            logger.warn("Automatic schema inference encountered empty Map {}. Unable to determine element types. Falling back to VARCHAR representation", key);
            return new Field(key, FieldType.nullable(Types.MinorType.VARCHAR.getType()), null);
        }
        return new Field(key, FieldType.nullable(Types.MinorType.STRUCT.getType()), children);
    }

    // value is known to be non-null here, so its class name is always available.
    throw new RuntimeException("Unknown type[" + value.getClass().getName() + "] for field[" + key + "]");
}