public static Field inferArrowField()

in athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTypeUtils.java [72:145]


    /**
     * Infers a nullable Arrow {@link Field} named {@code key} from the runtime type of a sample value.
     *
     * <ul>
     *   <li>{@code String} maps to VARCHAR, {@code byte[]} to VARBINARY, {@code Boolean} to BIT and
     *       {@code BigDecimal} to DECIMAL(38, 9).</li>
     *   <li>{@code List} / {@code Set} maps to LIST with a single child inferred from the first non-null
     *       element. If the collection is empty, holds only nulls, or mixes element classes, the child
     *       falls back to VARCHAR.</li>
     *   <li>{@code Map} maps to STRUCT with one child per entry whose value is non-null. If no child can
     *       be inferred, the field itself falls back to VARCHAR.</li>
     * </ul>
     *
     * @param key the name given to the resulting field
     * @param value the sample value to inspect; may be null
     * @return the inferred field, or {@code null} when {@code value} is null
     * @throws RuntimeException if {@code value} is of a type not listed above
     */
    public static Field inferArrowField(String key, Object value)
    {
        logger.debug("inferArrowField invoked for key {} of class {}", key,
                value != null ? value.getClass() : null);
        if (value == null) {
            return null;
        }

        if (value instanceof String) {
            return new Field(key, FieldType.nullable(Types.MinorType.VARCHAR.getType()), null);
        }
        else if (value instanceof byte[]) {
            return new Field(key, FieldType.nullable(Types.MinorType.VARBINARY.getType()), null);
        }
        else if (value instanceof Boolean) {
            return new Field(key, FieldType.nullable(Types.MinorType.BIT.getType()), null);
        }
        else if (value instanceof BigDecimal) {
            return new Field(key, FieldType.nullable(new ArrowType.Decimal(38, 9)), null);
        }
        else if (value instanceof List || value instanceof Set) {
            Collection<?> elements = (Collection<?>) value;

            // Find the first non-null element and verify every other non-null element shares its class.
            // Null elements carry no type information, so they are skipped rather than dereferenced.
            Object sample = null;
            boolean allElementsAreSameType = true;
            for (Object element : elements) {
                if (element == null) {
                    continue;
                }
                if (sample == null) {
                    sample = element;
                }
                else if (!sample.getClass().equals(element.getClass())) {
                    allElementsAreSameType = false;
                    break;
                }
            }

            Field child;
            if (elements.isEmpty()) {
                logger.warn("Automatic schema inference encountered empty List or Set {}. Unable to determine element types. Falling back to VARCHAR representation", key);
                child = inferArrowField("", "");
            }
            else if (sample == null) {
                logger.warn("Automatic schema inference encountered List or Set {} containing only null elements. Unable to determine element types. Falling back to VARCHAR representation", key);
                child = inferArrowField("", "");
            }
            else if (allElementsAreSameType) {
                child = inferArrowField(key + ".element", sample);
            }
            else {
                logger.warn("Automatic schema inference encountered List or Set {} containing multiple element types. Falling back to VARCHAR representation of elements", key);
                child = inferArrowField("", "");
            }

            // child is never null here: every path above infers from a non-null sample value
            return new Field(key, FieldType.nullable(Types.MinorType.LIST.getType()),
                    Collections.singletonList(child));
        }
        else if (value instanceof Map) {
            List<Field> children = new ArrayList<>();
            // keys are always Strings in DDB's case
            Map<String, Object> doc = (Map<String, Object>) value;
            for (Map.Entry<String, Object> entry : doc.entrySet()) {
                // entries with a null value yield no field and are left out of the struct
                Field child = inferArrowField(entry.getKey(), entry.getValue());
                if (child != null) {
                    children.add(child);
                }
            }

            // Athena requires Structs to have child types and not be empty
            if (children.isEmpty()) {
                logger.warn("Automatic schema inference encountered empty Map {}. Unable to determine element types. Falling back to VARCHAR representation", key);
                return new Field(key, FieldType.nullable(Types.MinorType.VARCHAR.getType()), null);
            }

            return new Field(key, FieldType.nullable(Types.MinorType.STRUCT.getType()), children);
        }

        // value is guaranteed non-null at this point (null returned early above)
        throw new RuntimeException("Unknown type[" + value.getClass().getName() + "] for field[" + key + "]");
    }