in parquet-cli/src/main/java/org/apache/parquet/cli/json/AvroJson.java [104:200]
/**
 * Converts a parsed JSON node into the Avro in-memory value described by {@code schema}.
 *
 * <p>Container types (record, map, array, union) are converted recursively; primitive types are
 * validated against the JSON node's type and then extracted. Record fields are delegated to
 * {@code convertField}, and the union branch is chosen by {@code resolveUnion}; both are defined
 * outside this method.
 *
 * @param model the Avro data model used to create records, enums and fixed values
 * @param datum the JSON node to convert; a Java {@code null} yields {@code null}
 * @param schema the Avro schema the result must conform to
 * @return the converted value, or {@code null} when {@code datum} is {@code null} or the schema
 *     type is NULL
 * @throws RecordException if the JSON binary payload cannot be read; a failed
 *     {@code RecordException.check} (JSON value does not match the schema type) is presumably
 *     reported the same way
 * @throws IllegalArgumentException if the schema type is not one handled here
 */
public static Object convertToAvro(GenericData model, JsonNode datum, Schema schema) {
  if (datum == null) {
    return null;
  }
  switch (schema.getType()) {
    case RECORD:
      RecordException.check(datum.isObject(), "Cannot convert non-object to record: %s", datum);
      Object record = model.newRecord(null, schema);
      for (Schema.Field field : schema.getFields()) {
        // datum.get(...) may be null for a missing property; convertField decides what to do.
        model.setField(
            record, field.name(), field.pos(), convertField(model, datum.get(field.name()), field));
      }
      return record;
    case MAP:
      RecordException.check(datum.isObject(), "Cannot convert non-object to map: %s", datum);
      // Linked map keeps the entries in the order the JSON object iterates them.
      Map<String, Object> map = Maps.newLinkedHashMap();
      Iterator<Map.Entry<String, JsonNode>> iter = datum.fields();
      while (iter.hasNext()) {
        Map.Entry<String, JsonNode> entry = iter.next();
        map.put(entry.getKey(), convertToAvro(model, entry.getValue(), schema.getValueType()));
      }
      return map;
    case ARRAY:
      RecordException.check(datum.isArray(), "Cannot convert to array: %s", datum);
      List<Object> list = Lists.newArrayListWithExpectedSize(datum.size());
      for (JsonNode element : datum) {
        list.add(convertToAvro(model, element, schema.getElementType()));
      }
      return list;
    case UNION:
      // Pick the branch schema first, then convert against that concrete schema.
      return convertToAvro(model, datum, resolveUnion(datum, schema.getTypes()));
    case BOOLEAN:
      RecordException.check(datum.isBoolean(), "Cannot convert to boolean: %s", datum);
      return datum.booleanValue();
    case FLOAT:
      RecordException.check(datum.isFloat() || datum.isInt(), "Cannot convert to float: %s", datum);
      return datum.floatValue();
    case DOUBLE:
      RecordException.check(
          datum.isDouble() || datum.isFloat() || datum.isLong() || datum.isInt(),
          "Cannot convert to double: %s",
          datum);
      return datum.doubleValue();
    case INT:
      RecordException.check(datum.isInt(), "Cannot convert to int: %s", datum);
      return datum.intValue();
    case LONG:
      RecordException.check(datum.isLong() || datum.isInt(), "Cannot convert to long: %s", datum);
      return datum.longValue();
    case STRING:
      RecordException.check(datum.isTextual(), "Cannot convert to string: %s", datum);
      return datum.textValue();
    case ENUM:
      RecordException.check(datum.isTextual(), "Cannot convert to string: %s", datum);
      return model.createEnum(datum.textValue(), schema);
    case BYTES:
      RecordException.check(datum.isBinary(), "Cannot convert to binary: %s", datum);
      try {
        return ByteBuffer.wrap(datum.binaryValue());
      } catch (IOException e) {
        throw new RecordException("Failed to read JSON binary", e);
      }
    case FIXED:
      RecordException.check(datum.isBinary(), "Cannot convert to fixed: %s", datum);
      byte[] bytes;
      try {
        bytes = datum.binaryValue();
      } catch (IOException e) {
        throw new RecordException("Failed to read JSON binary", e);
      }
      // As with every other check in this method, the first argument is the condition that
      // must hold: the payload needs at least getFixedSize() bytes, otherwise it is too short.
      RecordException.check(
          bytes.length >= schema.getFixedSize(),
          "Binary data is too short: %s bytes for %s",
          bytes.length,
          schema);
      return model.createFixed(null, bytes, schema);
    case NULL:
      return null;
    default:
      // don't use RecordException because this is a Schema problem, not a data problem
      throw new IllegalArgumentException("Unknown schema type: " + schema);
  }
}