in parquet-cli/src/main/java/org/apache/parquet/cli/json/AvroJson.java [104:211]
/**
 * Converts a JSON node into the in-memory Avro representation described by
 * {@code schema}, creating records, enums and fixed values through
 * {@code model}.
 *
 * <p>Containers are converted recursively: record fields go through
 * {@code convertField}, map values and array elements go back through this
 * method, and a union is first narrowed to a single branch by
 * {@code resolveUnion}. Each branch validates the JSON node type with
 * {@code RecordException.check} before reading the value (the check is
 * assumed to throw {@code RecordException} when its condition is false).
 *
 * @param model  Avro data model used to build records, enums and fixed values
 * @param datum  JSON node to convert; a Java {@code null} yields {@code null}
 * @param schema Avro schema the result must conform to
 * @return the converted value, or {@code null} when {@code datum} is
 *         {@code null} or the schema type is {@code NULL}
 * @throws RecordException if a JSON binary value cannot be read (and,
 *         presumably, whenever a validation check fails)
 * @throws IllegalArgumentException if the schema type is not handled here
 */
public static Object convertToAvro(GenericData model, JsonNode datum,
                                   Schema schema) {
  if (datum == null) {
    return null;
  }
  switch (schema.getType()) {
    case RECORD:
      RecordException.check(datum.isObject(),
          "Cannot convert non-object to record: %s", datum);
      Object record = model.newRecord(null, schema);
      for (Schema.Field field : schema.getFields()) {
        // the field's JSON value is looked up by name and passed as-is
        // (possibly null) to convertField
        model.setField(record, field.name(), field.pos(),
            convertField(model, datum.get(field.name()), field));
      }
      return record;
    case MAP:
      RecordException.check(datum.isObject(),
          "Cannot convert non-object to map: %s", datum);
      // linked map keeps entries in the order the JSON node iterates them
      Map<String, Object> map = Maps.newLinkedHashMap();
      Iterator<Map.Entry<String, JsonNode>> iter = datum.fields();
      while (iter.hasNext()) {
        Map.Entry<String, JsonNode> entry = iter.next();
        map.put(entry.getKey(), convertToAvro(
            model, entry.getValue(), schema.getValueType()));
      }
      return map;
    case ARRAY:
      RecordException.check(datum.isArray(),
          "Cannot convert to array: %s", datum);
      List<Object> list = Lists.newArrayListWithExpectedSize(datum.size());
      for (JsonNode element : datum) {
        list.add(convertToAvro(model, element, schema.getElementType()));
      }
      return list;
    case UNION:
      // pick one branch of the union for this datum, then convert against it
      return convertToAvro(model, datum,
          resolveUnion(datum, schema.getTypes()));
    case BOOLEAN:
      RecordException.check(datum.isBoolean(),
          "Cannot convert to boolean: %s", datum);
      return datum.booleanValue();
    case FLOAT:
      RecordException.check(datum.isFloat() || datum.isInt(),
          "Cannot convert to float: %s", datum);
      return datum.floatValue();
    case DOUBLE:
      RecordException.check(
          datum.isDouble() || datum.isFloat() ||
          datum.isLong() || datum.isInt(),
          "Cannot convert to double: %s", datum);
      return datum.doubleValue();
    case INT:
      RecordException.check(datum.isInt(),
          "Cannot convert to int: %s", datum);
      return datum.intValue();
    case LONG:
      RecordException.check(datum.isLong() || datum.isInt(),
          "Cannot convert to long: %s", datum);
      return datum.longValue();
    case STRING:
      RecordException.check(datum.isTextual(),
          "Cannot convert to string: %s", datum);
      return datum.textValue();
    case ENUM:
      // enum symbols are read from JSON text nodes
      RecordException.check(datum.isTextual(),
          "Cannot convert to string: %s", datum);
      return model.createEnum(datum.textValue(), schema);
    case BYTES:
      RecordException.check(datum.isBinary(),
          "Cannot convert to binary: %s", datum);
      try {
        return ByteBuffer.wrap(datum.binaryValue());
      } catch (IOException e) {
        throw new RecordException("Failed to read JSON binary", e);
      }
    case FIXED:
      RecordException.check(datum.isBinary(),
          "Cannot convert to fixed: %s", datum);
      byte[] bytes;
      try {
        bytes = datum.binaryValue();
      } catch (IOException e) {
        throw new RecordException("Failed to read JSON binary", e);
      }
      // Valid data must supply at least getFixedSize() bytes. The previous
      // condition (length < size) was inverted: it rejected well-sized data
      // and let short data through to createFixed.
      // NOTE(review): input longer than the fixed size is still accepted,
      // since the message only rules out short data - confirm whether an
      // exact-length match should be required instead.
      RecordException.check(bytes.length >= schema.getFixedSize(),
          "Binary data is too short: %s bytes for %s", bytes.length, schema);
      return model.createFixed(null, bytes, schema);
    case NULL:
      return null;
    default:
      // not a RecordException: an unhandled type is a problem with the
      // schema, not with the data being converted
      throw new IllegalArgumentException("Unknown schema type: " + schema);
  }
}