in contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java [539:669]
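  /**
   * Writes the contents of an HDF5 dataset to the current row set. One-dimensional
   * datasets are written as Drill list columns, two-dimensional datasets as
   * matrices, and datasets with more than two dimensions are reduced to a 2D view
   * of their first two dimensions.
   *
   * @param rowWriter the RowSetLoader used to write the projected data
   * @param datapath  the full HDF5 path of the dataset to project
   */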
  private void projectDataset(RowSetLoader rowWriter, String datapath) {
    String fieldName = HDF5Utils.getNameFromPath(datapath);
    Dataset dataset = hdfFile.getDatasetByPath(datapath);

    // Datasets larger than 16MB are still projected, but warn the user that
    // the data will be truncated in the metadata view.
    if (dataset.getSizeInBytes() > MAX_DATASET_SIZE) {
      logger.warn("Dataset {} is greater than 16MB. Data will be truncated in Metadata view.", datapath);
    }

    int[] dimensions = dataset.getDimensions();
    // Case for one-dimensional data
    if (dimensions.length == 1) {
      MinorType currentDataType = HDF5Utils.getDataType(dataset.getDataType());
      Object data;
      try {
        data = dataset.getData();
      } catch (Exception e) {
        // Skip datasets that cannot be read rather than failing the whole scan.
        logger.debug("Error reading {}", datapath, e);
        return;
      }
      assert currentDataType != null;

      // Skip null datasets
      if (data == null) {
        return;
      }
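      // Write the dataset as a repeated (list) column of the corresponding Drill type.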
      switch (currentDataType) {
        case GENERIC_OBJECT:
          logger.warn("Couldn't read {}", datapath);
          break;
        case VARCHAR:
          String[] stringData = (String[]) data;
          writeStringListColumn(rowWriter, fieldName, stringData);
          break;
        case TIMESTAMP:
          long[] longList = (long[]) data;
          writeTimestampListColumn(rowWriter, fieldName, longList);
          break;
        case INT:
          int[] intList = (int[]) data;
          writeIntListColumn(rowWriter, fieldName, intList);
          break;
        case SMALLINT:
          short[] shortList = (short[]) data;
          writeSmallIntColumn(rowWriter, fieldName, shortList);
          break;
        case TINYINT:
          byte[] byteList = (byte[]) data;
          writeByteListColumn(rowWriter, fieldName, byteList);
          break;
        case FLOAT4:
          float[] tempFloatList = (float[]) data;
          writeFloat4ListColumn(rowWriter, fieldName, tempFloatList);
          break;
        case FLOAT8:
          double[] tempDoubleList = (double[]) data;
          writeFloat8ListColumn(rowWriter, fieldName, tempDoubleList);
          break;
        case BIGINT:
          long[] tempBigIntList = (long[]) data;
          writeLongListColumn(rowWriter, fieldName, tempBigIntList);
          break;
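        // Compound datasets are projected as Drill maps.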
        case MAP:
          try {
            getAndMapCompoundData(datapath, hdfFile, rowWriter);
          } catch (Exception e) {
            throw UserException
              .dataReadError()
              .message("Error writing Compound Field: " + e.getMessage())
              .addContext(errorContext)
              .build(logger);
          }
          break;
        default:
          // Case for data types that cannot be read
          logger.warn("{} not implemented.", currentDataType.name());
      }
    } else if (dimensions.length == 2) {
      // Case for 2D datasets, which are projected as lists of lists or maps of maps
      int cols = dimensions[1];
      int rows = dimensions[0];
      MinorType dataType = HDF5Utils.getDataType(dataset.getDataType());
      // TODO Add Boolean, SmallInt and TinyInt data types
      switch (dataType) {
        case INT:
          int[][] colData = (int[][]) dataset.getData();
          mapIntMatrixField(colData, cols, rows, rowWriter);
          break;
        case FLOAT4:
          float[][] floatData = (float[][]) dataset.getData();
          mapFloatMatrixField(floatData, cols, rows, rowWriter);
          break;
        case FLOAT8:
          double[][] doubleData = (double[][]) dataset.getData();
          mapDoubleMatrixField(doubleData, cols, rows, rowWriter);
          break;
        case BIGINT:
          long[][] longData = (long[][]) dataset.getData();
          mapBigIntMatrixField(longData, cols, rows, rowWriter);
          break;
        default:
          logger.warn("{} not implemented.", dataType);
      }
    } else if (dimensions.length > 2) {
      // Case for datasets with more than two dimensions: only the first two
      // dimensions are projected.
      int cols = dimensions[1];
      int rows = dimensions[0];
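      // The HDF5Utils.to*Matrix() helpers convert the nested Object[] returned
      // by the HDF5 library into primitive 2D matrices.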
      MinorType dataType = HDF5Utils.getDataType(dataset.getDataType());
      switch (dataType) {
        case INT:
          int[][] intMatrix = HDF5Utils.toIntMatrix((Object[]) dataset.getData());
          mapIntMatrixField(intMatrix, cols, rows, rowWriter);
          break;
        case FLOAT4:
          float[][] floatData = HDF5Utils.toFloatMatrix((Object[]) dataset.getData());
          mapFloatMatrixField(floatData, cols, rows, rowWriter);
          break;
        case FLOAT8:
          double[][] doubleData = HDF5Utils.toDoubleMatrix((Object[]) dataset.getData());
          mapDoubleMatrixField(doubleData, cols, rows, rowWriter);
          break;
        case BIGINT:
          long[][] longData = HDF5Utils.toLongMatrix((Object[]) dataset.getData());
          mapBigIntMatrixField(longData, cols, rows, rowWriter);
          break;
        default:
          logger.warn("{} not implemented.", dataType);
      }
    }
  }