in tensorflow_io/core/kernels/hdf5_kernels.cc [339:577]
// Reads a slice of the HDF5 dataset named `component` into a tensor obtained
// from `allocate_func`.
//
// Args:
//   component:     name of the dataset; must be a key of `columns_index_`.
//   start:         per-dimension offset of the slice. Only consulted when
//                  `shape` is not scalar, in which case it must hold at least
//                  as many entries as the dataset has dimensions.
//   shape:         extent of the slice. A scalar shape (rank 0) reads the
//                  dataset without applying any hyperslab selection.
//   allocate_func: callback that allocates the output tensor for `shape`.
//
// Returns:
//   OK on success; InvalidArgument if the dataset is unknown, the slice does
//   not fit the dataset, the on-disk type cannot be mapped, or HDF5 reports an
//   error; Unimplemented for type classes that are not handled.
//
// The whole call runs while holding `mu_`.
Status Read(const string& component,
            const absl::InlinedVector<int64, 4>& start,
            const TensorShape& shape,
            std::function<Status(const TensorShape& shape, Tensor** value)>
                allocate_func) {
  mutex_lock l(mu_);
  const auto lookup = columns_index_.find(component);
  if (lookup == columns_index_.end()) {
    return errors::InvalidArgument("dataset ", component, " not found");
  }
  const int64 column_index = lookup->second;
  Tensor* value = nullptr;
  TF_RETURN_IF_ERROR(allocate_func(shape, &value));
  H5::H5File* file = file_image_->GetFile();
  try {
    H5::DataSet data_set = file->openDataSet(component);
    H5::DataType data_type = data_set.getDataType();
    H5::DataSpace data_space = data_set.getSpace();
    H5::DataSpace memory_space = H5::DataSpace::ALL;
    // NOTE(review): with a scalar `shape` the dataset is read in full into a
    // one-element tensor; this presumably relies on callers only passing a
    // scalar shape for scalar datasets -- confirm against callers.
    if (shape.dims() != 0) {
      const int rank = data_space.getSimpleExtentNdims();
      if (rank != shape.dims()) {
        return errors::InvalidArgument("rank does not match: ", rank, " vs. ",
                                       shape.dims());
      }
      // Guard the start[i] accesses below.
      if (start.size() < static_cast<size_t>(rank)) {
        return errors::InvalidArgument("start rank does not match: ",
                                       start.size(), " vs. ", rank);
      }
      absl::InlinedVector<hsize_t, 4> extent(rank);
      absl::InlinedVector<hsize_t, 4> offset(rank);
      absl::InlinedVector<hsize_t, 4> count(rank);
      data_space.getSimpleExtentDims(extent.data());
      for (int i = 0; i < rank; i++) {
        const int64 begin = start[i];
        const int64 length = shape.dim_size(i);
        // hsize_t is unsigned, so reject negative offsets explicitly instead
        // of relying on the implicit signed-to-unsigned conversion.
        if (begin < 0 || static_cast<hsize_t>(begin) > extent[i] ||
            static_cast<hsize_t>(begin) + static_cast<hsize_t>(length) >
                extent[i]) {
          return errors::InvalidArgument(
              "dimension [", i, "] out of boundary: start=", start[i],
              ", slice=", shape.dim_size(i), ", boundary=", extent[i]);
        }
        offset[i] = static_cast<hsize_t>(begin);
        count[i] = static_cast<hsize_t>(length);
      }
      memory_space = H5::DataSpace(rank, count.data());
      data_space.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data());
    }
    // Variable-length buffers must be reclaimed with the dataspace that
    // describes the *memory* buffer. The file dataspace carries the hyperslab
    // offsets, which would make HDF5 walk outside the buffer for any slice
    // that does not start at the origin. For scalar reads no memory dataspace
    // was built (H5S_ALL is not a valid dataspace for reclaim), so the
    // unmodified file dataspace is used there.
    const H5::DataSpace& reclaim_space =
        (shape.dims() != 0) ? memory_space : data_space;
    switch (dtypes_[column_index]) {
      case DT_UINT8:
        data_set.read(value->flat<uint8>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_UINT16:
        data_set.read(value->flat<uint16>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_UINT32:
        data_set.read(value->flat<uint32>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_UINT64:
        data_set.read(value->flat<uint64>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_INT8:
        data_set.read(value->flat<int8>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_INT16:
        data_set.read(value->flat<int16>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_INT32:
        data_set.read(value->flat<int32>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_INT64:
        data_set.read(value->flat<int64>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_FLOAT:
        data_set.read(value->flat<float>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_DOUBLE:
        data_set.read(value->flat<double>().data(), data_type, memory_space,
                      data_space);
        break;
      case DT_COMPLEX64:
        data_set.read(value->flat<complex64>().data(), data_type,
                      memory_space, data_space);
        break;
      case DT_COMPLEX128:
        data_set.read(value->flat<complex128>().data(), data_type,
                      memory_space, data_space);
        break;
      case DT_STRING:
        switch (data_type.getClass()) {
          case H5T_STRING: {
            // Obtain a real StrType instead of down-casting the generic
            // DataType object, which is undefined behavior.
            H5::StrType str_type = data_set.getStrType();
            const int64 total = value->NumElements();
            auto strings = value->flat<tstring>();
            if (str_type.isVariableStr()) {
              std::unique_ptr<char*[]> buffer(new char*[total]);
              data_set.read(buffer.get(), str_type, memory_space, data_space);
              for (int64 i = 0; i < total; i++) {
                const char* p = buffer[i];
                // A null pointer is treated as an empty string rather than
                // being handed to the std::string constructor.
                strings(i) = (p != nullptr) ? string(p) : string();
              }
              H5::DataSet::vlenReclaim(buffer.get(), str_type, reclaim_space);
            } else {
              const size_t element_size = data_type.getSize();
              std::unique_ptr<char[]> buffer(new char[element_size * total]);
              data_set.read(buffer.get(), data_type, memory_space,
                            data_space);
              const H5T_str_t pad = str_type.getStrpad();
              switch (pad) {
                case H5T_STR_NULLTERM:
                  // The string ends at the first NUL (or at the field size).
                  for (int64 i = 0; i < total; i++) {
                    const char* p = buffer.get() + element_size * i;
                    size_t len = 0;
                    while (len < element_size && p[len] != 0x00) {
                      len++;
                    }
                    strings(i) = string(p, len);
                  }
                  break;
                case H5T_STR_NULLPAD:
                  // Only trailing NULs are padding; embedded NULs are kept.
                  for (int64 i = 0; i < total; i++) {
                    const char* p = buffer.get() + element_size * i;
                    size_t len = element_size;
                    while (len > 0 && p[len - 1] == 0x00) {
                      len--;
                    }
                    strings(i) = string(p, len);
                  }
                  break;
                case H5T_STR_SPACEPAD:
                default:
                  // Any other padding mode would otherwise leave the output
                  // silently empty.
                  return errors::InvalidArgument(
                      "string pad type not supported: ", pad);
              }
            }
          } break;
          case H5T_VLEN: {
            const int64 total = value->NumElements();
            auto strings = value->flat<tstring>();
            std::unique_ptr<hvl_t[]> buffer(new hvl_t[total]);
            data_set.read(buffer.get(), data_type, memory_space, data_space);
            for (int64 i = 0; i < total; i++) {
              const hvl_t& h = buffer[i];
              strings(i) =
                  (h.p != nullptr && h.len > 0)
                      ? string(static_cast<const char*>(h.p), h.len)
                      : string();
            }
            H5::DataSet::vlenReclaim(buffer.get(), data_type, reclaim_space);
          } break;
          default:
            return errors::Unimplemented(
                "data type class for string not supported: ",
                data_type.getClass());
        }
        break;
      case DT_BOOL:
        switch (data_type.getClass()) {
          case H5T_ENUM: {
            // Obtain a real EnumType instead of down-casting the generic
            // DataType object, which is undefined behavior.
            H5::EnumType enum_type = data_set.getEnumType();
            const int member_count = enum_type.getNmembers();
            // Only a one-byte enum with exactly {FALSE = 0, TRUE = 1}, in
            // that order, is accepted as a boolean.
            bool is_bool_enum = false;
            if (data_type.getSize() == 1 &&
                data_type.getSize() == DataTypeSize(DT_BOOL) &&
                member_count == 2) {
              int index_false = 0, index_true = 0;
              try {
                index_false = enum_type.getMemberIndex("FALSE");
                index_true = enum_type.getMemberIndex("TRUE");
              } catch (const H5::DataTypeIException& e) {
                // Deliberately ignored: the defaults (0, 0) cannot satisfy
                // the check below, so the enum is rejected.
              }
              char value_false = 0, value_true = 0;
              try {
                enum_type.getMemberValue(0, &value_false);
                enum_type.getMemberValue(1, &value_true);
              } catch (const H5::DataTypeIException& e) {
                // Deliberately ignored: the defaults (0, 0) cannot satisfy
                // the check below, so the enum is rejected.
              }
              if (index_false == 0 && index_true == 1 && value_false == 0 &&
                  value_true == 1) {
                is_bool_enum = true;
              }
            }
            if (!is_bool_enum) {
              // List the member names in the error message. The scratch
              // buffer is sized from the enum's own size because
              // getMemberValue writes that many bytes.
              std::unique_ptr<char[]> member_value(
                  new char[data_type.getSize()]);
              string names = "[";
              for (int ii = 0; ii < member_count; ii++) {
                enum_type.getMemberValue(ii, member_value.get());
                if (ii != 0) {
                  names += ", ";
                }
                names += enum_type.nameOf(member_value.get(), 100);
              }
              names += "]";
              return errors::InvalidArgument(
                  "unsupported data class for enum: ", names);
            }
            data_set.read(value->flat<bool>().data(), data_type, memory_space,
                          data_space);
          } break;
          default:
            return errors::Unimplemented(
                "data type class for bool not supported: ",
                data_type.getClass());
        }
        break;
      default:
        return errors::Unimplemented("data type class not supported yet: ",
                                     data_type.getClass());
    }
  } catch (const H5::FileIException& e) {
    return errors::InvalidArgument("unable to open dataset file ", filename_,
                                   ": ", e.getCDetailMsg());
  } catch (const H5::DataSetIException& e) {
    return errors::InvalidArgument("unable to process dataset file ",
                                   filename_, ": ", e.getCDetailMsg());
  } catch (const H5::Exception& e) {
    // Dataspace / datatype failures (e.g. from selectHyperslab or the enum
    // helpers) are reported as a status instead of escaping the kernel.
    return errors::InvalidArgument("unable to read dataset ", component,
                                   " from file ", filename_, ": ",
                                   e.getCDetailMsg());
  }
  return Status::OK();
}