Status Read()

in tensorflow_io/core/kernels/hdf5_kernels.cc [339:577]


  // Reads (a slice of) the HDF5 dataset `component` into a tensor.
  //
  // Args:
  //   component:     name of the dataset; must be a key of `columns_index_`.
  //   start:         per-dimension offset of the slice. Only consulted when
  //                  `shape` has at least one dimension, in which case it must
  //                  provide at least as many entries as the dataset rank.
  //   shape:         shape of the slice to read. With zero dimensions no
  //                  hyperslab is selected and H5::DataSpace::ALL is used as
  //                  the memory space.
  //   allocate_func: invoked as allocate_func(shape, &tensor) to obtain the
  //                  output tensor; a non-OK status is returned unchanged.
  //
  // Returns:
  //   OK on success; InvalidArgument for an unknown dataset, a rank or
  //   boundary mismatch, an unsupported string padding or bool enum layout,
  //   or any HDF5 library failure; Unimplemented for unsupported data types.
  //
  // `mu_` is held for the whole call.
  Status Read(const string& component,
              const absl::InlinedVector<int64, 4>& start,
              const TensorShape& shape,
              std::function<Status(const TensorShape& shape, Tensor** value)>
                  allocate_func) {
    mutex_lock l(mu_);

    const auto lookup = columns_index_.find(component);
    if (lookup == columns_index_.end()) {
      return errors::InvalidArgument("dataset ", component, " not found");
    }
    const int64 column_index = lookup->second;

    Tensor* value = nullptr;
    TF_RETURN_IF_ERROR(allocate_func(shape, &value));

    H5::H5File* file = file_image_->GetFile();
    try {
      H5::DataSet data_set = file->openDataSet(component);
      H5::DataType data_type = data_set.getDataType();
      H5::DataSpace data_space = data_set.getSpace();

      H5::DataSpace memory_space = H5::DataSpace::ALL;

      if (shape.dims() != 0) {
        const int rank = data_space.getSimpleExtentNdims();
        if (rank != shape.dims()) {
          return errors::InvalidArgument("rank does not match: ", rank, " vs. ",
                                         shape.dims());
        }
        // `start` is indexed up to `rank` below; reject a short vector instead
        // of reading past its end.
        if (static_cast<int64>(start.size()) < static_cast<int64>(rank)) {
          return errors::InvalidArgument(
              "start has ", start.size(),
              " entries but the dataset has rank ", rank);
        }
        absl::InlinedVector<hsize_t, 4> dims(rank);
        absl::InlinedVector<hsize_t, 4> dims_start(rank);

        data_space.getSimpleExtentDims(dims.data());
        for (int i = 0; i < rank; i++) {
          const int64 offset = start[i];
          const int64 extent = shape.dim_size(i);
          // Written so that neither a negative offset nor offset + extent
          // overflowing can slip past the boundary check.
          if (offset < 0 || static_cast<hsize_t>(offset) > dims[i] ||
              static_cast<hsize_t>(extent) >
                  dims[i] - static_cast<hsize_t>(offset)) {
            return errors::InvalidArgument(
                "dimension [", i, "] out of boundary: start=", start[i],
                ", slice=", shape.dim_size(i), ", boundary=", dims[i]);
          }
          dims_start[i] = static_cast<hsize_t>(offset);
          dims[i] = static_cast<hsize_t>(extent);
        }

        // From here on `dims` holds the slice extents, not the dataset's.
        memory_space = H5::DataSpace(dims.size(), dims.data());

        data_space.selectHyperslab(H5S_SELECT_SET, dims.data(),
                                   dims_start.data());
      }

      // Dataspace describing the layout of the in-memory buffer, needed when
      // reclaiming variable-length data. The file dataspace must not be used
      // once a hyperslab is selected: its selection is expressed in dataset
      // coordinates, which do not correspond to offsets in the read buffer.
      // H5::DataSpace::ALL is not a concrete dataspace, so without a slice the
      // file dataspace is used instead.
      const H5::DataSpace& reclaim_space =
          (shape.dims() != 0) ? memory_space : data_space;

      switch (dtypes_[column_index]) {
        case DT_UINT8:
          data_set.read(value->flat<uint8>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_UINT16:
          data_set.read(value->flat<uint16>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_UINT32:
          data_set.read(value->flat<uint32>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_UINT64:
          data_set.read(value->flat<uint64>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_INT8:
          data_set.read(value->flat<int8>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_INT16:
          data_set.read(value->flat<int16>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_INT32:
          data_set.read(value->flat<int32>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_INT64:
          data_set.read(value->flat<int64>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_FLOAT:
          data_set.read(value->flat<float>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_DOUBLE:
          data_set.read(value->flat<double>().data(), data_type, memory_space,
                        data_space);
          break;
        case DT_COMPLEX64:
          data_set.read(value->flat<complex64>().data(), data_type,
                        memory_space, data_space);
          break;
        case DT_COMPLEX128:
          data_set.read(value->flat<complex128>().data(), data_type,
                        memory_space, data_space);
          break;
        case DT_STRING:
          switch (data_type.getClass()) {
            case H5T_STRING: {
              const H5::StrType str_type = data_set.getStrType();
              const int64 total = value->NumElements();
              auto output = value->flat<tstring>();
              if (str_type.isVariableStr()) {
                // HDF5 allocates one C string per element; they are released
                // through vlenReclaim once copied into the tensor.
                std::unique_ptr<char*[]> buffer(new char*[total]());
                data_set.read(buffer.get(), str_type, memory_space,
                              data_space);
                for (int64 i = 0; i < total; i++) {
                  const char* p = buffer[i];
                  // Guard against a null element pointer, which would make
                  // the std::string constructor undefined.
                  output(i) = (p != nullptr) ? string(p) : string();
                }
                H5::DataSet::vlenReclaim(buffer.get(), data_type,
                                         reclaim_space);
              } else {
                // Fixed-length strings are stored back to back, `width` bytes
                // per element.
                const size_t width = data_type.getSize();
                std::unique_ptr<char[]> buffer(new char[width * total]);
                data_set.read(buffer.get(), data_type, memory_space,
                              data_space);

                const H5T_str_t pad = str_type.getStrpad();
                switch (pad) {
                  case H5T_STR_NULLTERM:
                    // The string ends at the first NUL (or at `width`).
                    for (int64 i = 0; i < total; i++) {
                      const char* p = buffer.get() + width * i;
                      size_t len = 0;
                      while (len < width && p[len] != 0x00) {
                        len++;
                      }
                      output(i) = string(p, len);
                    }
                    break;
                  case H5T_STR_NULLPAD:
                    // Trailing NULs are padding; strip them.
                    for (int64 i = 0; i < total; i++) {
                      const char* p = buffer.get() + width * i;
                      size_t len = width;
                      while (len > 0 && p[len - 1] == 0x00) {
                        len--;
                      }
                      output(i) = string(p, len);
                    }
                    break;
                  case H5T_STR_SPACEPAD:
                  default:
                    // Space padding and any other pad mode are reported
                    // rather than silently producing empty strings.
                    return errors::InvalidArgument(
                        "string pad type not supported: ", pad);
                }
              }
            } break;
            case H5T_VLEN: {
              const int64 total = value->NumElements();
              std::unique_ptr<hvl_t[]> buffer(new hvl_t[total]());
              data_set.read(buffer.get(), data_type, memory_space, data_space);
              auto output = value->flat<tstring>();
              for (int64 i = 0; i < total; i++) {
                const hvl_t& h = buffer[i];
                output(i) = (h.p != nullptr)
                                ? string(static_cast<const char*>(h.p), h.len)
                                : string();
              }
              H5::DataSet::vlenReclaim(buffer.get(), data_type, reclaim_space);
            } break;
            default:
              return errors::Unimplemented(
                  "data type class for string not supported: ",
                  data_type.getClass());
          }
          break;
        case DT_BOOL:
          switch (data_type.getClass()) {
            case H5T_ENUM: {
              // Only a one-byte enum with exactly the members FALSE=0 (index
              // 0) and TRUE=1 (index 1) is accepted as a bool.
              const H5::EnumType enum_type = data_set.getEnumType();
              const int members = enum_type.getNmembers();
              bool success = false;
              if (data_type.getSize() == 1 &&
                  data_type.getSize() == DataTypeSize(DT_BOOL) &&
                  members == 2) {
                int index_false = 0, index_true = 0;
                try {
                  index_false = enum_type.getMemberIndex("FALSE");
                  index_true = enum_type.getMemberIndex("TRUE");
                } catch (const H5::DataTypeIException&) {
                  // A missing member leaves both indices at 0, which fails
                  // the check below.
                }
                char value_false = 0, value_true = 0;
                try {
                  enum_type.getMemberValue(0, &value_false);
                  enum_type.getMemberValue(1, &value_true);
                } catch (const H5::DataTypeIException&) {
                  // Leaves both values at 0, which fails the check below.
                }
                if (index_false == 0 && index_true == 1 && value_false == 0 &&
                    value_true == 1) {
                  success = true;
                }
              }
              if (!success) {
                // Build a best-effort list of member names for the message.
                string names = "[";
                try {
                  // Member values occupy getSize() bytes, which may exceed
                  // sizeof(int); size the scratch buffer accordingly.
                  std::unique_ptr<char[]> member_value(
                      new char[data_type.getSize()]());
                  for (int ii = 0; ii < members; ii++) {
                    enum_type.getMemberValue(ii, member_value.get());
                    const string name =
                        enum_type.nameOf(member_value.get(), 100);
                    if (ii != 0) {
                      names += ", ";
                    }
                    names += name;
                  }
                } catch (const H5::DataTypeIException&) {
                  // Report whatever names were collected so far.
                }
                names += "]";
                return errors::InvalidArgument(
                    "unsupported data class for enum: ", names);
              }
              data_set.read(value->flat<bool>().data(), data_type, memory_space,
                            data_space);
            } break;
            default:
              return errors::Unimplemented(
                  "data type class for bool not supported: ",
                  data_type.getClass());
          }
          break;
        default:
          return errors::Unimplemented("data type class not supported yet: ",
                                       data_type.getClass());
      }
    } catch (const H5::FileIException& e) {
      return errors::InvalidArgument("unable to open dataset file ", filename_,
                                     ": ", e.getCDetailMsg());
    } catch (const H5::DataSetIException& e) {
      return errors::InvalidArgument("unable to process dataset file ",
                                     filename_, ": ", e.getCDetailMsg());
    } catch (const H5::Exception& e) {
      // Dataspace, datatype and any other HDF5 failure: convert to a Status
      // so that no exception escapes this method.
      return errors::InvalidArgument("unable to process dataset file ",
                                     filename_, ": ", e.getCDetailMsg());
    }

    return Status::OK();
  }