in tensorflow_io/core/kernels/csv_kernels.cc [34:132]
// Opens a single CSV input, parses it into an Arrow table, and records
// per-column metadata (shape, dtype, name, name -> index) on this object.
//
// Args:
//   input: must contain exactly one entry, used as the filename.
//   metadata: not used by this method.
//   memory_data, memory_size: forwarded unchanged to SizedRandomAccessFile
//     (presumably an optional in-memory copy of the file contents -- confirm
//     against SizedRandomAccessFile).
//
// Returns:
//   Status::OK() on success. InvalidArgument if `input` does not hold exactly
//   one filename, if the Arrow CSV reader cannot be created, if reading the
//   table fails, or if a column has an Arrow type with no mapping below.
//   Errors from GetFileSize are propagated as-is.
Status Init(const std::vector<string>& input,
            const std::vector<string>& metadata, const void* memory_data,
            const int64 memory_size) override {
  // Reject an empty list up front: indexing input[0] on an empty vector is
  // undefined behavior.
  if (input.empty()) {
    return errors::InvalidArgument("no filename is provided");
  }
  if (input.size() > 1) {
    return errors::InvalidArgument("more than 1 filename is not supported");
  }
  const string& filename = input[0];

  file_.reset(
      new SizedRandomAccessFile(env_, filename, memory_data, memory_size));
  TF_RETURN_IF_ERROR(file_->GetFileSize(&file_size_));
  csv_file_.reset(new ArrowRandomAccessFile(file_.get(), file_size_));

  // Build the Arrow CSV reader with default read/parse/convert options.
  auto make_result = ::arrow::csv::TableReader::Make(
      ::arrow::default_memory_pool(), ::arrow::io::default_io_context(),
      csv_file_, ::arrow::csv::ReadOptions::Defaults(),
      ::arrow::csv::ParseOptions::Defaults(),
      ::arrow::csv::ConvertOptions::Defaults());
  if (!make_result.status().ok()) {
    return errors::InvalidArgument("unable to make a TableReader: ",
                                   make_result.status());
  }
  reader_ = std::move(make_result).ValueUnsafe();

  // Materialize the table; everything below only inspects table_.
  auto read_result = reader_->Read();
  if (!read_result.status().ok()) {
    return errors::InvalidArgument("unable to read table: ",
                                   read_result.status());
  }
  table_ = std::move(read_result).ValueUnsafe();

  // Loop-invariant values, computed once. In particular ColumnNames() hands
  // back a vector of names, so calling it per column would rebuild that
  // vector on every iteration.
  const auto column_names = table_->ColumnNames();
  const int num_columns = table_->num_columns();
  const TensorShape column_shape({static_cast<int64>(table_->num_rows())});

  for (int i = 0; i < num_columns; i++) {
    // Translate the Arrow column type into the matching TensorFlow dtype.
    ::tensorflow::DataType dtype;
    switch (table_->column(i)->type()->id()) {
      case ::arrow::Type::BOOL:
        dtype = ::tensorflow::DT_BOOL;
        break;
      case ::arrow::Type::UINT8:
        dtype = ::tensorflow::DT_UINT8;
        break;
      case ::arrow::Type::INT8:
        dtype = ::tensorflow::DT_INT8;
        break;
      case ::arrow::Type::UINT16:
        dtype = ::tensorflow::DT_UINT16;
        break;
      case ::arrow::Type::INT16:
        dtype = ::tensorflow::DT_INT16;
        break;
      case ::arrow::Type::UINT32:
        dtype = ::tensorflow::DT_UINT32;
        break;
      case ::arrow::Type::INT32:
        dtype = ::tensorflow::DT_INT32;
        break;
      case ::arrow::Type::UINT64:
        dtype = ::tensorflow::DT_UINT64;
        break;
      case ::arrow::Type::INT64:
        dtype = ::tensorflow::DT_INT64;
        break;
      case ::arrow::Type::HALF_FLOAT:
        dtype = ::tensorflow::DT_HALF;
        break;
      case ::arrow::Type::FLOAT:
        dtype = ::tensorflow::DT_FLOAT;
        break;
      case ::arrow::Type::DOUBLE:
        dtype = ::tensorflow::DT_DOUBLE;
        break;
      case ::arrow::Type::STRING:
        dtype = ::tensorflow::DT_STRING;
        break;
      // Explicitly listed to document known-but-unsupported types; they share
      // the error path with any type not named here.
      case ::arrow::Type::BINARY:
      case ::arrow::Type::FIXED_SIZE_BINARY:
      case ::arrow::Type::DATE32:
      case ::arrow::Type::DATE64:
      case ::arrow::Type::TIMESTAMP:
      case ::arrow::Type::TIME32:
      case ::arrow::Type::TIME64:
      case ::arrow::Type::DECIMAL:
      case ::arrow::Type::LIST:
      case ::arrow::Type::STRUCT:
      case ::arrow::Type::DICTIONARY:
      case ::arrow::Type::MAP:
      default:
        return errors::InvalidArgument("arrow data type is not supported: ",
                                       table_->column(i)->type()->ToString());
    }
    // Each column is recorded as a 1-D tensor of length num_rows.
    shapes_.push_back(column_shape);
    dtypes_.push_back(dtype);
    columns_.push_back(column_names[i]);
    columns_index_[column_names[i]] = i;
  }
  return Status::OK();
}