in cli/src/util.h [275:349]
// Builds a new table from `table` in which selected columns are renamed
// and/or cast to another Arrow data type.
//
// `columns_to_change` maps an existing column name to a pair of
// (new column name, target data type). Columns whose name is not a key of the
// map are carried over untouched (same field object, same column data).
// Map entries that do not match any column of `table` are ignored.
//
// For a matched column:
//   * the data is cast chunk by chunk only when the target type differs from
//     the current type; otherwise the original column data is reused;
//   * a new field is created with the new name, the resulting type, and the
//     nullability of the original field.
//
// Returns the newly constructed table; `table` itself is not modified.
//
// Throws std::runtime_error if `table` is null, if a matched entry carries a
// null target type, or if Arrow fails to cast a chunk (the Arrow status text
// is included in the message).
std::shared_ptr<arrow::Table> ChangeNameAndDataType(
    const std::shared_ptr<arrow::Table>& table,
    const std::unordered_map<
        std::string, std::pair<std::string, std::shared_ptr<arrow::DataType>>>&
        columns_to_change) {
  if (table == nullptr) {
    throw std::runtime_error("Cannot change columns of a null table.");
  }

  const auto original_schema = table->schema();
  // Arrow indexes fields/columns with `int`, so keep the counter an `int`
  // instead of narrowing from a wider type on every call.
  const int num_columns = table->num_columns();

  std::vector<std::shared_ptr<arrow::Field>> new_fields;
  std::vector<std::shared_ptr<arrow::ChunkedArray>> new_columns;
  new_fields.reserve(static_cast<std::size_t>(num_columns));
  new_columns.reserve(static_cast<std::size_t>(num_columns));

  // The cast options do not depend on the column or chunk, so build them once.
  arrow::compute::CastOptions cast_options;
  cast_options.allow_int_overflow = false;  // Fail instead of wrapping around

  for (int i = 0; i < num_columns; ++i) {
    const auto original_field = original_schema->field(i);
    const auto original_column = table->column(i);  // This is a ChunkedArray
    const std::string& original_name = original_field->name();

    const auto it = columns_to_change.find(original_name);
    if (it == columns_to_change.end()) {
      // Columns not in the change map remain unchanged
      new_fields.push_back(original_field);
      new_columns.push_back(original_column);
      continue;
    }

    const std::string& new_name = it->second.first;
    const std::shared_ptr<arrow::DataType>& new_type = it->second.second;
    if (new_type == nullptr) {
      // Dereferencing a null type below would be undefined behaviour.
      throw std::runtime_error("No target data type given for column '" +
                               original_name + "'.");
    }

    const auto& original_type = original_field->type();
    const bool type_changed = !original_type->Equals(*new_type);

    std::shared_ptr<arrow::ChunkedArray> new_chunked_array;
    if (type_changed) {
      // Cast every chunk with the Compute API and reassemble the column.
      std::vector<std::shared_ptr<arrow::Array>> casted_chunks;
      casted_chunks.reserve(original_column->chunks().size());
      for (const auto& chunk : original_column->chunks()) {
        auto cast_result =
            arrow::compute::Cast(*chunk, new_type, cast_options);
        if (!cast_result.ok()) {
          // Surface which column failed and why, not just that it failed.
          throw std::runtime_error(
              "Failed to cast column data. Column '" + original_name +
              "' could not be cast to " + new_type->ToString() + ": " +
              cast_result.status().ToString());
        }
        casted_chunks.push_back(cast_result.ValueOrDie());
      }
      // Passing the type explicitly keeps this valid for zero-chunk columns.
      new_chunked_array =
          std::make_shared<arrow::ChunkedArray>(casted_chunks, new_type);
    } else {
      // If type is not changed, keep the original column
      new_chunked_array = original_column;
    }

    // Create a new Field with the updated name and type
    new_fields.push_back(
        arrow::field(new_name, type_changed ? new_type : original_type,
                     original_field->nullable()));
    new_columns.push_back(new_chunked_array);
  }

  auto new_schema = arrow::schema(new_fields);
  // Pass the row count explicitly so it is preserved even when the table has
  // no columns to infer it from; renaming/casting never changes row count.
  return arrow::Table::Make(new_schema, new_columns, table->num_rows());
}