std::shared_ptr<arrow::Table> ChangeNameAndDataType()

in cli/src/util.h [275:349]


/// @brief Build a new table in which selected columns of @p table are renamed
///        and/or cast to a different Arrow data type.
///
/// Columns are matched by their current name against the keys of
/// @p columns_to_change. Matched columns get the mapped name and, when the
/// mapped type differs from the current one, every chunk is cast to that type
/// with the Arrow compute `Cast` kernel (integer overflow is not allowed).
/// Columns whose name is not a key of the map are passed through untouched.
/// Column order is preserved.
///
/// @param table              Source table. Must not be null.
/// @param columns_to_change  Map from current column name to a
///                           (new name, new data type) pair. The data type
///                           must not be null; pass the column's current type
///                           to rename without casting.
/// @return A new table holding the renamed/cast columns. Columns that are not
///         cast share their data with @p table.
/// @throws std::runtime_error if @p table is null, if a mapped data type is
///         null, or if casting any chunk fails (the message carries the
///         column name, the target type and the Arrow status text).
std::shared_ptr<arrow::Table> ChangeNameAndDataType(
    const std::shared_ptr<arrow::Table>& table,
    const std::unordered_map<
        std::string, std::pair<std::string, std::shared_ptr<arrow::DataType>>>&
        columns_to_change) {
  if (!table) {
    throw std::runtime_error("ChangeNameAndDataType: input table is null.");
  }

  const auto& original_schema = table->schema();
  // Arrow indexes columns/fields with int, so keep the counter an int too.
  const int num_columns = table->num_columns();

  std::vector<std::shared_ptr<arrow::Field>> new_fields;
  std::vector<std::shared_ptr<arrow::ChunkedArray>> new_columns;
  new_fields.reserve(static_cast<size_t>(num_columns));
  new_columns.reserve(static_cast<size_t>(num_columns));

  // The cast options do not depend on the column or chunk, so build them once.
  arrow::compute::CastOptions cast_options;
  cast_options.allow_int_overflow = false;  // Set as needed

  for (int i = 0; i < num_columns; ++i) {
    const auto& original_field = original_schema->field(i);
    auto original_column = table->column(i);  // This is a ChunkedArray

    const auto it = columns_to_change.find(original_field->name());
    if (it == columns_to_change.end()) {
      // Columns not in the change map remain unchanged.
      new_fields.push_back(original_field);
      new_columns.push_back(original_column);
      continue;
    }

    const std::string& new_name = it->second.first;
    const std::shared_ptr<arrow::DataType>& new_type = it->second.second;
    if (!new_type) {
      throw std::runtime_error("ChangeNameAndDataType: null target type for column '" +
                               original_field->name() + "'.");
    }

    // Derive the new field from the original one so that nullability and
    // field-level metadata are carried over, just as they are for the
    // columns that are passed through unchanged.
    auto new_field = original_field->WithName(new_name);

    const bool type_changed = !original_field->type()->Equals(*new_type);
    if (!type_changed) {
      // Rename only: the existing column data can be reused as is.
      new_fields.push_back(new_field);
      new_columns.push_back(original_column);
      continue;
    }

    // The data type differs: cast the column chunk by chunk.
    std::vector<std::shared_ptr<arrow::Array>> casted_chunks;
    casted_chunks.reserve(original_column->chunks().size());
    for (const auto& chunk : original_column->chunks()) {
      auto cast_result = arrow::compute::Cast(*chunk, new_type, cast_options);
      if (!cast_result.ok()) {
        throw std::runtime_error("Failed to cast column '" +
                                 original_field->name() + "' to " +
                                 new_type->ToString() + ": " +
                                 cast_result.status().ToString());
      }
      casted_chunks.push_back(cast_result.ValueOrDie());
    }

    new_fields.push_back(new_field->WithType(new_type));
    // Passing the type explicitly keeps the ChunkedArray well-typed even when
    // the column has no chunks.
    new_columns.push_back(
        std::make_shared<arrow::ChunkedArray>(casted_chunks, new_type));
  }

  // Create the new schema from the collected fields.
  auto new_schema = arrow::schema(new_fields);

  // Pass the row count explicitly: with zero columns it cannot be inferred
  // from the column data and would otherwise be reported as 0.
  return arrow::Table::Make(new_schema, new_columns, table->num_rows());
}