cpp/fury/encoder/row_encode_trait.h (250 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#pragma once
#include "fury/meta/field_info.h"
#include "fury/meta/type_traits.h"
#include "fury/row/writer.h"
#include <memory>
#include <string_view>
#include <type_traits>
#include <utility>
namespace fury {
namespace encoder {
namespace details {
// Compile-time table from a primitive C++ type to the arrow factory that
// creates the matching arrow data type. `value` stores the factory itself,
// not its result, so users call it: `ArrowSchemaBasicType<T>::value()`.
// The primary template is only declared; a type without a specialization
// below therefore has no `value` member at all.
template <typename> struct ArrowSchemaBasicType;

template <> struct ArrowSchemaBasicType<bool> {
  static constexpr auto value = arrow::boolean;
};

template <> struct ArrowSchemaBasicType<int8_t> {
  static constexpr auto value = arrow::int8;
};

template <> struct ArrowSchemaBasicType<int16_t> {
  static constexpr auto value = arrow::int16;
};

template <> struct ArrowSchemaBasicType<int32_t> {
  static constexpr auto value = arrow::int32;
};

template <> struct ArrowSchemaBasicType<int64_t> {
  static constexpr auto value = arrow::int64;
};

template <> struct ArrowSchemaBasicType<float> {
  static constexpr auto value = arrow::float32;
};

template <> struct ArrowSchemaBasicType<double> {
  static constexpr auto value = arrow::float64;
};
// Copies the characters referenced by `s` into a newly created, owning
// std::string. Embedded '\0' characters are kept because the view's length,
// not a terminator, determines how much is copied.
inline std::string StringViewToString(std::string_view s) {
  return std::string(s);
}
// Compile-time flag: T is one of the string types handled by the string
// encoder (std::string, std::string_view), as decided by meta::IsOneOf.
// NOTE(review): presumably an exact-type match, so cv-qualified types would
// not match — the traits in this file apply std::remove_cv_t before asking.
template <typename T>
inline constexpr bool IsString =
meta::IsOneOf<T, std::string, std::string_view>::value;
// Compile-time flag: T is treated as a map, i.e. meta::IsPairIterable<T> holds.
// NOTE(review): presumably "iterable whose elements expose first/second";
// confirm against fury/meta/type_traits.h.
template <typename T> inline constexpr bool IsMap = meta::IsPairIterable<T>;
// Compile-time flag: T is encoded as a list. That is any type accepted by
// meta::IsIterable which is neither a string type nor a map type; those two
// have dedicated encodings and must not be picked up here.
template <typename T>
inline constexpr bool IsArray =
    meta::IsIterable<T> && !(IsString<T> || IsMap<T>);
// Compile-time flag: T is a specialization of std::optional.
// General case: false.
template <typename NotOptional> inline constexpr bool IsOptional = false;

// Exact match on std::optional<U>: true. cv-qualified optionals do not match
// this specialization, so strip qualifiers before asking.
template <typename U> inline constexpr bool IsOptional<std::optional<U>> = true;
template <typename T>
inline constexpr bool IsClassButNotBuiltin =
std::is_class_v<T> &&
!(IsString<T> || IsArray<T> || IsOptional<T> || IsMap<T>);
// Looks up the arrow type of field number `index` in the schema of a row
// writer. The return type is decltype(auto), so the result has exactly the
// type (value or reference) that the final `type()` call yields.
inline decltype(auto) GetChildType(RowWriter &writer, int index) {
  const auto &row_schema = writer.schema();
  return row_schema->field(index)->type();
}
// Array-writer counterpart of GetChildType: yields the type of field 0 of the
// writer's own type. `index` is accepted only so that both overloads can be
// called the same way; it is not used.
// NOTE(review): presumably field 0 is the element field of the list type.
inline decltype(auto) GetChildType(ArrayWriter &writer, int index) {
  const auto &array_type = writer.type();
  return array_type->field(0)->type();
}
} // namespace details
// Make meta::FuryFieldInfo nameable inside fury::encoder; the class-type
// RowEncodeTrait specialization in this file calls it unqualified.
using meta::FuryFieldInfo;
// Visitor that ignores everything it is shown. Pass it to
// RowEncodeTrait<...>::Write when the nested writers created while encoding
// do not need to be kept by the caller.
struct EmptyWriteVisitor {
  // The first template argument is a tag type supplied explicitly by the
  // caller; the argument is bound by reference and left untouched.
  template <typename Tag, typename Visited> void Visit(Visited &&) {}
};
// Visitor that stores every writer it is shown in a caller-owned container.
// Only a reference to the container is held, so the container must outlive
// the visitor.
template <typename C> struct DefaultWriteVisitor {
  // Destination container; each visited writer is appended with push_back.
  C &cont;

  DefaultWriteVisitor(C &container) : cont(container) {}

  // Takes ownership of `writer` and moves it to the back of the container.
  // The first template argument is a tag type supplied explicitly by the
  // caller and is not used here.
  template <typename Tag, typename T> void Visit(std::unique_ptr<T> writer) {
    cont.push_back(std::move(writer));
  }
};
// RowEncodeTrait<T> describes how a value of type `T` is serialized into the
// row format. Every supported category of `T` has a partial specialization
// that offers some or all of:
//   - Type():   builds the arrow data type that corresponds to `T`
//   - Schema(): builds the arrow schema of `T` (class types only)
//   - Write(visitor, value, ...): encodes `value` through the given writer
//
// The second template parameter exists only so that specializations can be
// switched on and off with std::enable_if_t / meta::Void.
//
// This primary template is what remains for an unsupported `T`; the
// static_assert below reports that with a readable message.
template <typename T, typename SpecializationGuard = void>
struct RowEncodeTrait {
  static_assert(meta::AlwaysFalse<T>,
                "type T is currently not supported for encoding");
};
template <typename T>
struct RowEncodeTrait<
T, meta::Void<details::ArrowSchemaBasicType<std::remove_cv_t<T>>::value>> {
static auto Type() {
return details::ArrowSchemaBasicType<std::remove_cv_t<T>>::value();
}
template <typename V, typename W,
std::enable_if_t<meta::IsOneOf<W, RowWriter, ArrayWriter>::value,
int> = 0>
static void Write(V &&, const T &value, W &writer, int index) {
writer.Write(index, value);
}
};
// Encoding of string types (see details::IsString); cv-qualifiers on T are
// removed before the check.
template <typename T>
struct RowEncodeTrait<
    T, std::enable_if_t<details::IsString<std::remove_cv_t<T>>>> {
  // Strings are described by the arrow utf8 type.
  static auto Type() { return arrow::utf8(); }

  // Hands `value` to the writer's WriteString for slot `index`. `writer` is
  // a RowWriter or an ArrayWriter. No nested writer is created, so the
  // visitor is accepted but never used.
  template <typename V, typename W,
            std::enable_if_t<meta::IsOneOf<W, RowWriter, ArrayWriter>::value,
                             int> = 0>
  static void Write(V && /*visitor*/, const T &value, W &writer, int index) {
    writer.WriteString(index, value);
  }
};
// Encoding of std::optional<X>: an engaged optional is encoded exactly like
// the X it holds, a disengaged one is recorded as null.
template <typename T>
struct RowEncodeTrait<
    T, std::enable_if_t<details::IsOptional<std::remove_cv_t<T>>>> {
  // The arrow type is that of the wrapped value type; the optional itself
  // adds nothing to it.
  static auto Type() {
    using Wrapped = typename T::value_type;
    return RowEncodeTrait<Wrapped>::Type();
  }

  // Writes slot `index` of `writer` (a RowWriter or an ArrayWriter).
  // The visitor is only forwarded to the encoder of the wrapped type.
  template <typename V, typename W,
            std::enable_if_t<meta::IsOneOf<W, RowWriter, ArrayWriter>::value,
                             int> = 0>
  static void Write(V &&visitor, const T &value, W &writer, int index) {
    if (!value) {
      // Nothing to encode: mark the slot as null and stop.
      writer.SetNullAt(index);
      return;
    }
    using Wrapped = typename T::value_type;
    RowEncodeTrait<Wrapped>::Write(std::forward<V>(visitor), *value, writer,
                                   index);
  }
};
template <typename T>
struct RowEncodeTrait<
T, std::enable_if_t<details::IsClassButNotBuiltin<std::remove_cv_t<T>>>> {
private:
template <typename FieldInfo, size_t... I>
static arrow::FieldVector FieldVectorImpl(std::index_sequence<I...>) {
return {arrow::field(
details::StringViewToString(FieldInfo::Names[I]),
RowEncodeTrait<meta::RemoveMemberPointerCVRefT<decltype(std::get<I>(
FieldInfo::Ptrs))>>::Type())...};
}
template <typename FieldInfo, typename V, size_t... I>
static void WriteImpl(V &&visitor, const T &value, RowWriter &writer,
std::index_sequence<I...>) {
(RowEncodeTrait<meta::RemoveMemberPointerCVRefT<decltype(std::get<I>(
FieldInfo::Ptrs))>>::Write(std::forward<V>(visitor),
value.*std::get<I>(FieldInfo::Ptrs), writer,
I),
...);
}
public:
static auto FieldVector() {
using FieldInfo = decltype(FuryFieldInfo(std::declval<T>()));
return FieldVectorImpl<FieldInfo>(
std::make_index_sequence<FieldInfo::Size>());
}
static auto Type() { return arrow::struct_(FieldVector()); }
static auto Schema() { return arrow::schema(FieldVector()); }
template <typename V>
static auto Write(V &&visitor, const T &value, RowWriter &writer) {
using FieldInfo = decltype(FuryFieldInfo(std::declval<T>()));
return WriteImpl<FieldInfo>(std::forward<V>(visitor), value, writer,
std::make_index_sequence<FieldInfo::Size>());
}
template <typename V, typename W,
std::enable_if_t<meta::IsOneOf<W, RowWriter, ArrayWriter>::value,
int> = 0>
static void Write(V &&visitor, const T &value, W &writer, int index) {
auto offset = writer.cursor();
auto inner_writer = std::make_unique<RowWriter>(
arrow::schema(details::GetChildType(writer, index)->fields()), &writer);
inner_writer->Reset();
RowEncodeTrait<T>::Write(std::forward<V>(visitor), value,
*inner_writer.get());
writer.SetOffsetAndSize(index, offset, writer.cursor() - offset);
std::forward<V>(visitor).template Visit<std::remove_cv_t<T>>(
std::move(inner_writer));
}
};
template <typename T>
struct RowEncodeTrait<T,
std::enable_if_t<details::IsArray<std::remove_cv_t<T>>>> {
static auto Type() {
return arrow::list(RowEncodeTrait<meta::GetValueType<T>>::Type());
}
template <typename V>
static void Write(V &&visitor, const T &value, ArrayWriter &writer) {
int index = 0;
for (const auto &v : value) {
RowEncodeTrait<meta::GetValueType<T>>::Write(std::forward<V>(visitor), v,
writer, index);
++index;
}
}
template <typename V, typename W,
std::enable_if_t<meta::IsOneOf<W, RowWriter, ArrayWriter>::value,
int> = 0>
static void Write(V &&visitor, const T &value, W &writer, int index) {
auto offset = writer.cursor();
auto inner_writer = std::make_unique<ArrayWriter>(
std::dynamic_pointer_cast<arrow::ListType>(
details::GetChildType(writer, index)),
&writer);
inner_writer->Reset(value.size());
RowEncodeTrait<T>::Write(std::forward<V>(visitor), value,
*inner_writer.get());
writer.SetOffsetAndSize(index, offset, writer.cursor() - offset);
std::forward<V>(visitor).template Visit<std::remove_cv_t<T>>(
std::move(inner_writer));
}
};
// Encoding of map-like containers (see details::IsMap). Keys and values are
// written as two separate arrays, keys first; each side uses the
// RowEncodeTrait of its own type (value_type::first_type / second_type).
template <typename T>
struct RowEncodeTrait<T,
                      std::enable_if_t<details::IsMap<std::remove_cv_t<T>>>> {
  // An arrow map built from the arrow types of the key and the mapped value.
  static auto Type() {
    using KeyTrait = RowEncodeTrait<typename T::value_type::first_type>;
    using ItemTrait = RowEncodeTrait<typename T::value_type::second_type>;
    return arrow::map(KeyTrait::Type(), ItemTrait::Type());
  }

  // Writes the `first` member of every entry into `writer`, in iteration
  // order; the n-th entry visited goes to slot n (counting from 0).
  template <typename V>
  static void WriteKey(V &&visitor, const T &value, ArrayWriter &writer) {
    using KeyTrait = RowEncodeTrait<typename T::value_type::first_type>;
    int slot = 0;
    for (const auto &entry : value) {
      KeyTrait::Write(std::forward<V>(visitor), entry.first, writer, slot);
      ++slot;
    }
  }

  // Writes the `second` member of every entry into `writer`, in the same
  // order and with the same slot numbering as WriteKey.
  template <typename V>
  static void WriteValue(V &&visitor, const T &value, ArrayWriter &writer) {
    using ItemTrait = RowEncodeTrait<typename T::value_type::second_type>;
    int slot = 0;
    for (const auto &entry : value) {
      ItemTrait::Write(std::forward<V>(visitor), entry.second, writer, slot);
      ++slot;
    }
  }

  // Nested encoding: writes `value` as slot `index` of `writer` (a RowWriter
  // or an ArrayWriter). The order of the statements below matters because
  // each step depends on where the outer writer's cursor currently is.
  template <typename V, typename W,
            std::enable_if_t<meta::IsOneOf<W, RowWriter, ArrayWriter>::value,
                             int> = 0>
  static void Write(V &&visitor, const T &value, W &writer, int index) {
    auto start = writer.cursor();
    // NOTE(review): looks like a placeholder at the start of the map data
    // that is overwritten with the key array size further down — confirm
    // against WriteDirectly in fury/row/writer.h.
    writer.WriteDirectly(-1);

    auto slot_type = std::dynamic_pointer_cast<arrow::MapType>(
        details::GetChildType(writer, index));

    // Keys: an array writer over list<key_type>, constructed with a pointer
    // to the outer writer.
    auto keys =
        std::make_unique<ArrayWriter>(std::static_pointer_cast<arrow::ListType>(
                                          arrow::list(slot_type->key_type())),
                                      &writer);
    keys->Reset(value.size());
    RowEncodeTrait<T>::WriteKey(std::forward<V>(visitor), value, *keys);

    // Store the size reported by the key writer at the position recorded
    // before anything was written.
    writer.WriteDirectly(start, keys->size());

    // Values: same construction over list<item_type>.
    auto items =
        std::make_unique<ArrayWriter>(std::static_pointer_cast<arrow::ListType>(
                                          arrow::list(slot_type->item_type())),
                                      &writer);
    items->Reset(value.size());
    RowEncodeTrait<T>::WriteValue(std::forward<V>(visitor), value, *items);

    // Everything the outer cursor advanced by belongs to this slot.
    writer.SetOffsetAndSize(index, start, writer.cursor() - start);

    // Pass ownership of both nested writers to the visitor, keys first.
    std::forward<V>(visitor).template Visit<std::remove_cv_t<T>>(
        std::move(keys));
    std::forward<V>(visitor).template Visit<std::remove_cv_t<T>>(
        std::move(items));
  }
};
} // namespace encoder
} // namespace fury