c_glib/arrow-dataset-glib/partitioning.cpp (535 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <arrow-glib/array.hpp>
#include <arrow-glib/error.hpp>
#include <arrow-glib/schema.hpp>
#include <arrow-dataset-glib/enums.h>
#include <arrow-dataset-glib/partitioning.hpp>
G_BEGIN_DECLS
/**
* SECTION: partitioning
* @section_id: partitioning
* @title: Partitioning classes
* @include: arrow-dataset-glib/arrow-dataset-glib.h
*
* #GADatasetPartitioningFactoryOptions is a class for partitioning
* factory options.
*
* #GADatasetPartitioning is a base class for partitioning classes
* such as #GADatasetDirectoryPartitioning.
*
* #GADatasetKeyValuePartitioningOptions is a class for key-value
* partitioning options.
*
* #GADatasetKeyValuePartitioning is a base class for key-value style
* partitioning classes such as #GADatasetDirectoryPartitioning.
*
* #GADatasetDirectoryPartitioning is a class for partitioning that
* uses directory structure.
*
* #GADatasetHivePartitioningOptions is a class for Hive-style
* partitioning options.
*
* #GADatasetHivePartitioning is a class for partitioning that
* uses Hive-style partitioning.
*
* Since: 6.0.0
*/
struct GADatasetPartitioningFactoryOptionsPrivate
{
gboolean infer_dictionary;
GArrowSchema *schema;
GADatasetSegmentEncoding segment_encoding;
};
enum {
PROP_FACTORY_OPTIONS_INFER_DICTIONARY = 1,
PROP_FACTORY_OPTIONS_SCHEMA,
PROP_FACTORY_OPTIONS_SEGMENT_ENCODING,
};
G_DEFINE_TYPE_WITH_PRIVATE(GADatasetPartitioningFactoryOptions,
gadataset_partitioning_factory_options,
G_TYPE_OBJECT)
#define GADATASET_PARTITIONING_FACTORY_OPTIONS_GET_PRIVATE(obj) \
static_cast<GADatasetPartitioningFactoryOptionsPrivate *>( \
gadataset_partitioning_factory_options_get_instance_private( \
GADATASET_PARTITIONING_FACTORY_OPTIONS(obj)))
static void
gadataset_partitioning_factory_options_dispose(GObject *object)
{
auto priv = GADATASET_PARTITIONING_FACTORY_OPTIONS_GET_PRIVATE(object);
if (priv->schema) {
g_object_unref(priv->schema);
priv->schema = nullptr;
}
G_OBJECT_CLASS(gadataset_partitioning_factory_options_parent_class)->dispose(object);
}
static void
gadataset_partitioning_factory_options_set_property(GObject *object,
guint prop_id,
const GValue *value,
GParamSpec *pspec)
{
auto priv = GADATASET_PARTITIONING_FACTORY_OPTIONS_GET_PRIVATE(object);
switch (prop_id) {
case PROP_FACTORY_OPTIONS_INFER_DICTIONARY:
priv->infer_dictionary = g_value_get_boolean(value);
break;
case PROP_FACTORY_OPTIONS_SCHEMA:
{
auto schema = g_value_get_object(value);
if (priv->schema == schema) {
break;
}
auto old_schema = priv->schema;
if (schema) {
g_object_ref(schema);
priv->schema = GARROW_SCHEMA(schema);
} else {
priv->schema = nullptr;
}
if (old_schema) {
g_object_unref(old_schema);
}
}
break;
case PROP_FACTORY_OPTIONS_SEGMENT_ENCODING:
priv->segment_encoding =
static_cast<GADatasetSegmentEncoding>(g_value_get_enum(value));
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
}
}
static void
gadataset_partitioning_factory_options_get_property(GObject *object,
guint prop_id,
GValue *value,
GParamSpec *pspec)
{
auto priv = GADATASET_PARTITIONING_FACTORY_OPTIONS_GET_PRIVATE(object);
switch (prop_id) {
case PROP_FACTORY_OPTIONS_INFER_DICTIONARY:
g_value_set_boolean(value, priv->infer_dictionary);
break;
case PROP_FACTORY_OPTIONS_SCHEMA:
g_value_set_object(value, priv->schema);
break;
case PROP_FACTORY_OPTIONS_SEGMENT_ENCODING:
g_value_set_enum(value, priv->segment_encoding);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
}
}
static void
gadataset_partitioning_factory_options_init(GADatasetPartitioningFactoryOptions *object)
{
}
static void
gadataset_partitioning_factory_options_class_init(
GADatasetPartitioningFactoryOptionsClass *klass)
{
auto gobject_class = G_OBJECT_CLASS(klass);
gobject_class->dispose = gadataset_partitioning_factory_options_dispose;
gobject_class->set_property = gadataset_partitioning_factory_options_set_property;
gobject_class->get_property = gadataset_partitioning_factory_options_get_property;
arrow::dataset::PartitioningFactoryOptions default_options;
GParamSpec *spec;
/**
* GADatasetPartitioningFactoryOptions:infer-dictionary:
*
* When inferring a schema for partition fields, yield dictionary
* encoded types instead of plain. This can be more efficient when
* materializing virtual columns, and Expressions parsed by the
* finished Partitioning will include dictionaries of all unique
* inspected values for each field.
*
* Since: 11.0.0
*/
spec = g_param_spec_boolean("infer-dictionary",
"Infer dictionary",
"Whether encode partitioned field values as "
"dictionary",
default_options.infer_dictionary,
static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class,
PROP_FACTORY_OPTIONS_INFER_DICTIONARY,
spec);
/**
* GADatasetPartitioningFactoryOptions:schema:
*
* Optionally, an expected schema can be provided, in which case
* inference will only check discovered fields against the schema
* and update internal state (such as dictionaries).
*
* Since: 11.0.0
*/
spec = g_param_spec_object("schema",
"Schema",
"Inference will only check discovered fields "
"against the schema and update internal state",
GARROW_TYPE_SCHEMA,
static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class, PROP_FACTORY_OPTIONS_SCHEMA, spec);
/**
* GADatasetPartitioningFactoryOptions:segment-encoding:
*
* After splitting a path into components, decode the path
* components before parsing according to this scheme.
*
* Since: 11.0.0
*/
spec = g_param_spec_enum(
"segment-encoding",
"Segment encoding",
"After splitting a path into components, "
"decode the path components before "
"parsing according to this scheme",
GADATASET_TYPE_SEGMENT_ENCODING,
static_cast<GADatasetSegmentEncoding>(default_options.segment_encoding),
static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class,
PROP_FACTORY_OPTIONS_SEGMENT_ENCODING,
spec);
}
/**
* gadataset_partitioning_factory_options_new:
*
* Returns: The newly created #GADatasetPartitioningFactoryOptions.
*
* Since: 11.0.0
*/
GADatasetPartitioningFactoryOptions *
gadataset_partitioning_factory_options_new(void)
{
return GADATASET_PARTITIONING_FACTORY_OPTIONS(
g_object_new(GADATASET_TYPE_PARTITIONING_FACTORY_OPTIONS, nullptr));
}
struct GADatasetPartitioningPrivate
{
std::shared_ptr<arrow::dataset::Partitioning> partitioning;
};
enum {
PROP_PARTITIONING = 1,
};
G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetPartitioning,
gadataset_partitioning,
G_TYPE_OBJECT)
#define GADATASET_PARTITIONING_GET_PRIVATE(obj) \
static_cast<GADatasetPartitioningPrivate *>( \
gadataset_partitioning_get_instance_private(GADATASET_PARTITIONING(obj)))
static void
gadataset_partitioning_finalize(GObject *object)
{
auto priv = GADATASET_PARTITIONING_GET_PRIVATE(object);
priv->partitioning.~shared_ptr();
G_OBJECT_CLASS(gadataset_partitioning_parent_class)->finalize(object);
}
static void
gadataset_partitioning_set_property(GObject *object,
guint prop_id,
const GValue *value,
GParamSpec *pspec)
{
auto priv = GADATASET_PARTITIONING_GET_PRIVATE(object);
switch (prop_id) {
case PROP_PARTITIONING:
priv->partitioning = *static_cast<std::shared_ptr<arrow::dataset::Partitioning> *>(
g_value_get_pointer(value));
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
}
}
static void
gadataset_partitioning_init(GADatasetPartitioning *object)
{
auto priv = GADATASET_PARTITIONING_GET_PRIVATE(object);
new (&priv->partitioning) std::shared_ptr<arrow::dataset::Partitioning>;
}
static void
gadataset_partitioning_class_init(GADatasetPartitioningClass *klass)
{
auto gobject_class = G_OBJECT_CLASS(klass);
gobject_class->finalize = gadataset_partitioning_finalize;
gobject_class->set_property = gadataset_partitioning_set_property;
GParamSpec *spec;
spec = g_param_spec_pointer(
"partitioning",
"Partitioning",
"The raw "
"std::shared<arrow::dataset::Partitioning> *",
static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
g_object_class_install_property(gobject_class, PROP_PARTITIONING, spec);
}
/**
* gadataset_partitioning_get_type_name:
* @partitioning: A #GADatasetPartitioning.
*
* Returns: The type name of @partitioning.
*
* It should be freed with g_free() when no longer needed.
*
* Since: 6.0.0
*/
gchar *
gadataset_partitioning_get_type_name(GADatasetPartitioning *partitioning)
{
auto arrow_partitioning = gadataset_partitioning_get_raw(partitioning);
auto arrow_type_name = arrow_partitioning->type_name();
return g_strndup(arrow_type_name.c_str(), arrow_type_name.size());
}
/**
* gadataset_partitioning_create_default:
*
* Returns: (transfer full): The newly created #GADatasetPartitioning
* that doesn't partition.
*
* Since: 12.0.0
*/
GADatasetPartitioning *
gadataset_partitioning_create_default(void)
{
auto arrow_partitioning = arrow::dataset::Partitioning::Default();
return gadataset_partitioning_new_raw(&arrow_partitioning);
}
struct GADatasetKeyValuePartitioningOptionsPrivate
{
GADatasetSegmentEncoding segment_encoding;
};
enum {
PROP_OPTIONS_SEGMENT_ENCODING = 1,
};
G_DEFINE_TYPE_WITH_PRIVATE(GADatasetKeyValuePartitioningOptions,
gadataset_key_value_partitioning_options,
G_TYPE_OBJECT)
#define GADATASET_KEY_VALUE_PARTITIONING_OPTIONS_GET_PRIVATE(obj) \
static_cast<GADatasetKeyValuePartitioningOptionsPrivate *>( \
gadataset_key_value_partitioning_options_get_instance_private( \
GADATASET_KEY_VALUE_PARTITIONING_OPTIONS(obj)))
static void
gadataset_key_value_partitioning_options_set_property(GObject *object,
guint prop_id,
const GValue *value,
GParamSpec *pspec)
{
auto priv = GADATASET_KEY_VALUE_PARTITIONING_OPTIONS_GET_PRIVATE(object);
switch (prop_id) {
case PROP_OPTIONS_SEGMENT_ENCODING:
priv->segment_encoding =
static_cast<GADatasetSegmentEncoding>(g_value_get_enum(value));
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
}
}
static void
gadataset_key_value_partitioning_options_get_property(GObject *object,
guint prop_id,
GValue *value,
GParamSpec *pspec)
{
auto priv = GADATASET_KEY_VALUE_PARTITIONING_OPTIONS_GET_PRIVATE(object);
switch (prop_id) {
case PROP_OPTIONS_SEGMENT_ENCODING:
g_value_set_enum(value, priv->segment_encoding);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
}
}
static void
gadataset_key_value_partitioning_options_init(
GADatasetKeyValuePartitioningOptions *object)
{
}
static void
gadataset_key_value_partitioning_options_class_init(
GADatasetKeyValuePartitioningOptionsClass *klass)
{
auto gobject_class = G_OBJECT_CLASS(klass);
gobject_class->set_property = gadataset_key_value_partitioning_options_set_property;
gobject_class->get_property = gadataset_key_value_partitioning_options_get_property;
arrow::dataset::KeyValuePartitioningOptions default_options;
GParamSpec *spec;
/**
* GADatasetKeyValuePartitioningOptions:segment-encoding:
*
* After splitting a path into components, decode the path
* components before parsing according to this scheme.
*
* Since: 11.0.0
*/
spec = g_param_spec_enum(
"segment-encoding",
"Segment encoding",
"After splitting a path into components, "
"decode the path components before "
"parsing according to this scheme",
GADATASET_TYPE_SEGMENT_ENCODING,
static_cast<GADatasetSegmentEncoding>(default_options.segment_encoding),
static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class, PROP_OPTIONS_SEGMENT_ENCODING, spec);
}
/**
* gadataset_key_value_partitioning_options_new:
*
* Returns: The newly created #GADatasetKeyValuePartitioningOptions.
*
* Since: 11.0.0
*/
GADatasetKeyValuePartitioningOptions *
gadataset_key_value_partitioning_options_new(void)
{
return GADATASET_KEY_VALUE_PARTITIONING_OPTIONS(
g_object_new(GADATASET_TYPE_KEY_VALUE_PARTITIONING_OPTIONS, nullptr));
}
G_DEFINE_ABSTRACT_TYPE(GADatasetKeyValuePartitioning,
gadataset_key_value_partitioning,
GADATASET_TYPE_PARTITIONING)
static void
gadataset_key_value_partitioning_init(GADatasetKeyValuePartitioning *object)
{
}
static void
gadataset_key_value_partitioning_class_init(GADatasetKeyValuePartitioningClass *klass)
{
}
G_END_DECLS
template <typename Partitioning, typename PartitioningOptions>
GADatasetPartitioning *
garrow_key_value_partitioning_new(GArrowSchema *schema,
GList *dictionaries,
PartitioningOptions &arrow_options,
GError **error)
{
auto arrow_schema = garrow_schema_get_raw(schema);
std::vector<std::shared_ptr<arrow::Array>> arrow_dictionaries;
for (auto node = dictionaries; node; node = node->next) {
auto dictionary = GARROW_ARRAY(node->data);
if (dictionary) {
arrow_dictionaries.push_back(garrow_array_get_raw(dictionary));
} else {
arrow_dictionaries.push_back(nullptr);
}
}
auto arrow_partitioning = std::static_pointer_cast<arrow::dataset::Partitioning>(
std::make_shared<Partitioning>(arrow_schema, arrow_dictionaries, arrow_options));
return gadataset_partitioning_new_raw(&arrow_partitioning);
}
G_BEGIN_DECLS
G_DEFINE_TYPE(GADatasetDirectoryPartitioning,
gadataset_directory_partitioning,
GADATASET_TYPE_KEY_VALUE_PARTITIONING)
static void
gadataset_directory_partitioning_init(GADatasetDirectoryPartitioning *object)
{
}
static void
gadataset_directory_partitioning_class_init(GADatasetDirectoryPartitioningClass *klass)
{
}
/**
* gadataset_directory_partitioning_new:
* @schema: A #GArrowSchema that describes all partitioned segments.
* @dictionaries: (nullable) (element-type GArrowArray): A list of #GArrowArray
* for dictionary data types in @schema.
* @options: (nullable): A #GADatasetKeyValuePartitioningOptions.
* @error: (nullable): Return location for a #GError or %NULL.
*
* Returns: The newly created #GADatasetDirectoryPartitioning on success,
* %NULL on error.
*
* Since: 6.0.0
*/
GADatasetDirectoryPartitioning *
gadataset_directory_partitioning_new(GArrowSchema *schema,
GList *dictionaries,
GADatasetKeyValuePartitioningOptions *options,
GError **error)
{
arrow::dataset::KeyValuePartitioningOptions arrow_options;
if (options) {
arrow_options = gadataset_key_value_partitioning_options_get_raw(options);
}
return GADATASET_DIRECTORY_PARTITIONING(
garrow_key_value_partitioning_new<arrow::dataset::DirectoryPartitioning>(
schema,
dictionaries,
arrow_options,
error));
}
struct GADatasetHivePartitioningOptionsPrivate
{
gchar *null_fallback;
};
enum {
PROP_OPTIONS_NULL_FALLBACK = 1,
};
G_DEFINE_TYPE_WITH_PRIVATE(GADatasetHivePartitioningOptions,
gadataset_hive_partitioning_options,
GADATASET_TYPE_KEY_VALUE_PARTITIONING_OPTIONS)
#define GADATASET_HIVE_PARTITIONING_OPTIONS_GET_PRIVATE(obj) \
static_cast<GADatasetHivePartitioningOptionsPrivate *>( \
gadataset_hive_partitioning_options_get_instance_private( \
GADATASET_HIVE_PARTITIONING_OPTIONS(obj)))
static void
gadataset_hive_partitioning_options_finalize(GObject *object)
{
auto priv = GADATASET_HIVE_PARTITIONING_OPTIONS_GET_PRIVATE(object);
if (priv->null_fallback) {
g_free(priv->null_fallback);
priv->null_fallback = nullptr;
}
G_OBJECT_CLASS(gadataset_hive_partitioning_options_parent_class)->finalize(object);
}
static void
gadataset_hive_partitioning_options_set_property(GObject *object,
guint prop_id,
const GValue *value,
GParamSpec *pspec)
{
auto priv = GADATASET_HIVE_PARTITIONING_OPTIONS_GET_PRIVATE(object);
switch (prop_id) {
case PROP_OPTIONS_NULL_FALLBACK:
if (priv->null_fallback == g_value_get_string(value)) {
break;
}
if (priv->null_fallback) {
g_free(priv->null_fallback);
}
priv->null_fallback = g_value_dup_string(value);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
}
}
static void
gadataset_hive_partitioning_options_get_property(GObject *object,
guint prop_id,
GValue *value,
GParamSpec *pspec)
{
auto priv = GADATASET_HIVE_PARTITIONING_OPTIONS_GET_PRIVATE(object);
switch (prop_id) {
case PROP_OPTIONS_NULL_FALLBACK:
g_value_set_string(value, priv->null_fallback);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
}
}
static void
gadataset_hive_partitioning_options_init(GADatasetHivePartitioningOptions *object)
{
}
static void
gadataset_hive_partitioning_options_class_init(
GADatasetHivePartitioningOptionsClass *klass)
{
auto gobject_class = G_OBJECT_CLASS(klass);
gobject_class->finalize = gadataset_hive_partitioning_options_finalize;
gobject_class->set_property = gadataset_hive_partitioning_options_set_property;
gobject_class->get_property = gadataset_hive_partitioning_options_get_property;
arrow::dataset::HivePartitioningOptions default_options;
GParamSpec *spec;
/**
* GADatasetHivePartitioningOptions:null-fallback:
*
* The fallback string for null. This is used only by
* #GADatasetHivePartitioning.
*
* Since: 11.0.0
*/
spec = g_param_spec_string("null-fallback",
"Null fallback",
"The fallback string for null",
default_options.null_fallback.c_str(),
static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class, PROP_OPTIONS_NULL_FALLBACK, spec);
}
/**
* gadataset_hive_partitioning_options_new:
*
* Returns: The newly created #GADatasetHivePartitioningOptions.
*
* Since: 11.0.0
*/
GADatasetHivePartitioningOptions *
gadataset_hive_partitioning_options_new(void)
{
return GADATASET_HIVE_PARTITIONING_OPTIONS(
g_object_new(GADATASET_TYPE_HIVE_PARTITIONING_OPTIONS, nullptr));
}
G_DEFINE_TYPE(GADatasetHivePartitioning,
gadataset_hive_partitioning,
GADATASET_TYPE_KEY_VALUE_PARTITIONING)
static void
gadataset_hive_partitioning_init(GADatasetHivePartitioning *object)
{
}
static void
gadataset_hive_partitioning_class_init(GADatasetHivePartitioningClass *klass)
{
}
/**
* gadataset_hive_partitioning_new:
* @schema: A #GArrowSchema that describes all partitioned segments.
* @dictionaries: (nullable) (element-type GArrowArray): A list of #GArrowArray
* for dictionary data types in @schema.
* @options: (nullable): A #GADatasetHivePartitioningOptions.
* @error: (nullable): Return location for a #GError or %NULL.
*
* Returns: The newly created #GADatasetHivePartitioning on success,
* %NULL on error.
*
* Since: 11.0.0
*/
GADatasetHivePartitioning *
gadataset_hive_partitioning_new(GArrowSchema *schema,
GList *dictionaries,
GADatasetHivePartitioningOptions *options,
GError **error)
{
arrow::dataset::HivePartitioningOptions arrow_options;
if (options) {
arrow_options = gadataset_hive_partitioning_options_get_raw(options);
}
return GADATASET_HIVE_PARTITIONING(
garrow_key_value_partitioning_new<arrow::dataset::HivePartitioning>(schema,
dictionaries,
arrow_options,
error));
}
/**
* gadataset_hive_partitioning_get_null_fallback:
*
* Returns: The fallback string for null.
*
* It should be freed with g_free() when no longer needed.
*
* Since: 11.0.0
*/
gchar *
gadataset_hive_partitioning_get_null_fallback(GADatasetHivePartitioning *partitioning)
{
const auto arrow_partitioning =
std::static_pointer_cast<arrow::dataset::HivePartitioning>(
gadataset_partitioning_get_raw(GADATASET_PARTITIONING(partitioning)));
const auto null_fallback = arrow_partitioning->null_fallback();
return g_strdup(null_fallback.c_str());
}
G_END_DECLS
arrow::dataset::PartitioningFactoryOptions
gadataset_partitioning_factory_options_get_raw(
GADatasetPartitioningFactoryOptions *options)
{
auto priv = GADATASET_PARTITIONING_FACTORY_OPTIONS_GET_PRIVATE(options);
arrow::dataset::PartitioningFactoryOptions arrow_options;
arrow_options.infer_dictionary = priv->infer_dictionary;
if (priv->schema) {
arrow_options.schema = garrow_schema_get_raw(priv->schema);
}
arrow_options.segment_encoding =
static_cast<arrow::dataset::SegmentEncoding>(priv->segment_encoding);
return arrow_options;
}
arrow::dataset::KeyValuePartitioningOptions
gadataset_key_value_partitioning_options_get_raw(
GADatasetKeyValuePartitioningOptions *options)
{
auto priv = GADATASET_KEY_VALUE_PARTITIONING_OPTIONS_GET_PRIVATE(options);
arrow::dataset::KeyValuePartitioningOptions arrow_options;
arrow_options.segment_encoding =
static_cast<arrow::dataset::SegmentEncoding>(priv->segment_encoding);
return arrow_options;
}
arrow::dataset::HivePartitioningOptions
gadataset_hive_partitioning_options_get_raw(GADatasetHivePartitioningOptions *options)
{
auto priv = GADATASET_HIVE_PARTITIONING_OPTIONS_GET_PRIVATE(options);
auto arrow_key_value_options = gadataset_key_value_partitioning_options_get_raw(
GADATASET_KEY_VALUE_PARTITIONING_OPTIONS(options));
arrow::dataset::HivePartitioningOptions arrow_options;
arrow_options.segment_encoding = arrow_key_value_options.segment_encoding;
arrow_options.null_fallback = priv->null_fallback;
return arrow_options;
}
GADatasetPartitioning *
gadataset_partitioning_new_raw(
std::shared_ptr<arrow::dataset::Partitioning> *arrow_partitioning)
{
GType type = GADATASET_TYPE_PARTITIONING;
const auto arrow_type_name = (*arrow_partitioning)->type_name();
if (arrow_type_name == "directory") {
type = GADATASET_TYPE_DIRECTORY_PARTITIONING;
} else if (arrow_type_name == "hive") {
type = GADATASET_TYPE_HIVE_PARTITIONING;
}
return GADATASET_PARTITIONING(
g_object_new(type, "partitioning", arrow_partitioning, nullptr));
}
std::shared_ptr<arrow::dataset::Partitioning>
gadataset_partitioning_get_raw(GADatasetPartitioning *partitioning)
{
auto priv = GADATASET_PARTITIONING_GET_PRIVATE(partitioning);
return priv->partitioning;
}