in c/driver/postgresql/connection.cc [673:932]
AdbcStatusCode PostgresConnectionGetStatisticsImpl(PGconn* conn, const char* db_schema,
const char* table_name,
struct ArrowSchema* schema,
struct ArrowArray* array,
struct AdbcError* error) {
// Set up schema
auto uschema = nanoarrow::UniqueSchema();
{
ArrowSchemaInit(uschema.get());
CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(uschema.get(), /*num_columns=*/2), error);
CHECK_NA(INTERNAL, ArrowSchemaSetType(uschema->children[0], NANOARROW_TYPE_STRING),
error);
CHECK_NA(INTERNAL, ArrowSchemaSetName(uschema->children[0], "catalog_name"), error);
CHECK_NA(INTERNAL, ArrowSchemaSetType(uschema->children[1], NANOARROW_TYPE_LIST),
error);
CHECK_NA(INTERNAL, ArrowSchemaSetName(uschema->children[1], "catalog_db_schemas"),
error);
CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(uschema->children[1]->children[0], 2),
error);
uschema->children[1]->flags &= ~ARROW_FLAG_NULLABLE;
struct ArrowSchema* db_schema_schema = uschema->children[1]->children[0];
CHECK_NA(INTERNAL,
ArrowSchemaSetType(db_schema_schema->children[0], NANOARROW_TYPE_STRING),
error);
CHECK_NA(INTERNAL,
ArrowSchemaSetName(db_schema_schema->children[0], "db_schema_name"), error);
CHECK_NA(INTERNAL,
ArrowSchemaSetType(db_schema_schema->children[1], NANOARROW_TYPE_LIST),
error);
CHECK_NA(INTERNAL,
ArrowSchemaSetName(db_schema_schema->children[1], "db_schema_statistics"),
error);
CHECK_NA(INTERNAL,
ArrowSchemaSetTypeStruct(db_schema_schema->children[1]->children[0], 5),
error);
db_schema_schema->children[1]->flags &= ~ARROW_FLAG_NULLABLE;
struct ArrowSchema* statistics_schema = db_schema_schema->children[1]->children[0];
CHECK_NA(INTERNAL,
ArrowSchemaSetType(statistics_schema->children[0], NANOARROW_TYPE_STRING),
error);
CHECK_NA(INTERNAL, ArrowSchemaSetName(statistics_schema->children[0], "table_name"),
error);
statistics_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE;
CHECK_NA(INTERNAL,
ArrowSchemaSetType(statistics_schema->children[1], NANOARROW_TYPE_STRING),
error);
CHECK_NA(INTERNAL, ArrowSchemaSetName(statistics_schema->children[1], "column_name"),
error);
CHECK_NA(INTERNAL,
ArrowSchemaSetType(statistics_schema->children[2], NANOARROW_TYPE_INT16),
error);
CHECK_NA(INTERNAL,
ArrowSchemaSetName(statistics_schema->children[2], "statistic_key"), error);
statistics_schema->children[2]->flags &= ~ARROW_FLAG_NULLABLE;
CHECK_NA(INTERNAL,
ArrowSchemaSetTypeUnion(statistics_schema->children[3],
NANOARROW_TYPE_DENSE_UNION, 4),
error);
CHECK_NA(INTERNAL,
ArrowSchemaSetName(statistics_schema->children[3], "statistic_value"),
error);
statistics_schema->children[3]->flags &= ~ARROW_FLAG_NULLABLE;
CHECK_NA(INTERNAL,
ArrowSchemaSetType(statistics_schema->children[4], NANOARROW_TYPE_BOOL),
error);
CHECK_NA(
INTERNAL,
ArrowSchemaSetName(statistics_schema->children[4], "statistic_is_approximate"),
error);
statistics_schema->children[4]->flags &= ~ARROW_FLAG_NULLABLE;
struct ArrowSchema* value_schema = statistics_schema->children[3];
CHECK_NA(INTERNAL,
ArrowSchemaSetType(value_schema->children[0], NANOARROW_TYPE_INT64), error);
CHECK_NA(INTERNAL, ArrowSchemaSetName(value_schema->children[0], "int64"), error);
CHECK_NA(INTERNAL,
ArrowSchemaSetType(value_schema->children[1], NANOARROW_TYPE_UINT64), error);
CHECK_NA(INTERNAL, ArrowSchemaSetName(value_schema->children[1], "uint64"), error);
CHECK_NA(INTERNAL,
ArrowSchemaSetType(value_schema->children[2], NANOARROW_TYPE_DOUBLE), error);
CHECK_NA(INTERNAL, ArrowSchemaSetName(value_schema->children[2], "float64"), error);
CHECK_NA(INTERNAL,
ArrowSchemaSetType(value_schema->children[3], NANOARROW_TYPE_BINARY), error);
CHECK_NA(INTERNAL, ArrowSchemaSetName(value_schema->children[3], "binary"), error);
}
// Set up builders
struct ArrowError na_error = {0};
CHECK_NA_DETAIL(INTERNAL, ArrowArrayInitFromSchema(array, uschema.get(), &na_error),
&na_error, error);
CHECK_NA(INTERNAL, ArrowArrayStartAppending(array), error);
struct ArrowArray* catalog_name_col = array->children[0];
struct ArrowArray* catalog_db_schemas_col = array->children[1];
struct ArrowArray* catalog_db_schemas_items = catalog_db_schemas_col->children[0];
struct ArrowArray* db_schema_name_col = catalog_db_schemas_items->children[0];
struct ArrowArray* db_schema_statistics_col = catalog_db_schemas_items->children[1];
struct ArrowArray* db_schema_statistics_items = db_schema_statistics_col->children[0];
struct ArrowArray* statistics_table_name_col = db_schema_statistics_items->children[0];
struct ArrowArray* statistics_column_name_col = db_schema_statistics_items->children[1];
struct ArrowArray* statistics_key_col = db_schema_statistics_items->children[2];
struct ArrowArray* statistics_value_col = db_schema_statistics_items->children[3];
struct ArrowArray* statistics_is_approximate_col =
db_schema_statistics_items->children[4];
// struct ArrowArray* value_int64_col = statistics_value_col->children[0];
// struct ArrowArray* value_uint64_col = statistics_value_col->children[1];
struct ArrowArray* value_float64_col = statistics_value_col->children[2];
// struct ArrowArray* value_binary_col = statistics_value_col->children[3];
// Query (could probably be massively improved)
std::string query = R"(
WITH
class AS (
SELECT nspname, relname, reltuples
FROM pg_namespace
INNER JOIN pg_class ON pg_class.relnamespace = pg_namespace.oid
)
SELECT tablename, attname, null_frac, avg_width, n_distinct, reltuples
FROM pg_stats
INNER JOIN class ON pg_stats.schemaname = class.nspname AND pg_stats.tablename = class.relname
WHERE pg_stats.schemaname = $1 AND tablename LIKE $2
ORDER BY tablename
)";
CHECK_NA(INTERNAL, ArrowArrayAppendString(catalog_name_col, ArrowCharView(PQdb(conn))),
error);
CHECK_NA(INTERNAL, ArrowArrayAppendString(db_schema_name_col, ArrowCharView(db_schema)),
error);
constexpr int8_t kStatsVariantFloat64 = 2;
std::string prev_table;
{
PqResultHelper result_helper{conn, query};
RAISE_STATUS(error,
result_helper.Execute({db_schema, table_name ? table_name : "%"}));
for (PqResultRow row : result_helper) {
auto reltuples = row[5].ParseDouble();
if (!reltuples) {
SetError(error, "[libpq] Invalid double value in reltuples: '%s'", row[5].data);
return ADBC_STATUS_INTERNAL;
}
if (std::strcmp(prev_table.c_str(), row[0].data) != 0) {
CHECK_NA(INTERNAL,
ArrowArrayAppendString(statistics_table_name_col,
ArrowStringView{row[0].data, row[0].len}),
error);
CHECK_NA(INTERNAL, ArrowArrayAppendNull(statistics_column_name_col, 1), error);
CHECK_NA(INTERNAL,
ArrowArrayAppendInt(statistics_key_col, ADBC_STATISTIC_ROW_COUNT_KEY),
error);
CHECK_NA(INTERNAL, ArrowArrayAppendDouble(value_float64_col, *reltuples), error);
CHECK_NA(INTERNAL,
ArrowArrayFinishUnionElement(statistics_value_col, kStatsVariantFloat64),
error);
CHECK_NA(INTERNAL, ArrowArrayAppendInt(statistics_is_approximate_col, 1), error);
CHECK_NA(INTERNAL, ArrowArrayFinishElement(db_schema_statistics_items), error);
prev_table = std::string(row[0].data, row[0].len);
}
auto null_frac = row[2].ParseDouble();
if (!null_frac) {
SetError(error, "[libpq] Invalid double value in null_frac: '%s'", row[2].data);
return ADBC_STATUS_INTERNAL;
}
CHECK_NA(INTERNAL,
ArrowArrayAppendString(statistics_table_name_col,
ArrowStringView{row[0].data, row[0].len}),
error);
CHECK_NA(INTERNAL,
ArrowArrayAppendString(statistics_column_name_col,
ArrowStringView{row[1].data, row[1].len}),
error);
CHECK_NA(INTERNAL,
ArrowArrayAppendInt(statistics_key_col, ADBC_STATISTIC_NULL_COUNT_KEY),
error);
CHECK_NA(INTERNAL,
ArrowArrayAppendDouble(value_float64_col, *null_frac * *reltuples), error);
CHECK_NA(INTERNAL,
ArrowArrayFinishUnionElement(statistics_value_col, kStatsVariantFloat64),
error);
CHECK_NA(INTERNAL, ArrowArrayAppendInt(statistics_is_approximate_col, 1), error);
CHECK_NA(INTERNAL, ArrowArrayFinishElement(db_schema_statistics_items), error);
auto average_byte_width = row[3].ParseDouble();
if (!average_byte_width) {
SetError(error, "[libpq] Invalid double value in avg_width: '%s'", row[3].data);
return ADBC_STATUS_INTERNAL;
}
CHECK_NA(INTERNAL,
ArrowArrayAppendString(statistics_table_name_col,
ArrowStringView{row[0].data, row[0].len}),
error);
CHECK_NA(INTERNAL,
ArrowArrayAppendString(statistics_column_name_col,
ArrowStringView{row[1].data, row[1].len}),
error);
CHECK_NA(
INTERNAL,
ArrowArrayAppendInt(statistics_key_col, ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_KEY),
error);
CHECK_NA(INTERNAL, ArrowArrayAppendDouble(value_float64_col, *average_byte_width),
error);
CHECK_NA(INTERNAL,
ArrowArrayFinishUnionElement(statistics_value_col, kStatsVariantFloat64),
error);
CHECK_NA(INTERNAL, ArrowArrayAppendInt(statistics_is_approximate_col, 1), error);
CHECK_NA(INTERNAL, ArrowArrayFinishElement(db_schema_statistics_items), error);
auto n_distinct = row[4].ParseDouble();
if (!n_distinct) {
SetError(error, "[libpq] Invalid double value in avg_width: '%s'", row[4].data);
return ADBC_STATUS_INTERNAL;
}
CHECK_NA(INTERNAL,
ArrowArrayAppendString(statistics_table_name_col,
ArrowStringView{row[0].data, row[0].len}),
error);
CHECK_NA(INTERNAL,
ArrowArrayAppendString(statistics_column_name_col,
ArrowStringView{row[1].data, row[1].len}),
error);
CHECK_NA(INTERNAL,
ArrowArrayAppendInt(statistics_key_col, ADBC_STATISTIC_DISTINCT_COUNT_KEY),
error);
// > If greater than zero, the estimated number of distinct values in
// > the column. If less than zero, the negative of the number of
// > distinct values divided by the number of rows.
// https://www.postgresql.org/docs/current/view-pg-stats.html
CHECK_NA(INTERNAL,
ArrowArrayAppendDouble(
value_float64_col,
*n_distinct > 0 ? *n_distinct : (std::fabs(*n_distinct) * *reltuples)),
error);
CHECK_NA(INTERNAL,
ArrowArrayFinishUnionElement(statistics_value_col, kStatsVariantFloat64),
error);
CHECK_NA(INTERNAL, ArrowArrayAppendInt(statistics_is_approximate_col, 1), error);
CHECK_NA(INTERNAL, ArrowArrayFinishElement(db_schema_statistics_items), error);
}
}
CHECK_NA(INTERNAL, ArrowArrayFinishElement(db_schema_statistics_col), error);
CHECK_NA(INTERNAL, ArrowArrayFinishElement(catalog_db_schemas_items), error);
CHECK_NA(INTERNAL, ArrowArrayFinishElement(catalog_db_schemas_col), error);
CHECK_NA(INTERNAL, ArrowArrayFinishElement(array), error);
CHECK_NA_DETAIL(INTERNAL, ArrowArrayFinishBuildingDefault(array, &na_error), &na_error,
error);
uschema.move(schema);
return ADBC_STATUS_OK;
}