in python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/python/subprojects/arrow-nanoarrow/src/nanoarrow/common/schema.c [656:1102]
// Parse a type format string (e.g. "i", "d:10,2", "w:16", "+ud:0,1", "tsu:UTC")
// and record the result in schema_view.
//
// schema_view:    receives type and storage_type plus any type-specific
//                 parameters found in the string (decimal_precision,
//                 decimal_scale, decimal_bitwidth, fixed_size, time_unit,
//                 timezone, union_type_ids). For unions,
//                 schema_view->schema->n_children is read to validate the
//                 number of type ids, so schema_view->schema must already be set.
// format:         NUL-terminated format string. timezone and union_type_ids are
//                 stored as pointers into this string (nothing is copied).
// format_end_out: initialised to format on entry; on success it points one past
//                 the last character consumed. This function does not itself
//                 check that the whole string was consumed.
// error:          receives a message via ArrowErrorSet() on failure.
//
// Returns NANOARROW_OK on success or EINVAL if the string is not recognised.
static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
                                           const char* format,
                                           const char** format_end_out,
                                           struct ArrowError* error) {
  *format_end_out = format;

  // Scratch pointers for strtol(): where a number starts and where it stopped.
  const char* parse_start;
  char* parse_end;

  switch (format[0]) {
    case 'n':
      schema_view->type = NANOARROW_TYPE_NA;
      schema_view->storage_type = NANOARROW_TYPE_NA;
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'b':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BOOL);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'c':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT8);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'C':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT8);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 's':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT16);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'S':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT16);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'i':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'I':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT32);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'l':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'L':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT64);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'e':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_HALF_FLOAT);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'f':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_FLOAT);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'g':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DOUBLE);
      *format_end_out = format + 1;
      return NANOARROW_OK;

    // decimal: "d:precision,scale[,bitwidth]"
    case 'd':
      if (format[1] != ':' || format[2] == '\0') {
        ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'");
        return EINVAL;
      }

      // precision must be followed by a comma
      parse_start = format + 2;
      schema_view->decimal_precision = (int32_t)strtol(parse_start, &parse_end, 10);
      if (parse_end == parse_start || parse_end[0] != ',') {
        ArrowErrorSet(error, "Expected 'precision,scale[,bitwidth]' following 'd:'");
        return EINVAL;
      }

      // scale is required; a trailing ",bitwidth" is optional
      parse_start = parse_end + 1;
      schema_view->decimal_scale = (int32_t)strtol(parse_start, &parse_end, 10);
      if (parse_end == parse_start) {
        ArrowErrorSet(error, "Expected 'scale[,bitwidth]' following 'd:precision,'");
        return EINVAL;
      } else if (parse_end[0] != ',') {
        // No explicit bitwidth: default to 128
        schema_view->decimal_bitwidth = 128;
      } else {
        parse_start = parse_end + 1;
        schema_view->decimal_bitwidth = (int32_t)strtol(parse_start, &parse_end, 10);
        if (parse_start == parse_end) {
          ArrowErrorSet(error, "Expected bitwidth following 'd:precision,scale,'");
          return EINVAL;
        }
      }

      // Note: set before the bitwidth is validated, so it is also updated when
      // the bitwidth switch below returns EINVAL.
      *format_end_out = parse_end;

      switch (schema_view->decimal_bitwidth) {
        case 32:
          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL32);
          return NANOARROW_OK;
        case 64:
          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL64);
          return NANOARROW_OK;
        case 128:
          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128);
          return NANOARROW_OK;
        case 256:
          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL256);
          return NANOARROW_OK;
        default:
          ArrowErrorSet(
              error,
              "Expected decimal bitwidth of 32, 64, 128, or 256 but found %" PRId32,
              schema_view->decimal_bitwidth);
          return EINVAL;
      }

    // validity + data
    case 'w':
      schema_view->type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
      schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
      if (format[1] != ':' || format[2] == '\0') {
        ArrowErrorSet(error, "Expected ':<width>' following 'w'");
        return EINVAL;
      }

      // Go through the local char* rather than casting format_end_out to char**.
      // If no digits follow the ':', parse_end stays at format + 2 and this
      // still returns NANOARROW_OK with *format_end_out short of the end.
      schema_view->fixed_size = (int32_t)strtol(format + 2, &parse_end, 10);
      *format_end_out = parse_end;
      return NANOARROW_OK;

    // validity + offset + data
    case 'z':
      schema_view->type = NANOARROW_TYPE_BINARY;
      schema_view->storage_type = NANOARROW_TYPE_BINARY;
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'u':
      schema_view->type = NANOARROW_TYPE_STRING;
      schema_view->storage_type = NANOARROW_TYPE_STRING;
      *format_end_out = format + 1;
      return NANOARROW_OK;

    // validity + large_offset + data
    case 'Z':
      schema_view->type = NANOARROW_TYPE_LARGE_BINARY;
      schema_view->storage_type = NANOARROW_TYPE_LARGE_BINARY;
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'U':
      schema_view->type = NANOARROW_TYPE_LARGE_STRING;
      schema_view->storage_type = NANOARROW_TYPE_LARGE_STRING;
      *format_end_out = format + 1;
      return NANOARROW_OK;

    // nested types
    case '+':
      switch (format[1]) {
        // list has validity + offset or offset
        case 'l':
          schema_view->storage_type = NANOARROW_TYPE_LIST;
          schema_view->type = NANOARROW_TYPE_LIST;
          *format_end_out = format + 2;
          return NANOARROW_OK;

        // large list has validity + large_offset or large_offset
        case 'L':
          schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST;
          schema_view->type = NANOARROW_TYPE_LARGE_LIST;
          *format_end_out = format + 2;
          return NANOARROW_OK;

        // run end encoded has no buffer at all
        case 'r':
          schema_view->storage_type = NANOARROW_TYPE_RUN_END_ENCODED;
          schema_view->type = NANOARROW_TYPE_RUN_END_ENCODED;
          *format_end_out = format + 2;
          return NANOARROW_OK;

        // just validity buffer
        case 'w':
          if (format[2] != ':' || format[3] == '\0') {
            ArrowErrorSet(error, "Expected ':<width>' following '+w'");
            return EINVAL;
          }

          schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
          schema_view->type = NANOARROW_TYPE_FIXED_SIZE_LIST;
          // Same pattern as 'w': parse via the local char* and copy the end out
          schema_view->fixed_size = (int32_t)strtol(format + 3, &parse_end, 10);
          *format_end_out = parse_end;
          return NANOARROW_OK;
        case 's':
          schema_view->storage_type = NANOARROW_TYPE_STRUCT;
          schema_view->type = NANOARROW_TYPE_STRUCT;
          *format_end_out = format + 2;
          return NANOARROW_OK;
        case 'm':
          schema_view->storage_type = NANOARROW_TYPE_MAP;
          schema_view->type = NANOARROW_TYPE_MAP;
          *format_end_out = format + 2;
          return NANOARROW_OK;

        // unions: "+ud:<type_ids>" (dense) or "+us:<type_ids>" (sparse)
        case 'u':
          switch (format[2]) {
            case 'd':
              schema_view->storage_type = NANOARROW_TYPE_DENSE_UNION;
              schema_view->type = NANOARROW_TYPE_DENSE_UNION;
              break;
            case 's':
              schema_view->storage_type = NANOARROW_TYPE_SPARSE_UNION;
              schema_view->type = NANOARROW_TYPE_SPARSE_UNION;
              break;
            default:
              ArrowErrorSet(error,
                            "Expected union format string +us:<type_ids> or "
                            "+ud:<type_ids> but found '%s'",
                            format);
              return EINVAL;
          }

          if (format[3] == ':') {
            // union_type_ids borrows from format; it is not copied
            schema_view->union_type_ids = format + 4;

            // NOTE(review): a NULL output argument presumably means "count
            // only"; confirm against _ArrowParseUnionTypeIds().
            int64_t n_type_ids =
                _ArrowParseUnionTypeIds(schema_view->union_type_ids, NULL);
            if (n_type_ids != schema_view->schema->n_children) {
              ArrowErrorSet(error,
                            "Expected union type_ids parameter to be a comma-separated "
                            "list of %" PRId64 " values between 0 and 127 but found '%s'",
                            schema_view->schema->n_children, schema_view->union_type_ids);
              return EINVAL;
            }

            // The type id list runs to the end of the string
            *format_end_out = format + strlen(format);
            return NANOARROW_OK;
          } else {
            ArrowErrorSet(error,
                          "Expected union format string +us:<type_ids> or +ud:<type_ids> "
                          "but found '%s'",
                          format);
            return EINVAL;
          }

        // views
        case 'v':
          switch (format[2]) {
            case 'l':
              schema_view->storage_type = NANOARROW_TYPE_LIST_VIEW;
              schema_view->type = NANOARROW_TYPE_LIST_VIEW;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'L':
              schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST_VIEW;
              schema_view->type = NANOARROW_TYPE_LARGE_LIST_VIEW;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            default:
              ArrowErrorSet(
                  error, "Expected view format string +vl or +vL but found '%s'", format);
              return EINVAL;
          }

        default:
          ArrowErrorSet(error, "Expected nested type format string but found '%s'",
                        format);
          return EINVAL;
      }

    // date/time types: ArrowSchemaViewSetPrimitive() is called with the integer
    // storage type first, then schema_view->type is overwritten with the
    // temporal type.
    case 't':
      switch (format[1]) {
        // date
        case 'd':
          switch (format[2]) {
            case 'D':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
              schema_view->type = NANOARROW_TYPE_DATE32;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'm':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_DATE64;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            default:
              ArrowErrorSet(error, "Expected 'D' or 'm' following 'td' but found '%s'",
                            format + 2);
              return EINVAL;
          }

        // time of day
        case 't':
          switch (format[2]) {
            case 's':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
              schema_view->type = NANOARROW_TYPE_TIME32;
              schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'm':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
              schema_view->type = NANOARROW_TYPE_TIME32;
              schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'u':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIME64;
              schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'n':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIME64;
              schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            default:
              ArrowErrorSet(
                  error, "Expected 's', 'm', 'u', or 'n' following 'tt' but found '%s'",
                  format + 2);
              return EINVAL;
          }

        // timestamp: "ts<unit>:<timezone>"
        case 's':
          switch (format[2]) {
            case 's':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIMESTAMP;
              schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
              break;
            case 'm':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIMESTAMP;
              schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
              break;
            case 'u':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIMESTAMP;
              schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
              break;
            case 'n':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIMESTAMP;
              schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
              break;
            default:
              ArrowErrorSet(
                  error, "Expected 's', 'm', 'u', or 'n' following 'ts' but found '%s'",
                  format + 2);
              return EINVAL;
          }

          // The ':' is mandatory even when nothing follows it
          if (format[3] != ':') {
            ArrowErrorSet(error, "Expected ':' following '%.3s' but found '%s'", format,
                          format + 3);
            return EINVAL;
          }

          // timezone borrows from format and runs to the end of the string
          schema_view->timezone = format + 4;
          *format_end_out = format + strlen(format);
          return NANOARROW_OK;

        // duration
        case 'D':
          switch (format[2]) {
            case 's':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_DURATION;
              schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'm':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_DURATION;
              schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'u':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_DURATION;
              schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'n':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_DURATION;
              schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            default:
              ArrowErrorSet(error,
                            "Expected 's', 'm', 'u', or 'n' following 'tD' but found '%s'",
                            format + 2);
              return EINVAL;
          }

        // interval
        case 'i':
          switch (format[2]) {
            case 'M':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_MONTHS);
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'D':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_DAY_TIME);
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'n':
              ArrowSchemaViewSetPrimitive(schema_view,
                                          NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO);
              *format_end_out = format + 3;
              return NANOARROW_OK;
            default:
              ArrowErrorSet(error,
                            "Expected 'M', 'D', or 'n' following 'ti' but found '%s'",
                            format + 2);
              return EINVAL;
          }

        default:
          ArrowErrorSet(
              error, "Expected 'd', 't', 's', 'D', or 'i' following 't' but found '%s'",
              format + 1);
          return EINVAL;
      }

    // view types
    case 'v': {
      switch (format[1]) {
        case 'u':
          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_STRING_VIEW);
          *format_end_out = format + 2;
          return NANOARROW_OK;
        case 'z':
          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BINARY_VIEW);
          *format_end_out = format + 2;
          return NANOARROW_OK;
        default:
          ArrowErrorSet(error, "Expected 'u', or 'z' following 'v' but found '%s'",
                        format + 1);
          return EINVAL;
      }
    }

    default:
      ArrowErrorSet(error, "Unknown format: '%s'", format);
      return EINVAL;
  }
}