in csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs [141:331]
/// <summary>
/// Builds the <see cref="ArrowType"/> described by the wrapped C schema's format string,
/// recursing into the dictionary schema and child schemas where the format calls for them.
/// </summary>
/// <returns>The Arrow type corresponding to the schema's format string.</returns>
/// <exception cref="InvalidDataException">
/// The format string is missing or malformed, or a list-like type does not have exactly one non-null child.
/// </exception>
/// <exception cref="NotSupportedException">The format string is not one this importer recognizes.</exception>
/// <exception cref="FormatException">
/// A numeric parameter embedded in the format string (width, precision, scale, bit width) is not a valid integer.
/// </exception>
/// <exception cref="OverflowException">
/// A numeric parameter embedded in the format string does not fit in a 32-bit integer.
/// </exception>
public ArrowType GetAsType()
{
    var format = StringUtil.PtrToStringUtf8(_cSchema->format);
    if (format == null)
    {
        // Guard so a missing format surfaces as a data error rather than a NullReferenceException below.
        throw new InvalidDataException("Expected schema format string to be non-null.");
    }

    if (_cSchema->dictionary != null)
    {
        // For a dictionary-encoded schema the format string describes the index type;
        // the value type is read from the nested dictionary schema.
        ArrowType indicesType = format switch
        {
            "c" => Int8Type.Default,
            "C" => UInt8Type.Default,
            "s" => Int16Type.Default,
            "S" => UInt16Type.Default,
            "i" => Int32Type.Default,
            "I" => UInt32Type.Default,
            "l" => Int64Type.Default,
            "L" => UInt64Type.Default,
            _ => throw new InvalidDataException($"Indices must be an integer, but got format string {format}"),
        };

        var dictionarySchema = new ImportedArrowSchema(_cSchema->dictionary, isRoot: false);
        ArrowType dictionaryType = dictionarySchema.GetAsType();
        bool ordered = _cSchema->GetFlag(CArrowSchema.ArrowFlagDictionaryOrdered);
        return new DictionaryType(indicesType, dictionaryType, ordered);
    }

    // Nested types. Format strings are machine-readable, so every prefix test is ordinal
    // and every embedded number is parsed with the invariant culture.

    // List, list-view and large list all carry exactly one child field.
    if (format == "+l" || format == "+vl" || format == "+L")
    {
        Field childField = ImportOnlyChild("list");
        return format[1] switch
        {
            'l' => new ListType(childField),
            'v' => new ListViewType(childField),
            'L' => new LargeListType(childField),
            _ => throw new InvalidDataException($"Invalid format for list: '{format}'"),
        };
    }

    if (format == "+s")
    {
        return new StructType(ParseChildren("struct"));
    }

    // Fixed-size list: "+w:<width>"
    if (format.StartsWith("+w:", StringComparison.Ordinal))
    {
        int width = Int32.Parse(format.Substring(3), System.Globalization.CultureInfo.InvariantCulture);
        return new FixedSizeListType(ImportOnlyChild("fixed-length list"), width);
    }

    if (format == "+m")
    {
        // Single() rejects anything other than exactly one parsed child.
        return new MapType(
            ParseChildren("map").Single(),
            _cSchema->GetFlag(CArrowSchema.ArrowFlagMapKeysSorted));
    }

    // Decimals: "d:<precision>,<scale>[,<bitWidth>]"; the bit width defaults to 128 when omitted.
    if (format.StartsWith("d:", StringComparison.Ordinal))
    {
        string[] parameters = format.Substring(2).Split(',');
        if (parameters.Length < 2)
        {
            throw new InvalidDataException($"Invalid format for decimal: '{format}'");
        }

        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        int precision = Int32.Parse(parameters[0], invariant);
        int scale = Int32.Parse(parameters[1], invariant);
        int bitWidth = parameters.Length == 2 ? 128 : Int32.Parse(parameters[2], invariant);
        switch (bitWidth)
        {
            case 32: return new Decimal32Type(precision, scale);
            case 64: return new Decimal64Type(precision, scale);
            case 128: return new Decimal128Type(precision, scale);
            case 256: return new Decimal256Type(precision, scale);
            default: throw new InvalidDataException($"Unexpected bit width {bitWidth}");
        }
    }

    // Timestamps: "ts<unit>:<timezone>", where the timezone may be empty.
    if (format.StartsWith("ts", StringComparison.Ordinal))
    {
        if (format.Length < 3)
        {
            // The unit character at index 2 is mandatory.
            throw new InvalidDataException($"Invalid format for timestamp: '{format}'");
        }

        TimeUnit timeUnit = format[2] switch
        {
            's' => TimeUnit.Second,
            'm' => TimeUnit.Millisecond,
            'u' => TimeUnit.Microsecond,
            'n' => TimeUnit.Nanosecond,
            _ => throw new InvalidDataException($"Unsupported time unit for import: {format[2]}"),
        };

        // Everything after the first ':' is the timezone. A missing separator or an empty
        // remainder both mean "no timezone" instead of leaking the format string into the type.
        int separator = format.IndexOf(':');
        string timezone = separator >= 0 && separator + 1 < format.Length
            ? format.Substring(separator + 1)
            : null;
        return new TimestampType(timeUnit, timezone);
    }

    // Fixed-width binary: "w:<byteWidth>"
    if (format.StartsWith("w:", StringComparison.Ordinal))
    {
        int width = Int32.Parse(format.Substring(2), System.Globalization.CultureInfo.InvariantCulture);
        return new FixedSizeBinaryType(width);
    }

    // Unions: "+ud:<id>,<id>,..." (dense) or "+us:<id>,<id>,..." (sparse)
    if (format.StartsWith("+ud:", StringComparison.Ordinal) || format.StartsWith("+us:", StringComparison.Ordinal))
    {
        UnionMode unionMode = format[2] == 'd' ? UnionMode.Dense : UnionMode.Sparse;
        List<int> typeIds = new List<int>();
        int pos = 4;
        do
        {
            // Take the text up to the next comma, or to the end of the string for the last id.
            int next = format.IndexOf(',', pos);
            if (next < 0) { next = format.Length; }

            string token = format.Substring(pos, next - pos);
            if (!int.TryParse(
                    token,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out int code))
            {
                throw new InvalidDataException($"Invalid type code for union import: {token}");
            }
            typeIds.Add(code);

            pos = next + 1;
        } while (pos < format.Length);

        return new UnionType(ParseChildren("union"), typeIds, unionMode);
    }

    return format switch
    {
        // Primitives
        "n" => NullType.Default,
        "b" => BooleanType.Default,
        "c" => Int8Type.Default,
        "C" => UInt8Type.Default,
        "s" => Int16Type.Default,
        "S" => UInt16Type.Default,
        "i" => Int32Type.Default,
        "I" => UInt32Type.Default,
        "l" => Int64Type.Default,
        "L" => UInt64Type.Default,
        "e" => HalfFloatType.Default,
        "f" => FloatType.Default,
        "g" => DoubleType.Default,
        // Binary data
        "z" => BinaryType.Default,
        "vz" => BinaryViewType.Default,
        "Z" => LargeBinaryType.Default,
        "u" => StringType.Default,
        "vu" => StringViewType.Default,
        "U" => LargeStringType.Default,
        // Date and time
        "tdD" => Date32Type.Default,
        "tdm" => Date64Type.Default,
        "tts" => TimeType.Second,
        "ttm" => TimeType.Millisecond,
        "ttu" => TimeType.Microsecond,
        "ttn" => TimeType.Nanosecond,
        "tDs" => DurationType.Second,
        "tDm" => DurationType.Millisecond,
        "tDu" => DurationType.Microsecond,
        "tDn" => DurationType.Nanosecond,
        "tiM" => IntervalType.YearMonth,
        "tiD" => IntervalType.DayTime,
        "tin" => IntervalType.MonthDayNanosecond,
        _ => throw new NotSupportedException("Data type is not yet supported in import.")
    };

    // Validates that the schema has exactly one non-null child and imports it as a field.
    // `kind` is only used to word the error messages ("list", "fixed-length list").
    Field ImportOnlyChild(string kind)
    {
        if (_cSchema->n_children != 1)
        {
            throw new InvalidDataException($"Expected {kind} type to have exactly one child.");
        }

        var child = _cSchema->GetChild(0);
        if (child == null)
        {
            throw new InvalidDataException($"Expected {kind} type child to be non-null.");
        }

        return new ImportedArrowSchema(child, isRoot: false).GetAsField();
    }
}