in cpp/velox/substrait/VeloxSubstraitSignature.cc [117:249]
// Converts a Substrait type signature string into the corresponding Velox type.
//
// Recognized signatures:
//   - scalars: bool, i8, i16, i32, i64, fp32, fp64, str, vbin, ts, date, nothing
//   - dec<precision,scale>
//   - struct<T1,T2,...,Tn>  (the resulting row type uses empty field names)
//   - map<K,V>
//   - list<T>
// The scalar names must match exactly; the parameterized kinds are selected by
// prefix ("dec", "struct", "map", "list"). Children of nested signatures are
// converted by calling this function recursively.
//
// Unrecognized or malformed signatures are reported through VELOX_CHECK /
// VELOX_UNSUPPORTED.
TypePtr VeloxSubstraitSignature::fromSubstraitSignature(const std::string& signature) {
  if (signature == "bool") {
    return BOOLEAN();
  }
  if (signature == "i8") {
    return TINYINT();
  }
  if (signature == "i16") {
    return SMALLINT();
  }
  if (signature == "i32") {
    return INTEGER();
  }
  if (signature == "i64") {
    return BIGINT();
  }
  if (signature == "fp32") {
    return REAL();
  }
  if (signature == "fp64") {
    return DOUBLE();
  }
  if (signature == "str") {
    return VARCHAR();
  }
  if (signature == "vbin") {
    return VARBINARY();
  }
  if (signature == "ts") {
    return TIMESTAMP();
  }
  if (signature == "date") {
    return DATE();
  }
  if (signature == "nothing") {
    return UNKNOWN();
  }

  auto startWith = [](const std::string& str, const std::string& prefix) {
    // compare() checks the prefix in place, without building a temporary substring.
    return str.size() >= prefix.size() && str.compare(0, prefix.size(), prefix) == 0;
  };

  // Extracts the comma-separated child signatures found between the first '<'
  // and its enclosing '>' and converts each of them to a Velox type.
  auto parseNestedTypeSignature = [&](const std::string& nestedSignature) -> std::vector<TypePtr> {
    const auto start = nestedSignature.find_first_of('<');
    // Reject a missing '<' here so that npos is never handed to findEnclosingPos.
    VELOX_CHECK(
        start != std::string::npos,
        "Native validation failed due to: more information is needed to create nested type for {}",
        nestedSignature);
    const auto end = findEnclosingPos(nestedSignature, start, '<', '>');
    VELOX_CHECK(
        end - start > 1,
        "Native validation failed due to: more information is needed to create nested type for {}",
        nestedSignature);
    const std::string childrenTypes = nestedSignature.substr(start + 1, end - start - 1);

    // Split on top-level delimiters only; delimiters nested inside '<...>'
    // belong to a child signature and are skipped by findSansNesting.
    const char delimiter = ',';
    std::vector<TypePtr> types;
    size_t typeStart = 0;
    // A trailing delimiter leaves typeStart == size(), which simply ends the loop.
    while (typeStart < childrenTypes.size()) {
      const size_t typeEnd = findSansNesting(childrenTypes, typeStart, delimiter, '<', '>');
      if (typeEnd == std::string::npos) {
        // No further top-level delimiter: the remainder is the last child.
        types.emplace_back(fromSubstraitSignature(childrenTypes.substr(typeStart)));
        break;
      }
      VELOX_CHECK(childrenTypes.at(typeEnd) == delimiter);
      types.emplace_back(fromSubstraitSignature(childrenTypes.substr(typeStart, typeEnd - typeStart)));
      typeStart = typeEnd + 1;
    }
    return types;
  };

  if (startWith(signature, "dec")) {
    // Decimal type name is in the format of dec<precision,scale>.
    const auto precisionStart = signature.find_first_of('<');
    const auto tokenIndex = signature.find_first_of(',');
    const auto scaleEnd = signature.find_first_of('>');
    // All three delimiters must be present and appear in order; otherwise the
    // substring arithmetic below would wrap around on npos.
    const bool wellFormed = precisionStart != std::string::npos && tokenIndex != std::string::npos &&
        scaleEnd != std::string::npos && precisionStart < tokenIndex && tokenIndex < scaleEnd;
    if (!wellFormed) {
      VELOX_UNSUPPORTED(
          "VeloxSubstraitSignature::fromSubstraitSignature: Unrecognizable decimal type signature {}.", signature);
    }
    int precision = 0;
    int scale = 0;
    try {
      precision = std::stoi(signature.substr(precisionStart + 1, tokenIndex - precisionStart - 1));
      scale = std::stoi(signature.substr(tokenIndex + 1, scaleEnd - tokenIndex - 1));
    } catch (const std::exception&) {
      // std::stoi throws std::invalid_argument / std::out_of_range for
      // non-numeric or oversized fields; report these the same way as the
      // other malformed signatures instead of leaking a standard exception.
      VELOX_UNSUPPORTED(
          "VeloxSubstraitSignature::fromSubstraitSignature: Unrecognizable decimal type signature {}.", signature);
    }
    return DECIMAL(precision, scale);
  }

  if (startWith(signature, "struct")) {
    // Struct type name is in the format of struct<T1,T2,...,Tn>.
    auto types = parseNestedTypeSignature(signature);
    if (types.empty()) {
      VELOX_UNSUPPORTED(
          "VeloxSubstraitSignature::fromSubstraitSignature: Unrecognizable struct type signature {}.", signature);
    }
    // The signature carries no field names; value-initialized strings are
    // already empty, so no explicit fill is needed.
    std::vector<std::string> names(types.size());
    return std::make_shared<RowType>(std::move(names), std::move(types));
  }

  if (startWith(signature, "map")) {
    // Map type name is in the format of map<T1,T2>.
    auto types = parseNestedTypeSignature(signature);
    if (types.size() != 2) {
      VELOX_UNSUPPORTED(
          "VeloxSubstraitSignature::fromSubstraitSignature: Unrecognizable map type signature {}.", signature);
    }
    // Move the elements themselves; std::move(types)[i] would still copy.
    return MAP(std::move(types[0]), std::move(types[1]));
  }

  if (startWith(signature, "list")) {
    // Array type name is in the format of list<T>.
    auto types = parseNestedTypeSignature(signature);
    if (types.size() != 1) {
      VELOX_UNSUPPORTED(
          "VeloxSubstraitSignature::fromSubstraitSignature: Unrecognizable list type signature {}.", signature);
    }
    return ARRAY(std::move(types[0]));
  }

  VELOX_UNSUPPORTED("Substrait type signature conversion to Velox type not supported for {}.", signature);
}