in parquet/src/schema/parser.rs [290:514]
fn add_primitive_type(
&mut self,
repetition: Repetition,
physical_type: PhysicalType,
) -> Result<Type> {
// Read type length if the type is FIXED_LEN_BYTE_ARRAY.
let mut length: i32 = -1;
if physical_type == PhysicalType::FIXED_LEN_BYTE_ARRAY {
assert_token(self.tokenizer.next(), "(")?;
length = parse_i32(
self.tokenizer.next(),
"Expected length for FIXED_LEN_BYTE_ARRAY, found None",
"Failed to parse length for FIXED_LEN_BYTE_ARRAY",
)?;
assert_token(self.tokenizer.next(), ")")?;
}
// Parse name of the primitive type
let name = self
.tokenizer
.next()
.ok_or_else(|| general_err!("Expected name, found None"))?;
// Parse converted type
let (logical_type, converted_type, precision, scale) =
if let Some("(") = self.tokenizer.next() {
let (mut logical, mut converted) = self
.tokenizer
.next()
.ok_or_else(|| general_err!("Expected logical or converted type, found None"))
.and_then(|v| {
let upper = v.to_uppercase();
let logical = upper.parse::<LogicalType>();
match logical {
Ok(logical) => {
Ok((Some(logical.clone()), ConvertedType::from(Some(logical))))
}
Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
}
})?;
// Parse precision and scale for decimals
let mut precision: i32 = -1;
let mut scale: i32 = -1;
// Parse the concrete logical type
if let Some(tpe) = &logical {
match tpe {
LogicalType::Decimal { .. } => {
if let Some("(") = self.tokenizer.next() {
precision = parse_i32(
self.tokenizer.next(),
"Expected precision, found None",
"Failed to parse precision for DECIMAL type",
)?;
if let Some(",") = self.tokenizer.next() {
scale = parse_i32(
self.tokenizer.next(),
"Expected scale, found None",
"Failed to parse scale for DECIMAL type",
)?;
assert_token(self.tokenizer.next(), ")")?;
} else {
scale = 0
}
logical = Some(LogicalType::Decimal { scale, precision });
converted = ConvertedType::from(logical.clone());
}
}
LogicalType::Time { .. } => {
if let Some("(") = self.tokenizer.next() {
let unit = parse_timeunit(
self.tokenizer.next(),
"Invalid timeunit found",
"Failed to parse timeunit for TIME type",
)?;
if let Some(",") = self.tokenizer.next() {
let is_adjusted_to_u_t_c = parse_bool(
self.tokenizer.next(),
"Invalid boolean found",
"Failed to parse timezone info for TIME type",
)?;
assert_token(self.tokenizer.next(), ")")?;
logical = Some(LogicalType::Time {
is_adjusted_to_u_t_c,
unit,
});
converted = ConvertedType::from(logical.clone());
} else {
// Invalid token for unit
self.tokenizer.backtrack();
}
}
}
LogicalType::Timestamp { .. } => {
if let Some("(") = self.tokenizer.next() {
let unit = parse_timeunit(
self.tokenizer.next(),
"Invalid timeunit found",
"Failed to parse timeunit for TIMESTAMP type",
)?;
if let Some(",") = self.tokenizer.next() {
let is_adjusted_to_u_t_c = parse_bool(
self.tokenizer.next(),
"Invalid boolean found",
"Failed to parse timezone info for TIMESTAMP type",
)?;
assert_token(self.tokenizer.next(), ")")?;
logical = Some(LogicalType::Timestamp {
is_adjusted_to_u_t_c,
unit,
});
converted = ConvertedType::from(logical.clone());
} else {
// Invalid token for unit
self.tokenizer.backtrack();
}
}
}
LogicalType::Integer { .. } => {
if let Some("(") = self.tokenizer.next() {
let bit_width = parse_i32(
self.tokenizer.next(),
"Invalid bit_width found",
"Failed to parse bit_width for INTEGER type",
)? as i8;
match physical_type {
PhysicalType::INT32 => match bit_width {
8 | 16 | 32 => {}
_ => {
return Err(general_err!(
"Incorrect bit width {} for INT32",
bit_width
))
}
},
PhysicalType::INT64 => {
if bit_width != 64 {
return Err(general_err!(
"Incorrect bit width {} for INT64",
bit_width
));
}
}
_ => {
return Err(general_err!(
"Logical type Integer cannot be used with physical type {}",
physical_type
))
}
}
if let Some(",") = self.tokenizer.next() {
let is_signed = parse_bool(
self.tokenizer.next(),
"Invalid boolean found",
"Failed to parse is_signed for INTEGER type",
)?;
assert_token(self.tokenizer.next(), ")")?;
logical = Some(LogicalType::Integer {
bit_width,
is_signed,
});
converted = ConvertedType::from(logical.clone());
} else {
// Invalid token for unit
self.tokenizer.backtrack();
}
}
}
_ => {}
}
} else if converted == ConvertedType::DECIMAL {
if let Some("(") = self.tokenizer.next() {
// Parse precision
precision = parse_i32(
self.tokenizer.next(),
"Expected precision, found None",
"Failed to parse precision for DECIMAL type",
)?;
// Parse scale
scale = if let Some(",") = self.tokenizer.next() {
parse_i32(
self.tokenizer.next(),
"Expected scale, found None",
"Failed to parse scale for DECIMAL type",
)?
} else {
// Scale is not provided, set it to 0.
self.tokenizer.backtrack();
0
};
assert_token(self.tokenizer.next(), ")")?;
} else {
self.tokenizer.backtrack();
}
}
assert_token(self.tokenizer.next(), ")")?;
(logical, converted, precision, scale)
} else {
self.tokenizer.backtrack();
(None, ConvertedType::NONE, -1, -1)
};
// Parse optional id
let id = if let Some("=") = self.tokenizer.next() {
self.tokenizer.next().and_then(|v| v.parse::<i32>().ok())
} else {
self.tokenizer.backtrack();
None
};
assert_token(self.tokenizer.next(), ";")?;
Type::primitive_type_builder(name, physical_type)
.with_repetition(repetition)
.with_logical_type(logical_type)
.with_converted_type(converted_type)
.with_length(length)
.with_precision(precision)
.with_scale(scale)
.with_id(id)
.build()
}