in arrow-schema/src/datatype_parse.rs [358:490]
fn parse_word(&mut self) -> ArrowResult<Token> {
// reset temp space
self.word.clear();
loop {
match self.peek_next_char() {
None => break,
Some(c) if is_separator(c) => break,
Some(c) => {
self.next_char();
self.word.push(c);
}
}
}
if let Some(c) = self.word.chars().next() {
// if it started with a number, try parsing it as an integer
if c == '-' || c.is_numeric() {
let val: i64 = self.word.parse().map_err(|e| {
make_error(self.val, &format!("parsing {} as integer: {e}", self.word))
})?;
return Ok(Token::Integer(val));
}
// if it started with a double quote `"`, try parsing it as a double quoted string
else if c == '"' {
let len = self.word.chars().count();
// to verify it's double quoted
if let Some(last_c) = self.word.chars().last() {
if last_c != '"' || len < 2 {
return Err(make_error(
self.val,
&format!(
"parsing {} as double quoted string: last char must be \"",
self.word
),
));
}
}
if len == 2 {
return Err(make_error(
self.val,
&format!(
"parsing {} as double quoted string: empty string isn't supported",
self.word
),
));
}
let val: String = self.word.parse().map_err(|e| {
make_error(
self.val,
&format!("parsing {} as double quoted string: {e}", self.word),
)
})?;
let s = val[1..len - 1].to_string();
if s.contains('"') {
return Err(make_error(
self.val,
&format!("parsing {} as double quoted string: escaped double quote isn't supported", self.word),
));
}
return Ok(Token::DoubleQuotedString(s));
}
}
// figure out what the word was
let token = match self.word.as_str() {
"Null" => Token::SimpleType(DataType::Null),
"Boolean" => Token::SimpleType(DataType::Boolean),
"Int8" => Token::SimpleType(DataType::Int8),
"Int16" => Token::SimpleType(DataType::Int16),
"Int32" => Token::SimpleType(DataType::Int32),
"Int64" => Token::SimpleType(DataType::Int64),
"UInt8" => Token::SimpleType(DataType::UInt8),
"UInt16" => Token::SimpleType(DataType::UInt16),
"UInt32" => Token::SimpleType(DataType::UInt32),
"UInt64" => Token::SimpleType(DataType::UInt64),
"Utf8" => Token::SimpleType(DataType::Utf8),
"LargeUtf8" => Token::SimpleType(DataType::LargeUtf8),
"Utf8View" => Token::SimpleType(DataType::Utf8View),
"Binary" => Token::SimpleType(DataType::Binary),
"BinaryView" => Token::SimpleType(DataType::BinaryView),
"LargeBinary" => Token::SimpleType(DataType::LargeBinary),
"Float16" => Token::SimpleType(DataType::Float16),
"Float32" => Token::SimpleType(DataType::Float32),
"Float64" => Token::SimpleType(DataType::Float64),
"Date32" => Token::SimpleType(DataType::Date32),
"Date64" => Token::SimpleType(DataType::Date64),
"List" => Token::List,
"LargeList" => Token::LargeList,
"FixedSizeList" => Token::FixedSizeList,
"Second" => Token::TimeUnit(TimeUnit::Second),
"Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
"Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
"Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),
"Timestamp" => Token::Timestamp,
"Time32" => Token::Time32,
"Time64" => Token::Time64,
"Duration" => Token::Duration,
"Interval" => Token::Interval,
"Dictionary" => Token::Dictionary,
"FixedSizeBinary" => Token::FixedSizeBinary,
"Decimal128" => Token::Decimal128,
"Decimal256" => Token::Decimal256,
"YearMonth" => Token::IntervalUnit(IntervalUnit::YearMonth),
"DayTime" => Token::IntervalUnit(IntervalUnit::DayTime),
"MonthDayNano" => Token::IntervalUnit(IntervalUnit::MonthDayNano),
"Some" => Token::Some,
"None" => Token::None,
_ => {
return Err(make_error(
self.val,
&format!("unrecognized word: {}", self.word),
))
}
};
Ok(token)
}