fn parse_word()

in arrow-schema/src/datatype_parse.rs [358:490]


    fn parse_word(&mut self) -> ArrowResult<Token> {
        // reset temp space
        self.word.clear();
        loop {
            match self.peek_next_char() {
                None => break,
                Some(c) if is_separator(c) => break,
                Some(c) => {
                    self.next_char();
                    self.word.push(c);
                }
            }
        }

        if let Some(c) = self.word.chars().next() {
            // if it started with a number, try parsing it as an integer
            if c == '-' || c.is_numeric() {
                let val: i64 = self.word.parse().map_err(|e| {
                    make_error(self.val, &format!("parsing {} as integer: {e}", self.word))
                })?;
                return Ok(Token::Integer(val));
            }
            // if it started with a double quote `"`, try parsing it as a double quoted string
            else if c == '"' {
                let len = self.word.chars().count();

                // to verify it's double quoted
                if let Some(last_c) = self.word.chars().last() {
                    if last_c != '"' || len < 2 {
                        return Err(make_error(
                            self.val,
                            &format!(
                                "parsing {} as double quoted string: last char must be \"",
                                self.word
                            ),
                        ));
                    }
                }

                if len == 2 {
                    return Err(make_error(
                        self.val,
                        &format!(
                            "parsing {} as double quoted string: empty string isn't supported",
                            self.word
                        ),
                    ));
                }

                let val: String = self.word.parse().map_err(|e| {
                    make_error(
                        self.val,
                        &format!("parsing {} as double quoted string: {e}", self.word),
                    )
                })?;

                let s = val[1..len - 1].to_string();
                if s.contains('"') {
                    return Err(make_error(
                        self.val,
                        &format!("parsing {} as double quoted string: escaped double quote isn't supported", self.word),
                    ));
                }

                return Ok(Token::DoubleQuotedString(s));
            }
        }

        // figure out what the word was
        let token = match self.word.as_str() {
            "Null" => Token::SimpleType(DataType::Null),
            "Boolean" => Token::SimpleType(DataType::Boolean),

            "Int8" => Token::SimpleType(DataType::Int8),
            "Int16" => Token::SimpleType(DataType::Int16),
            "Int32" => Token::SimpleType(DataType::Int32),
            "Int64" => Token::SimpleType(DataType::Int64),

            "UInt8" => Token::SimpleType(DataType::UInt8),
            "UInt16" => Token::SimpleType(DataType::UInt16),
            "UInt32" => Token::SimpleType(DataType::UInt32),
            "UInt64" => Token::SimpleType(DataType::UInt64),

            "Utf8" => Token::SimpleType(DataType::Utf8),
            "LargeUtf8" => Token::SimpleType(DataType::LargeUtf8),
            "Utf8View" => Token::SimpleType(DataType::Utf8View),
            "Binary" => Token::SimpleType(DataType::Binary),
            "BinaryView" => Token::SimpleType(DataType::BinaryView),
            "LargeBinary" => Token::SimpleType(DataType::LargeBinary),

            "Float16" => Token::SimpleType(DataType::Float16),
            "Float32" => Token::SimpleType(DataType::Float32),
            "Float64" => Token::SimpleType(DataType::Float64),

            "Date32" => Token::SimpleType(DataType::Date32),
            "Date64" => Token::SimpleType(DataType::Date64),

            "List" => Token::List,
            "LargeList" => Token::LargeList,
            "FixedSizeList" => Token::FixedSizeList,

            "Second" => Token::TimeUnit(TimeUnit::Second),
            "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
            "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
            "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),

            "Timestamp" => Token::Timestamp,
            "Time32" => Token::Time32,
            "Time64" => Token::Time64,
            "Duration" => Token::Duration,
            "Interval" => Token::Interval,
            "Dictionary" => Token::Dictionary,

            "FixedSizeBinary" => Token::FixedSizeBinary,
            "Decimal128" => Token::Decimal128,
            "Decimal256" => Token::Decimal256,

            "YearMonth" => Token::IntervalUnit(IntervalUnit::YearMonth),
            "DayTime" => Token::IntervalUnit(IntervalUnit::DayTime),
            "MonthDayNano" => Token::IntervalUnit(IntervalUnit::MonthDayNano),

            "Some" => Token::Some,
            "None" => Token::None,

            _ => {
                return Err(make_error(
                    self.val,
                    &format!("unrecognized word: {}", self.word),
                ))
            }
        };
        Ok(token)
    }