fn to_substrait_literal()

in datafusion/substrait/src/logical_plan/producer.rs [2196:2451]


/// Converts a DataFusion [`ScalarValue`] into a Substrait [`Literal`].
///
/// Null values (as reported by `value.is_null()`) become a nullable `LiteralType::Null`
/// carrying the Substrait type derived from the scalar's data type. Every other supported
/// value becomes a non-nullable literal paired with a type variation reference; the
/// variation is what tells apart DataFusion types that share one Substrait literal kind
/// (for example the default / large / view flavours of strings and binaries).
///
/// `producer` is only forwarded to the nested conversions (lists, map entries and struct
/// fields).
///
/// # Errors
///
/// Returns an error when:
/// - the scalar variant has no arm here (e.g. `Date64`),
/// - an `IntervalMonthDayNano` holds more whole seconds than fit in an `i32`,
/// - converting a type, a list, a map entry or a struct field fails.
fn to_substrait_literal(
    producer: &mut impl SubstraitProducer,
    value: &ScalarValue,
) -> Result<Literal> {
    // Nulls are handled up front, so every arm below only needs to match `Some(..)`.
    if value.is_null() {
        return Ok(Literal {
            nullable: true,
            type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
            literal_type: Some(LiteralType::Null(to_substrait_type(
                &value.data_type(),
                true,
            )?)),
        });
    }
    let (literal_type, type_variation_reference) = match value {
        ScalarValue::Boolean(Some(b)) => {
            (LiteralType::Boolean(*b), DEFAULT_TYPE_VARIATION_REF)
        }
        // Unsigned integers reuse the literal kind of the same-width signed integer and
        // are tagged with the unsigned type variation instead.
        ScalarValue::Int8(Some(n)) => {
            (LiteralType::I8(i32::from(*n)), DEFAULT_TYPE_VARIATION_REF)
        }
        ScalarValue::UInt8(Some(n)) => (
            LiteralType::I8(i32::from(*n)),
            UNSIGNED_INTEGER_TYPE_VARIATION_REF,
        ),
        ScalarValue::Int16(Some(n)) => {
            (LiteralType::I16(i32::from(*n)), DEFAULT_TYPE_VARIATION_REF)
        }
        ScalarValue::UInt16(Some(n)) => (
            LiteralType::I16(i32::from(*n)),
            UNSIGNED_INTEGER_TYPE_VARIATION_REF,
        ),
        ScalarValue::Int32(Some(n)) => (LiteralType::I32(*n), DEFAULT_TYPE_VARIATION_REF),
        // Deliberate wrapping cast: values above `i32::MAX` keep their bit pattern.
        ScalarValue::UInt32(Some(n)) => (
            LiteralType::I32(*n as i32),
            UNSIGNED_INTEGER_TYPE_VARIATION_REF,
        ),
        ScalarValue::Int64(Some(n)) => (LiteralType::I64(*n), DEFAULT_TYPE_VARIATION_REF),
        // Deliberate wrapping cast: values above `i64::MAX` keep their bit pattern.
        ScalarValue::UInt64(Some(n)) => (
            LiteralType::I64(*n as i64),
            UNSIGNED_INTEGER_TYPE_VARIATION_REF,
        ),
        ScalarValue::Float32(Some(f)) => {
            (LiteralType::Fp32(*f), DEFAULT_TYPE_VARIATION_REF)
        }
        ScalarValue::Float64(Some(f)) => {
            (LiteralType::Fp64(*f), DEFAULT_TYPE_VARIATION_REF)
        }
        // Timestamps without a timezone: `precision` is the number of fractional-second
        // digits of the unit (0 = seconds, 3 = millis, 6 = micros, 9 = nanos).
        ScalarValue::TimestampSecond(Some(t), None) => (
            LiteralType::PrecisionTimestamp(PrecisionTimestamp {
                precision: 0,
                value: *t,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        ScalarValue::TimestampMillisecond(Some(t), None) => (
            LiteralType::PrecisionTimestamp(PrecisionTimestamp {
                precision: 3,
                value: *t,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        ScalarValue::TimestampMicrosecond(Some(t), None) => (
            LiteralType::PrecisionTimestamp(PrecisionTimestamp {
                precision: 6,
                value: *t,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        ScalarValue::TimestampNanosecond(Some(t), None) => (
            LiteralType::PrecisionTimestamp(PrecisionTimestamp {
                precision: 9,
                value: *t,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        // If timezone is present, no matter what the actual tz value is, it indicates the
        // value of the timestamp is tied to UTC epoch. That's all that Substrait cares about.
        // As the timezone is lost, this conversion may be lossy for downstream use of the value.
        ScalarValue::TimestampSecond(Some(t), Some(_)) => (
            LiteralType::PrecisionTimestampTz(PrecisionTimestamp {
                precision: 0,
                value: *t,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        ScalarValue::TimestampMillisecond(Some(t), Some(_)) => (
            LiteralType::PrecisionTimestampTz(PrecisionTimestamp {
                precision: 3,
                value: *t,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        ScalarValue::TimestampMicrosecond(Some(t), Some(_)) => (
            LiteralType::PrecisionTimestampTz(PrecisionTimestamp {
                precision: 6,
                value: *t,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        ScalarValue::TimestampNanosecond(Some(t), Some(_)) => (
            LiteralType::PrecisionTimestampTz(PrecisionTimestamp {
                precision: 9,
                value: *t,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        ScalarValue::Date32(Some(d)) => {
            (LiteralType::Date(*d), DATE_32_TYPE_VARIATION_REF)
        }
        // Date64 literal is not supported in Substrait
        ScalarValue::IntervalYearMonth(Some(i)) => (
            LiteralType::IntervalYearToMonth(IntervalYearToMonth {
                // DF only tracks total months, but there should always be 12 months in a year
                years: *i / 12,
                months: *i % 12,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        ScalarValue::IntervalMonthDayNano(Some(i)) => {
            // The Substrait `seconds` field is an i32 while the nanosecond count is wider,
            // so a plain `as` cast would silently truncate large intervals. Fail loudly
            // instead of emitting a wrong literal.
            let whole_seconds = i.nanoseconds / NANOSECONDS;
            let seconds = match i32::try_from(whole_seconds) {
                Ok(seconds) => seconds,
                Err(_) => {
                    return exec_err!(
                        "Interval of {whole_seconds} whole seconds does not fit in the i32 seconds field of a Substrait interval literal"
                    )
                }
            };
            (
                LiteralType::IntervalCompound(IntervalCompound {
                    interval_year_to_month: Some(IntervalYearToMonth {
                        years: i.months / 12,
                        months: i.months % 12,
                    }),
                    interval_day_to_second: Some(IntervalDayToSecond {
                        days: i.days,
                        seconds,
                        subseconds: i.nanoseconds % NANOSECONDS,
                        precision_mode: Some(PrecisionMode::Precision(9)), // nanoseconds
                    }),
                }),
                DEFAULT_TYPE_VARIATION_REF,
            )
        }
        ScalarValue::IntervalDayTime(Some(i)) => (
            LiteralType::IntervalDayToSecond(IntervalDayToSecond {
                days: i.days,
                seconds: i.milliseconds / 1000,
                subseconds: i64::from(i.milliseconds % 1000),
                precision_mode: Some(PrecisionMode::Precision(3)), // 3 for milliseconds
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        // Binary and string flavours share one literal kind each; the container type
        // variation records which DataFusion flavour the value came from.
        ScalarValue::Binary(Some(b)) => (
            LiteralType::Binary(b.clone()),
            DEFAULT_CONTAINER_TYPE_VARIATION_REF,
        ),
        ScalarValue::LargeBinary(Some(b)) => (
            LiteralType::Binary(b.clone()),
            LARGE_CONTAINER_TYPE_VARIATION_REF,
        ),
        ScalarValue::BinaryView(Some(b)) => (
            LiteralType::Binary(b.clone()),
            VIEW_CONTAINER_TYPE_VARIATION_REF,
        ),
        ScalarValue::FixedSizeBinary(_, Some(b)) => (
            LiteralType::FixedBinary(b.clone()),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        ScalarValue::Utf8(Some(s)) => (
            LiteralType::String(s.clone()),
            DEFAULT_CONTAINER_TYPE_VARIATION_REF,
        ),
        ScalarValue::LargeUtf8(Some(s)) => (
            LiteralType::String(s.clone()),
            LARGE_CONTAINER_TYPE_VARIATION_REF,
        ),
        ScalarValue::Utf8View(Some(s)) => (
            LiteralType::String(s.clone()),
            VIEW_CONTAINER_TYPE_VARIATION_REF,
        ),
        ScalarValue::Decimal128(Some(v), p, s) => (
            LiteralType::Decimal(Decimal {
                // The unscaled value is serialized as little-endian bytes.
                value: v.to_le_bytes().to_vec(),
                precision: i32::from(*p),
                scale: i32::from(*s),
            }),
            DECIMAL_128_TYPE_VARIATION_REF,
        ),
        ScalarValue::List(l) => (
            convert_array_to_literal_list(producer, l)?,
            DEFAULT_CONTAINER_TYPE_VARIATION_REF,
        ),
        ScalarValue::LargeList(l) => (
            convert_array_to_literal_list(producer, l)?,
            LARGE_CONTAINER_TYPE_VARIATION_REF,
        ),
        ScalarValue::Map(m) => {
            // A map with no rows, or whose first row has no entries, is emitted as an
            // `EmptyMap`, which carries the map type rather than any key/value pairs.
            let map = if m.is_empty() || m.value(0).is_empty() {
                let mt = to_substrait_type(m.data_type(), m.is_nullable())?;
                let mt = match mt {
                    substrait::proto::Type {
                        kind: Some(r#type::Kind::Map(mt)),
                    } => Ok(mt.as_ref().to_owned()),
                    _ => exec_err!("Unexpected type for a map: {mt:?}"),
                }?;
                LiteralType::EmptyMap(mt)
            } else {
                // All keys are converted before any value, then paired up by position.
                // NOTE(review): this walks every entry of the backing key/value arrays,
                // which presumably belong to a single map row — confirm.
                let keys = (0..m.keys().len())
                    .map(|i| {
                        to_substrait_literal(
                            producer,
                            &ScalarValue::try_from_array(&m.keys(), i)?,
                        )
                    })
                    .collect::<Result<Vec<_>>>()?;
                let values = (0..m.values().len())
                    .map(|i| {
                        to_substrait_literal(
                            producer,
                            &ScalarValue::try_from_array(&m.values(), i)?,
                        )
                    })
                    .collect::<Result<Vec<_>>>()?;

                let key_values = keys
                    .into_iter()
                    .zip(values)
                    .map(|(key, value)| KeyValue {
                        key: Some(key),
                        value: Some(value),
                    })
                    .collect::<Vec<_>>();
                LiteralType::Map(Map { key_values })
            };
            (map, DEFAULT_CONTAINER_TYPE_VARIATION_REF)
        }
        ScalarValue::Struct(s) => (
            LiteralType::Struct(Struct {
                // Each field literal is read from row 0 of the corresponding column.
                fields: s
                    .columns()
                    .iter()
                    .map(|col| {
                        to_substrait_literal(
                            producer,
                            &ScalarValue::try_from_array(col, 0)?,
                        )
                    })
                    .collect::<Result<Vec<_>>>()?,
            }),
            DEFAULT_TYPE_VARIATION_REF,
        ),
        _ => return not_impl_err!("Unsupported literal: {value:?}"),
    };

    Ok(Literal {
        nullable: false,
        type_variation_reference,
        literal_type: Some(literal_type),
    })
}