arrow/datatype_fixedwidth.go (666 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package arrow

import (
	"fmt"
	"strconv"
	"sync"
	"time"

	"github.com/apache/arrow-go/v18/arrow/decimal"
	"github.com/apache/arrow-go/v18/arrow/internal/debug"
	"github.com/apache/arrow-go/v18/internal/json"
	"golang.org/x/xerrors"
)

// BooleanType is the parameterless data type whose ID is BOOL.
type BooleanType struct{}

// ID returns BOOL.
func (t *BooleanType) ID() Type { return BOOL }

// Name returns "bool".
func (t *BooleanType) Name() string { return "bool" }

// String returns "bool".
func (t *BooleanType) String() string { return "bool" }

// Fingerprint returns the result of typeFingerprint for this type.
func (t *BooleanType) Fingerprint() string { return typeFingerprint(t) }

// Bytes returns 1.
func (BooleanType) Bytes() int { return 1 }

// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (t *BooleanType) BitWidth() int { return 1 } func (BooleanType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecBitmap()}} } type FixedSizeBinaryType struct { ByteWidth int } func (*FixedSizeBinaryType) ID() Type { return FIXED_SIZE_BINARY } func (*FixedSizeBinaryType) Name() string { return "fixed_size_binary" } func (t *FixedSizeBinaryType) BitWidth() int { return 8 * t.ByteWidth } func (t *FixedSizeBinaryType) Bytes() int { return t.ByteWidth } func (t *FixedSizeBinaryType) Fingerprint() string { return typeFingerprint(t) } func (t *FixedSizeBinaryType) String() string { return "fixed_size_binary[" + strconv.Itoa(t.ByteWidth) + "]" } func (t *FixedSizeBinaryType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(t.ByteWidth)}} } type ( Timestamp int64 Time32 int32 Time64 int64 TimeUnit int Date32 int32 Date64 int64 Duration int64 ) // Date32FromTime returns a Date32 value from a time object func Date32FromTime(t time.Time) Date32 { return Date32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds())) } func (d Date32) ToTime() time.Time { return time.Unix(0, 0).UTC().AddDate(0, 0, int(d)) } func (d Date32) FormattedString() string { return d.ToTime().Format("2006-01-02") } // Date64FromTime returns a Date64 value from a time object func Date64FromTime(t time.Time) Date64 { // truncate to the start of the day to get the correct value t = t.Truncate(24 * time.Hour) return Date64(t.Unix()*1e3 + int64(t.Nanosecond())/1e6) } func (d Date64) ToTime() time.Time { days := int(int64(d) / (time.Hour * 24).Milliseconds()) return time.Unix(0, 0).UTC().AddDate(0, 0, days) } func (d Date64) FormattedString() string { return d.ToTime().Format("2006-01-02") } // TimestampFromStringInLocation is like TimestampFromString, but treats the time instant // as if it were in the provided timezone before converting to UTC for internal representation. 
func TimestampFromStringInLocation(val string, unit TimeUnit, loc *time.Location) (Timestamp, bool, error) { if len(val) < 10 { return 0, false, fmt.Errorf("%w: invalid timestamp string", ErrInvalid) } var ( format = "2006-01-02" zoneFmt string lenWithoutZone = len(val) ) if lenWithoutZone > 10 { switch { case val[len(val)-1] == 'Z': zoneFmt = "Z" lenWithoutZone-- case val[len(val)-3] == '+' || val[len(val)-3] == '-': zoneFmt = "-07" lenWithoutZone -= 3 case val[len(val)-5] == '+' || val[len(val)-5] == '-': zoneFmt = "-0700" lenWithoutZone -= 5 case val[len(val)-6] == '+' || val[len(val)-6] == '-': zoneFmt = "-07:00" lenWithoutZone -= 6 } } switch { case lenWithoutZone == 13: format += string(val[10]) + "15" case lenWithoutZone == 16: format += string(val[10]) + "15:04" case lenWithoutZone >= 19: format += string(val[10]) + "15:04:05.999999999" } // error if we're truncating precision // don't need a case for nano as time.Parse will already error if // more than nanosecond precision is provided switch { case unit == Second && lenWithoutZone > 19: return 0, zoneFmt != "", xerrors.New("provided more than second precision for timestamp[s]") case unit == Millisecond && lenWithoutZone > 23: return 0, zoneFmt != "", xerrors.New("provided more than millisecond precision for timestamp[ms]") case unit == Microsecond && lenWithoutZone > 26: return 0, zoneFmt != "", xerrors.New("provided more than microsecond precision for timestamp[us]") } format += zoneFmt out, err := time.Parse(format, val) if err != nil { return 0, zoneFmt != "", fmt.Errorf("%w: %s", ErrInvalid, err) } if loc != time.UTC { // convert to UTC by putting the same time instant in the desired location // before converting to UTC out = out.In(loc).UTC() } ts, err := TimestampFromTime(out, unit) return ts, zoneFmt != "", err } // TimestampFromString parses a string and returns a timestamp for the given unit // level. 
// // The timestamp should be in one of the following forms, [T] can be either T // or a space, and [.zzzzzzzzz] can be either left out or up to 9 digits of // fractions of a second. // // YYYY-MM-DD // YYYY-MM-DD[T]HH // YYYY-MM-DD[T]HH:MM // YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzz] // // You can also optionally have an ending Z to indicate UTC or indicate a specific // timezone using ±HH, ±HHMM or ±HH:MM at the end of the string. func TimestampFromString(val string, unit TimeUnit) (Timestamp, error) { tm, _, err := TimestampFromStringInLocation(val, unit, time.UTC) return tm, err } func (t Timestamp) ToTime(unit TimeUnit) time.Time { switch unit { case Second: return time.Unix(int64(t), 0).UTC() case Millisecond: return time.UnixMilli(int64(t)).UTC() case Microsecond: return time.UnixMicro(int64(t)).UTC() default: return time.Unix(0, int64(t)).UTC() } } // TimestampFromTime allows converting time.Time to Timestamp func TimestampFromTime(val time.Time, unit TimeUnit) (Timestamp, error) { switch unit { case Second: return Timestamp(val.Unix()), nil case Millisecond: return Timestamp(val.Unix()*1e3 + int64(val.Nanosecond())/1e6), nil case Microsecond: return Timestamp(val.Unix()*1e6 + int64(val.Nanosecond())/1e3), nil case Nanosecond: return Timestamp(val.UnixNano()), nil default: return 0, fmt.Errorf("%w: unexpected timestamp unit: %s", ErrInvalid, unit) } } // Time32FromString parses a string to return a Time32 value in the given unit, // unit needs to be only seconds or milliseconds and the string should be in the // form of HH:MM or HH:MM:SS[.zzz] where the fractions of a second are optional. 
func Time32FromString(val string, unit TimeUnit) (Time32, error) { switch unit { case Second: if len(val) > 8 { return 0, xerrors.New("cannot convert larger than second precision to time32s") } case Millisecond: if len(val) > 12 { return 0, xerrors.New("cannot convert larger than millisecond precision to time32ms") } case Microsecond, Nanosecond: return 0, xerrors.New("time32 can only be seconds or milliseconds") } var ( out time.Time err error ) switch { case len(val) == 5: out, err = time.Parse("15:04", val) default: out, err = time.Parse("15:04:05.999", val) } if err != nil { return 0, err } t := out.Sub(time.Date(0, 1, 1, 0, 0, 0, 0, time.UTC)) if unit == Second { return Time32(t.Seconds()), nil } return Time32(t.Milliseconds()), nil } func (t Time32) ToTime(unit TimeUnit) time.Time { return time.Unix(0, int64(t)*int64(unit.Multiplier())).UTC() } func (t Time32) FormattedString(unit TimeUnit) string { const baseFmt = "15:04:05" tm := t.ToTime(unit) switch unit { case Second: return tm.Format(baseFmt) case Millisecond: return tm.Format(baseFmt + ".000") } return "" } // Time64FromString parses a string to return a Time64 value in the given unit, // unit needs to be only microseconds or nanoseconds and the string should be in the // form of HH:MM or HH:MM:SS[.zzzzzzzzz] where the fractions of a second are optional. 
func Time64FromString(val string, unit TimeUnit) (Time64, error) { // don't need to check length for nanoseconds as Parse will already error // if more than 9 digits are provided for the fractional second switch unit { case Microsecond: if len(val) > 15 { return 0, xerrors.New("cannot convert larger than microsecond precision to time64us") } case Second, Millisecond: return 0, xerrors.New("time64 should only be microseconds or nanoseconds") } var ( out time.Time err error ) switch { case len(val) == 5: out, err = time.Parse("15:04", val) default: out, err = time.Parse("15:04:05.999999999", val) } if err != nil { return 0, err } t := out.Sub(time.Date(0, 1, 1, 0, 0, 0, 0, time.UTC)) if unit == Microsecond { return Time64(t.Microseconds()), nil } return Time64(t.Nanoseconds()), nil } func (t Time64) ToTime(unit TimeUnit) time.Time { return time.Unix(0, int64(t)*int64(unit.Multiplier())).UTC() } func (t Time64) FormattedString(unit TimeUnit) string { const baseFmt = "15:04:05.000000" tm := t.ToTime(unit) switch unit { case Microsecond: return tm.Format(baseFmt) case Nanosecond: return tm.Format(baseFmt + "000") } return "" } const ( Second TimeUnit = iota Millisecond Microsecond Nanosecond ) var TimeUnitValues = []TimeUnit{Second, Millisecond, Microsecond, Nanosecond} // Multiplier returns a time.Duration value to multiply by in order to // convert the value into nanoseconds func (u TimeUnit) Multiplier() time.Duration { return [...]time.Duration{time.Second, time.Millisecond, time.Microsecond, time.Nanosecond}[uint(u)&3] } func (u TimeUnit) String() string { return [...]string{"s", "ms", "us", "ns"}[uint(u)&3] } type TemporalWithUnit interface { FixedWidthDataType TimeUnit() TimeUnit } // TimestampType is encoded as a 64-bit signed integer since the UNIX epoch (2017-01-01T00:00:00Z). // The zero-value is a second and time zone neutral. 
// In Arrow semantics, time zone neutral does not
// represent a physical point in time, but rather a "wall clock" time that only has meaning within
// the context that produced it. In Go, time.Time can only represent instants; there is no notion
// of "wall clock" time. Therefore, time zone neutral timestamps are represented as UTC per Go
// conventions even though the Arrow type itself has no time zone.
type TimestampType struct {
	Unit     TimeUnit
	TimeZone string

	// loc caches the location resolved from TimeZone; it is reset by
	// ClearCachedLocation. mx guards loc.
	loc *time.Location
	mx  sync.RWMutex
}

// ID returns TIMESTAMP.
func (*TimestampType) ID() Type { return TIMESTAMP }

// Name returns "timestamp".
func (*TimestampType) Name() string { return "timestamp" }

// String renders the type as "timestamp[<unit>]", or as
// "timestamp[<unit>, tz=<zone>]" when TimeZone is non-empty.
func (t *TimestampType) String() string {
	switch len(t.TimeZone) {
	case 0:
		return "timestamp[" + t.Unit.String() + "]"
	default:
		return "timestamp[" + t.Unit.String() + ", tz=" + t.TimeZone + "]"
	}
}

// Fingerprint combines the type fingerprint, the unit fingerprint, the
// length of TimeZone and TimeZone itself.
func (t *TimestampType) Fingerprint() string {
	return fmt.Sprintf("%s%d:%s", typeFingerprint(t)+string(timeUnitFingerprint(t.Unit)), len(t.TimeZone), t.TimeZone)
}

// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (*TimestampType) BitWidth() int { return 64 }

// Bytes returns Int64SizeBytes.
func (*TimestampType) Bytes() int { return Int64SizeBytes }

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// TimestampSizeBytes per value.
func (*TimestampType) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(TimestampSizeBytes)}}
}

// TimeUnit returns the Unit field.
func (t *TimestampType) TimeUnit() TimeUnit { return t.Unit }

// ClearCachedLocation clears the cached time.Location object in the type.
// This should be called if you change the value of the TimeZone after having
// potentially called GetZone.
func (t *TimestampType) ClearCachedLocation() {
	t.mx.Lock()
	defer t.mx.Unlock()
	t.loc = nil
}

// GetZone returns a *time.Location that represents the current TimeZone member
// of the TimestampType. If it is "", "UTC", or "utc", you'll get time.UTC.
// Otherwise it must either be a valid tzdata string such as "America/New_York"
// or of the format +HH:MM or -HH:MM indicating an absolute offset.
// // The location object will be cached in the TimestampType for subsequent calls // so if you change the value of TimeZone after calling this, make sure to call // ClearCachedLocation. func (t *TimestampType) GetZone() (*time.Location, error) { t.mx.RLock() if t.loc != nil { defer t.mx.RUnlock() return t.loc, nil } t.mx.RUnlock() t.mx.Lock() defer t.mx.Unlock() // in case GetZone() was called in between releasing the read lock and // getting the write lock if t.loc != nil { return t.loc, nil } // the TimeZone string is allowed to be either a valid tzdata string // such as "America/New_York" or an absolute offset of the form -XX:XX // or +XX:XX // // As such we have two methods we can try, first we'll try LoadLocation // and if that fails, we'll test for an absolute offset. if t.TimeZone == "" || t.TimeZone == "UTC" || t.TimeZone == "utc" { t.loc = time.UTC return time.UTC, nil } if loc, err := time.LoadLocation(t.TimeZone); err == nil { t.loc = loc return loc, err } // at this point we know that the timezone isn't empty, and didn't match // anything in the tzdata names. So either it's an absolute offset // or it's invalid. timetz, err := time.Parse("-07:00", t.TimeZone) if err != nil { return time.UTC, fmt.Errorf("could not find timezone location for '%s'", t.TimeZone) } _, offset := timetz.Zone() t.loc = time.FixedZone(t.TimeZone, offset) return t.loc, nil } // GetToTimeFunc returns a function for converting an arrow.Timestamp value into a // time.Time object with proper TimeZone and precision. If the TimeZone is invalid // this will return an error. It calls GetZone to get the timezone for consistency. func (t *TimestampType) GetToTimeFunc() (func(Timestamp) time.Time, error) { tz, err := t.GetZone() if err != nil { return nil, err } return func(v Timestamp) time.Time { return v.ToTime(t.Unit).In(tz) }, nil } // Time32Type is encoded as a 32-bit signed integer, representing either seconds or milliseconds since midnight. 
type Time32Type struct {
	Unit TimeUnit
}

// ID returns TIME32.
func (*Time32Type) ID() Type { return TIME32 }

// Name returns "time32".
func (*Time32Type) Name() string { return "time32" }

// BitWidth returns 32, the width of one value in bits.
func (*Time32Type) BitWidth() int { return 32 }

// Bytes returns Int32SizeBytes.
func (*Time32Type) Bytes() int { return Int32SizeBytes }

// String renders the type as "time32[<unit>]".
func (t *Time32Type) String() string { return "time32[" + t.Unit.String() + "]" }

// Fingerprint combines the type fingerprint with the unit fingerprint.
func (t *Time32Type) Fingerprint() string {
	return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit))
}

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// Time32SizeBytes per value.
func (Time32Type) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Time32SizeBytes)}}
}

// TimeUnit returns the Unit field.
func (t *Time32Type) TimeUnit() TimeUnit { return t.Unit }

// Time64Type is encoded as a 64-bit signed integer, representing either microseconds or nanoseconds since midnight.
type Time64Type struct {
	Unit TimeUnit
}

// ID returns TIME64.
func (*Time64Type) ID() Type { return TIME64 }

// Name returns "time64".
func (*Time64Type) Name() string { return "time64" }

// BitWidth returns 64, the width of one value in bits.
func (*Time64Type) BitWidth() int { return 64 }

// Bytes returns Int64SizeBytes.
func (*Time64Type) Bytes() int { return Int64SizeBytes }

// String renders the type as "time64[<unit>]".
func (t *Time64Type) String() string { return "time64[" + t.Unit.String() + "]" }

// Fingerprint combines the type fingerprint with the unit fingerprint.
func (t *Time64Type) Fingerprint() string {
	return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit))
}

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// Time64SizeBytes per value.
func (Time64Type) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Time64SizeBytes)}}
}

// TimeUnit returns the Unit field.
func (t *Time64Type) TimeUnit() TimeUnit { return t.Unit }

// DurationType is encoded as a 64-bit signed integer, representing an amount
// of elapsed time without any relation to a calendar artifact.
type DurationType struct {
	Unit TimeUnit
}

// ID returns DURATION.
func (*DurationType) ID() Type { return DURATION }

// Name returns "duration".
func (*DurationType) Name() string { return "duration" }

// BitWidth returns 64, the width of one value in bits.
func (*DurationType) BitWidth() int { return 64 }

// Bytes returns Int64SizeBytes.
func (*DurationType) Bytes() int { return Int64SizeBytes }

// String renders the type as "duration[<unit>]".
func (t *DurationType) String() string { return "duration[" + t.Unit.String() + "]" }

// Fingerprint combines the type fingerprint with the unit fingerprint.
func (t *DurationType) Fingerprint() string {
	return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit))
}

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// DurationSizeBytes per value.
func (DurationType) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(DurationSizeBytes)}}
}

// TimeUnit returns the Unit field.
func (t *DurationType) TimeUnit() TimeUnit { return t.Unit }

// Float16Type represents a floating point value encoded with a 16-bit precision.
type Float16Type struct{}

// ID returns FLOAT16.
func (t *Float16Type) ID() Type { return FLOAT16 }

// Name returns "float16".
func (t *Float16Type) Name() string { return "float16" }

// String returns "float16".
func (t *Float16Type) String() string { return "float16" }

// Fingerprint returns the result of typeFingerprint for this type.
func (t *Float16Type) Fingerprint() string { return typeFingerprint(t) }

// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (t *Float16Type) BitWidth() int { return 16 }

// Bytes returns Float16SizeBytes.
func (Float16Type) Bytes() int { return Float16SizeBytes }

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// Float16SizeBytes per value.
func (Float16Type) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Float16SizeBytes)}}
}

// DecimalType is a DataType that additionally exposes a precision, a scale
// and a bit width.
type DecimalType interface {
	DataType
	GetPrecision() int32
	GetScale() int32
	BitWidth() int
}

// NarrowestDecimalType constructs the smallest decimal type that can represent
// the requested precision. An error is returned if the requested precision
// cannot be represented (prec <= 0 || prec > 76).
// // For reference: // // prec in [ 1, 9] => Decimal32Type // prec in [10, 18] => Decimal64Type // prec in [19, 38] => Decimal128Type // prec in [39, 76] => Decimal256Type func NarrowestDecimalType(prec, scale int32) (DecimalType, error) { switch { case prec <= 0: return nil, fmt.Errorf("%w: precision must be > 0 for decimal types, got %d", ErrInvalid, prec) case prec <= int32(decimal.MaxPrecision[decimal.Decimal32]()): return &Decimal32Type{Precision: prec, Scale: scale}, nil case prec <= int32(decimal.MaxPrecision[decimal.Decimal64]()): return &Decimal64Type{Precision: prec, Scale: scale}, nil case prec <= int32(decimal.MaxPrecision[decimal.Decimal128]()): return &Decimal128Type{Precision: prec, Scale: scale}, nil case prec <= int32(decimal.MaxPrecision[decimal.Decimal256]()): return &Decimal256Type{Precision: prec, Scale: scale}, nil default: return nil, fmt.Errorf("%w: invalid precision for decimal types, %d", ErrInvalid, prec) } } func NewDecimalType(id Type, prec, scale int32) (DecimalType, error) { switch id { case DECIMAL32: debug.Assert(prec <= int32(decimal.MaxPrecision[decimal.Decimal32]()), "invalid precision for decimal32") return &Decimal32Type{Precision: prec, Scale: scale}, nil case DECIMAL64: debug.Assert(prec <= int32(decimal.MaxPrecision[decimal.Decimal64]()), "invalid precision for decimal64") return &Decimal64Type{Precision: prec, Scale: scale}, nil case DECIMAL128: debug.Assert(prec <= int32(decimal.MaxPrecision[decimal.Decimal128]()), "invalid precision for decimal128") return &Decimal128Type{Precision: prec, Scale: scale}, nil case DECIMAL256: debug.Assert(prec <= int32(decimal.MaxPrecision[decimal.Decimal256]()), "invalid precision for decimal256") return &Decimal256Type{Precision: prec, Scale: scale}, nil default: return nil, fmt.Errorf("%w: must use one of the DECIMAL IDs to create a DecimalType", ErrInvalid) } } // Decimal32Type represents a fixed-size 32-bit decimal type. 
type Decimal32Type struct {
	Precision int32
	Scale     int32
}

// ID returns DECIMAL32.
func (*Decimal32Type) ID() Type { return DECIMAL32 }

// Name returns "decimal32".
func (*Decimal32Type) Name() string { return "decimal32" }

// BitWidth returns 32, the width of one value in bits.
func (*Decimal32Type) BitWidth() int { return 32 }

// Bytes returns Decimal32SizeBytes.
func (*Decimal32Type) Bytes() int { return Decimal32SizeBytes }

// String renders the type as "<name>(<precision>, <scale>)".
func (t *Decimal32Type) String() string {
	return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale)
}

// Fingerprint combines the type fingerprint with the bit width, precision
// and scale.
func (t *Decimal32Type) Fingerprint() string {
	return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale)
}

// GetPrecision returns the Precision field.
func (t *Decimal32Type) GetPrecision() int32 { return t.Precision }

// GetScale returns the Scale field.
func (t *Decimal32Type) GetScale() int32 { return t.Scale }

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// Decimal32SizeBytes per value.
func (Decimal32Type) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal32SizeBytes)}}
}

// Decimal64Type represents a fixed-size 64-bit decimal type.
type Decimal64Type struct {
	Precision int32
	Scale     int32
}

// ID returns DECIMAL64.
func (*Decimal64Type) ID() Type { return DECIMAL64 }

// Name returns "decimal64".
func (*Decimal64Type) Name() string { return "decimal64" }

// BitWidth returns 64, the width of one value in bits.
func (*Decimal64Type) BitWidth() int { return 64 }

// Bytes returns Decimal64SizeBytes.
func (*Decimal64Type) Bytes() int { return Decimal64SizeBytes }

// String renders the type as "<name>(<precision>, <scale>)".
func (t *Decimal64Type) String() string {
	return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale)
}

// Fingerprint combines the type fingerprint with the bit width, precision
// and scale.
func (t *Decimal64Type) Fingerprint() string {
	return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale)
}

// GetPrecision returns the Precision field.
func (t *Decimal64Type) GetPrecision() int32 { return t.Precision }

// GetScale returns the Scale field.
func (t *Decimal64Type) GetScale() int32 { return t.Scale }

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// Decimal64SizeBytes per value.
func (Decimal64Type) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal64SizeBytes)}}
}

// Decimal128Type represents a fixed-size 128-bit decimal type.
type Decimal128Type struct {
	Precision int32
	Scale     int32
}

// ID returns DECIMAL128.
func (*Decimal128Type) ID() Type { return DECIMAL128 }

// Name returns "decimal" (note: without a width suffix, unlike the other
// decimal types in this file).
func (*Decimal128Type) Name() string { return "decimal" }

// BitWidth returns 128, the width of one value in bits.
func (*Decimal128Type) BitWidth() int { return 128 }

// Bytes returns Decimal128SizeBytes.
func (*Decimal128Type) Bytes() int { return Decimal128SizeBytes }

// String renders the type as "<name>(<precision>, <scale>)".
func (t *Decimal128Type) String() string {
	return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale)
}

// Fingerprint combines the type fingerprint with the bit width, precision
// and scale.
func (t *Decimal128Type) Fingerprint() string {
	return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale)
}

// GetPrecision returns the Precision field.
func (t *Decimal128Type) GetPrecision() int32 { return t.Precision }

// GetScale returns the Scale field.
func (t *Decimal128Type) GetScale() int32 { return t.Scale }

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// Decimal128SizeBytes per value.
func (Decimal128Type) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal128SizeBytes)}}
}

// Decimal256Type represents a fixed-size 256-bit decimal type.
type Decimal256Type struct {
	Precision int32
	Scale     int32
}

// ID returns DECIMAL256.
func (*Decimal256Type) ID() Type { return DECIMAL256 }

// Name returns "decimal256".
func (*Decimal256Type) Name() string { return "decimal256" }

// BitWidth returns 256, the width of one value in bits.
func (*Decimal256Type) BitWidth() int { return 256 }

// Bytes returns Decimal256SizeBytes.
func (*Decimal256Type) Bytes() int { return Decimal256SizeBytes }

// String renders the type as "<name>(<precision>, <scale>)".
func (t *Decimal256Type) String() string {
	return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale)
}

// Fingerprint combines the type fingerprint with the bit width, precision
// and scale.
func (t *Decimal256Type) Fingerprint() string {
	return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale)
}

// GetPrecision returns the Precision field.
func (t *Decimal256Type) GetPrecision() int32 { return t.Precision }

// GetScale returns the Scale field.
func (t *Decimal256Type) GetScale() int32 { return t.Scale }

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// Decimal256SizeBytes per value.
func (Decimal256Type) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal256SizeBytes)}}
}

// MonthInterval represents a number of months.
type MonthInterval int32

// UnmarshalJSON decodes a JSON object of the form {"months": <int>} into m.
func (m *MonthInterval) UnmarshalJSON(data []byte) error {
	var val struct {
		Months int32 `json:"months"`
	}
	if err := json.Unmarshal(data, &val); err != nil {
		return err
	}

	*m = MonthInterval(val.Months)
	return nil
}

// MarshalJSON encodes m as a JSON object of the form {"months": <int>}.
func (m MonthInterval) MarshalJSON() ([]byte, error) {
	return json.Marshal(struct {
		Months int32 `json:"months"`
	}{int32(m)})
}

// MonthIntervalType is encoded as a 32-bit signed integer,
// representing a number of months.
type MonthIntervalType struct{}

// ID returns INTERVAL_MONTHS.
func (*MonthIntervalType) ID() Type { return INTERVAL_MONTHS }

// Name returns "month_interval".
func (*MonthIntervalType) Name() string { return "month_interval" }

// String returns "month_interval".
func (*MonthIntervalType) String() string { return "month_interval" }

// Fingerprint returns the INTERVAL_MONTHS type ID fingerprint followed by "M".
func (*MonthIntervalType) Fingerprint() string { return typeIDFingerprint(INTERVAL_MONTHS) + "M" }

// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (t *MonthIntervalType) BitWidth() int { return 32 }

// Bytes returns Int32SizeBytes.
func (MonthIntervalType) Bytes() int { return Int32SizeBytes }

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// MonthIntervalSizeBytes per value.
func (MonthIntervalType) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(MonthIntervalSizeBytes)}}
}

// DayTimeInterval represents a number of days and milliseconds (fraction of day).
type DayTimeInterval struct {
	Days         int32 `json:"days"`
	Milliseconds int32 `json:"milliseconds"`
}

// DayTimeIntervalType is encoded as a pair of 32-bit signed integer,
// representing a number of days and milliseconds (fraction of day).
type DayTimeIntervalType struct{}

// ID returns INTERVAL_DAY_TIME.
func (*DayTimeIntervalType) ID() Type { return INTERVAL_DAY_TIME }

// Name returns "day_time_interval".
func (*DayTimeIntervalType) Name() string { return "day_time_interval" }

// String returns "day_time_interval".
func (*DayTimeIntervalType) String() string { return "day_time_interval" }

// Fingerprint returns the INTERVAL_DAY_TIME type ID fingerprint followed by "d".
func (*DayTimeIntervalType) Fingerprint() string { return typeIDFingerprint(INTERVAL_DAY_TIME) + "d" }

// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (t *DayTimeIntervalType) BitWidth() int { return 64 }

// Bytes returns DayTimeIntervalSizeBytes.
func (DayTimeIntervalType) Bytes() int { return DayTimeIntervalSizeBytes }

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// DayTimeIntervalSizeBytes per value.
func (DayTimeIntervalType) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(DayTimeIntervalSizeBytes)}}
}

// MonthDayNanoInterval represents a number of months, days and nanoseconds (fraction of day).
type MonthDayNanoInterval struct {
	Months      int32 `json:"months"`
	Days        int32 `json:"days"`
	Nanoseconds int64 `json:"nanoseconds"`
}

// MonthDayNanoIntervalType is encoded as two signed 32-bit integers representing
// a number of months and a number of days, followed by a 64-bit integer representing
// the number of nanoseconds since midnight for fractions of a day.
type MonthDayNanoIntervalType struct{}

// ID returns INTERVAL_MONTH_DAY_NANO.
func (*MonthDayNanoIntervalType) ID() Type { return INTERVAL_MONTH_DAY_NANO }

// Name returns "month_day_nano_interval".
func (*MonthDayNanoIntervalType) Name() string { return "month_day_nano_interval" }

// String returns "month_day_nano_interval".
func (*MonthDayNanoIntervalType) String() string { return "month_day_nano_interval" }

// Fingerprint returns the INTERVAL_MONTH_DAY_NANO type ID fingerprint
// followed by "N".
func (*MonthDayNanoIntervalType) Fingerprint() string {
	return typeIDFingerprint(INTERVAL_MONTH_DAY_NANO) + "N"
}

// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (*MonthDayNanoIntervalType) BitWidth() int { return 128 }

// Bytes returns MonthDayNanoIntervalSizeBytes.
func (*MonthDayNanoIntervalType) Bytes() int { return MonthDayNanoIntervalSizeBytes }

// Layout reports a bitmap buffer followed by a fixed-width buffer of
// MonthDayNanoIntervalSizeBytes per value.
func (MonthDayNanoIntervalType) Layout() DataTypeLayout {
	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(MonthDayNanoIntervalSizeBytes)}}
}

// TimestampConvertOp identifies the arithmetic operation used to convert a
// value from one time unit to another.
type TimestampConvertOp int8

// The conversion operations stored in the timestampConversion table.
const (
	ConvDIVIDE = iota
	ConvMULTIPLY
)

// timestampConversion is indexed first by the input unit and then by the
// output unit; each entry holds the operation and factor that convert a
// value between the two.
var timestampConversion = [...][4]struct {
	op     TimestampConvertOp
	factor int64
}{
	Nanosecond: {
		Nanosecond:  {ConvMULTIPLY, int64(time.Nanosecond)},
		Microsecond: {ConvDIVIDE, int64(time.Microsecond)},
		Millisecond: {ConvDIVIDE, int64(time.Millisecond)},
		Second:      {ConvDIVIDE, int64(time.Second)},
	},
	Microsecond: {
		Nanosecond:  {ConvMULTIPLY, int64(time.Microsecond)},
		Microsecond: {ConvMULTIPLY, 1},
		Millisecond: {ConvDIVIDE, int64(time.Millisecond / time.Microsecond)},
		Second:      {ConvDIVIDE, int64(time.Second / time.Microsecond)},
	},
	Millisecond: {
		Nanosecond:  {ConvMULTIPLY, int64(time.Millisecond)},
		Microsecond: {ConvMULTIPLY, int64(time.Millisecond / time.Microsecond)},
		Millisecond: {ConvMULTIPLY, 1},
		Second:      {ConvDIVIDE, int64(time.Second / time.Millisecond)},
	},
	Second: {
		Nanosecond:  {ConvMULTIPLY, int64(time.Second)},
		Microsecond: {ConvMULTIPLY, int64(time.Second / time.Microsecond)},
		Millisecond: {ConvMULTIPLY, int64(time.Second / time.Millisecond)},
		Second:      {ConvMULTIPLY, 1},
	},
}

// GetTimestampConvert returns the operation and factor that convert a value
// expressed in the unit in to the unit out. The units index the conversion
// table directly, so they must be among the four declared TimeUnit values.
func GetTimestampConvert(in, out TimeUnit) (op TimestampConvertOp, factor int64) {
	conv := timestampConversion[int(in)][int(out)]
	return conv.op, conv.factor
}

// ConvertTimestampValue converts value from the unit in to the unit out by
// multiplying or dividing with the table's factor. Division is integer
// division, so converting to a coarser unit truncates toward zero.
func ConvertTimestampValue(in, out TimeUnit, value int64) int64 {
	conv := timestampConversion[int(in)][int(out)]
	switch conv.op {
	case ConvMULTIPLY:
		return value * conv.factor
	case ConvDIVIDE:
		return value / conv.factor
	}

	return 0
}

// DictionaryType represents categorical or dictionary-encoded in-memory data
// It contains a dictionary-encoded value type (any type) and an index type
// (any integer type).
type DictionaryType struct {
	IndexType DataType
	ValueType DataType
	Ordered   bool
}

// ID returns DICTIONARY.
func (*DictionaryType) ID() Type { return DICTIONARY }

// Name returns "dictionary".
func (*DictionaryType) Name() string { return "dictionary" }

// BitWidth returns the bit width of IndexType. The type assertion panics if
// IndexType is not a FixedWidthDataType.
func (d *DictionaryType) BitWidth() int { return d.IndexType.(FixedWidthDataType).BitWidth() }

// Bytes returns the byte width of IndexType. The type assertion panics if
// IndexType is not a FixedWidthDataType.
func (d *DictionaryType) Bytes() int { return d.IndexType.(FixedWidthDataType).Bytes() }

// String renders the type as
// "dictionary<values=<value type>, indices=<index type>, ordered=<bool>>".
func (d *DictionaryType) String() string {
	return fmt.Sprintf("%s<values=%s, indices=%s, ordered=%t>",
		d.Name(), d.ValueType, d.IndexType, d.Ordered)
}

// Fingerprint combines the type fingerprint, the index and value
// fingerprints and an ordered marker ("1" or "0"). When the value type has
// an empty fingerprint only the ordered marker is returned.
func (d *DictionaryType) Fingerprint() string {
	indexFingerprint := d.IndexType.Fingerprint()
	valueFingerprint := d.ValueType.Fingerprint()
	ordered := "1"
	if !d.Ordered {
		ordered = "0"
	}

	if len(valueFingerprint) > 0 {
		return typeFingerprint(d) + indexFingerprint + valueFingerprint + ordered
	}
	return ordered
}

// Layout returns the layout of IndexType with HasDict set.
func (d *DictionaryType) Layout() DataTypeLayout {
	layout := d.IndexType.Layout()
	layout.HasDict = true
	return layout
}

var (
	// FixedWidthTypes holds one shared instance of each fixed-width data
	// type. The timestamp entries use the "UTC" time zone.
	FixedWidthTypes = struct {
		Boolean              FixedWidthDataType
		Date32               FixedWidthDataType
		Date64               FixedWidthDataType
		DayTimeInterval      FixedWidthDataType
		Duration_s           FixedWidthDataType
		Duration_ms          FixedWidthDataType
		Duration_us          FixedWidthDataType
		Duration_ns          FixedWidthDataType
		Float16              FixedWidthDataType
		MonthInterval        FixedWidthDataType
		Time32s              FixedWidthDataType
		Time32ms             FixedWidthDataType
		Time64us             FixedWidthDataType
		Time64ns             FixedWidthDataType
		Timestamp_s          FixedWidthDataType
		Timestamp_ms         FixedWidthDataType
		Timestamp_us         FixedWidthDataType
		Timestamp_ns         FixedWidthDataType
		MonthDayNanoInterval FixedWidthDataType
	}{
		Boolean:              &BooleanType{},
		Date32:               &Date32Type{},
		Date64:               &Date64Type{},
		DayTimeInterval:      &DayTimeIntervalType{},
		Duration_s:           &DurationType{Unit: Second},
		Duration_ms:          &DurationType{Unit: Millisecond},
		Duration_us:          &DurationType{Unit: Microsecond},
		Duration_ns:          &DurationType{Unit: Nanosecond},
		Float16:              &Float16Type{},
		MonthInterval:        &MonthIntervalType{},
		Time32s:              &Time32Type{Unit: Second},
		Time32ms:             &Time32Type{Unit: Millisecond},
		Time64us:             &Time64Type{Unit: Microsecond},
		Time64ns:             &Time64Type{Unit: Nanosecond},
		Timestamp_s:          &TimestampType{Unit: Second, TimeZone: "UTC"},
		Timestamp_ms:         &TimestampType{Unit: Millisecond, TimeZone: "UTC"},
		Timestamp_us:         &TimestampType{Unit: Microsecond, TimeZone: "UTC"},
		Timestamp_ns:         &TimestampType{Unit: Nanosecond, TimeZone: "UTC"},
		MonthDayNanoInterval: &MonthDayNanoIntervalType{},
	}

	// Compile-time check that *FixedSizeBinaryType implements
	// FixedWidthDataType.
	_ FixedWidthDataType = (*FixedSizeBinaryType)(nil)
)