bool operator()

in flex/utils/arrow_utils.h [34:206]


  bool operator()(const char* s, size_t length, arrow::TimeUnit::type out_unit,
                  int64_t* out,
                  bool* out_zone_offset_present = NULLPTR) const override {
    using seconds_type = std::chrono::duration<arrow::TimestampType::c_type>;

    // We allow the following zone offset formats:
    // - (none)
    // - Z
    // - [+-]HH(:?MM)?
    //
    // We allow the following formats for all units:
    // - "YYYY-MM-DD"
    // - "YYYY-MM-DD[ T]hhZ?"
    // - "YYYY-MM-DD[ T]hh:mmZ?"
    // - "YYYY-MM-DD[ T]hh:mm:ssZ?"
    //
    // We allow the following formats for unit == MILLI, MICRO, or NANO:
    // - "YYYY-MM-DD[ T]hh:mm:ss.s{1,3}Z?"
    //
    // We allow the following formats for unit == MICRO, or NANO:
    // - "YYYY-MM-DD[ T]hh:mm:ss.s{4,6}Z?"
    //
    // We allow the following formats for unit == NANO:
    // - "YYYY-MM-DD[ T]hh:mm:ss.s{7,9}Z?"
    //
    // UTC is always assumed, and the DataType's timezone is ignored.
    //

    if (ARROW_PREDICT_FALSE(length < 10))
      return false;

    seconds_type seconds_since_epoch;
#if defined(ARROW_VERSION) && ARROW_VERSION < 15000000
    if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseYYYY_MM_DD(
            s, &seconds_since_epoch))) {
#else
    if (ARROW_PREDICT_FALSE(
            !arrow::internal::ParseYYYY_MM_DD(s, &seconds_since_epoch))) {
#endif
      return false;
    }

    if (length == 10) {
      *out =
          arrow::util::CastSecondsToUnit(out_unit, seconds_since_epoch.count());
      return true;
    }

    if (ARROW_PREDICT_FALSE(s[10] != ' ') &&
        ARROW_PREDICT_FALSE(s[10] != 'T')) {
      return false;
    }

    // In the implementation of arrow ISO8601 timestamp parser, the zone offset
    // is set to true if the input string contains a zone offset. However, we
    // parse the zone offset here but don't set the boolean flag.
    // https://github.com/apache/arrow/blob/3e7ae5340a123c1040f98f1c36687b81362fab52/cpp/src/arrow/csv/converter.cc#L373
    // The reason is that, if we want the zone offset to be set, we need to
    // to declare the zone offset in the schema and construct TimeStampType with
    // that offset. However, we just want to parse the timestamp string and
    // convert it to a timestamp value, we have no assumption of the local time
    // zone, and we don't require the zone offset to be set in the schema.
    // Same for following commented code.
    //-------------------------------------------------------------------------
    // if (out_zone_offset_present) {
    //   *out_zone_offset_present = false;
    // }
    //-------------------------------------------------------------------------

    seconds_type zone_offset(0);
    if (s[length - 1] == 'Z') {
      --length;
      // if (out_zone_offset_present)
      //   *out_zone_offset_present = true;
    } else if (s[length - 3] == '+' || s[length - 3] == '-') {
      // [+-]HH
      length -= 3;
      if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHH(
              s + length + 1, &zone_offset))) {
        return false;
      }
      if (s[length] == '+')
        zone_offset *= -1;
      // if (out_zone_offset_present)
      //   *out_zone_offset_present = true;
    } else if (s[length - 5] == '+' || s[length - 5] == '-') {
      // [+-]HHMM
      length -= 5;
      if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHHMM(
              s + length + 1, &zone_offset))) {
        return false;
      }
      if (s[length] == '+')
        zone_offset *= -1;
      // if (out_zone_offset_present)
      //   *out_zone_offset_present = true;
    } else if ((s[length - 6] == '+' || s[length - 6] == '-') &&
               (s[length - 3] == ':')) {
      // [+-]HH:MM
      length -= 6;
      if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHH_MM(
              s + length + 1, &zone_offset))) {
        return false;
      }
      if (s[length] == '+')
        zone_offset *= -1;
      // if (out_zone_offset_present)
      //   *out_zone_offset_present = true;
    }

    seconds_type seconds_since_midnight;
    switch (length) {
    case 13:  // YYYY-MM-DD[ T]hh
      if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHH(
              s + 11, &seconds_since_midnight))) {
        return false;
      }
      break;
    case 16:  // YYYY-MM-DD[ T]hh:mm
      if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHH_MM(
              s + 11, &seconds_since_midnight))) {
        return false;
      }
      break;
    case 19:  // YYYY-MM-DD[ T]hh:mm:ss
    case 21:  // YYYY-MM-DD[ T]hh:mm:ss.s
    case 22:  // YYYY-MM-DD[ T]hh:mm:ss.ss
    case 23:  // YYYY-MM-DD[ T]hh:mm:ss.sss
    case 24:  // YYYY-MM-DD[ T]hh:mm:ss.ssss
    case 25:  // YYYY-MM-DD[ T]hh:mm:ss.sssss
    case 26:  // YYYY-MM-DD[ T]hh:mm:ss.ssssss
    case 27:  // YYYY-MM-DD[ T]hh:mm:ss.sssssss
    case 28:  // YYYY-MM-DD[ T]hh:mm:ss.ssssssss
    case 29:  // YYYY-MM-DD[ T]hh:mm:ss.sssssssss
      if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHH_MM_SS(
              s + 11, &seconds_since_midnight))) {
        return false;
      }
      break;
    default:
      LOG(ERROR) << "unsupported length: " << length;
      return false;
    }

    seconds_since_epoch += seconds_since_midnight;
    seconds_since_epoch += zone_offset;

    if (length <= 19) {
      *out =
          arrow::util::CastSecondsToUnit(out_unit, seconds_since_epoch.count());
      return true;
    }

    if (ARROW_PREDICT_FALSE(s[19] != '.')) {
      return false;
    }

    uint32_t subseconds = 0;
    if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseSubSeconds(
            s + 20, length - 20, out_unit, &subseconds))) {
      return false;
    }

    *out =
        arrow::util::CastSecondsToUnit(out_unit, seconds_since_epoch.count()) +
        subseconds;
    return true;
  }

  const char* kind() const override { return "LDBC timestamp parser"; }

  const char* format() const override { return "EmptyFormat"; }
};