sql_utils/public/functions/parse_date_time_utils.cc (125 lines of code) (raw):

/* * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "sql_utils/public/functions/parse_date_time_utils.h" #include <time.h> #include <limits> #include <string> #include "sql_utils/base/logging.h" #include "sql_utils/public/functions/date_time_util.h" #include "sql_utils/public/strings.h" #include "absl/time/time.h" namespace bigquery_ml_utils { namespace functions { namespace parse_date_time_utils { namespace { const char kDigits[] = "0123456789"; const int64_t powers_of_ten[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000}; } // namespace bool ConvertTimeToTimestamp(absl::Time time, int64_t* timestamp) { *timestamp = absl::ToUnixMicros(time); return IsValidTimestamp(*timestamp, kMicroseconds); } const char* ParseInt(const char* dp, const char* end_of_data, int max_width, int64_t min, int64_t max, int64_t* vp) { if (dp == nullptr || dp >= end_of_data || max_width <= 0) { return nullptr; } const int64_t kmin = std::numeric_limits<int64_t>::min(); bool neg = false; int64_t value = 0; if (*dp == '-') { neg = true; if (max_width <= 0 || --max_width != 0) { ++dp; } else { return nullptr; // <max_width> was 1. } } if (const char* const bp = dp) { const char* cp; while (dp < end_of_data && (cp = strchr(kDigits, *dp))) { int d = static_cast<int>(cp - kDigits); if (d < 0 || d >= 10) break; // Not a digit. if (ABSL_PREDICT_FALSE(value < kmin / 10)) { return nullptr; } value *= 10; if (ABSL_PREDICT_FALSE(value < kmin + d)) { return nullptr; } value -= d; dp += 1; if (max_width > 0 && --max_width == 0) break; } if (dp != bp && (neg || value != kmin)) { if (!neg || value != 0) { if (!neg) value = -value; // Make positive. if (min <= value && value <= max) { *vp = value; } else { return nullptr; } } else { return nullptr; } } else { return nullptr; } } return dp; } const char* ParseInt(const char* dp, const char* end_of_data, int max_width, int64_t min, int64_t max, int* vp) { int64_t int64_res; const char* res_dp = ParseInt(dp, end_of_data, max_width, min, max, &int64_res); if (res_dp == nullptr || int64_res < std::numeric_limits<int>::min() || int64_res > std::numeric_limits<int>::max()) { return nullptr; } *vp = static_cast<int>(int64_res); return res_dp; } const char* ParseSubSeconds(const char* dp, const char* end_of_data, int max_digits, TimestampScale scale, absl::Duration* subseconds) { if (dp != nullptr) { if (dp < end_of_data || scale != kSeconds) { int64_t parsed_value = 0; int64_t num_digits_parsed = 0; const char* const bp = dp; const char* cp; while (dp < end_of_data && (cp = strchr(kDigits, *dp)) && (max_digits == 0 || num_digits_parsed < max_digits)) { int d = static_cast<int>(cp - kDigits); if (d < 0 || d >= 10) break; // Not a digit. ++dp; ++num_digits_parsed; if (num_digits_parsed > scale) { // Consume but ignore digits beyond the given precision. continue; } parsed_value *= 10; parsed_value += d; } if (dp != bp) { if (num_digits_parsed < scale) { // We consumed less than precision digits, so widen parsed_value to // given precision. parsed_value *= powers_of_ten[scale - num_digits_parsed]; } if (scale == kMicroseconds) { *subseconds = absl::Microseconds(parsed_value); } else if (scale == kMilliseconds) { *subseconds = absl::Milliseconds(parsed_value); } else { // NANO precision. *subseconds = absl::Nanoseconds(parsed_value); } } else { dp = nullptr; } } else { dp = nullptr; } } return dp; } } // namespace parse_date_time_utils } // namespace functions } // namespace bigquery_ml_utils