int parse_iso_8601_datetime()

in amplify/backend/function/iamxawswrangler/lib/python/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c [69:536]


int parse_iso_8601_datetime(const char *str, int len, int want_exc,
                            npy_datetimestruct *out,
                            int *out_local, int *out_tzoffset) {
    int year_leap = 0;
    int i, numdigits;
    const char *substr;
    int sublen;

    /* If year-month-day are separated by a valid separator,
     * months/days without leading zeroes will be parsed
     * (though not iso8601). If the components aren't separated,
     * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are
     * forbidden here (but parsed as YYMMDD elsewhere).
    */
    int has_ymd_sep = 0;
    char ymd_sep = '\0';
    char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '};
    int valid_ymd_sep_len = sizeof(valid_ymd_sep);

    /* hour-minute-second may or may not separated by ':'. If not, then
     * each component must be 2 digits. */
    int has_hms_sep = 0;
    int hour_was_2_digits = 0;

    /* Initialize the output to all zeros */
    memset(out, 0, sizeof(npy_datetimestruct));
    out->month = 1;
    out->day = 1;

    substr = str;
    sublen = len;

    /* Skip leading whitespace */
    while (sublen > 0 && isspace(*substr)) {
        ++substr;
        --sublen;
    }

    /* Leading '-' sign for negative year */
    if (*substr == '-') {
        ++substr;
        --sublen;
    }

    if (sublen == 0) {
        goto parse_error;
    }

    /* PARSE THE YEAR (4 digits) */
    out->year = 0;
    if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) &&
        isdigit(substr[2]) && isdigit(substr[3])) {
        out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') +
                    10 * (substr[2] - '0') + (substr[3] - '0');

        substr += 4;
        sublen -= 4;
    }

    /* Negate the year if necessary */
    if (str[0] == '-') {
        out->year = -out->year;
    }
    /* Check whether it's a leap-year */
    year_leap = is_leapyear(out->year);

    /* Next character must be a separator, start of month, or end of string */
    if (sublen == 0) {
        if (out_local != NULL) {
            *out_local = 0;
        }
        goto finish;
    }

    if (!isdigit(*substr)) {
        for (i = 0; i < valid_ymd_sep_len; ++i) {
            if (*substr == valid_ymd_sep[i]) {
                break;
            }
        }
        if (i == valid_ymd_sep_len) {
            goto parse_error;
        }
        has_ymd_sep = 1;
        ymd_sep = valid_ymd_sep[i];
        ++substr;
        --sublen;
        /* Cannot have trailing separator */
        if (sublen == 0 || !isdigit(*substr)) {
            goto parse_error;
        }
    }

    /* PARSE THE MONTH */
    /* First digit required */
    out->month = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional if there was a separator */
    if (isdigit(*substr)) {
        out->month = 10 * out->month + (*substr - '0');
        ++substr;
        --sublen;
    } else if (!has_ymd_sep) {
        goto parse_error;
    }
    if (out->month < 1 || out->month > 12) {
        if (want_exc) {
            PyErr_Format(PyExc_ValueError,
                        "Month out of range in datetime string \"%s\"", str);
        }
        goto error;
    }

    /* Next character must be the separator, start of day, or end of string */
    if (sublen == 0) {
        /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
        if (!has_ymd_sep) {
            goto parse_error;
        }
        if (out_local != NULL) {
            *out_local = 0;
        }
        goto finish;
    }

    if (has_ymd_sep) {
        /* Must have separator, but cannot be trailing */
        if (*substr != ymd_sep || sublen == 1) {
            goto parse_error;
        }
        ++substr;
        --sublen;
    }

    /* PARSE THE DAY */
    /* First digit required */
    if (!isdigit(*substr)) {
        goto parse_error;
    }
    out->day = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional if there was a separator */
    if (isdigit(*substr)) {
        out->day = 10 * out->day + (*substr - '0');
        ++substr;
        --sublen;
    } else if (!has_ymd_sep) {
        goto parse_error;
    }
    if (out->day < 1 ||
        out->day > days_per_month_table[year_leap][out->month - 1]) {
        if (want_exc) {
            PyErr_Format(PyExc_ValueError,
                        "Day out of range in datetime string \"%s\"", str);
        }
        goto error;
    }

    /* Next character must be a 'T', ' ', or end of string */
    if (sublen == 0) {
        if (out_local != NULL) {
            *out_local = 0;
        }
        goto finish;
    }

    if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
        goto parse_error;
    }
    ++substr;
    --sublen;

    /* PARSE THE HOURS */
    /* First digit required */
    if (!isdigit(*substr)) {
        goto parse_error;
    }
    out->hour = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional */
    if (isdigit(*substr)) {
        hour_was_2_digits = 1;
        out->hour = 10 * out->hour + (*substr - '0');
        ++substr;
        --sublen;
        if (out->hour >= 24) {
            if (want_exc) {
                PyErr_Format(PyExc_ValueError,
                             "Hours out of range in datetime string \"%s\"",
                             str);
            }
            goto error;
        }
    }

    /* Next character must be a ':' or the end of the string */
    if (sublen == 0) {
        if (!hour_was_2_digits) {
            goto parse_error;
        }
        goto finish;
    }

    if (*substr == ':') {
        has_hms_sep = 1;
        ++substr;
        --sublen;
        /* Cannot have a trailing separator */
        if (sublen == 0 || !isdigit(*substr)) {
            goto parse_error;
        }
    } else if (!isdigit(*substr)) {
        if (!hour_was_2_digits) {
            goto parse_error;
        }
        goto parse_timezone;
    }

    /* PARSE THE MINUTES */
    /* First digit required */
    out->min = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional if there was a separator */
    if (isdigit(*substr)) {
        out->min = 10 * out->min + (*substr - '0');
        ++substr;
        --sublen;
        if (out->min >= 60) {
            if (want_exc) {
                PyErr_Format(PyExc_ValueError,
                             "Minutes out of range in datetime string \"%s\"",
                             str);
            }
            goto error;
        }
    } else if (!has_hms_sep) {
        goto parse_error;
    }

    if (sublen == 0) {
        goto finish;
    }

    /* If we make it through this condition block, then the next
     * character is a digit. */
    if (has_hms_sep && *substr == ':') {
        ++substr;
        --sublen;
        /* Cannot have a trailing ':' */
        if (sublen == 0 || !isdigit(*substr)) {
            goto parse_error;
        }
    } else if (!has_hms_sep && isdigit(*substr)) {
    } else {
        goto parse_timezone;
    }

    /* PARSE THE SECONDS */
    /* First digit required */
    out->sec = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional if there was a separator */
    if (isdigit(*substr)) {
        out->sec = 10 * out->sec + (*substr - '0');
        ++substr;
        --sublen;
        if (out->sec >= 60) {
            if (want_exc) {
                PyErr_Format(PyExc_ValueError,
                             "Seconds out of range in datetime string \"%s\"",
                             str);
            }
            goto error;
        }
    } else if (!has_hms_sep) {
        goto parse_error;
    }

    /* Next character may be a '.' indicating fractional seconds */
    if (sublen > 0 && *substr == '.') {
        ++substr;
        --sublen;
    } else {
        goto parse_timezone;
    }

    /* PARSE THE MICROSECONDS (0 to 6 digits) */
    numdigits = 0;
    for (i = 0; i < 6; ++i) {
        out->us *= 10;
        if (sublen > 0 && isdigit(*substr)) {
            out->us += (*substr - '0');
            ++substr;
            --sublen;
            ++numdigits;
        }
    }

    if (sublen == 0 || !isdigit(*substr)) {
        goto parse_timezone;
    }

    /* PARSE THE PICOSECONDS (0 to 6 digits) */
    numdigits = 0;
    for (i = 0; i < 6; ++i) {
        out->ps *= 10;
        if (sublen > 0 && isdigit(*substr)) {
            out->ps += (*substr - '0');
            ++substr;
            --sublen;
            ++numdigits;
        }
    }

    if (sublen == 0 || !isdigit(*substr)) {
        goto parse_timezone;
    }

    /* PARSE THE ATTOSECONDS (0 to 6 digits) */
    numdigits = 0;
    for (i = 0; i < 6; ++i) {
        out->as *= 10;
        if (sublen > 0 && isdigit(*substr)) {
            out->as += (*substr - '0');
            ++substr;
            --sublen;
            ++numdigits;
        }
    }

parse_timezone:
    /* trim any whitepsace between time/timeezone */
    while (sublen > 0 && isspace(*substr)) {
        ++substr;
        --sublen;
    }

    if (sublen == 0) {
        // Unlike NumPy, treating no time zone as naive
        goto finish;
    }

    /* UTC specifier */
    if (*substr == 'Z') {
        /* "Z" should be equivalent to tz offset "+00:00" */
        if (out_local != NULL) {
            *out_local = 1;
        }

        if (out_tzoffset != NULL) {
            *out_tzoffset = 0;
        }

        if (sublen == 1) {
            goto finish;
        } else {
            ++substr;
            --sublen;
        }
    } else if (*substr == '-' || *substr == '+') {
        /* Time zone offset */
        int offset_neg = 0, offset_hour = 0, offset_minute = 0;

        /*
         * Since "local" means local with respect to the current
         * machine, we say this is non-local.
         */

        if (*substr == '-') {
            offset_neg = 1;
        }
        ++substr;
        --sublen;

        /* The hours offset */
        if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
            offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0');
            substr += 2;
            sublen -= 2;
            if (offset_hour >= 24) {
                if (want_exc) {
                    PyErr_Format(PyExc_ValueError,
                                "Timezone hours offset out of range "
                                "in datetime string \"%s\"",
                                str);
                }
                goto error;
            }
        } else if (sublen >= 1 && isdigit(substr[0])) {
            offset_hour = substr[0] - '0';
            ++substr;
            --sublen;
        } else {
            goto parse_error;
        }

        /* The minutes offset is optional */
        if (sublen > 0) {
            /* Optional ':' */
            if (*substr == ':') {
                ++substr;
                --sublen;
            }

            /* The minutes offset (at the end of the string) */
            if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
                offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0');
                substr += 2;
                sublen -= 2;
                if (offset_minute >= 60) {
                    if (want_exc) {
                        PyErr_Format(PyExc_ValueError,
                                    "Timezone minutes offset out of range "
                                    "in datetime string \"%s\"",
                                    str);
                    }
                    goto error;
                }
            } else if (sublen >= 1 && isdigit(substr[0])) {
                offset_minute = substr[0] - '0';
                ++substr;
                --sublen;
            } else {
                goto parse_error;
            }
        }

        /* Apply the time zone offset */
        if (offset_neg) {
            offset_hour = -offset_hour;
            offset_minute = -offset_minute;
        }
        if (out_local != NULL) {
            *out_local = 1;
            // Unlike NumPy, do not change internal value to local time
            *out_tzoffset = 60 * offset_hour + offset_minute;
        }
    }

    /* Skip trailing whitespace */
    while (sublen > 0 && isspace(*substr)) {
        ++substr;
        --sublen;
    }

    if (sublen != 0) {
        goto parse_error;
    }

finish:
    return 0;

parse_error:
    if (want_exc) {
        PyErr_Format(PyExc_ValueError,
                    "Error parsing datetime string \"%s\" at position %d", str,
                    (int)(substr - str));
    }
    return -1;

error:
    return -1;
}