def array_strptime()

in amplify/backend/function/iamxawswrangler/lib/python/pandas/_libs/tslibs/strptime.pyx [0:0]


def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='raise'):
    """
    Parse an array of strings into datetime64[ns] values using a
    strptime-style format.

    Parameters
    ----------
    values : ndarray of string-like objects
        Entries that are neither strings nor null are converted with
        ``str`` before being matched.
    fmt : str
        strptime-style format (e.g. ``'%Y-%m-%d'``). It is compiled to a
        regular expression that is cached per format string.
    exact : bool, default True
        If True the format must match at the start of each string and
        consume all of it; if False the format is searched for anywhere in
        the string and trailing data is ignored.
    errors : {'raise', 'ignore', 'coerce'}, default 'raise'
        With 'coerce', elements that do not match the format, that describe
        an invalid calendar date, or that are out of bounds are set to NaT.
        Inside this function 'ignore' behaves like 'raise' (the ValueError
        propagates); presumably the caller implements 'ignore'.

    Returns
    -------
    result : ndarray of datetime64[ns]
        Parsed values, NaT for null inputs and coerced failures.
    result_timezone : ndarray of object
        Timezone parsed from '%Z' / '%z' for each successfully parsed
        element (None when no timezone directive matched). Elements that
        were set to NaT are never assigned.

    Raises
    ------
    ValueError
        If ``fmt`` is invalid or combines incompatible directives, or if an
        element cannot be parsed and ``errors`` is not 'coerce'.
    """

    cdef:
        Py_ssize_t i, n = len(values)
        npy_datetimestruct dts
        int64_t[:] iresult
        object[:] result_timezone
        int year, month, day, minute, hour, second, weekday, julian
        int week_of_year, week_of_year_start, parse_code, ordinal
        int iso_week, iso_year
        int64_t us, ns
        object val, group_key, ampm, found, timezone
        dict found_dict
        bint is_raise = errors=='raise'
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'

    assert is_raise or is_ignore or is_coerce

    if fmt is not None:
        if '%W' in fmt or '%U' in fmt:
            if '%Y' not in fmt and '%y' not in fmt:
                raise ValueError("Cannot use '%W' or '%U' without day and year")
            if '%A' not in fmt and '%a' not in fmt and '%w' not in fmt:
                raise ValueError("Cannot use '%W' or '%U' without day and year")
        # Checked independently of the week directives so that a format
        # combining '%W'/'%U' with both '%Z' and '%z' is rejected as well.
        if '%Z' in fmt and '%z' in fmt:
            raise ValueError("Cannot parse both %Z and %z")

    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        # The cached regexes are locale dependent: rebuild everything when
        # the language changed since the cache was populated.
        if _getlang() != _TimeRE_cache.locale_time.lang:
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        locale_time = _TimeRE_cache.locale_time
        format_regex = _regex_cache.get(fmt)
        if not format_regex:
            try:
                format_regex = _TimeRE_cache.compile(fmt)
            # KeyError raised when a bad format is found; can be specified as
            # \\, in which case it was a stray % but with a space after it
            except KeyError as err:
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                del err
                raise ValueError(f"'{bad_directive}' is a bad directive "
                                 f"in format '{fmt}'")
            # IndexError only occurs when the format string is "%"
            except IndexError:
                raise ValueError(f"stray % in format '{fmt}'")
            _regex_cache[fmt] = format_regex

    result = np.empty(n, dtype='M8[ns]')
    # int64 view sharing memory with ``result``; writes go through it.
    iresult = result.view('i8')
    result_timezone = np.empty(n, dtype='object')

    dts.us = dts.ps = dts.as = 0

    for i in range(n):
        val = values[i]
        if isinstance(val, str):
            if val in nat_strings:
                iresult[i] = NPY_NAT
                continue
        else:
            if checknull_with_nat(val):
                iresult[i] = NPY_NAT
                continue
            else:
                val = str(val)

        # exact matching
        if exact:
            found = format_regex.match(val)
            if not found:
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                raise ValueError(f"time data '{val}' does not match "
                                 f"format '{fmt}' (match)")
            # match() only anchors at the start, so trailing characters
            # have to be rejected explicitly.
            if len(val) != found.end():
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                raise ValueError(f"unconverted data remains: {val[found.end():]}")

        # search
        else:
            found = format_regex.search(val)
            if not found:
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                raise ValueError(f"time data {repr(val)} does not match format "
                                 f"{repr(fmt)} (search)")

        iso_year = -1
        year = 1900
        month = day = 1
        hour = minute = second = ns = us = 0
        timezone = None
        # Default to -1 to signify that values not known; not critical to have,
        # though
        iso_week = week_of_year = -1
        week_of_year_start = -1
        # weekday and julian defaulted to -1 so as to signal need to calculate
        # values
        weekday = julian = -1
        found_dict = found.groupdict()
        for group_key in found_dict:
            # Directives not explicitly handled below:
            #   c, x, X
            #      handled by making out of other directives
            #   U, W
            #      worthless without day of the week
            parse_code = _parse_code_table[group_key]

            if parse_code == 0:
                # %y
                year = int(found_dict['y'])
                # Open Group specification for strptime() states that a %y
                # value in the range of [00, 68] is in the century 2000, while
                # [69,99] is in the century 1900
                if year <= 68:
                    year += 2000
                else:
                    year += 1900
            elif parse_code == 1:
                # %Y
                year = int(found_dict['Y'])
            elif parse_code == 2:
                # %m
                month = int(found_dict['m'])
            elif parse_code == 3:
                # %B: position in the locale's list of full month names
                month = locale_time.f_month.index(found_dict['B'].lower())
            elif parse_code == 4:
                # %b: position in the locale's list of abbreviated month names
                month = locale_time.a_month.index(found_dict['b'].lower())
            elif parse_code == 5:
                # %d
                day = int(found_dict['d'])
            elif parse_code == 6:
                # %H
                hour = int(found_dict['H'])
            elif parse_code == 7:
                # %I, combined with the AM/PM group 'p' when it matched
                hour = int(found_dict['I'])
                ampm = found_dict.get('p', '').lower()
                # If there was no AM/PM indicator, we'll treat this like AM
                if ampm in ('', locale_time.am_pm[0]):
                    # We're in AM so the hour is correct unless we're
                    # looking at 12 midnight.
                    # 12 midnight == 12 AM == hour 0
                    if hour == 12:
                        hour = 0
                elif ampm == locale_time.am_pm[1]:
                    # We're in PM so we need to add 12 to the hour unless
                    # we're looking at 12 noon.
                    # 12 noon == 12 PM == hour 12
                    if hour != 12:
                        hour += 12
            elif parse_code == 8:
                # %M
                minute = int(found_dict['M'])
            elif parse_code == 9:
                # %S
                second = int(found_dict['S'])
            elif parse_code == 10:
                # %f
                s = found_dict['f']
                # Pad to always return nanoseconds
                s += "0" * (9 - len(s))
                us = int(s)
                # Split the nanosecond count into whole microseconds and
                # the nanosecond remainder.
                ns = us % 1000
                us = us // 1000
            elif parse_code == 11:
                # %A
                weekday = locale_time.f_weekday.index(found_dict['A'].lower())
            elif parse_code == 12:
                # %a
                weekday = locale_time.a_weekday.index(found_dict['a'].lower())
            elif parse_code == 13:
                # %w: 0 is remapped to 6, every other value is shifted down
                # by one.
                weekday = int(found_dict['w'])
                if weekday == 0:
                    weekday = 6
                else:
                    weekday -= 1
            elif parse_code == 14:
                # %j
                julian = int(found_dict['j'])
            elif parse_code == 15 or parse_code == 16:
                # %U / %W
                week_of_year = int(found_dict[group_key])
                if group_key == 'U':
                    # U starts week on Sunday.
                    week_of_year_start = 6
                else:
                    # W starts week on Monday.
                    week_of_year_start = 0
            elif parse_code == 17:
                # %Z
                timezone = pytz.timezone(found_dict['Z'])
            elif parse_code == 19:
                # %z
                timezone = parse_timezone_directive(found_dict['z'])
            elif parse_code == 20:
                # %G
                iso_year = int(found_dict['G'])
            elif parse_code == 21:
                # %V
                iso_week = int(found_dict['V'])
            elif parse_code == 22:
                # %u: shifted down by one to be 0-based like the other
                # weekday directives.
                weekday = int(found_dict['u'])
                weekday -= 1

        # don't assume default values for ISO week/year
        if iso_year != -1:
            if iso_week == -1 or weekday == -1:
                raise ValueError("ISO year directive '%G' must be used with "
                                 "the ISO week directive '%V' and a weekday "
                                 "directive '%A', '%a', '%w', or '%u'.")
            if julian != -1:
                raise ValueError("Day of the year directive '%j' is not "
                                 "compatible with ISO year directive '%G'. "
                                 "Use '%Y' instead.")
        # NOTE: ``year`` is initialised to 1900 above, so ``year != -1``
        # always holds; the branch effectively triggers on '%V' without '%G'.
        elif year != -1 and week_of_year == -1 and iso_week != -1:
            if weekday == -1:
                raise ValueError("ISO week directive '%V' must be used with "
                                 "the ISO year directive '%G' and a weekday "
                                 "directive '%A', '%a', '%w', or '%u'.")
            else:
                raise ValueError("ISO week directive '%V' is incompatible with "
                                 "the year directive '%Y'. Use the ISO year "
                                 "'%G' instead.")

        # If we know the wk of the year and what day of that wk, we can figure
        # out the Julian day of the year.
        if julian == -1 and weekday != -1:
            if week_of_year != -1:
                week_starts_Mon = week_of_year_start == 0
                julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                                  week_starts_Mon)
            elif iso_year != -1 and iso_week != -1:
                year, julian = _calc_julian_from_V(iso_year, iso_week,
                                                   weekday + 1)
        # Cannot pre-calculate date() since can change in Julian
        # calculation and thus could have different value for the day of the wk
        # calculation.
        try:
            if julian == -1:
                # Need to add 1 to result since first day of the year is 1, not
                # 0.
                ordinal = date(year, month, day).toordinal()
                julian = ordinal - date(year, 1, 1).toordinal() + 1
            else:
                # Assume that if they bothered to include Julian day it will
                # be accurate.
                datetime_result = date.fromordinal(
                    (julian - 1) + date(year, 1, 1).toordinal())
                year = datetime_result.year
                month = datetime_result.month
                day = datetime_result.day
        except ValueError:
            # date() rejected the year/month/day combination.
            if is_coerce:
                iresult[i] = NPY_NAT
                continue
            raise
        if weekday == -1:
            weekday = date(year, month, day).weekday()

        dts.year = year
        dts.month = month
        dts.day = day
        dts.hour = hour
        dts.min = minute
        dts.sec = second
        dts.us = us
        # The nanosecond remainder is stored in the picosecond field.
        dts.ps = ns * 1000

        iresult[i] = dtstruct_to_dt64(&dts)
        try:
            check_dts_bounds(&dts)
        except ValueError:
            # Out of bounds: overwrite the value written just above.
            if is_coerce:
                iresult[i] = NPY_NAT
                continue
            raise

        result_timezone[i] = timezone

    # ``.base`` hands back the object underlying the typed memoryview.
    return result, result_timezone.base