def _parse()

in lib/dateutil/parser.py [0:0]
274 lines of code
38 McCabe index (conditional complexity)

    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
               fuzzy_with_tokens=False):
        """
        Private method which performs the heavy lifting of parsing, called from
        ``parse()``, which passes on its ``kwargs`` to this function.

        :param timestr:
            The string to parse.

        :param dayfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
            ``yearfirst`` is set to ``True``, this distinguishes between YDM
            and YMD. If set to ``None``, this value is retrieved from the
            current :class:`parserinfo` object (which itself defaults to
            ``False``).

        :param yearfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
            to be the year, otherwise the last number is taken to be the year.
            If this is set to ``None``, the value is retrieved from the current
            :class:`parserinfo` object (which itself defaults to ``False``).

        :param fuzzy:
            Whether to allow fuzzy parsing, allowing for string like "Today is
            January 1, 2047 at 8:21:00AM".

        :param fuzzy_with_tokens:
            If ``True``, ``fuzzy`` is automatically set to True, and the parser
            will return a tuple where the first element is the parsed
            :class:`datetime.datetime` datetimestamp and the second element is
            a tuple containing the portions of the string which were ignored:

            .. doctest::

                >>> from dateutil.parser import parse
                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

        """
        if fuzzy_with_tokens:
            fuzzy = True

        info = self.info

        if dayfirst is None:
            dayfirst = info.dayfirst

        if yearfirst is None:
            yearfirst = info.yearfirst

        res = self._result()
        l = _timelex.split(timestr)         # Splits the timestr into tokens

        # keep up with the last token skipped so we can recombine
        # consecutively skipped tokens (-2 for when i begins at 0).
        last_skipped_token_i = -2
        skipped_tokens = list()

        try:
            # year/month/day list
            ymd = _ymd(timestr)

            # Index of the month string in ymd
            mstridx = -1

            len_l = len(l)
            i = 0
            while i < len_l:

                # Check if it's a number
                try:
                    value_repr = l[i]
                    value = float(value_repr)
                except ValueError:
                    value = None

                if value is not None:
                    # Token is a number
                    len_li = len(l[i])
                    i += 1

                    if (len(ymd) == 3 and len_li in (2, 4)
                        and res.hour is None and (i >= len_l or (l[i] != ':' and
                                                  info.hms(l[i]) is None))):
                        # 19990101T23[59]
                        s = l[i-1]
                        res.hour = int(s[:2])

                        if len_li == 4:
                            res.minute = int(s[2:])

                    elif len_li == 6 or (len_li > 6 and l[i-1].find('.') == 6):
                        # YYMMDD or HHMMSS[.ss]
                        s = l[i-1]

                        if not ymd and l[i-1].find('.') == -1:
                            #ymd.append(info.convertyear(int(s[:2])))

                            ymd.append(s[:2])
                            ymd.append(s[2:4])
                            ymd.append(s[4:])
                        else:
                            # 19990101T235959[.59]
                            res.hour = int(s[:2])
                            res.minute = int(s[2:4])
                            res.second, res.microsecond = _parsems(s[4:])

                    elif len_li in (8, 12, 14):
                        # YYYYMMDD
                        s = l[i-1]
                        ymd.append(s[:4])
                        ymd.append(s[4:6])
                        ymd.append(s[6:8])

                        if len_li > 8:
                            res.hour = int(s[8:10])
                            res.minute = int(s[10:12])

                            if len_li > 12:
                                res.second = int(s[12:])

                    elif ((i < len_l and info.hms(l[i]) is not None) or
                          (i+1 < len_l and l[i] == ' ' and
                           info.hms(l[i+1]) is not None)):

                        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
                        if l[i] == ' ':
                            i += 1

                        idx = info.hms(l[i])

                        while True:
                            if idx == 0:
                                res.hour = int(value)

                                if value % 1:
                                    res.minute = int(60*(value % 1))

                            elif idx == 1:
                                res.minute = int(value)

                                if value % 1:
                                    res.second = int(60*(value % 1))

                            elif idx == 2:
                                res.second, res.microsecond = \
                                    _parsems(value_repr)

                            i += 1

                            if i >= len_l or idx == 2:
                                break

                            # 12h00
                            try:
                                value_repr = l[i]
                                value = float(value_repr)
                            except ValueError:
                                break
                            else:
                                i += 1
                                idx += 1

                                if i < len_l:
                                    newidx = info.hms(l[i])

                                    if newidx is not None:
                                        idx = newidx

                    elif (i == len_l and l[i-2] == ' ' and
                          info.hms(l[i-3]) is not None):
                        # X h MM or X m SS
                        idx = info.hms(l[i-3]) + 1

                        if idx == 1:
                            res.minute = int(value)

                            if value % 1:
                                res.second = int(60*(value % 1))
                            elif idx == 2:
                                res.second, res.microsecond = \
                                    _parsems(value_repr)
                                i += 1

                    elif i+1 < len_l and l[i] == ':':
                        # HH:MM[:SS[.ss]]
                        res.hour = int(value)
                        i += 1
                        value = float(l[i])
                        res.minute = int(value)

                        if value % 1:
                            res.second = int(60*(value % 1))

                        i += 1

                        if i < len_l and l[i] == ':':
                            res.second, res.microsecond = _parsems(l[i+1])
                            i += 2

                    elif i < len_l and l[i] in ('-', '/', '.'):
                        sep = l[i]
                        ymd.append(value_repr)
                        i += 1

                        if i < len_l and not info.jump(l[i]):
                            try:
                                # 01-01[-01]
                                ymd.append(l[i])
                            except ValueError:
                                # 01-Jan[-01]
                                value = info.month(l[i])

                                if value is not None:
                                    ymd.append(value)
                                    assert mstridx == -1
                                    mstridx = len(ymd)-1
                                else:
                                    return None, None

                            i += 1

                            if i < len_l and l[i] == sep:
                                # We have three members
                                i += 1
                                value = info.month(l[i])

                                if value is not None:
                                    ymd.append(value)
                                    mstridx = len(ymd)-1
                                    assert mstridx == -1
                                else:
                                    ymd.append(l[i])

                                i += 1
                    elif i >= len_l or info.jump(l[i]):
                        if i+1 < len_l and info.ampm(l[i+1]) is not None:
                            # 12 am
                            res.hour = int(value)

                            if res.hour < 12 and info.ampm(l[i+1]) == 1:
                                res.hour += 12
                            elif res.hour == 12 and info.ampm(l[i+1]) == 0:
                                res.hour = 0

                            i += 1
                        else:
                            # Year, month or day
                            ymd.append(value)
                        i += 1
                    elif info.ampm(l[i]) is not None:

                        # 12am
                        res.hour = int(value)

                        if res.hour < 12 and info.ampm(l[i]) == 1:
                            res.hour += 12
                        elif res.hour == 12 and info.ampm(l[i]) == 0:
                            res.hour = 0
                        i += 1

                    elif not fuzzy:
                        return None, None
                    else:
                        i += 1
                    continue

                # Check weekday
                value = info.weekday(l[i])
                if value is not None:
                    res.weekday = value
                    i += 1
                    continue

                # Check month name
                value = info.month(l[i])
                if value is not None:
                    ymd.append(value)
                    assert mstridx == -1
                    mstridx = len(ymd)-1

                    i += 1
                    if i < len_l:
                        if l[i] in ('-', '/'):
                            # Jan-01[-99]
                            sep = l[i]
                            i += 1
                            ymd.append(l[i])
                            i += 1

                            if i < len_l and l[i] == sep:
                                # Jan-01-99
                                i += 1
                                ymd.append(l[i])
                                i += 1

                        elif (i+3 < len_l and l[i] == l[i+2] == ' '
                              and info.pertain(l[i+1])):
                            # Jan of 01
                            # In this case, 01 is clearly year
                            try:
                                value = int(l[i+3])
                            except ValueError:
                                # Wrong guess
                                pass
                            else:
                                # Convert it here to become unambiguous
                                ymd.append(str(info.convertyear(value)))
                            i += 4
                    continue

                # Check am/pm
                value = info.ampm(l[i])
                if value is not None:
                    # For fuzzy parsing, 'a' or 'am' (both valid English words)
                    # may erroneously trigger the AM/PM flag. Deal with that
                    # here.
                    val_is_ampm = True

                    # If there's already an AM/PM flag, this one isn't one.
                    if fuzzy and res.ampm is not None:
                        val_is_ampm = False

                    # If AM/PM is found and hour is not, raise a ValueError
                    if res.hour is None:
                        if fuzzy:
                            val_is_ampm = False
                        else:
                            raise ValueError('No hour specified with ' +
                                             'AM or PM flag.')
                    elif not 0 <= res.hour <= 12:
                        # If AM/PM is found, it's a 12 hour clock, so raise
                        # an error for invalid range
                        if fuzzy:
                            val_is_ampm = False
                        else:
                            raise ValueError('Invalid hour specified for ' +
                                             '12-hour clock.')

                    if val_is_ampm:
                        if value == 1 and res.hour < 12:
                            res.hour += 12
                        elif value == 0 and res.hour == 12:
                            res.hour = 0

                        res.ampm = value

                    elif fuzzy:
                        last_skipped_token_i = self._skip_token(skipped_tokens,
                                                    last_skipped_token_i, i, l)
                    i += 1
                    continue

                # Check for a timezone name
                if (res.hour is not None and len(l[i]) <= 5 and
                        res.tzname is None and res.tzoffset is None and
                        not [x for x in l[i] if x not in
                             string.ascii_uppercase]):
                    res.tzname = l[i]
                    res.tzoffset = info.tzoffset(res.tzname)
                    i += 1

                    # Check for something like GMT+3, or BRST+3. Notice
                    # that it doesn't mean "I am 3 hours after GMT", but
                    # "my time +3 is GMT". If found, we reverse the
                    # logic so that timezone parsing code will get it
                    # right.
                    if i < len_l and l[i] in ('+', '-'):
                        l[i] = ('+', '-')[l[i] == '+']
                        res.tzoffset = None
                        if info.utczone(res.tzname):
                            # With something like GMT+3, the timezone
                            # is *not* GMT.
                            res.tzname = None

                    continue

                # Check for a numbered timezone
                if res.hour is not None and l[i] in ('+', '-'):
                    signal = (-1, 1)[l[i] == '+']
                    i += 1
                    len_li = len(l[i])

                    if len_li == 4:
                        # -0300
                        res.tzoffset = int(l[i][:2])*3600+int(l[i][2:])*60
                    elif i+1 < len_l and l[i+1] == ':':
                        # -03:00
                        res.tzoffset = int(l[i])*3600+int(l[i+2])*60
                        i += 2
                    elif len_li <= 2:
                        # -[0]3
                        res.tzoffset = int(l[i][:2])*3600
                    else:
                        return None, None
                    i += 1

                    res.tzoffset *= signal

                    # Look for a timezone name between parenthesis
                    if (i+3 < len_l and
                        info.jump(l[i]) and l[i+1] == '(' and l[i+3] == ')' and
                        3 <= len(l[i+2]) <= 5 and
                        not [x for x in l[i+2]
                             if x not in string.ascii_uppercase]):
                        # -0300 (BRST)
                        res.tzname = l[i+2]
                        i += 4
                    continue

                # Check jumps
                if not (info.jump(l[i]) or fuzzy):
                    return None, None

                last_skipped_token_i = self._skip_token(skipped_tokens,
                                                last_skipped_token_i, i, l)
                i += 1

            # Process year/month/day
            year, month, day = ymd.resolve_ymd(mstridx, yearfirst, dayfirst)
            if year is not None:
                res.year = year
                res.century_specified = ymd.century_specified

            if month is not None:
                res.month = month

            if day is not None:
                res.day = day

        except (IndexError, ValueError, AssertionError):
            return None, None

        if not info.validate(res):
            return None, None

        if fuzzy_with_tokens:
            return res, tuple(skipped_tokens)
        else:
            return res, None