def _clean_options()

in source/predictive_maintenance/pandas/io/parsers.py [0:0]
124 lines of code
48 McCabe index (conditional complexity)

    def _clean_options(self, options, engine):
        """Validate parser options and settle on the engine that will run.

        Works on a shallow copy of ``options``; keys are removed from and
        rewritten on the copy only.

        Parameters
        ----------
        options : dict
            Parser keyword options. This method reads the keys
            'delimiter', 'delim_whitespace', 'skipfooter', 'quotechar',
            'index_col', 'names', 'converters', 'na_values', 'skiprows',
            'header' and 'keep_default_na'. The names listed in
            ``_c_unsupported`` / ``_python_unsupported`` are deleted from
            the copy for the matching engine, so they must be present.
        engine : str
            Requested engine; compared against 'c', 'python' and
            'python-fwf'.

        Returns
        -------
        tuple
            ``(result, engine)``: the cleaned copy of ``options`` and the
            engine name, which is 'python' if a fallback was needed.

        Raises
        ------
        ValueError
            If a fallback away from the requested engine is required while
            ``self._engine_specified`` is truthy, if the fallback would
            drop an option set to a non-default value, or if ``index_col``
            is ``True``.
        TypeError
            If ``converters`` is neither ``None`` nor a dict.

        Warns
        -----
        ParserWarning
            When the engine silently falls back to 'python'.
        FutureWarning
            When a deprecated argument is set to a non-default value.
        """
        result = options.copy()

        engine_specified = self._engine_specified
        fallback_reason = None

        sep = options['delimiter']
        delim_whitespace = options['delim_whitespace']

        # C engine not supported yet
        if engine == 'c' and options['skipfooter'] > 0:
            fallback_reason = ("the 'c' engine does not support"
                               " skipfooter")
            engine = 'python'

        encoding = sys.getfilesystemencoding() or 'utf-8'
        if sep is None and not delim_whitespace:
            if engine == 'c':
                fallback_reason = ("the 'c' engine does not support"
                                   " sep=None with delim_whitespace=False")
                engine = 'python'
        elif sep is not None and len(sep) > 1:
            if engine == 'c' and sep == r'\s+':
                # The whitespace regex is expressed to the C engine through
                # the delim_whitespace flag instead of a delimiter.
                result['delim_whitespace'] = True
                del result['delimiter']
            elif engine not in ('python', 'python-fwf'):
                # wait until regex engine integrated
                fallback_reason = ("the 'c' engine does not support"
                                   " regex separators (separators > 1 char and"
                                   r" different from '\s+' are"
                                   " interpreted as regex)")
                engine = 'python'
        elif delim_whitespace:
            if 'python' in engine:
                result['delimiter'] = r'\s+'
        elif sep is not None:
            # Single-character separator: check that it is also a single
            # byte once encoded.
            encodeable = True
            try:
                if len(sep.encode(encoding)) > 1:
                    encodeable = False
            except UnicodeError:
                # Encoding text that the codec cannot represent raises
                # UnicodeEncodeError; a Python 2 byte string can raise
                # UnicodeDecodeError through its implicit decode step.
                # UnicodeError is the common base class of both.
                encodeable = False
            if not encodeable and engine not in ('python', 'python-fwf'):
                fallback_reason = ("the separator encoded in {encoding}"
                                   " is > 1 char long, and the 'c' engine"
                                   " does not support such separators"
                                   .format(encoding=encoding))
                engine = 'python'

        quotechar = options['quotechar']
        if (quotechar is not None and
                isinstance(quotechar, (str, compat.text_type, bytes))):
            if (len(quotechar) == 1 and ord(quotechar) > 127 and
                    engine not in ('python', 'python-fwf')):
                fallback_reason = ("ord(quotechar) > 127, meaning the "
                                   "quotechar is larger than one byte, "
                                   "and the 'c' engine does not support "
                                   "such quotechars")
                engine = 'python'

        # An explicitly requested engine is never swapped silently.
        if fallback_reason and engine_specified:
            raise ValueError(fallback_reason)

        if engine == 'c':
            for arg in _c_unsupported:
                del result[arg]

        if 'python' in engine:
            for arg in _python_unsupported:
                # Refuse to fall back if that would discard an option that
                # differs from its C-parser default.
                if fallback_reason and result[arg] != _c_parser_defaults[arg]:
                    msg = ("Falling back to the 'python' engine because"
                           " {reason}, but this causes {option!r} to be"
                           " ignored as it is not supported by the 'python'"
                           " engine.").format(reason=fallback_reason,
                                              option=arg)
                    raise ValueError(msg)
                del result[arg]

        if fallback_reason:
            warnings.warn(("Falling back to the 'python' engine because"
                           " {0}; you can avoid this warning by specifying"
                           " engine='python'.").format(fallback_reason),
                          ParserWarning, stacklevel=5)

        index_col = options['index_col']
        names = options['names']
        converters = options['converters']
        na_values = options['na_values']
        skiprows = options['skiprows']

        _validate_header_arg(options['header'])

        # Collect one combined message covering every deprecated argument
        # that was set to something other than its deprecated default.
        depr_warning = ''

        for arg in _deprecated_args:
            parser_default = _c_parser_defaults[arg]
            depr_default = _deprecated_defaults[arg]

            msg = ("The '{arg}' argument has been deprecated "
                   "and will be removed in a future version."
                   .format(arg=arg))

            if arg == 'tupleize_cols':
                msg += (' Column tuples will then '
                        'always be converted to MultiIndex.')

            if result.get(arg, depr_default) != depr_default:
                depr_warning += msg + '\n\n'
            else:
                result[arg] = parser_default

        if depr_warning:
            warnings.warn(depr_warning, FutureWarning, stacklevel=2)

        if index_col is True:
            raise ValueError("The value of index_col couldn't be 'True'")
        if _is_index_col(index_col):
            # Wrap a single index column in a list.
            if not isinstance(index_col, (list, tuple, np.ndarray)):
                index_col = [index_col]
        result['index_col'] = index_col

        names = list(names) if names is not None else names

        # type conversion-related
        if converters is not None:
            if not isinstance(converters, dict):
                raise TypeError('Type converters must be a dict or'
                                ' subclass, input was '
                                'a {0!r}'.format(type(converters).__name__))
        else:
            converters = {}

        # Converting values to NA
        keep_default_na = options['keep_default_na']
        na_values, na_fvalues = _clean_na_values(na_values, keep_default_na)

        # handle skiprows; this is internally handled by the
        # c-engine, so only need for python parsers
        if engine != 'c':
            if is_integer(skiprows):
                skiprows = lrange(skiprows)
            if skiprows is None:
                skiprows = set()
            elif not callable(skiprows):
                skiprows = set(skiprows)

        # put stuff back
        result['names'] = names
        result['converters'] = converters
        result['na_values'] = na_values
        result['na_fvalues'] = na_fvalues
        result['skiprows'] = skiprows

        return result, engine