def fillna()

in odps/df/expr/collections.py [0:0]


def fillna(expr, value=None, method=None, subset=None):
    """
    Fill NA/NaN values with a constant or with a neighbouring column's value.

    Exactly one of ``value`` and ``method`` must be supplied.

    With ``value``, every selected column is replaced by an expression that
    yields ``value`` where the column is NA and the original value otherwise.

    With ``method``, filling is done inside each row, across the selected
    columns: ``'pad'`` / ``'ffill'`` walk the selected columns in order and
    copy the last valid value seen into later NA cells, while
    ``'backfill'`` / ``'bfill'`` walk them in reverse order. Leading NA cells
    that have no valid value before them are left untouched. When ``subset``
    is given, the walk follows the order in which the columns were passed.

    :param DataFrame expr: input DataFrame
    :param value: value to fill into
    :param method: can be 'backfill', 'bfill', 'pad', 'ffill' or None
    :param subset: columns (names or expressions) to consider; all columns
        of ``expr`` when None
    :return: DataFrame
    :raises ValueError: if both or neither of ``value`` and ``method`` are
        given, or if ``method`` is not one of the supported names
    """
    # Validate the arguments first so that a bad call fails with a clear
    # message before ``expr`` is inspected at all.
    if method is not None and value is not None:
        raise ValueError('The argument `method` is not compatible with `value`.')
    if method is None and value is None:
        raise ValueError('You should supply at least one argument in `method` and `value`.')
    if method is not None and method not in ('backfill', 'bfill', 'pad', 'ffill'):
        raise ValueError('Method value %s is illegal.' % str(method))

    # Column name -> column expression, in schema order.
    col_dict = OrderedDict((c, expr._get_field(c)) for c in expr.schema.names)
    if subset is None:
        sel_col_names = expr.schema.names
    else:
        # when c is in expr._fields, _get_field may do substitution which will cause error
        candidates = (c.copy() if isinstance(c, Expr) else c for c in utils.to_list(subset))
        sel_col_names = [expr._get_field(c).name for c in candidates]

    if method is None:
        # Constant fill: rewrite each selected column as "value if NA else column".
        for n in sel_col_names:
            e = col_dict[n]
            col_dict[n] = e.isna().ifelse(value, e).rename(n)
        return expr.select(list(col_dict.values()))

    # Backward fill is a forward fill over the columns in reverse order.
    if method in ('backfill', 'bfill'):
        sel_cols = list(reversed(sel_col_names))
    else:
        sel_cols = sel_col_names

    names = list(col_dict.keys())
    typs = [c.dtype.name for c in col_dict.values()]

    @output(names, typs)
    def mapper(row):
        # NOTE(review): the imports are kept inside the mapper, presumably
        # because it is shipped to and executed in another environment where
        # numpy may be missing -- confirm before hoisting them.
        import math
        try:
            import numpy as np
        except ImportError:
            np = None

        def isnan(v):
            if v is None:
                return True
            if np is not None:
                try:
                    # bool() collapses numpy scalars; for sequence values
                    # np.isnan yields an array whose truth value is
                    # ambiguous (ValueError), which is treated as "not NaN".
                    return bool(np.isnan(v))
                except (TypeError, ValueError):
                    pass
            try:
                return math.isnan(v)
            except (TypeError, ValueError, OverflowError):
                # Non-numeric values and ints too large for a float are not NaN.
                return False

        last_valid = None
        update_dict = dict()
        for n in sel_cols:
            old_val = getattr(row, n)
            if isnan(old_val):
                # Only fill once a valid value has been seen in this row.
                if last_valid is not None:
                    update_dict[n] = last_valid
            else:
                last_valid = old_val

        yield row.replace(**update_dict)

    return expr.map_reduce(mapper)