def visit_string_op()

in odps/df/backends/odpssql/analyzer.py [0:0]


    def visit_string_op(self, expr):
        """Rewrite a string-operation expression into a supported form.

        ``Ljust``, ``Rjust``, ``Zfill`` and ``CatStr`` are always replaced by a
        composition of other expressions (``len``, ``repeat``, ``ifelse``,
        concatenation, ``isnull``, ``fillna``).

        Every other operation is only handled when ``options.df.analyze`` is
        truthy.  Those are replaced by a plain Python function applied to the
        input, either through ``self._gen_mapped_expr`` (when additional
        inputs such as a pattern or flags must be passed along) or through
        ``expr.input.map`` (when the function only needs the value itself).

        All replacements are registered with ``self._sub``; the method itself
        always returns ``None``.

        :param expr: the string operation expression to rewrite
        :raises NotImplementedError: if ``options.df.analyze`` is falsy and
            ``expr`` is not one of the four always-handled operations; if a
            ``Pad`` side is not ``'left'``, ``'right'`` or ``'both'``; if a
            ``Slice`` has neither end nor step, or has non-negative integer
            start and end with no step; or if no branch produced a function
            for ``expr``.
        """
        if isinstance(expr, Ljust):
            # Append ``width - len`` fill characters, or nothing when the
            # input is already longer than ``width``.
            rest = expr.width - expr.input.len()
            sub = expr.input + (rest >= 0).ifelse(expr._fillchar.repeat(rest), '')
            self._sub(expr, sub.rename(expr.name))
            return
        elif isinstance(expr, Rjust):
            # Same as Ljust, but the padding goes in front of the input.
            rest = expr.width - expr.input.len()
            sub = (rest >= 0).ifelse(expr._fillchar.repeat(rest), '') + expr.input
            self._sub(expr, sub.rename(expr.name))
            return
        elif isinstance(expr, Zfill):
            # Left-pad with a fixed '0' fill character.
            fillchar = Scalar('0')
            rest = expr.width - expr.input.len()
            sub = (rest >= 0).ifelse(fillchar.repeat(rest), '') + expr.input
            self._sub(expr, sub.rename(expr.name))
            return
        elif isinstance(expr, CatStr):
            input = expr.input
            # ``_others`` may be a single expression or an iterable of them.
            others = expr._others if isinstance(expr._others, Iterable) else (expr._others, )
            for other in others:
                if expr.na_rep is not None:
                    # With a null replacement configured, each operand is
                    # replaced by its ``fillna`` version (under ``expr`` as
                    # parent) and the method returns on the first iteration
                    # without substituting ``expr`` itself.
                    for e in (input, ) + tuple(others):
                        self._sub(e, e.fillna(expr.na_rep), parents=(expr, ))
                    return
                else:
                    # A null operand is skipped: the accumulated value is kept
                    # unchanged instead of being concatenated with it.
                    if expr._sep is not None:
                        input = other.isnull().ifelse(input, input + expr._sep + other)
                    else:
                        input = other.isnull().ifelse(input, input + other)
            self._sub(expr, input.rename(expr.name))
            return

        # Everything below substitutes a Python function for the operation,
        # which is only done when analysis is enabled.
        if not options.df.analyze:
            raise NotImplementedError

        # Branches that build their own mapped expression return directly;
        # the others only set ``func`` and share the ``map`` call at the end.
        func = None
        if isinstance(expr, Contains) and expr.regex:
            def func(x, pat, case, flags):
                if x is None:
                    return None

                flgs = 0
                if not case:
                    flgs = re.I
                if flags > 0:
                    flgs = flgs | flags
                r = re.compile(pat, flgs)
                return r.search(x) is not None

            func._identifier = "str_contains"
            # NOTE(review): a literal (non-null StringScalar) pattern is run
            # through re.escape before being passed on -- presumably to
            # compensate for unescaping later in the pipeline; confirm.
            pat = expr._pat if not isinstance(expr._pat, StringScalar) or expr._pat._value is None \
                else Scalar(re.escape(to_text(expr.pat)))
            inputs = expr.input, pat, expr._case, expr._flags
            sub = self._gen_mapped_expr(expr, inputs, func,
                                        expr.name, multiple=False)
            self._sub(expr, sub)
            return
        elif isinstance(expr, StrCount):
            def func(x, pat, flags):
                if x is None:
                    return None

                regex = re.compile(pat, flags=flags)
                return len(regex.findall(x))

            func._identifier = "str_count"
            # Same literal-pattern handling as in the Contains branch.
            pat = expr._pat if not isinstance(expr._pat, StringScalar) or expr._pat._value is None \
                else Scalar(re.escape(to_text(expr.pat)))
            inputs = expr.input, pat, expr._flags
            sub = self._gen_mapped_expr(expr, inputs, func, expr.name, multiple=False)
            self._sub(expr, sub)
            return
        elif isinstance(expr, Find) and expr.end is not None:
            # Only a Find with an explicit end is handled here; without one
            # no function is produced and the final raise is reached.
            start = expr.start
            end = expr.end
            substr = expr.sub

            def func(x):
                if x is None:
                    return None

                return x.find(substr, start, end)

            func._identifier = "str_find"
        elif isinstance(expr, RFind):
            start = expr.start
            end = expr.end
            substr = expr.sub

            def func(x):
                if x is None:
                    return None

                return x.rfind(substr, start, end)

            func._identifier = "str_rfind"
        elif isinstance(expr, Extract):
            def func(x, pat, flags, group):
                if x is None:
                    return None

                regex = re.compile(pat, flags=flags)
                m = regex.search(x)
                # Falls through and returns None when nothing matched.
                if m:
                    if group is None:
                        return m.group()
                    return m.group(group)

            func._identifier = "str_extract"
            # Same literal-pattern handling as in the Contains branch.
            pat = expr._pat if not isinstance(expr._pat, StringScalar) or expr._pat._value is None \
                else Scalar(re.escape(to_text(expr.pat)))
            inputs = expr.input, pat, expr._flags, expr._group
            sub = self._gen_mapped_expr(expr, inputs, func, expr.name, multiple=False)
            self._sub(expr, sub)
            return
        elif isinstance(expr, Replace):
            def func(x, pat, repl, n, case, flags, use_regex):
                if x is None:
                    return None

                # The regex engine is only used when regex mode is on and the
                # call is case-insensitive, the pattern is longer than one
                # character, or flags are set; otherwise str.replace is used.
                use_re = use_regex and (not case or len(pat) > 1 or flags)

                if use_re:
                    if not case:
                        flags |= re.IGNORECASE
                    regex = re.compile(pat, flags=flags)
                    # re.sub treats count=0 as "replace all", so a negative
                    # ``n`` is mapped to 0.
                    n = n if n >= 0 else 0

                    return regex.sub(repl, x, count=n)
                else:
                    return x.replace(pat, repl, n)

            func._identifier = "str_replace"
            # Same literal-pattern handling as in the Contains branch.
            pat = expr._pat if not isinstance(expr._pat, StringScalar) or expr._pat._value is None \
                else Scalar(re.escape(to_text(expr.pat)))
            inputs = expr.input, pat, expr._repl, expr._n, \
                     expr._case, expr._flags, expr._regex
            sub = self._gen_mapped_expr(expr, inputs, func,
                                        expr.name, multiple=False)
            self._sub(expr, sub)
            return
        elif isinstance(expr, (Lstrip, Strip, Rstrip)) and expr.to_strip != ' ':
            # Stripping a single space is not handled here; for that case no
            # function is produced and the final raise is reached.
            to_strip = expr.to_strip

            if isinstance(expr, Lstrip):
                def func(x):
                    if x is None:
                        return None

                    return x.lstrip(to_strip)

                func._identifier = "str_lstrip"
            elif isinstance(expr, Strip):
                def func(x):
                    if x is None:
                        return None

                    return x.strip(to_strip)

                func._identifier = "str_strip"
            elif isinstance(expr, Rstrip):
                def func(x):
                    if x is None:
                        return None

                    return x.rstrip(to_strip)

                func._identifier = "str_rstrip"
        elif isinstance(expr, Pad):
            side = expr.side
            fillchar = expr.fillchar
            width = expr.width

            def func(x, width, fillchar, side):
                if x is None:
                    return None
                # Padding on the left right-justifies the text and vice
                # versa; any other side centers it.
                if side == 'left':
                    return x.rjust(width, fillchar)
                elif side == 'right':
                    return x.ljust(width, fillchar)
                else:
                    return x.center(width, fillchar)

            func._identifier = "str_pad"
            if side not in ('left', 'right', 'both'):
                raise NotImplementedError
            inputs = expr.input, Scalar(width), Scalar(fillchar), Scalar(side)
            sub = self._gen_mapped_expr(expr, inputs, func, expr.name, multiple=False)
            self._sub(expr, sub)
            return
        elif isinstance(expr, Slice):
            start, end, step = expr.start, expr.end, expr.step

            # NOTE(review): these two shapes are rejected here -- presumably
            # because they are translated elsewhere without a Python
            # function; confirm against the caller.
            if end is None and step is None:
                raise NotImplementedError
            if isinstance(start, six.integer_types) and \
                    isinstance(end, six.integer_types) and step is None:
                if start >= 0 and end >= 0:
                    raise NotImplementedError

            # Bit mask telling the function which of start/end/step follow in
            # its variadic arguments (None inputs are filtered out below).
            flag = 0x4 if start is not None else 0
            flag |= 0x2 if end is not None else 0
            flag |= 0x1 if step is not None else 0

            def func(x, flag, *args):
                if x is None:
                    return None

                # ``args`` holds only the bounds that were set, in
                # start/end/step order; consume them according to ``flag``.
                idx = 0
                s, e, t = None, None, None
                if flag & 0x4:
                    s = args[idx]
                    idx += 1
                if flag & 0x2:
                    e = args[idx]
                    idx += 1
                if flag & 0x1:
                    t = args[idx]
                return x[s: e: t]

            func._identifier = "str_slice"
            inputs = expr.input, Scalar(flag), expr._start, expr._end, expr._step
            sub = self._gen_mapped_expr(expr, tuple(i for i in inputs if i is not None),
                                        func, expr.name, multiple=False)
            self._sub(expr, sub)
            return
        elif isinstance(expr, Swapcase):
            func = lambda x: x.swapcase() if x is not None else None
            func._identifier = "str_swapcase"
        elif isinstance(expr, Title):
            func = lambda x: x.title() if x is not None else None
            func._identifier = "str_title"
        elif isinstance(expr, Strptime):
            def func(x, date_fmt):
                from datetime import datetime

                return datetime.strptime(x, date_fmt) if x is not None else None

            func._identifier = "strptime"
            # NOTE(review): '%' is doubled in a literal format string --
            # presumably because the value passes through %-formatting
            # before it reaches the function; confirm.
            date_fmt = expr._date_format \
                if not isinstance(expr._date_format, StringScalar) or expr._date_format._value is None \
                else Scalar(to_text(expr.date_format).replace("%", "%%"))
            inputs = expr.input, date_fmt
            sub = self._gen_mapped_expr(expr, inputs, func, expr.name, multiple=False)
            self._sub(expr, sub)
            return
        else:
            # Character-class predicates: each maps straight onto the str
            # method of the same name and passes None through.
            if isinstance(expr, Isalnum):
                func = lambda x: x.isalnum() if x is not None else None
                func._identifier = "str_isalnum"
            elif isinstance(expr, Isalpha):
                func = lambda x: x.isalpha() if x is not None else None
                func._identifier = "str_isalpha"
            elif isinstance(expr, Isdigit):
                func = lambda x: x.isdigit() if x is not None else None
                func._identifier = "str_isdigit"
            elif isinstance(expr, Isspace):
                func = lambda x: x.isspace() if x is not None else None
                func._identifier = "str_isspace"
            elif isinstance(expr, Islower):
                func = lambda x: x.islower() if x is not None else None
                func._identifier = "str_islower"
            elif isinstance(expr, Isupper):
                func = lambda x: x.isupper() if x is not None else None
                func._identifier = "str_isupper"
            elif isinstance(expr, Istitle):
                func = lambda x: x.istitle() if x is not None else None
                func._identifier = "str_istitle"
            elif isinstance(expr, (Isnumeric, Isdecimal)):
                def u_safe(s):
                    # Best-effort conversion to a unicode object so that
                    # isnumeric()/isdecimal() are available.  Any ordinary
                    # failure (``unicode`` not defined, value already
                    # decoded, bad escape) falls back to the value as-is.
                    # ``Exception`` rather than a bare ``except`` so that
                    # KeyboardInterrupt/SystemExit are not swallowed.
                    try:
                        return unicode(s, "unicode_escape")
                    except Exception:
                        return s

                if isinstance(expr, Isnumeric):
                    func = lambda x: u_safe(x).isnumeric() if x is not None else None
                    func._identifier = "str_isnumeric"
                else:
                    func = lambda x: u_safe(x).isdecimal() if x is not None else None
                    func._identifier = "str_isdecimal"

        # Shared tail for the branches that only needed the value itself.
        if func is not None:
            sub = expr.input.map(func, expr.dtype)
            self._sub(expr, sub)
            return

        raise NotImplementedError