def visit_string_op()

in odps/df/backends/odpssql/compiler.py [0:0]


    def visit_string_op(self, expr):
        """Compile a string operation expression into an ODPS SQL fragment.

        The compiled form of ``expr.input`` is fetched from the compile
        context, wrapped in the SQL function matching the concrete type of
        ``expr``, and the result is registered on the context via
        ``add_expr_compiled``.

        :param expr: string operation expression (one of the ``strings.*``
            types handled below)
        :raises NotImplementedError: when the operation, or the requested
            option of an operation (regex ``Contains``, ``Find`` with an
            ``end``, stripping characters other than a single space,
            ``Split`` with ``n != -1``), has no branch producing SQL here
        """
        # FIXME: quite a few operations cannot be supported by internal functions
        compiled = None

        input_sql = self._ctx.get_expr_compiled(expr.input)
        if isinstance(expr, strings.Capitalize):
            compiled = 'CONCAT(TOUPPER(SUBSTR(%(input)s, 1, 1)), TOLOWER(SUBSTR(%(input)s, 2)))' % {
                'input': input_sql
            }
        elif isinstance(expr, strings.CatStr):
            nodes = [expr._input]
            if expr._others is not None:
                others = (expr._others, ) if not isinstance(expr._others, Iterable) else expr._others
                for other in others:
                    # interleave the separator between every pair of operands
                    if expr._sep is not None:
                        nodes.extend([expr._sep, other])
                    else:
                        nodes.append(other)
            compiled = 'CONCAT(%s)' % ', '.join(self._ctx.get_expr_compiled(e) for e in nodes)
        elif isinstance(expr, strings.Contains):
            if expr.regex:
                raise NotImplementedError
            compiled = 'INSTR(%s, %s) > 0' % (input_sql, self._ctx.get_expr_compiled(expr._pat))
        elif isinstance(expr, strings.Endswith):
            # TODO: any better solution?
            # a suffix match is expressed as a prefix match on the reversed strings
            compiled = 'INSTR(REVERSE(%s), REVERSE(%s)) == 1' % (
                input_sql, self._ctx.get_expr_compiled(expr._pat))
        elif isinstance(expr, strings.Startswith):
            compiled = 'INSTR(%s, %s) == 1' % (input_sql, self._ctx.get_expr_compiled(expr._pat))
        elif isinstance(expr, strings.Find):
            # non-negative starts are shifted by one (0-based -> 1-based);
            # negative starts are passed through unchanged
            if isinstance(expr.start, six.integer_types):
                start = expr.start + 1 if expr.start >= 0 else expr.start
            else:
                start = 'IF(%(start)s >= 0, %(start)s + 1, %(start)s)' % {
                    'start': self._ctx.get_expr_compiled(expr._start)
                }
            if expr.end is not None:
                raise NotImplementedError
            else:
                # shift the 1-based INSTR result back to a 0-based index
                compiled = 'INSTR(%s, %s, %s) - 1' % (
                    input_sql, self._ctx.get_expr_compiled(expr._sub), start)
        elif isinstance(expr, strings.Get):
            compiled = 'SUBSTR(%s, %s, 1)' % (input_sql, expr.index + 1)
        elif isinstance(expr, strings.Len):
            compiled = 'LENGTH(%s)' % input_sql
        elif isinstance(expr, strings.Lower):
            compiled = 'TOLOWER(%s)' % input_sql
        elif isinstance(expr, strings.Upper):
            compiled = 'TOUPPER(%s)' % input_sql
        elif isinstance(expr, (strings.Lstrip, strings.Rstrip, strings.Strip)):
            if expr.to_strip != ' ':
                raise NotImplementedError
            func = {
                'Lstrip': 'LTRIM',
                'Rstrip': 'RTRIM',
                'Strip': 'TRIM'
            }
            compiled = '%s(%s)' % (func[type(expr).__name__], input_sql)
        elif isinstance(expr, strings.Slice):
            # internal function will be compiled in two cases:
            # 1) start is not None
            # 2) positive start and end
            # NOTE(review): this branch does arithmetic on expr.start (and
            # expr.end) directly, so it assumes callers only route those two
            # cases here -- confirm against the caller.
            if expr.end is None and expr.step is None:
                compiled = 'SUBSTR(%s, %s)' % (input_sql, expr.start + 1)
            else:
                # expr.start and expr.end
                length = expr.end - expr.start
                compiled = 'SUBSTR(%s, %s, %s)' % (input_sql, expr.start + 1, length)
        elif isinstance(expr, strings.Repeat):
            compiled = 'REPEAT(%s, %s)' % (
                input_sql, self._ctx.get_expr_compiled(expr._repeats))
        elif isinstance(expr, strings.Split):
            if expr.n != -1:
                raise NotImplementedError

            # two candidate calls: one with the pattern as given, one with
            # the pattern regex-escaped
            escape_pat = re.escape(expr.pat)
            nre_compiled = 'SPLIT(%s, \'%s\')' % (input_sql, utils.escape_odps_string(expr.pat))
            re_compiled = 'SPLIT(%s, \'%s\')' % (input_sql, utils.escape_odps_string(escape_pat))

            try:
                re.compile(expr.pat)
            except Exception:
                # best effort: a pattern that cannot be compiled is treated as
                # a plain (non-regex) delimiter; KeyboardInterrupt/SystemExit
                # are deliberately no longer swallowed here
                is_regex = False
            else:
                is_regex = True

            if expr.pat == escape_pat or not is_regex:
                # escaping changes nothing (or the pattern is not a valid
                # regex), so a single SPLIT call is sufficient
                compiled = nre_compiled
            else:
                # fall back to the unescaped pattern when the escaped one
                # yields an empty result
                compiled = 'IF(SIZE(%(re)s) = 0, %(nre)s, %(re)s)' % dict(re=re_compiled, nre=nre_compiled)
        elif isinstance(expr, strings.StringToDict):
            # escape the delimiters so quotes or backslashes inside them cannot
            # break out of the SQL string literal (consistent with Split above)
            compiled = 'STR_TO_MAP(%s, \'%s\', \'%s\')' % (
                input_sql,
                utils.escape_odps_string(expr.item_delim),
                utils.escape_odps_string(expr.kv_delim))

        if compiled is not None:
            self._ctx.add_expr_compiled(expr, compiled)
        else:
            raise NotImplementedError