in odps/df/backends/odpssql/compiler.py [0:0]
def visit_string_op(self, expr):
    """Compile a string operation expression into a SQL fragment.

    The compiled SQL of ``expr.input`` is fetched from ``self._ctx`` and
    combined with the built-in SQL function that matches the concrete type
    of ``expr``. The resulting fragment is registered on the context via
    ``self._ctx.add_expr_compiled(expr, compiled)``; nothing is returned.

    :param expr: string operation expression, an instance of one of the
        ``strings`` expression classes dispatched on below
    :raises NotImplementedError: if the operation type is not handled here,
        or if it is used with options that the built-in SQL functions
        cannot express (regex ``Contains``, ``Find`` with an end position,
        stripping characters other than a single space, ``Split`` with a
        limit)
    """
    # FIXME: quite a few operations cannot be supported by internal functions
    compiled = None
    # Named ``input_sql`` rather than ``input`` to avoid shadowing the builtin.
    input_sql = self._ctx.get_expr_compiled(expr.input)

    if isinstance(expr, strings.Capitalize):
        # Upper-case the first character, lower-case the remainder.
        compiled = 'CONCAT(TOUPPER(SUBSTR(%(input)s, 1, 1)), TOLOWER(SUBSTR(%(input)s, 2)))' % {
            'input': input_sql
        }
    elif isinstance(expr, strings.CatStr):
        nodes = [expr._input]
        if expr._others is not None:
            others = (expr._others, ) if not isinstance(expr._others, Iterable) else expr._others
            for other in others:
                # When a separator is given it is emitted before every
                # appended operand.
                if expr._sep is not None:
                    nodes.extend([expr._sep, other])
                else:
                    nodes.append(other)
        compiled = 'CONCAT(%s)' % ', '.join(self._ctx.get_expr_compiled(e) for e in nodes)
    elif isinstance(expr, strings.Contains):
        # Only plain substring containment maps onto INSTR.
        if expr.regex:
            raise NotImplementedError
        compiled = 'INSTR(%s, %s) > 0' % (input_sql, self._ctx.get_expr_compiled(expr._pat))
    elif isinstance(expr, strings.Endswith):
        # TODO: any better solution?
        # A suffix match is expressed as a prefix match on the reversed strings.
        compiled = 'INSTR(REVERSE(%s), REVERSE(%s)) == 1' % (
            input_sql, self._ctx.get_expr_compiled(expr._pat))
    elif isinstance(expr, strings.Startswith):
        compiled = 'INSTR(%s, %s) == 1' % (input_sql, self._ctx.get_expr_compiled(expr._pat))
    elif isinstance(expr, strings.Find):
        # Non-negative start positions are shifted by one for INSTR and the
        # result is shifted back by one; negative positions pass through.
        if isinstance(expr.start, six.integer_types):
            start = expr.start + 1 if expr.start >= 0 else expr.start
        else:
            start = 'IF(%(start)s >= 0, %(start)s + 1, %(start)s)' % {
                'start': self._ctx.get_expr_compiled(expr._start)
            }
        if expr.end is not None:
            raise NotImplementedError
        else:
            compiled = 'INSTR(%s, %s, %s) - 1' % (
                input_sql, self._ctx.get_expr_compiled(expr._sub), start)
    elif isinstance(expr, strings.Get):
        compiled = 'SUBSTR(%s, %s, 1)' % (input_sql, expr.index + 1)
    elif isinstance(expr, strings.Len):
        compiled = 'LENGTH(%s)' % input_sql
    elif isinstance(expr, strings.Lower):
        compiled = 'TOLOWER(%s)' % input_sql
    elif isinstance(expr, strings.Upper):
        compiled = 'TOUPPER(%s)' % input_sql
    elif isinstance(expr, (strings.Lstrip, strings.Rstrip, strings.Strip)):
        # Only stripping a single space character is compiled to the
        # trimming functions.
        if expr.to_strip != ' ':
            raise NotImplementedError
        func = {
            'Lstrip': 'LTRIM',
            'Rstrip': 'RTRIM',
            'Strip': 'TRIM'
        }
        compiled = '%s(%s)' % (func[type(expr).__name__], input_sql)
    elif isinstance(expr, strings.Slice):
        # The internal function is compiled in two cases:
        # 1) only start is given (no end, no step)
        # 2) both start and end are given
        # NOTE(review): this branch does integer arithmetic on expr.start
        # (and expr.end) and ignores expr.step when an end is present;
        # presumably callers only route supported slices here -- confirm.
        if expr.end is None and expr.step is None:
            compiled = 'SUBSTR(%s, %s)' % (input_sql, expr.start + 1)
        else:
            # expr.start and expr.end
            length = expr.end - expr.start
            compiled = 'SUBSTR(%s, %s, %s)' % (input_sql, expr.start + 1, length)
    elif isinstance(expr, strings.Repeat):
        compiled = 'REPEAT(%s, %s)' % (
            input_sql, self._ctx.get_expr_compiled(expr._repeats))
    elif isinstance(expr, strings.Split):
        if expr.n != -1:
            raise NotImplementedError
        escape_pat = re.escape(expr.pat)
        # Two candidate calls: one with the pattern as given, one with the
        # pattern regex-escaped so that it is matched literally.
        nre_compiled = 'SPLIT(%s, \'%s\')' % (input_sql, utils.escape_odps_string(expr.pat))
        re_compiled = 'SPLIT(%s, \'%s\')' % (input_sql, utils.escape_odps_string(escape_pat))
        try:
            re.compile(expr.pat)
        except Exception:
            # Best-effort probe: any compilation failure means the pattern
            # is not a usable regex. ``Exception`` (not a bare ``except``)
            # lets KeyboardInterrupt/SystemExit propagate.
            is_regex = False
        else:
            is_regex = True
        if expr.pat == escape_pat or not is_regex:
            # Escaping changes nothing, or the pattern is not a valid regex:
            # a single SPLIT call is enough.
            compiled = nre_compiled
        else:
            # Fall back to the unescaped pattern when the escaped variant
            # yields an empty result.
            compiled = 'IF(SIZE(%(re)s) = 0, %(nre)s, %(re)s)' % dict(re=re_compiled, nre=nre_compiled)
    elif isinstance(expr, strings.StringToDict):
        # Delimiters are embedded in SQL string literals, so they are escaped
        # the same way as the Split pattern above; otherwise a delimiter
        # containing a quote or backslash would produce malformed SQL.
        compiled = 'STR_TO_MAP(%s, \'%s\', \'%s\')' % (
            input_sql,
            utils.escape_odps_string(expr.item_delim),
            utils.escape_odps_string(expr.kv_delim))

    if compiled is not None:
        self._ctx.add_expr_compiled(expr, compiled)
    else:
        raise NotImplementedError