in odps/df/backends/odpssql/analyzer.py [0:0]
def visit_string_op(self, expr):
    """Replace a string-operation expression with an equivalent rewritten one.

    ``Ljust``, ``Rjust``, ``Zfill`` and ``CatStr`` nodes are always rewritten
    into expressions built from simpler operations (``len``, ``repeat``,
    ``ifelse``, ``isnull``, ``fillna`` and concatenation).

    All remaining operations are only considered when ``options.df.analyze``
    is truthy. For those, a plain Python function implementing the operation
    is built and the node is substituted by that function mapped over the
    input (through ``self._gen_mapped_expr`` or ``expr.input.map``).

    :param expr: the string-operation expression node being visited
    :return: ``None``; every rewrite is registered through ``self._sub``
    :raises NotImplementedError: if ``options.df.analyze`` is falsy for an
        operation that is not rewritten unconditionally, or if no rewrite
        applies to ``expr``
    """
    if isinstance(expr, Ljust):
        # Pad on the right with ``rest`` fill characters when ``rest`` is non-negative.
        rest = expr.width - expr.input.len()
        sub = expr.input + (rest >= 0).ifelse(expr._fillchar.repeat(rest), '')
        self._sub(expr, sub.rename(expr.name))
        return
    elif isinstance(expr, Rjust):
        # Same as above, but the padding goes in front of the input.
        rest = expr.width - expr.input.len()
        sub = (rest >= 0).ifelse(expr._fillchar.repeat(rest), '') + expr.input
        self._sub(expr, sub.rename(expr.name))
        return
    elif isinstance(expr, Zfill):
        # Left padding with a fixed '0' fill character.
        fillchar = Scalar('0')
        rest = expr.width - expr.input.len()
        sub = (rest >= 0).ifelse(fillchar.repeat(rest), '') + expr.input
        self._sub(expr, sub.rename(expr.name))
        return
    elif isinstance(expr, CatStr):
        joined = expr.input
        others = expr._others if isinstance(expr._others, Iterable) else (expr._others, )
        for other in others:
            if expr.na_rep is not None:
                # With a null replacement configured, only the operands are
                # rewritten (each one gets ``fillna``); ``expr`` itself is kept.
                for e in (joined, ) + tuple(others):
                    self._sub(e, e.fillna(expr.na_rep), parents=(expr, ))
                return
            else:
                # A null operand is skipped: the accumulated value is kept as is.
                if expr._sep is not None:
                    joined = other.isnull().ifelse(joined, joined + expr._sep + other)
                else:
                    joined = other.isnull().ifelse(joined, joined + other)
        self._sub(expr, joined.rename(expr.name))
        return

    # Everything below relies on mapping a Python function over the input.
    if not options.df.analyze:
        raise NotImplementedError

    def literal_pattern():
        # A non-null literal string pattern is passed through ``re.escape`` and
        # re-wrapped as a Scalar; any other pattern expression is used as is.
        pat = expr._pat
        if not isinstance(pat, StringScalar) or pat._value is None:
            return pat
        return Scalar(re.escape(to_text(expr.pat)))

    # NOTE(review): ``_identifier`` is set on every generated function below;
    # presumably the backend uses it to name the function -- confirm.
    func = None
    if isinstance(expr, Contains) and expr.regex:
        def func(x, pat, case, flags):
            if x is None:
                return None
            flgs = 0
            if not case:
                flgs = re.I
            if flags > 0:
                flgs = flgs | flags
            r = re.compile(pat, flgs)
            return r.search(x) is not None
        func._identifier = "str_contains"

        inputs = expr.input, literal_pattern(), expr._case, expr._flags
        sub = self._gen_mapped_expr(expr, inputs, func,
                                    expr.name, multiple=False)
        self._sub(expr, sub)
        return
    elif isinstance(expr, StrCount):
        def func(x, pat, flags):
            if x is None:
                return None
            regex = re.compile(pat, flags=flags)
            return len(regex.findall(x))
        func._identifier = "str_count"

        inputs = expr.input, literal_pattern(), expr._flags
        sub = self._gen_mapped_expr(expr, inputs, func, expr.name, multiple=False)
        self._sub(expr, sub)
        return
    elif isinstance(expr, Find) and expr.end is not None:
        # Only ``Find`` with an explicit end is handled here; the function is
        # mapped over the input at the bottom of this method.
        start = expr.start
        end = expr.end
        substr = expr.sub

        def func(x):
            if x is None:
                return None
            return x.find(substr, start, end)
        func._identifier = "str_find"
    elif isinstance(expr, RFind):
        start = expr.start
        end = expr.end
        substr = expr.sub

        def func(x):
            if x is None:
                return None
            return x.rfind(substr, start, end)
        func._identifier = "str_rfind"
    elif isinstance(expr, Extract):
        def func(x, pat, flags, group):
            if x is None:
                return None
            regex = re.compile(pat, flags=flags)
            m = regex.search(x)
            # No match falls through and yields None.
            if m:
                if group is None:
                    return m.group()
                return m.group(group)
        func._identifier = "str_extract"

        inputs = expr.input, literal_pattern(), expr._flags, expr._group
        sub = self._gen_mapped_expr(expr, inputs, func, expr.name, multiple=False)
        self._sub(expr, sub)
        return
    elif isinstance(expr, Replace):
        def func(x, pat, repl, n, case, flags, use_regex):
            if x is None:
                return None
            # A single-character, case-sensitive, flag-less pattern is handled
            # with plain ``str.replace`` even when regex mode is requested.
            use_re = use_regex and (not case or len(pat) > 1 or flags)
            if use_re:
                if not case:
                    flags |= re.IGNORECASE
                regex = re.compile(pat, flags=flags)
                # ``re.sub`` uses count=0 for "replace all"; negative n maps to it.
                n = n if n >= 0 else 0
                return regex.sub(repl, x, count=n)
            else:
                return x.replace(pat, repl, n)
        func._identifier = "str_replace"

        inputs = expr.input, literal_pattern(), expr._repl, expr._n, \
            expr._case, expr._flags, expr._regex
        sub = self._gen_mapped_expr(expr, inputs, func,
                                    expr.name, multiple=False)
        self._sub(expr, sub)
        return
    elif isinstance(expr, (Lstrip, Strip, Rstrip)) and expr.to_strip != ' ':
        # Only a strip set other than a single space is handled here.
        to_strip = expr.to_strip

        if isinstance(expr, Lstrip):
            def func(x):
                if x is None:
                    return None
                return x.lstrip(to_strip)
            func._identifier = "str_lstrip"
        elif isinstance(expr, Strip):
            def func(x):
                if x is None:
                    return None
                return x.strip(to_strip)
            func._identifier = "str_strip"
        elif isinstance(expr, Rstrip):
            def func(x):
                if x is None:
                    return None
                return x.rstrip(to_strip)
            func._identifier = "str_rstrip"
    elif isinstance(expr, Pad):
        side = expr.side
        fillchar = expr.fillchar
        width = expr.width

        if side not in ('left', 'right', 'both'):
            raise NotImplementedError

        def func(x, width, fillchar, side):
            if x is None:
                return None
            # Padding on the left means the text is right-justified, and vice versa.
            if side == 'left':
                return x.rjust(width, fillchar)
            elif side == 'right':
                return x.ljust(width, fillchar)
            else:
                return x.center(width, fillchar)
        func._identifier = "str_pad"

        inputs = expr.input, Scalar(width), Scalar(fillchar), Scalar(side)
        sub = self._gen_mapped_expr(expr, inputs, func, expr.name, multiple=False)
        self._sub(expr, sub)
        return
    elif isinstance(expr, Slice):
        start, end, step = expr.start, expr.end, expr.step

        if end is None and step is None:
            raise NotImplementedError
        if isinstance(start, six.integer_types) and \
                isinstance(end, six.integer_types) and step is None:
            if start >= 0 and end >= 0:
                raise NotImplementedError

        # Bit mask telling the function which of start/end/step are supplied:
        # 0x4 -> start, 0x2 -> end, 0x1 -> step.
        flag = 0x4 if start is not None else 0
        flag |= 0x2 if end is not None else 0
        flag |= 0x1 if step is not None else 0

        def func(x, flag, *args):
            if x is None:
                return None
            # ``args`` holds only the supplied bounds, in start/end/step order.
            idx = 0
            s, e, t = None, None, None
            if flag & 0x4:
                s = args[idx]
                idx += 1
            if flag & 0x2:
                e = args[idx]
                idx += 1
            if flag & 0x1:
                t = args[idx]
                idx += 1
            return x[s: e: t]
        func._identifier = "str_slice"

        inputs = expr.input, Scalar(flag), expr._start, expr._end, expr._step
        sub = self._gen_mapped_expr(expr, tuple(i for i in inputs if i is not None),
                                    func, expr.name, multiple=False)
        self._sub(expr, sub)
        return
    elif isinstance(expr, Swapcase):
        func = lambda x: x.swapcase() if x is not None else None
        func._identifier = "str_swapcase"
    elif isinstance(expr, Title):
        func = lambda x: x.title() if x is not None else None
        func._identifier = "str_title"
    elif isinstance(expr, Strptime):
        def func(x, date_fmt):
            from datetime import datetime
            return datetime.strptime(x, date_fmt) if x is not None else None
        func._identifier = "strptime"

        # A non-null literal format gets every '%' doubled before being re-wrapped.
        # NOTE(review): presumably compensates for a later formatting step -- confirm.
        date_fmt = expr._date_format \
            if not isinstance(expr._date_format, StringScalar) or expr._date_format._value is None \
            else Scalar(to_text(expr.date_format).replace("%", "%%"))
        inputs = expr.input, date_fmt
        sub = self._gen_mapped_expr(expr, inputs, func, expr.name, multiple=False)
        self._sub(expr, sub)
        return
    else:
        if isinstance(expr, Isalnum):
            func = lambda x: x.isalnum() if x is not None else None
            func._identifier = "str_isalnum"
        elif isinstance(expr, Isalpha):
            func = lambda x: x.isalpha() if x is not None else None
            func._identifier = "str_isalpha"
        elif isinstance(expr, Isdigit):
            func = lambda x: x.isdigit() if x is not None else None
            func._identifier = "str_isdigit"
        elif isinstance(expr, Isspace):
            func = lambda x: x.isspace() if x is not None else None
            func._identifier = "str_isspace"
        elif isinstance(expr, Islower):
            func = lambda x: x.islower() if x is not None else None
            func._identifier = "str_islower"
        elif isinstance(expr, Isupper):
            func = lambda x: x.isupper() if x is not None else None
            func._identifier = "str_isupper"
        elif isinstance(expr, Istitle):
            func = lambda x: x.istitle() if x is not None else None
            func._identifier = "str_istitle"
        elif isinstance(expr, (Isnumeric, Isdecimal)):
            def u_safe(s):
                # Best-effort conversion to a unicode object; any failure
                # (including ``unicode`` not existing) returns the value unchanged.
                try:
                    return unicode(s, "unicode_escape")
                except Exception:
                    return s

            if isinstance(expr, Isnumeric):
                func = lambda x: u_safe(x).isnumeric() if x is not None else None
                func._identifier = "str_isnumeric"
            else:
                func = lambda x: u_safe(x).isdecimal() if x is not None else None
                func._identifier = "str_isdecimal"

    # Branches that only built ``func`` (no extra scalar inputs) are mapped here.
    if func is not None:
        sub = expr.input.map(func, expr.dtype)
        self._sub(expr, sub)
        return

    raise NotImplementedError