in core/maxframe/utils.py [0:0]
def arrow_type_from_str(type_str: str) -> pa.DataType:
    """
    Convert arrow type representations (for inst., list<item: int64>)
    into arrow DataType instances

    The string is split with the Python tokenizer and reduced with two
    stacks: a value stack (names, numbers, parameter lists and finished
    types) and an operator stack (pending opening brackets and colons).

    Parameters
    ----------
    type_str: str
        textual representation of an arrow type

    Returns
    -------
    pa.DataType
        the single item left on the value stack once all tokens are
        consumed. NOTE(review): a bare name that is neither a DataType nor
        a key of ``_arrow_type_constructors`` is pushed back unchanged, so
        such input yields that name instead of a DataType.

    Raises
    ------
    ValueError
        if more than one item is left on the value stack, or if a token
        needs a value, a pending operator or a parameter list that is not
        there (for inst., an empty string or a closing bracket without a
        matching opening one)
    """
    # enable consecutive brackets to be tokenized
    type_str = type_str.replace("<", "< ").replace(">", " >")
    token_iter = pytokenize.tokenize(io.BytesIO(type_str.encode()).readline)
    value_stack, op_stack = [], []

    def _parse_error() -> ValueError:
        """Builds the exception used for every parse failure"""
        return ValueError(f"Cannot parse type {type_str}")

    def _pop_value():
        """Pops the top of the value stack, failing cleanly when it is empty"""
        if not value_stack:
            raise _parse_error()
        return value_stack.pop()

    def _pop_op() -> str:
        """Pops the top of the operator stack, failing cleanly when it is empty"""
        if not op_stack:
            raise _parse_error()
        return op_stack.pop()

    def _last_op() -> str:
        """Peeks the top of the operator stack, failing cleanly when it is empty"""
        if not op_stack:
            raise _parse_error()
        return op_stack[-1]

    def _pop_make_type(with_args: bool = False, combined: bool = True) -> None:
        """
        Pops tops of value stacks, creates a DataType instance and push back

        Parameters
        ----------
        with_args: bool
            if True, will contain next item (parameter list) in
            the value stack as parameters
        combined: bool
            if True, will use first element of the top of the value stack
            in DataType constructors
        """
        args = (_pop_value(),) if with_args else ()
        if not combined:
            # spread the parameter list itself as positional arguments
            args = args[0]
        type_name = _pop_value()
        if isinstance(type_name, pa.DataType):
            # already a finished type, keep it as is
            value_stack.append(type_name)
        elif type_name in _arrow_type_constructors:
            value_stack.append(_arrow_type_constructors[type_name](*args))
        else:  # pragma: no cover
            value_stack.append(type_name)

    def _pair_named_item() -> None:
        """Folds a pending ``name: type`` into a tuple when a colon is open"""
        if _last_op() != ":":
            return
        # parameterized sub-types need to be represented as tuples
        op_stack.pop()
        if len(value_stack) < 2:
            raise _parse_error()
        pair = tuple(value_stack[-2:])
        # mutate in place so the list shared with the helpers stays the same
        del value_stack[-2:]
        value_stack.append(pair)

    def _store_item() -> None:
        """Moves the item on top of the value stack into the list beneath it"""
        item = _pop_value()
        if not value_stack or not isinstance(value_stack[-1], list):
            # no parameter list was opened for this item
            raise _parse_error()
        value_stack[-1].append(item)

    for token in token_iter:
        if token.type == pytokenize.OP:
            if token.string == ":":
                op_stack.append(token.string)
            elif token.string == ",":
                # gather previous sub-types
                if _last_op() in ("<", ":"):
                    _pop_make_type()
                _pair_named_item()
                # put generated item into the parameter list
                _store_item()
            elif token.string in ("<", "[", "("):
                # pushes an empty parameter list for future use
                value_stack.append([])
                op_stack.append(token.string)
            elif token.string in (")", "]"):
                # put generated item into the parameter list
                _store_item()
                # make DataType (i.e., fixed_size_binary / decimal) given args
                _pop_make_type(with_args=True, combined=False)
                _pop_op()
            elif token.string == ">":
                _pop_make_type()
                _pair_named_item()
                # put generated item into the parameter list
                _store_item()
                # make DataType (i.e., list / map / struct) given args
                _pop_make_type(True)
                _pop_op()
        elif token.type == pytokenize.NAME:
            value_stack.append(token.string)
        elif token.type == pytokenize.NUMBER:
            value_stack.append(int(token.string))
        elif token.type == pytokenize.ENDMARKER:
            # make final type
            _pop_make_type()

    if len(value_stack) != 1:
        raise _parse_error()
    return value_stack[-1]