def arrow_type_from_str()

in core/maxframe/utils.py


import io
import tokenize as pytokenize  # assumed alias for the stdlib tokenize module
import pyarrow as pa

# _arrow_type_constructors is a module-level dict defined elsewhere in
# core/maxframe/utils.py, mapping type names to DataType factory callables.


def arrow_type_from_str(type_str: str) -> pa.DataType:
    """
    Convert arrow type representations (for inst., list<item: int64>)
    into arrow DataType instances
    """
    # pad brackets with spaces so consecutive brackets tokenize as separate operators
    type_str = type_str.replace("<", "< ").replace(">", " >")
    token_iter = pytokenize.tokenize(io.BytesIO(type_str.encode()).readline)
    value_stack, op_stack = [], []

    def _pop_make_type(with_args: bool = False, combined: bool = True) -> None:
        """
        Pops tops of value stacks, creates a DataType instance and push back

        Parameters
        ----------
            with_args: bool
                if True, will contain next item (parameter list) in
                the value stack as parameters
            combined: bool
                if True, will use first element of the top of the value stack
                in DataType constructors
        """
        args = () if not with_args else (value_stack.pop(-1),)
        if not combined:
            args = args[0]
        type_name = value_stack.pop(-1)
        if isinstance(type_name, pa.DataType):
            value_stack.append(type_name)
        elif type_name in _arrow_type_constructors:
            value_stack.append(_arrow_type_constructors[type_name](*args))
        else:  # pragma: no cover
            value_stack.append(type_name)

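    # A hypothetical fragment of _arrow_type_constructors (the real mapping is
    # defined elsewhere in this module), showing how the call modes line up:
    #   "int64": pa.int64,                                      # ctor()
    #   "decimal128": pa.decimal128,                            # ctor(10, 2)
    #   "list": lambda fields: pa.list_(pa.field(*fields[0])),  # ctor(params)
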
    for token in token_iter:
        if token.type == pytokenize.OP:
            if token.string == ":":
                op_stack.append(token.string)
            elif token.string == ",":
                # convert the just-read sub-type name into a DataType
                if op_stack[-1] in ("<", ":"):
                    _pop_make_type()

                if op_stack[-1] == ":":
                    # named sub-types (name: type) collapse into (name, DataType) tuples
                    op_stack.pop(-1)
                    values = value_stack[-2:]
                    value_stack = value_stack[:-2]
                    value_stack.append(tuple(values))
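                    # e.g. "a" and pa.int64() become the field tuple ("a", int64)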
                # put generated item into the parameter list
                val = value_stack.pop(-1)
                value_stack[-1].append(val)
            elif token.string in ("<", "[", "("):
                # push an empty list to collect the parameters that follow
                value_stack.append([])
                op_stack.append(token.string)
            elif token.string in (")", "]"):
                # put generated item into the parameter list
                val = value_stack.pop(-1)
                value_stack[-1].append(val)
                # build the DataType (e.g., fixed_size_binary / decimal) from the unpacked args
                _pop_make_type(with_args=True, combined=False)
                op_stack.pop(-1)
            elif token.string == ">":
                _pop_make_type()

                if op_stack[-1] == ":":
                    # named sub-types (name: type) collapse into (name, DataType) tuples
                    op_stack.pop(-1)
                    values = value_stack[-2:]
                    value_stack = value_stack[:-2]
                    value_stack.append(tuple(values))

                # put generated item into the parameter list
                val = value_stack.pop(-1)
                value_stack[-1].append(val)
                # build the DataType (e.g., list / map / struct) from the collected parameters
                _pop_make_type(True)
                op_stack.pop(-1)
        elif token.type == pytokenize.NAME:
            value_stack.append(token.string)
        elif token.type == pytokenize.NUMBER:
            value_stack.append(int(token.string))
        elif token.type == pytokenize.ENDMARKER:
            # build the final type; a bare name such as "int64" is converted here
            _pop_make_type()
    if len(value_stack) > 1:
        raise ValueError(f"Cannot parse type {type_str}")
    return value_stack[-1]
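
A minimal usage sketch, assuming _arrow_type_constructors registers the names
used below ("int64", "string", "list", "struct", "decimal128") against the
matching pyarrow factories; the exact contents of that mapping live elsewhere
in core/maxframe/utils.py:

    import pyarrow as pa

    assert arrow_type_from_str("int64") == pa.int64()
    assert arrow_type_from_str("decimal128(10, 2)") == pa.decimal128(10, 2)
    assert arrow_type_from_str("list<item: int64>") == pa.list_(pa.int64())
    assert arrow_type_from_str(
        "struct<a: int64, b: string>"
    ) == pa.struct([("a", pa.int64()), ("b", pa.string())])

The intended round trip is str(pa_type) back through arrow_type_from_str;
for example, str(pa.list_(pa.int64())) is exactly "list<item: int64>".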