core/maxframe/opcodes.py (487 lines of code) (raw):

# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Integer opcode constants, one per line.  Every value must be unique: the
# check at the end of this module raises ImportError when two names share
# a value.  Values within a section are not always consecutive or ordered.

NULL = 0

# creation
# tensor
SCALAR = 1
TENSOR_DATA_SOURCE = 2
TENSOR_ONES = 3
TENSOR_ONES_LIKE = 4
TENSOR_ZEROS = 5
TENSOR_ZEROS_LIKE = 6
TENSOR_EMPTY = 7
TENSOR_EMPTY_LIKE = 8
TENSOR_FULL = 9
TENSOR_FULL_LIKE = 25
TENSOR_ARANGE = 10
TENSOR_INDICES = 11
TENSOR_DIAG = 12
TENSOR_EYE = 13
TENSOR_LINSPACE = 14
TENSOR_TRIU = 15
TENSOR_TRIL = 16
# external storage
TENSOR_FROM_TILEDB = 18
TENSOR_STORE_TILEDB = 19
TENSOR_STORE_TILEDB_CONSOLIDATE = 20
TENSOR_FROM_DATAFRAME = 22
TENSOR_FROM_HDF5 = 27
TENSOR_STORE_HDF5 = 28
TENSOR_FROM_ZARR = 29
TENSOR_STORE_ZARR = 32
# dataframe
DATAFRAME_DATA_SOURCE = 17
DATAFRAME_FROM_TENSOR = 21
DATAFRAME_FROM_RECORDS = 24
# series
SERIES_DATA_SOURCE = 23
SERIES_FROM_TENSOR = 26
SERIES_FROM_INDEX = 39
# index
INDEX_DATA_SOURCE = 33
DATE_RANGE = 34
TIMEDELTA_RANGE = 35
CHECK_MONOTONIC = 38
# misc
MEMORY_USAGE = 36
REBALANCE = 37

# GPU
TO_GPU = 30
TO_CPU = 31

# random
RAND_RAND = 41
RAND_RANDN = 42
RAND_RANDINT = 43
RAND_RANDOM_INTEGERS = 44
RAND_RANDOM_SAMPLE = 45
RAND_RANDOM = 46
RAND_RANF = 47
RAND_SAMPLE = 48
RAND_BYTES = 49
# random distribution
RAND_BETA = 50
RAND_BINOMIAL = 51
RAND_CHISQUARE = 52
RAND_CHOICE = 53
RAND_DIRICHLET = 54
RAND_EXPONENTIAL = 55
RAND_F = 56
RAND_GAMMA = 57
RAND_GEOMETRIC = 58
RAND_GUMBEL = 59
RAND_HYPERGEOMETRIC = 60
RAND_LAPLACE = 61
RAND_LOGISTIC = 62
RAND_LOGNORMAL = 63
RAND_LOGSERIES = 64
RAND_MULTINOMIAL = 65
RAND_MULTIVARIATE_NORMAL = 66
RAND_NEGATIVE_BINOMIAL = 67
# NOTE: "CHISQURE" is misspelled; the name is kept unchanged because it is
# part of this module's public interface.
RAND_NONCENTRAL_CHISQURE = 68
RAND_NONCENTRAL_F = 69
RAND_NORMAL = 70
RAND_PARETO = 71
RAND_PERMUTATION = 72
# NOTE: "POSSION" is misspelled; name kept unchanged for compatibility.
RAND_POSSION = 73
RAND_POWER = 74
RAND_RAYLEIGH = 75
RAND_SHUFFLE = 76
RAND_STANDARD_CAUCHY = 77
RAND_STANDARD_EXPONENTIAL = 78
# NOTE: "GAMMMA" is misspelled; name kept unchanged for compatibility.
RAND_STANDARD_GAMMMA = 79
RAND_STANDARD_NORMAL = 80
RAND_STANDARD_T = 81
RAND_TOMAXINT = 82
RAND_TRIANGULAR = 83
RAND_UNIFORM = 84
RAND_VONMISES = 85
RAND_WALD = 86
RAND_WEIBULL = 87
RAND_ZIPF = 88
PERMUTATION = 89
UNIQUE = 90

# ufunc
ADD = 101
SUB = 102
MUL = 103
DIV = 104
TRUEDIV = 105
FLOORDIV = 106
POW = 107
MOD = 108
FMOD = 109
LOGADDEXP = 110
LOGADDEXP2 = 111
NEGATIVE = 112
POSITIVE = 113
ABSOLUTE = 114
FABS = 115
ABS = 116
RINT = 117
SIGN = 118
CONJ = 119
EXP = 120
EXP2 = 121
LOG = 122
LOG2 = 123
LOG10 = 124
EXPM1 = 125
LOG1P = 126
SQRT = 127
SQUARE = 128
CBRT = 129
RECIPROCAL = 130
EQ = 131
NE = 132
LT = 133
LE = 134
GT = 135
GE = 136
SIN = 137
COS = 138
TAN = 139
ARCSIN = 140
ARCCOS = 141
ARCTAN = 142
ARCTAN2 = 143
HYPOT = 144
SINH = 145
COSH = 146
TANH = 147
ARCSINH = 148
ARCCOSH = 149
ARCTANH = 150
DEG2RAD = 151
RAD2DEG = 152
BITAND = 153
BITOR = 154
BITXOR = 155
INVERT = 156
LSHIFT = 157
RSHIFT = 158
AND = 159
OR = 160
XOR = 161
NOT = 162
MAXIMUM = 163
MINIMUM = 164
AROUND = 165
FLOAT_POWER = 166
FMAX = 167
FMIN = 168
ISFINITE = 169
ISINF = 170
ISNAN = 171
SIGNBIT = 172
COPYSIGN = 173
NEXTAFTER = 174
SPACING = 175
LDEXP = 176
FREXP = 177
MODF = 178
FLOOR = 179
CEIL = 180
TRUNC = 181
DEGREES = 182
RADIANS = 183
CLIP = 184
ISREAL = 185
ISCOMPLEX = 186
REAL = 187
IMAG = 188
FIX = 189
I0 = 190
SINC = 191
NAN_TO_NUM = 192
ISCLOSE = 193
DIVMOD = 194
ANGLE = 195
SET_REAL = 196
SET_IMAG = 197

# special
SPECIAL = 200

# spatial
PDIST = 231
CDIST = 232
SQUAREFORM = 233

# tree operator
TREE_ADD = 251
TREE_MULTIPLY = 252
TREE_OR = 253

# reduction
CUMSUM = 301
CUMPROD = 302
PROD = 303
SUM = 304
MAX = 305
MIN = 306
ALL = 307
ANY = 308
MEAN = 309
ARGMAX = 310
ARGMIN = 311
NANSUM = 312
NANMAX = 313
NANMIN = 314
NANPROD = 315
NANMEAN = 316
NANARGMAX = 317
NANARGMIN = 318
COUNT_NONZERO = 319
MOMENT = 320
NANMOMENT = 321
VAR = 322
STD = 323
NANVAR = 324
NANSTD = 325
NANCUMSUM = 326
NANCUMPROD = 327
COUNT = 343
CUMMAX = 344
CUMMIN = 345
CUMCOUNT = 346
CORR = 347
REDUCTION_SIZE = 348
CUSTOM_REDUCTION = 349
SKEW = 350
KURTOSIS = 351
SEM = 352
STR_CONCAT = 353
MAD = 354
MEDIAN = 355

# tensor operator
RESHAPE = 401
SLICE = 402
INDEX = 403
INDEXSETVALUE = 404
CONCATENATE = 405
RECHUNK = 406
ASTYPE = 407
TRANSPOSE = 408
SWAPAXES = 409
BROADCAST_TO = 410
STACK = 411
WHERE = 412
CHOOSE = 413
NONZERO = 414
ARGWHERE = 415
UNRAVEL_INDEX = 416
RAVEL_MULTI_INDEX = 417
ARRAY_SPLIT = 418
SQUEEZE = 419
DIGITIZE = 420
REPEAT = 421
COPYTO = 422
ISIN = 423
SEARCHSORTED = 428
SORT = 429
HISTOGRAM = 430
HISTOGRAM_BIN_EDGES = 431
PARTITION = 432
QUANTILE = 440
FILL_DIAGONAL = 441
NORMALIZE = 442
TOPK = 443
TRAPZ = 444
GET_SHAPE = 445
BINCOUNT = 446

# fancy index, distributed phase is a shuffle operation that
# the fancy indexes will be distributed to the left chunks
# the concat phase will concat back the indexed left chunks and index
# according to the original fancy index order
FANCY_INDEX_DISTRIBUTE = 424
FANCY_INDEX_CONCAT = 425

# linear algebra
TENSORDOT = 501
DOT = 502
MATMUL = 503
CHOLESKY = 510
QR = 511
SVD = 512
LU = 513
SOLVE_TRIANGULAR = 520
INV = 521
NORM = 530

# fft
FFT = 601
IFFT = 602
FFT2 = 603
IFFT2 = 604
FFTN = 605
IFFTN = 606
RFFT = 607
IRFFT = 608
RFFT2 = 609
IRFFT2 = 610
RFFTN = 611
IRFFTN = 612
HFFT = 613
IHFFT = 614
FFTFREQ = 615
FFTFREQ_CHUNK = 616
RFFTFREQ = 617
FFTSHIFT = 618
IFFTSHIFT = 619

# einsum
EINSUM = 630

# sparse creation
SPARSE_MATRIX_DATA_SOURCE = 701
DENSE_TO_SPARSE = 702
SPARSE_TO_DENSE = 703

# DataFrame
MAP = 710
DESCRIBE = 712
FILL_NA = 713
AGGREGATE = 714
STRING_METHOD = 715
DATETIME_METHOD = 716
APPLY = 717
TRANSFORM = 718
CHECK_NA = 719
DROP_NA = 720
NUNIQUE = 721
CUT = 722
SHIFT = 723
DIFF = 724
VALUE_COUNTS = 725
TO_DATETIME = 726
DATAFRAME_DROP = 727
DROP_DUPLICATES = 728
MELT = 729
RENAME = 731
INSERT = 732
CARTESIAN_CHUNK = 734
EXPLODE = 735
REPLACE = 736
RENAME_AXIS = 737
DATAFRAME_EVAL = 738
DUPLICATED = 739
DELETE = 740
ALIGN = 741
CASE_WHEN = 742
PIVOT = 743
PIVOT_TABLE = 744

FUSE = 801

# LLM
DASHSCOPE_TEXT_GENERATION = 810
DASHSCOPE_MULTI_MODAL_GENERATION = 811
MANAGED_TEXT_MODAL_GENERATION = 812
MANAGED_MULTI_MODAL_GENERATION = 813
LLM_TEXT_SUMMARIZE_TASK = 814
LLM_TEXT_TRANSLATE_TASK = 815
LLM_TEXT_CLASSIFY_TASK = 816

# table like input for tensor
TABLE_COO = 1003
# store tensor as coo format
STORE_COO = 1004

# shuffle
SHUFFLE_PROXY = 2001
DATAFRAME_INDEX_ALIGN = 2004

# indexing
DATAFRAME_SET_INDEX = 2020
DATAFRAME_SET_AXIS = 730
DATAFRAME_ILOC_GETITEM = 2021
DATAFRAME_ILOC_SETITEM = 2022
DATAFRAME_LOC_GETITEM = 2023
DATAFRAME_LOC_SETITEM = 2024

# merge
DATAFRAME_MERGE = 2010
DATAFRAME_SHUFFLE_MERGE_ALIGN = 2011

# bloom filter
DATAFRAME_BLOOM_FILTER = 2014

# append
APPEND = 2015

# reset index
RESET_INDEX = 2028
# reindex
REINDEX = 2029

# groupby
GROUPBY = 2030
GROUPBY_AGG = 2033
GROUPBY_CONCAT = 2034
GROUPBY_HEAD = 2035
GROUPBY_SAMPLE_ILOC = 2036
GROUPBY_SORT_REGULAR_SAMPLE = 2037
GROUPBY_SORT_PIVOT = 2038
GROUPBY_SORT_SHUFFLE = 2039

# parallel sorting by regular sampling
# NOTE: "SMAPLE" is misspelled; the name is kept unchanged because it is
# part of this module's public interface.
PSRS_SORT_REGULAR_SMAPLE = 2040
PSRS_CONCAT_PIVOT = 2041
PSRS_SHUFFLE = 2042
PSRS_ALIGN = 2043

# partition
CALC_PARTITIONS_INFO = 2046
PARTITION_MERGED = 2047

# dataframe sort
SORT_VALUES = 2050
SORT_INDEX = 2051

# window
ROLLING_AGG = 2060
EXPANDING_AGG = 2061
EWM_AGG = 2062

# source & store
READ_CSV = 2100
TO_CSV = 2101
READ_PARQUET = 2103
TO_PARQUET = 2104
READ_SQL = 2105
TO_SQL = 2108
READ_RAYDATASET = 2109
READ_MLDATASET = 2106
READ_ODPS_TABLE = 20111
TO_ODPS_TABLE = 20112
READ_ODPS_VOLUME = 20113
TO_ODPS_VOLUME = 20114
READ_ODPS_QUERY = 20115

TO_CSV_STAT = 2102

# standardize range index
STANDARDIZE_RANGE_INDEX = 2107

# successors exclusive
SUCCESSORS_EXCLUSIVE = 2002

# read images
IMREAD = 2110

# machine learning

# pairwise distances
PAIRWISE_EUCLIDEAN_DISTANCES = 2200
PAIRWISE_MANHATTAN_DISTANCES = 2201
PAIRWISE_COSINE_DISTANCES = 2202
PAIRWISE_HAVERSINE_DISTANCES = 2203
PAIRWISE_DISTANCES_TOPK = 2204

# nearest neighbors
KD_TREE_TRAIN = 2230
KD_TREE_QUERY = 2231
BALL_TREE_TRAIN = 2232
BALL_TREE_QUERY = 2233
FAISS_BUILD_INDEX = 2234
FAISS_TRAIN_SAMPLED_INDEX = 2235
FAISS_QUERY = 2236
PROXIMA_SIMPLE_BUILDER = 2238
PROXIMA_SIMPLE_SEARCHER = 2239
KNEIGHBORS_GRAPH = 2237

# cluster
KMEANS_PLUS_PLUS_INIT = 2250
KMEANS_SCALABLE_PLUS_PLUS_INIT = 2251
KMEANS_ELKAN_INIT_BOUNDS = 2252
KMEANS_ELKAN_UPDATE = 2253
KMEANS_ELKAN_POSTPROCESS = 2254
KMEANS_LLOYD_UPDATE = 2255
KMEANS_LLOYD_POSTPROCESS = 2256
KMEANS_INERTIA = 2257
# NOTE: "RELOCASTE" is misspelled; name kept unchanged for compatibility.
KMEANS_RELOCASTE_EMPTY_CLUSTERS = 2258

# XGBoost
XGBOOST_TRAIN = 3001
XGBOOST_PREDICT = 3002
TO_DMATRIX = 3003
START_TRACKER = 3004

# LightGBM
LGBM_TRAIN = 3020
LGBM_PREDICT = 3021
LGBM_ALIGN = 3022

# TensorFlow
RUN_TENSORFLOW = 3010

# PyTorch
RUN_PYTORCH = 3011

# statsmodels
STATSMODELS_TRAIN = 3012
STATSMODELS_PREDICT = 3013

# learn
CONNECTED_COMPONENTS = 3100

# checks
CHECK_NON_NEGATIVE = 3300
# classifier check targets
CHECK_TARGETS = 3301
ASSERT_ALL_FINITE = 3302
# multilabel
IS_MULTILABEL = 3303
# get type
TYPE_OF_TARGET = 3304
# classification
ACCURACY_SCORE = 3305
# port detection
COLLECT_PORTS = 3306
# unique labels
UNIQUE_LABELS = 3307
# preprocessing
LABEL_BINARIZE = 3308
# ensemble: blockwise
BLOCKWISE_ENSEMBLE_FIT = 3309
BLOCKWISE_ENSEMBLE_PREDICT = 3310
# ensemble: bagging
BAGGING_SHUFFLE_SAMPLE = 3400
BAGGING_SHUFFLE_REINDEX = 3401
BAGGING_FIT = 3402
BAGGING_PREDICTION = 3403

# Remote Functions and class
# NOTE: "FUNCATION" is misspelled; name kept unchanged for compatibility.
REMOTE_FUNCATION = 5001
RUN_SCRIPT = 5002

CHOLESKY_FUSE = 999988

# MaxFrame-dedicated functions
DATAFRAME_RESHUFFLE = 10001
FLATMAP = 10002
FLATJSON = 10003
APPLY_CHUNK = 10004
SERIES_DICT_GETITEM = 10005
SERIES_DICT_SETITEM = 10006
SERIES_DICT_LENGTH = 10007
SERIES_DICT_REMOVE = 10008
SERIES_DICT_CONTAINS = 10009
SERIES_DICT_FLATTEN = 10010
SERIES_LIST_GETITEM = 10020
SERIES_LIST_SETITEM = 10021
SERIES_LIST_CONTAINS = 10022
SERIES_LIST_LENGTH = 10023
SERIES_LIST_INSERT = 10024
SERIES_LIST_EXTEND = 10025
SERIES_LIST_POP = 10026
SERIES_LIST_SORT = 10027
SERIES_LIST_FLATTEN = 10028

# MaxFrame internal operators
DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
GROUPBY_AGGR_SAME_INDEX_MERGE = 100002
DATAFRAME_ILOC_GET_AND_RENAME_ITEM = 100003
COLLECT_MODEL_RESULT = 100004
MODEL_DATA_SOURCE = 100005

# fetches
FETCH_SHUFFLE = 999998
FETCH = 999999

# Import-time sanity check: every integer-valued module global must have a
# distinct value.  Maps each value seen so far to the first name bound to it.
_val_to_dict = {}
# Iterate over a snapshot of the namespace: the loop variables themselves are
# module globals, so binding them would otherwise resize the dict being
# iterated.
for _var_name, _var_val in globals().copy().items():
    if not isinstance(_var_val, int):
        # Skip non-opcode globals (module metadata, the helper dict itself).
        continue
    if _var_val in _val_to_dict:  # pragma: no cover
        raise ImportError(
            f"Cannot import opcode: {_var_name} and "
            f"{_val_to_dict[_var_val]} collides with value {_var_val}"
        )
    _val_to_dict[_var_val] = _var_name
# Remove the helpers so only the opcode constants remain in the namespace.
del _val_to_dict, _var_name, _var_val