core/maxframe/tensor/misc/unique.py (85 lines of code) (raw):

# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ... import opcodes
from ...serialization.serializables import BoolField, Int32Field
from ..core import TensorOrder
from ..operators import TensorHasInput, TensorOperatorMixin
from ..utils import validate_axis


class TensorUnique(TensorHasInput, TensorOperatorMixin):
    """Operator that describes a ``unique`` computation over one tensor.

    The operator yields the unique-values tensor plus one extra tensor for
    each of ``return_index``, ``return_inverse`` and ``return_counts`` that
    is switched on.
    """

    _op_type_ = opcodes.UNIQUE

    # Flags selecting which optional outputs are produced.
    return_index = BoolField("return_index", default=False)
    return_inverse = BoolField("return_inverse", default=False)
    return_counts = BoolField("return_counts", default=False)
    # Axis to operate on; ``__call__`` replaces ``None`` with 0 and passes
    # any other value through ``validate_axis``.
    axis = Int32Field("axis", default=None)

    @property
    def output_limit(self):
        """Number of outputs: the unique tensor plus every enabled extra."""
        optional_flags = (self.return_index, self.return_inverse, self.return_counts)
        return 1 + sum(optional_flags)

    @classmethod
    def _gen_kws(cls, op: "TensorUnique", input_obj, chunk=False, chunk_index=None):
        """Build one keyword dict per output of ``op``.

        Parameters
        ----------
        op : TensorUnique
            Operator whose ``axis`` and ``return_*`` flags decide which
            outputs are described. ``op.axis`` is used as a list index, so
            it must already be an integer.
        input_obj
            Input object; its ``shape``, ``dtype`` and ``op.gpu`` are read.
        chunk : bool
            When true, every dict also receives an ``"index"`` entry.
        chunk_index : int, optional
            Position written into the ``"index"`` entries; a falsy value
            (including ``None``) is treated as 0.

        Returns
        -------
        list of dict
            Ordered as: unique values, then indices, inverse and counts for
            whichever of those flags are set.
        """

        def _vector_kw(length, dtype, kind):
            # Description of a 1-D auxiliary output; key order mirrors the
            # unique-values dict with the extra "type" tag appended.
            spec = {
                "shape": (length,),
                "dtype": dtype,
                "gpu": input_obj.op.gpu,
                "type": kind,
            }
            if chunk:
                spec["index"] = (chunk_index or 0,)
            return spec

        axis = op.axis

        # Unique-values output: same shape as the input except that the
        # length along ``axis`` is recorded as NaN.
        unique_shape = list(input_obj.shape)
        unique_shape[axis] = np.nan
        primary = {
            "shape": tuple(unique_shape),
            "dtype": input_obj.dtype,
            "gpu": input_obj.op.gpu,
        }
        if chunk:
            position = [0] * len(unique_shape)
            position[axis] = chunk_index or 0
            primary["index"] = tuple(position)

        kws = [primary]

        if op.return_index:
            kws.append(_vector_kw(np.nan, np.dtype(np.intp), "indices"))

        if op.return_inverse:
            # The inverse output has one entry per input element along
            # ``axis``, so its length is taken from the input shape.
            kws.append(
                _vector_kw(input_obj.shape[axis], np.dtype(np.intp), "inverse")
            )

        if op.return_counts:
            kws.append(_vector_kw(np.nan, np.dtype(int), "counts"))

        return kws

    def __call__(self, ar):
        """Apply the operator to ``ar`` and return the resulting tensor(s).

        ``ar`` is first passed through ``atleast_1d``. With ``axis`` unset,
        an input of more than one dimension is flattened and axis 0 is used;
        otherwise the axis is normalised with ``validate_axis``. Note that
        ``self.axis`` is overwritten with the resolved value.

        Returns a single tensor when only the unique values are requested,
        otherwise the full sequence returned by ``new_tensors``.
        """
        from .atleast_1d import atleast_1d

        ar = atleast_1d(ar)
        if self.axis is not None:
            self.axis = validate_axis(ar.ndim, self.axis)
        else:
            if ar.ndim > 1:
                ar = ar.flatten()
            self.axis = 0

        outputs = self.new_tensors(
            [ar], kws=self._gen_kws(self, ar), order=TensorOrder.C_ORDER
        )
        return outputs[0] if len(outputs) == 1 else outputs


def unique(
    ar,
    return_index=False,
    return_inverse=False,
    return_counts=False,
    axis=None,
):
    """
    Find the unique elements of a tensor.

    Returns the sorted unique elements of a tensor. There are three optional
    outputs in addition to the unique elements:

    * the indices of the input tensor that give the unique values
    * the indices of the unique tensor that reconstruct the input tensor
    * the number of times each unique value comes up in the input tensor

    Parameters
    ----------
    ar : array_like
        Input tensor. Unless `axis` is specified, this will be flattened if it
        is not already 1-D.
    return_index : bool, optional
        If True, also return the indices of `ar` (along the specified axis,
        if provided, or in the flattened tensor) that result in the unique
        tensor.
    return_inverse : bool, optional
        If True, also return the indices of the unique tensor (for the
        specified axis, if provided) that can be used to reconstruct `ar`.
    return_counts : bool, optional
        If True, also return the number of times each unique item appears
        in `ar`.
    axis : int or None, optional
        The axis to operate on. If None, `ar` will be flattened. If an
        integer, the subarrays indexed by the given axis will be flattened
        and treated as the elements of a 1-D tensor with the dimension of
        the given axis. Object tensors or structured tensors that contain
        objects are not supported if the `axis` kwarg is used. The default
        is None.

    Returns
    -------
    unique : Tensor
        The sorted unique values.
    unique_indices : Tensor, optional
        The indices of the first occurrences of the unique values in the
        original tensor. Only provided if `return_index` is True.
    unique_inverse : Tensor, optional
        The indices to reconstruct the original tensor from the
        unique tensor. Only provided if `return_inverse` is True.
    unique_counts : Tensor, optional
        The number of times each of the unique values comes up in the
        original tensor. Only provided if `return_counts` is True.

    Examples
    --------
    >>> import maxframe.tensor as mt

    >>> mt.unique([1, 1, 2, 2, 3, 3]).execute()
    array([1, 2, 3])
    >>> a = mt.array([[1, 1], [2, 3]])
    >>> mt.unique(a).execute()
    array([1, 2, 3])

    Return the unique rows of a 2D tensor

    >>> a = mt.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> mt.unique(a, axis=0).execute()
    array([[1, 0, 0], [2, 3, 4]])

    Return the indices of the original tensor that give the unique values:

    >>> a = mt.array(['a', 'b', 'b', 'c', 'a'])
    >>> u, indices = mt.unique(a, return_index=True)
    >>> u.execute()
    array(['a', 'b', 'c'], dtype='<U1')
    >>> indices.execute()
    array([0, 1, 3])
    >>> a[indices].execute()
    array(['a', 'b', 'c'], dtype='<U1')

    Reconstruct the input array from the unique values:

    >>> a = mt.array([1, 2, 6, 4, 2, 3, 2])
    >>> u, indices = mt.unique(a, return_inverse=True)
    >>> u.execute()
    array([1, 2, 3, 4, 6])
    >>> indices.execute()
    array([0, 1, 4, 3, 1, 2, 1])
    >>> u[indices].execute()
    array([1, 2, 6, 4, 2, 3, 2])
    """
    return TensorUnique(
        return_index=return_index,
        return_inverse=return_inverse,
        return_counts=return_counts,
        axis=axis,
    )(ar)