def sample()

in src/ab/plugins/db/odps_helper.py [0:0]


    def sample(self, table_name: str, column_names: list, partitions: list, total_count: int):
        """
        Run a sampling query against a table and return the sampled rows.

        Builds a ``select <fields> from <table> where <condition> limit <max_count>``
        statement whose condition combines the partition filter (when
        ``ODPS.join_partitions`` yields one) with a ``sample(<ratio>) = true``
        predicate, where ``ratio = total_count // self.max_count``, and executes
        it through ``self.db.table_sql``.

        args:
            table_name: table to read from; interpolated into the SQL text as-is
            column_names: columns to select; joined with ', ' into the field list
            partitions: partition specs passed to ``ODPS.join_partitions`` to build the partition filter
            total_count: total row count of target partitions or whole table
        returns:
            sample_data: the result of ``self.db.table_sql`` for the generated statement
        raises:
            AlgorithmException: if ``total_count`` is None
            AssertionError: if ``total_count`` is not greater than ``self.max_count``
        """
        if total_count is None:
            raise AlgorithmException(data='选择的表过大,仅支持最旧采样')

        # Raised explicitly (instead of via an `assert` statement) so the guard is
        # not stripped under `python -O`; the exception type and message are the
        # same ones an `assert` would produce, so existing callers are unaffected.
        if not total_count > self.max_count:
            raise AssertionError('system error, total_count must be greater than sampler max_count')

        partition_condition = ODPS.join_partitions(partitions)

        ratio = total_count // self.max_count
        # NOTE(review): sample(n) is presumably ODPS's "keep about 1/n of the rows"
        # predicate -- confirm against the MaxCompute documentation.
        sample_condition = 'sample({ratio}) = true'.format(ratio=ratio)

        if partition_condition:
            condition = '({condition}) and {sample_condition}'.format(
                condition=partition_condition, sample_condition=sample_condition)
        else:
            condition = sample_condition

        # The sample predicate is never empty, so a WHERE clause is always emitted.
        where = ' where {condition}'.format(condition=condition)

        # NOTE(review): table and column names are spliced into the statement
        # unescaped; callers are assumed to pass trusted identifiers -- confirm.
        fields = ', '.join(column_names)
        sql = 'select {fields} from {table_name}{where} limit {limit}'.format(
            fields=fields, table_name=table_name, where=where, limit=self.max_count)

        # Pass the statement as a lazy %-argument: without a placeholder in the
        # message, logging fails to format the record and the SQL is never logged.
        logger.debug('sample sql: %s', sql)
        return self.db.table_sql(sql, table_name, column_names)