def sample()

in src/ab/plugins/db/odps_helper.py [0:0]


    def sample(self, table_name: str, column_names: list, partitions: list, total_count: int):
        """
        Select the trailing rows of a table (or of the given partitions).

        Every row is numbered with ``row_number()`` in a sub-query, and only rows
        whose number is greater than ``total_count - self.max_count`` are kept,
        i.e. at most ``self.max_count`` rows when ``total_count`` is accurate.

        args:
            table_name: name of the table to sample from
            column_names: columns to select; joined with ', ' into the field list
            partitions: partition specs handed to ``ODPS.join_partitions`` to build
                the optional ``where`` clause of the inner query
            total_count: total row count of target partitions or whole table
        returns:
            sample_data, as returned by ``self.db.table_sql``
        raises:
            AlgorithmException: if ``total_count`` is None
            AssertionError: if ``total_count`` is not greater than ``self.max_count``
        """
        if total_count is None:
            raise AlgorithmException(data='选择的表过大,仅支持最旧采样')

        # Internal invariant rather than user-input validation: this code path
        # expects tables that hold more rows than the sampler limit.
        assert total_count > self.max_count, 'system error, total_count must be greater than sampler max_count'

        condition = ODPS.join_partitions(partitions)
        # Restrict the inner query to the requested partitions only when a
        # non-empty condition was produced.
        inner_where = ' where {condition}'.format(condition=condition) if condition else ''

        fields = ', '.join(column_names)
        # NOTE(review): the window has no ORDER BY, so which rows receive the
        # highest row numbers depends on the engine's row order -- confirm that
        # this matches the intended "tail" semantics.
        sql = """select {fields} from 
                    (select {fields}, row_number() over (partition by 1) as _xlab_tail_sampling_row_number from {table_name}{inner_where}) a
                where _xlab_tail_sampling_row_number > {rn}""".format(
            fields=fields, table_name=table_name, inner_where=inner_where, rn=total_count - self.max_count
        )

        # Use a %s placeholder: logging applies ``msg % args``, so passing the
        # SQL as an extra argument without a placeholder makes the record fail
        # to format and the statement is never logged.
        logger.debug('sample sql: %s', sql)
        return self.db.table_sql(sql, table_name, column_names)