def count()

in src/ab/plugins/db/odps_helper.py [0:0]


    def count(self, table_name, partitions=None, skip_high_cost_query=True, cost_threshold=30):
        """
        Count the rows of a table, optionally restricted to some partitions.

        :param table_name: name of the table to count; it is interpolated
                           into the SQL text as is
        :param partitions: partitions to restrict the count to, turned into a
                           WHERE condition by ``ODPS.join_partitions``; when
                           None, the condition is taken from
                           ``self.get_partition_condition(table_name)``
        :param skip_high_cost_query: if table or table partition is too large,
                                 or the query is too complex,
                                 it may take too long to count every line.
                                 return None instead.
                                 The cost estimate is only requested when this
                                 flag is true.
        :param cost_threshold: default is 30 RMB, for counting 100GB data;
                               the query is skipped only when the estimated
                               cost is strictly greater than this value
        :return:
            None: table too large, skip count
            0: the partition condition is an empty string, no query is run
            otherwise: the ``count`` column of the first record of the result
        """
        if partitions is None:
            partition_condition = self.get_partition_condition(table_name)
        else:
            partition_condition = ODPS.join_partitions(partitions)

        # An empty string is distinct from None (None means "no WHERE clause").
        # Presumably it means there is no partition to count - verify against
        # get_partition_condition / join_partitions.
        if partition_condition == '':
            return 0

        if partition_condition is None:
            sql = 'select count(*) as count from {table_name}'.format(table_name=table_name)
        else:
            sql = 'select count(*) as count from {table_name} where {partition_condition}'.format(
                table_name=table_name, partition_condition=partition_condition)

        # Only ask for a cost estimate when it can actually influence the
        # outcome; with skip_high_cost_query disabled the estimate was unused.
        if skip_high_cost_query:
            cost = self.execute_sql_cost(sql)
            if cost > cost_threshold:
                return None

        with self.odps.execute_sql(sql).open_reader() as reader:
            return reader[0]['count']