in src/ab/plugins/db/odps_helper.py [0:0]
def count(self, table_name, partitions=None, skip_high_cost_query=True, cost_threshold=30):
    """
    Count the rows of a table, optionally restricted to some partitions.

    :param table_name: name of the table to count; interpolated into the SQL as-is
    :param partitions: partitions to count, turned into the ``where`` condition
        by ``ODPS.join_partitions``. When None, the condition is taken from
        ``self.get_partition_condition(table_name)`` instead
    :param skip_high_cost_query: if the table or table partition is too large,
        or the query is too complex, it may take too long to count every line.
        When true, the query cost is estimated first and None is returned
        instead of running a query whose estimate exceeds ``cost_threshold``.
        When false, no estimate is requested and the count always runs
    :param cost_threshold: default is 30 RMB, for counting 100GB data
    :return:
        None: table too large, skip count
        >=0: real count (0 is returned without querying when the partition
        condition is an empty string)
    """
    if partitions is None:
        partition_condition = self.get_partition_condition(table_name)
    else:
        partition_condition = ODPS.join_partitions(partitions)

    # An empty condition is answered with 0 without touching the service --
    # presumably it means there is no partition to count; confirm against
    # get_partition_condition / join_partitions.
    if partition_condition == '':
        return 0

    if partition_condition is None:
        # No condition at all: count the whole table.
        sql = 'select count(*) as count from {table_name}'.format(table_name=table_name)
    else:
        sql = 'select count(*) as count from {table_name} where {partition_condition}'.format(
            table_name=table_name, partition_condition=partition_condition)

    # Only ask for a cost estimate when it can change the outcome; the
    # short-circuit avoids a useless extra call when skipping is disabled.
    if skip_high_cost_query and self.execute_sql_cost(sql) > cost_threshold:
        return None

    with self.odps.execute_sql(sql).open_reader() as reader:
        return reader[0]['count']