def gather_table_stats()

in redshift_monitoring.py [0:0]


def gather_table_stats(cursor, cluster):
    run_command(cursor,
                "select /* Lambda CloudWatch Exporter */ \"schema\" || '.' || \"table\" as table, encoded, max_varchar, unsorted, stats_off, tbl_rows, skew_sortkey1, skew_rows from svv_table_info")
    tables_not_compressed = 0
    max_skew_ratio = 0
    total_skew_ratio = 0
    number_tables_skew = 0
    number_tables = 0
    max_skew_sort_ratio = 0
    total_skew_sort_ratio = 0
    number_tables_skew_sort = 0
    number_tables_statsoff = 0
    max_varchar_size = 0
    max_unsorted_pct = 0
    total_rows = 0

    result = cursor.fetchall()

    for table in result:
        table_name, encoded, max_varchar, unsorted, stats_off, tbl_rows, skew_sortkey1, skew_rows = table
        number_tables += 1
        if encoded == 'N':
            tables_not_compressed += 1
        if skew_rows is not None:
            if skew_rows > max_skew_ratio:
                max_skew_ratio = skew_rows
            total_skew_ratio += skew_rows
            number_tables_skew += 1
        if skew_sortkey1 is not None:
            if skew_sortkey1 > max_skew_sort_ratio:
                max_skew_sort_ratio = skew_sortkey1
            total_skew_sort_ratio += skew_sortkey1
            number_tables_skew_sort += 1
        if stats_off is not None and stats_off > 5:
            number_tables_statsoff += 1
        if max_varchar is not None and max_varchar > max_varchar_size:
            max_varchar_size = max_varchar
        if unsorted is not None and unsorted > max_unsorted_pct:
            max_unsorted_pct = unsorted
        if tbl_rows is not None:
            total_rows += tbl_rows

    if number_tables_skew > 0:
        avg_skew_ratio = total_skew_ratio / number_tables_skew
    else:
        avg_skew_ratio = 0

    if number_tables_skew_sort > 0:
        avg_skew_sort_ratio = total_skew_sort_ratio / number_tables_skew_sort
    else:
        avg_skew_sort_ratio = 0

    # build up the metrics to put in cloudwatch
    metrics = []

    def add_metric(metric_name, value, unit):
        metrics.append({
            'MetricName': metric_name,
            'Dimensions': [
                {'Name': 'ClusterIdentifier', 'Value': cluster}
            ],
            'Timestamp': datetime.datetime.utcnow(),
            'Value': value,
            'Unit': unit
        })

    units_count = 'Count'
    units_none = 'None'
    units_pct = 'Percent'

    add_metric('TablesNotCompressed', tables_not_compressed, units_count)
    add_metric('MaxSkewRatio', max_skew_ratio, units_none)
    add_metric('MaxSkewSortRatio', max_skew_sort_ratio, units_none)
    add_metric('AvgSkewRatio', avg_skew_ratio, units_none)
    add_metric('AvgSkewSortRatio', avg_skew_sort_ratio, units_none)
    add_metric('Tables', number_tables, units_count)
    add_metric('Rows', total_rows, units_count)
    add_metric('TablesStatsOff', number_tables_statsoff, units_count)
    add_metric('MaxVarcharSize', max_varchar_size, units_none)
    add_metric('MaxUnsorted', max_unsorted_pct, units_pct)

    return metrics