perf_dashboard/python_clientlibs_download.py (81 lines of code) (raw):

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Collect weekly PyPI download counts for Python client libraries.

For every group of libraries in ``CLIENTLIBS`` this script queries the
date-sharded ``the-psf.pypi.downloads*`` BigQuery tables for the seven days
ending today and hands the resulting rows to ``bq_utils.insert_rows`` so they
are stored in the table named after the group.
"""

import datetime
import os
import sys
import time
import uuid

from google.cloud import bigquery

import bq_utils

# Name of the environment variable holding the destination project id.
GCLOUD_PROJECT_ENV = 'GCLOUD_PROJECT'

# Format of the date strings accepted by this module and of the
# _TABLE_SUFFIX values used in the query (YYYYMMDD).
DATETIME_FORMAT = '%Y%m%d'

DATASET_NAME = 'python_clientlibs_download_by_week'

VENEER_TABLE_NAME = 'veneer_client_libs'
STACKDRIVER_TABLE_NAME = 'stackdriver_client_libs'
GRPC_TABLE_NAME = 'grpc_lib'
THIRD_PARTY_TABLE_NAME = 'third_party_client_libs'

TABLES = [
    VENEER_TABLE_NAME,
    GRPC_TABLE_NAME,
    STACKDRIVER_TABLE_NAME,
    THIRD_PARTY_TABLE_NAME,
]

# Maps each destination table name to the PyPI project names whose
# downloads are counted for that table.
CLIENTLIBS = {
    VENEER_TABLE_NAME: [
        'google-cloud-core',
        'google-cloud-speech',
        'google-cloud-language',
        'google-cloud-pubsub',
        'google-cloud-bigquery',
        'google-cloud-bigtable',
        'google-cloud-datastore',
        'google-cloud-spanner',
        'google-cloud-storage',
        'google-cloud-vision',
        'google-cloud-translate',
        'google-cloud-dns',
        'google-cloud-videointelligence',
    ],
    STACKDRIVER_TABLE_NAME: [
        'google-cloud-logging',
        'google-cloud-monitoring',
        'google-cloud-error_reporting',
        'google-cloud-trace',
    ],
    GRPC_TABLE_NAME: [
        'grpcio',
    ],
    THIRD_PARTY_TABLE_NAME: [
        'pandas-gbq',
    ]
}


def get_weekly_clientlibs_downloads(clientlibs_table_name, date_str):
    """Use a SQL query to collect the weekly download data of the client
    libraries.

    The query counts downloads over the seven daily tables ending at
    ``date_str`` (inclusive), grouped by library name.

    Args:
        clientlibs_table_name (str): Table name, which is the key in the
            CLIENTLIBS dict.
        date_str (str): A date string in "YYYYMMDD" format.

    Returns:
        list: rows of the query result. Each row is the parsed ``date_str``
            (a ``datetime.datetime``) followed by the values of one query
            row, i.e. ``(date_time, client_library_name, download_count)``.

    Raises:
        KeyError: if ``clientlibs_table_name`` is not a key of CLIENTLIBS.
        ValueError: if ``date_str`` does not match DATETIME_FORMAT.
    """
    client_libs = CLIENTLIBS[clientlibs_table_name]
    date_time = datetime.datetime.strptime(date_str, DATETIME_FORMAT)

    # Table suffixes for the 7 days ending at date_str, newest first.
    week_dates = [
        (date_time - datetime.timedelta(days=offset)).strftime(DATETIME_FORMAT)
        for offset in range(7)
    ]

    # NOTE(review): this reads the date-sharded `the-psf.pypi.downloads*`
    # tables; confirm that dataset is still the supported download source.
    query = """
            SELECT
                file.project as client_library_name,
                COUNT(*) as download_count
            FROM
                `the-psf.pypi.downloads*`
            WHERE
                file.project IN UNNEST(@client_libs)
                AND
                _TABLE_SUFFIX IN UNNEST(@week_dates)
            GROUP BY client_library_name
        """

    client = bigquery.Client()

    # Both lists are passed as query parameters rather than being
    # interpolated into the SQL text.
    query_parameters = [
        bigquery.ArrayQueryParameter(
            'client_libs', 'STRING', client_libs),
        bigquery.ArrayQueryParameter(
            'week_dates', 'STRING', week_dates),
    ]
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = query_parameters
    query_job = client.query(query, job_config=job_config)

    # Wait for the job to complete and get the results, prefixing every
    # result row with the date the week ends on.
    return [(date_time,) + row.values() for row in query_job.result()]


def main():
    """Query the weekly downloads of every library group and store them.

    The destination project is read from the environment variable named by
    GCLOUD_PROJECT_ENV; rows for each key of CLIENTLIBS are inserted into the
    table of the same name in DATASET_NAME.
    """
    # Compute the date once so that every table is stamped with the same
    # day and queries the same 7-day window, even if the run crosses
    # midnight. DATETIME_FORMAT is what the callee uses to parse it.
    date_str = datetime.datetime.now().strftime(DATETIME_FORMAT)
    project = os.environ.get(GCLOUD_PROJECT_ENV)

    for table_name in CLIENTLIBS:
        rows = get_weekly_clientlibs_downloads(
            clientlibs_table_name=table_name,
            date_str=date_str)
        bq_utils.insert_rows(
            project=project,
            dataset_name=DATASET_NAME,
            table_name=table_name,
            rows=rows)


if __name__ == '__main__':
    main()