aoo-stats/downloads-by-country.py (60 lines of code) (raw):

################################################################
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
################################################################

import urllib
import json
import os
import hashlib
import datetime
import time
import sys

try:
    # Python 2 keeps both helpers directly in the urllib module.
    from urllib import urlencode
    from urllib import urlopen
except ImportError:
    # Python 3 moved them into urllib.parse / urllib.request.
    from urllib.parse import urlencode
    from urllib.request import urlopen


def getSourceForgeStats(download, start_date, end_date, max_attempts=3):
    """Fetch the raw JSON statistics document for one download URL.

    The request URL is ``<download>/stats/json`` with ``start_date`` and
    ``end_date`` passed as query parameters.

    Args:
        download: base URL of the file to gather stats on.
        start_date: first day of the period, as passed on the command line.
        end_date: last day of the period, as passed on the command line.
        max_attempts: how many times to try the request before giving up.

    Returns:
        The response body exactly as read from the connection, or the
        empty string "" when every attempt failed.
    """
    # A list of pairs (not a dict) keeps the parameter order stable.
    query = urlencode([("start_date", start_date), ("end_date", end_date)])
    url = download + "/stats/json?" + query

    for attempt in range(1, max_attempts + 1):
        try:
            conn = urlopen(url)
            try:
                return conn.read()
            finally:
                # Always release the connection, even if read() fails.
                conn.close()
        except Exception:
            # Best effort: report the failed attempt and retry.  The message
            # goes to stderr because stdout carries the generated report.
            sys.stderr.write("error " + str(attempt) + "\n")
    return ""


# dictionary of country name to accumulated download count
country_dict = {}


def mergeCountries(countries):
    """Add a sequence of (country name, count) entries into country_dict.

    Args:
        countries: iterable of indexable entries whose element 0 is the
            country name and element 1 is the count to add for it.
    """
    for country_tuple in countries:
        country_name = country_tuple[0]
        country_count = country_tuple[1]
        country_dict[country_name] = (
            country_dict.get(country_name, 0) + country_count
        )


# sys.argv[0] is always the script name, so three real arguments mean a
# length of four.  Only enforce this when run as a script so that importing
# the definitions above has no side effects.
if __name__ == "__main__" and len(sys.argv) < 4:
    sys.stderr.write("syntax: python downloads-by-country.py <urls.lst> <start-date> <end-date>\n")
    sys.stderr.write("where <urls.lst> is a list of files URLs to gather stats on, and <start-date> and <end-date> are in YYYY-MM-DD format.\n")
    sys.exit(1)
def escapeHtml(value):
    """Return value as ASCII-only text that is safe to place in HTML.

    Markup characters are replaced by entities and every non-ASCII
    character by a numeric character reference, so the result prints the
    same way regardless of the output encoding.
    """
    if isinstance(value, bytes):
        value = value.decode("utf-8", "replace")
    elif not isinstance(value, type(u"")):
        value = str(value)
    # "&" must be handled first so later entities are not escaped twice.
    escaped = (value.replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;")
                    .replace('"', "&quot;")
                    .replace("'", "&#x27;"))
    return escaped.encode("ascii", "xmlcharrefreplace").decode("ascii")


def loadDownloadUrls(path):
    """Read the download URLs listed one per line in the file at path.

    Surrounding whitespace is stripped and blank lines are ignored.
    """
    with open(path) as listing:
        stripped = (line.strip() for line in listing)
        return [entry for entry in stripped if entry]


def renderReport(counts, start_date, end_date):
    """Build the HTML report for a mapping of country name to downloads.

    Args:
        counts: dictionary of country name to download count.
        start_date: first day of the reported period (shown in the text).
        end_date: last day of the reported period (shown in the text).

    Returns:
        The complete HTML document as one newline-separated string, with
        countries ranked from most to fewest downloads.
    """
    lines = [
        "<html>",
        "<head>",
        "<title>OpenOffice Downloads by Country/Territory</title>",
        "</head>",
        "<body>",
        "<p>This table shows downloads per <a href='https://en.wikipedia.org/wiki/Country_code_top-level_domain'>country/territory</a> for the period from "
        + escapeHtml(start_date) + " to " + escapeHtml(end_date) + ".</p>",
        "<table border='1' cellpadding='10'>",
        "<tr>",
        "<th>Rank</th>",
        "<th>Country/Territory</th>",
        "<th>Downloads</th>",
        "</tr>",
    ]
    ranked = sorted(counts, key=lambda name: counts[name], reverse=True)
    for rank, country in enumerate(ranked, 1):
        lines.extend([
            "<tr>",
            "<td align='right'>" + "#" + str(rank) + "</td>",
            "<td>" + escapeHtml(country) + "</td>",
            "<td align='right'>" + escapeHtml(counts[country]) + "</td>",
            "</tr>",
        ])
    lines.extend(["</table>", "</body>", "</html>"])
    return "\n".join(lines)


def main():
    """Gather per-country stats for every listed download and print the report.

    Reads the URL list path, start date and end date from sys.argv[1..3],
    and uses getSourceForgeStats, mergeCountries and country_dict, which
    are defined earlier in this file.
    """
    for download in loadDownloadUrls(sys.argv[1]):
        data = getSourceForgeStats(download, sys.argv[2], sys.argv[3])
        if not data:
            # getSourceForgeStats returns "" once its retries are exhausted;
            # skip this download instead of crashing in json.loads.
            sys.stderr.write("warning: no stats retrieved for " + download + "\n")
            continue
        obj = json.loads(data)
        mergeCountries(obj["countries"])
    print(renderReport(country_dict, sys.argv[2], sys.argv[3]))


if __name__ == "__main__":
    main()