aoo-stats/countries.py (61 lines of code) (raw):

################################################################
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
################################################################

"""Aggregate per-country SourceForge download counts by language.

Usage: python countries.py <urls.lst> <start-date> <end-date>

For every file URL listed in <urls.lst> the script requests
``<url>/stats/json`` for the given date range, reads the "countries"
entry of the JSON response, and sums the counts per language code
(taken from the file name).  The totals are printed per language,
highest count first.
"""

import urllib
import json
import os
import hashlib
import datetime
import time
import sys

try:  # Python 2
    from urllib import urlencode, urlopen
except ImportError:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import urlopen


def getSourceForgeStats(download, start_date, end_date):
    """Fetch the raw stats response for one download URL.

    download   -- URL of the file; "/stats/json" is appended to it
    start_date -- first day of the range, passed through as given
    end_date   -- last day of the range, passed through as given

    Returns the response body exactly as read from the connection, or
    an empty string when three attempts in a row have failed.
    """
    print(download)
    url = (download + "/stats/json?start_date=" + start_date +
           "&end_date=" + end_date)

    attempts = 0
    while attempts < 3:
        try:
            conn = urlopen(url)
            try:
                return conn.read()
            finally:
                # Release the socket even when read() fails.
                conn.close()
        except Exception:
            # Deliberately broad so any fetch problem is retried, but
            # unlike a bare "except:" this lets Ctrl-C and sys.exit
            # propagate.
            attempts += 1
            print("error " + str(attempts))
    return ""


# extracts the language code from the URL
# this logic is very sensitive to the exact naming conventions
def getLanguage(url):
    """Return the text after the last "_" of url, minus the extension.

    Only the ".exe", ".dmg" and ".tar.gz" extensions are stripped.  When
    there is no underscore the whole (stripped) string is returned.
    """
    s = str(url)
    for suffix in (".exe", ".dmg", ".tar.gz"):
        if s.endswith(suffix):
            s = s[:-len(suffix)]
            break
    # rfind() yields -1 when "_" is absent, so the slice starts at 0.
    return s[s.rfind("_") + 1:]


# dictionary of language code to country_dict (dictionary of country name to count)
master_dict = {}


def mergeCountries(lang, countries):
    """Add a list of (country name, count) pairs to master_dict[lang].

    An entry for lang is created even when countries is empty.
    """
    country_dict = master_dict.setdefault(lang, {})
    for country_tuple in countries:
        country_name = country_tuple[0]
        country_count = country_tuple[1]
        country_dict[country_name] = (
            country_dict.get(country_name, 0) + country_count)


def reportLines(stats):
    """Yield the report lines for a {language: {country: count}} dict.

    Each language contributes a "===<lang>===" header, one
    "<country>,<count>" line per country in descending count order, and
    two empty lines.
    """
    for lang in stats:
        yield "===" + lang + "==="
        country_dict = stats[lang]
        for country in sorted(country_dict, key=country_dict.get,
                              reverse=True):
            yield country + "," + str(country_dict[country])
        yield ""
        yield ""


def _emit(text):
    """Print one line, encoding Python 2 unicode objects as UTF-8."""
    if not isinstance(text, str):
        text = text.encode("utf-8")
    print(text)


def main(argv=None):
    """Run the script; returns the process exit status.

    0 -- every listed download was merged
    1 -- at least one download was skipped (report is still printed)
    2 -- wrong number of command-line arguments
    """
    if argv is None:
        argv = sys.argv

    # argv[0] is the script name, so three real arguments mean len == 4.
    if len(argv) < 4:
        print("syntax: python countries.py <urls.lst> <start-date> <end-date>")
        print("where <urls.lst> is a list of files URLs to gather stats on, and <start-date> and <end-date> are in YYYY-MM-DD format.")
        return 2

    with open(argv[1]) as url_file:
        stripped = (line.strip() for line in url_file)
        # Blank lines would otherwise be requested as empty URLs.
        downloads = [line for line in stripped if line]

    skipped = 0
    for download in downloads:
        raw = getSourceForgeStats(download, argv[2], argv[3])
        if not raw:
            sys.stderr.write("skipping %s: download failed\n" % download)
            skipped += 1
            continue
        try:
            # On Python 3 the body is bytes; decode before parsing so
            # that older 3.x json modules accept it as well.
            if isinstance(raw, bytes) and not isinstance(raw, str):
                raw = raw.decode("utf-8")
            countries = json.loads(raw)["countries"]
        except (ValueError, KeyError, TypeError) as err:
            sys.stderr.write("skipping %s: unusable stats response (%s)\n"
                             % (download, err))
            skipped += 1
            continue
        mergeCountries(getLanguage(download), countries)

    for line in reportLines(master_dict):
        _emit(line)

    return 1 if skipped else 0


if __name__ == "__main__":
    sys.exit(main())