urlutils.py:

#!/usr/bin/env python3
""" Utility URL methods """

import hashlib
import sys
import errno
import time
import yaml
import json
from os import environ
from os.path import getmtime, join
from urllib.request import urlopen, Request
from urllib.error import HTTPError

MAXAGE = int(environ.get('CACHE_AGE', '600'))  # 10 min in seconds
CACHE = environ.get('CACHE')
DEBUG = environ.get('CACHE_DEBUG')


def isFileStale(filename):
    """ is file older than max age (default 10 minutes)? """
    try:
        t = getmtime(filename)
    except OSError as e:
        # a missing file counts as stale; re-raise anything else
        if e.errno != errno.ENOENT:
            raise
        return True
    diff = time.time() - t
    return diff > MAXAGE


def hashurl(url):
    """ create hash from url """
    return hashlib.sha224(url.encode()).hexdigest()


def geturl(url):
    """ Get url contents -- no caching """
    req = Request(url)
    resp = urlopen(req)
    return resp.read()


def urlexists(url):
    """ Does URL exist? """
    req = Request(url)
    try:
        urlopen(req)
        return True
    except HTTPError:
        return False


def urlcache(url):
    """ Get url contents -- optional caching """
    if CACHE:
        # cache filename is a hash of the URL
        cache = join(CACHE, hashurl(url) + ".tmp")
        if isFileStale(cache):
            if DEBUG:
                print("Caching %s" % url, file=sys.stderr)
            data = geturl(url)
            with open(cache, 'wb') as w:
                w.write(data)
            return data
        if DEBUG:
            print("Using cache for %s" % url, file=sys.stderr)
        # read back as bytes to match what geturl() returns
        with open(cache, 'rb') as r:
            return r.read()
    if DEBUG:
        print("Fetching %s" % url, file=sys.stderr)
    return geturl(url)


def loadyaml(url):
    """ Load YAML from a URL (cached if CACHE is set) """
    return yaml.safe_load(urlcache(url))


def loadjson(url):
    """ Load JSON from a URL (cached if CACHE is set) """
    return json.loads(urlcache(url))


# Test code
if __name__ == '__main__':
    for arg in sys.argv[1:]:
        print(arg)
        print(loadyaml(arg))
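
# Usage sketch (not part of urlutils.py): caching is enabled by setting
# the CACHE environment variable before importing the module, since
# CACHE, CACHE_DEBUG, and MAXAGE are read once at import time. The
# cache directory path and URL below are hypothetical examples.
#
#   import os
#   os.makedirs('/tmp/urlcache', exist_ok=True)   # directory must exist
#   os.environ['CACHE'] = '/tmp/urlcache'         # where cached files go
#   os.environ['CACHE_DEBUG'] = '1'               # log cache hits/misses
#
#   import urlutils
#   config = urlutils.loadyaml('https://example.com/config.yaml')
#
# A second call within CACHE_AGE seconds (600 by default) reads the
# cached copy from disk instead of refetching the URL.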