in scripts/cronjobs/urlutils.py [0:0]
def getIfNewer(url, sinceTime=None, encoding=None, errors=None, silent=False, debug=False, method='GET'):
    """
    Get a URL if it is newer

    Issues a request for the URL (conditional on If-Modified-Since when
    sinceTime is provided) using the module-level URL_TIMEOUT as the timeout.

    @param url: the url to fetch (required)
    @param sinceTime: the most recent Last-Modified string (format as per mod_date());
        if provided it is sent as the If-Modified-Since request header
    @param encoding: the encoding to use (default None, i.e. return the raw response)
    @param errors: If encoding is provided, this specifies the on-error action (e.g. 'ignore')
    @param silent: if True, do not print the url and headers; this also
        suppresses the debug output (default False)
    @param debug: whether to print additional info (status and response headers);
        only has an effect when silent is False (default False)
    @param method: the HTTP method to use (default GET)
    @return: (lastMod, response)
        - lastMod: the Last-Modified string (from sinceTime if the URL is not later) may be None
        - response: the HTTPResponse (encoding == None) or TextIOBase object.
          'None' if the URL is not newer
    @raise urllib.error.HTTPError: if URL not found or other error (any HTTP error except 304)
    """
    headers = {"If-Modified-Since": sinceTime} if sinceTime else {}
    if not silent: print("%s %s" % (url, headers))

    # Only the fetch itself is guarded: it is the only step that raises HTTPError
    try:
        resp = urlopen(Request(url, headers=headers, method=method), timeout=URL_TIMEOUT)
    except HTTPError as err:
        if err.code == 304:
            # Not modified: preserve the caller's timestamp, no response body
            return sinceTime, None
        raise

    # Debug - detect why json sometimes returned as HTML but no error code
    if debug and not silent:
        print("STATUS %s" % resp.getcode())
        print(resp.headers)

    # A missing or empty header is reported as None
    # (e.g. responses to git blob-plain URLs don't seem to have dates)
    lastMod = resp.headers.get('Last-Modified') or None

    if not encoding:
        return lastMod, resp

    try:
        response = io.TextIOWrapper(resp, encoding=encoding, errors=errors)
    except Exception:
        # e.g. unknown encoding name: don't leave the connection open
        resp.close()
        raise
    return lastMod, response