def getIfNewer()

in scripts/cronjobs/urlutils.py [0:0]


def getIfNewer(url, sinceTime=None, encoding=None, errors=None, silent=False, debug=False, method='GET'):
    """
        Get a URL if it is newer

        @param url: the url to fetch (required)
        @param sinceTime: the most recent Last-Modified string (format as per mod_date());
         when truthy it is sent as the If-Modified-Since request header
        @param encoding: the encoding to use (default 'None')
        @param errors: If encoding is provided, this specifies the on-error action (e.g. 'ignore')
        @param silent: whether to suppress printing the url and headers (default False)
        @param debug: whether to print additional info, i.e. the status code and
         response headers (default False); ignored when silent is set
        @param method: the HTTP method to use (default GET)

        @return: (lastMod, response)
        - lastMod: the Last-Modified string (from sinceTime if the URL is not later) may be None
        - response: the HTTPResponse (encoding == None) or TextIOBase object.
         'None' if the URL is not newer
         The caller is responsible for closing the returned response.
        @raise urllib.error.HTTPError: if URL not found or other error
         (a 304 Not Modified status is not raised; it yields (sinceTime, None))
    """
    headers = {"If-Modified-Since" : sinceTime} if sinceTime else {}
    if not silent:
        print("%s %s" % (url, headers))
    req = Request(url, headers=headers, method=method)

    # Only urlopen() can raise HTTPError, so keep the try block down to that call.
    try:
        resp = urlopen(req, timeout=URL_TIMEOUT)
    except HTTPError as err:
        if err.code == 304:
            # Not modified: preserve the caller's timestamp, there is no body to return
            return sinceTime, None
        raise

    # Debug - detect why json sometimes returned as HTML but no error code
    if debug and not silent:
        print("STATUS %s" % resp.getcode())
        print(resp.headers)

    # A missing header yields None; an empty one is normalised to None as well
    # (e.g. responses to git blob-plain URLs don't seem to have dates)
    lastMod = resp.headers.get('Last-Modified') or None

    if not encoding:
        return lastMod, resp

    try:
        response = io.TextIOWrapper(resp, encoding=encoding, errors=errors)
    except Exception:
        # e.g. unknown encoding: the caller never receives resp, so close it
        # here rather than leaking the open connection
        resp.close()
        raise
    return lastMod, response