def get()

in scripts/cronjobs/urlutils.py [0:0]


    def get(self, url, name, encoding=None, errors=None, useFileModTime=False):
        """
            Return an open stream on the cached copy of a URL, refreshing the cache first if needed.

            Check if the filename exists in the cache.
            If it does not, or if it does and the URL has not been checked recently,
            then try to download the URL using If-Modified-Since.
            The URL is downloaded to a temporary file which then replaces the cache file,
            to reduce the time when the file is being updated.

            How often the URL is re-checked is governed by the check interval configured on
            this instance (this is mainly intended to avoid excess URL requests in unit testing):
              -1: an existing cache file is never re-checked, i.e. the URL is only downloaded once
               0: the URL is checked on every call
              >0: the URL is checked only if the last check is at least that old
                  (compared against time.time() differences)

            @param url: the url to fetch (required)
            @param name: the name to use in the cache. If None, the name is derived from the
                        basename of the URL path or, if that is empty, from the whole URL
            @param encoding: the encoding to use (default None)
            @param errors: If encoding is provided, this specifies the on-error action (e.g. 'ignore')
                        (default None)
            @param useFileModTime: whether to use the file modification time as the last check time
            If not, a hidden marker file is used (default false). Set this to true for URLs that don't
            provide a Last-Modified header
            @return: the opened stream, using the encoding if specified. Otherwise opened in binary mode.
                        The caller is responsible for closing it.
        """
        def report(message, *args):
            # progress messages are suppressed when running silently;
            # formatting is deferred so nothing is built when silent
            if not self.__silent:
                print(message % args)

        if name is None:
            name = basename(urlparse(url).path)
            if name == '': # no-name URL
                import re
                # convert URL to file name: replace each run of non-word characters
                # (anything other than letters, digits and '_') with a single '_'
                name = re.sub(r'[^\w]+','_',url)
        target = self.__getname(name)
        fileTime = self.__file_mtime(target)
        if useFileModTime:
            # the cache file itself doubles as the "last checked" marker
            check = self.__getname(name)
        else:
            check = self.__getMarker(name)

        # Decide whether the cached copy can be used without contacting the server
        upToDate = False
        if fileTime >= 0:
            if self.__interval == -1:
                report("File %s exists and URL check has been disabled", name)
                upToDate = True
            elif self.__interval == 0:
                report("File %s exists and check interval is zero", name)
            else:
                checkTime = self.__file_mtime(check)
                now = time.time()
                diff = now - checkTime
                if diff < self.__interval:
                    report("Recently checked: %d < %d, skip check for %s", diff, self.__interval, name)
                    upToDate = True
                elif checkTime >= 0:
                    report("Not recently checked: %d > %d (%s)", diff, self.__interval, name)
                else:
                    # negative check time: the marker could not be read, so diff is meaningless
                    report("Not recently checked (%s)", name)
        else:
            report("Not found %s ", name)

        if not upToDate:
            sinceTime = mod_date(fileTime)
            lastMod, response = getIfNewer(url, sinceTime, silent=self.__silent, debug=self.__debug)
            if response: # we have a new version
                # fall back to 0 if Last-Modified is missing or cannot be parsed
                lastModT = 0
                if lastMod:
                    try:
                        lastModT = calendar.timegm(time.strptime(lastMod, _HTTP_TIME_FORMAT))
                    except ValueError:
                        pass

                tmpFile = target + ".tmp"
                try:
                    with open(tmpFile,'wb') as f:
                        shutil.copyfileobj(response, f)
                    if not useFileModTime:
                        # store the last mod time as the time of the file
                        touchFile(tmpFile, lastModT)
                    # os.replace overwrites an existing target on every platform
                    # (os.rename fails on Windows if the target exists);
                    # seems to preserve file mod time
                    os.replace(tmpFile, target)
                except BaseException:
                    # don't leave a partial download behind; cleanup is best-effort
                    # and must not mask the original failure
                    try:
                        os.remove(tmpFile)
                    except OSError:
                        pass
                    raise

                if not lastMod:
                    report("Downloaded new version of %s (undated)", name)
                elif fileTime > 0:
                    report("Downloaded new version of %s (%s > %s)", name, lastMod, sinceTime)
                else:
                    report("Downloaded new version of %s (%s)", name, lastMod)
            else:
                report("Cached copy of %s is up to date (%s)", name, lastMod)

            if self.__interval > 0: # no point creating a marker file if we won't be using it
                if useFileModTime:
                    os.utime(check, None) # touch the cache file, which acts as the marker
                else:
                    # opening in append mode creates the marker file if it does not exist yet
                    with open(check,'a'):
                        os.utime(check, None) # touch the marker file

        if encoding:
            return open(target, 'r', encoding=encoding, errors=errors)
        else:
            return open(target, 'rb')