in scripts/cronjobs/urlutils.py [0:0]
def get(self, url, name, encoding=None, errors=None, useFileModTime=False):
    """
    Return an open stream on the cached copy of a URL, refreshing the cache first if needed.

    Check if the filename exists in the cache.
    If it does not, or if it does and the URL has not been checked recently,
    then try to download the URL using If-Modified-Since.
    The URL is downloaded to a temporary file (the cache filename plus ".tmp") which then
    replaces the cached file, to reduce the time when the file is being updated.

    The check interval configured on this object (it is not a parameter of this method)
    determines how often to check if the URL has changed
    (this is mainly intended to avoid excess URL requests in unit testing):
    - if it is -1, an existing cached file is never re-checked, so the URL is only downloaded once
    - if it is 0, the URL is checked on every call
    - otherwise the URL is only re-checked once at least that many seconds (time.time() based)
      have elapsed since the last check

    Errors raised by the download or by the file operations are not caught here.

    @param url: the url to fetch (required)
    @param name: the name to use in the cache. If None, the last path component of the URL
                 is used; if that is empty, a name is built from the whole URL.
    @param encoding: the encoding to use (default None)
    @param errors: If encoding is provided, this specifies the on-error action (e.g. 'ignore')
                (default None)
    @param useFileModTime: whether to use the file modification time as the last check time
                If not, a hidden marker file is used (default false). Set this to true for URLs that don't
                provide a Last-Modified header
    @return: the opened stream, using the encoding if specified. Otherwise opened in binary mode.
    """
    def report(fmt, *args):
        # Progress messages are only printed (and only formatted) when not silent.
        if not self.__silent:
            print(fmt % args)

    if name is None:
        name = basename(urlparse(url).path)
        if name == '': # no-name URL
            import re
            # convert URL to file name: each run of non-word characters becomes a single '_'
            name = re.sub(r'[^\w]+', '_', url)

    target = self.__getname(name)
    # NOTE(review): __file_mtime appears to return a negative value when the file
    # does not exist (the checks below rely on ">= 0" meaning "present") - confirm.
    fileTime = self.__file_mtime(target)
    if useFileModTime:
        # the cached file itself records when the URL was last checked
        check = self.__getname(name)
    else:
        check = self.__getMarker(name)

    upToDate = False
    if fileTime >= 0:
        if self.__interval == -1:
            report("File %s exists and URL check has been disabled", name)
            upToDate = True
        elif self.__interval == 0:
            # always fall through to the conditional download
            report("File %s exists and check interval is zero", name)
        else:
            checkTime = self.__file_mtime(check)
            now = time.time()
            diff = now - checkTime
            if diff < self.__interval:
                report("Recently checked: %d < %d, skip check for %s", diff, self.__interval, name)
                upToDate = True
            elif checkTime >= 0:
                report("Not recently checked: %d > %d (%s)", diff, self.__interval, name)
            else:
                # no usable check time, so the elapsed time is meaningless
                report("Not recently checked (%s)", name)
    else:
        report("Not found %s ", name)

    if not upToDate:
        sinceTime = mod_date(fileTime)
        lastMod, response = getIfNewer(url, sinceTime, silent=self.__silent, debug=self.__debug)
        if response: # we have a new version
            if lastMod:
                try:
                    lastModT = calendar.timegm(time.strptime(lastMod, _HTTP_TIME_FORMAT))
                except ValueError:
                    # unparseable date: treat the download as undated
                    lastModT = 0
            else:
                lastModT = 0

            tmpFile = target + ".tmp"
            try:
                with open(tmpFile, 'wb') as f:
                    shutil.copyfileobj(response, f)
            finally:
                # Release the response even if the copy fails, rather than waiting
                # for garbage collection. getattr keeps this safe for any
                # response object that does not provide close().
                closeResponse = getattr(response, 'close', None)
                if callable(closeResponse):
                    closeResponse()
            if not useFileModTime:
                # store the last mod time as the time of the file
                touchFile(tmpFile, lastModT)
            # os.replace behaves like os.rename, but also overwrites an existing
            # target on Windows (where os.rename raises if the target exists)
            os.replace(tmpFile, target) # seems to preserve file mod time
            if lastMod:
                if fileTime > 0:
                    report("Downloaded new version of %s (%s > %s)", name, lastMod, sinceTime)
                else:
                    report("Downloaded new version of %s (%s)", name, lastMod)
            else:
                report("Downloaded new version of %s (undated)", name)
        else:
            report("Cached copy of %s is up to date (%s)", name, lastMod)

        if self.__interval > 0: # no point creating a marker file if we won't be using it
            if useFileModTime:
                os.utime(check, None) # touch the cached file, which acts as the marker
            else:
                # opening in append mode creates the marker file if it is missing
                with open(check, 'a'):
                    os.utime(check, None) # touch the marker file

    if encoding:
        return open(target, 'r', encoding=encoding, errors=errors)
    return open(target, 'rb')