in src/google/appengine/api/urlfetch_stub.py [0:0]
def _RetrieveURL(url, payload, method, headers, request, response,
                 follow_redirects=True, deadline=_API_CALL_DEADLINE,
                 validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT,
                 http_proxy=None):
  """Retrieves a URL over network.

  Args:
    url: String containing the URL to access.
    payload: Request payload to send, if any; None if no payload.
      If the payload is unicode, we assume it is utf-8.
    method: HTTP method to use (e.g., 'GET')
    headers: List of additional header objects to use for the request.
    request: A urlfetch_service_pb2.URLFetchRequest proto object from
      original request.
    response: A urlfetch_service_pb2.URLFetchResponse proto object to
      populate with the response data.
    follow_redirects: optional setting (defaulting to True) for whether or not
      we should transparently follow redirects (up to MAX_REDIRECTS)
    deadline: Number of seconds to wait for the urlfetch to finish.
    validate_certificate: If true, do not send request to server unless the
      certificate is valid, signed by a trusted CA and the hostname matches
      the certificate.
    http_proxy: Tuple of (hostname, port), where hostname is a string and port
      is an int, to use as the http proxy.

  Raises:
    Raises an apiproxy_errors.ApplicationError exception with
    INVALID_URL_ERROR in cases where:
      - The protocol of the redirected URL is bad or missing.
      - The port is not in the allowable range of ports.
    Raises an apiproxy_errors.ApplicationError exception with
    TOO_MANY_REDIRECTS in cases when MAX_REDIRECTS is exceeded
  """
  last_protocol = ''
  last_host = ''
  url = six.ensure_str(url, 'utf-8')
  # Unicode payloads are sent as their UTF-8 encoding (see docstring).
  if isinstance(payload, six.text_type):
    payload = six.ensure_str(payload, 'utf-8')

  # Each iteration issues exactly one HTTP request; additional iterations
  # handle redirects.  The loop's `else` clause (bottom) runs only when we
  # never `break`, i.e. every response was a redirect and MAX_REDIRECTS
  # was exceeded.
  for _ in range(MAX_REDIRECTS + 1):
    parsed = urllib.parse.urlsplit(url)
    protocol, host, path, query, _ = parsed
    port = parsed.port

    if not _IsAllowedPort(port):
      logging.error(
          'urlfetch received %s ; port %s is not allowed in production!',
          url, port)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb2.URLFetchServiceError.INVALID_URL)

    # A URL with a scheme but no authority (e.g. 'http:/foo') is malformed.
    if protocol and not host:
      logging.error('Missing host on redirect; target url is %s', url)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb2.URLFetchServiceError.INVALID_URL)

    # A relative redirect (no scheme and no host) reuses the protocol and
    # host of the previous request in this loop.
    if not host and not protocol:
      host = last_host
      protocol = last_protocol

    # An explicit port of 0 means "use the protocol default"; strip the
    # ':0' suffix so the connection and Host header omit it.
    if port == 0:
      host = host.replace(':0', '')

    # Header keys and values are handled as bytes; each value is a *list*
    # so repeated headers can be accumulated (see setdefault below).
    adjusted_headers = {
        b'User-Agent': [(
            b'AppEngine-Google; (+http://code.google.com/appengine; appid: %s)'
            % full_app_id.get().encode())],
        b'Host': [six.ensure_binary(host)],
        b'Accept-Encoding': [b'gzip'],
    }
    if payload is not None:
      adjusted_headers[b'Content-Length'] = [
          six.ensure_binary(str(len(payload)))
      ]
    # Default Content-Type for POSTs with a body; overridden below if the
    # caller supplied an explicit content-type header.
    if method == 'POST' and payload:
      adjusted_headers[b'Content-Type'] = [
          b'application/x-www-form-urlencoded'
      ]

    # Merge caller-supplied headers into the defaults above.
    passthrough_content_encoding = False
    for header in headers:
      header_key = six.ensure_binary(header.Key)
      header_value = six.ensure_binary(header.Value)
      if header_key.lower() == b'user-agent':
        # Caller's User-Agent is prepended to the stub's identifying agent
        # rather than replacing it.
        adjusted_headers[header_key.title()] = [
            (b'%s %s' % (six.ensure_binary(header_value),
                         adjusted_headers[b'User-Agent'][0]))
        ]
      elif header_key.lower() == b'accept-encoding':
        # The caller asked for a specific encoding, so pass the raw
        # (possibly gzipped) body through instead of decompressing it below.
        passthrough_content_encoding = True
        adjusted_headers[header_key.title()] = [header_value]
      elif header_key.lower() == b'content-type':
        adjusted_headers[header_key.title()] = [header_value]
      else:
        # Any other header may repeat; append to the list for that key.
        adjusted_headers.setdefault(header_key, []).append(header_value)

    logging.debug(
        'Making HTTP request: host = %r, url = %r, payload = %.1000r, '
        'headers = %r', host, url, payload, adjusted_headers)
    try:
      proxy_host = None
      connection_kwargs = {'timeout': deadline}

      if protocol == 'http':
        connection_class = http_client.HTTPConnection
        default_port = 80

        # Explicit http_proxy argument wins over the HTTP_PROXY env var;
        # localhost traffic never goes through a proxy.
        if http_proxy and not _IsLocalhost(host):
          proxy_host = '%s:%d' % (http_proxy[0],
                                  http_proxy[1])
        elif os.environ.get('HTTP_PROXY') and not _IsLocalhost(host):
          _, proxy_host, _, _, _ = (
              urllib.parse.urlsplit(os.environ.get('HTTP_PROXY')))
      elif protocol == 'https':
        connection_class = http_client.HTTPSConnection
        if (validate_certificate and CERT_PATH):
          # Build a verifying SSL context rooted at the bundled CA file.
          context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
          context.verify_mode = ssl.CERT_REQUIRED
          context.load_verify_locations(CERT_PATH)
          context.check_hostname = True
          connection_kwargs['context'] = context

        default_port = 443

        if os.environ.get('HTTPS_PROXY') and not _IsLocalhost(host):
          _, proxy_host, _, _, _ = (
              urllib.parse.urlsplit(os.environ.get('HTTPS_PROXY')))
      else:
        error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb2.URLFetchServiceError.INVALID_URL, error_msg)

      # When validation is off, explicitly disable certificate checks.
      # NOTE(review): the version_info >= (2, 7, 9) guard is vestigial from
      # the Python 2 SDK — it is always true on Python 3.
      if (not validate_certificate and sys.version_info >= (2, 7, 9)
          and protocol == 'https'):
        connection_kwargs['context'] = ssl._create_unverified_context()

      if proxy_host:
        # NOTE(review): proxy_port is a *string* here (str.partition result);
        # http.client/getaddrinfo tolerate string ports, but confirm.
        proxy_address, _, proxy_port = proxy_host.partition(':')
        connection = connection_class(
            proxy_address, proxy_port if proxy_port else default_port,
            **connection_kwargs)
        # Through a proxy, the request line carries the absolute URL.
        full_path = urllib.parse.urlunsplit((protocol, host, path, query, ''))
        if protocol == 'https':
          # HTTPS through a proxy uses a CONNECT tunnel to the real host.
          connection.set_tunnel(host)
      else:
        connection = connection_class(host, **connection_kwargs)
        # Direct connection: request line carries only the path and query.
        full_path = urllib.parse.urlunsplit(('', '', path, query, ''))

      # Remember where this request went so a relative redirect can reuse it.
      last_protocol = protocol
      last_host = host

      try:
        _SendRequest(connection, method, full_path, payload, adjusted_headers)
        http_response = connection.getresponse()
        if method == 'HEAD':
          # HEAD responses have no body; don't try to read one.
          http_response_data = ''
        else:
          http_response_data = http_response.read()
      finally:
        # Always release the socket, even if the request/read failed.
        connection.close()
    # Map transport-level failures onto URLFetchServiceError codes.
    except ssl.CertificateError as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb2.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
    except ssl.SSLError as e:
      # An SSL timeout surfaces as SSLError; distinguish it from genuine
      # certificate problems by inspecting the message.
      app_error = (
          urlfetch_service_pb2.URLFetchServiceError.DEADLINE_EXCEEDED
          if 'timed out' in str(e) else
          urlfetch_service_pb2.URLFetchServiceError.SSL_CERTIFICATE_ERROR)
      raise apiproxy_errors.ApplicationError(app_error, str(e))
    except socket.timeout as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb2.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
    except (http_client.error, socket.error, IOError) as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb2.URLFetchServiceError.FETCH_ERROR, str(e))

    # Status codes >= 600 are outside the HTTP spec entirely.
    if http_response.status >= 600:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb2.URLFetchServiceError.FETCH_ERROR,
          'Status %s unknown' % http_response.status)

    if http_response.status in REDIRECT_STATUSES and follow_redirects:
      # Follow the redirect: loop again with the Location target.
      url = http_response.getheader('Location', None)
      if url is None:
        error_msg = 'Missing "Location" header for redirect.'
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb2.URLFetchServiceError.MALFORMED_REPLY,
            error_msg)

      # Except for 307 (and methods in PRESERVE_ON_REDIRECT), redirects are
      # refetched as a bodyless GET, mirroring common browser behavior.
      if (http_response.status != http_client.TEMPORARY_REDIRECT and
          method not in PRESERVE_ON_REDIRECT):
        logging.warning('Received a %s to a %s. Redirecting with a GET',
                        http_response.status, method)
        method = 'GET'
        payload = None
    else:
      # Terminal response: populate the response proto and exit the loop.
      response.StatusCode = http_response.status
      # Transparently decompress gzip bodies unless the caller supplied its
      # own Accept-Encoding header above.
      if (http_response.getheader('content-encoding') == 'gzip' and
          not passthrough_content_encoding):
        gzip_stream = six.BytesIO(http_response_data)
        gzip_file = gzip.GzipFile(fileobj=gzip_stream)
        http_response_data = gzip_file.read()
      response.Content = six.ensure_binary(
          http_response_data[:MAX_RESPONSE_SIZE])

      # Deduplicate header names case-insensitively before copying them.
      key_set = set([key.lower() for key in http_response.msg.keys()])
      for header_key in key_set:
        header_values = GetHeaders(http_response.msg, header_key)
        # Drop the content-encoding header when we decompressed the body.
        if (header_key.lower() == 'content-encoding' and
            'gzip' in header_values and not passthrough_content_encoding):
          continue
        # Content-Length must describe the (possibly decompressed and/or
        # truncated) content we actually return — except for HEAD, where
        # the original value is meaningful.
        if header_key.lower() == 'content-length' and method != 'HEAD':
          header_values = [str(len(response.Content))]
        for header_value in header_values:
          response.header.add(Key=header_key, Value=header_value)

      if len(http_response_data) > MAX_RESPONSE_SIZE:
        response.ContentWasTruncated = True

      # Report the post-redirect URL when it differs from the request's.
      if request.Url != url:
        response.FinalUrl = url

      break
  else:
    # Loop exhausted without a terminal response: redirect limit exceeded.
    error_msg = 'Too many repeated redirects'
    logging.error(error_msg)
    raise apiproxy_errors.ApplicationError(
        urlfetch_service_pb2.URLFetchServiceError.TOO_MANY_REDIRECTS,
        error_msg)