def _RetrieveURL()

in src/google/appengine/api/urlfetch_stub.py


  def _RetrieveURL(url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT,
                   http_proxy=None):
    """Retrieves a URL over network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET').
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb2.URLFetchRequest proto object from
          original request.
      response: A urlfetch_service_pb2.URLFetchResponse proto object to
          populate with the response data.
      follow_redirects: Optional; whether to transparently follow redirects,
        up to MAX_REDIRECTS. Defaults to True.
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If True, do not send the request to the server
        unless the certificate is valid, is signed by a trusted CA, and the
        hostname matches the certificate.
      http_proxy: Tuple of (hostname, port), where hostname is a string and port
        is an int, to use as the http proxy.

    Raises:
      apiproxy_errors.ApplicationError: Raised with INVALID_URL when:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
        Raised with TOO_MANY_REDIRECTS when MAX_REDIRECTS is exceeded.
    """
    last_protocol = ''
    last_host = ''

    url = six.ensure_str(url, 'utf-8')

    if isinstance(payload, six.text_type):
      payload = six.ensure_str(payload, 'utf-8')

    for _ in range(MAX_REDIRECTS + 1):
      parsed = urllib.parse.urlsplit(url)
      protocol, host, path, query, _ = parsed

      port = parsed.port

      if not _IsAllowedPort(port):
        logging.error(
            'urlfetch received %s ; port %s is not allowed in production!',
            url, port)
        # Mirror production behavior: refuse to fetch from a disallowed port.
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb2.URLFetchServiceError.INVALID_URL)

      if protocol and not host:
        # A redirect target that has a scheme but no host is malformed.
        logging.error('Missing host on redirect; target url is %s', url)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb2.URLFetchServiceError.INVALID_URL)

      # A relative redirect carries neither protocol nor host; reuse both
      # from the previous request.
      if not host and not protocol:
        host = last_host
        protocol = last_protocol

      # urlsplit reports an explicit ':0' suffix as port 0; strip it so the
      # connection falls back to the protocol's default port.
      if port == 0:
        host = host.replace(':0', '')

      # Start from the headers App Engine always sends; caller-supplied
      # headers are merged in below.
      adjusted_headers = {
          b'User-Agent': [(
              b'AppEngine-Google; (+http://code.google.com/appengine; appid: %s)'
              % full_app_id.get().encode())],
          b'Host': [six.ensure_binary(host)],
          b'Accept-Encoding': [b'gzip'],
      }
      if payload is not None:
        # Content-Length reflects the payload after any utf-8 re-encoding.
        adjusted_headers[b'Content-Length'] = [
            six.ensure_binary(str(len(payload)))
        ]

      # A POST body defaults to form encoding; a caller-supplied Content-Type
      # header overrides it below.
      if method == 'POST' and payload:
        adjusted_headers[b'Content-Type'] = [
            b'application/x-www-form-urlencoded'
        ]

      passthrough_content_encoding = False
      for header in headers:
        # Merge caller-supplied headers into the defaults: the caller's
        # User-Agent is prepended to the stock one, and a caller-supplied
        # Accept-Encoding disables the automatic gzip decoding further down.
        header_key = six.ensure_binary(header.Key)
        header_value = six.ensure_binary(header.Value)
        if header_key.lower() == b'user-agent':
          adjusted_headers[header_key.title()] = [
              (b'%s %s' % (six.ensure_binary(header_value),
                           adjusted_headers[b'User-Agent'][0]))
          ]
        elif header_key.lower() == b'accept-encoding':
          passthrough_content_encoding = True
          adjusted_headers[header_key.title()] = [header_value]
        elif header_key.lower() == b'content-type':
          adjusted_headers[header_key.title()] = [header_value]
        else:
          adjusted_headers.setdefault(header_key, []).append(header_value)

      logging.debug(
          'Making HTTP request: host = %r, url = %r, payload = %.1000r, '
          'headers = %r', host, url, payload, adjusted_headers)
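      # Everything from here through the except clauses below maps low-level
      # socket and SSL failures onto URLFetchServiceError codes.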
      try:
        proxy_host = None
        connection_kwargs = {'timeout': deadline}

        if protocol == 'http':
          connection_class = http_client.HTTPConnection
          default_port = 80

          if http_proxy and not _IsLocalhost(host):
            proxy_host = '%s:%d' % (http_proxy[0],
                                    http_proxy[1])
          elif os.environ.get('HTTP_PROXY') and not _IsLocalhost(host):
            _, proxy_host, _, _, _ = (
                urllib.parse.urlsplit(os.environ.get('HTTP_PROXY')))
        elif protocol == 'https':
          connection_class = http_client.HTTPSConnection
          if (validate_certificate and CERT_PATH):
            context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
            context.verify_mode = ssl.CERT_REQUIRED
            context.load_verify_locations(CERT_PATH)
            context.check_hostname = True
            connection_kwargs['context'] = context

          default_port = 443

          if os.environ.get('HTTPS_PROXY') and not _IsLocalhost(host):
            _, proxy_host, _, _, _ = (
                urllib.parse.urlsplit(os.environ.get('HTTPS_PROXY')))
        else:
          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb2.URLFetchServiceError.INVALID_URL, error_msg)

        # With certificate validation off, use an unverified SSL context so
        # that self-signed or mismatched certificates are accepted.
        if (not validate_certificate and sys.version_info >= (2, 7, 9)
            and protocol == 'https'):
          connection_kwargs['context'] = ssl._create_unverified_context()

        if proxy_host:
          proxy_address, _, proxy_port = proxy_host.partition(':')
          connection = connection_class(
              proxy_address, proxy_port if proxy_port else default_port,
              **connection_kwargs)
          full_path = urllib.parse.urlunsplit((protocol, host, path, query, ''))
          # When tunneling HTTPS through a proxy, CONNECT to the real host.
          if protocol == 'https':
            connection.set_tunnel(host)
        else:
          connection = connection_class(host, **connection_kwargs)
          full_path = urllib.parse.urlunsplit(('', '', path, query, ''))

        # Remember the origin so a relative redirect can be resolved on the
        # next pass through the loop.
        last_protocol = protocol
        last_host = host

        try:
          _SendRequest(connection, method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            # HEAD responses have no body; use bytes so the gzip and
            # truncation code below still works.
            http_response_data = b''
          else:
            http_response_data = http_response.read()
        finally:
          connection.close()
      except ssl.CertificateError as e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb2.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
            str(e))
      except ssl.SSLError as e:
        # Timeouts can surface as SSLError; report those as
        # DEADLINE_EXCEEDED and everything else as SSL_CERTIFICATE_ERROR.
        app_error = (
            urlfetch_service_pb2.URLFetchServiceError.DEADLINE_EXCEEDED
            if 'timed out' in str(e) else
            urlfetch_service_pb2.URLFetchServiceError.SSL_CERTIFICATE_ERROR)
        raise apiproxy_errors.ApplicationError(app_error, str(e))
      except socket.timeout as e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb2.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
      except (http_client.error, socket.error, IOError) as e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb2.URLFetchServiceError.FETCH_ERROR, str(e))

      if http_response.status >= 600:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb2.URLFetchServiceError.FETCH_ERROR,
            'Status %s unknown' % http_response.status)

      # Follow the redirect unless the caller asked to receive redirect
      # responses verbatim.
      if http_response.status in REDIRECT_STATUSES and follow_redirects:
        url = http_response.getheader('Location', None)
        if url is None:
          error_msg = 'Missing "Location" header for redirect.'
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb2.URLFetchServiceError.MALFORMED_REPLY,
              error_msg)

        # Every redirect except 307 is retried as a GET without a payload,
        # unless the method is one that must be preserved.
        if (http_response.status != http_client.TEMPORARY_REDIRECT and
            method not in PRESERVE_ON_REDIRECT):
          logging.warning('Received a %s to a %s. Redirecting with a GET',
                          http_response.status, method)
          method = 'GET'
          payload = None
      else:
        response.StatusCode = http_response.status
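        # Transparently gunzip the body unless the caller supplied its own
        # Accept-Encoding header (passthrough_content_encoding above).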
        if (http_response.getheader('content-encoding') == 'gzip' and
            not passthrough_content_encoding):
          gzip_stream = six.BytesIO(http_response_data)
          gzip_file = gzip.GzipFile(fileobj=gzip_stream)
          http_response_data = gzip_file.read()
        response.Content = six.ensure_binary(
            http_response_data[:MAX_RESPONSE_SIZE])

        # Copy the response headers, dropping the gzip Content-Encoding that
        # was already decoded and rewriting Content-Length to match the
        # (possibly truncated) body.
        key_set = {key.lower() for key in http_response.msg.keys()}
        for header_key in key_set:
          header_values = GetHeaders(http_response.msg, header_key)
          if (header_key.lower() == 'content-encoding' and
              'gzip' in header_values and not passthrough_content_encoding):
            continue
          if header_key.lower() == 'content-length' and method != 'HEAD':
            header_values = [str(len(response.Content))]

          for header_value in header_values:
            response.header.add(Key=header_key, Value=header_value)

        if len(http_response_data) > MAX_RESPONSE_SIZE:
          response.ContentWasTruncated = True

        # Record the final URL only if redirects moved us off the original.
        if request.Url != url:
          response.FinalUrl = url

        # A non-redirect response ends the loop.
        break
    else:
      error_msg = 'Too many repeated redirects'
      logging.error(error_msg)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb2.URLFetchServiceError.TOO_MANY_REDIRECTS,
          error_msg)
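
The sketch below is one way to drive this function directly, for illustration
only. It assumes _RetrieveURL is reachable as a static method on
URLFetchServiceStub, and that the URLFetchRequest and URLFetchResponse protos
expose the Url, StatusCode, and Content fields the code above reads and
writes; adjust the names to your SDK version.

  from google.appengine.api import urlfetch_stub
  from google.appengine.api import urlfetch_service_pb2

  # Hypothetical example values; any reachable URL works.
  request = urlfetch_service_pb2.URLFetchRequest()
  request.Url = 'http://example.com/'
  response = urlfetch_service_pb2.URLFetchResponse()

  # Fetch with the default redirect handling and deadline (assumed static
  # method access; see the note above).
  urlfetch_stub.URLFetchServiceStub._RetrieveURL(
      url=request.Url,
      payload=None,
      method='GET',
      headers=[],  # no caller-supplied headers beyond the defaults
      request=request,
      response=response)

  print(response.StatusCode)    # e.g. 200
  print(len(response.Content))  # body size, capped at MAX_RESPONSE_SIZE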