URLConnection connect()

in src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnector.java [94:257]


  /**
   * Opens a connection to {@code originalUrl}, following HTTP redirects manually and retrying
   * failed attempts with backoff.
   *
   * <p>URLs using the {@code file} protocol are opened directly, without proxying, redirect
   * handling, or retries. For every other URL the loop below repeats until one of the following
   * happens:
   *
   * <ul>
   *   <li>the server answers 200 or 206, in which case the open connection is returned;
   *   <li>a permanent failure is detected (see the {@code @throws} clauses);
   *   <li>{@code MAX_RETRIES} attempts have failed, in which case the last {@link IOException} is
   *       thrown with the earlier failures attached as suppressed exceptions.
   * </ul>
   *
   * <p>Redirects (301, 302, 303, 307, 308) are followed up to {@code MAX_REDIRECTS} times. After a
   * retryable failure the next attempt starts again from {@code originalUrl}; a permanent redirect
   * (301 or 308) replaces {@code originalUrl} so retries start from the new location instead.
   *
   * @param originalUrl the URL to connect to
   * @param requestHeaders supplies the request headers for each URL that is actually contacted
   *     (it is invoked with the current URL, which may be a redirect target). An {@code
   *     Accept-Encoding} header is dropped when the URL's extension is in {@code
   *     COMPRESSED_EXTENSIONS}.
   * @return a connection whose response code was 200 or 206, or the plain connection for a {@code
   *     file} URL
   * @throws InterruptedIOException if the calling thread was interrupted on entry or while
   *     sleeping between retries, or if a non-timeout {@link InterruptedIOException} occurred
   * @throws FileNotFoundException if the server answered 404 or 410
   * @throws UnrecoverableHttpException on an unknown host, an illegal argument (e.g. a bad port),
   *     too many redirects, or a response code classified below as permanent
   * @throws IOException if every retry failed
   */
  URLConnection connect(URL originalUrl, Function<URL, ImmutableMap<String, String>> requestHeaders)
      throws IOException {

    // Thread.interrupted() also clears the interrupt flag; the interruption is reported to the
    // caller through the exception instead.
    if (Thread.interrupted()) {
      throw new InterruptedIOException();
    }
    URL url = originalUrl;
    if (HttpUtils.isProtocol(url, "file")) {
      return url.openConnection();
    }
    // Failures of earlier attempts; attached to the final exception once retries are exhausted.
    List<Throwable> suppressions = new ArrayList<>();
    int retries = 0;
    int redirects = 0;
    int connectTimeout = scale(MIN_CONNECT_TIMEOUT_MS);
    while (true) {
      HttpURLConnection connection = null;
      try {
        connection = (HttpURLConnection)
            url.openConnection(proxyHelper.createProxyIfNeeded(url));
        // TODO(zecke): Revise once https://bugs.openjdk.java.net/browse/JDK-8163921 is fixed.
        connection.addRequestProperty("Accept", "text/html, image/gif, image/jpeg, */*");
        boolean isAlreadyCompressed =
            COMPRESSED_EXTENSIONS.contains(HttpUtils.getExtension(url.getPath()))
                || COMPRESSED_EXTENSIONS.contains(HttpUtils.getExtension(originalUrl.getPath()));
        // Redirects are handled by hand further down so that they can be counted and so that the
        // request headers can be recomputed for each URL.
        connection.setInstanceFollowRedirects(false);
        for (Map.Entry<String, String> entry : requestHeaders.apply(url).entrySet()) {
          if (isAlreadyCompressed && Ascii.equalsIgnoreCase(entry.getKey(), "Accept-Encoding")) {
            // We're not going to ask for compression if we're downloading a file that already
            // appears to be compressed.
            continue;
          }
          connection.addRequestProperty(entry.getKey(), entry.getValue());
        }
        if (connection.getRequestProperty("User-Agent") == null) {
          connection.setRequestProperty("User-Agent", USER_AGENT_VALUE);
        }
        connection.setConnectTimeout(connectTimeout);
        // The read timeout is always large because it stays in effect after this method.
        connection.setReadTimeout(scale(READ_TIMEOUT_MS));
        // Java tries to abstract HTTP error responses for us. We don't want that. So we're going
        // to try and undo any IOException that doesn't appear to be a legitimate I/O exception.
        int code;
        try {
          connection.connect();
          code = connection.getResponseCode();
        } catch (FileNotFoundException ignored) {
          // Thrown in place of an HTTP error status; recover the real status code instead.
          code = connection.getResponseCode();
        } catch (UnknownHostException e) {
          String message = "Unknown host: " + e.getMessage();
          eventHandler.handle(Event.progress(message));
          throw new UnrecoverableHttpException(message);
        } catch (IllegalArgumentException e) {
          // This will happen if the user does something like specify a port greater than 2^16-1.
          throw new UnrecoverableHttpException(e.getMessage());
        } catch (IOException e) {
          // Some HTTP error status codes are converted to IOExceptions, which we can only
          // disambiguate from other IOExceptions by checking the exception message. We need to be
          // careful because some exceptions (e.g., SocketTimeoutException) may have a null message.
          if (e.getMessage() == null || !e.getMessage().startsWith("Server returned")) {
            throw e;
          }
          code = connection.getResponseCode();
        }
        // 206 means partial content and only happens if caller specified Range. See RFC7233 § 4.1.
        if (code == 200 || code == 206) {
          return connection;
        } else if (code == 301 || code == 302 || code == 303 || code == 307 || code == 308) {
          // Drain the redirect response body before moving on to the next location.
          readAllBytesAndClose(connection.getInputStream());
          if (++redirects == MAX_REDIRECTS) {
            eventHandler.handle(Event.progress("Redirect loop detected in " + originalUrl));
            throw new UnrecoverableHttpException("Redirect loop detected");
          }
          url = HttpUtils.getLocation(connection);
          // 301 (RFC7231 § 6.4.2) and 308 (RFC7538 § 3) are permanent redirects, so later retries
          // should start from the new location rather than from the URL we were first given.
          if (code == 301 || code == 308) {
            originalUrl = url;
          }
        } else if (code == 403) {
          // jart@ has noticed BitBucket + Amazon AWS downloads frequently flake with this code.
          throw new IOException(describeHttpResponse(connection));
        } else if (code == 408) {
          // The 408 (Request Timeout) status code indicates that the server did not receive a
          // complete request message within the time that it was prepared to wait. Server SHOULD
          // send the "close" connection option (Section 6.1 of [RFC7230]) in the response, since
          // 408 implies that the server has decided to close the connection rather than continue
          // waiting.  If the client has an outstanding request in transit, the client MAY repeat
          // that request on a new connection. Quoth RFC7231 § 6.5.7
          throw new IOException(describeHttpResponse(connection));
        } else if (code < 500          // 4xx means client seems to have erred quoth RFC7231 § 6.5
                    || code == 501     // Server doesn't support function quoth RFC7231 § 6.6.2
                    || code == 502     // Host not configured on server cf. RFC7231 § 6.6.3
                    || code == 505) {  // Server refuses to support version quoth RFC7231 § 6.6.6
          // This is a permanent error so we're not going to retry. Note that this branch also
          // receives every code below 400 that was not handled above.
          readAllBytesAndClose(connection.getErrorStream());
          if (code == 404 || code == 410) {
            // For Not Found, we throw a separate unrecoverable exception so that callers can
            // distinguish between the resource being not found and the server being unavailable.
            throw new FileNotFoundException(describeHttpResponse(connection));
          }
          throw new UnrecoverableHttpException(describeHttpResponse(connection));
        } else {
          // However we will retry on some 5xx errors, particularly 500 and 503.
          throw new IOException(describeHttpResponse(connection));
        }
      } catch (UnrecoverableHttpException | FileNotFoundException e) {
        // Permanent failures: propagate immediately, bypassing the retry logic below.
        throw e;
      } catch (IllegalArgumentException e) {
        throw new UnrecoverableHttpException(e.getMessage());
      } catch (IOException e) {
        if (connection != null) {
          // If we got here, it means we might not have consumed the entire payload of the
          // response, if any. So we're going to force this socket to disconnect and not be
          // reused. This is particularly important if multiple threads end up establishing
          // connections to multiple mirrors simultaneously for a large file. We don't want to
          // download that large file twice.
          connection.disconnect();
        }
        // We don't respect the Retry-After header (RFC7231 § 7.1.3) because it's rarely used and
        // tends to be too conservative when it is. We're already being good citizens by using
        // exponential backoff. Furthermore RFC law didn't use the magic word "MUST".
        int timeout = IntMath.pow(2, retries) * MIN_RETRY_DELAY_MS;
        if (e instanceof SocketTimeoutException) {
          eventHandler.handle(Event.progress("Timeout connecting to " + url));
          connectTimeout = Math.min(connectTimeout * 2, scale(MAX_CONNECT_TIMEOUT_MS));
          // If we got connect timeout, we're already doing exponential backoff, so no point
          // in sleeping too.
          timeout = 1;
        } else if (e instanceof InterruptedIOException) {
          // Please note that SocketTimeoutException is a subtype of InterruptedIOException.
          throw e;
        }
        if (++retries == MAX_RETRIES) {
          if (e instanceof SocketTimeoutException) {
            // SocketTimeoutExceptions are InterruptedIOExceptions; however they do not signify
            // an external interruption, but simply a failed download due to some server timing
            // out. So rethrow them as ordinary IOExceptions.
            e = new IOException(e.getMessage(), e);
          } else {
            eventHandler
                .handle(Event.progress(format("Error connecting to %s: %s", url, e.getMessage())));
          }
          for (Throwable suppressed : suppressions) {
            e.addSuppressed(suppressed);
          }
          throw e;
        }
        // Java 7 allows us to create a tree of all errors that led to the ultimate failure.
        suppressions.add(e);
        eventHandler.handle(
            Event.progress(format("Failed to connect to %s trying again in %,dms", url, timeout)));
        // Start the next attempt from the (possibly permanently redirected) original URL rather
        // than from whatever temporary redirect target just failed.
        url = originalUrl;
        try {
          sleeper.sleepMillis(timeout);
        } catch (InterruptedException translated) {
          throw new InterruptedIOException();
        }
      } catch (RuntimeException e) {
        if (connection != null) {
          connection.disconnect();
        }
        eventHandler.handle(Event.progress(format("Unknown error connecting to %s: %s", url, e)));
        throw e;
      }
    }
  }