in src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnector.java [94:257]
/**
 * Opens a connection to {@code originalUrl}, following redirects and retrying failed attempts.
 *
 * <p>A URL that {@code HttpUtils.isProtocol(url, "file")} accepts is opened directly and returned
 * without any of the handling below. Every other URL is opened through the proxy chosen by
 * {@code proxyHelper} and treated as an {@link HttpURLConnection}:
 *
 * <ul>
 *   <li>Status 200 and 206 return the connection to the caller.
 *   <li>Status 301, 302, 303, 307 and 308 are followed manually; the method gives up once
 *       {@code MAX_REDIRECTS} redirects have been seen. After a 301 or 308 the new location also
 *       replaces {@code originalUrl}, so later retries start from it.
 *   <li>Status 404 and 410 raise {@link FileNotFoundException}; any other status below 500, as
 *       well as 501, 502 and 505, raises {@code UnrecoverableHttpException}. Neither is retried.
 *   <li>Status 403, 408, the remaining 5xx codes and other {@link IOException}s are retried from
 *       {@code originalUrl} until {@code MAX_RETRIES} failures have accumulated. The delay
 *       between attempts doubles each time, except after a connect/read timeout, where the
 *       connect timeout is doubled instead.
 * </ul>
 *
 * @param originalUrl the URL to connect to
 * @param requestHeaders supplies the request headers to send for a given URL; it is invoked
 *     again for every URL actually contacted, including redirect targets. An
 *     {@code Accept-Encoding} header is dropped when the file extension of the current or the
 *     original URL is listed in {@code COMPRESSED_EXTENSIONS}.
 * @return the open connection; the response body has not been read
 * @throws InterruptedIOException if the calling thread was interrupted on entry or while sleeping
 *     between retries, or if the underlying I/O reported an interruption that is not a timeout
 * @throws FileNotFoundException if the server answered 404 or 410
 * @throws IOException if the connection could not be established; earlier failed attempts are
 *     attached as suppressed exceptions when the retry budget is exhausted
 */
URLConnection connect(URL originalUrl, Function<URL, ImmutableMap<String, String>> requestHeaders)
    throws IOException {
  // Thread.interrupted() clears the flag; the interruption is reported via the exception instead.
  if (Thread.interrupted()) {
    throw new InterruptedIOException();
  }
  URL url = originalUrl;
  if (HttpUtils.isProtocol(url, "file")) {
    return url.openConnection();
  }
  // Failures of earlier attempts, attached to the final exception once retries are exhausted.
  List<Throwable> suppressions = new ArrayList<>();
  int retries = 0;
  int redirects = 0;
  int connectTimeout = scale(MIN_CONNECT_TIMEOUT_MS);
  // Each iteration is one attempt: it returns, throws, follows a redirect, or retries.
  while (true) {
    HttpURLConnection connection = null;
    try {
      connection = (HttpURLConnection)
          url.openConnection(proxyHelper.createProxyIfNeeded(url));
      // TODO(zecke): Revise once https://bugs.openjdk.java.net/browse/JDK-8163921 is fixed.
      connection.addRequestProperty("Accept", "text/html, image/gif, image/jpeg, */*");
      boolean isAlreadyCompressed =
          COMPRESSED_EXTENSIONS.contains(HttpUtils.getExtension(url.getPath()))
              || COMPRESSED_EXTENSIONS.contains(HttpUtils.getExtension(originalUrl.getPath()));
      // Redirects are handled by hand below so that they can be counted and limited.
      connection.setInstanceFollowRedirects(false);
      for (Map.Entry<String, String> entry : requestHeaders.apply(url).entrySet()) {
        if (isAlreadyCompressed && Ascii.equalsIgnoreCase(entry.getKey(), "Accept-Encoding")) {
          // We're not going to ask for compression if we're downloading a file that already
          // appears to be compressed.
          continue;
        }
        connection.addRequestProperty(entry.getKey(), entry.getValue());
      }
      // Only fall back to the default User-Agent when the caller did not supply one.
      if (connection.getRequestProperty("User-Agent") == null) {
        connection.setRequestProperty("User-Agent", USER_AGENT_VALUE);
      }
      connection.setConnectTimeout(connectTimeout);
      // The read timeout is always large because it stays in effect after this method.
      connection.setReadTimeout(scale(READ_TIMEOUT_MS));
      // Java tries to abstract HTTP error responses for us. We don't want that. So we're going
      // to try and undo any IOException that doesn't appear to be a legitimate I/O exception.
      int code;
      try {
        connection.connect();
        code = connection.getResponseCode();
      } catch (FileNotFoundException ignored) {
        // Deliberately ignored: the status code is re-read and classified below.
        code = connection.getResponseCode();
      } catch (UnknownHostException e) {
        String message = "Unknown host: " + e.getMessage();
        eventHandler.handle(Event.progress(message));
        throw new UnrecoverableHttpException(message);
      } catch (IllegalArgumentException e) {
        // This will happen if the user does something like specify a port greater than 2^16-1.
        throw new UnrecoverableHttpException(e.getMessage());
      } catch (IOException e) {
        // Some HTTP error status codes are converted to IOExceptions, which we can only
        // disambiguate from other IOExceptions by checking the exception message. We need to be
        // careful because some exceptions (e.g., SocketTimeoutException) may have a null message.
        if (e.getMessage() == null || !e.getMessage().startsWith("Server returned")) {
          throw e;
        }
        code = connection.getResponseCode();
      }
      // 206 means partial content and only happens if caller specified Range. See RFC7233 § 4.1.
      if (code == 200 || code == 206) {
        return connection;
      } else if (code == 301 || code == 302 || code == 303 || code == 307 || code == 308) {
        // 308 (Permanent Redirect, RFC7538 § 3) is followed like the other redirect codes.
        // Without it the response would fall through to the permanent-error branch below.
        readAllBytesAndClose(connection.getInputStream());
        if (++redirects == MAX_REDIRECTS) {
          eventHandler.handle(Event.progress("Redirect loop detected in " + originalUrl));
          throw new UnrecoverableHttpException("Redirect loop detected");
        }
        url = HttpUtils.getLocation(connection);
        if (code == 301 || code == 308) {
          // Permanent redirects also move the URL that retries restart from.
          originalUrl = url;
        }
      } else if (code == 403) {
        // jart@ has noticed BitBucket + Amazon AWS downloads frequently flake with this code.
        throw new IOException(describeHttpResponse(connection));
      } else if (code == 408) {
        // The 408 (Request Timeout) status code indicates that the server did not receive a
        // complete request message within the time that it was prepared to wait. Server SHOULD
        // send the "close" connection option (Section 6.1 of [RFC7230]) in the response, since
        // 408 implies that the server has decided to close the connection rather than continue
        // waiting. If the client has an outstanding request in transit, the client MAY repeat
        // that request on a new connection. Quoth RFC7231 § 6.5.7
        throw new IOException(describeHttpResponse(connection));
      } else if (code < 500 // 4xx means client seems to have erred quoth RFC7231 § 6.5
          || code == 501 // Server doesn't support function quoth RFC7231 § 6.6.2
          || code == 502 // Host not configured on server cf. RFC7231 § 6.6.3
          || code == 505) { // Server refuses to support version quoth RFC7231 § 6.6.6
        // This is a permanent error so we're not going to retry.
        readAllBytesAndClose(connection.getErrorStream());
        if (code == 404 || code == 410) {
          // For Not Found, we throw a separate unrecoverable exception so that callers can
          // distinguish between the resource being not found and the server being unavailable.
          throw new FileNotFoundException(describeHttpResponse(connection));
        }
        throw new UnrecoverableHttpException(describeHttpResponse(connection));
      } else {
        // However we will retry on some 5xx errors, particularly 500 and 503.
        throw new IOException(describeHttpResponse(connection));
      }
    } catch (UnrecoverableHttpException | FileNotFoundException e) {
      // Permanent failures bypass the retry logic in the IOException handler below.
      throw e;
    } catch (IllegalArgumentException e) {
      throw new UnrecoverableHttpException(e.getMessage());
    } catch (IOException e) {
      if (connection != null) {
        // If we got here, it means we might not have consumed the entire payload of the
        // response, if any. So we're going to force this socket to disconnect and not be
        // reused. This is particularly important if multiple threads end up establishing
        // connections to multiple mirrors simultaneously for a large file. We don't want to
        // download that large file twice.
        connection.disconnect();
      }
      // We don't respect the Retry-After header (RFC7231 § 7.1.3) because it's rarely used and
      // tends to be too conservative when it is. We're already being good citizens by using
      // exponential backoff. Furthermore RFC law didn't use the magic word "MUST".
      int timeout = IntMath.pow(2, retries) * MIN_RETRY_DELAY_MS;
      if (e instanceof SocketTimeoutException) {
        eventHandler.handle(Event.progress("Timeout connecting to " + url));
        connectTimeout = Math.min(connectTimeout * 2, scale(MAX_CONNECT_TIMEOUT_MS));
        // If we got connect timeout, we're already doing exponential backoff, so no point
        // in sleeping too.
        timeout = 1;
      } else if (e instanceof InterruptedIOException) {
        // Please note that SocketTimeoutException is a subtype of InterruptedIOException.
        throw e;
      }
      if (++retries == MAX_RETRIES) {
        if (e instanceof SocketTimeoutException) {
          // SocketTimeoutExceptions are InterruptedIOExceptions; however they do not signify
          // an external interruption, but simply a failed download due to some server timing
          // out. So rethrow them as ordinary IOExceptions.
          e = new IOException(e.getMessage(), e);
        } else {
          eventHandler
              .handle(Event.progress(format("Error connecting to %s: %s", url, e.getMessage())));
        }
        for (Throwable suppressed : suppressions) {
          e.addSuppressed(suppressed);
        }
        throw e;
      }
      // Java 7 allows us to create a tree of all errors that led to the ultimate failure.
      suppressions.add(e);
      eventHandler.handle(
          Event.progress(format("Failed to connect to %s trying again in %,dms", url, timeout)));
      // A retry restarts from the original URL, or from the target of the last permanent
      // redirect.
      url = originalUrl;
      try {
        sleeper.sleepMillis(timeout);
      } catch (InterruptedException translated) {
        throw new InterruptedIOException();
      }
    } catch (RuntimeException e) {
      if (connection != null) {
        connection.disconnect();
      }
      eventHandler.handle(Event.progress(format("Unknown error connecting to %s: %s", url, e)));
      throw e;
    }
  }
}