in src/api/sync.rs [456:574]
/// Fetches the commit hash, etag and size of the resource at `url`.
///
/// A ranged GET (`bytes=0-0`) is issued with `no_redirect_client`. Relative redirects
/// (a `Location` without a host) are followed manually with the same client. The commit
/// hash is read from `x-repo-commit` and the etag from `x-linked-etag`, falling back to
/// `etag`, with double quotes stripped. If the resulting response is still a redirection,
/// its `Location` is requested once with `client`, and the size is taken from the part of
/// the `CONTENT_RANGE` header after the last `/`.
///
/// # Errors
///
/// * Request failures are boxed and converted into [`ApiError`] via `?`.
/// * `ApiError::MissingHeader` if the etag, commit, `CONTENT_RANGE` header, or the
///   `Location` header of a final redirection is absent.
/// * `ApiError::InvalidHeader` if a header value is not valid UTF-8, or a `Location` value
///   cannot be parsed or merged into the request URL.
/// * The size parse error is converted into [`ApiError`] via `?`.
///
/// # Panics
///
/// Panics if `url` cannot be re-parsed as a `Uri` while following a relative redirect.
fn metadata(&self, url: &str) -> Result<Metadata, ApiError> {
    let mut response = self
        .no_redirect_client
        .get(url)
        .header(RANGE, "bytes=0-0")
        .call()
        .map_err(Box::new)?;

    // Only these status codes are considered for manual redirect following.
    let should_redirect = |status_code: StatusCode| {
        matches!(
            status_code,
            StatusCode::MOVED_PERMANENTLY
                | StatusCode::FOUND
                | StatusCode::SEE_OTHER
                | StatusCode::TEMPORARY_REDIRECT
                | StatusCode::PERMANENT_REDIRECT
        )
    };

    // Follow redirects until `host.is_some()` i.e. only follow relative redirects
    // See: https://github.com/huggingface/huggingface_hub/blob/9c6af39cdce45b570f0b7f8fad2b311c96019804/src/huggingface_hub/file_download.py#L411
    // NOTE(review): no upper bound is placed on the number of relative hops followed here.
    let response = loop {
        if should_redirect(response.status()) {
            if let Some(location) = response.headers().get("Location") {
                let uri = Uri::from_str(
                    std::str::from_utf8(location.as_bytes())
                        .map_err(|_| InvalidHeader("location"))?,
                )
                .map_err(|_| InvalidHeader("location"))?;

                // Relative redirect, i.e. the location carries no host.
                if uri.host().is_none() {
                    // Keep scheme and authority of the original url, swap in the new
                    // path and query.
                    // NOTE(review): presumably cannot fail because the same string was
                    // already accepted for the request above; confirm against the client.
                    let mut parts = Uri::from_str(url)
                        .expect("url was already used to issue a request")
                        .into_parts();
                    parts.path_and_query = uri.into_parts().path_and_query;
                    // A location that cannot be merged is a header problem, not a bug:
                    // report it instead of panicking.
                    let redirect_uri =
                        Uri::from_parts(parts).map_err(|_| InvalidHeader("location"))?;

                    response = self
                        .no_redirect_client
                        .get(&redirect_uri.to_string())
                        .header(RANGE, "bytes=0-0")
                        .call()
                        .map_err(Box::new)?;
                    continue;
                }
            }
        }
        break response;
    };

    let header_commit = "x-repo-commit";
    let header_linked_etag = "x-linked-etag";
    let header_etag = "etag";

    // Prefer the linked etag, fall back to the plain etag.
    let etag = response
        .headers()
        .get(header_linked_etag)
        .or_else(|| response.headers().get(header_etag))
        .ok_or(ApiError::MissingHeader(header_etag))?;
    // Cleaning extra quotes
    let etag = std::str::from_utf8(etag.as_bytes())
        .map_err(|_| ApiError::InvalidHeader("etag"))?
        .replace('"', "");

    let commit_hash = std::str::from_utf8(
        response
            .headers()
            .get(header_commit)
            .ok_or(ApiError::MissingHeader(header_commit))?
            .as_bytes(),
    )
    .map_err(|_| ApiError::InvalidHeader("commit_hash"))?
    .to_string();

    // The response was redirected to S3 most likely which will
    // know about the size of the file
    let response = if response.status().is_redirection() {
        // Not every 3xx response is guaranteed to carry a `Location` header, so its
        // absence is reported as an error rather than treated as an invariant violation.
        let location = response
            .headers()
            .get(LOCATION)
            .ok_or(ApiError::MissingHeader("location"))?;
        let location = std::str::from_utf8(location.as_bytes())
            .map_err(|_| ApiError::InvalidHeader("location"))?;
        self.client
            .get(location)
            .header(RANGE, "bytes=0-0")
            .call()
            .map_err(Box::new)?
    } else {
        response
    };

    let content_range = response
        .headers()
        .get(CONTENT_RANGE)
        .ok_or(ApiError::MissingHeader(CONTENT_RANGE))?;
    let content_range = std::str::from_utf8(content_range.as_bytes())
        .map_err(|_| ApiError::InvalidHeader(CONTENT_RANGE))?;
    // The size is whatever follows the last `/` of the header value.
    let size = content_range
        .split('/')
        .next_back()
        .ok_or(ApiError::InvalidHeader(CONTENT_RANGE))?
        .parse()?;

    Ok(Metadata {
        commit_hash,
        etag,
        size,
    })
}