in src/azure/builder.rs [645:707]
fn parse_url(&mut self, url: &str) -> Result<()> {
let parsed = Url::parse(url).map_err(|source| {
let url = url.into();
Error::UnableToParseUrl { url, source }
})?;
let host = parsed
.host_str()
.ok_or_else(|| Error::UrlNotRecognised { url: url.into() })?;
let validate = |s: &str| match s.contains('.') {
true => Err(Error::UrlNotRecognised { url: url.into() }),
false => Ok(s.to_string()),
};
match parsed.scheme() {
"az" | "adl" | "azure" => self.container_name = Some(validate(host)?),
"abfs" | "abfss" => {
// abfs(s) might refer to the fsspec convention abfs://<container>/<path>
// or the convention for the hadoop driver abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path>
if parsed.username().is_empty() {
self.container_name = Some(validate(host)?);
} else if let Some(a) = host.strip_suffix(".dfs.core.windows.net") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
} else if let Some(a) = host.strip_suffix(".dfs.fabric.microsoft.com") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else {
return Err(Error::UrlNotRecognised { url: url.into() }.into());
}
}
"https" => match host.split_once('.') {
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);
if let Some(container) = parsed.path_segments().unwrap().next() {
self.container_name = Some(validate(container)?);
}
}
Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);
// Attempt to infer the container name from the URL
// - https://onelake.dfs.fabric.microsoft.com/<workspaceGUID>/<itemGUID>/Files/test.csv
// - https://onelake.dfs.fabric.microsoft.com/<workspace>/<item>.<itemtype>/<path>/<fileName>
//
// See <https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api>
if let Some(workspace) = parsed.path_segments().unwrap().next() {
if !workspace.is_empty() {
self.container_name = Some(workspace.to_string())
}
}
self.use_fabric_endpoint = true.into();
}
_ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
},
scheme => {
let scheme = scheme.into();
return Err(Error::UnknownUrlScheme { scheme }.into());
}
}
Ok(())
}