crates/core/src/storage/util.rs (105 lines of code) (raw):

/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ //! Utility functions for storage. use url::Url; use crate::storage::error::StorageError::{InvalidPath, UrlParseError}; use crate::storage::Result; /// Parses a URI string into a URL. pub fn parse_uri(uri: &str) -> Result<Url> { let mut url = match Url::parse(uri) { Ok(url) => url, Err(e) => Url::from_directory_path(uri).map_err(|_| UrlParseError(e))?, }; if url.path().ends_with('/') { let err = InvalidPath(format!("Url {:?} cannot be a base", url)); url.path_segments_mut().map_err(|_| err)?.pop(); } Ok(url) } /// Returns the scheme and authority of a URL in the form of `scheme://authority`. pub fn get_scheme_authority(url: &Url) -> String { format!("{}://{}", url.scheme(), url.authority()) } /// Joins a base URL with a list of segments. 
///
/// The base URL is cloned, never modified. If its path ends with `/`, the last
/// path segment is popped first. Every entry of `segments` is then split on
/// `/`, empty pieces are discarded, and the remaining pieces are appended in
/// order. Leading, trailing, and repeated slashes inside a segment are
/// therefore ignored, and an empty segment contributes nothing.
///
/// # Errors
///
/// Returns `InvalidPath` when the path has to be edited (the base path ends
/// with `/`, or `segments` is non-empty) but `base_url` cannot be a base.
/// This function does not panic on such URLs.
pub fn join_url_segments(base_url: &Url, segments: &[&str]) -> Result<Url> {
    let mut url = base_url.clone();
    let has_trailing_slash = url.path().ends_with('/');

    // Nothing to strip and nothing to append: the path is never touched, so
    // the clone is returned as-is, even for a cannot-be-a-base URL.
    if !has_trailing_slash && segments.is_empty() {
        return Ok(url);
    }

    match url.path_segments_mut() {
        Ok(mut path) => {
            if has_trailing_slash {
                path.pop();
            }
            path.extend(
                segments
                    .iter()
                    .flat_map(|seg| seg.split('/'))
                    .filter(|part| !part.is_empty()),
            );
        }
        Err(()) => {
            // `url` is still mutably borrowed by the scrutinee here; it has not
            // been changed yet, so `base_url` renders the identical message.
            return Err(InvalidPath(format!(
                "Url {:?} cannot be a base",
                base_url
            )));
        }
    }

    Ok(url)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    #[test]
    fn parse_valid_uri_in_various_forms() {
        let urls = vec![
            parse_uri("/foo/").unwrap(),
            parse_uri("file:/foo/").unwrap(),
            parse_uri("file:///foo/").unwrap(),
            parse_uri("hdfs://foo/").unwrap(),
            parse_uri("s3://foo").unwrap(),
            parse_uri("s3://foo/").unwrap(),
            parse_uri("s3a://foo/bar/").unwrap(),
            parse_uri("gs://foo/").unwrap(),
            parse_uri("wasb://foo/bar").unwrap(),
            parse_uri("wasbs://foo/").unwrap(),
        ];
        let schemes = vec![
            "file", "file", "file", "hdfs", "s3", "s3", "s3a", "gs", "wasb", "wasbs",
        ];
        let paths = vec![
            "/foo", "/foo", "/foo", "/", "", "/", "/bar", "/", "/bar", "/",
        ];
        assert_eq!(urls.iter().map(|u| u.scheme()).collect::<Vec<_>>(), schemes);
        assert_eq!(urls.iter().map(|u| u.path()).collect::<Vec<_>>(), paths);
    }

    #[test]
    fn join_base_url_with_segments() {
        let base_url = Url::from_str("file:///base").unwrap();

        assert_eq!(
            join_url_segments(&base_url, &["foo"]).unwrap(),
            Url::from_str("file:///base/foo").unwrap()
        );

        assert_eq!(
            join_url_segments(&base_url, &["/foo"]).unwrap(),
            Url::from_str("file:///base/foo").unwrap()
        );

        assert_eq!(
            join_url_segments(&base_url, &["/foo", "bar/", "/baz/"]).unwrap(),
            Url::from_str("file:///base/foo/bar/baz").unwrap()
        );

        assert_eq!(
            join_url_segments(&base_url, &["foo/", "", "bar/baz"]).unwrap(),
            Url::from_str("file:///base/foo/bar/baz").unwrap()
        );

        assert_eq!(
            join_url_segments(&base_url, &["foo1/bar1", "foo2/bar2"]).unwrap(),
            Url::from_str("file:///base/foo1/bar1/foo2/bar2").unwrap()
        );
    }

    #[test]
    fn join_base_url_with_trailing_slash() {
        let base_url = Url::from_str("file:///base/").unwrap();

        assert_eq!(
            join_url_segments(&base_url, &["foo", "bar"]).unwrap(),
            Url::from_str("file:///base/foo/bar").unwrap()
        );
    }

    #[test]
    fn join_failed_due_to_invalid_base() {
        let base_url = Url::from_str("foo:text/plain,bar").unwrap();
        let result = join_url_segments(&base_url, &["foo"]);
        assert!(result.is_err());
    }

    #[test]
    fn join_failed_due_to_invalid_base_with_trailing_slash() {
        // A cannot-be-a-base URL whose path ends with '/' must yield an error
        // rather than panic.
        let base_url = Url::from_str("foo:text/plain,bar/").unwrap();
        let result = join_url_segments(&base_url, &["foo"]);
        assert!(result.is_err());
    }
}