in standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java [265:660]
static {
for (char c = 0; c < ' '; c++) {
charToEscape.set(c);
}
/*
* ASCII 01-1F are HTTP control characters that need to be escaped.
* \u000A and \u000D are \n and \r, respectively.
*/
char[] clist = new char[] {'\u0001', '\u0002', '\u0003', '\u0004',
'\u0005', '\u0006', '\u0007', '\u0008', '\u0009', '\n', '\u000B',
'\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012',
'\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019',
'\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F',
'"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{',
'[', ']', '^'};
for (char c : clist) {
charToEscape.set(c);
}
}
/**
 * Tells whether a character is flagged in {@code charToEscape}.
 *
 * @param c the character to look up
 * @return true if {@code c} lies within the lookup's range and is flagged for escaping
 */
private static boolean needsEscaping(char c) {
  if (c >= charToEscape.size()) {
    // Beyond the range tracked by the lookup: never escaped.
    return false;
  }
  return charToEscape.get(c);
}
/**
 * Escapes a path name without supplying a caller-specific default.
 * Delegates to {@link #escapePathName(String, String)} with a null default path.
 *
 * @param path The path to escape.
 * @return An escaped path name.
 */
public static String escapePathName(String path) {
  final String noDefault = null;
  return escapePathName(path, noDefault);
}
/**
 * Escapes a path name. Every character for which {@code needsEscaping} returns true
 * is replaced by '%' followed by its code in upper-case hexadecimal (at least two digits);
 * all other characters are copied unchanged.
 *
 * @param path The path to escape.
 * @param defaultPath
 *          The default name for the path, if the given path is empty or null.
 * @return An escaped path name. When {@code path} is null or empty this is {@code defaultPath},
 *         or "__HIVE_DEFAULT_PARTITION__" if {@code defaultPath} is null as well.
 */
public static String escapePathName(String path, String defaultPath) {
  // TODO: we should allow user to specify default partition or HDFS file location.
  if (path == null || path.length() == 0) {
    // Previously, when path is empty or null and no default path is specified,
    // __HIVE_DEFAULT_PARTITION__ was the return value; that behaviour is kept.
    return defaultPath == null ? "__HIVE_DEFAULT_PARTITION__" : defaultPath;
  }

  final int length = path.length();

  // Fast path: find the first character that needs escaping. If there is none,
  // the input can be returned as-is without copying it.
  int firstEscape = 0;
  while (firstEscape < length && !needsEscaping(path.charAt(firstEscape))) {
    firstEscape++;
  }
  if (firstEscape == length) {
    return path;
  }

  // Slow path: copy the clean prefix in one go, then escape from there on.
  final String hexDigits = "0123456789ABCDEF";
  StringBuilder sb = new StringBuilder(length + 16);
  sb.append(path, 0, firstEscape);
  for (int i = firstEscape; i < length; i++) {
    char c = path.charAt(i);
    if (!needsEscaping(c)) {
      sb.append(c);
      continue;
    }
    sb.append('%');
    if (c <= 0xFF) {
      // Two upper-case hex digits; same output as the "%02X" format without the formatter cost.
      sb.append(hexDigits.charAt(c >>> 4));
      sb.append(hexDigits.charAt(c & 0x0F));
    } else {
      // Wider code units keep the formatter so the output stays identical.
      sb.append(String.format("%1$02X", (int) c));
    }
  }
  return sb.toString();
}
/**
 * Reverses the '%XX' escaping applied to path names.
 * A '%' followed by exactly two hexadecimal digits is replaced by the character with
 * that code. Any other '%' (too close to the end of the string, or followed by
 * something that is not two hex digits, such as a sign character) is copied verbatim.
 *
 * @param path The escaped path; must not be null.
 * @return The path with all well-formed escape sequences decoded.
 */
public static String unescapePathName(String path) {
  final int length = path.length();
  StringBuilder sb = new StringBuilder(length);
  for (int i = 0; i < length; i++) {
    char c = path.charAt(i);
    if (c == '%' && i + 2 < length) {
      // Validate each digit individually: Integer.parseInt would also accept a
      // leading '+' or '-', wrongly decoding sequences such as "%+1".
      int high = Character.digit(path.charAt(i + 1), 16);
      int low = Character.digit(path.charAt(i + 2), 16);
      if (high >= 0 && low >= 0) {
        sb.append((char) ((high << 4) | low));
        i += 2; // skip the two digits just consumed
        continue;
      }
    }
    sb.append(c);
  }
  return sb.toString();
}
/**
 * Collects the status of every file found beneath a root path, descending recursively.
 * If listing fails with an {@link IOException} an empty list is returned instead.
 *
 * @param base
 *          the root path
 * @param fs
 *          the file system
 * @return list of FileStatus for the files found; empty if the listing failed
 */
public static List<FileStatus> getFileStatusRecurse(Path base, FileSystem fs) {
  final List<FileStatus> collected = new ArrayList<>();
  try {
    if (!isS3a(fs)) {
      // Tree walk, applying the hidden-file filter at each directory level.
      listStatusRecursively(fs, fs.getFileStatus(base), collected);
    } else {
      // S3A file system has an optimized recursive directory listing implementation,
      // however it doesn't support filtering, so the result set is filtered afterwards.
      listS3FilesRecursive(base, fs, collected);
    }
  } catch (IOException e) {
    // globStatus() API returns empty FileStatus[] when the specified path
    // does not exist, but getFileStatus() throws IOException. To mimic the
    // similar behavior we return an empty list on exception. For external
    // tables, the path of the table will not exist during table creation.
    return Collections.emptyList();
  }
  return collected;
}
/**
 * Recursively lists status for all files starting from a particular directory (or individual file
 * as base case). Directory children are listed through {@code HIDDEN_FILES_PATH_FILTER}.
 *
 * @param fs
 *          file system
 *
 * @param fileStatus
 *          starting point in file system
 *
 * @param results
 *          receives enumeration of all files found
 * @throws IOException if listing a directory fails
 */
private static void listStatusRecursively(FileSystem fs, FileStatus fileStatus,
    List<FileStatus> results) throws IOException {
  // isDirectory() replaces the deprecated isDir().
  if (!fileStatus.isDirectory()) {
    // Base case: a plain file is recorded as-is.
    results.add(fileStatus);
    return;
  }
  for (FileStatus child : fs.listStatus(fileStatus.getPath(), HIDDEN_FILES_PATH_FILTER)) {
    listStatusRecursively(fs, child, results);
  }
}
/**
 * Lists all files under {@code base} with a single recursive listing call and keeps those
 * whose path relative to {@code base} passes
 * {@code RemoteIteratorWithFilter.HIDDEN_FILES_FULL_PATH_FILTER}.
 *
 * @param base the root path of the listing
 * @param fs the file system to list
 * @param results receives the accepted files
 * @throws IOException if the listing fails
 */
private static void listS3FilesRecursive(Path base, FileSystem fs, List<FileStatus> results) throws IOException {
  final RemoteIterator<LocatedFileStatus> listing = fs.listFiles(base, true);
  while (listing.hasNext()) {
    final LocatedFileStatus candidate = listing.next();
    // Only the part below base is checked, so hidden components of base itself do not matter.
    final Path belowBase = makeRelative(base, candidate.getPath());
    if (!RemoteIteratorWithFilter.HIDDEN_FILES_FULL_PATH_FILTER.accept(belowBase)) {
      continue;
    }
    results.add(candidate);
  }
}
/**
 * Returns a relative path wrt the parent path.
 * Both paths are rendered as strings with a trailing separator, and the first literal
 * occurrence of the parent string is removed from the child string. The parent is matched
 * as plain text, so characters that are special in regular expressions are handled safely.
 * If the parent string does not occur in the child string, the child is returned unchanged.
 *
 * @param parentPath the parent path.
 * @param childPath the child path.
 * @return childPath relative to parent path.
 */
public static Path makeRelative(Path parentPath, Path childPath) {
  String parentString = parentPath.toString();
  if (!parentString.endsWith(Path.SEPARATOR)) {
    parentString = parentString + Path.SEPARATOR;
  }
  String childString = childPath.toString();
  if (!childString.endsWith(Path.SEPARATOR)) {
    childString = childString + Path.SEPARATOR;
  }
  // Literal search instead of String.replaceFirst, which would interpret the
  // parent path as a regular expression.
  final int at = childString.indexOf(parentString);
  if (at < 0) {
    return new Path(childString);
  }
  return new Path(childString.substring(0, at) + childString.substring(at + parentString.length()));
}
/**
 * Checks whether the file system reports the "s3a" scheme (case-insensitively).
 *
 * @param fs the file system to inspect
 * @return true if the scheme is "s3a"; false otherwise, including when the
 *         file system does not support reporting a scheme
 */
public static boolean isS3a(FileSystem fs) {
  final String scheme;
  try {
    scheme = fs.getScheme();
  } catch (UnsupportedOperationException ex) {
    // The file system cannot report its scheme: treat it as not S3A.
    return false;
  }
  return "s3a".equalsIgnoreCase(scheme);
}
/**
 * Makes a valid partition name without a caller-specific default value.
 * Delegates to {@link #makePartName(List, List, String)} with a null default string.
 *
 * @param partCols The partition keys' names
 * @param vals The partition values
 * @return An escaped, valid partition name.
 */
public static String makePartName(List<String> partCols, List<String> vals) {
  final String noDefault = null;
  return makePartName(partCols, vals, noDefault);
}
/**
 * Makes a valid partition name of the form {@code key1=val1/key2=val2/...}.
 * Keys are lower-cased and both keys and values are escaped with
 * {@link #escapePathName(String, String)}.
 *
 * @param partCols The partition keys' names
 * @param vals The partition values; must contain at least as many entries as {@code partCols}
 * @param defaultStr
 *          The default name given to a partition value if the respective value is empty or null.
 * @return An escaped, valid partition name.
 */
public static String makePartName(List<String> partCols, List<String> vals,
    String defaultStr) {
  StringBuilder name = new StringBuilder();
  for (int i = 0; i < partCols.size(); i++) {
    if (i > 0) {
      name.append(Path.SEPARATOR);
    }
    // Lower-case with a fixed locale: the default-locale variant would turn "I" into a
    // dotless i under e.g. a Turkish JVM locale and produce a different partition name.
    String key = partCols.get(i).toLowerCase(java.util.Locale.ROOT);
    name.append(escapePathName(key, defaultStr));
    name.append('=');
    name.append(escapePathName(vals.get(i), defaultStr));
  }
  return name.toString();
}
/**
 * Determine if two objects reference the same file system, by comparing their URIs.
 *
 * @param fs1 first file system
 * @param fs2 second file system
 * @return return true if both file system arguments point to same file system
 */
public static boolean equalsFileSystem(FileSystem fs1, FileSystem fs2) {
  // When the file system cache is disabled, different FileSystem objects are handed out
  // for the same file system, so '==' can't be used in such cases.
  // The FileSystem api doesn't have a .equals() function implemented, so the uri is used
  // for comparison. FileSystem already uses uri+Configuration for equality in its CACHE.
  // Once equality has been added in HDFS-9159, we should make use of it.
  final URI first = fs1.getUri();
  final URI second = fs2.getUri();
  return first.equals(second);
}
/**
 * Check if the path contains a subdirectory named '.snapshot', by listing {@code p}
 * through {@code SNAPSHOT_DIR_PATH_FILTER} and testing whether anything was returned.
 *
 * @param p path to check
 * @param fs filesystem of the path
 * @return true if p contains a subdirectory named '.snapshot'
 * @throws IOException if listing the path fails
 */
public static boolean pathHasSnapshotSubDir(Path p, FileSystem fs) throws IOException {
  // Hadoop is missing a public API to check for snapshotable directories. Check with the
  // directory name until a more appropriate API is provided by HDFS-12257.
  final FileStatus[] snapshotDirs = fs.listStatus(p, FileUtils.SNAPSHOT_DIR_PATH_FILTER);
  if (snapshotDirs == null) {
    return false;
  }
  return snapshotDirs.length > 0;
}
/**
 * Creates the directory at {@code path} if it does not exist yet.
 *
 * @param path the directory to create
 * @param conf configuration used to resolve the path's file system
 * @throws MetaException if the file system cannot be obtained, queried or modified;
 *           the underlying {@link IOException} is attached as the cause
 */
public static void makeDir(Path path, Configuration conf) throws MetaException {
  try {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
      fs.mkdirs(path);
    }
  } catch (IOException e) {
    // The exception is built from a message only, so the cause is chained separately
    // to keep the original failure visible in stack traces.
    MetaException failure =
        new MetaException("Unable to create directory: " + path + ": " + e.getMessage());
    failure.initCause(e);
    throw failure;
  }
}
/**
 * Utility method that determines if a specified directory already has
 * contents (non-hidden files) or not - useful to determine if an
 * immutable table already has contents, for example.
 *
 * @param fs the file system holding the directory
 * @param path the directory to inspect
 * @return true if the path does not exist or no child passes {@code hiddenFileFilter};
 *         false otherwise
 * @throws IOException if the file system cannot be queried
 */
public static boolean isDirEmpty(FileSystem fs, Path path) throws IOException {
  if (!fs.exists(path)) {
    // A missing directory counts as empty.
    return true;
  }
  final Path allChildren = new Path(path, "*");
  final FileStatus[] visible = fs.globStatus(allChildren, hiddenFileFilter);
  return visible.length == 0;
}
/**
 * Variant of Path.makeQualified that qualifies the input path against the default file system
 * indicated by the configuration.
 *
 * For absolute paths this does not require a FileSystem handle - only the default file system
 * URI. This saves the cost of opening the FileSystem - which can involve RPCs - and avoids the
 * errors doing so may cause. Relative paths still need the working directory and therefore a
 * FileSystem handle.
 *
 * @param path
 *          path to be fully qualified
 * @param conf
 *          Configuration file
 * @return path qualified relative to default file system
 * @throws IOException if a FileSystem handle is needed and cannot be obtained
 */
public static Path makeQualified(Path path, Configuration conf) throws IOException {
  if (!path.isAbsolute()) {
    // A relative path needs the working directory, which requires a FileSystem
    // handle, so fall back to the original method.
    FileSystem fs = FileSystem.get(conf);
    return path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
  }

  final URI defaultUri = FileSystem.getDefaultUri(conf);
  final URI givenUri = path.toUri();
  final String givenScheme = givenUri.getScheme();

  // Validate/fill in scheme and authority. This follows logic identical to
  // FileSystem.get(URI, conf), but doesn't actually obtain a file system handle.
  final String resolvedScheme;
  String resolvedAuthority;
  if (givenScheme == null) {
    // No scheme: take both scheme and authority from the default file system uri.
    resolvedScheme = defaultUri.getScheme();
    resolvedAuthority = defaultUri.getAuthority();
  } else {
    resolvedScheme = givenScheme;
    resolvedAuthority = givenUri.getAuthority();
    if (resolvedAuthority == null && givenScheme.equals(defaultUri.getScheme())) {
      // No authority: use the default one, since it applies to the same scheme.
      resolvedAuthority = defaultUri.getAuthority();
    }
  }
  if (resolvedAuthority == null) {
    resolvedAuthority = "";
  }
  return new Path(resolvedScheme, resolvedAuthority, givenUri.getPath());
}
/**
 * Returns a BEST GUESS as to whether or not other is a subdirectory of parent. It does not
 * take into account any intricacies of the underlying file system, which is assumed to be
 * HDFS. This should not return any false positives, but may return false negatives.
 * The check is purely textual: {@code other} must start with {@code parent} followed by
 * a path separator.
 *
 * @param parent Directory that may contain {@code other}
 * @param other Directory to check if it is a subdirectory of parent
 * @return True, if other is subdirectory of parent
 */
public static boolean isSubdirectory(String parent, String other) {
  String prefix = parent;
  if (!prefix.endsWith(Path.SEPARATOR)) {
    // Terminate the prefix so that "/a/b" does not match "/a/bc".
    prefix = prefix + Path.SEPARATOR;
  }
  return other.startsWith(prefix);
}
/**
 * Builds the path {@code root[/subDir]/name}.
 *
 * @param name the final path component
 * @param subDir optional intermediate directory; skipped when null
 * @param root the root location; when null no path is built
 * @return the assembled path, or null if {@code root} is null
 */
public static Path getTransformedPath(String name, String subDir, String root) {
  if (root == null) {
    return null;
  }
  final Path rootPath = new Path(root);
  final Path parent = (subDir == null) ? rootPath : new Path(rootPath, subDir);
  return new Path(parent, name);
}
/**
 * A {@link RemoteIterator} wrapper that only yields the entries of the underlying
 * iterator whose path is accepted by a {@link PathFilter}. The next matching entry
 * is looked up eagerly: once at construction and again after every {@link #next()}.
 */
public static class RemoteIteratorWithFilter implements RemoteIterator<LocatedFileStatus> {
  /**
   * This works with {@link RemoteIterator} which (potentially) produces all files recursively
   * so looking for hidden folders must look at whole path, not just the last part of it as
   * would be appropriate w/o recursive listing. A path is rejected if any of its components
   * starts with "_" or ".".
   */
  public static final PathFilter HIDDEN_FILES_FULL_PATH_FILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      Path current = p;
      while (true) {
        final String component = current.getName();
        final boolean hidden = component.startsWith("_") || component.startsWith(".");
        if (hidden) {
          return false;
        }
        // Walk up towards the root; running out of parents means nothing was hidden.
        current = current.getParent();
        if (current == null) {
          return true;
        }
      }
    }
  };

  /** Underlying iterator being filtered. */
  private final RemoteIterator<LocatedFileStatus> iter;
  /** Decides which entries of the underlying iterator are exposed. */
  private final PathFilter filter;
  /** The entry the next call to {@link #next()} returns, or null when exhausted. */
  private LocatedFileStatus nextFile;

  /**
   * Wraps an iterator and immediately positions on its first accepted entry.
   *
   * @param iter the iterator to filter
   * @param filter the filter applied to each entry's path
   * @throws IOException if advancing the underlying iterator fails
   */
  public RemoteIteratorWithFilter(RemoteIterator<LocatedFileStatus> iter, PathFilter filter)
      throws IOException {
    this.iter = iter;
    this.filter = filter;
    findNext();
  }

  @Override
  public boolean hasNext() throws IOException {
    return nextFile != null;
  }

  @Override
  public LocatedFileStatus next() throws IOException {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    final LocatedFileStatus current = nextFile;
    // Pre-fetch the following match before handing out the current one.
    findNext();
    return current;
  }

  /**
   * Advances the underlying iterator to the next entry accepted by the filter and
   * stores it in {@code nextFile}; stores null if no further entry matches.
   */
  void findNext() throws IOException {
    LocatedFileStatus match = null;
    while (match == null && iter.hasNext()) {
      final LocatedFileStatus candidate = iter.next();
      if (filter.accept(candidate.getPath())) {
        match = candidate;
      }
    }
    // Null here means there are no more matching files in the iterator.
    nextFile = match;
  }
}
}