in manifest_xml.py [0:0]
def _CheckLocalPath(path, dir_ok=False, cwd_dot_ok=False):
"""Verify |path| is reasonable for use in filesystem paths.
Used with <copyfile> & <linkfile> & <project> elements.
This only validates the |path| in isolation: it does not check against the
current filesystem state. Thus it is suitable as a first-past in a parser.
It enforces a number of constraints:
* No empty paths.
* No "~" in paths.
* No Unicode codepoints that filesystems might elide when normalizing.
* No relative path components like "." or "..".
* No absolute paths.
* No ".git" or ".repo*" path components.
Args:
path: The path name to validate.
dir_ok: Whether |path| may force a directory (e.g. end in a /).
cwd_dot_ok: Whether |path| may be just ".".
Returns:
None if |path| is OK, a failure message otherwise.
"""
if not path:
return 'empty paths not allowed'
if '~' in path:
return '~ not allowed (due to 8.3 filenames on Windows filesystems)'
path_codepoints = set(path)
# Some filesystems (like Apple's HFS+) try to normalize Unicode codepoints
# which means there are alternative names for ".git". Reject paths with
# these in it as there shouldn't be any reasonable need for them here.
# The set of codepoints here was cribbed from jgit's implementation:
# https://eclipse.googlesource.com/jgit/jgit/+/9110037e3e9461ff4dac22fee84ef3694ed57648/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java#884
BAD_CODEPOINTS = {
u'\u200C', # ZERO WIDTH NON-JOINER
u'\u200D', # ZERO WIDTH JOINER
u'\u200E', # LEFT-TO-RIGHT MARK
u'\u200F', # RIGHT-TO-LEFT MARK
u'\u202A', # LEFT-TO-RIGHT EMBEDDING
u'\u202B', # RIGHT-TO-LEFT EMBEDDING
u'\u202C', # POP DIRECTIONAL FORMATTING
u'\u202D', # LEFT-TO-RIGHT OVERRIDE
u'\u202E', # RIGHT-TO-LEFT OVERRIDE
u'\u206A', # INHIBIT SYMMETRIC SWAPPING
u'\u206B', # ACTIVATE SYMMETRIC SWAPPING
u'\u206C', # INHIBIT ARABIC FORM SHAPING
u'\u206D', # ACTIVATE ARABIC FORM SHAPING
u'\u206E', # NATIONAL DIGIT SHAPES
u'\u206F', # NOMINAL DIGIT SHAPES
u'\uFEFF', # ZERO WIDTH NO-BREAK SPACE
}
if BAD_CODEPOINTS & path_codepoints:
# This message is more expansive than reality, but should be fine.
return 'Unicode combining characters not allowed'
# Reject newlines as there shouldn't be any legitmate use for them, they'll
# be confusing to users, and they can easily break tools that expect to be
# able to iterate over newline delimited lists. This even applies to our
# own code like .repo/project.list.
if {'\r', '\n'} & path_codepoints:
return 'Newlines not allowed'
# Assume paths might be used on case-insensitive filesystems.
path = path.lower()
# Split up the path by its components. We can't use os.path.sep exclusively
# as some platforms (like Windows) will convert / to \ and that bypasses all
# our constructed logic here. Especially since manifest authors only use
# / in their paths.
resep = re.compile(r'[/%s]' % re.escape(os.path.sep))
# Strip off trailing slashes as those only produce '' elements, and we use
# parts to look for individual bad components.
parts = resep.split(path.rstrip('/'))
# Some people use src="." to create stable links to projects. Lets allow
# that but reject all other uses of "." to keep things simple.
if not cwd_dot_ok or parts != ['.']:
for part in set(parts):
if part in {'.', '..', '.git'} or part.startswith('.repo'):
return 'bad component: %s' % (part,)
if not dir_ok and resep.match(path[-1]):
return 'dirs not allowed'
# NB: The two abspath checks here are to handle platforms with multiple
# filesystem path styles (e.g. Windows).
norm = os.path.normpath(path)
if (norm == '..' or
(len(norm) >= 3 and norm.startswith('..') and resep.match(norm[0])) or
os.path.isabs(norm) or
norm.startswith('/')):
return 'path cannot be outside'