def _CheckLocalPath()

in manifest_xml.py [0:0]
43 lines of code
21 McCabe index (conditional complexity)

  def _CheckLocalPath(path, dir_ok=False, cwd_dot_ok=False):
    """Verify |path| is reasonable for use in filesystem paths.

    Used with <copyfile> & <linkfile> & <project> elements.

    This only validates the |path| in isolation: it does not check against the
    current filesystem state.  Thus it is suitable as a first pass in a parser.

    It enforces a number of constraints:
    * No empty paths.
    * No "~" in paths.
    * No Unicode codepoints that filesystems might elide when normalizing.
    * No newlines (CR or LF).
    * No relative path components like "." or "..".
    * No absolute paths.
    * No ".git" or ".repo*" path components.

    Path components are compared after lowercasing |path|, so e.g. ".GIT" is
    rejected just like ".git".  When several components are bad, the first one
    (in path order) is the one reported.

    Args:
      path: The path name to validate.
      dir_ok: Whether |path| may force a directory (e.g. end in a /).
      cwd_dot_ok: Whether |path| may be just ".".

    Returns:
      None if |path| is OK, a failure message otherwise.
    """
    if not path:
      return 'empty paths not allowed'

    if '~' in path:
      return '~ not allowed (due to 8.3 filenames on Windows filesystems)'

    path_codepoints = set(path)

    # Some filesystems (like Apple's HFS+) try to normalize Unicode codepoints
    # which means there are alternative names for ".git".  Reject paths with
    # these in it as there shouldn't be any reasonable need for them here.
    # The set of codepoints here was cribbed from jgit's implementation:
    # https://eclipse.googlesource.com/jgit/jgit/+/9110037e3e9461ff4dac22fee84ef3694ed57648/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java#884
    BAD_CODEPOINTS = {
        u'\u200C',  # ZERO WIDTH NON-JOINER
        u'\u200D',  # ZERO WIDTH JOINER
        u'\u200E',  # LEFT-TO-RIGHT MARK
        u'\u200F',  # RIGHT-TO-LEFT MARK
        u'\u202A',  # LEFT-TO-RIGHT EMBEDDING
        u'\u202B',  # RIGHT-TO-LEFT EMBEDDING
        u'\u202C',  # POP DIRECTIONAL FORMATTING
        u'\u202D',  # LEFT-TO-RIGHT OVERRIDE
        u'\u202E',  # RIGHT-TO-LEFT OVERRIDE
        u'\u206A',  # INHIBIT SYMMETRIC SWAPPING
        u'\u206B',  # ACTIVATE SYMMETRIC SWAPPING
        u'\u206C',  # INHIBIT ARABIC FORM SHAPING
        u'\u206D',  # ACTIVATE ARABIC FORM SHAPING
        u'\u206E',  # NATIONAL DIGIT SHAPES
        u'\u206F',  # NOMINAL DIGIT SHAPES
        u'\uFEFF',  # ZERO WIDTH NO-BREAK SPACE
    }
    if BAD_CODEPOINTS & path_codepoints:
      # This message is more expansive than reality, but should be fine.
      return 'Unicode combining characters not allowed'

    # Reject newlines as there shouldn't be any legitimate use for them,
    # they'll be confusing to users, and they can easily break tools that
    # expect to be able to iterate over newline delimited lists.  This even
    # applies to our own code like .repo/project.list.
    if {'\r', '\n'} & path_codepoints:
      return 'Newlines not allowed'

    # Assume paths might be used on case-insensitive filesystems.
    path = path.lower()

    # Split up the path by its components.  We can't use os.path.sep exclusively
    # as some platforms (like Windows) will convert / to \ and that bypasses all
    # our constructed logic here.  Especially since manifest authors only use
    # / in their paths.
    resep = re.compile(r'[/%s]' % re.escape(os.path.sep))
    # Strip off trailing slashes as those only produce '' elements, and we use
    # parts to look for individual bad components.
    parts = resep.split(path.rstrip('/'))

    # Some people use src="." to create stable links to projects.  Lets allow
    # that but reject all other uses of "." to keep things simple.
    if not cwd_dot_ok or parts != ['.']:
      # Walk the components in path order (not via a set) so that the reported
      # component is deterministic when more than one of them is bad.
      for part in parts:
        if part in {'.', '..', '.git'} or part.startswith('.repo'):
          return 'bad component: %s' % (part,)

    # |path| is non-empty here, so indexing the last character is safe.
    if not dir_ok and resep.match(path[-1]):
      return 'dirs not allowed'

    # NB: The two abspath checks here are to handle platforms with multiple
    # filesystem path styles (e.g. Windows).
    norm = os.path.normpath(path)
    if (norm == '..' or
        # A leading "../" (or "..\" on Windows): the separator to test is the
        # character right after the two dots, i.e. index 2.
        (len(norm) >= 3 and norm.startswith('..') and resep.match(norm[2])) or
        os.path.isabs(norm) or
        norm.startswith('/')):
      return 'path cannot be outside'

    return None