def _translate_segment_glob()

in scancode/lib/gitwildmatch.py [0:0]


	def _translate_segment_glob(pattern):
		"""
		Translates the glob pattern to a regular expression. This is used in
		the constructor to translate a path segment glob pattern to its
		corresponding regular expression.

		*pattern* (:class:`str`) is the glob pattern.

		Returns the regular expression (:class:`str`).
		"""
		# NOTE: This is derived from `fnmatch.translate()` and is similar to
		# the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

		escape = False
		regex = ''
		i, end = 0, len(pattern)
		while i < end:
			# Get next character.
			char = pattern[i]
			i += 1

			if escape:
				# Escape the character.
				escape = False
				regex += re.escape(char)

			elif char == '\\':
				# Escape character, escape next character.
				escape = True

			elif char == '*':
				# Multi-character wildcard. Match any string (except slashes),
				# including an empty string.
				regex += '[^/]*'

			elif char == '?':
				# Single-character wildcard. Match any single character (except
				# a slash).
				regex += '[^/]'

			elif char == '[':
				# Bracket expression wildcard. Except for the beginning
				# exclamation mark, the whole bracket expression can be used
				# directly as regex but we have to find where the expression
				# ends.
				# - "[][!]" matchs ']', '[' and '!'.
				# - "[]-]" matchs ']' and '-'.
				# - "[!]a-]" matchs any character except ']', 'a' and '-'.
				j = i
				# Pass bracket expression negation.
				if j < end and pattern[j] == '!':
					j += 1
				# Pass first closing bracket if it is at the beginning of the
				# expression.
				if j < end and pattern[j] == ']':
					j += 1
				# Find closing bracket. Stop once we reach the end or find it.
				while j < end and pattern[j] != ']':
					j += 1

				if j < end:
					# Found end of bracket expression. Increment j to be one past
					# the closing bracket:
					#
					#  [...]
					#   ^   ^
					#   i   j
					#
					j += 1
					expr = '['

					if pattern[i] == '!':
						# Bracket expression needs to be negated.
						expr += '^'
						i += 1
					elif pattern[i] == '^':
						# POSIX declares that the regex bracket expression negation
						# "[^...]" is undefined in a glob pattern. Python's
						# `fnmatch.translate()` escapes the caret ('^') as a
						# literal. To maintain consistency with undefined behavior,
						# I am escaping the '^' as well.
						expr += '\\^'
						i += 1

					# Build regex bracket expression. Escape slashes so they are
					# treated as literal slashes by regex as defined by POSIX.
					expr += pattern[i:j].replace('\\', '\\\\')

					# Add regex bracket expression to regex result.
					regex += expr

					# Set i to one past the closing bracket.
					i = j

				else:
					# Failed to find closing bracket, treat opening bracket as a
					# bracket literal instead of as an expression.
					regex += '\\['

			else:
				# Regular character, escape it for regex.
				regex += re.escape(char)

		return regex