# encoding: utf-8
#
# This is a copy of source code from Pathspec 0.5.9
# (https://pypi.org/project/pathspec/) which is
# available under an Mozilla Public License 2.0
# (https://www.mozilla.org/en-US/MPL/2.0/).
# A copy of the license is also available in 
# ../../licenses/LICENSE-pathspec.txt.
#
"""
This module implements Git's wildmatch pattern matching which itself is
derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore"
files.
"""

from __future__ import unicode_literals

import re
import warnings

import util
from compat import unicode
from pattern import RegexPattern

#: The encoding to use when parsing a byte string pattern.
_BYTES_ENCODING = 'latin1'


class GitWildMatchPattern(RegexPattern):
	"""
	The :class:`GitWildMatchPattern` class represents a compiled Git
	wildmatch pattern.
	"""

	# Keep the dict-less class hierarchy.
	__slots__ = ()

	@classmethod
	def pattern_to_regex(cls, pattern):
		"""
		Convert the pattern into a regular expression.

		*pattern* (:class:`unicode` or :class:`bytes`) is the pattern to
		convert into a regular expression.

		Returns the uncompiled regular expression (:class:`unicode`, :class:`bytes`,
		or :data:`None`), and whether matched files should be included
		(:data:`True`), excluded (:data:`False`), or if it is a
		null-operation (:data:`None`).
		"""
		if isinstance(pattern, unicode):
			return_type = unicode
		elif isinstance(pattern, bytes):
			return_type = bytes
			pattern = pattern.decode(_BYTES_ENCODING)
		else:
			raise TypeError("pattern:{!r} is not a unicode or byte string.".format(pattern))

		pattern = pattern.strip()

		if pattern.startswith('#'):
			# A pattern starting with a hash ('#') serves as a comment
			# (neither includes nor excludes files). Escape the hash with a
			# back-slash to match a literal hash (i.e., '\#').
			regex = None
			include = None

		elif pattern == '/':
			# EDGE CASE: According to `git check-ignore` (v2.4.1), a single
			# '/' does not match any file.
			regex = None
			include = None

		elif pattern:

			if pattern.startswith('!'):
				# A pattern starting with an exclamation mark ('!') negates the
				# pattern (exclude instead of include). Escape the exclamation
				# mark with a back-slash to match a literal exclamation mark
				# (i.e., '\!').
				include = False
				# Remove leading exclamation mark.
				pattern = pattern[1:]
			else:
				include = True

			if pattern.startswith('\\'):
				# Remove leading back-slash escape for escaped hash ('#') or
				# exclamation mark ('!').
				pattern = pattern[1:]

			# Split pattern into segments.
			pattern_segs = pattern.split('/')

			# Normalize pattern to make processing easier.

			if not pattern_segs[0]:
				# A pattern beginning with a slash ('/') will only match paths
				# directly on the root directory instead of any descendant
				# paths. So, remove empty first segment to make pattern relative
				# to root.
				del pattern_segs[0]

			elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
				# A single pattern without a beginning slash ('/') will match
				# any descendant path. This is equivalent to "**/{pattern}". So,
				# prepend with double-asterisks to make pattern relative to
				# root.
				# EDGE CASE: This also holds for a single pattern with a
				# trailing slash (e.g. dir/).
				if pattern_segs[0] != '**':
					pattern_segs.insert(0, '**')

			else:
				# EDGE CASE: A pattern without a beginning slash ('/') but
				# contains at least one prepended directory (e.g.
				# "dir/{pattern}") should not match "**/dir/{pattern}",
				# according to `git check-ignore` (v2.4.1).
				pass

			if not pattern_segs[-1] and len(pattern_segs) > 1:
				# A pattern ending with a slash ('/') will match all descendant
				# paths if it is a directory but not if it is a regular file.
				# This is equivalent to "{pattern}/**". So, set last segment to
				# double asterisks to include all descendants.
				pattern_segs[-1] = '**'

			# Build regular expression from pattern.
			output = ['^']
			need_slash = False
			end = len(pattern_segs) - 1
			for i, seg in enumerate(pattern_segs):
				if seg == '**':
					if i == 0 and i == end:
						# A pattern consisting solely of double-asterisks ('**')
						# will match every path.
						output.append('.+')
					elif i == 0:
						# A normalized pattern beginning with double-asterisks
						# ('**') will match any leading path segments.
						output.append('(?:.+/)?')
						need_slash = False
					elif i == end:
						# A normalized pattern ending with double-asterisks ('**')
						# will match any trailing path segments.
						output.append('/.*')
					else:
						# A pattern with inner double-asterisks ('**') will match
						# multiple (or zero) inner path segments.
						output.append('(?:/.+)?')
						need_slash = True
				elif seg == '*':
					# Match single path segment.
					if need_slash:
						output.append('/')
					output.append('[^/]+')
					need_slash = True
				else:
					# Match segment glob pattern.
					if need_slash:
						output.append('/')
					output.append(cls._translate_segment_glob(seg))
					if i == end and include is True:
						# A pattern ending without a slash ('/') will match a file
						# or a directory (with paths underneath it). E.g., "foo"
						# matches "foo", "foo/bar", "foo/bar/baz", etc.
						# EDGE CASE: However, this does not hold for exclusion cases
						# according to `git check-ignore` (v2.4.1).
						output.append('(?:/.*)?')
					need_slash = True
			output.append('$')
			regex = ''.join(output)

		else:
			# A blank pattern is a null-operation (neither includes nor
			# excludes files).
			regex = None
			include = None

		if regex is not None and return_type is bytes:
			regex = regex.encode(_BYTES_ENCODING)

		return regex, include

	@staticmethod
	def _translate_segment_glob(pattern):
		"""
		Translates the glob pattern to a regular expression. This is used in
		the constructor to translate a path segment glob pattern to its
		corresponding regular expression.

		*pattern* (:class:`str`) is the glob pattern.

		Returns the regular expression (:class:`str`).
		"""
		# NOTE: This is derived from `fnmatch.translate()` and is similar to
		# the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

		escape = False
		regex = ''
		i, end = 0, len(pattern)
		while i < end:
			# Get next character.
			char = pattern[i]
			i += 1

			if escape:
				# Escape the character.
				escape = False
				regex += re.escape(char)

			elif char == '\\':
				# Escape character, escape next character.
				escape = True

			elif char == '*':
				# Multi-character wildcard. Match any string (except slashes),
				# including an empty string.
				regex += '[^/]*'

			elif char == '?':
				# Single-character wildcard. Match any single character (except
				# a slash).
				regex += '[^/]'

			elif char == '[':
				# Bracket expression wildcard. Except for the beginning
				# exclamation mark, the whole bracket expression can be used
				# directly as regex but we have to find where the expression
				# ends.
				# - "[][!]" matchs ']', '[' and '!'.
				# - "[]-]" matchs ']' and '-'.
				# - "[!]a-]" matchs any character except ']', 'a' and '-'.
				j = i
				# Pass bracket expression negation.
				if j < end and pattern[j] == '!':
					j += 1
				# Pass first closing bracket if it is at the beginning of the
				# expression.
				if j < end and pattern[j] == ']':
					j += 1
				# Find closing bracket. Stop once we reach the end or find it.
				while j < end and pattern[j] != ']':
					j += 1

				if j < end:
					# Found end of bracket expression. Increment j to be one past
					# the closing bracket:
					#
					#  [...]
					#   ^   ^
					#   i   j
					#
					j += 1
					expr = '['

					if pattern[i] == '!':
						# Bracket expression needs to be negated.
						expr += '^'
						i += 1
					elif pattern[i] == '^':
						# POSIX declares that the regex bracket expression negation
						# "[^...]" is undefined in a glob pattern. Python's
						# `fnmatch.translate()` escapes the caret ('^') as a
						# literal. To maintain consistency with undefined behavior,
						# I am escaping the '^' as well.
						expr += '\\^'
						i += 1

					# Build regex bracket expression. Escape slashes so they are
					# treated as literal slashes by regex as defined by POSIX.
					expr += pattern[i:j].replace('\\', '\\\\')

					# Add regex bracket expression to regex result.
					regex += expr

					# Set i to one past the closing bracket.
					i = j

				else:
					# Failed to find closing bracket, treat opening bracket as a
					# bracket literal instead of as an expression.
					regex += '\\['

			else:
				# Regular character, escape it for regex.
				regex += re.escape(char)

		return regex

util.register_pattern('gitwildmatch', GitWildMatchPattern)


class GitIgnorePattern(GitWildMatchPattern):
	"""
	The :class:`GitIgnorePattern` class is deprecated by :class:`GitWildMatchPattern`.
	This class only exists to maintain compatibility with v0.4.
	"""

	def __init__(self, *args, **kw):
		"""
		Warn about deprecation.
		"""
		self._deprecated()
		return super(GitIgnorePattern, self).__init__(*args, **kw)

	@staticmethod
	def _deprecated():
		"""
		Warn about deprecation.
		"""
		warnings.warn("GitIgnorePattern ('gitignore') is deprecated. Use GitWildMatchPattern ('gitwildmatch') instead.", DeprecationWarning, stacklevel=3)

	@classmethod
	def pattern_to_regex(cls, *args, **kw):
		"""
		Warn about deprecation.
		"""
		cls._deprecated()
		return super(GitIgnorePattern, cls).pattern_to_regex(*args, **kw)

# Register `GitIgnorePattern` as "gitignore" for backward compatibility
# with v0.4.
util.register_pattern('gitignore', GitIgnorePattern)