Skip to content

Commit 1794eba

Browse files
committed
Invalid range notation
1 parent 28d03d6 commit 1794eba

5 files changed

Lines changed: 83 additions & 24 deletions

File tree

pathspec/patterns/gitignore/base.py

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44

55
import re
66

7+
from typing import (
8+
Literal)
9+
710
from pathspec.pattern import (
811
RegexPattern)
912
from pathspec._typing import (
10-
AnyStr) # Removed in 3.18.
13+
AnyStr, # Removed in 3.18.
14+
assert_unreachable)
1115

1216
_BYTES_ENCODING = 'latin1'
1317
"""
@@ -55,14 +59,25 @@ def escape(s: AnyStr) -> AnyStr:
5559
return out_string
5660

5761
@staticmethod
58-
def _translate_segment_glob(pattern: str) -> str:
62+
def _translate_segment_glob(
63+
pattern: str,
64+
range_error: Literal['literal', 'raise'],
65+
) -> str:
5966
"""
6067
Translates the glob pattern to a regular expression. This is used in the
6168
constructor to translate a path segment glob pattern to its corresponding
6269
regular expression.
6370
6471
*pattern* (:class:`str`) is the glob pattern.
6572
73+
*range_error* (:class:`str`) is how to handle invalid range notation in the
74+
pattern:
75+
76+
- :data:`"literal"`: Invalid notation will be treated as a literal string.
77+
78+
- :data:`"raise"`: Invalid notation will cause a :class:`_RangeError` to be
79+
raised.
80+
6681
Returns the regular expression (:class:`str`).
6782
"""
6883
# NOTE: This is derived from `fnmatch.translate()` and is similar to the
@@ -96,9 +111,9 @@ def _translate_segment_glob(pattern: str) -> str:
96111
regex += '[^/]'
97112

98113
elif char == '[':
99-
# Bracket expression wildcard. Except for the beginning exclamation
100-
# mark, the whole bracket expression can be used directly as regex, but
101-
# we have to find where the expression ends.
114+
# Bracket expression (range notation) wildcard. Except for the beginning
115+
# exclamation mark, the whole bracket expression can be used directly as
116+
# regex, but we have to find where the expression ends.
102117
# - "[][!]" matches ']', '[' and '!'.
103118
# - "[]-]" matches ']' and '-'.
104119
# - "[!]a-]" matches any character except ']', 'a' and '-'.
@@ -152,9 +167,19 @@ def _translate_segment_glob(pattern: str) -> str:
152167
i = j
153168

154169
else:
155-
# Failed to find closing bracket, treat opening bracket as a bracket
156-
# literal instead of as an expression.
157-
regex += '\\['
170+
# Failed to find closing bracket.
171+
if range_error == 'literal':
172+
# Treat opening bracket as a bracket literal instead of as an
173+
# expression.
174+
regex += '\\['
175+
elif range_error == 'raise':
176+
# Treat invalid range notation as an error.
177+
raise _RangeError((
178+
f"Invalid range notation={pattern[i:j]!r} found in pattern="
179+
f"{pattern!r}."
180+
))
181+
else:
182+
assert_unreachable(f"{range_error=!r} is invalid.")
158183

159184
else:
160185
# Regular character, escape it for regex.
@@ -174,3 +199,11 @@ class GitIgnorePatternError(ValueError):
174199
pattern.
175200
"""
176201
pass
202+
203+
204+
class _RangeError(GitIgnorePatternError):
205+
"""
206+
The :class:`_RangeError` class indicates an invalid range notation was found
207+
in a gitignore pattern.
208+
"""
209+
pass

pathspec/patterns/gitignore/basic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def __translate_segments(cls, pattern_segs: list[str]) -> list[str]:
293293

294294
else:
295295
# Match segment glob pattern.
296-
out_parts.append(cls._translate_segment_glob(seg))
296+
out_parts.append(cls._translate_segment_glob(seg, 'literal'))
297297

298298
if i == end:
299299
if seg == '*':

pathspec/patterns/gitignore/spec.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
from .base import (
2020
GitIgnorePatternError,
2121
_BYTES_ENCODING,
22-
_GitIgnoreBasePattern)
22+
_GitIgnoreBasePattern,
23+
_RangeError)
2324

2425
_DIR_MARK = 'ps_d'
2526
"""
@@ -80,7 +81,7 @@ def __normalize_segments(
8081
elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
8182
# A single segment pattern with or without a trailing slash ('/') will
8283
# match any descendant path. This is equivalent to "**/{pattern}". Prepend
83-
# double-asterisk segment to make pattern relative to root.
84+
# a double-asterisk segment to make the pattern relative to root.
8485
if pattern_segs[0] != '**':
8586
pattern_segs.insert(0, '**')
8687

@@ -98,8 +99,8 @@ def __normalize_segments(
9899
if not pattern_segs[-1]:
99100
# A pattern ending with a slash ('/') will match all descendant paths if
100101
# it is a directory but not if it is a regular file. This is equivalent to
101-
# "{pattern}/**". Set empty last segment to a double-asterisk to include
102-
# all descendants.
102+
# "{pattern}/**". Set the empty last segment to a double-asterisk to
103+
# include all descendants.
103104
pattern_segs[-1] = '**'
104105

105106
# EDGE CASE: Collapse duplicate double-asterisk sequences (i.e., '**/**').
@@ -210,8 +211,8 @@ def pattern_to_regex(
210211

211212
if pattern_str.startswith('!'):
212213
# A pattern starting with an exclamation mark ('!') negates the pattern
213-
# (exclude instead of include). Escape the exclamation mark with a back
214-
# slash to match a literal exclamation mark (i.e., '\!').
214+
# (exclude instead of include). Escape the exclamation mark with a
215+
# backslash to match a literal exclamation mark (i.e., '\!').
215216
include = False
216217
# Remove leading exclamation mark.
217218
pattern_str = pattern_str[1:]
@@ -243,6 +244,9 @@ def pattern_to_regex(
243244
# Build regular expression from pattern.
244245
try:
245246
regex_parts = cls.__translate_segments(is_dir_pattern, pattern_segs)
247+
except _RangeError:
248+
# EDGE CASE: Git discards patterns with range notation errors.
249+
return (None, None)
246250
except ValueError as e:
247251
raise GitIgnorePatternError((
248252
f"Invalid git pattern: {original_pattern!r}"
@@ -279,6 +283,8 @@ def __translate_segments(
279283
*pattern_segs* (:class:`list` of :class:`str`) contains the pattern
280284
segments.
281285
286+
Raises :class:`_RangeError` if invalid range notation is found.
287+
282288
Returns the regular expression parts (:class:`list` of :class:`str`).
283289
"""
284290
# Build regular expression from pattern.
@@ -322,7 +328,7 @@ def __translate_segments(
322328

323329
else:
324330
# Match segment glob pattern.
325-
out_parts.append(cls._translate_segment_glob(seg))
331+
out_parts.append(cls._translate_segment_glob(seg, 'raise'))
326332

327333
if i == end:
328334
# A pattern ending without a slash ('/') will match a file or a

tests/test_02_gitignore_basic.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -926,13 +926,23 @@ def test_15_issue_93_c_2_invalid(self):
926926
# - See <https://github.com/cpburnz/python-pathspec/issues/93>.
927927
for raw_pattern in [
928928
'[!]',
929-
'[z-a]',
930-
'a[z-a]',
929+
'a[!]',
931930
]:
932931
with self.subTest(f"p={raw_pattern!r}"):
933932
pattern = GitIgnoreBasicPattern(raw_pattern)
934933
self.assertIs(pattern.include, None)
935-
self.assertIs(pattern.regex.pattern, None)
934+
self.assertIs(pattern.regex, None)
935+
936+
# The `re` module fails to compile these.
937+
# - NOTE: Technically, these should result in null patterns rather than
938+
# exceptions to fully replicate Git's behavior.
939+
for raw_pattern in [
940+
'[z-a]',
941+
'a[z-a]',
942+
]:
943+
with self.subTest(f"p={raw_pattern!r}"):
944+
with self.assertRaises(re.PatternError):
945+
GitIgnoreBasicPattern(raw_pattern)
936946

937947
def test_15_issue_93_c_3_unclosed(self):
938948
"""
@@ -954,7 +964,7 @@ def test_15_issue_93_c_3_unclosed(self):
954964
with self.subTest(f"p={raw_pattern!r}"):
955965
pattern = GitIgnoreBasicPattern(raw_pattern)
956966
self.assertIs(pattern.include, None)
957-
self.assertIs(pattern.regex.pattern, None)
967+
self.assertIs(pattern.regex, None)
958968

959969
def test_16_repr_str(self):
960970
"""

tests/test_03_gitignore_spec.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -928,13 +928,23 @@ def test_15_issue_93_c_2_invalid(self):
928928
# - See <https://github.com/cpburnz/python-pathspec/issues/93>.
929929
for raw_pattern in [
930930
'[!]',
931-
'[z-a]',
932-
'a[z-a]',
931+
'a[!]',
933932
]:
934933
with self.subTest(f"p={raw_pattern!r}"):
935934
pattern = GitIgnoreSpecPattern(raw_pattern)
936935
self.assertIs(pattern.include, None)
937-
self.assertIs(pattern.regex.pattern, None)
936+
self.assertIs(pattern.regex, None)
937+
938+
# The `re` module fails to compile these.
939+
# - NOTE: Technically, these should result in null patterns rather than
940+
# exceptions to fully replicate Git's behavior.
941+
for raw_pattern in [
942+
'[z-a]',
943+
'a[z-a]',
944+
]:
945+
with self.subTest(f"p={raw_pattern!r}"):
946+
with self.assertRaises(re.PatternError):
947+
GitIgnoreSpecPattern(raw_pattern)
938948

939949
def test_15_issue_93_c_3_unclosed(self):
940950
"""
@@ -956,4 +966,4 @@ def test_15_issue_93_c_3_unclosed(self):
956966
with self.subTest(f"p={raw_pattern!r}"):
957967
pattern = GitIgnoreSpecPattern(raw_pattern)
958968
self.assertIs(pattern.include, None)
959-
self.assertIs(pattern.regex.pattern, None)
969+
self.assertIs(pattern.regex, None)

0 commit comments

Comments
 (0)