Browse Source

Adds some comments + docstrings + a test case

master
Noah Pederson 11 months ago
parent
commit
208d90829d
2 changed files with 26 additions and 21 deletions
  1. +21
    -18
      pattern_matching/pattern_matching_v3.py
  2. +5
    -3
      tests/test_pattern_matching.py

+ 21
- 18
pattern_matching/pattern_matching_v3.py View File

@@ -1,47 +1,51 @@
import ipdb

DEBUG = False
def isMatch3(text, pattern):

"""
Returns whether pattern fully matches text using the following rules:
? represents exactly 1 instance of any character
* will match 0 or more of its preceding character
literals (any non ? or * character) will match exactly 1 instance of the same character
"""
# Empty pattern can only match empty string
if len(pattern) == 0:
return len(text) == 0
patternindex = textindex = 0
patternindex = textindex = 0 # all zeroes!
while patternindex < len(pattern):

if (patternindex < len(pattern) - 1) and pattern[patternindex + 1] == "*":
# the current character can repeat 0,n times
if DEBUG:
ipdb.set_trace()
# the current pattern character can repeat 0,n times
#look ahead to get the first non text[textindex] character and the # of characters ahead it is
future_text_literal, target_char = _scan_text_for_literal(
text[textindex:], character=pattern[patternindex])
#figure out how many instances of the x in x* we need to match, accounting for literals and ?s
# and ensuring the rest of the pattern can match, even if it means this doesn't match every instance of current char
future_pattern_literal = _scan_pattern_for_literal(
pattern[patternindex+2:], character=pattern[patternindex], target=target_char)
# match_count is the # of repeating chars this * group matches
match_count = future_text_literal - future_pattern_literal
if match_count > 0 and pattern[patternindex] != text[textindex]:
return False
# increase the current progress through text by the # of matched characters
textindex = textindex + match_count
patternindex = patternindex + 1
elif pattern[patternindex] == "?":
# if the current pattern character is a ?, we just increase the indexes by 1 (pattern is down at the bottom)
textindex = textindex + 1
elif textindex >= len(text):
# we got to the end of the text before the end of the pattern, False
if DEBUG:
ipdb.set_trace()
# we got to the end of the text before the end of the pattern, we're done here
break
else:
if pattern[patternindex] != text[textindex]:
if DEBUG:
ipdb.set_trace()
# if literals don't match up, pattern doesn't match
return False
textindex = textindex + 1
patternindex = patternindex + 1
if DEBUG:
ipdb.set_trace()
# if we managed to get here and go through every character in text, the strings match
# if we didn't make it through every character in text, the pattern didn't match the entire string and we return False
return textindex == len(text)


def _scan_pattern_for_literal(text, character=None, target=None):
"""returns index of first literal found in text, or the length of text if it doesn't exist"""
"""returns the number of repeating <characters>, subtracting the # of '?' characters until <target> is hit"""
count = 0
qcount = 0
for c in text:
@@ -52,7 +56,6 @@ def _scan_pattern_for_literal(text, character=None, target=None):
if c not in ("?", "*", character):
return count - qcount
count = count + 1

return count # this happens if we get to the end of text



+ 5
- 3
tests/test_pattern_matching.py View File

@@ -49,13 +49,15 @@ class MatcherTestCase(TestCase):
("aaacde", "a*aaacde", True),
("aaabc", "?a*b?", True),
("aaabc", "a*?c", True), # <-- This was the hardest test to get passing
("aaaaaaab", "a*aa?ab", True),
("aaabc", "a*a?b?", True)
]
@skip("Tests for old implementation")
def test_bulk(self):
for test_case in self.test_cases:
with self.subTest(text=test_case[0], pattern=test_case[1]):
match = isMatch(test_case[0], test_case[1])
logging.info("%s match %s = %s should be %s", # info because by default it won't log debug or info
logging.info("%s match %s = %s should be %s",
test_case[0], test_case[1], match, test_case[2])
self.assertEqual(match, test_case[2])
@@ -81,7 +83,7 @@ class MatcherTestCase(TestCase):
for test_case in self.false_negatives_v2:
with self.subTest(text=test_case[0], pattern=test_case[1]):
match = isMatch2(test_case[0], test_case[1])
logging.info("%s match %s = %s should be %s", # info because by default it won't log debug or info
logging.info("%s match %s = %s should be %s",
test_case[0], test_case[1], match, test_case[2])
self.assertEqual(match, test_case[2])
@@ -89,7 +91,7 @@ class MatcherTestCase(TestCase):
for test_case in self.test_cases:
with self.subTest(text=test_case[0], pattern=test_case[1]):
match = isMatch3(test_case[0], test_case[1])
logging.info("%s match %s = %s should be %s", # info because by default it won't log debug or info
logging.info("%s match %s = %s should be %s",
test_case[0], test_case[1], match, test_case[2])
self.assertEqual(match, test_case[2])

Loading…
Cancel
Save