Patchwork [6,of,7] stringutil: add function to compile stringmatcher pattern into regexp

login
register
mail settings
Submitter Yuya Nishihara
Date Oct. 14, 2020, 2:13 p.m.
Message ID <c71d4b4018af62936283.1602684816@mimosa>
Download mbox | patch
Permalink /patch/47461/
State Accepted
Headers show

Comments

Yuya Nishihara - Oct. 14, 2020, 2:13 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1601898039 -32400
#      Mon Oct 05 20:40:39 2020 +0900
# Node ID c71d4b4018af62936283b28ce4ed70cc9ea33840
# Parent  18bb913c45acf3f8d1fcbc90e22853f8e5f8e767
stringutil: add function to compile stringmatcher pattern into regexp

Prepares for adding a revset predicate for "grep --diff". The grep logic
needs a regexp object instead of a match function.

Patch

diff --git a/mercurial/utils/stringutil.py b/mercurial/utils/stringutil.py
--- a/mercurial/utils/stringutil.py
+++ b/mercurial/utils/stringutil.py
@@ -376,6 +376,58 @@  def stringmatcher(pattern, casesensitive
     raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
 
 
+def substringregexp(pattern, flags=0):
+    """Build a regexp object from a string pattern possibly starting with
+    a 're:' or 'literal:' prefix.
+
+    helper for tests:
+    >>> def test(pattern, *tests):
+    ...     regexp = substringregexp(pattern)
+    ...     return [bool(regexp.search(t)) for t in tests]
+    >>> def itest(pattern, *tests):
+    ...     regexp = substringregexp(pattern, remod.I)
+    ...     return [bool(regexp.search(t)) for t in tests]
+
+    substring matching (no prefix):
+    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
+    [False, False, True]
+
+    substring pattern should be escaped:
+    >>> substringregexp(b'.bc').pattern
+    '\\\\.bc'
+    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
+    [False, False, False]
+
+    regex matching ('re:' prefix):
+    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
+    [False, False, True]
+
+    force substring matches ('literal:' prefix):
+    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
+    [False, True]
+
+    case insensitive literal matches:
+    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
+    [False, False, True]
+
+    case insensitive regex matches:
+    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
+    [False, False, True]
+    """
+    kind, pattern = _splitpattern(pattern)
+    if kind == b're':
+        try:
+            return remod.compile(pattern, flags)
+        except remod.error as e:
+            raise error.ParseError(
+                _(b'invalid regular expression: %s') % forcebytestr(e)
+            )
+    elif kind == b'literal':
+        return remod.compile(remod.escape(pattern), flags)
+
+    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
+
+
 def shortuser(user):
     """Return a short representation of a user name or email address."""
     f = user.find(b'@')