Patchwork [4,of,7] fileset: add stub for weight-based optimization

login
register
mail settings
Submitter Yuya Nishihara
Date Aug. 3, 2018, 3:01 p.m.
Message ID <6c9246c08523616914af.1533308501@mimosa>
Download mbox | patch
Permalink /patch/33154/
State Accepted
Headers show

Comments

Yuya Nishihara - Aug. 3, 2018, 3:01 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1532155946 -32400
#      Sat Jul 21 15:52:26 2018 +0900
# Node ID 6c9246c08523616914af131700126cfc408114f7
# Parent  1598c4146f74757fef4e2db244b03188d197df52
fileset: add stub for weight-based optimization

The main purpose of this change is to group basic patterns, which can be
mapped to a plain matcher. I'm not so interested in a weight of each function.

Patch

diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -902,6 +902,7 @@  def debugfileset(ui, repo, expr, **opts)
     stages = [
         ('parsed', pycompat.identity),
         ('analyzed', filesetlang.analyze),
+        ('optimized', filesetlang.optimize),
     ]
     stagenames = set(n for n, f in stages)
 
diff --git a/mercurial/fileset.py b/mercurial/fileset.py
--- a/mercurial/fileset.py
+++ b/mercurial/fileset.py
@@ -524,6 +524,7 @@  def match(ctx, expr, badfn=None):
     """Create a matcher for a single fileset expression"""
     tree = filesetlang.parse(expr)
     tree = filesetlang.analyze(tree)
+    tree = filesetlang.optimize(tree)
     mctx = matchctx(ctx, _buildstatus(ctx, tree), badfn=badfn)
     return getmatch(mctx, tree)
 
diff --git a/mercurial/filesetlang.py b/mercurial/filesetlang.py
--- a/mercurial/filesetlang.py
+++ b/mercurial/filesetlang.py
@@ -164,12 +164,50 @@  def _analyze(x):
 
 def analyze(x):
     """Transform raw parsed tree to evaluatable tree which can be fed to
-    getmatch()
+    optimize() or getmatch()
 
     All pseudo operations should be mapped to real operations or functions
     defined in methods or symbols table respectively.
     """
     return _analyze(x)
 
+def _optimize(x):
+    if x is None:
+        return 0, x
+
+    op = x[0]
+    if op in {'string', 'symbol'}:
+        return 0.5, x
+    if op == 'kindpat':
+        w, t = _optimize(x[2])
+        return w, (op, x[1], t)
+    if op == 'not':
+        w, t = _optimize(x[1])
+        return w, (op, t)
+    if op in {'and', 'minus'}:
+        wa, ta = _optimize(x[1])
+        wb, tb = _optimize(x[2])
+        return max(wa, wb), (op, ta, tb)
+    if op == 'or':
+        ws, ts = zip(*(_optimize(y) for y in x[1:]))
+        return max(ws), (op,) + ts
+    if op == 'list':
+        ws, ts = zip(*(_optimize(y) for y in x[1:]))
+        return sum(ws), (op,) + ts
+    if op == 'func':
+        f = getsymbol(x[1])
+        w = getattr(symbols.get(f), '_weight', 1)
+        wa, ta = _optimize(x[2])
+        return w + wa, (op, x[1], ta)
+    raise error.ProgrammingError('invalid operator %r' % op)
+
+def optimize(x):
+    """Reorder/rewrite evaluatable tree for optimization
+
+    All pseudo operations should be transformed beforehand.
+    """
+    _w, t = _optimize(x)
+    return t
+
 def prettyformat(tree):
     return parser.prettyformat(tree, ('string', 'symbol'))
diff --git a/mercurial/minifileset.py b/mercurial/minifileset.py
--- a/mercurial/minifileset.py
+++ b/mercurial/minifileset.py
@@ -86,4 +86,5 @@  def compile(text):
     """
     tree = filesetlang.parse(text)
     tree = filesetlang.analyze(tree)
+    tree = filesetlang.optimize(tree)
     return _compile(tree)
diff --git a/mercurial/registrar.py b/mercurial/registrar.py
--- a/mercurial/registrar.py
+++ b/mercurial/registrar.py
@@ -247,6 +247,9 @@  class filesetpredicate(_funcregistrarbas
      implies 'matchctx.status()' at runtime or not (False, by
      default).
 
+    Optional argument 'weight' indicates the estimated run-time cost, useful
+    for static optimization, default is 1. Higher weight means more expensive.
+
     'filesetpredicate' instance in example above can be used to
     decorate multiple functions.
 
@@ -259,8 +262,9 @@  class filesetpredicate(_funcregistrarbas
     _getname = _funcregistrarbase._parsefuncdecl
     _docformat = "``%s``\n    %s"
 
-    def _extrasetup(self, name, func, callstatus=False):
+    def _extrasetup(self, name, func, callstatus=False, weight=1):
         func._callstatus = callstatus
+        func._weight = weight
 
 class _templateregistrarbase(_funcregistrarbase):
     """Base of decorator to register functions as template specific one
diff --git a/tests/test-fileset.t b/tests/test-fileset.t
--- a/tests/test-fileset.t
+++ b/tests/test-fileset.t
@@ -180,6 +180,17 @@  Show parsed tree at stages:
       (func
         (symbol 'clean')
         None)))
+  * optimized:
+  (or
+    (symbol 'a1')
+    (symbol 'a2')
+    (and
+      (func
+        (symbol 'grep')
+        (string 'b'))
+      (func
+        (symbol 'clean')
+        None)))
   * matcher:
   <unionmatcher matchers=[
     <patternmatcher patterns='(?:a1$)'>,