Patchwork [7,of,7,V2] revset: introduce an API that avoids `formatspec` input serialization

login
register
mail settings
Submitter Boris Feld
Date Jan. 14, 2019, 12:13 p.m.
Message ID <8bcaad324401bd275886.1547467997@localhost.localdomain>
Download mbox | patch
Permalink /patch/37727/
State Superseded
Headers show

Comments

Boris Feld - Jan. 14, 2019, 12:13 p.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1546605681 -3600
#      Fri Jan 04 13:41:21 2019 +0100
# Node ID 8bcaad324401bd275886a0ae4340a876ac82ab84
# Parent  1def212ed730ce024963fd41f2d341f584521bbf
# EXP-Topic revs-efficiency
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 8bcaad324401
revset: introduce an API that avoids `formatspec` input serialization

Instead of having the data fully serialized, the input can be replaced with a
`__internal_input__(<idx>)` entry in the revspec. The actual value at `<idx>`
has to be passed along with the format spec but the operation can get much more
efficient.

Just using it for the simple "%ld" case provides a significant boost. For
example, here is the impact on a sample discovery run between two pypy
repositories with arbitrary differences (using hg perfdiscovery).

$ hg perfdiscovery
before: ! wall 0.700435 comb 0.710000 user 0.700000 sys 0.010000 (median of 15)
after:  ! wall 0.501305 comb 0.510000 user 0.490000 sys 0.020000 (median of 20)
Boris Feld - Jan. 14, 2019, 2:27 p.m.
Please ignore the V2 version, I sent the wrong version. Sorry for the
inconvenience.

I'm sending the V3 now with the correct version

On 14/01/2019 13:13, Boris Feld wrote:
> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1546605681 -3600
> #      Fri Jan 04 13:41:21 2019 +0100
> # Node ID 8bcaad324401bd275886a0ae4340a876ac82ab84
> # Parent  1def212ed730ce024963fd41f2d341f584521bbf
> # EXP-Topic revs-efficiency
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 8bcaad324401
> revset: introduce an API that avoids `formatspec` input serialization
>
> Instead of having the data fully serialized, the input can be replaced with a
> `__internal_input__(<idx>)` entry in the revspec. The actual value at `<idx>`
> has to be passed along with the format spec but the operation can get much more
> efficient.
>
> Just using it for the simple "%ld" case provides a significant boost. For
> example, here is the impact on a sample discovery run between two pypy
> repositories with arbitrary differences (using hg perfdiscovery).
>
> $ hg perfdiscovery
> before: ! wall 0.700435 comb 0.710000 user 0.700000 sys 0.010000 (median of 15)
> after:  ! wall 0.501305 comb 0.510000 user 0.490000 sys 0.020000 (median of 20)
>
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -1362,9 +1362,8 @@ class localrepository(object):
>          Returns a revset.abstractsmartset, which is a list-like interface
>          that contains integer revisions.
>          '''
> -        expr = revsetlang.formatspec(expr, *args)
> -        m = revset.match(None, expr)
> -        return m(self)
> +        tree = revsetlang.spectree(expr, *args)
> +        return revset.makematcher(tree)(self)
>  
>      def set(self, expr, *args):
>          '''Find revisions matching a revset and emit changectx instances.
> diff --git a/mercurial/revset.py b/mercurial/revset.py
> --- a/mercurial/revset.py
> +++ b/mercurial/revset.py
> @@ -125,6 +125,13 @@ def stringset(repo, subset, x, order):
>          return baseset([x])
>      return baseset()
>  
> +def rawsmartset(repo, subset, x, order):
> +    """argument is already a smartset, use that directly"""
> +    if order == followorder:
> +        return subset & x
> +    else:
> +        return x & subset
> +
>  def rangeset(repo, subset, x, y, order):
>      m = getset(repo, fullreposet(repo), x)
>      n = getset(repo, fullreposet(repo), y)
> @@ -2216,6 +2223,7 @@ methods = {
>      "ancestor": ancestorspec,
>      "parent": parentspec,
>      "parentpost": parentpost,
> +    "smartset": rawsmartset,
>  }
>  
>  subscriptrelations = {
> diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py
> --- a/mercurial/revsetlang.py
> +++ b/mercurial/revsetlang.py
> @@ -333,7 +333,7 @@ def _analyze(x):
>      elif op == 'negate':
>          s = getstring(x[1], _("can't negate that"))
>          return _analyze(('string', '-' + s))
> -    elif op in ('string', 'symbol'):
> +    elif op in ('string', 'symbol', 'smartset'):
>          return x
>      elif op == 'rangeall':
>          return (op, None)
> @@ -373,7 +373,7 @@ def _optimize(x):
>          return 0, x
>  
>      op = x[0]
> -    if op in ('string', 'symbol'):
> +    if op in ('string', 'symbol', 'smartset'):
>          return 0.5, x # single revisions are small
>      elif op == 'and':
>          wa, ta = _optimize(x[1])
> @@ -691,6 +691,29 @@ def formatspec(expr, *args):
>              raise error.ProgrammingError("unknown revspec item type: %r" % t)
>      return b''.join(ret)
>  
> +def spectree(expr, *args):
> +    """similar to formatspec but return a parsed and optimized tree"""
> +    parsed = _parseargs(expr, args)
> +    ret = []
> +    inputs = []
> +    for t, arg in parsed:
> +        if t is None:
> +            ret.append(arg)
> +        elif t == 'baseset':
> +            newtree = ('smartset', smartset.baseset(arg))
> +            inputs.append(newtree)
> +            ret.append("$")
> +        else:
> +            raise error.ProgrammingError("unknown revspec item type: %r" % t)
> +    expr = b''.join(ret)
> +    tree = _parsewith(expr, syminitletters=_aliassyminitletters)
> +    tree = parser.buildtree(tree, ('symbol', '$'), *inputs)
> +    tree = foldconcat(tree)
> +    tree = analyze(tree)
> +    tree = optimize(tree)
> +    return tree
> +
> +
>  def _parseargs(expr, args):
>      """parse the expression and replace all inexpensive args
>  
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Patch

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1362,9 +1362,8 @@  class localrepository(object):
         Returns a revset.abstractsmartset, which is a list-like interface
         that contains integer revisions.
         '''
-        expr = revsetlang.formatspec(expr, *args)
-        m = revset.match(None, expr)
-        return m(self)
+        tree = revsetlang.spectree(expr, *args)
+        return revset.makematcher(tree)(self)
 
     def set(self, expr, *args):
         '''Find revisions matching a revset and emit changectx instances.
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -125,6 +125,13 @@  def stringset(repo, subset, x, order):
         return baseset([x])
     return baseset()
 
+def rawsmartset(repo, subset, x, order):
+    """argument is already a smartset, use that directly"""
+    if order == followorder:
+        return subset & x
+    else:
+        return x & subset
+
 def rangeset(repo, subset, x, y, order):
     m = getset(repo, fullreposet(repo), x)
     n = getset(repo, fullreposet(repo), y)
@@ -2216,6 +2223,7 @@  methods = {
     "ancestor": ancestorspec,
     "parent": parentspec,
     "parentpost": parentpost,
+    "smartset": rawsmartset,
 }
 
 subscriptrelations = {
diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py
--- a/mercurial/revsetlang.py
+++ b/mercurial/revsetlang.py
@@ -333,7 +333,7 @@  def _analyze(x):
     elif op == 'negate':
         s = getstring(x[1], _("can't negate that"))
         return _analyze(('string', '-' + s))
-    elif op in ('string', 'symbol'):
+    elif op in ('string', 'symbol', 'smartset'):
         return x
     elif op == 'rangeall':
         return (op, None)
@@ -373,7 +373,7 @@  def _optimize(x):
         return 0, x
 
     op = x[0]
-    if op in ('string', 'symbol'):
+    if op in ('string', 'symbol', 'smartset'):
         return 0.5, x # single revisions are small
     elif op == 'and':
         wa, ta = _optimize(x[1])
@@ -691,6 +691,29 @@  def formatspec(expr, *args):
             raise error.ProgrammingError("unknown revspec item type: %r" % t)
     return b''.join(ret)
 
+def spectree(expr, *args):
+    """similar to formatspec but return a parsed and optimized tree"""
+    parsed = _parseargs(expr, args)
+    ret = []
+    inputs = []
+    for t, arg in parsed:
+        if t is None:
+            ret.append(arg)
+        elif t == 'baseset':
+            newtree = ('smartset', smartset.baseset(arg))
+            inputs.append(newtree)
+            ret.append("$")
+        else:
+            raise error.ProgrammingError("unknown revspec item type: %r" % t)
+    expr = b''.join(ret)
+    tree = _parsewith(expr, syminitletters=_aliassyminitletters)
+    tree = parser.buildtree(tree, ('symbol', '$'), *inputs)
+    tree = foldconcat(tree)
+    tree = analyze(tree)
+    tree = optimize(tree)
+    return tree
+
+
 def _parseargs(expr, args):
     """parse the expression and replace all inexpensive args