Patchwork [2,of,4,V2] copies: refactor checkcopies() into a top level method

login
register
mail settings
Submitter Durham Goode
Date May 7, 2013, 3:14 a.m.
Message ID <9557401c5bab0a37d2e6.1367896488@dev350.prn1.facebook.com>
Download mbox | patch
Permalink /patch/1580/
State Superseded, archived
Commit 4327687ca7579e825b8877d9756b91c066c743d5
Delegated to: Augie Fackler
Headers show

Comments

Durham Goode - May 7, 2013, 3:14 a.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1367430261 25200
#      Wed May 01 10:44:21 2013 -0700
# Node ID 9557401c5bab0a37d2e6eceb1fd13c66a75463f0
# Parent  672183bf736b045705760066ef102a0a4cb52cca
copies: refactor checkcopies() into a top level method

This moves checkcopies() out of mergecopies() and makes it a top level
function in the copies module. This allows extensions to override it. For
example, I'm developing a filelog replacement that doesn't have rev numbers
so all the rev number dependent implementation here needs to be replaced
by the extension.

No logic is changed in this commit.
Augie Fackler - May 7, 2013, 3:25 p.m.
On Mon, May 06, 2013 at 08:14:48PM -0700, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1367430261 25200
> #      Wed May 01 10:44:21 2013 -0700
> # Node ID 9557401c5bab0a37d2e6eceb1fd13c66a75463f0
> # Parent  672183bf736b045705760066ef102a0a4cb52cca
> copies: refactor checkcopies() into a top level method

SGTM. Looks reasonable here.

>
> This moves checkcopies() out of mergecopies() and makes it a top level
> function in the copies module. This allows extensions to override it.

> For
> example, I'm developing a filelog replacement that doesn't have rev numbers
> so all the rev number dependent implementation here needs to be replaced
> by the extension.

Ooh. That posted anyplace I can follow along? That's relevant to my interests.


>
> No logic is changed in this commit.
>
> diff --git a/mercurial/copies.py b/mercurial/copies.py
> --- a/mercurial/copies.py
> +++ b/mercurial/copies.py
> @@ -222,65 +222,8 @@
>      fullcopy = {}
>      diverge = {}
>
> -    def related(f1, f2, limit):
> -        # Walk back to common ancestor to see if the two files originate
> -        # from the same file. Since workingfilectx's rev() is None it messes
> -        # up the integer comparison logic, hence the pre-step check for
> -        # None (f1 and f2 can only be workingfilectx's initially).
> -
> -        if f1 == f2:
> -            return f1 # a match
> -
> -        g1, g2 = f1.ancestors(), f2.ancestors()
> -        try:
> -            f1r, f2r = f1.rev(), f2.rev()
> -
> -            if f1r is None:
> -                f1 = g1.next()
> -            if f2r is None:
> -                f2 = g2.next()
> -
> -            while True:
> -                f1r, f2r = f1.rev(), f2.rev()
> -                if f1r > f2r:
> -                    f1 = g1.next()
> -                elif f2r > f1r:
> -                    f2 = g2.next()
> -                elif f1 == f2:
> -                    return f1 # a match
> -                elif f1r == f2r or f1r < limit or f2r < limit:
> -                    return False # copy no longer relevant
> -        except StopIteration:
> -            return False
> -
> -    def checkcopies(f, m1, m2):
> -        '''check possible copies of f from m1 to m2'''
> -        of = None
> -        seen = set([f])
> -        for oc in ctx(f, m1[f]).ancestors():
> -            ocr = oc.rev()
> -            of = oc.path()
> -            if of in seen:
> -                # check limit late - grab last rename before
> -                if ocr < limit:
> -                    break
> -                continue
> -            seen.add(of)
> -
> -            fullcopy[f] = of # remember for dir rename detection
> -            if of not in m2:
> -                continue # no match, keep looking
> -            if m2[of] == ma.get(of):
> -                break # no merge needed, quit early
> -            c2 = ctx(of, m2[of])
> -            cr = related(oc, c2, ca.rev())
> -            if cr and (of == f or of == c2.path()): # non-divergent
> -                copy[f] = of
> -                of = None
> -                break
> -
> -        if of in ma:
> -            diverge.setdefault(of, []).append(f)
> +    def _checkcopies(f, m1, m2):
> +        checkcopies(ctx, f, m1, m2, ca, ma, limit, diverge, copy, fullcopy)
>
>      repo.ui.debug("  searching for copies back to rev %d\n" % limit)
>
> @@ -295,9 +238,9 @@
>                        % "\n   ".join(u2))
>
>      for f in u1:
> -        checkcopies(f, m1, m2)
> +        _checkcopies(f, m1, m2)
>      for f in u2:
> -        checkcopies(f, m2, m1)
> +        _checkcopies(f, m2, m1)
>
>      renamedelete = {}
>      renamedelete2 = set()
> @@ -386,3 +329,64 @@
>                      break
>
>      return copy, movewithdir, diverge, renamedelete
> +
> +def checkcopies(ctx, f, m1, m2, ca, ma, limit, diverge, copy, fullcopy):
> +    '''check possible copies of f from m1 to m2'''
> +
> +    def _related(f1, f2, limit):
> +        # Walk back to common ancestor to see if the two files originate
> +        # from the same file. Since workingfilectx's rev() is None it messes
> +        # up the integer comparison logic, hence the pre-step check for
> +        # None (f1 and f2 can only be workingfilectx's initially).
> +
> +        if f1 == f2:
> +            return f1 # a match
> +
> +        g1, g2 = f1.ancestors(), f2.ancestors()
> +        try:
> +            f1r, f2r = f1.rev(), f2.rev()
> +
> +            if f1r is None:
> +                f1 = g1.next()
> +            if f2r is None:
> +                f2 = g2.next()
> +
> +            while True:
> +                f1r, f2r = f1.rev(), f2.rev()
> +                if f1r > f2r:
> +                    f1 = g1.next()
> +                elif f2r > f1r:
> +                    f2 = g2.next()
> +                elif f1 == f2:
> +                    return f1 # a match
> +                elif f1r == f2r or f1r < limit or f2r < limit:
> +                    return False # copy no longer relevant
> +        except StopIteration:
> +            return False
> +
> +    of = None
> +    seen = set([f])
> +    for oc in ctx(f, m1[f]).ancestors():
> +        ocr = oc.rev()
> +        of = oc.path()
> +        if of in seen:
> +            # check limit late - grab last rename before
> +            if ocr < limit:
> +                break
> +            continue
> +        seen.add(of)
> +
> +        fullcopy[f] = of # remember for dir rename detection
> +        if of not in m2:
> +            continue # no match, keep looking
> +        if m2[of] == ma.get(of):
> +            break # no merge needed, quit early
> +        c2 = ctx(of, m2[of])
> +        cr = _related(oc, c2, ca.rev())
> +        if cr and (of == f or of == c2.path()): # non-divergent
> +            copy[f] = of
> +            of = None
> +            break
> +
> +    if of in ma:
> +        diverge.setdefault(of, []).append(f)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
Durham Goode - May 7, 2013, 4:56 p.m.
On 5/7/13 8:25 AM, "Augie Fackler" <raf@durin42.com> wrote:

>On Mon, May 06, 2013 at 08:14:48PM -0700, Durham Goode wrote:
>>
>> For
>> example, I'm developing a filelog replacement that doesn't have rev
>>numbers
>> so all the rev number dependent implementation here needs to be replaced
>> by the extension.
>
>Ooh. That posted anyplace I can follow along? That's relevant to my
>interests.
>

There's nothing posted publicly yet. Once it's in a mildly reviewable
state (next week maybe?) I'll send out an email here with some details.
It'll be hosted on bitbucket, so hopefully I can make the repo public.

The general goal is to get all of our file contents off our client
machines and into a memcache like service. Then we only pull down the
files that are needed during an update. So we have a revlog implementation
that acts more like a key/value store than a revlog and fetches the values
from a remote server as needed.
Augie Fackler - May 7, 2013, 5:02 p.m.
On Tue, May 7, 2013 at 12:56 PM, Durham Goode <durham@fb.com> wrote:
> The general goal is to get all of our file contents off our client
> machines and into a memcache like service. Then we only pull down the
> files that are needed during an update. So we have a revlog implementation
> that acts more like a key/value store than a revlog and fetches the values
> from a remote server as needed.


*nod*, sounds like what we talked about in March. I'd like to do
enough work on this that we could use the same extensibility hooks for
hg-on-bigtable some day.

Patch

diff --git a/mercurial/copies.py b/mercurial/copies.py
--- a/mercurial/copies.py
+++ b/mercurial/copies.py
@@ -222,65 +222,8 @@ 
     fullcopy = {}
     diverge = {}
 
-    def related(f1, f2, limit):
-        # Walk back to common ancestor to see if the two files originate
-        # from the same file. Since workingfilectx's rev() is None it messes
-        # up the integer comparison logic, hence the pre-step check for
-        # None (f1 and f2 can only be workingfilectx's initially).
-
-        if f1 == f2:
-            return f1 # a match
-
-        g1, g2 = f1.ancestors(), f2.ancestors()
-        try:
-            f1r, f2r = f1.rev(), f2.rev()
-
-            if f1r is None:
-                f1 = g1.next()
-            if f2r is None:
-                f2 = g2.next()
-
-            while True:
-                f1r, f2r = f1.rev(), f2.rev()
-                if f1r > f2r:
-                    f1 = g1.next()
-                elif f2r > f1r:
-                    f2 = g2.next()
-                elif f1 == f2:
-                    return f1 # a match
-                elif f1r == f2r or f1r < limit or f2r < limit:
-                    return False # copy no longer relevant
-        except StopIteration:
-            return False
-
-    def checkcopies(f, m1, m2):
-        '''check possible copies of f from m1 to m2'''
-        of = None
-        seen = set([f])
-        for oc in ctx(f, m1[f]).ancestors():
-            ocr = oc.rev()
-            of = oc.path()
-            if of in seen:
-                # check limit late - grab last rename before
-                if ocr < limit:
-                    break
-                continue
-            seen.add(of)
-
-            fullcopy[f] = of # remember for dir rename detection
-            if of not in m2:
-                continue # no match, keep looking
-            if m2[of] == ma.get(of):
-                break # no merge needed, quit early
-            c2 = ctx(of, m2[of])
-            cr = related(oc, c2, ca.rev())
-            if cr and (of == f or of == c2.path()): # non-divergent
-                copy[f] = of
-                of = None
-                break
-
-        if of in ma:
-            diverge.setdefault(of, []).append(f)
+    def _checkcopies(f, m1, m2):
+        checkcopies(ctx, f, m1, m2, ca, ma, limit, diverge, copy, fullcopy)
 
     repo.ui.debug("  searching for copies back to rev %d\n" % limit)
 
@@ -295,9 +238,9 @@ 
                       % "\n   ".join(u2))
 
     for f in u1:
-        checkcopies(f, m1, m2)
+        _checkcopies(f, m1, m2)
     for f in u2:
-        checkcopies(f, m2, m1)
+        _checkcopies(f, m2, m1)
 
     renamedelete = {}
     renamedelete2 = set()
@@ -386,3 +329,64 @@ 
                     break
 
     return copy, movewithdir, diverge, renamedelete
+
+def checkcopies(ctx, f, m1, m2, ca, ma, limit, diverge, copy, fullcopy):
+    '''check possible copies of f from m1 to m2'''
+
+    def _related(f1, f2, limit):
+        # Walk back to common ancestor to see if the two files originate
+        # from the same file. Since workingfilectx's rev() is None it messes
+        # up the integer comparison logic, hence the pre-step check for
+        # None (f1 and f2 can only be workingfilectx's initially).
+
+        if f1 == f2:
+            return f1 # a match
+
+        g1, g2 = f1.ancestors(), f2.ancestors()
+        try:
+            f1r, f2r = f1.rev(), f2.rev()
+
+            if f1r is None:
+                f1 = g1.next()
+            if f2r is None:
+                f2 = g2.next()
+
+            while True:
+                f1r, f2r = f1.rev(), f2.rev()
+                if f1r > f2r:
+                    f1 = g1.next()
+                elif f2r > f1r:
+                    f2 = g2.next()
+                elif f1 == f2:
+                    return f1 # a match
+                elif f1r == f2r or f1r < limit or f2r < limit:
+                    return False # copy no longer relevant
+        except StopIteration:
+            return False
+
+    of = None
+    seen = set([f])
+    for oc in ctx(f, m1[f]).ancestors():
+        ocr = oc.rev()
+        of = oc.path()
+        if of in seen:
+            # check limit late - grab last rename before
+            if ocr < limit:
+                break
+            continue
+        seen.add(of)
+
+        fullcopy[f] = of # remember for dir rename detection
+        if of not in m2:
+            continue # no match, keep looking
+        if m2[of] == ma.get(of):
+            break # no merge needed, quit early
+        c2 = ctx(of, m2[of])
+        cr = _related(oc, c2, ca.rev())
+        if cr and (of == f or of == c2.path()): # non-divergent
+            copy[f] = of
+            of = None
+            break
+
+    if of in ma:
+        diverge.setdefault(of, []).append(f)