Patchwork [2,of,5,v4] mergecopies: add logic to process incomplete data

login
register
mail settings
Submitter Gábor Stefanik
Date Oct. 17, 2016, 6:42 p.m.
Message ID <f5f046a680e6730485df.1476729726@GSTEFANIK.NavnGo.local>
Download mbox | patch
Permalink /patch/17158/
State Accepted
Headers show

Comments

Gábor Stefanik - Oct. 17, 2016, 6:42 p.m.
# HG changeset patch
# User Gábor Stefanik <gabor.stefanik@nng.com>
# Date 1475578314 -7200
#      Tue Oct 04 12:51:54 2016 +0200
# Node ID f5f046a680e6730485df1760f82a0e0084305873
# Parent  d8379d11021b681a06cda08a07da237eaca520b2
mergecopies: add logic to process incomplete data

We first combine incomplete copies on the two sides of the topological CA
into complete copies.
Any leftover incomplete copies are then combined with the incomplete
divergences to reconstruct divergences spanning over the topological CA.
Finally we promote any divergences falsely flagged as incomplete to full
divergences.

Right now, there is nothing generating incomplete copy/divergence data,
so this code does nothing. Changes to _checkcopies to populate these
dicts are coming later in this series.
Pierre-Yves David - Oct. 17, 2016, 9:40 p.m.
On 10/17/2016 08:42 PM, Gábor Stefanik wrote:
> # HG changeset patch
> # User Gábor Stefanik <gabor.stefanik@nng.com>
> # Date 1475578314 -7200
> #      Tue Oct 04 12:51:54 2016 +0200
> # Node ID f5f046a680e6730485df1760f82a0e0084305873
> # Parent  d8379d11021b681a06cda08a07da237eaca520b2
> mergecopies: add logic to process incomplete data
>
> We first combine incomplete copies on the two sides of the topological CA
> into complete copies.
> Any leftover incomplete copies are then combined with the incomplete
> divergences to reconstruct divergences spanning over the topological CA.
> Finally we promote any divergences falsely flagged as incomplete to full
> divergences.
>
> Right now, there is nothing generating incomplete copy/divergence data,
> so this code does nothing. Changes to _checkcopies to populate these
> dicts are coming later in this series.
>
> diff -r d8379d11021b -r f5f046a680e6 mercurial/copies.py
> --- a/mercurial/copies.py	Wed Oct 12 11:54:03 2016 +0200
> +++ b/mercurial/copies.py	Tue Oct 04 12:51:54 2016 +0200
> @@ -289,6 +289,22 @@
>          return fctx
>      return util.lrucachefunc(makectx)
>
> +def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
> +    """combine partial copy paths"""
> +    remainder = {}
> +    for f in copyfrom:
> +        if f in copyto:
> +            finalcopy[copyto[f]] = copyfrom[f]
> +            del copyto[f]
> +    for f in incompletediverge:
> +        assert f not in diverge
> +        ic = incompletediverge[f]
> +        if ic[0] in copyto:
> +            diverge[f] = [copyto[ic[0]], ic[1]]
> +        else:
> +            remainder[f] = ic
> +    return remainder
> +
>  def mergecopies(repo, c1, c2, base):
>      """
>      Find moves and copies between context c1 and c2 that are relevant
> @@ -360,14 +376,21 @@
>      # - diverge = record all diverges in this dict
>      # - copy = record all non-divergent copies in this dict
>      # - fullcopy = record all copies in this dict
> +    # - incomplete = record non-divergent partial copies here
> +    # - incompletediverge = record divergent partial copies here
>      diverge = {} # divergence data is shared
> +    incompletediverge  = {}
>      data1 = {'copy': {},
>               'fullcopy': {},
> +             'incomplete': {},
>               'diverge': diverge,
> +             'incompletediverge': incompletediverge,
>              }
>      data2 = {'copy': {},
>               'fullcopy': {},
> +             'incomplete': {},
>               'diverge': diverge,
> +             'incompletediverge': incompletediverge,
>              }
>
>      # find interesting file sets from manifests
> @@ -398,6 +421,13 @@
>      copy = dict(data1['copy'].items() + data2['copy'].items())
>      fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
>
> +    if dirtyc1:
> +        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
> +                       incompletediverge)
> +    else:
> +        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
> +                       incompletediverge)
> +
>      renamedelete = {}
>      renamedeleteset = set()
>      divergeset = set()
> @@ -416,13 +446,36 @@
>          repo.ui.debug("  unmatched files new in both:\n   %s\n"
>                        % "\n   ".join(bothnew))
>      bothdiverge = {}
> -    bothdata = {'copy': {},
> -                'fullcopy': {},
> -                'diverge': bothdiverge,
> -               }
> +    bothincompletediverge = {}
> +    both1 = {'copy': {},
> +             'fullcopy': {},
> +             'incomplete': {},
> +             'diverge': bothdiverge,
> +             'incompletediverge': bothincompletediverge
> +            }
> +    both2 = {'copy': {},
> +             'fullcopy': {},
> +             'incomplete': {},
> +             'diverge': bothdiverge,
> +             'incompletediverge': bothincompletediverge
> +            }
>      for f in bothnew:
> -        _checkcopies(c1, f, m1, m2, base, tca, limit, bothdata)
> -        _checkcopies(c2, f, m2, m1, base, tca, limit, bothdata)
> +        _checkcopies(c1, f, m1, m2, base, tca, limit, both1)
> +        _checkcopies(c2, f, m2, m1, base, tca, limit, both2)
> +    if dirtyc1:
> +        assert both2['incomplete'] == {}

Nit: this could be written more Pythonically:

   assert not both2['incomplete']

Please consider updating this in a follow-up.

> +        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
> +                                   bothincompletediverge)
> +    else:
> +        assert both1['incomplete'] == {}
> +        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
> +                                   bothincompletediverge)

It looks like this step is only relevant if dirtyc2 is True.

Can we use an "elif dirtyc2" for the next block and a final

   else:
       assert not both1['incomplete']
       assert not both2['incomplete']

If so, I would be happy to take a follow-up cleaning this up.

> +    for f in remainder:
> +        assert f not in bothdiverge
> +        ic = remainder[f]
> +        if ic[0] in (m1 if dirtyc1 else m2):
> +            # backed-out rename on one side, but watch out for deleted files
> +            bothdiverge[f] = ic
>      for of, fl in bothdiverge.items():
>          if len(fl) == 2 and fl[0] == fl[1]:
>              copy[fl[0]] = of # not actually divergent, just matching renames
Gábor Stefanik - Oct. 18, 2016, 12:12 a.m.
>



--------------------------------------------------------------------------
This message, including its attachments, is confidential. For more information please read NNG's email policy here:
http://www.nng.com/emailpolicy/
By responding to this email you accept the email policy.


-----Original Message-----
> From: Pierre-Yves David [mailto:pierre-yves.david@ens-lyon.org]

> Sent: Monday, October 17, 2016 11:40 PM

> To: Gábor STEFANIK <Gabor.STEFANIK@nng.com>; mercurial-

> devel@mercurial-scm.org

> Subject: Re: [PATCH 2 of 5 v4] mergecopies: add logic to process incomplete

> data

>

>

>

> On 10/17/2016 08:42 PM, Gábor Stefanik wrote:

> > # HG changeset patch

> > # User Gábor Stefanik <gabor.stefanik@nng.com>

> > # Date 1475578314 -7200

> > #      Tue Oct 04 12:51:54 2016 +0200

> > # Node ID f5f046a680e6730485df1760f82a0e0084305873

> > # Parent  d8379d11021b681a06cda08a07da237eaca520b2

> > mergecopies: add logic to process incomplete data

> >

> > We first combine incomplete copies on the two sides of the topological

> > CA into complete copies.

> > Any leftover incomplete copies are then combined with the incomplete

> > divergences to reconstruct divergences spanning over the topological CA.

> > Finally we promote any divergences falsely flagged as incomplete to

> > full divergences.

> >

> > Right now, there is nothing generating incomplete copy/divergence

> > data, so this code does nothing. Changes to _checkcopies to populate

> > these dicts are coming later in this series.

> >

> > diff -r d8379d11021b -r f5f046a680e6 mercurial/copies.py

> > --- a/mercurial/copies.py	Wed Oct 12 11:54:03 2016 +0200

> > +++ b/mercurial/copies.py	Tue Oct 04 12:51:54 2016 +0200

> > @@ -289,6 +289,22 @@

> >          return fctx

> >      return util.lrucachefunc(makectx)

> >

> > +def _combinecopies(copyfrom, copyto, finalcopy, diverge,

> incompletediverge):

> > +    """combine partial copy paths"""

> > +    remainder = {}

> > +    for f in copyfrom:

> > +        if f in copyto:

> > +            finalcopy[copyto[f]] = copyfrom[f]

> > +            del copyto[f]

> > +    for f in incompletediverge:

> > +        assert f not in diverge

> > +        ic = incompletediverge[f]

> > +        if ic[0] in copyto:

> > +            diverge[f] = [copyto[ic[0]], ic[1]]

> > +        else:

> > +            remainder[f] = ic

> > +    return remainder

> > +

> >  def mergecopies(repo, c1, c2, base):

> >      """

> >      Find moves and copies between context c1 and c2 that are relevant

> > @@ -360,14 +376,21 @@

> >      # - diverge = record all diverges in this dict

> >      # - copy = record all non-divergent copies in this dict

> >      # - fullcopy = record all copies in this dict

> > +    # - incomplete = record non-divergent partial copies here

> > +    # - incompletediverge = record divergent partial copies here

> >      diverge = {} # divergence data is shared

> > +    incompletediverge  = {}

> >      data1 = {'copy': {},

> >               'fullcopy': {},

> > +             'incomplete': {},

> >               'diverge': diverge,

> > +             'incompletediverge': incompletediverge,

> >              }

> >      data2 = {'copy': {},

> >               'fullcopy': {},

> > +             'incomplete': {},

> >               'diverge': diverge,

> > +             'incompletediverge': incompletediverge,

> >              }

> >

> >      # find interesting file sets from manifests

> > @@ -398,6 +421,13 @@

> >      copy = dict(data1['copy'].items() + data2['copy'].items())

> >      fullcopy = dict(data1['fullcopy'].items() +

> > data2['fullcopy'].items())

> >

> > +    if dirtyc1:

> > +        _combinecopies(data2['incomplete'], data1['incomplete'], copy,

> diverge,

> > +                       incompletediverge)

> > +    else:

> > +        _combinecopies(data1['incomplete'], data2['incomplete'], copy,

> diverge,

> > +                       incompletediverge)

> > +

> >      renamedelete = {}

> >      renamedeleteset = set()

> >      divergeset = set()

> > @@ -416,13 +446,36 @@

> >          repo.ui.debug("  unmatched files new in both:\n   %s\n"

> >                        % "\n   ".join(bothnew))

> >      bothdiverge = {}

> > -    bothdata = {'copy': {},

> > -                'fullcopy': {},

> > -                'diverge': bothdiverge,

> > -               }

> > +    bothincompletediverge = {}

> > +    both1 = {'copy': {},

> > +             'fullcopy': {},

> > +             'incomplete': {},

> > +             'diverge': bothdiverge,

> > +             'incompletediverge': bothincompletediverge

> > +            }

> > +    both2 = {'copy': {},

> > +             'fullcopy': {},

> > +             'incomplete': {},

> > +             'diverge': bothdiverge,

> > +             'incompletediverge': bothincompletediverge

> > +            }

> >      for f in bothnew:

> > -        _checkcopies(c1, f, m1, m2, base, tca, limit, bothdata)

> > -        _checkcopies(c2, f, m2, m1, base, tca, limit, bothdata)

> > +        _checkcopies(c1, f, m1, m2, base, tca, limit, both1)

> > +        _checkcopies(c2, f, m2, m1, base, tca, limit, both2)

> > +    if dirtyc1:

> > +        assert both2['incomplete'] == {}

>

> nits, this could be the more pythonic:

>

>    assert not both2['incomplete']

>

> Please consider updating this in a follow up

>

> > +        remainder = _combinecopies({}, both1['incomplete'], copy,

> bothdiverge,

> > +                                   bothincompletediverge)

> > +    else:

> > +        assert both1['incomplete'] == {}

> > +        remainder = _combinecopies({}, both2['incomplete'], copy,

> bothdiverge,

> > +                                   bothincompletediverge)

>

> It looks like this step is only relevant if dirtyc2 is True.

>

> Can we use a "elif dirtyc2" for the next block and a final

>

>    else:

>        assert not both1['incomplete']

>        assert not both2['incomplete']

>

> If so, I would be happy to take a follow up cleaning this up.


Done, see the "improve assertions" follow-up patch.

I also added a check for bothincompletediverge being empty when not grafting.

>

> > +    for f in remainder:

> > +        assert f not in bothdiverge

> > +        ic = remainder[f]

> > +        if ic[0] in (m1 if dirtyc1 else m2):

> > +            # backed-out rename on one side, but watch out for deleted files

> > +            bothdiverge[f] = ic

> >      for of, fl in bothdiverge.items():

> >          if len(fl) == 2 and fl[0] == fl[1]:

> >              copy[fl[0]] = of # not actually divergent, just matching

> > renames

>

> --

> Pierre-Yves David

Patch

diff -r d8379d11021b -r f5f046a680e6 mercurial/copies.py
--- a/mercurial/copies.py	Wed Oct 12 11:54:03 2016 +0200
+++ b/mercurial/copies.py	Tue Oct 04 12:51:54 2016 +0200
@@ -289,6 +289,22 @@ 
         return fctx
     return util.lrucachefunc(makectx)
 
+def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
+    """combine partial copy paths"""
+    remainder = {}
+    for f in copyfrom:
+        if f in copyto:
+            finalcopy[copyto[f]] = copyfrom[f]
+            del copyto[f]
+    for f in incompletediverge:
+        assert f not in diverge
+        ic = incompletediverge[f]
+        if ic[0] in copyto:
+            diverge[f] = [copyto[ic[0]], ic[1]]
+        else:
+            remainder[f] = ic
+    return remainder
+
 def mergecopies(repo, c1, c2, base):
     """
     Find moves and copies between context c1 and c2 that are relevant
@@ -360,14 +376,21 @@ 
     # - diverge = record all diverges in this dict
     # - copy = record all non-divergent copies in this dict
     # - fullcopy = record all copies in this dict
+    # - incomplete = record non-divergent partial copies here
+    # - incompletediverge = record divergent partial copies here
     diverge = {} # divergence data is shared
+    incompletediverge  = {}
     data1 = {'copy': {},
              'fullcopy': {},
+             'incomplete': {},
              'diverge': diverge,
+             'incompletediverge': incompletediverge,
             }
     data2 = {'copy': {},
              'fullcopy': {},
+             'incomplete': {},
              'diverge': diverge,
+             'incompletediverge': incompletediverge,
             }
 
     # find interesting file sets from manifests
@@ -398,6 +421,13 @@ 
     copy = dict(data1['copy'].items() + data2['copy'].items())
     fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
 
+    if dirtyc1:
+        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
+                       incompletediverge)
+    else:
+        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
+                       incompletediverge)
+
     renamedelete = {}
     renamedeleteset = set()
     divergeset = set()
@@ -416,13 +446,36 @@ 
         repo.ui.debug("  unmatched files new in both:\n   %s\n"
                       % "\n   ".join(bothnew))
     bothdiverge = {}
-    bothdata = {'copy': {},
-                'fullcopy': {},
-                'diverge': bothdiverge,
-               }
+    bothincompletediverge = {}
+    both1 = {'copy': {},
+             'fullcopy': {},
+             'incomplete': {},
+             'diverge': bothdiverge,
+             'incompletediverge': bothincompletediverge
+            }
+    both2 = {'copy': {},
+             'fullcopy': {},
+             'incomplete': {},
+             'diverge': bothdiverge,
+             'incompletediverge': bothincompletediverge
+            }
     for f in bothnew:
-        _checkcopies(c1, f, m1, m2, base, tca, limit, bothdata)
-        _checkcopies(c2, f, m2, m1, base, tca, limit, bothdata)
+        _checkcopies(c1, f, m1, m2, base, tca, limit, both1)
+        _checkcopies(c2, f, m2, m1, base, tca, limit, both2)
+    if dirtyc1:
+        assert both2['incomplete'] == {}
+        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
+                                   bothincompletediverge)
+    else:
+        assert both1['incomplete'] == {}
+        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
+                                   bothincompletediverge)
+    for f in remainder:
+        assert f not in bothdiverge
+        ic = remainder[f]
+        if ic[0] in (m1 if dirtyc1 else m2):
+            # backed-out rename on one side, but watch out for deleted files
+            bothdiverge[f] = ic
     for of, fl in bothdiverge.items():
         if len(fl) == 2 and fl[0] == fl[1]:
             copy[fl[0]] = of # not actually divergent, just matching renames