Patchwork [06,of,15] commitctx: return a richer object from _prepare_files

login
register
mail settings
Submitter Pierre-Yves David
Date July 29, 2020, 4:57 p.m.
Message ID <88cc2b7a810243e8c101.1596041856@nodosa.octobus.net>
Download mbox | patch
Permalink /patch/46929/
State Accepted
Headers show

Comments

Pierre-Yves David - July 29, 2020, 4:57 p.m.
# HG changeset patch
# User Pierre-Yves David <pierre-yves.david@octobus.net>
# Date 1595684952 -7200
#      Sat Jul 25 15:49:12 2020 +0200
# Node ID 88cc2b7a810243e8c101933fd99778ce772ac316
# Parent  6b4ada6dca3ff5591c5dea32ffbe2fa22f4e5ed7
# EXP-Topic files-change
# Available At https://foss.heptapod.net/octobus/mercurial-devel/
#              hg pull https://foss.heptapod.net/octobus/mercurial-devel/ -r 88cc2b7a8102
commitctx: return a richer object from _prepare_files

Instead of returning a lot of different list, we introduce a rich object that
hold all the file related information. The unique object help with data
consistency and simply functions arguments and return.

In the rest of this series we will increase usage of this object to simplify
more code.
Yuya Nishihara - Aug. 8, 2020, 3:26 a.m.
On Wed, 29 Jul 2020 18:57:36 +0200, Pierre-Yves David wrote:
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david@octobus.net>
> # Date 1595684952 -7200
> #      Sat Jul 25 15:49:12 2020 +0200
> # Node ID 88cc2b7a810243e8c101933fd99778ce772ac316
> # Parent  6b4ada6dca3ff5591c5dea32ffbe2fa22f4e5ed7
> # EXP-Topic files-change
> # Available At https://foss.heptapod.net/octobus/mercurial-devel/
> #              hg pull https://foss.heptapod.net/octobus/mercurial-devel/ -r 88cc2b7a8102
> commitctx: return a richer object from _prepare_files

Queued the remainder, thanks.

> +class ChangingFiles(object):
> +    """A class recording the changes made to a file by a revision
> +    """
> +
> +    def __init__(
> +        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
> +    ):
> +        self._added = set(added)
> +        self._removed = set(removed)
> +        self._touched = set(touched)
> +        self._touched.update(self._added)
> +        self._touched.update(self._removed)
> +        self._p1_copies = dict(p1_copies)
> +        self._p2_copies = dict(p2_copies)
> +
> +    @property
> +    def added(self):
> +        return frozenset(self._added)

I generally avoid using @property function that does non-trivial work per call.
Does this frozenset business really matter?

> +    def mark_added(self, filename):
> +        self._added.add(filename)
> +        self._touched.add(filename)

> +    def mark_copied_from_p1(self, source, dest):
> +        self._p1_copies[dest] = source

It doesn't make sense that mark_added() updates both added and touched,
but mark_copied() doesn't.

Since ChangingFiles is basically a data object, I think sanity check can be
split into a function, and called occasionally e.g. before serializing the
metadata.
Augie Fackler - Aug. 10, 2020, 3:37 p.m.
> On Jul 29, 2020, at 12:57, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote:
> 
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david@octobus.net>
> # Date 1595684952 -7200
> #      Sat Jul 25 15:49:12 2020 +0200
> # Node ID 88cc2b7a810243e8c101933fd99778ce772ac316
> # Parent  6b4ada6dca3ff5591c5dea32ffbe2fa22f4e5ed7
> # EXP-Topic files-change
> # Available At https://foss.heptapod.net/octobus/mercurial-devel/
> #              hg pull https://foss.heptapod.net/octobus/mercurial-devel/ -r 88cc2b7a8102
> commitctx: return a richer object from _prepare_files
> 
> Instead of returning a lot of different list, we introduce a rich object that
> hold all the file related information. The unique object help with data
> consistency and simply functions arguments and return.
> 
> In the rest of this series we will increase usage of this object to simplify
> more code.
> 
> diff --git a/mercurial/commit.py b/mercurial/commit.py
> --- a/mercurial/commit.py
> +++ b/mercurial/commit.py
> @@ -64,12 +64,10 @@ def commitctx(repo, ctx, error=False, or
>     user = ctx.user()
> 
>     with repo.lock(), repo.transaction(b"commit") as tr:
> -        r = _prepare_files(tr, ctx, error=error, origctx=origctx)
> -        mn, files, p1copies, p2copies, filesadded, filesremoved = r
> +        mn, files = _prepare_files(tr, ctx, error=error, origctx=origctx)
> 
>         extra = ctx.extra().copy()
> 
> -        files = sorted(files)
>         if extra is not None:
>             for name in (
>                 b'p1copies',
> @@ -79,16 +77,14 @@ def commitctx(repo, ctx, error=False, or
>             ):
>                 extra.pop(name, None)
>         if repo.changelog._copiesstorage == b'extra':
> -            extra = _extra_with_copies(
> -                repo, extra, files, p1copies, p2copies, filesadded, filesremoved
> -            )
> +            extra = _extra_with_copies(repo, extra, files)
> 
>         # update changelog
>         repo.ui.note(_(b"committing changelog\n"))
>         repo.changelog.delayupdate(tr)
>         n = repo.changelog.add(
>             mn,
> -            files,
> +            files.touched,
>             ctx.description(),
>             tr,
>             p1.node(),
> @@ -96,10 +92,10 @@ def commitctx(repo, ctx, error=False, or
>             user,
>             ctx.date(),
>             extra,
> -            p1copies,
> -            p2copies,
> -            filesadded,
> -            filesremoved,
> +            files.copied_from_p1,
> +            files.copied_from_p2,
> +            files.added,
> +            files.removed,
>         )
>         xp1, xp2 = p1.hex(), p2 and p2.hex() or b''
>         repo.hook(
> @@ -149,7 +145,19 @@ def _prepare_files(tr, ctx, error=False,
>     if origctx and origctx.manifestnode() == mn:
>         touched = origctx.files()
> 
> -    return mn, touched, p1copies, p2copies, filesadded, filesremoved
> +    files = metadata.ChangingFiles()
> +    if touched:
> +        files.update_touched(touched)
> +    if p1copies:
> +        files.update_copies_from_p1(p1copies)
> +    if p2copies:
> +        files.update_copies_from_p2(p2copies)
> +    if filesadded:
> +        files.update_added(filesadded)
> +    if filesremoved:
> +        files.update_removed(filesremoved)
> +
> +    return mn, files
> 
> 
> def _process_files(tr, ctx, error=False):
> @@ -413,10 +421,13 @@ def _commit_manifest(tr, linkrev, ctx, m
>     return mn
> 
> 
> -def _extra_with_copies(
> -    repo, extra, files, p1copies, p2copies, filesadded, filesremoved
> -):
> +def _extra_with_copies(repo, extra, files):
>     """encode copy information into a `extra` dictionnary"""
> +    p1copies = files.copied_from_p1
> +    p2copies = files.copied_from_p2
> +    filesadded = files.added
> +    filesremoved = files.removed
> +    files = sorted(files.touched)
>     if not _write_copy_meta(repo)[1]:
>         # If writing only to changeset extras, use None to indicate that
>         # no entry should be written. If writing to both, write an empty
> diff --git a/mercurial/metadata.py b/mercurial/metadata.py
> --- a/mercurial/metadata.py
> +++ b/mercurial/metadata.py
> @@ -22,6 +22,79 @@ from .revlogutils import (
> )
> 
> 
> +class ChangingFiles(object):

Could much of this have been an attr.s instead? I'm not sure I'd worry too hard about frozen* types, but YMMV.

> +    """A class recording the changes made to a file by a revision
> +    """
> +
> +    def __init__(
> +        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
> +    ):

I'd like a follow-up on this change for this __init__: these should be None, not the empty-tuple. The way the function is constructed now pytype and mypy will both be _very_ confused (or wrong!) about what this function expects: they'll interpret this as "tuple of stuff" when it looks like it should mostly be "optional set or dict" of stuff?

> +        self._added = set(added)
> +        self._removed = set(removed)
> +        self._touched = set(touched)
> +        self._touched.update(self._added)
> +        self._touched.update(self._removed)
> +        self._p1_copies = dict(p1_copies)
> +        self._p2_copies = dict(p2_copies)
> +
> +    @property
> +    def added(self):
> +        return frozenset(self._added)
> +
> +    def mark_added(self, filename):
> +        self._added.add(filename)
> +        self._touched.add(filename)
> +
> +    def update_added(self, filenames):
> +        for f in filenames:
> +            self.mark_added(f)
> +
> +    @property
> +    def removed(self):
> +        return frozenset(self._removed)
> +
> +    def mark_removed(self, filename):
> +        self._removed.add(filename)
> +        self._touched.add(filename)
> +
> +    def update_removed(self, filenames):
> +        for f in filenames:
> +            self.mark_removed(f)
> +
> +    @property
> +    def touched(self):
> +        return frozenset(self._touched)
> +
> +    def mark_touched(self, filename):
> +        self._touched.add(filename)
> +
> +    def update_touched(self, filenames):
> +        for f in filenames:
> +            self.mark_touched(f)
> +
> +    @property
> +    def copied_from_p1(self):
> +        return self._p1_copies.copy()
> +
> +    def mark_copied_from_p1(self, source, dest):
> +        self._p1_copies[dest] = source
> +
> +    def update_copies_from_p1(self, copies):
> +        for dest, source in copies.items():
> +            self.mark_copied_from_p1(source, dest)
> +
> +    @property
> +    def copied_from_p2(self):
> +        return self._p2_copies.copy()
> +
> +    def mark_copied_from_p2(self, source, dest):
> +        self._p2_copies[dest] = source
> +
> +    def update_copies_from_p2(self, copies):
> +        for dest, source in copies.items():
> +            self.mark_copied_from_p2(source, dest)
> +
> +
> def computechangesetfilesadded(ctx):
>     """return the list of files added in a changeset
>     """
> 
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Pierre-Yves David - Aug. 15, 2020, 7:37 a.m.
On 8/8/20 5:26 AM, Yuya Nishihara wrote:
> On Wed, 29 Jul 2020 18:57:36 +0200, Pierre-Yves David wrote:
>> # HG changeset patch
>> # User Pierre-Yves David <pierre-yves.david@octobus.net>
>> # Date 1595684952 -7200
>> #      Sat Jul 25 15:49:12 2020 +0200
>> # Node ID 88cc2b7a810243e8c101933fd99778ce772ac316
>> # Parent  6b4ada6dca3ff5591c5dea32ffbe2fa22f4e5ed7
>> # EXP-Topic files-change
>> # Available At https://foss.heptapod.net/octobus/mercurial-devel/
>> #              hg pull https://foss.heptapod.net/octobus/mercurial-devel/ -r 88cc2b7a8102
>> commitctx: return a richer object from _prepare_files
> 
> Queued the remainder, thanks.
> 
>> +class ChangingFiles(object):
>> +    """A class recording the changes made to a file by a revision
>> +    """
>> +
>> +    def __init__(
>> +        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
>> +    ):
>> +        self._added = set(added)
>> +        self._removed = set(removed)
>> +        self._touched = set(touched)
>> +        self._touched.update(self._added)
>> +        self._touched.update(self._removed)
>> +        self._p1_copies = dict(p1_copies)
>> +        self._p2_copies = dict(p2_copies)
>> +
>> +    @property
>> +    def added(self):
>> +        return frozenset(self._added)
> 
> I generally avoid using @property function that does non-trivial work per call.

My plan here is to add some caching if this make a difference. But I 
wanted to get the logic right first.

> Does this frozenset business really matter?

These set get used are various layers during merge and commit and theses 
logic already have a lot of bugs. So I really want to avoid anyone 
messing with the global set by mistake. It already caught an handful of 
various error during this series.

> 
>> +    def mark_added(self, filename):
>> +        self._added.add(filename)
>> +        self._touched.add(filename)
> 
>> +    def mark_copied_from_p1(self, source, dest):
>> +        self._p1_copies[dest] = source
> 
> It doesn't make sense that mark_added() updates both added and touched,
> but mark_copied() doesn't.

Strictly speaking, I don't think anyone would call copied for something 
that is not already touched. We can probably add this as a sanity check 
actually.

> Since ChangingFiles is basically a data object, I think sanity check can be
> split into a function, and called occasionally e.g. before serializing the
> metadata.

Given how many different layers will touch this object and how many bugs 
we already found within them. I do not really trust the code using these 
data to do the right thing.

I've created tasks about your feedback and will send follow up when I am 
back from vacation in about 10 days.
Pierre-Yves David - Sept. 23, 2020, 7:04 a.m.
On 8/10/20 5:37 PM, Augie Fackler wrote:
> 
> 
>> On Jul 29, 2020, at 12:57, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote:
>>
>> # HG changeset patch
>> # User Pierre-Yves David <pierre-yves.david@octobus.net>
>> # Date 1595684952 -7200
>> #      Sat Jul 25 15:49:12 2020 +0200
>> # Node ID 88cc2b7a810243e8c101933fd99778ce772ac316
>> # Parent  6b4ada6dca3ff5591c5dea32ffbe2fa22f4e5ed7
>> # EXP-Topic files-change
>> # Available At https://foss.heptapod.net/octobus/mercurial-devel/
>> #              hg pull https://foss.heptapod.net/octobus/mercurial-devel/ -r 88cc2b7a8102
>> commitctx: return a richer object from _prepare_files
>>
>> Instead of returning a lot of different list, we introduce a rich object that
>> hold all the file related information. The unique object help with data
>> consistency and simply functions arguments and return.
>>
>> In the rest of this series we will increase usage of this object to simplify
>> more code.
>>
>> diff --git a/mercurial/commit.py b/mercurial/commit.py
>> --- a/mercurial/commit.py
>> +++ b/mercurial/commit.py
>> @@ -64,12 +64,10 @@ def commitctx(repo, ctx, error=False, or
>>      user = ctx.user()
>>
>>      with repo.lock(), repo.transaction(b"commit") as tr:
>> -        r = _prepare_files(tr, ctx, error=error, origctx=origctx)
>> -        mn, files, p1copies, p2copies, filesadded, filesremoved = r
>> +        mn, files = _prepare_files(tr, ctx, error=error, origctx=origctx)
>>
>>          extra = ctx.extra().copy()
>>
>> -        files = sorted(files)
>>          if extra is not None:
>>              for name in (
>>                  b'p1copies',
>> @@ -79,16 +77,14 @@ def commitctx(repo, ctx, error=False, or
>>              ):
>>                  extra.pop(name, None)
>>          if repo.changelog._copiesstorage == b'extra':
>> -            extra = _extra_with_copies(
>> -                repo, extra, files, p1copies, p2copies, filesadded, filesremoved
>> -            )
>> +            extra = _extra_with_copies(repo, extra, files)
>>
>>          # update changelog
>>          repo.ui.note(_(b"committing changelog\n"))
>>          repo.changelog.delayupdate(tr)
>>          n = repo.changelog.add(
>>              mn,
>> -            files,
>> +            files.touched,
>>              ctx.description(),
>>              tr,
>>              p1.node(),
>> @@ -96,10 +92,10 @@ def commitctx(repo, ctx, error=False, or
>>              user,
>>              ctx.date(),
>>              extra,
>> -            p1copies,
>> -            p2copies,
>> -            filesadded,
>> -            filesremoved,
>> +            files.copied_from_p1,
>> +            files.copied_from_p2,
>> +            files.added,
>> +            files.removed,
>>          )
>>          xp1, xp2 = p1.hex(), p2 and p2.hex() or b''
>>          repo.hook(
>> @@ -149,7 +145,19 @@ def _prepare_files(tr, ctx, error=False,
>>      if origctx and origctx.manifestnode() == mn:
>>          touched = origctx.files()
>>
>> -    return mn, touched, p1copies, p2copies, filesadded, filesremoved
>> +    files = metadata.ChangingFiles()
>> +    if touched:
>> +        files.update_touched(touched)
>> +    if p1copies:
>> +        files.update_copies_from_p1(p1copies)
>> +    if p2copies:
>> +        files.update_copies_from_p2(p2copies)
>> +    if filesadded:
>> +        files.update_added(filesadded)
>> +    if filesremoved:
>> +        files.update_removed(filesremoved)
>> +
>> +    return mn, files
>>
>>
>> def _process_files(tr, ctx, error=False):
>> @@ -413,10 +421,13 @@ def _commit_manifest(tr, linkrev, ctx, m
>>      return mn
>>
>>
>> -def _extra_with_copies(
>> -    repo, extra, files, p1copies, p2copies, filesadded, filesremoved
>> -):
>> +def _extra_with_copies(repo, extra, files):
>>      """encode copy information into a `extra` dictionnary"""
>> +    p1copies = files.copied_from_p1
>> +    p2copies = files.copied_from_p2
>> +    filesadded = files.added
>> +    filesremoved = files.removed
>> +    files = sorted(files.touched)
>>      if not _write_copy_meta(repo)[1]:
>>          # If writing only to changeset extras, use None to indicate that
>>          # no entry should be written. If writing to both, write an empty
>> diff --git a/mercurial/metadata.py b/mercurial/metadata.py
>> --- a/mercurial/metadata.py
>> +++ b/mercurial/metadata.py
>> @@ -22,6 +22,79 @@ from .revlogutils import (
>> )
>>
>>
>> +class ChangingFiles(object):
> 
> Could much of this have been an attr.s instead? I'm not sure I'd worry too hard about frozen* types, but YMMV.
> 
>> +    """A class recording the changes made to a file by a revision
>> +    """
>> +
>> +    def __init__(
>> +        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
>> +    ):
> 
> I'd like a follow-up on this change for this __init__: these should be None, not the empty-tuple. The way the function is constructed now pytype and mypy will both be _very_ confused (or wrong!) about what this function expects: they'll interpret this as "tuple of stuff" when it looks like it should mostly be "optional set or dict" of stuff?

Sure, I'll put that is a series I am about to send.

Patch

diff --git a/mercurial/commit.py b/mercurial/commit.py
--- a/mercurial/commit.py
+++ b/mercurial/commit.py
@@ -64,12 +64,10 @@  def commitctx(repo, ctx, error=False, or
     user = ctx.user()
 
     with repo.lock(), repo.transaction(b"commit") as tr:
-        r = _prepare_files(tr, ctx, error=error, origctx=origctx)
-        mn, files, p1copies, p2copies, filesadded, filesremoved = r
+        mn, files = _prepare_files(tr, ctx, error=error, origctx=origctx)
 
         extra = ctx.extra().copy()
 
-        files = sorted(files)
         if extra is not None:
             for name in (
                 b'p1copies',
@@ -79,16 +77,14 @@  def commitctx(repo, ctx, error=False, or
             ):
                 extra.pop(name, None)
         if repo.changelog._copiesstorage == b'extra':
-            extra = _extra_with_copies(
-                repo, extra, files, p1copies, p2copies, filesadded, filesremoved
-            )
+            extra = _extra_with_copies(repo, extra, files)
 
         # update changelog
         repo.ui.note(_(b"committing changelog\n"))
         repo.changelog.delayupdate(tr)
         n = repo.changelog.add(
             mn,
-            files,
+            files.touched,
             ctx.description(),
             tr,
             p1.node(),
@@ -96,10 +92,10 @@  def commitctx(repo, ctx, error=False, or
             user,
             ctx.date(),
             extra,
-            p1copies,
-            p2copies,
-            filesadded,
-            filesremoved,
+            files.copied_from_p1,
+            files.copied_from_p2,
+            files.added,
+            files.removed,
         )
         xp1, xp2 = p1.hex(), p2 and p2.hex() or b''
         repo.hook(
@@ -149,7 +145,19 @@  def _prepare_files(tr, ctx, error=False,
     if origctx and origctx.manifestnode() == mn:
         touched = origctx.files()
 
-    return mn, touched, p1copies, p2copies, filesadded, filesremoved
+    files = metadata.ChangingFiles()
+    if touched:
+        files.update_touched(touched)
+    if p1copies:
+        files.update_copies_from_p1(p1copies)
+    if p2copies:
+        files.update_copies_from_p2(p2copies)
+    if filesadded:
+        files.update_added(filesadded)
+    if filesremoved:
+        files.update_removed(filesremoved)
+
+    return mn, files
 
 
 def _process_files(tr, ctx, error=False):
@@ -413,10 +421,13 @@  def _commit_manifest(tr, linkrev, ctx, m
     return mn
 
 
-def _extra_with_copies(
-    repo, extra, files, p1copies, p2copies, filesadded, filesremoved
-):
+def _extra_with_copies(repo, extra, files):
     """encode copy information into a `extra` dictionnary"""
+    p1copies = files.copied_from_p1
+    p2copies = files.copied_from_p2
+    filesadded = files.added
+    filesremoved = files.removed
+    files = sorted(files.touched)
     if not _write_copy_meta(repo)[1]:
         # If writing only to changeset extras, use None to indicate that
         # no entry should be written. If writing to both, write an empty
diff --git a/mercurial/metadata.py b/mercurial/metadata.py
--- a/mercurial/metadata.py
+++ b/mercurial/metadata.py
@@ -22,6 +22,79 @@  from .revlogutils import (
 )
 
 
+class ChangingFiles(object):
+    """A class recording the changes made to a file by a revision
+    """
+
+    def __init__(
+        self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(),
+    ):
+        self._added = set(added)
+        self._removed = set(removed)
+        self._touched = set(touched)
+        self._touched.update(self._added)
+        self._touched.update(self._removed)
+        self._p1_copies = dict(p1_copies)
+        self._p2_copies = dict(p2_copies)
+
+    @property
+    def added(self):
+        return frozenset(self._added)
+
+    def mark_added(self, filename):
+        self._added.add(filename)
+        self._touched.add(filename)
+
+    def update_added(self, filenames):
+        for f in filenames:
+            self.mark_added(f)
+
+    @property
+    def removed(self):
+        return frozenset(self._removed)
+
+    def mark_removed(self, filename):
+        self._removed.add(filename)
+        self._touched.add(filename)
+
+    def update_removed(self, filenames):
+        for f in filenames:
+            self.mark_removed(f)
+
+    @property
+    def touched(self):
+        return frozenset(self._touched)
+
+    def mark_touched(self, filename):
+        self._touched.add(filename)
+
+    def update_touched(self, filenames):
+        for f in filenames:
+            self.mark_touched(f)
+
+    @property
+    def copied_from_p1(self):
+        return self._p1_copies.copy()
+
+    def mark_copied_from_p1(self, source, dest):
+        self._p1_copies[dest] = source
+
+    def update_copies_from_p1(self, copies):
+        for dest, source in copies.items():
+            self.mark_copied_from_p1(source, dest)
+
+    @property
+    def copied_from_p2(self):
+        return self._p2_copies.copy()
+
+    def mark_copied_from_p2(self, source, dest):
+        self._p2_copies[dest] = source
+
+    def update_copies_from_p2(self, copies):
+        for dest, source in copies.items():
+            self.mark_copied_from_p2(source, dest)
+
+
 def computechangesetfilesadded(ctx):
     """return the list of files added in a changeset
     """