Submitter | Pierre-Yves David |
---|---|
Date | July 29, 2020, 4:57 p.m. |
Message ID | <88cc2b7a810243e8c101.1596041856@nodosa.octobus.net> |
Download | mbox | patch |
Permalink | /patch/46929/ |
State | Accepted |
Headers | show |
Comments
On Wed, 29 Jul 2020 18:57:36 +0200, Pierre-Yves David wrote: > # HG changeset patch > # User Pierre-Yves David <pierre-yves.david@octobus.net> > # Date 1595684952 -7200 > # Sat Jul 25 15:49:12 2020 +0200 > # Node ID 88cc2b7a810243e8c101933fd99778ce772ac316 > # Parent 6b4ada6dca3ff5591c5dea32ffbe2fa22f4e5ed7 > # EXP-Topic files-change > # Available At https://foss.heptapod.net/octobus/mercurial-devel/ > # hg pull https://foss.heptapod.net/octobus/mercurial-devel/ -r 88cc2b7a8102 > commitctx: return a richer object from _prepare_files Queued the remainder, thanks. > +class ChangingFiles(object): > + """A class recording the changes made to a file by a revision > + """ > + > + def __init__( > + self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(), > + ): > + self._added = set(added) > + self._removed = set(removed) > + self._touched = set(touched) > + self._touched.update(self._added) > + self._touched.update(self._removed) > + self._p1_copies = dict(p1_copies) > + self._p2_copies = dict(p2_copies) > + > + @property > + def added(self): > + return frozenset(self._added) I generally avoid using @property function that does non-trivial work per call. Does this frozenset business really matter? > + def mark_added(self, filename): > + self._added.add(filename) > + self._touched.add(filename) > + def mark_copied_from_p1(self, source, dest): > + self._p1_copies[dest] = source It doesn't make sense that mark_added() updates both added and touched, but mark_copied() doesn't. Since ChangingFiles is basically a data object, I think sanity check can be split into a function, and called occasionally e.g. before serializing the metadata.
> On Jul 29, 2020, at 12:57, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote: > > # HG changeset patch > # User Pierre-Yves David <pierre-yves.david@octobus.net> > # Date 1595684952 -7200 > # Sat Jul 25 15:49:12 2020 +0200 > # Node ID 88cc2b7a810243e8c101933fd99778ce772ac316 > # Parent 6b4ada6dca3ff5591c5dea32ffbe2fa22f4e5ed7 > # EXP-Topic files-change > # Available At https://foss.heptapod.net/octobus/mercurial-devel/ > # hg pull https://foss.heptapod.net/octobus/mercurial-devel/ -r 88cc2b7a8102 > commitctx: return a richer object from _prepare_files > > Instead of returning a lot of different list, we introduce a rich object that > hold all the file related information. The unique object help with data > consistency and simply functions arguments and return. > > In the rest of this series we will increase usage of this object to simplify > more code. > > diff --git a/mercurial/commit.py b/mercurial/commit.py > --- a/mercurial/commit.py > +++ b/mercurial/commit.py > @@ -64,12 +64,10 @@ def commitctx(repo, ctx, error=False, or > user = ctx.user() > > with repo.lock(), repo.transaction(b"commit") as tr: > - r = _prepare_files(tr, ctx, error=error, origctx=origctx) > - mn, files, p1copies, p2copies, filesadded, filesremoved = r > + mn, files = _prepare_files(tr, ctx, error=error, origctx=origctx) > > extra = ctx.extra().copy() > > - files = sorted(files) > if extra is not None: > for name in ( > b'p1copies', > @@ -79,16 +77,14 @@ def commitctx(repo, ctx, error=False, or > ): > extra.pop(name, None) > if repo.changelog._copiesstorage == b'extra': > - extra = _extra_with_copies( > - repo, extra, files, p1copies, p2copies, filesadded, filesremoved > - ) > + extra = _extra_with_copies(repo, extra, files) > > # update changelog > repo.ui.note(_(b"committing changelog\n")) > repo.changelog.delayupdate(tr) > n = repo.changelog.add( > mn, > - files, > + files.touched, > ctx.description(), > tr, > p1.node(), > @@ -96,10 +92,10 @@ def commitctx(repo, ctx, error=False, or > user, > ctx.date(), > extra, > - p1copies, > - p2copies, > - filesadded, > - filesremoved, > + files.copied_from_p1, > + files.copied_from_p2, > + files.added, > + files.removed, > ) > xp1, xp2 = p1.hex(), p2 and p2.hex() or b'' > repo.hook( > @@ -149,7 +145,19 @@ def _prepare_files(tr, ctx, error=False, > if origctx and origctx.manifestnode() == mn: > touched = origctx.files() > > - return mn, touched, p1copies, p2copies, filesadded, filesremoved > + files = metadata.ChangingFiles() > + if touched: > + files.update_touched(touched) > + if p1copies: > + files.update_copies_from_p1(p1copies) > + if p2copies: > + files.update_copies_from_p2(p2copies) > + if filesadded: > + files.update_added(filesadded) > + if filesremoved: > + files.update_removed(filesremoved) > + > + return mn, files > > > def _process_files(tr, ctx, error=False): > @@ -413,10 +421,13 @@ def _commit_manifest(tr, linkrev, ctx, m > return mn > > > -def _extra_with_copies( > - repo, extra, files, p1copies, p2copies, filesadded, filesremoved > -): > +def _extra_with_copies(repo, extra, files): > """encode copy information into a `extra` dictionnary""" > + p1copies = files.copied_from_p1 > + p2copies = files.copied_from_p2 > + filesadded = files.added > + filesremoved = files.removed > + files = sorted(files.touched) > if not _write_copy_meta(repo)[1]: > # If writing only to changeset extras, use None to indicate that > # no entry should be written. If writing to both, write an empty > diff --git a/mercurial/metadata.py b/mercurial/metadata.py > --- a/mercurial/metadata.py > +++ b/mercurial/metadata.py > @@ -22,6 +22,79 @@ from .revlogutils import ( > ) > > > +class ChangingFiles(object): Could much of this have been an attr.s instead? I'm not sure I'd worry too hard about frozen* types, but YMMV. > + """A class recording the changes made to a file by a revision > + """ > + > + def __init__( > + self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(), > + ): I'd like a follow-up on this change for this __init__: these should be None, not the empty-tuple. The way the function is constructed now pytype and mypy will both be _very_ confused (or wrong!) about what this function expects: they'll interpret this as "tuple of stuff" when it looks like it should mostly be "optional set or dict" of stuff? > + self._added = set(added) > + self._removed = set(removed) > + self._touched = set(touched) > + self._touched.update(self._added) > + self._touched.update(self._removed) > + self._p1_copies = dict(p1_copies) > + self._p2_copies = dict(p2_copies) > + > + @property > + def added(self): > + return frozenset(self._added) > + > + def mark_added(self, filename): > + self._added.add(filename) > + self._touched.add(filename) > + > + def update_added(self, filenames): > + for f in filenames: > + self.mark_added(f) > + > + @property > + def removed(self): > + return frozenset(self._removed) > + > + def mark_removed(self, filename): > + self._removed.add(filename) > + self._touched.add(filename) > + > + def update_removed(self, filenames): > + for f in filenames: > + self.mark_removed(f) > + > + @property > + def touched(self): > + return frozenset(self._touched) > + > + def mark_touched(self, filename): > + self._touched.add(filename) > + > + def update_touched(self, filenames): > + for f in filenames: > + self.mark_touched(f) > + > + @property > + def copied_from_p1(self): > + return self._p1_copies.copy() > + > + def mark_copied_from_p1(self, source, dest): > + self._p1_copies[dest] = source > + > + def update_copies_from_p1(self, copies): > + for dest, source in copies.items(): > + self.mark_copied_from_p1(source, dest) > + > + @property > + def copied_from_p2(self): > + return self._p2_copies.copy() > + > + def mark_copied_from_p2(self, source, dest): > + self._p2_copies[dest] = source > + > + def update_copies_from_p2(self, copies): > + for dest, source in copies.items(): > + self.mark_copied_from_p2(source, dest) > + > + > def computechangesetfilesadded(ctx): > """return the list of files added in a changeset > """ > > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
On 8/8/20 5:26 AM, Yuya Nishihara wrote: > On Wed, 29 Jul 2020 18:57:36 +0200, Pierre-Yves David wrote: >> # HG changeset patch >> # User Pierre-Yves David <pierre-yves.david@octobus.net> >> # Date 1595684952 -7200 >> # Sat Jul 25 15:49:12 2020 +0200 >> # Node ID 88cc2b7a810243e8c101933fd99778ce772ac316 >> # Parent 6b4ada6dca3ff5591c5dea32ffbe2fa22f4e5ed7 >> # EXP-Topic files-change >> # Available At https://foss.heptapod.net/octobus/mercurial-devel/ >> # hg pull https://foss.heptapod.net/octobus/mercurial-devel/ -r 88cc2b7a8102 >> commitctx: return a richer object from _prepare_files > > Queued the remainder, thanks. > >> +class ChangingFiles(object): >> + """A class recording the changes made to a file by a revision >> + """ >> + >> + def __init__( >> + self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(), >> + ): >> + self._added = set(added) >> + self._removed = set(removed) >> + self._touched = set(touched) >> + self._touched.update(self._added) >> + self._touched.update(self._removed) >> + self._p1_copies = dict(p1_copies) >> + self._p2_copies = dict(p2_copies) >> + >> + @property >> + def added(self): >> + return frozenset(self._added) > > I generally avoid using @property function that does non-trivial work per call. My plan here is to add some caching if this make a difference. But I wanted to get the logic right first. > Does this frozenset business really matter? These set get used are various layers during merge and commit and theses logic already have a lot of bugs. So I really want to avoid anyone messing with the global set by mistake. It already caught an handful of various error during this series. > >> + def mark_added(self, filename): >> + self._added.add(filename) >> + self._touched.add(filename) > >> + def mark_copied_from_p1(self, source, dest): >> + self._p1_copies[dest] = source > > It doesn't make sense that mark_added() updates both added and touched, > but mark_copied() doesn't. Strictly speaking, I don't think anyone would call copied for something that is not already touched. We can probably add this as a sanity check actually. > Since ChangingFiles is basically a data object, I think sanity check can be > split into a function, and called occasionally e.g. before serializing the > metadata. Given how many different layers will touch this object and how many bugs we already found within them. I do not really trust the code using these data to do the right thing. I've created tasks about your feedback and will send follow up when I am back from vacation in about 10 days.
On 8/10/20 5:37 PM, Augie Fackler wrote: > > >> On Jul 29, 2020, at 12:57, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote: >> >> # HG changeset patch >> # User Pierre-Yves David <pierre-yves.david@octobus.net> >> # Date 1595684952 -7200 >> # Sat Jul 25 15:49:12 2020 +0200 >> # Node ID 88cc2b7a810243e8c101933fd99778ce772ac316 >> # Parent 6b4ada6dca3ff5591c5dea32ffbe2fa22f4e5ed7 >> # EXP-Topic files-change >> # Available At https://foss.heptapod.net/octobus/mercurial-devel/ >> # hg pull https://foss.heptapod.net/octobus/mercurial-devel/ -r 88cc2b7a8102 >> commitctx: return a richer object from _prepare_files >> >> Instead of returning a lot of different list, we introduce a rich object that >> hold all the file related information. The unique object help with data >> consistency and simply functions arguments and return. >> >> In the rest of this series we will increase usage of this object to simplify >> more code. >> >> diff --git a/mercurial/commit.py b/mercurial/commit.py >> --- a/mercurial/commit.py >> +++ b/mercurial/commit.py >> @@ -64,12 +64,10 @@ def commitctx(repo, ctx, error=False, or >> user = ctx.user() >> >> with repo.lock(), repo.transaction(b"commit") as tr: >> - r = _prepare_files(tr, ctx, error=error, origctx=origctx) >> - mn, files, p1copies, p2copies, filesadded, filesremoved = r >> + mn, files = _prepare_files(tr, ctx, error=error, origctx=origctx) >> >> extra = ctx.extra().copy() >> >> - files = sorted(files) >> if extra is not None: >> for name in ( >> b'p1copies', >> @@ -79,16 +77,14 @@ def commitctx(repo, ctx, error=False, or >> ): >> extra.pop(name, None) >> if repo.changelog._copiesstorage == b'extra': >> - extra = _extra_with_copies( >> - repo, extra, files, p1copies, p2copies, filesadded, filesremoved >> - ) >> + extra = _extra_with_copies(repo, extra, files) >> >> # update changelog >> repo.ui.note(_(b"committing changelog\n")) >> repo.changelog.delayupdate(tr) >> n = repo.changelog.add( >> mn, >> - files, >> + files.touched, >> ctx.description(), >> tr, >> p1.node(), >> @@ -96,10 +92,10 @@ def commitctx(repo, ctx, error=False, or >> user, >> ctx.date(), >> extra, >> - p1copies, >> - p2copies, >> - filesadded, >> - filesremoved, >> + files.copied_from_p1, >> + files.copied_from_p2, >> + files.added, >> + files.removed, >> ) >> xp1, xp2 = p1.hex(), p2 and p2.hex() or b'' >> repo.hook( >> @@ -149,7 +145,19 @@ def _prepare_files(tr, ctx, error=False, >> if origctx and origctx.manifestnode() == mn: >> touched = origctx.files() >> >> - return mn, touched, p1copies, p2copies, filesadded, filesremoved >> + files = metadata.ChangingFiles() >> + if touched: >> + files.update_touched(touched) >> + if p1copies: >> + files.update_copies_from_p1(p1copies) >> + if p2copies: >> + files.update_copies_from_p2(p2copies) >> + if filesadded: >> + files.update_added(filesadded) >> + if filesremoved: >> + files.update_removed(filesremoved) >> + >> + return mn, files >> >> >> def _process_files(tr, ctx, error=False): >> @@ -413,10 +421,13 @@ def _commit_manifest(tr, linkrev, ctx, m >> return mn >> >> >> -def _extra_with_copies( >> - repo, extra, files, p1copies, p2copies, filesadded, filesremoved >> -): >> +def _extra_with_copies(repo, extra, files): >> """encode copy information into a `extra` dictionnary""" >> + p1copies = files.copied_from_p1 >> + p2copies = files.copied_from_p2 >> + filesadded = files.added >> + filesremoved = files.removed >> + files = sorted(files.touched) >> if not _write_copy_meta(repo)[1]: >> # If writing only to changeset extras, use None to indicate that >> # no entry should be written. If writing to both, write an empty >> diff --git a/mercurial/metadata.py b/mercurial/metadata.py >> --- a/mercurial/metadata.py >> +++ b/mercurial/metadata.py >> @@ -22,6 +22,79 @@ from .revlogutils import ( >> ) >> >> >> +class ChangingFiles(object): > > Could much of this have been an attr.s instead? I'm not sure I'd worry too hard about frozen* types, but YMMV. > >> + """A class recording the changes made to a file by a revision >> + """ >> + >> + def __init__( >> + self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(), >> + ): > > I'd like a follow-up on this change for this __init__: these should be None, not the empty-tuple. The way the function is constructed now pytype and mypy will both be _very_ confused (or wrong!) about what this function expects: they'll interpret this as "tuple of stuff" when it looks like it should mostly be "optional set or dict" of stuff? Sure, I'll put that is a series I am about to send.
Patch
diff --git a/mercurial/commit.py b/mercurial/commit.py --- a/mercurial/commit.py +++ b/mercurial/commit.py @@ -64,12 +64,10 @@ def commitctx(repo, ctx, error=False, or user = ctx.user() with repo.lock(), repo.transaction(b"commit") as tr: - r = _prepare_files(tr, ctx, error=error, origctx=origctx) - mn, files, p1copies, p2copies, filesadded, filesremoved = r + mn, files = _prepare_files(tr, ctx, error=error, origctx=origctx) extra = ctx.extra().copy() - files = sorted(files) if extra is not None: for name in ( b'p1copies', @@ -79,16 +77,14 @@ def commitctx(repo, ctx, error=False, or ): extra.pop(name, None) if repo.changelog._copiesstorage == b'extra': - extra = _extra_with_copies( - repo, extra, files, p1copies, p2copies, filesadded, filesremoved - ) + extra = _extra_with_copies(repo, extra, files) # update changelog repo.ui.note(_(b"committing changelog\n")) repo.changelog.delayupdate(tr) n = repo.changelog.add( mn, - files, + files.touched, ctx.description(), tr, p1.node(), @@ -96,10 +92,10 @@ def commitctx(repo, ctx, error=False, or user, ctx.date(), extra, - p1copies, - p2copies, - filesadded, - filesremoved, + files.copied_from_p1, + files.copied_from_p2, + files.added, + files.removed, ) xp1, xp2 = p1.hex(), p2 and p2.hex() or b'' repo.hook( @@ -149,7 +145,19 @@ def _prepare_files(tr, ctx, error=False, if origctx and origctx.manifestnode() == mn: touched = origctx.files() - return mn, touched, p1copies, p2copies, filesadded, filesremoved + files = metadata.ChangingFiles() + if touched: + files.update_touched(touched) + if p1copies: + files.update_copies_from_p1(p1copies) + if p2copies: + files.update_copies_from_p2(p2copies) + if filesadded: + files.update_added(filesadded) + if filesremoved: + files.update_removed(filesremoved) + + return mn, files def _process_files(tr, ctx, error=False): @@ -413,10 +421,13 @@ def _commit_manifest(tr, linkrev, ctx, m return mn -def _extra_with_copies( - repo, extra, files, p1copies, p2copies, filesadded, filesremoved -): +def _extra_with_copies(repo, extra, files): """encode copy information into a `extra` dictionnary""" + p1copies = files.copied_from_p1 + p2copies = files.copied_from_p2 + filesadded = files.added + filesremoved = files.removed + files = sorted(files.touched) if not _write_copy_meta(repo)[1]: # If writing only to changeset extras, use None to indicate that # no entry should be written. If writing to both, write an empty diff --git a/mercurial/metadata.py b/mercurial/metadata.py --- a/mercurial/metadata.py +++ b/mercurial/metadata.py @@ -22,6 +22,79 @@ from .revlogutils import ( ) +class ChangingFiles(object): + """A class recording the changes made to a file by a revision + """ + + def __init__( + self, touched=(), added=(), removed=(), p1_copies=(), p2_copies=(), + ): + self._added = set(added) + self._removed = set(removed) + self._touched = set(touched) + self._touched.update(self._added) + self._touched.update(self._removed) + self._p1_copies = dict(p1_copies) + self._p2_copies = dict(p2_copies) + + @property + def added(self): + return frozenset(self._added) + + def mark_added(self, filename): + self._added.add(filename) + self._touched.add(filename) + + def update_added(self, filenames): + for f in filenames: + self.mark_added(f) + + @property + def removed(self): + return frozenset(self._removed) + + def mark_removed(self, filename): + self._removed.add(filename) + self._touched.add(filename) + + def update_removed(self, filenames): + for f in filenames: + self.mark_removed(f) + + @property + def touched(self): + return frozenset(self._touched) + + def mark_touched(self, filename): + self._touched.add(filename) + + def update_touched(self, filenames): + for f in filenames: + self.mark_touched(f) + + @property + def copied_from_p1(self): + return self._p1_copies.copy() + + def mark_copied_from_p1(self, source, dest): + self._p1_copies[dest] = source + + def update_copies_from_p1(self, copies): + for dest, source in copies.items(): + self.mark_copied_from_p1(source, dest) + + @property + def copied_from_p2(self): + return self._p2_copies.copy() + + def mark_copied_from_p2(self, source, dest): + self._p2_copies[dest] = source + + def update_copies_from_p2(self, copies): + for dest, source in copies.items(): + self.mark_copied_from_p2(source, dest) + + def computechangesetfilesadded(ctx): """return the list of files added in a changeset """