Submitter | Mateusz Kwapich |
---|---|
Date | Nov. 21, 2016, 4:13 p.m. |
Message ID | <4af70f21264ac8e52d9b.1479744812@devvm314.lla2.facebook.com> |
Download | mbox | patch |
Permalink | /patch/17668/ |
State | Accepted |
Headers | show |
Comments
On Mon, Nov 21, 2016 at 08:13:32AM -0800, Mateusz Kwapich wrote: > # HG changeset patch > # User Mateusz Kwapich <mitrandir@fb.com> > # Date 1479744581 28800 > # Mon Nov 21 08:09:41 2016 -0800 > # Node ID 4af70f21264ac8e52d9b218080bbc96ee5505606 > # Parent 4a0824bead3ba5980bd8528937fba5f7bb31ba9f > memctx: allow the memlightctx thats reusing the manifest node > > When we have a lot of files writing a new manifest revision can be expensive. > This commit adds a possibility for memctx to reuse a manifest from a different > commit. This can be beneficial for commands that are creating metadata changes > without any actual files changed like "hg metaedit" in evolve extension. > > I will send the change for evolve that leverages this once this is accepted. > > diff --git a/mercurial/context.py b/mercurial/context.py > --- a/mercurial/context.py > +++ b/mercurial/context.py > @@ -1975,3 +1975,101 @@ class memfilectx(committablefilectx): > def write(self, data, flags): > """wraps repo.wwrite""" > self._data = data > + > +class memlightctx(committablectx): Could this instead be called "metadataonlyctx"? No need to do a resend, if you're happy with my bikeshed color I can fix it in flight. Also, could histedit be updated to use this for 'mess' actions perhaps? Probably not easy, but if it is I'd love to see an in-tree client of this class. Can you take a look? > + """Like memctx but it's reusing the manifest of different commit. > + Intended to be used by lightweight operations that are creating > + metadata-only changes. > + > + Revision information is supplied at initialization time. 'repo' is the > + current localrepo, 'ctx' is original revision which manifest we're reuisng > + 'parents' is a sequence of two parent revisions identifiers (pass None for > + every missing parent), 'text' is the commit. > + > + user receives the committer name and defaults to current repository > + username, date is the commit date in any format supported by > + util.parsedate() and defaults to current date, extra is a dictionary of > + metadata or is left empty. > + """ > + def __new__(cls, repo, path, *args, **kwargs): > + return super(memlightctx, cls).__new__(cls, repo) > + > + def __init__(self, repo, originalctx, parents, text, user=None, date=None, > + extra=None, editor=False): > + super(memlightctx, self).__init__(repo, text, user, date, extra) > + self._rev = None > + self._node = None > + self._originalctx = originalctx > + self._manifestnode = originalctx.manifestnode() > + parents = [(p or nullid) for p in parents] > + p1, p2 = self._parents = [changectx(self._repo, p) for p in parents] > + > + # sanity check to ensure that the reused manifest parents are > + # manifests of our commit parents > + mp1, mp2 = self.manifestctx().parents > + if p1 != nullid and p1.manifestctx().node() != mp1: > + raise RuntimeError('can\'t reuse the manifest: ' > + 'its p1 doesn\'t match the new ctx p1') > + if p2 != nullid and p2.manifestctx().node() != mp2: > + raise RuntimeError('can\'t reuse the manifest: ' > + 'its p2 doesn\'t match the new ctx p2') > + > + self._files = originalctx.files() > + self.substate = {} > + > + if extra: > + self._extra = extra.copy() > + else: > + self._extra = {} > + > + if self._extra.get('branch', '') == '': > + self._extra['branch'] = 'default' > + > + if editor: > + self._text = editor(self._repo, self, []) > + self._repo.savecommitmessage(self._text) > + > + def manifestnode(self): > + return self._manifestnode > + > + @propertycache > + def _manifestctx(self): > + return self._repo.manifestlog[self._manifestnode] > + > + def filectx(self, path, filelog=None): > + return self._originalctx.filectx(path, filelog=filelog) > + > + def commit(self): > + """commit context to the repo""" > + return self._repo.commitctx(self) > + > + @property > + def _manifest(self): > + return self._originalctx.manifest() > + > + @propertycache > + def _status(self): > + """Calculate exact status from ``files`` specified in the ``origctx`` > + and parents manifests. > + """ > + man1 = self.p1().manifest() > + p2 = self._parents[1] > + # "1 < len(self._parents)" can't be used for checking > + # existence of the 2nd parent, because "memlightctx._parents" is > + # explicitly initialized by the list, of which length is 2. > + if p2.node() != nullid: > + man2 = p2.manifest() > + managing = lambda f: f in man1 or f in man2 > + else: > + managing = lambda f: f in man1 > + > + modified, added, removed = [], [], [] > + for f in self._files: > + if not managing(f): > + added.append(f) > + elif self[f]: > + modified.append(f) > + else: > + removed.append(f) > + > + return scmutil.status(modified, added, removed, [], [], [], []) > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Excerpts from Augie Fackler's message of 2016-11-21 18:26:16 -0500: > On Mon, Nov 21, 2016 at 08:13:32AM -0800, Mateusz Kwapich wrote: > > # HG changeset patch > > # User Mateusz Kwapich <mitrandir@fb.com> > > # Date 1479744581 28800 > > # Mon Nov 21 08:09:41 2016 -0800 > > # Node ID 4af70f21264ac8e52d9b218080bbc96ee5505606 > > # Parent 4a0824bead3ba5980bd8528937fba5f7bb31ba9f > > memctx: allow the memlightctx thats reusing the manifest node > > > > When we have a lot of files writing a new manifest revision can be expensive. > > This commit adds a possibility for memctx to reuse a manifest from a different > > commit. This can be beneficial for commands that are creating metadata changes > > without any actual files changed like "hg metaedit" in evolve extension. > > > > I will send the change for evolve that leverages this once this is accepted. > > > > diff --git a/mercurial/context.py b/mercurial/context.py > > --- a/mercurial/context.py > > +++ b/mercurial/context.py > > @@ -1975,3 +1975,101 @@ class memfilectx(committablefilectx): > > def write(self, data, flags): > > """wraps repo.wwrite""" > > self._data = data > > + > > +class memlightctx(committablectx): > > Could this instead be called "metadataonlyctx"? No need to do a > resend, if you're happy with my bikeshed color I can fix it in flight. > > Also, could histedit be updated to use this for 'mess' actions > perhaps? Probably not easy, but if it is I'd love to see an in-tree > client of this class. Can you take a look? sure, feel free to rename it. I can look into hooking this up into the histedit. It won't be helpful in general case of message editing but it can be used when the previous actions were all "pick" or "mess" and it will be much faster in that case. > > > + """Like memctx but it's reusing the manifest of different commit. > > + Intended to be used by lightweight operations that are creating > > + metadata-only changes. > > + > > + Revision information is supplied at initialization time. 'repo' is the > > + current localrepo, 'ctx' is original revision which manifest we're reuisng > > + 'parents' is a sequence of two parent revisions identifiers (pass None for > > + every missing parent), 'text' is the commit. > > + > > + user receives the committer name and defaults to current repository > > + username, date is the commit date in any format supported by > > + util.parsedate() and defaults to current date, extra is a dictionary of > > + metadata or is left empty. > > + """ > > + def __new__(cls, repo, path, *args, **kwargs): > > + return super(memlightctx, cls).__new__(cls, repo) > > + > > + def __init__(self, repo, originalctx, parents, text, user=None, date=None, > > + extra=None, editor=False): > > + super(memlightctx, self).__init__(repo, text, user, date, extra) > > + self._rev = None > > + self._node = None > > + self._originalctx = originalctx > > + self._manifestnode = originalctx.manifestnode() > > + parents = [(p or nullid) for p in parents] > > + p1, p2 = self._parents = [changectx(self._repo, p) for p in parents] > > + > > + # sanity check to ensure that the reused manifest parents are > > + # manifests of our commit parents > > + mp1, mp2 = self.manifestctx().parents > > + if p1 != nullid and p1.manifestctx().node() != mp1: > > + raise RuntimeError('can\'t reuse the manifest: ' > > + 'its p1 doesn\'t match the new ctx p1') > > + if p2 != nullid and p2.manifestctx().node() != mp2: > > + raise RuntimeError('can\'t reuse the manifest: ' > > + 'its p2 doesn\'t match the new ctx p2') > > + > > + self._files = originalctx.files() > > + self.substate = {} > > + > > + if extra: > > + self._extra = extra.copy() > > + else: > > + self._extra = {} > > + > > + if self._extra.get('branch', '') == '': > > + self._extra['branch'] = 'default' > > + > > + if editor: > > + self._text = editor(self._repo, self, []) > > + self._repo.savecommitmessage(self._text) > > + > > + def manifestnode(self): > > + return self._manifestnode > > + > > + @propertycache > > + def _manifestctx(self): > > + return self._repo.manifestlog[self._manifestnode] > > + > > + def filectx(self, path, filelog=None): > > + return self._originalctx.filectx(path, filelog=filelog) > > + > > + def commit(self): > > + """commit context to the repo""" > > + return self._repo.commitctx(self) > > + > > + @property > > + def _manifest(self): > > + return self._originalctx.manifest() > > + > > + @propertycache > > + def _status(self): > > + """Calculate exact status from ``files`` specified in the ``origctx`` > > + and parents manifests. > > + """ > > + man1 = self.p1().manifest() > > + p2 = self._parents[1] > > + # "1 < len(self._parents)" can't be used for checking > > + # existence of the 2nd parent, because "memlightctx._parents" is > > + # explicitly initialized by the list, of which length is 2. > > + if p2.node() != nullid: > > + man2 = p2.manifest() > > + managing = lambda f: f in man1 or f in man2 > > + else: > > + managing = lambda f: f in man1 > > + > > + modified, added, removed = [], [], [] > > + for f in self._files: > > + if not managing(f): > > + added.append(f) > > + elif self[f]: > > + modified.append(f) > > + else: > > + removed.append(f) > > + > > + return scmutil.status(modified, added, removed, [], [], [], []) > > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel Best, Mateusz
Excerpts from Augie Fackler's message of 2016-11-21 18:26:16 -0500: > Also, could histedit be updated to use this for 'mess' actions > perhaps? Probably not easy, but if it is I'd love to see an in-tree > client of this class. Can you take a look? It does not work if "mess" happens after other csets creating new manifests. Instead of making it a special for histedit. I'm always more interested in a general purpose lower-level function doing some kind of "rebase" in core, that smartly deals with this case and handles bookmark and dirstate parent movements (and obsmarker creation) automatically. There are currently multiple places reinventing the logic over and over: - absorb / collate - rebase - histedit - evolve And once we have this API in core, we can migrate the above commands to use it. I'm thinking about something like: relocate(origctx, newparents, fileoverrides, metaoverrides, obsoleted, transaction) # origctx: context # newparents: [node] # fileoverrides: abstract dict, keys: path, value: (content, mode, renamed) # metaoverrides: dict with defined set of keys. to override metadata like # commit message etc. # obsoleted: bool, could also be a non-bool to provide more metadata Then relocate will choose what *ctx to use internally. For absorb, metaoverrides is empty, for metaedit, fileoverrides is empty. For rebase, both are empty. For histedit, depends. Two things I'd like to make sure they are considered while developing the API: 1. Giant files friendly - if we need to commit two giant files, we don't need to keep both of them in memory. "fileoverrides" being an abstract dict should solve this. 2. Commit hooks friendly - if any commit hook is requiring a working copy, fallback to the slow path that writs the disk. (this could be the most complex part of the implementation, but I guess crecord-alike already has similar things). We also need to have ways to mark a commit hook as "do not require on-disk working copy" - could be a config option, or checking some magic string in the hook itself. If we decide to go this way, I could help start some more formal discussion (a plan page or so) - hopefully we can collect enough corner cases that need support, have a final decision about the API soon. CC smf because it could be related to your memctx work.
I like the relocate idea. It would be an useful abstractions - we have a lot of things that are rewriting one commit at a time. Excerpts from Jun Wu's message of 2016-11-22 22:18:59 +0000: > Excerpts from Augie Fackler's message of 2016-11-21 18:26:16 -0500: > > Also, could histedit be updated to use this for 'mess' actions > > perhaps? Probably not easy, but if it is I'd love to see an in-tree > > client of this class. Can you take a look? > > It does not work if "mess" happens after other csets creating new manifests. > > Instead of making it a special for histedit. I'm always more interested in > a general purpose lower-level function doing some kind of "rebase" in core, > that smartly deals with this case and handles bookmark and dirstate parent > movements (and obsmarker creation) automatically. > > There are currently multiple places reinventing the logic over and over: > - absorb / collate > - rebase > - histedit > - evolve > > And once we have this API in core, we can migrate the above commands to use > it. > > I'm thinking about something like: > > relocate(origctx, newparents, fileoverrides, metaoverrides, obsoleted, > transaction) > # origctx: context > # newparents: [node] > # fileoverrides: abstract dict, keys: path, value: (content, mode, renamed) > # metaoverrides: dict with defined set of keys. to override metadata like > # commit message etc. > # obsoleted: bool, could also be a non-bool to provide more metadata > > Then relocate will choose what *ctx to use internally. For absorb, > metaoverrides is empty, for metaedit, fileoverrides is empty. For rebase, > both are empty. For histedit, depends. > > Two things I'd like to make sure they are considered while developing the > API: > > 1. Giant files friendly - if we need to commit two giant files, we don't > need to keep both of them in memory. "fileoverrides" being an abstract > dict should solve this. > 2. Commit hooks friendly - if any commit hook is requiring a working copy, > fallback to the slow path that writs the disk. (this could be the most > complex part of the implementation, but I guess crecord-alike already > has similar things). We also need to have ways to mark a commit hook as > "do not require on-disk working copy" - could be a config option, or > checking some magic string in the hook itself. > > If we decide to go this way, I could help start some more formal discussion > (a plan page or so) - hopefully we can collect enough corner cases that > need support, have a final decision about the API soon. > > CC smf because it could be related to your memctx work.
On Tue, Nov 22, 2016 at 2:18 PM, Jun Wu <quark@fb.com> wrote: > Excerpts from Augie Fackler's message of 2016-11-21 18:26:16 -0500: > > Also, could histedit be updated to use this for 'mess' actions > > perhaps? Probably not easy, but if it is I'd love to see an in-tree > > client of this class. Can you take a look? > > It does not work if "mess" happens after other csets creating new > manifests. > > Instead of making it a special for histedit. I'm always more interested in > a general purpose lower-level function doing some kind of "rebase" in core, > that smartly deals with this case and handles bookmark and dirstate parent > movements (and obsmarker creation) automatically. > > There are currently multiple places reinventing the logic over and over: > - absorb / collate > - rebase > - histedit > - evolve > > And once we have this API in core, we can migrate the above commands to use > it. > > This is a good idea. And it has been discussed before! Often in the context of https://www.mercurial-scm.org/wiki/PlanHistoryRewritePlan. I once showed Pierre-Yves some code ( https://hg.mozilla.org/hgcustom/version-control-tools/file/6298a2195598/pylib/mozhg/mozhg/rewrite.py) Mozilla wrote to do generic changeset rewriting. He was receptive to introducing a rewrite.py or rewriting.py in the core project and consolidating low-level history rewriting code there. Whether Mozilla's code could be used - probably not, as it was designed for a very narrow focus. > I'm thinking about something like: > > relocate(origctx, newparents, fileoverrides, metaoverrides, obsoleted, > transaction) > # origctx: context > # newparents: [node] > # fileoverrides: abstract dict, keys: path, value: (content, mode, > renamed) > # metaoverrides: dict with defined set of keys. to override metadata like > # commit message etc. > # obsoleted: bool, could also be a non-bool to provide more metadata > > Then relocate will choose what *ctx to use internally. For absorb, > metaoverrides is empty, for metaedit, fileoverrides is empty. For rebase, > both are empty. For histedit, depends. > > Two things I'd like to make sure they are considered while developing the > API: > > 1. Giant files friendly - if we need to commit two giant files, we don't > need to keep both of them in memory. "fileoverrides" being an abstract > dict should solve this. > 2. Commit hooks friendly - if any commit hook is requiring a working > copy, > fallback to the slow path that writs the disk. (this could be the most > complex part of the implementation, but I guess crecord-alike already > has similar things). We also need to have ways to mark a commit hook > as > "do not require on-disk working copy" - could be a config option, or > checking some magic string in the hook itself. > > If we decide to go this way, I could help start some more formal discussion > (a plan page or so) - hopefully we can collect enough corner cases that > need support, have a final decision about the API soon. > > CC smf because it could be related to your memctx work. > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel >
On Fri, Nov 25, 2016 at 12:15:57PM +0000, Mateusz Kwapich wrote: > I like the relocate idea. It would be an useful abstractions - we have a > lot of things that are rewriting one commit at a time. These patches are queued with the rename of the class performed. Thanks! > > Excerpts from Jun Wu's message of 2016-11-22 22:18:59 +0000: > > Excerpts from Augie Fackler's message of 2016-11-21 18:26:16 -0500: > > > Also, could histedit be updated to use this for 'mess' actions > > > perhaps? Probably not easy, but if it is I'd love to see an in-tree > > > client of this class. Can you take a look? > > > > It does not work if "mess" happens after other csets creating new manifests. > > > > Instead of making it a special for histedit. I'm always more interested in > > a general purpose lower-level function doing some kind of "rebase" in core, > > that smartly deals with this case and handles bookmark and dirstate parent > > movements (and obsmarker creation) automatically. > > > > There are currently multiple places reinventing the logic over and over: > > - absorb / collate > > - rebase > > - histedit > > - evolve > > > > And once we have this API in core, we can migrate the above commands to use > > it. > > > > I'm thinking about something like: > > > > relocate(origctx, newparents, fileoverrides, metaoverrides, obsoleted, > > transaction) > > # origctx: context > > # newparents: [node] > > # fileoverrides: abstract dict, keys: path, value: (content, mode, renamed) > > # metaoverrides: dict with defined set of keys. to override metadata like > > # commit message etc. > > # obsoleted: bool, could also be a non-bool to provide more metadata > > > > Then relocate will choose what *ctx to use internally. For absorb, > > metaoverrides is empty, for metaedit, fileoverrides is empty. For rebase, > > both are empty. For histedit, depends. > > > > Two things I'd like to make sure they are considered while developing the > > API: > > > > 1. Giant files friendly - if we need to commit two giant files, we don't > > need to keep both of them in memory. "fileoverrides" being an abstract > > dict should solve this. > > 2. Commit hooks friendly - if any commit hook is requiring a working copy, > > fallback to the slow path that writs the disk. (this could be the most > > complex part of the implementation, but I guess crecord-alike already > > has similar things). We also need to have ways to mark a commit hook as > > "do not require on-disk working copy" - could be a config option, or > > checking some magic string in the hook itself. > > > > If we decide to go this way, I could help start some more formal discussion > > (a plan page or so) - hopefully we can collect enough corner cases that > > need support, have a final decision about the API soon. > > > > CC smf because it could be related to your memctx work. > > --
On Mon, 21 Nov 2016 08:13:32 -0800, Mateusz Kwapich wrote: > # HG changeset patch > # User Mateusz Kwapich <mitrandir@fb.com> > # Date 1479744581 28800 > # Mon Nov 21 08:09:41 2016 -0800 > # Node ID 4af70f21264ac8e52d9b218080bbc96ee5505606 > # Parent 4a0824bead3ba5980bd8528937fba5f7bb31ba9f > memctx: allow the memlightctx thats reusing the manifest node > --- a/mercurial/context.py > +++ b/mercurial/context.py > @@ -1975,3 +1975,101 @@ class memfilectx(committablefilectx): > def write(self, data, flags): > """wraps repo.wwrite""" > self._data = data > + > +class memlightctx(committablectx): > + """Like memctx but it's reusing the manifest of different commit. > + Intended to be used by lightweight operations that are creating > + metadata-only changes. > + > + Revision information is supplied at initialization time. 'repo' is the > + current localrepo, 'ctx' is original revision which manifest we're reuisng > + 'parents' is a sequence of two parent revisions identifiers (pass None for > + every missing parent), 'text' is the commit. > + > + user receives the committer name and defaults to current repository > + username, date is the commit date in any format supported by > + util.parsedate() and defaults to current date, extra is a dictionary of > + metadata or is left empty. > + """ > + def __new__(cls, repo, path, *args, **kwargs): > + return super(memlightctx, cls).__new__(cls, repo) Nit: the argument name 'path' is misleading since it should actually be an 'originalctx'. Maybe that can be fixed by a follow-up patch. > + > + def __init__(self, repo, originalctx, parents, text, user=None, date=None, > + extra=None, editor=False):
Patch
diff --git a/mercurial/context.py b/mercurial/context.py --- a/mercurial/context.py +++ b/mercurial/context.py @@ -1975,3 +1975,101 @@ class memfilectx(committablefilectx): def write(self, data, flags): """wraps repo.wwrite""" self._data = data + +class memlightctx(committablectx): + """Like memctx but it's reusing the manifest of different commit. + Intended to be used by lightweight operations that are creating + metadata-only changes. + + Revision information is supplied at initialization time. 'repo' is the + current localrepo, 'ctx' is original revision which manifest we're reuisng + 'parents' is a sequence of two parent revisions identifiers (pass None for + every missing parent), 'text' is the commit. + + user receives the committer name and defaults to current repository + username, date is the commit date in any format supported by + util.parsedate() and defaults to current date, extra is a dictionary of + metadata or is left empty. + """ + def __new__(cls, repo, path, *args, **kwargs): + return super(memlightctx, cls).__new__(cls, repo) + + def __init__(self, repo, originalctx, parents, text, user=None, date=None, + extra=None, editor=False): + super(memlightctx, self).__init__(repo, text, user, date, extra) + self._rev = None + self._node = None + self._originalctx = originalctx + self._manifestnode = originalctx.manifestnode() + parents = [(p or nullid) for p in parents] + p1, p2 = self._parents = [changectx(self._repo, p) for p in parents] + + # sanity check to ensure that the reused manifest parents are + # manifests of our commit parents + mp1, mp2 = self.manifestctx().parents + if p1 != nullid and p1.manifestctx().node() != mp1: + raise RuntimeError('can\'t reuse the manifest: ' + 'its p1 doesn\'t match the new ctx p1') + if p2 != nullid and p2.manifestctx().node() != mp2: + raise RuntimeError('can\'t reuse the manifest: ' + 'its p2 doesn\'t match the new ctx p2') + + self._files = originalctx.files() + self.substate = {} + + if extra: + self._extra = extra.copy() + else: + self._extra = {} + + if self._extra.get('branch', '') == '': + self._extra['branch'] = 'default' + + if editor: + self._text = editor(self._repo, self, []) + self._repo.savecommitmessage(self._text) + + def manifestnode(self): + return self._manifestnode + + @propertycache + def _manifestctx(self): + return self._repo.manifestlog[self._manifestnode] + + def filectx(self, path, filelog=None): + return self._originalctx.filectx(path, filelog=filelog) + + def commit(self): + """commit context to the repo""" + return self._repo.commitctx(self) + + @property + def _manifest(self): + return self._originalctx.manifest() + + @propertycache + def _status(self): + """Calculate exact status from ``files`` specified in the ``origctx`` + and parents manifests. + """ + man1 = self.p1().manifest() + p2 = self._parents[1] + # "1 < len(self._parents)" can't be used for checking + # existence of the 2nd parent, because "memlightctx._parents" is + # explicitly initialized by the list, of which length is 2. + if p2.node() != nullid: + man2 = p2.manifest() + managing = lambda f: f in man1 or f in man2 + else: + managing = lambda f: f in man1 + + modified, added, removed = [], [], [] + for f in self._files: + if not managing(f): + added.append(f) + elif self[f]: + modified.append(f) + else: + removed.append(f) + + return scmutil.status(modified, added, removed, [], [], [], [])