Submitter | Katsunori FUJIWARA |
---|---|
Date | June 28, 2014, 3:11 p.m. |
Message ID | <cb8567d0c76d7c0e66d2.1403968281@feefifofum> |
Download | mbox | patch |
Permalink | /patch/5077/ |
State | Superseded |
Commit | 3420346174b1cff51e6af2cffaa2756c68882dd8 |
Headers | show |
Comments
At Sun, 29 Jun 2014 00:11:21 +0900, FUJIWARA Katsunori wrote: > > # HG changeset patch > # User FUJIWARA Katsunori <foozy@lares.dti.ne.jp> > # Date 1403966784 -32400 > # Sat Jun 28 23:46:24 2014 +0900 > # Branch stable > # Node ID cb8567d0c76d7c0e66d2fe32607acc8621d2389f > # Parent a4b67bf1f0a5051736c14d9c13fae50fd5f5e464 > convert: detect removal of ".gitmodules" at git source revisions correctly > > Before this patch, all operations applied on ".gitmodules" at git > source revisions are treated as modification, even if they are > actually removal of it. > > If removal of ".gitmodules" is treated as modification unexpectedly, > "hg convert" is aborted by the exception raised in > "retrievegitmodules()" for ".gitmodules" at the git source revision > removing it, because that revision doesn't have any information of > ".gitmodules". For example, recent git repository of Phabricator contains such revision, and causes failure of "hg convert". > This patch detects removal of ".gitmodules" at git source revisions > correctly. > > If ".gitmodules" is removed at the git source revision, this patch > records "hex(nullid)" as the contents hash value for ".hgsub" and > ".hgsubstate" at the destination revision. > > This patch makes "getfile()" raise IOError also for ".hgstatus" and > ".hgsubstate" if the contents hash value is "hex(nullid)", and this > tells removal of ".hgstatus" and ".hgsubstate" at the destination > revision to "localrepository.commitctx()" correctly. > > For files other than ".hgstatus" and ".hgsubstate", checking the > contents hash value in "getfile()" may be redundant, because > "catfile()" for them also does so. > > But this patch chooses writing it only once at the beginning of > "getfile()", to avoid writing same code twice both for ".hgsub" and > ".hgsubstate" separately. > > diff --git a/hgext/convert/git.py b/hgext/convert/git.py > --- a/hgext/convert/git.py > +++ b/hgext/convert/git.py > @@ -104,6 +104,8 @@ > return data > > def getfile(self, name, rev): > + if rev == hex(nullid): > + raise IOError > if name == '.hgsub': > data = '\n'.join([m.hgsub() for m in self.submoditer()]) > mode = '' > @@ -155,6 +157,7 @@ > seen = set() > entry = None > subexists = False > + subdeleted = False > for l in fh.read().split('\x00'): > if not entry: > if not l.startswith(':'): > @@ -171,7 +174,11 @@ > > if f == '.gitmodules': > subexists = True > - changes.append(('.hgsub', '')) > + if entry[4] == 'D': > + subdeleted = True > + changes.append(('.hgsub', hex(nullid))) > + else: > + changes.append(('.hgsub', '')) > elif entry[1] == '160000' or entry[0] == ':160000': > subexists = True > else: > @@ -182,8 +189,11 @@ > raise util.Abort(_('cannot read changes in %s') % version) > > if subexists: > - self.retrievegitmodules(version) > - changes.append(('.hgsubstate', '')) > + if subdeleted: > + changes.append(('.hgsubstate', hex(nullid))) > + else: > + self.retrievegitmodules(version) > + changes.append(('.hgsubstate', '')) > return (changes, {}) > > def getcommit(self, version): > diff --git a/tests/test-convert-git.t b/tests/test-convert-git.t > --- a/tests/test-convert-git.t > +++ b/tests/test-convert-git.t > @@ -363,6 +363,23 @@ > > $ cd ../.. > > +convert the revision removing '.gitmodules' itself (and related > +submodules) > + > + $ cd git-repo6 > + $ git rm .gitmodules > + rm '.gitmodules' > + $ git rm --cached git-repo5 > + rm 'git-repo5' > + $ commit -a -m 'remove .gitmodules and submodule git-repo5' > + $ cd .. > + > + $ hg convert -q git-repo6 git-repo6-hg > + $ hg -R git-repo6-hg tip -T "{desc|firstline}\n" > + remove .gitmodules and submodule git-repo5 > + $ hg -R git-repo6-hg tip -T "{file_dels}\n" > + .hgsub .hgsubstate > + > damaged git repository tests: > In case the hard-coded hashes change, the following commands can be used to > list the hashes and their corresponding types in the repository: > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel > ---------------------------------------------------------------------- [FUJIWARA Katsunori] foozy@lares.dti.ne.jp
On Sun, 2014-06-29 at 00:11 +0900, FUJIWARA Katsunori wrote: > # HG changeset patch > # User FUJIWARA Katsunori <foozy@lares.dti.ne.jp> > # Date 1403966784 -32400 > # Sat Jun 28 23:46:24 2014 +0900 > # Branch stable > # Node ID cb8567d0c76d7c0e66d2fe32607acc8621d2389f > # Parent a4b67bf1f0a5051736c14d9c13fae50fd5f5e464 > convert: detect removal of ".gitmodules" at git source revisions correctly Queued for stable, thanks.
On Tue, 2014-07-01 at 15:47 -0500, Matt Mackall wrote: > On Sun, 2014-06-29 at 00:11 +0900, FUJIWARA Katsunori wrote: > > # HG changeset patch > > # User FUJIWARA Katsunori <foozy@lares.dti.ne.jp> > > # Date 1403966784 -32400 > > # Sat Jun 28 23:46:24 2014 +0900 > > # Branch stable > > # Node ID cb8567d0c76d7c0e66d2fe32607acc8621d2389f > > # Parent a4b67bf1f0a5051736c14d9c13fae50fd5f5e464 > > convert: detect removal of ".gitmodules" at git source revisions correctly > > Queued for stable, thanks. Breaks tests: $ hg convert -q git-repo6 git-repo6-hg + transaction abort! + rollback completed + Traceback (most recent call last): + File "/dev/shm/hgtests.uBEOuH/install/bin/hg", line 43, in <module> + mercurial.dispatch.run() + File "/dev/shm/hgtests.uBEOuH/install/lib/python/mercurial/dispatch.py", line 28, in run + sys.exit((dispatch(request(sys.argv[1:])) or 0) & 255) + File "/dev/shm/hgtests.uBEOuH/install/lib/python/mercurial/dispatch.py", line 69, in dispatch + ret = _runcatch(req) + File "/dev/shm/hgtests.uBEOuH/install/lib/python/mercurial/dispatch.py", line 228, in _runcatch + elif util.safehasattr(inst, "args") and inst.args[0] == errno.EPIPE: + IndexError: tuple index out of range + [1] $ hg -R git-repo6-hg tip -T "{desc|firstline}\n" - remove .gitmodules and submodule git-repo5 + addsubmodule $ hg -R git-repo6-hg tip -T "{file_dels}\n" - .hgsub .hgsubstate +
At Tue, 01 Jul 2014 18:46:08 -0500, Matt Mackall wrote: > > On Tue, 2014-07-01 at 15:47 -0500, Matt Mackall wrote: > > On Sun, 2014-06-29 at 00:11 +0900, FUJIWARA Katsunori wrote: > > > # HG changeset patch > > > # User FUJIWARA Katsunori <foozy@lares.dti.ne.jp> > > > # Date 1403966784 -32400 > > > # Sat Jun 28 23:46:24 2014 +0900 > > > # Branch stable > > > # Node ID cb8567d0c76d7c0e66d2fe32607acc8621d2389f > > > # Parent a4b67bf1f0a5051736c14d9c13fae50fd5f5e464 > > > convert: detect removal of ".gitmodules" at git source revisions correctly > > > > Queued for stable, thanks. > > Breaks tests: > > $ hg convert -q git-repo6 git-repo6-hg > + transaction abort! > + rollback completed > + Traceback (most recent call last): > + File "/dev/shm/hgtests.uBEOuH/install/bin/hg", line 43, in <module> > + mercurial.dispatch.run() > + File "/dev/shm/hgtests.uBEOuH/install/lib/python/mercurial/dispatch.py", line 28, in run > + sys.exit((dispatch(request(sys.argv[1:])) or 0) & 255) > + File "/dev/shm/hgtests.uBEOuH/install/lib/python/mercurial/dispatch.py", line 69, in dispatch > + ret = _runcatch(req) > + File "/dev/shm/hgtests.uBEOuH/install/lib/python/mercurial/dispatch.py", line 228, in _runcatch > + elif util.safehasattr(inst, "args") and inst.args[0] == errno.EPIPE: > + IndexError: tuple index out of range > + [1] > $ hg -R git-repo6-hg tip -T "{desc|firstline}\n" > - remove .gitmodules and submodule git-repo5 > + addsubmodule > $ hg -R git-repo6-hg tip -T "{file_dels}\n" > - .hgsub .hgsubstate > + "IndexError: tuple index out of range" itself seems to occurs, because IOError raised in "getfile()" in "hgext/convert/git.py" has empty "args". With a little monkey patching, I could get actual back trace of this unexpected failure. (BTW, this is a failure on "default" branch, isn't it ?) + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/hg.py", line 178, in putcommit + self.repo.commitctx(ctx) + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/localrepo.py", line 63, in wrapper + return orig(repo.unfiltered(), *args, **kwargs) + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/localrepo.py", line 1430, in commitctx + targetphase = subrepo.newcommitphase(self.ui, ctx) + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/subrepo.py", line 349, in newcommitphase + substate = getattr(ctx, "substate", None) + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/util.py", line 287, in __get__ + result = self.func(obj) + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/context.py", line 136, in substate + return subrepo.state(self, self._repo.ui) + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/subrepo.py", line 86, in state + read('.hgsub') + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/subrepo.py", line 74, in read + data = ctx[f].data() + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/context.py", line 1594, in __getitem__ + return self.filectx(key) + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/context.py", line 1636, in filectx + return self._filectxfn(self._repo, self, path) + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/hg.py", line 136, in getfilectx + data, mode = source.getfile(f, v) + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/convcmd.py", line 88, in getfile + return self.source.getfile(file, rev) + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/git.py", line 137, in getfile + raise IOError + IOError According to my "hg bisect", the revision below is the first revision, which causes this failure with this series. changeset: 21665:d2743be1bb06 user: Sean Farley <sean.michael.farley@gmail.com> date: Thu Aug 15 15:00:03 2013 -0500 summary: memctx: inherit from committablectx Before Sean's work above, "subrepo.newcommitphase()" returns immediately, because "memctx" (derived from "object") doesn't have "substate" property. ================ def newcommitphase(ui, ctx): commitphase = phases.newcommitphase(ui) substate = getattr(ctx, "substate", None) if not substate: return commitphase ================ In the other hand, after Sean's work, "memctxt" has "substate" inherited from "basectx" via "committablectx", and causes invocation of "subrepo.state" via "basectx.substate". After that, "'.hgsub' in ctx" examination on "memctx" in "subrepo.state" fails to detect removal of ".hgsub", because: - "f in ctx" invokes "f in self._manifest" via "__contains__" of "basectx" - "_manifest" of "committablectx" can detect removal of files, only if "self._status" contains also removed files correctly, but - "memctx" contains only modified (status[0]) files (removal of files are only recognized in "localrepository.commitctx" layer) Finally, "subrepo.state()" tries to get already removed ".hgsub" and causes unexpected failure. Then, what should we do to fix this problem ? 1. make "memctx" take new "removed files" argument this is not reasonable (at least for stable), because this requires many changes around "memctx" construction. 2. make "localrepository.commitctx" put removed files into "memctx" "detecting additional removal of files" in "commitctx" means that the specified "ctx" should be "memctx", because other than "memctx" contain removed files in "ctx.removed()" (= "self._status[2]"). ================ removed = list(ctx.removed()) : : try: fctx = ctx[f] : : except IOError, inst: errcode = getattr(inst, 'errno', errno.ENOENT) if error or errcode and errcode != errno.ENOENT: self.ui.warn(_("trouble committing %s!\n") % f) raise else: removed.append(f) # ctx should be "memctx" in this case : : # add code path below .... if not ctx.removed() and removed: # or using "isinstance()" ???? ctx.setremoved(removed) # or "ctx._status[2] = removed" ???? ================ this is simple, but not smart. 3. examine "'.hgsub' in ctx" with try/except for IOError, before getting "substate" from ctx, in "newcommitphase" this is simple, but not smart. 4. and so on (any other good solutions ?) ---------------------------------------------------------------------- [FUJIWARA Katsunori] foozy@lares.dti.ne.jp
At Wed, 02 Jul 2014 12:44:04 +0900, FUJIWARA Katsunori wrote: > At Tue, 01 Jul 2014 18:46:08 -0500, > Matt Mackall wrote: > > > > On Tue, 2014-07-01 at 15:47 -0500, Matt Mackall wrote: > > > On Sun, 2014-06-29 at 00:11 +0900, FUJIWARA Katsunori wrote: > > > > # HG changeset patch > > > > # User FUJIWARA Katsunori <foozy@lares.dti.ne.jp> > > > > # Date 1403966784 -32400 > > > > # Sat Jun 28 23:46:24 2014 +0900 > > > > # Branch stable > > > > # Node ID cb8567d0c76d7c0e66d2fe32607acc8621d2389f > > > > # Parent a4b67bf1f0a5051736c14d9c13fae50fd5f5e464 > > > > convert: detect removal of ".gitmodules" at git source revisions correctly > > > > > > Queued for stable, thanks. > > > > Breaks tests: > > > > $ hg convert -q git-repo6 git-repo6-hg > > + transaction abort! > > + rollback completed > > + Traceback (most recent call last): > > + File "/dev/shm/hgtests.uBEOuH/install/bin/hg", line 43, in <module> > > + mercurial.dispatch.run() > > + File "/dev/shm/hgtests.uBEOuH/install/lib/python/mercurial/dispatch.py", line 28, in run > > + sys.exit((dispatch(request(sys.argv[1:])) or 0) & 255) > > + File "/dev/shm/hgtests.uBEOuH/install/lib/python/mercurial/dispatch.py", line 69, in dispatch > > + ret = _runcatch(req) > > + File "/dev/shm/hgtests.uBEOuH/install/lib/python/mercurial/dispatch.py", line 228, in _runcatch > > + elif util.safehasattr(inst, "args") and inst.args[0] == errno.EPIPE: > > + IndexError: tuple index out of range > > + [1] > > $ hg -R git-repo6-hg tip -T "{desc|firstline}\n" > > - remove .gitmodules and submodule git-repo5 > > + addsubmodule > > $ hg -R git-repo6-hg tip -T "{file_dels}\n" > > - .hgsub .hgsubstate > > + > > "IndexError: tuple index out of range" itself seems to occurs, because > IOError raised in "getfile()" in "hgext/convert/git.py" has empty > "args". > > > With a little monkey patching, I could get actual back trace of this > unexpected failure. > > (BTW, this is a failure on "default" branch, isn't it ?) > > + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/hg.py", line 178, in putcommit > + self.repo.commitctx(ctx) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/localrepo.py", line 63, in wrapper > + return orig(repo.unfiltered(), *args, **kwargs) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/localrepo.py", line 1430, in commitctx > + targetphase = subrepo.newcommitphase(self.ui, ctx) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/subrepo.py", line 349, in newcommitphase > + substate = getattr(ctx, "substate", None) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/util.py", line 287, in __get__ > + result = self.func(obj) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/context.py", line 136, in substate > + return subrepo.state(self, self._repo.ui) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/subrepo.py", line 86, in state > + read('.hgsub') > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/subrepo.py", line 74, in read > + data = ctx[f].data() > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/context.py", line 1594, in __getitem__ > + return self.filectx(key) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/context.py", line 1636, in filectx > + return self._filectxfn(self._repo, self, path) > + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/hg.py", line 136, in getfilectx > + data, mode = source.getfile(f, v) > + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/convcmd.py", line 88, in getfile > + return self.source.getfile(file, rev) > + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/git.py", line 137, in getfile > + raise IOError > + IOError > > > According to my "hg bisect", the revision below is the first revision, > which causes this failure with this series. > > changeset: 21665:d2743be1bb06 > user: Sean Farley <sean.michael.farley@gmail.com> > date: Thu Aug 15 15:00:03 2013 -0500 > summary: memctx: inherit from committablectx > > > Before Sean's work above, "subrepo.newcommitphase()" returns > immediately, because "memctx" (derived from "object") doesn't have > "substate" property. > > ================ > def newcommitphase(ui, ctx): > commitphase = phases.newcommitphase(ui) > substate = getattr(ctx, "substate", None) > if not substate: > return commitphase > ================ > > In the other hand, after Sean's work, "memctxt" has "substate" > inherited from "basectx" via "committablectx", and causes invocation > of "subrepo.state" via "basectx.substate". > > > After that, "'.hgsub' in ctx" examination on "memctx" in > "subrepo.state" fails to detect removal of ".hgsub", because: > > - "f in ctx" invokes "f in self._manifest" via "__contains__" of > "basectx" > > - "_manifest" of "committablectx" can detect removal of files, only > if "self._status" contains also removed files correctly, but > > - "memctx" contains only modified (status[0]) files > (removal of files are only recognized in > "localrepository.commitctx" layer) > > > Finally, "subrepo.state()" tries to get already removed ".hgsub" and > causes unexpected failure. > > > Then, what should we do to fix this problem ? I found the another way to resolve this problem, suppressing subrepo phase checking while converting. I'll send revised series. > 1. make "memctx" take new "removed files" argument > > this is not reasonable (at least for stable), because this > requires many changes around "memctx" construction. > > > 2. make "localrepository.commitctx" put removed files into "memctx" > > "detecting additional removal of files" in "commitctx" means that > the specified "ctx" should be "memctx", because other than "memctx" > contain removed files in "ctx.removed()" (= "self._status[2]"). > > ================ > removed = list(ctx.removed()) > : > : > try: > fctx = ctx[f] > : > : > except IOError, inst: > errcode = getattr(inst, 'errno', errno.ENOENT) > if error or errcode and errcode != errno.ENOENT: > self.ui.warn(_("trouble committing %s!\n") % f) > raise > else: > removed.append(f) # ctx should be "memctx" in this case > : > : > > # add code path below .... > if not ctx.removed() and removed: # or using "isinstance()" ???? > ctx.setremoved(removed) # or "ctx._status[2] = removed" ???? > ================ > > this is simple, but not smart. > > > 3. examine "'.hgsub' in ctx" with try/except for IOError, before > getting "substate" from ctx, in "newcommitphase" > > this is simple, but not smart. > > 4. and so on (any other good solutions ?) > > ---------------------------------------------------------------------- > [FUJIWARA Katsunori] foozy@lares.dti.ne.jp > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel > ---------------------------------------------------------------------- [FUJIWARA Katsunori] foozy@lares.dti.ne.jp
FUJIWARA Katsunori writes: [...] > "IndexError: tuple index out of range" itself seems to occurs, because > IOError raised in "getfile()" in "hgext/convert/git.py" has empty > "args". > > > With a little monkey patching, I could get actual back trace of this > unexpected failure. > > (BTW, this is a failure on "default" branch, isn't it ?) Yes, this must be on default. > + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/hg.py", line 178, in putcommit > + self.repo.commitctx(ctx) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/localrepo.py", line 63, in wrapper > + return orig(repo.unfiltered(), *args, **kwargs) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/localrepo.py", line 1430, in commitctx > + targetphase = subrepo.newcommitphase(self.ui, ctx) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/subrepo.py", line 349, in newcommitphase > + substate = getattr(ctx, "substate", None) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/util.py", line 287, in __get__ > + result = self.func(obj) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/context.py", line 136, in substate > + return subrepo.state(self, self._repo.ui) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/subrepo.py", line 86, in state > + read('.hgsub') > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/subrepo.py", line 74, in read > + data = ctx[f].data() > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/context.py", line 1594, in __getitem__ > + return self.filectx(key) > + File "/tmp/hgtests.Ac881r/install/lib/python/mercurial/context.py", line 1636, in filectx > + return self._filectxfn(self._repo, self, path) > + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/hg.py", line 136, in getfilectx > + data, mode = source.getfile(f, v) > + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/convcmd.py", line 88, in getfile > + return self.source.getfile(file, rev) > + File "/tmp/hgtests.Ac881r/install/lib/python/hgext/convert/git.py", line 137, in getfile > + raise IOError > + IOError > > > According to my "hg bisect", the revision below is the first revision, > which causes this failure with this series. > > changeset: 21665:d2743be1bb06 > user: Sean Farley <sean.michael.farley@gmail.com> > date: Thu Aug 15 15:00:03 2013 -0500 > summary: memctx: inherit from committablectx > > > Before Sean's work above, "subrepo.newcommitphase()" returns > immediately, because "memctx" (derived from "object") doesn't have > "substate" property. > > ================ > def newcommitphase(ui, ctx): > commitphase = phases.newcommitphase(ui) > substate = getattr(ctx, "substate", None) > if not substate: > return commitphase > ================ > > In the other hand, after Sean's work, "memctxt" has "substate" > inherited from "basectx" via "committablectx", and causes invocation > of "subrepo.state" via "basectx.substate". Yes, this is definitely my fault. Both hg-git and hgsubversion have suffered from this as well. > After that, "'.hgsub' in ctx" examination on "memctx" in > "subrepo.state" fails to detect removal of ".hgsub", because: > > - "f in ctx" invokes "f in self._manifest" via "__contains__" of > "basectx" > > - "_manifest" of "committablectx" can detect removal of files, only > if "self._status" contains also removed files correctly, but > > - "memctx" contains only modified (status[0]) files > (removal of files are only recognized in > "localrepository.commitctx" layer) > > > Finally, "subrepo.state()" tries to get already removed ".hgsub" and > causes unexpected failure. Thanks for the great explanation! > Then, what should we do to fix this problem ? > > 1. make "memctx" take new "removed files" argument > > this is not reasonable (at least for stable), because this > requires many changes around "memctx" construction. I've started work on something like this. Basically, manipulate the manifest directly to indicate modified, added, or removed files. This is needed for in-memory merge but will also help get rid of localrepository.commitctx and the like. In turn, this will help revamp the record interface. If you have any input, please do chime in. I'm eager to get my work on the list and get some feedback.
At Wed, 02 Jul 2014 14:33:01 -0500, Sean Farley wrote: > > FUJIWARA Katsunori writes: > > Then, what should we do to fix this problem ? > > > > 1. make "memctx" take new "removed files" argument > > > > this is not reasonable (at least for stable), because this > > requires many changes around "memctx" construction. > > I've started work on something like this. Basically, manipulate the > manifest directly to indicate modified, added, or removed files. This is > needed for in-memory merge but will also help get rid of > localrepository.commitctx and the like. In turn, this will help revamp > the record interface. > > If you have any input, please do chime in. I'm eager to get my work on > the list and get some feedback. Yes, I have a question about your work around "__contains__". (background) With a little more researching, I found that my explanation in the previous mail was not correct :-< > > After that, "'.hgsub' in ctx" examination on "memctx" in > > "subrepo.state" fails to detect removal of ".hgsub", because: > > > > - "f in ctx" invokes "f in self._manifest" via "__contains__" of > > "basectx" "memctx" uses the "__contains__" below derived from "committablectx": def __contains__(self, key): return self._repo.dirstate[key] not in "?r" Then, "f in ctx" on "memctx" returns the result value unexpectedly according to current dirstate. > > - "_manifest" of "committablectx" can detect removal of files, only > > if "self._status" contains also removed files correctly, but > > > > - "memctx" contains only modified (status[0]) files > > (removal of files are only recognized in > > "localrepository.commitctx" layer) > > > > > > Finally, "subrepo.state()" tries to get already removed ".hgsub" and > > causes unexpected failure. I confirmed that making working directory empty by "hg update null" and "hg debugrebuilddirstate" before "hg convert" can suppress unexpected failure in the case of this patch series. (question) Actions using "memctx" below may allow such dirstate dependent behavior: - "collapse" of "hg histedit", and - "hg import" wihtout "--bypass": these use working directory But it may cause unexpected file handling in actions below: - "hg lfconvert" of largefiles, and - incremantal "hg convert" of convert: working directory may be in use - "hg import --bypass": working directory should be ignored absolutely I think that current "__contains__" of "committablectx" should be defined in "workingctx", to make "memctx" use "__contains__" of "basectx", which was moved from "changectx" in your work below (of course, "handling removed files, too" should be needed for correct manifest calculation). changeset: 19550:0c8ad779eb36 user: Sean Farley <sean.michael.farley@gmail.com> date: Mon Aug 05 17:21:38 2013 -0500 summary: basectx: move __contains__ from changectx But this replacement is a just backing out of your work below: changeset: 19668:9d56a3359011 user: Sean Farley <sean.michael.farley@gmail.com> date: Wed Aug 14 15:29:09 2013 -0500 summary: commitablectx: move __contains__ from workingctx Is current "__contains__" hierarchy between "basectx", "committablectx", "workingctx" and "memctx" intentional ? ---------------------------------------------------------------------- [FUJIWARA Katsunori] foozy@lares.dti.ne.jp
FUJIWARA Katsunori writes: > At Wed, 02 Jul 2014 14:33:01 -0500, > Sean Farley wrote: >> >> FUJIWARA Katsunori writes: > >> > Then, what should we do to fix this problem ? >> > >> > 1. make "memctx" take new "removed files" argument >> > >> > this is not reasonable (at least for stable), because this >> > requires many changes around "memctx" construction. >> >> I've started work on something like this. Basically, manipulate the >> manifest directly to indicate modified, added, or removed files. This is >> needed for in-memory merge but will also help get rid of >> localrepository.commitctx and the like. In turn, this will help revamp >> the record interface. >> >> If you have any input, please do chime in. I'm eager to get my work on >> the list and get some feedback. > > Yes, I have a question about your work around "__contains__". > > > (background) > > With a little more researching, I found that my explanation in the > previous mail was not correct :-< Well, it was onto something :-) I used an idea from Sid to set substate=None in the __init__ method of memctx. >> > After that, "'.hgsub' in ctx" examination on "memctx" in >> > "subrepo.state" fails to detect removal of ".hgsub", because: >> > >> > - "f in ctx" invokes "f in self._manifest" via "__contains__" of >> > "basectx" > > "memctx" uses the "__contains__" below derived from "committablectx": > > def __contains__(self, key): > return self._repo.dirstate[key] not in "?r" > > Then, "f in ctx" on "memctx" returns the result value unexpectedly > according to current dirstate. Oops. memctx shouldn't be worrying about the dirstate. >> > - "_manifest" of "committablectx" can detect removal of files, only >> > if "self._status" contains also removed files correctly, but >> > >> > - "memctx" contains only modified (status[0]) files >> > (removal of files are only recognized in >> > "localrepository.commitctx" layer) >> > >> > >> > Finally, "subrepo.state()" tries to get already removed ".hgsub" and >> > causes unexpected failure. > > I confirmed that making working directory empty by "hg update null" > and "hg debugrebuilddirstate" before "hg convert" can suppress > unexpected failure in the case of this patch series. > > > (question) > > Actions using "memctx" below may allow such dirstate dependent > behavior: > > - "collapse" of "hg histedit", and > - "hg import" wihtout "--bypass": these use working directory There are a lot of commands that use the working directory. I view this as an implementation detail for the most part. Take localrepo.commit, for instance. It is completely dependent on the working directory but doesn't need to be. My current approach to decouple this is: - lift the code that makes a context object from the working directory - separate that code out of localrepo.commit - only have context.commit be the object that actually does the commit transaction > But it may cause unexpected file handling in actions below: > > - "hg lfconvert" of largefiles, and > - incremantal "hg convert" of convert: working directory may be in use > - "hg import --bypass": working directory should be ignored absolutely That is wayyyyy off in the future for me (working on in-memory merge first). > I think that current "__contains__" of "committablectx" should be > defined in "workingctx", to make "memctx" use "__contains__" of > "basectx", which was moved from "changectx" in your work below (of > course, "handling removed files, too" should be needed for correct > manifest calculation). > > changeset: 19550:0c8ad779eb36 > user: Sean Farley <sean.michael.farley@gmail.com> > date: Mon Aug 05 17:21:38 2013 -0500 > summary: basectx: move __contains__ from changectx > > But this replacement is a just backing out of your work below: > > changeset: 19668:9d56a3359011 > user: Sean Farley <sean.michael.farley@gmail.com> > date: Wed Aug 14 15:29:09 2013 -0500 > summary: commitablectx: move __contains__ from workingctx > > > Is current "__contains__" hierarchy between "basectx", > "committablectx", "workingctx" and "memctx" intentional ? I'm pretty sure that was unintentional on my part. That might make the substate=None patch unneeded, right?
At Thu, 03 Jul 2014 15:51:37 -0500, Sean Farley wrote: > > > FUJIWARA Katsunori writes: > > > At Wed, 02 Jul 2014 14:33:01 -0500, > > Sean Farley wrote: > >> > >> FUJIWARA Katsunori writes: > > > >> > Then, what should we do to fix this problem ? > >> > > >> > 1. make "memctx" take new "removed files" argument > >> > > >> > this is not reasonable (at least for stable), because this > >> > requires many changes around "memctx" construction. > >> > >> I've started work on something like this. Basically, manipulate the > >> manifest directly to indicate modified, added, or removed files. This is > >> needed for in-memory merge but will also help get rid of > >> localrepository.commitctx and the like. In turn, this will help revamp > >> the record interface. > >> > >> If you have any input, please do chime in. I'm eager to get my work on > >> the list and get some feedback. > > > > Yes, I have a question about your work around "__contains__". > > > > > > (background) > > > > With a little more researching, I found that my explanation in the > > previous mail was not correct :-< > > Well, it was onto something :-) I used an idea from Sid to set > substate=None in the __init__ method of memctx. Oh ! "setting substate=None in the __init__" looks better than "disabling subrepo phase checking while converting forcibly" which I planned to use: the latter needs saving and restoring by "ui.setconfig" in try/finally clause for "commitctx". I'll re-post this series soon after queuing your setting substate=None patch :-) > > I think that current "__contains__" of "committablectx" should be > > defined in "workingctx", to make "memctx" use "__contains__" of > > "basectx", which was moved from "changectx" in your work below (of > > course, "handling removed files, too" should be needed for correct > > manifest calculation). > > > > changeset: 19550:0c8ad779eb36 > > user: Sean Farley <sean.michael.farley@gmail.com> > > date: Mon Aug 05 17:21:38 2013 -0500 > > summary: basectx: move __contains__ from changectx > > > > But this replacement is a just backing out of your work below: > > > > changeset: 19668:9d56a3359011 > > user: Sean Farley <sean.michael.farley@gmail.com> > > date: Wed Aug 14 15:29:09 2013 -0500 > > summary: commitablectx: move __contains__ from workingctx > > > > > > Is current "__contains__" hierarchy between "basectx", > > "committablectx", "workingctx" and "memctx" intentional ? > > I'm pretty sure that was unintentional on my part. That might make the > substate=None patch unneeded, right? I think that "substate=None" patch is still needed, even if current "committablectx.__contains__" code is moved into "workingctx", because "memctx" still returns incorrect manifest/status for removal of files yet (= "key in self._manifest" is incorrect, too). ---------------------------------------------------------------------- [FUJIWARA Katsunori] foozy@lares.dti.ne.jp
Patch
diff --git a/hgext/convert/git.py b/hgext/convert/git.py --- a/hgext/convert/git.py +++ b/hgext/convert/git.py @@ -104,6 +104,8 @@ return data def getfile(self, name, rev): + if rev == hex(nullid): + raise IOError if name == '.hgsub': data = '\n'.join([m.hgsub() for m in self.submoditer()]) mode = '' @@ -155,6 +157,7 @@ seen = set() entry = None subexists = False + subdeleted = False for l in fh.read().split('\x00'): if not entry: if not l.startswith(':'): @@ -171,7 +174,11 @@ if f == '.gitmodules': subexists = True - changes.append(('.hgsub', '')) + if entry[4] == 'D': + subdeleted = True + changes.append(('.hgsub', hex(nullid))) + else: + changes.append(('.hgsub', '')) elif entry[1] == '160000' or entry[0] == ':160000': subexists = True else: @@ -182,8 +189,11 @@ raise util.Abort(_('cannot read changes in %s') % version) if subexists: - self.retrievegitmodules(version) - changes.append(('.hgsubstate', '')) + if subdeleted: + changes.append(('.hgsubstate', hex(nullid))) + else: + self.retrievegitmodules(version) + changes.append(('.hgsubstate', '')) return (changes, {}) def getcommit(self, version): diff --git a/tests/test-convert-git.t b/tests/test-convert-git.t --- a/tests/test-convert-git.t +++ b/tests/test-convert-git.t @@ -363,6 +363,23 @@ $ cd ../.. +convert the revision removing '.gitmodules' itself (and related +submodules) + + $ cd git-repo6 + $ git rm .gitmodules + rm '.gitmodules' + $ git rm --cached git-repo5 + rm 'git-repo5' + $ commit -a -m 'remove .gitmodules and submodule git-repo5' + $ cd .. + + $ hg convert -q git-repo6 git-repo6-hg + $ hg -R git-repo6-hg tip -T "{desc|firstline}\n" + remove .gitmodules and submodule git-repo5 + $ hg -R git-repo6-hg tip -T "{file_dels}\n" + .hgsub .hgsubstate + damaged git repository tests: In case the hard-coded hashes change, the following commands can be used to list the hashes and their corresponding types in the repository: