Submitter | Phillip Cohen |
---|---|
Date | March 7, 2017, 7:40 p.m. |
Message ID | <bbce62e3790220f19e7b.1488915659@phillco-mbp.dhcp.thefacebook.com> |
Download | mbox | patch |
Permalink | /patch/18971/ |
State | Changes Requested |
Headers | show |
Comments
On 3/7/17 11:40 AM, Phil Cohen wrote: > # HG changeset patch > # User Phil Cohen <phillco@fb.com> > # Date 1488915535 28800 > # Tue Mar 07 11:38:55 2017 -0800 > # Node ID bbce62e3790220f19e7b37160a2f8351b7461272 > # Parent 91e86a6c61c0c6a3b554eefeba9060000311aa29 > merge: add `internal:dumpjson` tool to `resolve`, which outputs conflict state > > This supersedes a previous change which added a --prep option. Generally we don't reference previous iterations of the patch in the commit message, since those previous iterations wont' show up in the repo after this is pushed. > Normally, `hg resolve` takes the user on a whistle-stop tour of each conflicted > file, pausing to launch the editor to resolve the conflicts. This is an > alternative workflow for resolving many conflicts in a random-access fashion. It > doesn't change/replace the default behavior. > > This commit adds `--tool=internal:dumpjson`. It prints, for each conflict, the > "base", "other", and "ours" versions (the contents, as well as their exec/link > flags), and where the user/tool should write a resolved version (i.e., the > working copy) as JSON. The user will then resolve the conflicts at their leisure > and run `hg resolve --mark`. Overall I think it looks good. Some minor comments inline, but I'd be ok having this queued (since it's been sitting here a while) and addressing the nits in a follow up. > diff --git a/mercurial/commands.py b/mercurial/commands.py > --- a/mercurial/commands.py > +++ b/mercurial/commands.py > @@ -33,6 +33,7 @@ > error, > exchange, > extensions, > + formatter, > graphmod, > hbisect, > help, > @@ -4284,6 +4285,26 @@ > fm.end() > return 0 > > + if opts.get('tool', '') == "internal:dumpjson": > + fm = ui.formatter('resolve', {'template': 'json'}) > + ms = mergemod.mergestate.read(repo) > + m = scmutil.match(repo[None], pats, opts) > + wctx = repo[None] > + > + paths = [] > + for f in ms: > + if not m(f): > + continue > + > + val = ms.internaldump(f, wctx) > + if val is not None: > + paths.append(val) > + > + fm.startitem() > + fm.write('conflicts', '%s\n', paths) > + fm.end() > + return 0 > + > with repo.wlock(): > ms = mergemod.mergestate.read(repo) > > diff --git a/mercurial/filemerge.py b/mercurial/filemerge.py > --- a/mercurial/filemerge.py > +++ b/mercurial/filemerge.py > @@ -567,6 +567,40 @@ > "o": " [%s]" % labels[1], > } > > +def summarize(repo, fcd, fco, fca): > + back = None if fcd.isabsent() else \ > + scmutil.origpath(repo.ui, repo, repo.wjoin(fcd.path())) > + > + def sum(ctx): > + return { > + 'contents': ctx.data(), > + 'isexec': ctx.isexec(), > + 'issymlink': ctx.islink(), > + } > + > + ours = None > + if back and util.filestat(back).stat: > + # Replicate the schema of `base` and `other` for `ours`. Since you can > + # start a merge with a dirty working copy, `ours` must reflect that, > + # not the underlying commit. That's why we look at .orig version. > + ours = { > + 'path': back, > + 'contents': util.readfile(back), > + 'isexec': util.isexec(back), > + 'issymlink': util.statislink(util.filestat(back).stat) > + } > + > + output = sum(fcd) > + output['path'] = repo.wjoin(fcd.path()) > + > + return { > + 'path': fcd.path(), > + 'base': sum(fca), > + 'other': sum(fco), > + 'output': output, > + 'ours': ours, > + } > + > def _filemerge(premerge, repo, mynode, orig, fcd, fco, fca, labels=None): > """perform a 3-way merge in the working directory > > diff --git a/mercurial/merge.py b/mercurial/merge.py > --- a/mercurial/merge.py > +++ b/mercurial/merge.py > @@ -456,6 +456,24 @@ > def extras(self, filename): > return self._stateextras.setdefault(filename, {}) > > + def _internaldump(self, dfile, wctx): > + if self[dfile] in 'rd': > + return None > + stateentry = self._state[dfile] > + state, hash, lfile, afile, anode, ofile, onode, flags = stateentry > + octx = self._repo[self._other] > + extras = self.extras(dfile) > + anccommitnode = extras.get('ancestorlinknode') > + if anccommitnode: > + actx = self._repo[anccommitnode] > + else: > + actx = None > + fcd = self._filectxorabsent(hash, wctx, dfile) > + fco = self._filectxorabsent(onode, octx, ofile) > + fca = self._repo.filectx(afile, fileid=anode, changeid=actx) > + > + return filemerge.summarize(self._repo, fcd, fco, fca) > + > def _resolve(self, preresolve, dfile, wctx): > """rerun merge process for file path `dfile`""" > if self[dfile] in 'rd': > @@ -543,6 +561,9 @@ > Returns whether the merge is complete, and the exit code.""" > return self._resolve(True, dfile, wctx) > > + def internaldump(self, dfile, wctx): > + return self._internaldump(dfile, wctx) Why have the _ function and the normal function? > + > def resolve(self, dfile, wctx): > """run merge process (assuming premerge was run) for dfile > > diff --git a/tests/test-resolve-prep.t b/tests/test-resolve-prep.t > new file mode 100644 > --- /dev/null > +++ b/tests/test-resolve-prep.t Maybe needs a new name since we're no longer calling this --prep. Same for references to prep inside the test > @@ -0,0 +1,75 @@ > +1) Make the repo > + $ hg init > + > +2) Can't run prep outside a conflict > + $ hg resolve --tool internal:dumpjson > + abort: no files or directories specified > + (use --all to re-merge all unresolved files) > + [255] > + > +3) Make a simple conflict > + $ echo "Unconflicted base, F1" > F1 > + $ echo "Unconflicted base, F2" > F2 > + $ hg commit -Aqm "initial commit" > + $ echo "First conflicted version, F1" > F1 > + $ echo "First conflicted version, F2" > F2 > + $ hg commit -m "first version, a" > + $ hg bookmark a > + $ hg checkout .~1 > + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved > + (leaving bookmark a) > + $ echo "Second conflicted version, F1" > F1 > + $ echo "Second conflicted version, F2" > F2 > + $ hg commit -m "second version, b" > + created new head > + $ hg bookmark b > + $ hg log -G -T '({rev}) {desc}\nbookmark: {bookmarks}\nfiles: {files}\n\n' > + @ (2) second version, b > + | bookmark: b > + | files: F1 F2 > + | > + | o (1) first version, a > + |/ bookmark: a > + | files: F1 F2 > + | > + o (0) initial commit > + bookmark: > + files: F1 F2 > + > + > + > + $ hg merge a > + merging F1 > + merging F2 > + warning: conflicts while merging F1! (edit, then use 'hg resolve --mark') > + warning: conflicts while merging F2! (edit, then use 'hg resolve --mark') > + 0 files updated, 0 files merged, 0 files removed, 2 files unresolved > + use 'hg resolve' to retry unresolved file merges or 'hg update -C .' to abandon > + [1] > + > +5) Get the paths: > + $ hg resolve --tool internal:dumpjson --all > + [ > + { > + "conflicts": [{"base": {"contents": "Unconflicted base, F1\n", "isexec": false, "issymlink": false}, "other": {"contents": "First conflicted version, F1\n", "isexec": false, "issymlink": false}, "ours": {"contents": "Second conflicted version, F1\n", "isexec": false, "issymlink": false, "path": "$TESTTMP/F1.orig"}, "output": {"contents": "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F1\n=======\nFirstconflictedversion,F1\n>>>>>>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": false, "issymlink": false, "path": "$TESTTMP/F1"}, "path": "F1"}, {"base": {"contents": "Unconflicted base, F2\n", "isexec": false, "issymlink": false}, "other": {"contents": "First conflicted version, F2\n", "isexec": false, "issymlink": false}, "ours": {"contents": "Second conflicted version, F2\n", "isexec": false, "issymlink": false, "path": "$TESTTMP/F2.orig"}, "output": {"contents": "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F2\n=======\nFirstconflictedversion,F2\n>>>>>>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": false, "issymlink": false, "path": "$TESTTMP/F2"}, "path": "F2"}] > + } > + ] Might be nice to check that passing a path to resolve along with :dumpjson results in it only dumping json for that path. > + > +6) Ensure the paths point to the right contents: > + $ getcontents() { # Usage: getcontents <path> <version> > + > local script="import sys, json; print json.load(sys.stdin)[0][\"conflicts\"][$1][\"$2\"][\"contents\"]" > + > local result=$(hg resolve --tool internal:dumpjson --all | python -c "$script") > + > echo "$result" > + > } > + $ echo $(getcontents 0 "base") > + Unconflicted base, F1 > + $ echo $(getcontents 0 "other") > + First conflicted version, F1 > + $ echo $(getcontents 0 "ours") > + Second conflicted version, F1 > + $ echo $(getcontents 1 "base") > + Unconflicted base, F2 > + $ echo $(getcontents 1 "other") > + First conflicted version, F2 > + $ echo $(getcontents 1 "ours") > + Second conflicted version, F2 > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://urldefense.proofpoint.com/v2/url?u=https-3A__www.mercurial-2Dscm.org_mailman_listinfo_mercurial-2Ddevel&d=DwIGaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=nuarHzhP1wi1T9iURRCj1A&m=LZNT2zhuXHpkPvdp1N9NXnSPIY6ecgs3vTcv3HNHXkk&s=o-YlM_1SJ6VH-Ml03wjmChyG6eiTnigl710Szyon_eo&e= >
More nits inline to consider since you're looking into a v3 anyway... On 3/15/17 10:48 AM, Durham Goode wrote: > > > On 3/7/17 11:40 AM, Phil Cohen wrote: >> # HG changeset patch >> # User Phil Cohen <phillco@fb.com> >> # Date 1488915535 28800 >> # Tue Mar 07 11:38:55 2017 -0800 >> # Node ID bbce62e3790220f19e7b37160a2f8351b7461272 >> # Parent 91e86a6c61c0c6a3b554eefeba9060000311aa29 >> merge: add `internal:dumpjson` tool to `resolve`, which outputs >> conflict state >> >> This supersedes a previous change which added a --prep option. > > Generally we don't reference previous iterations of the patch in the > commit message, since those previous iterations wont' show up in the > repo after this is pushed. > >> Normally, `hg resolve` takes the user on a whistle-stop tour of each >> conflicted >> file, pausing to launch the editor to resolve the conflicts. This is an >> alternative workflow for resolving many conflicts in a random-access >> fashion. It >> doesn't change/replace the default behavior. >> >> This commit adds `--tool=internal:dumpjson`. It prints, for each >> conflict, the >> "base", "other", and "ours" versions (the contents, as well as their >> exec/link >> flags), and where the user/tool should write a resolved version >> (i.e., the >> working copy) as JSON. The user will then resolve the conflicts at >> their leisure >> and run `hg resolve --mark`. > > Overall I think it looks good. Some minor comments inline, but I'd be > ok having this queued (since it's been sitting here a while) and > addressing the nits in a follow up. > >> diff --git a/mercurial/commands.py b/mercurial/commands.py >> --- a/mercurial/commands.py >> +++ b/mercurial/commands.py >> @@ -33,6 +33,7 @@ >> error, >> exchange, >> extensions, >> + formatter, >> graphmod, >> hbisect, >> help, >> @@ -4284,6 +4285,26 @@ >> fm.end() >> return 0 >> >> + if opts.get('tool', '') == "internal:dumpjson": >> + fm = ui.formatter('resolve', {'template': 'json'}) >> + ms = mergemod.mergestate.read(repo) I don't love the short name style hg uses, and I don't see a godo reason to use it unless there's already a strong-established precedent in the file. We've been calling formatters 'fm' for a long time, but 'ms' as mergestate seems like a new one (to me at least). I'd love a more descriptive variable name since we have the option right now >> + m = scmutil.match(repo[None], pats, opts) Would prefer this be called 'matcher' >> + wctx = repo[None] >> + >> + paths = [] >> + for f in ms: Then this could become: for filename in mergestate: And then it's obvious from just reading the loop what it is doing >> + if not m(f): And this would become: if not matcher(filename): Which is also much more clear to me at least. >> + continue >> + >> + val = ms.internaldump(f, wctx) >> + if val is not None: >> + paths.append(val) >> + >> + fm.startitem() >> + fm.write('conflicts', '%s\n', paths) >> + fm.end() >> + return 0 >> + >> with repo.wlock(): >> ms = mergemod.mergestate.read(repo) >> >> diff --git a/mercurial/filemerge.py b/mercurial/filemerge.py >> --- a/mercurial/filemerge.py >> +++ b/mercurial/filemerge.py >> @@ -567,6 +567,40 @@ >> "o": " [%s]" % labels[1], >> } >> >> +def summarize(repo, fcd, fco, fca): I'm sure we can come up with better parameter names here too. fca is probably the "file context ancestor", but I have no idea what fcd and fco are without diving deep into the code. >> + back = None if fcd.isabsent() else \ >> + scmutil.origpath(repo.ui, repo, repo.wjoin(fcd.path())) It's unclear to me what 'back' will be used for. Can you add a comment? (is it the "back"up -- the .orig file?) >> + >> + def sum(ctx): I'd name this flags() instead of sum >> + return { >> + 'contents': ctx.data(), >> + 'isexec': ctx.isexec(), >> + 'issymlink': ctx.islink(), >> + } >> + >> + ours = None 'ours' is a git thing, not a mercurial thing. Let's use consitent hg-internal naming if possible: * local * other * base >> + if back and util.filestat(back).stat: >> + # Replicate the schema of `base` and `other` for `ours`. >> Since you can >> + # start a merge with a dirty working copy, `ours` must >> reflect that, >> + # not the underlying commit. That's why we look at .orig >> version. >> + ours = { >> + 'path': back, >> + 'contents': util.readfile(back), >> + 'isexec': util.isexec(back), >> + 'issymlink': util.statislink(util.filestat(back).stat) >> + } >> + >> + output = sum(fcd) >> + output['path'] = repo.wjoin(fcd.path()) >> + >> + return { >> + 'path': fcd.path(), >> + 'base': sum(fca), >> + 'other': sum(fco), >> + 'output': output, >> + 'ours': ours, It's even more important not to introduce a new non-hg term in the user-visible output. >> + } >> + >> def _filemerge(premerge, repo, mynode, orig, fcd, fco, fca, >> labels=None): >> """perform a 3-way merge in the working directory >> >> diff --git a/mercurial/merge.py b/mercurial/merge.py >> --- a/mercurial/merge.py >> +++ b/mercurial/merge.py >> @@ -456,6 +456,24 @@ >> def extras(self, filename): >> return self._stateextras.setdefault(filename, {}) >> >> + def _internaldump(self, dfile, wctx): >> + if self[dfile] in 'rd': >> + return None >> + stateentry = self._state[dfile] >> + state, hash, lfile, afile, anode, ofile, onode, flags = >> stateentry >> + octx = self._repo[self._other] >> + extras = self.extras(dfile) >> + anccommitnode = extras.get('ancestorlinknode') >> + if anccommitnode: >> + actx = self._repo[anccommitnode] >> + else: >> + actx = None >> + fcd = self._filectxorabsent(hash, wctx, dfile) >> + fco = self._filectxorabsent(onode, octx, ofile) >> + fca = self._repo.filectx(afile, fileid=anode, changeid=actx) >> + >> + return filemerge.summarize(self._repo, fcd, fco, fca) >> + It seems weird to me that _internaldump() (with an underscore) is called from outside the module and summarize() (without an underscore) is called from inside the module. >> def _resolve(self, preresolve, dfile, wctx): >> """rerun merge process for file path `dfile`""" >> if self[dfile] in 'rd': >> @@ -543,6 +561,9 @@ >> Returns whether the merge is complete, and the exit code.""" >> return self._resolve(True, dfile, wctx) >> >> + def internaldump(self, dfile, wctx): >> + return self._internaldump(dfile, wctx) > > Why have the _ function and the normal function? > >> + >> def resolve(self, dfile, wctx): >> """run merge process (assuming premerge was run) for dfile >> >> diff --git a/tests/test-resolve-prep.t b/tests/test-resolve-prep.t >> new file mode 100644 >> --- /dev/null >> +++ b/tests/test-resolve-prep.t > > Maybe needs a new name since we're no longer calling this --prep. Same > for references to prep inside the test > >> @@ -0,0 +1,75 @@ >> +1) Make the repo >> + $ hg init >> + >> +2) Can't run prep outside a conflict >> + $ hg resolve --tool internal:dumpjson >> + abort: no files or directories specified >> + (use --all to re-merge all unresolved files) >> + [255] >> + >> +3) Make a simple conflict >> + $ echo "Unconflicted base, F1" > F1 >> + $ echo "Unconflicted base, F2" > F2 >> + $ hg commit -Aqm "initial commit" >> + $ echo "First conflicted version, F1" > F1 >> + $ echo "First conflicted version, F2" > F2 >> + $ hg commit -m "first version, a" >> + $ hg bookmark a >> + $ hg checkout .~1 >> + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved >> + (leaving bookmark a) >> + $ echo "Second conflicted version, F1" > F1 >> + $ echo "Second conflicted version, F2" > F2 >> + $ hg commit -m "second version, b" >> + created new head >> + $ hg bookmark b >> + $ hg log -G -T '({rev}) {desc}\nbookmark: {bookmarks}\nfiles: >> {files}\n\n' >> + @ (2) second version, b >> + | bookmark: b >> + | files: F1 F2 >> + | >> + | o (1) first version, a >> + |/ bookmark: a >> + | files: F1 F2 >> + | >> + o (0) initial commit >> + bookmark: >> + files: F1 F2 >> + >> + >> + >> + $ hg merge a >> + merging F1 >> + merging F2 >> + warning: conflicts while merging F1! (edit, then use 'hg resolve >> --mark') >> + warning: conflicts while merging F2! (edit, then use 'hg resolve >> --mark') >> + 0 files updated, 0 files merged, 0 files removed, 2 files unresolved >> + use 'hg resolve' to retry unresolved file merges or 'hg update -C >> .' to abandon >> + [1] >> + >> +5) Get the paths: >> + $ hg resolve --tool internal:dumpjson --all >> + [ >> + { >> + "conflicts": [{"base": {"contents": "Unconflicted base, F1\n", >> "isexec": false, "issymlink": false}, "other": {"contents": "First >> conflicted version, F1\n", "isexec": false, "issymlink": false}, >> "ours": {"contents": "Second conflicted version, F1\n", "isexec": >> false, "issymlink": false, "path": "$TESTTMP/F1.orig"}, "output": >> {"contents": >> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F1\n=======\nFirstconflictedversion,F1\n>>>>>>> >> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": >> false, "issymlink": false, "path": "$TESTTMP/F1"}, "path": "F1"}, >> {"base": {"contents": "Unconflicted base, F2\n", "isexec": false, >> "issymlink": false}, "other": {"contents": "First conflicted version, >> F2\n", "isexec": false, "issymlink": false}, "ours": {"contents": >> "Second conflicted version, F2\n", "isexec": false, "issymlink": >> false, "path": "$TESTTMP/F2.orig"}, "output": {"contents": >> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F2\n=======\nFirstconflictedversion,F2\n>>>>>>> >> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": >> false, "issymlink": false, "path": "$TESTTMP/F2"}, "path": "F2"}] >> + } >> + ] > > Might be nice to check that passing a path to resolve along with > :dumpjson results in it only dumping json for that path. > >> + >> +6) Ensure the paths point to the right contents: >> + $ getcontents() { # Usage: getcontents <path> <version> >> + > local script="import sys, json; print >> json.load(sys.stdin)[0][\"conflicts\"][$1][\"$2\"][\"contents\"]" >> + > local result=$(hg resolve --tool internal:dumpjson --all | >> python -c "$script") >> + > echo "$result" >> + > } >> + $ echo $(getcontents 0 "base") >> + Unconflicted base, F1 >> + $ echo $(getcontents 0 "other") >> + First conflicted version, F1 >> + $ echo $(getcontents 0 "ours") >> + Second conflicted version, F1 >> + $ echo $(getcontents 1 "base") >> + Unconflicted base, F2 >> + $ echo $(getcontents 1 "other") >> + First conflicted version, F2 >> + $ echo $(getcontents 1 "ours") >> + Second conflicted version, F2 I'd like to see tests for more of the "weird" conflicts that we went over in the table a couple days ago. I also think this patch should be split up to make it smaller and easier to review: My suggestions: * a patch introducing summarize * a patch introducing internaldump (but probably renamed because _internaldump really sounds like I shouldn't be calling it) * a patch introducing the new merge tool and the tests I'm excited for this behavior!
Thanks a lot for the points re: variable names! I'm actually really happy to hear that -- these names were taken from elsewhere in the codebase, and they were very confusing. I have forgotten/had to relearn the difference between fcd/fca/fco/etc several times. Glad to hear that slightly longer/clearer names are encouraged. On 3/15/17 11:20 PM, Ryan McElroy wrote: > More nits inline to consider since you're looking into a v3 anyway... > > > On 3/15/17 10:48 AM, Durham Goode wrote: >> >> >> On 3/7/17 11:40 AM, Phil Cohen wrote: >>> # HG changeset patch >>> # User Phil Cohen <phillco@fb.com> >>> # Date 1488915535 28800 >>> # Tue Mar 07 11:38:55 2017 -0800 >>> # Node ID bbce62e3790220f19e7b37160a2f8351b7461272 >>> # Parent 91e86a6c61c0c6a3b554eefeba9060000311aa29 >>> merge: add `internal:dumpjson` tool to `resolve`, which outputs >>> conflict state >>> >>> This supersedes a previous change which added a --prep option. >> >> Generally we don't reference previous iterations of the patch in the >> commit message, since those previous iterations wont' show up in the >> repo after this is pushed. >> >>> Normally, `hg resolve` takes the user on a whistle-stop tour of each >>> conflicted >>> file, pausing to launch the editor to resolve the conflicts. This is an >>> alternative workflow for resolving many conflicts in a random-access >>> fashion. It >>> doesn't change/replace the default behavior. >>> >>> This commit adds `--tool=internal:dumpjson`. It prints, for each >>> conflict, the >>> "base", "other", and "ours" versions (the contents, as well as their >>> exec/link >>> flags), and where the user/tool should write a resolved version >>> (i.e., the >>> working copy) as JSON. The user will then resolve the conflicts at >>> their leisure >>> and run `hg resolve --mark`. >> >> Overall I think it looks good. Some minor comments inline, but I'd be >> ok having this queued (since it's been sitting here a while) and >> addressing the nits in a follow up. >> >>> diff --git a/mercurial/commands.py b/mercurial/commands.py >>> --- a/mercurial/commands.py >>> +++ b/mercurial/commands.py >>> @@ -33,6 +33,7 @@ >>> error, >>> exchange, >>> extensions, >>> + formatter, >>> graphmod, >>> hbisect, >>> help, >>> @@ -4284,6 +4285,26 @@ >>> fm.end() >>> return 0 >>> >>> + if opts.get('tool', '') == "internal:dumpjson": >>> + fm = ui.formatter('resolve', {'template': 'json'}) >>> + ms = mergemod.mergestate.read(repo) > > I don't love the short name style hg uses, and I don't see a godo reason > to use it unless there's already a strong-established precedent in the > file. > > We've been calling formatters 'fm' for a long time, but 'ms' as > mergestate seems like a new one (to me at least). I'd love a more > descriptive variable name since we have the option right now > >>> + m = scmutil.match(repo[None], pats, opts) > > Would prefer this be called 'matcher' > >>> + wctx = repo[None] >>> + >>> + paths = [] >>> + for f in ms: > > Then this could become: > > for filename in mergestate: > > And then it's obvious from just reading the loop what it is doing > >>> + if not m(f): > > And this would become: > > if not matcher(filename): > > Which is also much more clear to me at least. > >>> + continue >>> + >>> + val = ms.internaldump(f, wctx) >>> + if val is not None: >>> + paths.append(val) >>> + >>> + fm.startitem() >>> + fm.write('conflicts', '%s\n', paths) >>> + fm.end() >>> + return 0 >>> + >>> with repo.wlock(): >>> ms = mergemod.mergestate.read(repo) >>> >>> diff --git a/mercurial/filemerge.py b/mercurial/filemerge.py >>> --- a/mercurial/filemerge.py >>> +++ b/mercurial/filemerge.py >>> @@ -567,6 +567,40 @@ >>> "o": " [%s]" % labels[1], >>> } >>> >>> +def summarize(repo, fcd, fco, fca): > > I'm sure we can come up with better parameter names here too. > > fca is probably the "file context ancestor", but I have no idea what fcd > and fco are without diving deep into the code. > >>> + back = None if fcd.isabsent() else \ >>> + scmutil.origpath(repo.ui, repo, repo.wjoin(fcd.path())) > > It's unclear to me what 'back' will be used for. Can you add a comment? > (is it the "back"up -- the .orig file?) > >>> + >>> + def sum(ctx): > > I'd name this flags() instead of sum > >>> + return { >>> + 'contents': ctx.data(), >>> + 'isexec': ctx.isexec(), >>> + 'issymlink': ctx.islink(), >>> + } >>> + >>> + ours = None > > 'ours' is a git thing, not a mercurial thing. Let's use consitent > hg-internal naming if possible: > * local > * other > * base > >>> + if back and util.filestat(back).stat: >>> + # Replicate the schema of `base` and `other` for `ours`. >>> Since you can >>> + # start a merge with a dirty working copy, `ours` must >>> reflect that, >>> + # not the underlying commit. That's why we look at .orig >>> version. >>> + ours = { >>> + 'path': back, >>> + 'contents': util.readfile(back), >>> + 'isexec': util.isexec(back), >>> + 'issymlink': util.statislink(util.filestat(back).stat) >>> + } >>> + >>> + output = sum(fcd) >>> + output['path'] = repo.wjoin(fcd.path()) >>> + >>> + return { >>> + 'path': fcd.path(), >>> + 'base': sum(fca), >>> + 'other': sum(fco), >>> + 'output': output, >>> + 'ours': ours, > > It's even more important not to introduce a new non-hg term in the > user-visible output. > >>> + } >>> + >>> def _filemerge(premerge, repo, mynode, orig, fcd, fco, fca, >>> labels=None): >>> """perform a 3-way merge in the working directory >>> >>> diff --git a/mercurial/merge.py b/mercurial/merge.py >>> --- a/mercurial/merge.py >>> +++ b/mercurial/merge.py >>> @@ -456,6 +456,24 @@ >>> def extras(self, filename): >>> return self._stateextras.setdefault(filename, {}) >>> >>> + def _internaldump(self, dfile, wctx): >>> + if self[dfile] in 'rd': >>> + return None >>> + stateentry = self._state[dfile] >>> + state, hash, lfile, afile, anode, ofile, onode, flags = >>> stateentry >>> + octx = self._repo[self._other] >>> + extras = self.extras(dfile) >>> + anccommitnode = extras.get('ancestorlinknode') >>> + if anccommitnode: >>> + actx = self._repo[anccommitnode] >>> + else: >>> + actx = None >>> + fcd = self._filectxorabsent(hash, wctx, dfile) >>> + fco = self._filectxorabsent(onode, octx, ofile) >>> + fca = self._repo.filectx(afile, fileid=anode, changeid=actx) >>> + >>> + return filemerge.summarize(self._repo, fcd, fco, fca) >>> + > > It seems weird to me that _internaldump() (with an underscore) is called > from outside the module and summarize() (without an underscore) is > called from inside the module. > >>> def _resolve(self, preresolve, dfile, wctx): >>> """rerun merge process for file path `dfile`""" >>> if self[dfile] in 'rd': >>> @@ -543,6 +561,9 @@ >>> Returns whether the merge is complete, and the exit code.""" >>> return self._resolve(True, dfile, wctx) >>> >>> + def internaldump(self, dfile, wctx): >>> + return self._internaldump(dfile, wctx) >> >> Why have the _ function and the normal function? >> >>> + >>> def resolve(self, dfile, wctx): >>> """run merge process (assuming premerge was run) for dfile >>> >>> diff --git a/tests/test-resolve-prep.t b/tests/test-resolve-prep.t >>> new file mode 100644 >>> --- /dev/null >>> +++ b/tests/test-resolve-prep.t >> >> Maybe needs a new name since we're no longer calling this --prep. Same >> for references to prep inside the test >> >>> @@ -0,0 +1,75 @@ >>> +1) Make the repo >>> + $ hg init >>> + >>> +2) Can't run prep outside a conflict >>> + $ hg resolve --tool internal:dumpjson >>> + abort: no files or directories specified >>> + (use --all to re-merge all unresolved files) >>> + [255] >>> + >>> +3) Make a simple conflict >>> + $ echo "Unconflicted base, F1" > F1 >>> + $ echo "Unconflicted base, F2" > F2 >>> + $ hg commit -Aqm "initial commit" >>> + $ echo "First conflicted version, F1" > F1 >>> + $ echo "First conflicted version, F2" > F2 >>> + $ hg commit -m "first version, a" >>> + $ hg bookmark a >>> + $ hg checkout .~1 >>> + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved >>> + (leaving bookmark a) >>> + $ echo "Second conflicted version, F1" > F1 >>> + $ echo "Second conflicted version, F2" > F2 >>> + $ hg commit -m "second version, b" >>> + created new head >>> + $ hg bookmark b >>> + $ hg log -G -T '({rev}) {desc}\nbookmark: {bookmarks}\nfiles: >>> {files}\n\n' >>> + @ (2) second version, b >>> + | bookmark: b >>> + | files: F1 F2 >>> + | >>> + | o (1) first version, a >>> + |/ bookmark: a >>> + | files: F1 F2 >>> + | >>> + o (0) initial commit >>> + bookmark: >>> + files: F1 F2 >>> + >>> + >>> + >>> + $ hg merge a >>> + merging F1 >>> + merging F2 >>> + warning: conflicts while merging F1! (edit, then use 'hg resolve >>> --mark') >>> + warning: conflicts while merging F2! (edit, then use 'hg resolve >>> --mark') >>> + 0 files updated, 0 files merged, 0 files removed, 2 files unresolved >>> + use 'hg resolve' to retry unresolved file merges or 'hg update -C >>> .' to abandon >>> + [1] >>> + >>> +5) Get the paths: >>> + $ hg resolve --tool internal:dumpjson --all >>> + [ >>> + { >>> + "conflicts": [{"base": {"contents": "Unconflicted base, F1\n", >>> "isexec": false, "issymlink": false}, "other": {"contents": "First >>> conflicted version, F1\n", "isexec": false, "issymlink": false}, >>> "ours": {"contents": "Second conflicted version, F1\n", "isexec": >>> false, "issymlink": false, "path": "$TESTTMP/F1.orig"}, "output": >>> {"contents": >>> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F1\n=======\nFirstconflictedversion,F1\n>>>>>>> >>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": >>> false, "issymlink": false, "path": "$TESTTMP/F1"}, "path": "F1"}, >>> {"base": {"contents": "Unconflicted base, F2\n", "isexec": false, >>> "issymlink": false}, "other": {"contents": "First conflicted version, >>> F2\n", "isexec": false, "issymlink": false}, "ours": {"contents": >>> "Second conflicted version, F2\n", "isexec": false, "issymlink": >>> false, "path": "$TESTTMP/F2.orig"}, "output": {"contents": >>> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F2\n=======\nFirstconflictedversion,F2\n>>>>>>> >>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": >>> false, "issymlink": false, "path": "$TESTTMP/F2"}, "path": "F2"}] >>> + } >>> + ] >> >> Might be nice to check that passing a path to resolve along with >> :dumpjson results in it only dumping json for that path. >> >>> + >>> +6) Ensure the paths point to the right contents: >>> + $ getcontents() { # Usage: getcontents <path> <version> >>> + > local script="import sys, json; print >>> json.load(sys.stdin)[0][\"conflicts\"][$1][\"$2\"][\"contents\"]" >>> + > local result=$(hg resolve --tool internal:dumpjson --all | >>> python -c "$script") >>> + > echo "$result" >>> + > } >>> + $ echo $(getcontents 0 "base") >>> + Unconflicted base, F1 >>> + $ echo $(getcontents 0 "other") >>> + First conflicted version, F1 >>> + $ echo $(getcontents 0 "ours") >>> + Second conflicted version, F1 >>> + $ echo $(getcontents 1 "base") >>> + Unconflicted base, F2 >>> + $ echo $(getcontents 1 "other") >>> + First conflicted version, F2 >>> + $ echo $(getcontents 1 "ours") >>> + Second conflicted version, F2 > > I'd like to see tests for more of the "weird" conflicts that we went > over in the table a couple days ago. > > I also think this patch should be split up to make it smaller and easier > to review: My suggestions: > > * a patch introducing summarize > * a patch introducing internaldump (but probably renamed because > _internaldump really sounds like I shouldn't be calling it) > * a patch introducing the new merge tool and the tests > > I'm excited for this behavior!
Just to add some notes on variable names. There are some very common short names. Like "p1" representing "parent1". And "p1" should be preferred. An incomplete list is at [1]. It's okay to use "fname" hetlridvnvrtrtf "filename". FWIW, Ruby community standard prefers longer names while Golang explicitly prefers shorter names [2]. I think both are reasonable, as long as the code is consistent. For less-common things like "fca", "fco", etc. Longer names are definitely better. For common objects like "matcher", "manifest", I think either is okay. Since we are unlikely to rename "p1" to "parent1", I personally prefer shorter ones for unambiguous common objects, especially when it can reduce line wraps. [1]: https://www.mercurial-scm.org/wiki/CodingStyle [2]: https://github.com/golang/go/wiki/CodeReviewComments#variable-names Excerpts from Phil Cohen's message of 2017-03-16 19:32:44 -0700: > Thanks a lot for the points re: variable names! I'm actually really > happy to hear that -- these names were taken from elsewhere in the > codebase, and they were very confusing. I have forgotten/had to relearn > the difference between fcd/fca/fco/etc several times. Glad to hear that > slightly longer/clearer names are encouraged. > > On 3/15/17 11:20 PM, Ryan McElroy wrote: > > More nits inline to consider since you're looking into a v3 anyway... > > > > > > On 3/15/17 10:48 AM, Durham Goode wrote: > >> > >> > >> On 3/7/17 11:40 AM, Phil Cohen wrote: > >>> # HG changeset patch > >>> # User Phil Cohen <phillco@fb.com> > >>> # Date 1488915535 28800 > >>> # Tue Mar 07 11:38:55 2017 -0800 > >>> # Node ID bbce62e3790220f19e7b37160a2f8351b7461272 > >>> # Parent 91e86a6c61c0c6a3b554eefeba9060000311aa29 > >>> merge: add `internal:dumpjson` tool to `resolve`, which outputs > >>> conflict state > >>> > >>> This supersedes a previous change which added a --prep option. > >> > >> Generally we don't reference previous iterations of the patch in the > >> commit message, since those previous iterations wont' show up in the > >> repo after this is pushed. > >> > >>> Normally, `hg resolve` takes the user on a whistle-stop tour of each > >>> conflicted > >>> file, pausing to launch the editor to resolve the conflicts. This is an > >>> alternative workflow for resolving many conflicts in a random-access > >>> fashion. It > >>> doesn't change/replace the default behavior. > >>> > >>> This commit adds `--tool=internal:dumpjson`. It prints, for each > >>> conflict, the > >>> "base", "other", and "ours" versions (the contents, as well as their > >>> exec/link > >>> flags), and where the user/tool should write a resolved version > >>> (i.e., the > >>> working copy) as JSON. The user will then resolve the conflicts at > >>> their leisure > >>> and run `hg resolve --mark`. > >> > >> Overall I think it looks good. Some minor comments inline, but I'd be > >> ok having this queued (since it's been sitting here a while) and > >> addressing the nits in a follow up. > >> > >>> diff --git a/mercurial/commands.py b/mercurial/commands.py > >>> --- a/mercurial/commands.py > >>> +++ b/mercurial/commands.py > >>> @@ -33,6 +33,7 @@ > >>> error, > >>> exchange, > >>> extensions, > >>> + formatter, > >>> graphmod, > >>> hbisect, > >>> help, > >>> @@ -4284,6 +4285,26 @@ > >>> fm.end() > >>> return 0 > >>> > >>> + if opts.get('tool', '') == "internal:dumpjson": > >>> + fm = ui.formatter('resolve', {'template': 'json'}) > >>> + ms = mergemod.mergestate.read(repo) > > > > I don't love the short name style hg uses, and I don't see a godo reason > > to use it unless there's already a strong-established precedent in the > > file. > > > > We've been calling formatters 'fm' for a long time, but 'ms' as > > mergestate seems like a new one (to me at least). I'd love a more > > descriptive variable name since we have the option right now > > > >>> + m = scmutil.match(repo[None], pats, opts) > > > > Would prefer this be called 'matcher' > > > >>> + wctx = repo[None] > >>> + > >>> + paths = [] > >>> + for f in ms: > > > > Then this could become: > > > > for filename in mergestate: > > > > And then it's obvious from just reading the loop what it is doing > > > >>> + if not m(f): > > > > And this would become: > > > > if not matcher(filename): > > > > Which is also much more clear to me at least. > > > >>> + continue > >>> + > >>> + val = ms.internaldump(f, wctx) > >>> + if val is not None: > >>> + paths.append(val) > >>> + > >>> + fm.startitem() > >>> + fm.write('conflicts', '%s\n', paths) > >>> + fm.end() > >>> + return 0 > >>> + > >>> with repo.wlock(): > >>> ms = mergemod.mergestate.read(repo) > >>> > >>> diff --git a/mercurial/filemerge.py b/mercurial/filemerge.py > >>> --- a/mercurial/filemerge.py > >>> +++ b/mercurial/filemerge.py > >>> @@ -567,6 +567,40 @@ > >>> "o": " [%s]" % labels[1], > >>> } > >>> > >>> +def summarize(repo, fcd, fco, fca): > > > > I'm sure we can come up with better parameter names here too. > > > > fca is probably the "file context ancestor", but I have no idea what fcd > > and fco are without diving deep into the code. > > > >>> + back = None if fcd.isabsent() else \ > >>> + scmutil.origpath(repo.ui, repo, repo.wjoin(fcd.path())) > > > > It's unclear to me what 'back' will be used for. Can you add a comment? > > (is it the "back"up -- the .orig file?) > > > >>> + > >>> + def sum(ctx): > > > > I'd name this flags() instead of sum > > > >>> + return { > >>> + 'contents': ctx.data(), > >>> + 'isexec': ctx.isexec(), > >>> + 'issymlink': ctx.islink(), > >>> + } > >>> + > >>> + ours = None > > > > 'ours' is a git thing, not a mercurial thing. Let's use consitent > > hg-internal naming if possible: > > * local > > * other > > * base > > > >>> + if back and util.filestat(back).stat: > >>> + # Replicate the schema of `base` and `other` for `ours`. > >>> Since you can > >>> + # start a merge with a dirty working copy, `ours` must > >>> reflect that, > >>> + # not the underlying commit. That's why we look at .orig > >>> version. > >>> + ours = { > >>> + 'path': back, > >>> + 'contents': util.readfile(back), > >>> + 'isexec': util.isexec(back), > >>> + 'issymlink': util.statislink(util.filestat(back).stat) > >>> + } > >>> + > >>> + output = sum(fcd) > >>> + output['path'] = repo.wjoin(fcd.path()) > >>> + > >>> + return { > >>> + 'path': fcd.path(), > >>> + 'base': sum(fca), > >>> + 'other': sum(fco), > >>> + 'output': output, > >>> + 'ours': ours, > > > > It's even more important not to introduce a new non-hg term in the > > user-visible output. > > > >>> + } > >>> + > >>> def _filemerge(premerge, repo, mynode, orig, fcd, fco, fca, > >>> labels=None): > >>> """perform a 3-way merge in the working directory > >>> > >>> diff --git a/mercurial/merge.py b/mercurial/merge.py > >>> --- a/mercurial/merge.py > >>> +++ b/mercurial/merge.py > >>> @@ -456,6 +456,24 @@ > >>> def extras(self, filename): > >>> return self._stateextras.setdefault(filename, {}) > >>> > >>> + def _internaldump(self, dfile, wctx): > >>> + if self[dfile] in 'rd': > >>> + return None > >>> + stateentry = self._state[dfile] > >>> + state, hash, lfile, afile, anode, ofile, onode, flags = > >>> stateentry > >>> + octx = self._repo[self._other] > >>> + extras = self.extras(dfile) > >>> + anccommitnode = extras.get('ancestorlinknode') > >>> + if anccommitnode: > >>> + actx = self._repo[anccommitnode] > >>> + else: > >>> + actx = None > >>> + fcd = self._filectxorabsent(hash, wctx, dfile) > >>> + fco = self._filectxorabsent(onode, octx, ofile) > >>> + fca = self._repo.filectx(afile, fileid=anode, changeid=actx) > >>> + > >>> + return filemerge.summarize(self._repo, fcd, fco, fca) > >>> + > > > > It seems weird to me that _internaldump() (with an underscore) is called > > from outside the module and summarize() (without an underscore) is > > called from inside the module. > > > >>> def _resolve(self, preresolve, dfile, wctx): > >>> """rerun merge process for file path `dfile`""" > >>> if self[dfile] in 'rd': > >>> @@ -543,6 +561,9 @@ > >>> Returns whether the merge is complete, and the exit code.""" > >>> return self._resolve(True, dfile, wctx) > >>> > >>> + def internaldump(self, dfile, wctx): > >>> + return self._internaldump(dfile, wctx) > >> > >> Why have the _ function and the normal function? > >> > >>> + > >>> def resolve(self, dfile, wctx): > >>> """run merge process (assuming premerge was run) for dfile > >>> > >>> diff --git a/tests/test-resolve-prep.t b/tests/test-resolve-prep.t > >>> new file mode 100644 > >>> --- /dev/null > >>> +++ b/tests/test-resolve-prep.t > >> > >> Maybe needs a new name since we're no longer calling this --prep. Same > >> for references to prep inside the test > >> > >>> @@ -0,0 +1,75 @@ > >>> +1) Make the repo > >>> + $ hg init > >>> + > >>> +2) Can't run prep outside a conflict > >>> + $ hg resolve --tool internal:dumpjson > >>> + abort: no files or directories specified > >>> + (use --all to re-merge all unresolved files) > >>> + [255] > >>> + > >>> +3) Make a simple conflict > >>> + $ echo "Unconflicted base, F1" > F1 > >>> + $ echo "Unconflicted base, F2" > F2 > >>> + $ hg commit -Aqm "initial commit" > >>> + $ echo "First conflicted version, F1" > F1 > >>> + $ echo "First conflicted version, F2" > F2 > >>> + $ hg commit -m "first version, a" > >>> + $ hg bookmark a > >>> + $ hg checkout .~1 > >>> + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved > >>> + (leaving bookmark a) > >>> + $ echo "Second conflicted version, F1" > F1 > >>> + $ echo "Second conflicted version, F2" > F2 > >>> + $ hg commit -m "second version, b" > >>> + created new head > >>> + $ hg bookmark b > >>> + $ hg log -G -T '({rev}) {desc}\nbookmark: {bookmarks}\nfiles: > >>> {files}\n\n' > >>> + @ (2) second version, b > >>> + | bookmark: b > >>> + | files: F1 F2 > >>> + | > >>> + | o (1) first version, a > >>> + |/ bookmark: a > >>> + | files: F1 F2 > >>> + | > >>> + o (0) initial commit > >>> + bookmark: > >>> + files: F1 F2 > >>> + > >>> + > >>> + > >>> + $ hg merge a > >>> + merging F1 > >>> + merging F2 > >>> + warning: conflicts while merging F1! (edit, then use 'hg resolve > >>> --mark') > >>> + warning: conflicts while merging F2! (edit, then use 'hg resolve > >>> --mark') > >>> + 0 files updated, 0 files merged, 0 files removed, 2 files unresolved > >>> + use 'hg resolve' to retry unresolved file merges or 'hg update -C > >>> .' to abandon > >>> + [1] > >>> + > >>> +5) Get the paths: > >>> + $ hg resolve --tool internal:dumpjson --all > >>> + [ > >>> + { > >>> + "conflicts": [{"base": {"contents": "Unconflicted base, F1\n", > >>> "isexec": false, "issymlink": false}, "other": {"contents": "First > >>> conflicted version, F1\n", "isexec": false, "issymlink": false}, > >>> "ours": {"contents": "Second conflicted version, F1\n", "isexec": > >>> false, "issymlink": false, "path": "$TESTTMP/F1.orig"}, "output": > >>> {"contents": > >>> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F1\n=======\nFirstconflictedversion,F1\n>>>>>>> > >>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": > >>> false, "issymlink": false, "path": "$TESTTMP/F1"}, "path": "F1"}, > >>> {"base": {"contents": "Unconflicted base, F2\n", "isexec": false, > >>> "issymlink": false}, "other": {"contents": "First conflicted version, > >>> F2\n", "isexec": false, "issymlink": false}, "ours": {"contents": > >>> "Second conflicted version, F2\n", "isexec": false, "issymlink": > >>> false, "path": "$TESTTMP/F2.orig"}, "output": {"contents": > >>> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F2\n=======\nFirstconflictedversion,F2\n>>>>>>> > >>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": > >>> false, "issymlink": false, "path": "$TESTTMP/F2"}, "path": "F2"}] > >>> + } > >>> + ] > >> > >> Might be nice to check that passing a path to resolve along with > >> :dumpjson results in it only dumping json for that path. > >> > >>> + > >>> +6) Ensure the paths point to the right contents: > >>> + $ getcontents() { # Usage: getcontents <path> <version> > >>> + > local script="import sys, json; print > >>> json.load(sys.stdin)[0][\"conflicts\"][$1][\"$2\"][\"contents\"]" > >>> + > local result=$(hg resolve --tool internal:dumpjson --all | > >>> python -c "$script") > >>> + > echo "$result" > >>> + > } > >>> + $ echo $(getcontents 0 "base") > >>> + Unconflicted base, F1 > >>> + $ echo $(getcontents 0 "other") > >>> + First conflicted version, F1 > >>> + $ echo $(getcontents 0 "ours") > >>> + Second conflicted version, F1 > >>> + $ echo $(getcontents 1 "base") > >>> + Unconflicted base, F2 > >>> + $ echo $(getcontents 1 "other") > >>> + First conflicted version, F2 > >>> + $ echo $(getcontents 1 "ours") > >>> + Second conflicted version, F2 > > > > I'd like to see tests for more of the "weird" conflicts that we went > > over in the table a couple days ago. > > > > I also think this patch should be split up to make it smaller and easier > > to review: My suggestions: > > > > * a patch introducing summarize > > * a patch introducing internaldump (but probably renamed because > > _internaldump really sounds like I shouldn't be calling it) > > * a patch introducing the new merge tool and the tests > > > > I'm excited for this behavior!
Fix a cat typing. Excerpts from Jun Wu's message of 2017-03-16 20:12:28 -0700: > Just to add some notes on variable names. There are some very common short > names. Like "p1" representing "parent1". And "p1" should be preferred. An > incomplete list is at [1]. It's okay to use "fname" hetlridvnvrtrtf ^^^^^^^^^^^^^^^ or "fn" instead of > "filename". > > FWIW, Ruby community standard prefers longer names while Golang explicitly > prefers shorter names [2]. I think both are reasonable, as long as the code > is consistent. > > For less-common things like "fca", "fco", etc. Longer names are definitely > better. For common objects like "matcher", "manifest", I think either is > okay. Since we are unlikely to rename "p1" to "parent1", I personally prefer > shorter ones for unambiguous common objects, especially when it can reduce > line wraps. > > [1]: https://www.mercurial-scm.org/wiki/CodingStyle > [2]: https://github.com/golang/go/wiki/CodeReviewComments#variable-names > > Excerpts from Phil Cohen's message of 2017-03-16 19:32:44 -0700: > > Thanks a lot for the points re: variable names! I'm actually really > > happy to hear that -- these names were taken from elsewhere in the > > codebase, and they were very confusing. I have forgotten/had to relearn > > the difference between fcd/fca/fco/etc several times. Glad to hear that > > slightly longer/clearer names are encouraged. > > > > On 3/15/17 11:20 PM, Ryan McElroy wrote: > > > More nits inline to consider since you're looking into a v3 anyway... > > > > > > > > > On 3/15/17 10:48 AM, Durham Goode wrote: > > >> > > >> > > >> On 3/7/17 11:40 AM, Phil Cohen wrote: > > >>> # HG changeset patch > > >>> # User Phil Cohen <phillco@fb.com> > > >>> # Date 1488915535 28800 > > >>> # Tue Mar 07 11:38:55 2017 -0800 > > >>> # Node ID bbce62e3790220f19e7b37160a2f8351b7461272 > > >>> # Parent 91e86a6c61c0c6a3b554eefeba9060000311aa29 > > >>> merge: add `internal:dumpjson` tool to `resolve`, which outputs > > >>> conflict state > > >>> > > >>> This supersedes a previous change which added a --prep option. > > >> > > >> Generally we don't reference previous iterations of the patch in the > > >> commit message, since those previous iterations wont' show up in the > > >> repo after this is pushed. > > >> > > >>> Normally, `hg resolve` takes the user on a whistle-stop tour of each > > >>> conflicted > > >>> file, pausing to launch the editor to resolve the conflicts. This is an > > >>> alternative workflow for resolving many conflicts in a random-access > > >>> fashion. It > > >>> doesn't change/replace the default behavior. > > >>> > > >>> This commit adds `--tool=internal:dumpjson`. It prints, for each > > >>> conflict, the > > >>> "base", "other", and "ours" versions (the contents, as well as their > > >>> exec/link > > >>> flags), and where the user/tool should write a resolved version > > >>> (i.e., the > > >>> working copy) as JSON. The user will then resolve the conflicts at > > >>> their leisure > > >>> and run `hg resolve --mark`. > > >> > > >> Overall I think it looks good. Some minor comments inline, but I'd be > > >> ok having this queued (since it's been sitting here a while) and > > >> addressing the nits in a follow up. > > >> > > >>> diff --git a/mercurial/commands.py b/mercurial/commands.py > > >>> --- a/mercurial/commands.py > > >>> +++ b/mercurial/commands.py > > >>> @@ -33,6 +33,7 @@ > > >>> error, > > >>> exchange, > > >>> extensions, > > >>> + formatter, > > >>> graphmod, > > >>> hbisect, > > >>> help, > > >>> @@ -4284,6 +4285,26 @@ > > >>> fm.end() > > >>> return 0 > > >>> > > >>> + if opts.get('tool', '') == "internal:dumpjson": > > >>> + fm = ui.formatter('resolve', {'template': 'json'}) > > >>> + ms = mergemod.mergestate.read(repo) > > > > > > I don't love the short name style hg uses, and I don't see a godo reason > > > to use it unless there's already a strong-established precedent in the > > > file. > > > > > > We've been calling formatters 'fm' for a long time, but 'ms' as > > > mergestate seems like a new one (to me at least). I'd love a more > > > descriptive variable name since we have the option right now > > > > > >>> + m = scmutil.match(repo[None], pats, opts) > > > > > > Would prefer this be called 'matcher' > > > > > >>> + wctx = repo[None] > > >>> + > > >>> + paths = [] > > >>> + for f in ms: > > > > > > Then this could become: > > > > > > for filename in mergestate: > > > > > > And then it's obvious from just reading the loop what it is doing > > > > > >>> + if not m(f): > > > > > > And this would become: > > > > > > if not matcher(filename): > > > > > > Which is also much more clear to me at least. > > > > > >>> + continue > > >>> + > > >>> + val = ms.internaldump(f, wctx) > > >>> + if val is not None: > > >>> + paths.append(val) > > >>> + > > >>> + fm.startitem() > > >>> + fm.write('conflicts', '%s\n', paths) > > >>> + fm.end() > > >>> + return 0 > > >>> + > > >>> with repo.wlock(): > > >>> ms = mergemod.mergestate.read(repo) > > >>> > > >>> diff --git a/mercurial/filemerge.py b/mercurial/filemerge.py > > >>> --- a/mercurial/filemerge.py > > >>> +++ b/mercurial/filemerge.py > > >>> @@ -567,6 +567,40 @@ > > >>> "o": " [%s]" % labels[1], > > >>> } > > >>> > > >>> +def summarize(repo, fcd, fco, fca): > > > > > > I'm sure we can come up with better parameter names here too. > > > > > > fca is probably the "file context ancestor", but I have no idea what fcd > > > and fco are without diving deep into the code. > > > > > >>> + back = None if fcd.isabsent() else \ > > >>> + scmutil.origpath(repo.ui, repo, repo.wjoin(fcd.path())) > > > > > > It's unclear to me what 'back' will be used for. Can you add a comment? > > > (is it the "back"up -- the .orig file?) > > > > > >>> + > > >>> + def sum(ctx): > > > > > > I'd name this flags() instead of sum > > > > > >>> + return { > > >>> + 'contents': ctx.data(), > > >>> + 'isexec': ctx.isexec(), > > >>> + 'issymlink': ctx.islink(), > > >>> + } > > >>> + > > >>> + ours = None > > > > > > 'ours' is a git thing, not a mercurial thing. Let's use consitent > > > hg-internal naming if possible: > > > * local > > > * other > > > * base > > > > > >>> + if back and util.filestat(back).stat: > > >>> + # Replicate the schema of `base` and `other` for `ours`. > > >>> Since you can > > >>> + # start a merge with a dirty working copy, `ours` must > > >>> reflect that, > > >>> + # not the underlying commit. That's why we look at .orig > > >>> version. > > >>> + ours = { > > >>> + 'path': back, > > >>> + 'contents': util.readfile(back), > > >>> + 'isexec': util.isexec(back), > > >>> + 'issymlink': util.statislink(util.filestat(back).stat) > > >>> + } > > >>> + > > >>> + output = sum(fcd) > > >>> + output['path'] = repo.wjoin(fcd.path()) > > >>> + > > >>> + return { > > >>> + 'path': fcd.path(), > > >>> + 'base': sum(fca), > > >>> + 'other': sum(fco), > > >>> + 'output': output, > > >>> + 'ours': ours, > > > > > > It's even more important not to introduce a new non-hg term in the > > > user-visible output. > > > > > >>> + } > > >>> + > > >>> def _filemerge(premerge, repo, mynode, orig, fcd, fco, fca, > > >>> labels=None): > > >>> """perform a 3-way merge in the working directory > > >>> > > >>> diff --git a/mercurial/merge.py b/mercurial/merge.py > > >>> --- a/mercurial/merge.py > > >>> +++ b/mercurial/merge.py > > >>> @@ -456,6 +456,24 @@ > > >>> def extras(self, filename): > > >>> return self._stateextras.setdefault(filename, {}) > > >>> > > >>> + def _internaldump(self, dfile, wctx): > > >>> + if self[dfile] in 'rd': > > >>> + return None > > >>> + stateentry = self._state[dfile] > > >>> + state, hash, lfile, afile, anode, ofile, onode, flags = > > >>> stateentry > > >>> + octx = self._repo[self._other] > > >>> + extras = self.extras(dfile) > > >>> + anccommitnode = extras.get('ancestorlinknode') > > >>> + if anccommitnode: > > >>> + actx = self._repo[anccommitnode] > > >>> + else: > > >>> + actx = None > > >>> + fcd = self._filectxorabsent(hash, wctx, dfile) > > >>> + fco = self._filectxorabsent(onode, octx, ofile) > > >>> + fca = self._repo.filectx(afile, fileid=anode, changeid=actx) > > >>> + > > >>> + return filemerge.summarize(self._repo, fcd, fco, fca) > > >>> + > > > > > > It seems weird to me that _internaldump() (with an underscore) is called > > > from outside the module and summarize() (without an underscore) is > > > called from inside the module. > > > > > >>> def _resolve(self, preresolve, dfile, wctx): > > >>> """rerun merge process for file path `dfile`""" > > >>> if self[dfile] in 'rd': > > >>> @@ -543,6 +561,9 @@ > > >>> Returns whether the merge is complete, and the exit code.""" > > >>> return self._resolve(True, dfile, wctx) > > >>> > > >>> + def internaldump(self, dfile, wctx): > > >>> + return self._internaldump(dfile, wctx) > > >> > > >> Why have the _ function and the normal function? > > >> > > >>> + > > >>> def resolve(self, dfile, wctx): > > >>> """run merge process (assuming premerge was run) for dfile > > >>> > > >>> diff --git a/tests/test-resolve-prep.t b/tests/test-resolve-prep.t > > >>> new file mode 100644 > > >>> --- /dev/null > > >>> +++ b/tests/test-resolve-prep.t > > >> > > >> Maybe needs a new name since we're no longer calling this --prep. Same > > >> for references to prep inside the test > > >> > > >>> @@ -0,0 +1,75 @@ > > >>> +1) Make the repo > > >>> + $ hg init > > >>> + > > >>> +2) Can't run prep outside a conflict > > >>> + $ hg resolve --tool internal:dumpjson > > >>> + abort: no files or directories specified > > >>> + (use --all to re-merge all unresolved files) > > >>> + [255] > > >>> + > > >>> +3) Make a simple conflict > > >>> + $ echo "Unconflicted base, F1" > F1 > > >>> + $ echo "Unconflicted base, F2" > F2 > > >>> + $ hg commit -Aqm "initial commit" > > >>> + $ echo "First conflicted version, F1" > F1 > > >>> + $ echo "First conflicted version, F2" > F2 > > >>> + $ hg commit -m "first version, a" > > >>> + $ hg bookmark a > > >>> + $ hg checkout .~1 > > >>> + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved > > >>> + (leaving bookmark a) > > >>> + $ echo "Second conflicted version, F1" > F1 > > >>> + $ echo "Second conflicted version, F2" > F2 > > >>> + $ hg commit -m "second version, b" > > >>> + created new head > > >>> + $ hg bookmark b > > >>> + $ hg log -G -T '({rev}) {desc}\nbookmark: {bookmarks}\nfiles: > > >>> {files}\n\n' > > >>> + @ (2) second version, b > > >>> + | bookmark: b > > >>> + | files: F1 F2 > > >>> + | > > >>> + | o (1) first version, a > > >>> + |/ bookmark: a > > >>> + | files: F1 F2 > > >>> + | > > >>> + o (0) initial commit > > >>> + bookmark: > > >>> + files: F1 F2 > > >>> + > > >>> + > > >>> + > > >>> + $ hg merge a > > >>> + merging F1 > > >>> + merging F2 > > >>> + warning: conflicts while merging F1! (edit, then use 'hg resolve > > >>> --mark') > > >>> + warning: conflicts while merging F2! (edit, then use 'hg resolve > > >>> --mark') > > >>> + 0 files updated, 0 files merged, 0 files removed, 2 files unresolved > > >>> + use 'hg resolve' to retry unresolved file merges or 'hg update -C > > >>> .' to abandon > > >>> + [1] > > >>> + > > >>> +5) Get the paths: > > >>> + $ hg resolve --tool internal:dumpjson --all > > >>> + [ > > >>> + { > > >>> + "conflicts": [{"base": {"contents": "Unconflicted base, F1\n", > > >>> "isexec": false, "issymlink": false}, "other": {"contents": "First > > >>> conflicted version, F1\n", "isexec": false, "issymlink": false}, > > >>> "ours": {"contents": "Second conflicted version, F1\n", "isexec": > > >>> false, "issymlink": false, "path": "$TESTTMP/F1.orig"}, "output": > > >>> {"contents": > > >>> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F1\n=======\nFirstconflictedversion,F1\n>>>>>>> > > >>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": > > >>> false, "issymlink": false, "path": "$TESTTMP/F1"}, "path": "F1"}, > > >>> {"base": {"contents": "Unconflicted base, F2\n", "isexec": false, > > >>> "issymlink": false}, "other": {"contents": "First conflicted version, > > >>> F2\n", "isexec": false, "issymlink": false}, "ours": {"contents": > > >>> "Second conflicted version, F2\n", "isexec": false, "issymlink": > > >>> false, "path": "$TESTTMP/F2.orig"}, "output": {"contents": > > >>> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F2\n=======\nFirstconflictedversion,F2\n>>>>>>> > > >>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": > > >>> false, "issymlink": false, "path": "$TESTTMP/F2"}, "path": "F2"}] > > >>> + } > > >>> + ] > > >> > > >> Might be nice to check that passing a path to resolve along with > > >> :dumpjson results in it only dumping json for that path. > > >> > > >>> + > > >>> +6) Ensure the paths point to the right contents: > > >>> + $ getcontents() { # Usage: getcontents <path> <version> > > >>> + > local script="import sys, json; print > > >>> json.load(sys.stdin)[0][\"conflicts\"][$1][\"$2\"][\"contents\"]" > > >>> + > local result=$(hg resolve --tool internal:dumpjson --all | > > >>> python -c "$script") > > >>> + > echo "$result" > > >>> + > } > > >>> + $ echo $(getcontents 0 "base") > > >>> + Unconflicted base, F1 > > >>> + $ echo $(getcontents 0 "other") > > >>> + First conflicted version, F1 > > >>> + $ echo $(getcontents 0 "ours") > > >>> + Second conflicted version, F1 > > >>> + $ echo $(getcontents 1 "base") > > >>> + Unconflicted base, F2 > > >>> + $ echo $(getcontents 1 "other") > > >>> + First conflicted version, F2 > > >>> + $ echo $(getcontents 1 "ours") > > >>> + Second conflicted version, F2 > > > > > > I'd like to see tests for more of the "weird" conflicts that we went > > > over in the table a couple days ago. > > > > > > I also think this patch should be split up to make it smaller and easier > > > to review: My suggestions: > > > > > > * a patch introducing summarize > > > * a patch introducing internaldump (but probably renamed because > > > _internaldump really sounds like I shouldn't be calling it) > > > * a patch introducing the new merge tool and the tests > > > > > > I'm excited for this behavior!
I agree with pretty much all of that. Short standardized names that represent universal concepts in the codebase (like p1) are very useful. To steal Siracusa's description of Perl, it is a bit like Huffman coding: make the common case easy and short, and the more unusual case longer and more descriptive. From: Jun Wu Sent: Thursday, March 16, 2017 8:15 PM To: Jun Wu Cc: Phil Cohen; Ryan McElroy; Durham Goode; mercurial-devel Subject: Re: [PATCH V2] merge: add `internal:dumpjson` tool to `resolve`, which outputs conflict state Fix a cat typing. Excerpts from Jun Wu's message of 2017-03-16 20:12:28 -0700: > Just to add some notes on variable names. There are some very common short > names. Like "p1" representing "parent1". And "p1" should be preferred. An > incomplete list is at [1]. It's okay to use "fname" hetlridvnvrtrtf ^^^^^^^^^^^^^^^ or "fn" instead of > "filename". > > FWIW, Ruby community standard prefers longer names while Golang explicitly > prefers shorter names [2]. I think both are reasonable, as long as the code > is consistent. > > For less-common things like "fca", "fco", etc. Longer names are definitely > better. For common objects like "matcher", "manifest", I think either is > okay. Since we are unlikely to rename "p1" to "parent1", I personally prefer > shorter ones for unambiguous common objects, especially when it can reduce > line wraps. > > [1]: https://www.mercurial-scm.org/wiki/CodingStyle CodingStyle - Mercurial www.mercurial-scm.org 1. Introduction. This page is intended to save new developers a few round-trips when contributing changes. It doesn't cover everything, but it does cover some of the ... > [2]: https://github.com/golang/go/wiki/CodeReviewComments#variable-names https://avatars1.githubusercontent.com/u/4314092?v=3&s=400 CodeReviewComments · golang/go Wiki · GitHub github.com The former declares a nil slice value, while the latter is non-nil but zero-length. They are functionally equivalent—their len and cap are both zero—but the nil ... > > Excerpts from Phil Cohen's message of 2017-03-16 19:32:44 -0700: > > Thanks a lot for the points re: variable names! I'm actually really > > happy to hear that -- these names were taken from elsewhere in the > > codebase, and they were very confusing. I have forgotten/had to relearn > > the difference between fcd/fca/fco/etc several times. Glad to hear that > > slightly longer/clearer names are encouraged. > > > > On 3/15/17 11:20 PM, Ryan McElroy wrote: > > > More nits inline to consider since you're looking into a v3 anyway... > > > > > > > > > On 3/15/17 10:48 AM, Durham Goode wrote: > > >> > > >> > > >> On 3/7/17 11:40 AM, Phil Cohen wrote: > > >>> # HG changeset patch > > >>> # User Phil Cohen <phillco@fb.com> > > >>> # Date 1488915535 28800 > > >>> # Tue Mar 07 11:38:55 2017 -0800 > > >>> # Node ID bbce62e3790220f19e7b37160a2f8351b7461272 > > >>> # Parent 91e86a6c61c0c6a3b554eefeba9060000311aa29 > > >>> merge: add `internal:dumpjson` tool to `resolve`, which outputs > > >>> conflict state > > >>> > > >>> This supersedes a previous change which added a --prep option. > > >> > > >> Generally we don't reference previous iterations of the patch in the > > >> commit message, since those previous iterations wont' show up in the > > >> repo after this is pushed. > > >> > > >>> Normally, `hg resolve` takes the user on a whistle-stop tour of each > > >>> conflicted > > >>> file, pausing to launch the editor to resolve the conflicts. This is an > > >>> alternative workflow for resolving many conflicts in a random-access > > >>> fashion. It > > >>> doesn't change/replace the default behavior. > > >>> > > >>> This commit adds `--tool=internal:dumpjson`. It prints, for each > > >>> conflict, the > > >>> "base", "other", and "ours" versions (the contents, as well as their > > >>> exec/link > > >>> flags), and where the user/tool should write a resolved version > > >>> (i.e., the > > >>> working copy) as JSON. The user will then resolve the conflicts at > > >>> their leisure > > >>> and run `hg resolve --mark`. > > >> > > >> Overall I think it looks good. Some minor comments inline, but I'd be > > >> ok having this queued (since it's been sitting here a while) and > > >> addressing the nits in a follow up. > > >> > > >>> diff --git a/mercurial/commands.py b/mercurial/commands.py > > >>> --- a/mercurial/commands.py > > >>> +++ b/mercurial/commands.py > > >>> @@ -33,6 +33,7 @@ > > >>> error, > > >>> exchange, > > >>> extensions, > > >>> + formatter, > > >>> graphmod, > > >>> hbisect, > > >>> help, > > >>> @@ -4284,6 +4285,26 @@ > > >>> fm.end() > > >>> return 0 > > >>> > > >>> + if opts.get('tool', '') == "internal:dumpjson": > > >>> + fm = ui.formatter('resolve', {'template': 'json'}) > > >>> + ms = mergemod.mergestate.read(repo) > > > > > > I don't love the short name style hg uses, and I don't see a godo reason > > > to use it unless there's already a strong-established precedent in the > > > file. > > > > > > We've been calling formatters 'fm' for a long time, but 'ms' as > > > mergestate seems like a new one (to me at least). I'd love a more > > > descriptive variable name since we have the option right now > > > > > >>> + m = scmutil.match(repo[None], pats, opts) > > > > > > Would prefer this be called 'matcher' > > > > > >>> + wctx = repo[None] > > >>> + > > >>> + paths = [] > > >>> + for f in ms: > > > > > > Then this could become: > > > > > > for filename in mergestate: > > > > > > And then it's obvious from just reading the loop what it is doing > > > > > >>> + if not m(f): > > > > > > And this would become: > > > > > > if not matcher(filename): > > > > > > Which is also much more clear to me at least. > > > > > >>> + continue > > >>> + > > >>> + val = ms.internaldump(f, wctx) > > >>> + if val is not None: > > >>> + paths.append(val) > > >>> + > > >>> + fm.startitem() > > >>> + fm.write('conflicts', '%s\n', paths) > > >>> + fm.end() > > >>> + return 0 > > >>> + > > >>> with repo.wlock(): > > >>> ms = mergemod.mergestate.read(repo) > > >>> > > >>> diff --git a/mercurial/filemerge.py b/mercurial/filemerge.py > > >>> --- a/mercurial/filemerge.py > > >>> +++ b/mercurial/filemerge.py > > >>> @@ -567,6 +567,40 @@ > > >>> "o": " [%s]" % labels[1], > > >>> } > > >>> > > >>> +def summarize(repo, fcd, fco, fca): > > > > > > I'm sure we can come up with better parameter names here too. > > > > > > fca is probably the "file context ancestor", but I have no idea what fcd > > > and fco are without diving deep into the code. > > > > > >>> + back = None if fcd.isabsent() else \ > > >>> + scmutil.origpath(repo.ui, repo, repo.wjoin(fcd.path())) > > > > > > It's unclear to me what 'back' will be used for. Can you add a comment? > > > (is it the "back"up -- the .orig file?) > > > > > >>> + > > >>> + def sum(ctx): > > > > > > I'd name this flags() instead of sum > > > > > >>> + return { > > >>> + 'contents': ctx.data(), > > >>> + 'isexec': ctx.isexec(), > > >>> + 'issymlink': ctx.islink(), > > >>> + } > > >>> + > > >>> + ours = None > > > > > > 'ours' is a git thing, not a mercurial thing. Let's use consitent > > > hg-internal naming if possible: > > > * local > > > * other > > > * base > > > > > >>> + if back and util.filestat(back).stat: > > >>> + # Replicate the schema of `base` and `other` for `ours`. > > >>> Since you can > > >>> + # start a merge with a dirty working copy, `ours` must > > >>> reflect that, > > >>> + # not the underlying commit. That's why we look at .orig > > >>> version. > > >>> + ours = { > > >>> + 'path': back, > > >>> + 'contents': util.readfile(back), > > >>> + 'isexec': util.isexec(back), > > >>> + 'issymlink': util.statislink(util.filestat(back).stat) > > >>> + } > > >>> + > > >>> + output = sum(fcd) > > >>> + output['path'] = repo.wjoin(fcd.path()) > > >>> + > > >>> + return { > > >>> + 'path': fcd.path(), > > >>> + 'base': sum(fca), > > >>> + 'other': sum(fco), > > >>> + 'output': output, > > >>> + 'ours': ours, > > > > > > It's even more important not to introduce a new non-hg term in the > > > user-visible output. > > > > > >>> + } > > >>> + > > >>> def _filemerge(premerge, repo, mynode, orig, fcd, fco, fca, > > >>> labels=None): > > >>> """perform a 3-way merge in the working directory > > >>> > > >>> diff --git a/mercurial/merge.py b/mercurial/merge.py > > >>> --- a/mercurial/merge.py > > >>> +++ b/mercurial/merge.py > > >>> @@ -456,6 +456,24 @@ > > >>> def extras(self, filename): > > >>> return self._stateextras.setdefault(filename, {}) > > >>> > > >>> + def _internaldump(self, dfile, wctx): > > >>> + if self[dfile] in 'rd': > > >>> + return None > > >>> + stateentry = self._state[dfile] > > >>> + state, hash, lfile, afile, anode, ofile, onode, flags = > > >>> stateentry > > >>> + octx = self._repo[self._other] > > >>> + extras = self.extras(dfile) > > >>> + anccommitnode = extras.get('ancestorlinknode') > > >>> + if anccommitnode: > > >>> + actx = self._repo[anccommitnode] > > >>> + else: > > >>> + actx = None > > >>> + fcd = self._filectxorabsent(hash, wctx, dfile) > > >>> + fco = self._filectxorabsent(onode, octx, ofile) > > >>> + fca = self._repo.filectx(afile, fileid=anode, changeid=actx) > > >>> + > > >>> + return filemerge.summarize(self._repo, fcd, fco, fca) > > >>> + > > > > > > It seems weird to me that _internaldump() (with an underscore) is called > > > from outside the module and summarize() (without an underscore) is > > > called from inside the module. > > > > > >>> def _resolve(self, preresolve, dfile, wctx): > > >>> """rerun merge process for file path `dfile`""" > > >>> if self[dfile] in 'rd': > > >>> @@ -543,6 +561,9 @@ > > >>> Returns whether the merge is complete, and the exit code.""" > > >>> return self._resolve(True, dfile, wctx) > > >>> > > >>> + def internaldump(self, dfile, wctx): > > >>> + return self._internaldump(dfile, wctx) > > >> > > >> Why have the _ function and the normal function? > > >> > > >>> + > > >>> def resolve(self, dfile, wctx): > > >>> """run merge process (assuming premerge was run) for dfile > > >>> > > >>> diff --git a/tests/test-resolve-prep.t b/tests/test-resolve-prep.t > > >>> new file mode 100644 > > >>> --- /dev/null > > >>> +++ b/tests/test-resolve-prep.t > > >> > > >> Maybe needs a new name since we're no longer calling this --prep. Same > > >> for references to prep inside the test > > >> > > >>> @@ -0,0 +1,75 @@ > > >>> +1) Make the repo > > >>> + $ hg init > > >>> + > > >>> +2) Can't run prep outside a conflict > > >>> + $ hg resolve --tool internal:dumpjson > > >>> + abort: no files or directories specified > > >>> + (use --all to re-merge all unresolved files) > > >>> + [255] > > >>> + > > >>> +3) Make a simple conflict > > >>> + $ echo "Unconflicted base, F1" > F1 > > >>> + $ echo "Unconflicted base, F2" > F2 > > >>> + $ hg commit -Aqm "initial commit" > > >>> + $ echo "First conflicted version, F1" > F1 > > >>> + $ echo "First conflicted version, F2" > F2 > > >>> + $ hg commit -m "first version, a" > > >>> + $ hg bookmark a > > >>> + $ hg checkout .~1 > > >>> + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved > > >>> + (leaving bookmark a) > > >>> + $ echo "Second conflicted version, F1" > F1 > > >>> + $ echo "Second conflicted version, F2" > F2 > > >>> + $ hg commit -m "second version, b" > > >>> + created new head > > >>> + $ hg bookmark b > > >>> + $ hg log -G -T '({rev}) {desc}\nbookmark: {bookmarks}\nfiles: > > >>> {files}\n\n' > > >>> + @ (2) second version, b > > >>> + | bookmark: b > > >>> + | files: F1 F2 > > >>> + | > > >>> + | o (1) first version, a > > >>> + |/ bookmark: a > > >>> + | files: F1 F2 > > >>> + | > > >>> + o (0) initial commit > > >>> + bookmark: > > >>> + files: F1 F2 > > >>> + > > >>> + > > >>> + > > >>> + $ hg merge a > > >>> + merging F1 > > >>> + merging F2 > > >>> + warning: conflicts while merging F1! (edit, then use 'hg resolve > > >>> --mark') > > >>> + warning: conflicts while merging F2! (edit, then use 'hg resolve > > >>> --mark') > > >>> + 0 files updated, 0 files merged, 0 files removed, 2 files unresolved > > >>> + use 'hg resolve' to retry unresolved file merges or 'hg update -C > > >>> .' to abandon > > >>> + [1] > > >>> + > > >>> +5) Get the paths: > > >>> + $ hg resolve --tool internal:dumpjson --all > > >>> + [ > > >>> + { > > >>> + "conflicts": [{"base": {"contents": "Unconflicted base, F1\n", > > >>> "isexec": false, "issymlink": false}, "other": {"contents": "First > > >>> conflicted version, F1\n", "isexec": false, "issymlink": false}, > > >>> "ours": {"contents": "Second conflicted version, F1\n", "isexec": > > >>> false, "issymlink": false, "path": "$TESTTMP/F1.orig"}, "output": > > >>> {"contents": > > >>> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F1\n=======\nFirstconflictedversion,F1\n>>>>>>> > > >>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": > > >>> false, "issymlink": false, "path": "$TESTTMP/F1"}, "path": "F1"}, > > >>> {"base": {"contents": "Unconflicted base, F2\n", "isexec": false, > > >>> "issymlink": false}, "other": {"contents": "First conflicted version, > > >>> F2\n", "isexec": false, "issymlink": false}, "ours": {"contents": > > >>> "Second conflicted version, F2\n", "isexec": false, "issymlink": > > >>> false, "path": "$TESTTMP/F2.orig"}, "output": {"contents": > > >>> "<<<<<<<workingcopy:13124abb51b9b-test:secondversion,b\nSecondconflictedversion,F2\n=======\nFirstconflictedversion,F2\n>>>>>>> > > >>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": > > >>> false, "issymlink": false, "path": "$TESTTMP/F2"}, "path": "F2"}] > > >>> + } > > >>> + ] > > >> > > >> Might be nice to check that passing a path to resolve along with > > >> :dumpjson results in it only dumping json for that path. > > >> > > >>> + > > >>> +6) Ensure the paths point to the right contents: > > >>> + $ getcontents() { # Usage: getcontents <path> <version> > > >>> + > local script="import sys, json; print > > >>> json.load(sys.stdin)[0][\"conflicts\"][$1][\"$2\"][\"contents\"]" > > >>> + > local result=$(hg resolve --tool internal:dumpjson --all | > > >>> python -c "$script") > > >>> + > echo "$result" > > >>> + > } > > >>> + $ echo $(getcontents 0 "base") > > >>> + Unconflicted base, F1 > > >>> + $ echo $(getcontents 0 "other") > > >>> + First conflicted version, F1 > > >>> + $ echo $(getcontents 0 "ours") > > >>> + Second conflicted version, F1 > > >>> + $ echo $(getcontents 1 "base") > > >>> + Unconflicted base, F2 > > >>> + $ echo $(getcontents 1 "other") > > >>> + First conflicted version, F2 > > >>> + $ echo $(getcontents 1 "ours") > > >>> + Second conflicted version, F2 > > > > > > I'd like to see tests for more of the "weird" conflicts that we went > > > over in the table a couple days ago. > > > > > > I also think this patch should be split up to make it smaller and easier > > > to review: My suggestions: > > > > > > * a patch introducing summarize > > > * a patch introducing internaldump (but probably renamed because > > > _internaldump really sounds like I shouldn't be calling it) > > > * a patch introducing the new merge tool and the tests > > > > > > I'm excited for this behavior!
On Tue, 7 Mar 2017 11:40:59 -0800, Phil Cohen wrote: > # HG changeset patch > # User Phil Cohen <phillco@fb.com> > # Date 1488915535 28800 > # Tue Mar 07 11:38:55 2017 -0800 > # Node ID bbce62e3790220f19e7b37160a2f8351b7461272 > # Parent 91e86a6c61c0c6a3b554eefeba9060000311aa29 > merge: add `internal:dumpjson` tool to `resolve`, which outputs conflict state > + if opts.get('tool', '') == "internal:dumpjson": > + fm = ui.formatter('resolve', {'template': 'json'}) > + ms = mergemod.mergestate.read(repo) > + m = scmutil.match(repo[None], pats, opts) > + wctx = repo[None] > + > + paths = [] > + for f in ms: > + if not m(f): > + continue > + > + val = ms.internaldump(f, wctx) > + if val is not None: > + paths.append(val) > + > + fm.startitem() > + fm.write('conflicts', '%s\n', paths) > + fm.end() It doesn't make sense to use the formatter to dump a single object. Perhaps formatter._jsonifyobj() can be a public utility function.
> It doesn't make sense to use the formatter to dump a single object. Perhaps > formatter._jsonifyobj() can be a public utility function. That's a good idea. If there are no objections I'll send that out, either as the first change in the next version or a isolated change (probably the former so I can depend on it here). On Fri, Mar 17, 2017 at 7:08 AM, Yuya Nishihara <yuya@tcha.org> wrote: > On Tue, 7 Mar 2017 11:40:59 -0800, Phil Cohen wrote: >> # HG changeset patch >> # User Phil Cohen <phillco@fb.com> >> # Date 1488915535 28800 >> # Tue Mar 07 11:38:55 2017 -0800 >> # Node ID bbce62e3790220f19e7b37160a2f8351b7461272 >> # Parent 91e86a6c61c0c6a3b554eefeba9060000311aa29 >> merge: add `internal:dumpjson` tool to `resolve`, which outputs conflict state > >> + if opts.get('tool', '') == "internal:dumpjson": >> + fm = ui.formatter('resolve', {'template': 'json'}) >> + ms = mergemod.mergestate.read(repo) >> + m = scmutil.match(repo[None], pats, opts) >> + wctx = repo[None] >> + >> + paths = [] >> + for f in ms: >> + if not m(f): >> + continue >> + >> + val = ms.internaldump(f, wctx) >> + if val is not None: >> + paths.append(val) >> + >> + fm.startitem() >> + fm.write('conflicts', '%s\n', paths) >> + fm.end() > > It doesn't make sense to use the formatter to dump a single object. Perhaps > formatter._jsonifyobj() can be a public utility function. > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Patch
diff --git a/mercurial/commands.py b/mercurial/commands.py --- a/mercurial/commands.py +++ b/mercurial/commands.py @@ -33,6 +33,7 @@ error, exchange, extensions, + formatter, graphmod, hbisect, help, @@ -4284,6 +4285,26 @@ fm.end() return 0 + if opts.get('tool', '') == "internal:dumpjson": + fm = ui.formatter('resolve', {'template': 'json'}) + ms = mergemod.mergestate.read(repo) + m = scmutil.match(repo[None], pats, opts) + wctx = repo[None] + + paths = [] + for f in ms: + if not m(f): + continue + + val = ms.internaldump(f, wctx) + if val is not None: + paths.append(val) + + fm.startitem() + fm.write('conflicts', '%s\n', paths) + fm.end() + return 0 + with repo.wlock(): ms = mergemod.mergestate.read(repo) diff --git a/mercurial/filemerge.py b/mercurial/filemerge.py --- a/mercurial/filemerge.py +++ b/mercurial/filemerge.py @@ -567,6 +567,40 @@ "o": " [%s]" % labels[1], } +def summarize(repo, fcd, fco, fca): + back = None if fcd.isabsent() else \ + scmutil.origpath(repo.ui, repo, repo.wjoin(fcd.path())) + + def sum(ctx): + return { + 'contents': ctx.data(), + 'isexec': ctx.isexec(), + 'issymlink': ctx.islink(), + } + + ours = None + if back and util.filestat(back).stat: + # Replicate the schema of `base` and `other` for `ours`. Since you can + # start a merge with a dirty working copy, `ours` must reflect that, + # not the underlying commit. That's why we look at .orig version. + ours = { + 'path': back, + 'contents': util.readfile(back), + 'isexec': util.isexec(back), + 'issymlink': util.statislink(util.filestat(back).stat) + } + + output = sum(fcd) + output['path'] = repo.wjoin(fcd.path()) + + return { + 'path': fcd.path(), + 'base': sum(fca), + 'other': sum(fco), + 'output': output, + 'ours': ours, + } + def _filemerge(premerge, repo, mynode, orig, fcd, fco, fca, labels=None): """perform a 3-way merge in the working directory diff --git a/mercurial/merge.py b/mercurial/merge.py --- a/mercurial/merge.py +++ b/mercurial/merge.py @@ -456,6 +456,24 @@ def extras(self, filename): return self._stateextras.setdefault(filename, {}) + def _internaldump(self, dfile, wctx): + if self[dfile] in 'rd': + return None + stateentry = self._state[dfile] + state, hash, lfile, afile, anode, ofile, onode, flags = stateentry + octx = self._repo[self._other] + extras = self.extras(dfile) + anccommitnode = extras.get('ancestorlinknode') + if anccommitnode: + actx = self._repo[anccommitnode] + else: + actx = None + fcd = self._filectxorabsent(hash, wctx, dfile) + fco = self._filectxorabsent(onode, octx, ofile) + fca = self._repo.filectx(afile, fileid=anode, changeid=actx) + + return filemerge.summarize(self._repo, fcd, fco, fca) + def _resolve(self, preresolve, dfile, wctx): """rerun merge process for file path `dfile`""" if self[dfile] in 'rd': @@ -543,6 +561,9 @@ Returns whether the merge is complete, and the exit code.""" return self._resolve(True, dfile, wctx) + def internaldump(self, dfile, wctx): + return self._internaldump(dfile, wctx) + def resolve(self, dfile, wctx): """run merge process (assuming premerge was run) for dfile diff --git a/tests/test-resolve-prep.t b/tests/test-resolve-prep.t new file mode 100644 --- /dev/null +++ b/tests/test-resolve-prep.t @@ -0,0 +1,75 @@ +1) Make the repo + $ hg init + +2) Can't run prep outside a conflict + $ hg resolve --tool internal:dumpjson + abort: no files or directories specified + (use --all to re-merge all unresolved files) + [255] + +3) Make a simple conflict + $ echo "Unconflicted base, F1" > F1 + $ echo "Unconflicted base, F2" > F2 + $ hg commit -Aqm "initial commit" + $ echo "First conflicted version, F1" > F1 + $ echo "First conflicted version, F2" > F2 + $ hg commit -m "first version, a" + $ hg bookmark a + $ hg checkout .~1 + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved + (leaving bookmark a) + $ echo "Second conflicted version, F1" > F1 + $ echo "Second conflicted version, F2" > F2 + $ hg commit -m "second version, b" + created new head + $ hg bookmark b + $ hg log -G -T '({rev}) {desc}\nbookmark: {bookmarks}\nfiles: {files}\n\n' + @ (2) second version, b + | bookmark: b + | files: F1 F2 + | + | o (1) first version, a + |/ bookmark: a + | files: F1 F2 + | + o (0) initial commit + bookmark: + files: F1 F2 + + + + $ hg merge a + merging F1 + merging F2 + warning: conflicts while merging F1! (edit, then use 'hg resolve --mark') + warning: conflicts while merging F2! (edit, then use 'hg resolve --mark') + 0 files updated, 0 files merged, 0 files removed, 2 files unresolved + use 'hg resolve' to retry unresolved file merges or 'hg update -C .' to abandon + [1] + +5) Get the paths: + $ hg resolve --tool internal:dumpjson --all + [ + { + "conflicts": [{"base": {"contents": "Unconflicted base, F1\n", "isexec": false, "issymlink": false}, "other": {"contents": "First conflicted version, F1\n", "isexec": false, "issymlink": false}, "ours": {"contents": "Second conflicted version, F1\n", "isexec": false, "issymlink": false, "path": "$TESTTMP/F1.orig"}, "output": {"contents": "<<<<<<< working copy: 13124abb51b9 b - test: second version, b\nSecond conflicted version, F1\n=======\nFirst conflicted version, F1\n>>>>>>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": false, "issymlink": false, "path": "$TESTTMP/F1"}, "path": "F1"}, {"base": {"contents": "Unconflicted base, F2\n", "isexec": false, "issymlink": false}, "other": {"contents": "First conflicted version, F2\n", "isexec": false, "issymlink": false}, "ours": {"contents": "Second conflicted version, F2\n", "isexec": false, "issymlink": false, "path": "$TESTTMP/F2.orig"}, "output": {"contents": "<<<<<<< working copy: 13124abb51b9 b - test: second version, b\nSecond conflicted version, F2\n=======\nFirst conflicted version, F2\n>>>>>>> merge rev: 6dd692b7db4a a - test: first version, a\n", "isexec": false, "issymlink": false, "path": "$TESTTMP/F2"}, "path": "F2"}] + } + ] + +6) Ensure the paths point to the right contents: + $ getcontents() { # Usage: getcontents <path> <version> + > local script="import sys, json; print json.load(sys.stdin)[0][\"conflicts\"][$1][\"$2\"][\"contents\"]" + > local result=$(hg resolve --tool internal:dumpjson --all | python -c "$script") + > echo "$result" + > } + $ echo $(getcontents 0 "base") + Unconflicted base, F1 + $ echo $(getcontents 0 "other") + First conflicted version, F1 + $ echo $(getcontents 0 "ours") + Second conflicted version, F1 + $ echo $(getcontents 1 "base") + Unconflicted base, F2 + $ echo $(getcontents 1 "other") + First conflicted version, F2 + $ echo $(getcontents 1 "ours") + Second conflicted version, F2