Patchwork [1,of,2,RFC] extdiff: use archiver to take snapshots of committed revisions

login
register
mail settings
Submitter Matt Harbison
Date Feb. 9, 2015, 4:01 a.m.
Message ID <6abceaa1a49f82cebd3a.1423454496@Envy>
Download mbox | patch
Permalink /patch/7769/
State Superseded
Commit 68822b7cdd01a84d1c5fa335be492b3706daf510
Headers show

Comments

Matt Harbison - Feb. 9, 2015, 4:01 a.m.
# HG changeset patch
# User Matt Harbison <matt_harbison@yahoo.com>
# Date 1342054131 14400
#      Wed Jul 11 20:48:51 2012 -0400
# Node ID 6abceaa1a49f82cebd3a4f141f69558e2bb3cec4
# Parent  ff5caa8dfd993680d9602ca6ebb14da9de10d5f4
extdiff: use archiver to take snapshots of committed revisions

[This is the proof of concept that Mathias asked for.  The fix for file
archiving from the internal API maybe should be a separate commit.]

There should be no visible functional differences, other than the largefile
standins are no longer included in the non working copy snapshots.  That's
probably not a big deal, and proper largefile support can still be added.  This
is the first step to make -S work.  The full (deep) working copy snapshot needs
to be handled prior to that.  This could probably be improved in the future by
excluding .hgsub and .hgsubstate from status, since that is really just private
bookkeeping info.
Mathias De Maré - Feb. 9, 2015, 7:18 a.m.
On Mon, Feb 9, 2015 at 5:01 AM, Matt Harbison <mharbison72@gmail.com> wrote:

> # HG changeset patch
> # User Matt Harbison <matt_harbison@yahoo.com>
> # Date 1342054131 14400
> #      Wed Jul 11 20:48:51 2012 -0400
> # Node ID 6abceaa1a49f82cebd3a4f141f69558e2bb3cec4
> # Parent  ff5caa8dfd993680d9602ca6ebb14da9de10d5f4
> extdiff: use archiver to take snapshots of committed revisions
>
> [This is the proof of concept that Mathias asked for.  The fix for file
> archiving from the internal API maybe should be a separate commit.]
>

Thanks! :-)

How do you want to proceed with this? Do you want to continue with these
patches yourself, or is it something you don't have time to continue with?

>
> There should be no visible functional differences, other than the largefile
> standins are no longer included in the non working copy snapshots.  That's
> probably not a big deal, and proper largefile support can still be added.
> This
> is the first step to make -S work.  The full (deep) working copy snapshot
> needs
> to be handled prior to that.  This could probably be improved in the
> future by
> excluding .hgsub and .hgsubstate from status, since that is really just
> private
> bookkeeping info.
>
> diff --git a/hgext/extdiff.py b/hgext/extdiff.py
> --- a/hgext/extdiff.py
> +++ b/hgext/extdiff.py
> @@ -63,6 +63,7 @@
>  from mercurial.i18n import _
>  from mercurial.node import short, nullid
>  from mercurial import cmdutil, scmutil, util, commands, encoding,
> filemerge
> +from mercurial import archival
>  import os, shlex, shutil, tempfile, re
>
>  cmdtable = {}
> @@ -80,31 +81,44 @@
>          dirname = '%s.%s' % (dirname, short(node))
>      base = os.path.join(tmproot, dirname)
>      os.mkdir(base)
> +    fns_and_mtime = []
> +
>      if node is not None:
>          ui.note(_('making snapshot of %d files from rev %s\n') %
>                  (len(files), short(node)))
> +
> +        # Use archive to build the snapshot for committed nodes.  (It
> aborts if
> +        # the list is empty.)
> +        if files:
> +            repo.ui.setconfig("ui", "archivemeta", False)
> +
> +            archival.archive(repo, base, node, 'files',
> +                             matchfn=scmutil.matchfiles(repo, files))
>      else:
>          ui.note(_('making snapshot of %d files from working directory\n')
> %
>              (len(files)))
> -    wopener = scmutil.opener(base)
> -    fns_and_mtime = []
> -    ctx = repo[node]
> -    for fn in sorted(files):
> -        wfn = util.pconvert(fn)
> -        if wfn not in ctx:
> -            # File doesn't exist; could be a bogus modify
> -            continue
> -        ui.note('  %s\n' % wfn)
> -        dest = os.path.join(base, wfn)
> -        fctx = ctx[wfn]
> -        data = repo.wwritedata(wfn, fctx.data())
> -        if 'l' in fctx.flags():
> -            wopener.symlink(data, wfn)
> -        else:
> -            wopener.write(wfn, data)
> -            if 'x' in fctx.flags():
> -                util.setflags(dest, False, True)
> -        if node is None:
> +
> +        # TODO: Use filesystem routines to duplicate the relevant parts
> of the
> +        #       working directory instead of this (archive doesn't work
> for
> +        #       wctx).  This will allow any subrepo type and largefiles
> to work
>
It looks like util.copyfile should be able to do this.

> +        wopener = scmutil.opener(base)
> +        ctx = repo[node]
> +        for fn in sorted(files):
> +            wfn = util.pconvert(fn)
> +            if wfn not in ctx:
> +                # File doesn't exist; could be a bogus modify
> +                continue
>
I've been wondering about this: what exactly does this bogus modify mean?
Is this still something that could happen, or a leftover from the past?
Could we ignore it and just copy all the relevant files without worrying
about this?

> +            ui.note('  %s\n' % wfn)
> +            dest = os.path.join(base, wfn)
> +            fctx = ctx[wfn]
> +            data = repo.wwritedata(wfn, fctx.data())
> +            if 'l' in fctx.flags():
> +                wopener.symlink(data, wfn)
> +            else:
> +                wopener.write(wfn, data)
> +                if 'x' in fctx.flags():
> +                    util.setflags(dest, False, True)
>
The checking of the flags will no longer be necessary if we can use
something like copyfile.

> +
>              fns_and_mtime.append((dest, repo.wjoin(fn),
>                                    os.lstat(dest).st_mtime))
>      return dirname, fns_and_mtime
> diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
> --- a/hgext/largefiles/overrides.py
> +++ b/hgext/largefiles/overrides.py
> @@ -851,7 +851,7 @@
>          repo._lfcommithooks.pop()
>
>  def overridearchive(orig, repo, dest, node, kind, decode=True,
> matchfn=None,
> -            prefix=None, mtime=None, subrepos=None):
> +            prefix='', mtime=None, subrepos=None):
>      # No need to lock because we are only reading history and
>      # largefile caches, neither of which are modified.
>      lfcommands.cachelfiles(repo.ui, repo, node)
> diff --git a/mercurial/archival.py b/mercurial/archival.py
> --- a/mercurial/archival.py
> +++ b/mercurial/archival.py
> @@ -230,7 +230,7 @@
>      }
>
>  def archive(repo, dest, node, kind, decode=True, matchfn=None,
> -            prefix=None, mtime=None, subrepos=False):
> +            prefix='', mtime=None, subrepos=False):
>      '''create archive of repo as it was at node.
>
>      dest can be name of directory, name of archive file, or file
> diff --git a/tests/test-subrepo-deep-nested-change.t
> b/tests/test-subrepo-deep-nested-change.t
> --- a/tests/test-subrepo-deep-nested-change.t
> +++ b/tests/test-subrepo-deep-nested-change.t
> @@ -419,4 +419,47 @@
>    A a.dat
>    A a.txt
>
> +  $ cat <<EOF >> $HGRCPATH
> +  > [extdiff]
> +  > cmd.falabala = echo
> +  > opts.falabala = diffing
> +  > EOF
> +
> +Interaction with extdiff, largefiles and subrepos
> +
> +  $ hg --config extensions.extdiff= extdiff
> +  diff -Npru cloned.6e907bf12afc/.hglf/a.dat cloned/.hglf/a.dat
> +  --- cloned.6e907bf12afc/.hglf/a.dat  1970-01-01 00:00:00 +0000
> +  +++ cloned/.hglf/a.dat       * (glob)
> +  @@ -0,0 +1 @@
> +  +
> +  [1]
> +
> +  $ hg --config extensions.extdiff= extdiff -r .^
> +  diff -Npru cloned.1f79fbad5d0f/.hglf/a.dat cloned/.hglf/a.dat
> +  --- cloned.1f79fbad5d0f/.hglf/a.dat  1970-01-01 00:00:00 +0000
> +  +++ cloned/.hglf/a.dat       * (glob)
> +  @@ -0,0 +1 @@
> +  +
> +  diff -Npru cloned.1f79fbad5d0f/.hglf/foo/bar/large.dat
> cloned/.hglf/foo/bar/large.dat
> +  --- cloned.1f79fbad5d0f/.hglf/foo/bar/large.dat      1970-01-01
> 00:00:00 +0000
> +  +++ cloned/.hglf/foo/bar/large.dat   * (glob)
> +  @@ -0,0 +1 @@
> +  +2f6933b5ee0f5fdd823d9717d8729f3c2523811b
> +  diff -Npru cloned.1f79fbad5d0f/foo/bar/abc cloned/foo/bar/abc
> +  --- cloned.1f79fbad5d0f/foo/bar/abc  * (glob)
> +  +++ cloned/foo/bar/abc       * (glob)
> +  @@ -0,0 +1 @@
> +  +changed
> +  [1]
> +
> +  $ hg --config extensions.extdiff= extdiff -r 0 -r .^
> +  diff -Npru cloned.7f491f53a367/.hgsubstate
> cloned.1f79fbad5d0f/.hgsubstate
> +  --- cloned.7f491f53a367/.hgsubstate  * (glob)
> +  +++ cloned.1f79fbad5d0f/.hgsubstate  * (glob)
> +  @@ -1 +1 @@
> +  -fc3b4ce2696f7741438c79207583768f2ce6b0dd sub1
> +  +7a36fa02b66e61f27f3d4a822809f159479b8ab2 sub1
> +  [1]
> +
>    $ cd ..
>
Augie Fackler - Feb. 9, 2015, 11:01 p.m.
On Sun, Feb 08, 2015 at 11:01:36PM -0500, Matt Harbison wrote:
> # HG changeset patch
> # User Matt Harbison <matt_harbison@yahoo.com>
> # Date 1342054131 14400
> #      Wed Jul 11 20:48:51 2012 -0400
> # Node ID 6abceaa1a49f82cebd3a4f141f69558e2bb3cec4
> # Parent  ff5caa8dfd993680d9602ca6ebb14da9de10d5f4
> extdiff: use archiver to take snapshots of committed revisions

This seems like a decent approach.

>
> [This is the proof of concept that Mathias asked for.  The fix for file
> archiving from the internal API maybe should be a separate commit.]

Yes, I think it should.

>
> There should be no visible functional differences, other than the largefile
> standins are no longer included in the non working copy snapshots.  That's
> probably not a big deal, and proper largefile support can still be added.
>

I don't speak largefiles. Does this mean that the phony files won't be
included, but the actual largefile content will? For extdiff, that's
arguably the right behavior?

> This
> is the first step to make -S work.  The full (deep) working copy snapshot needs
> to be handled prior to that.  This could probably be improved in the future by
> excluding .hgsub and .hgsubstate from status, since that is really just private
> bookkeeping info.

It probably makes sense to keep extdiff and 'hg diff' output at least
somewhat related?

>
> diff --git a/hgext/extdiff.py b/hgext/extdiff.py
> --- a/hgext/extdiff.py
> +++ b/hgext/extdiff.py
> @@ -63,6 +63,7 @@
>  from mercurial.i18n import _
>  from mercurial.node import short, nullid
>  from mercurial import cmdutil, scmutil, util, commands, encoding, filemerge
> +from mercurial import archival
>  import os, shlex, shutil, tempfile, re
>
>  cmdtable = {}
> @@ -80,31 +81,44 @@
>          dirname = '%s.%s' % (dirname, short(node))
>      base = os.path.join(tmproot, dirname)
>      os.mkdir(base)
> +    fns_and_mtime = []
> +
>      if node is not None:
>          ui.note(_('making snapshot of %d files from rev %s\n') %
>                  (len(files), short(node)))
> +
> +        # Use archive to build the snapshot for committed nodes.  (It aborts if
> +        # the list is empty.)
> +        if files:
> +            repo.ui.setconfig("ui", "archivemeta", False)
> +
> +            archival.archive(repo, base, node, 'files',
> +                             matchfn=scmutil.matchfiles(repo, files))
>      else:
>          ui.note(_('making snapshot of %d files from working directory\n') %
>              (len(files)))
> -    wopener = scmutil.opener(base)
> -    fns_and_mtime = []
> -    ctx = repo[node]
> -    for fn in sorted(files):
> -        wfn = util.pconvert(fn)
> -        if wfn not in ctx:
> -            # File doesn't exist; could be a bogus modify
> -            continue
> -        ui.note('  %s\n' % wfn)
> -        dest = os.path.join(base, wfn)
> -        fctx = ctx[wfn]
> -        data = repo.wwritedata(wfn, fctx.data())
> -        if 'l' in fctx.flags():
> -            wopener.symlink(data, wfn)
> -        else:
> -            wopener.write(wfn, data)
> -            if 'x' in fctx.flags():
> -                util.setflags(dest, False, True)
> -        if node is None:
> +
> +        # TODO: Use filesystem routines to duplicate the relevant parts of the
> +        #       working directory instead of this (archive doesn't work for
> +        #       wctx).  This will allow any subrepo type and largefiles to work
> +        wopener = scmutil.opener(base)
> +        ctx = repo[node]
> +        for fn in sorted(files):
> +            wfn = util.pconvert(fn)
> +            if wfn not in ctx:
> +                # File doesn't exist; could be a bogus modify
> +                continue
> +            ui.note('  %s\n' % wfn)
> +            dest = os.path.join(base, wfn)
> +            fctx = ctx[wfn]
> +            data = repo.wwritedata(wfn, fctx.data())
> +            if 'l' in fctx.flags():
> +                wopener.symlink(data, wfn)
> +            else:
> +                wopener.write(wfn, data)
> +                if 'x' in fctx.flags():
> +                    util.setflags(dest, False, True)
> +
>              fns_and_mtime.append((dest, repo.wjoin(fn),
>                                    os.lstat(dest).st_mtime))
>      return dirname, fns_and_mtime
> diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
> --- a/hgext/largefiles/overrides.py
> +++ b/hgext/largefiles/overrides.py
> @@ -851,7 +851,7 @@
>          repo._lfcommithooks.pop()
>
>  def overridearchive(orig, repo, dest, node, kind, decode=True, matchfn=None,
> -            prefix=None, mtime=None, subrepos=None):
> +            prefix='', mtime=None, subrepos=None):
>      # No need to lock because we are only reading history and
>      # largefile caches, neither of which are modified.
>      lfcommands.cachelfiles(repo.ui, repo, node)
> diff --git a/mercurial/archival.py b/mercurial/archival.py
> --- a/mercurial/archival.py
> +++ b/mercurial/archival.py
> @@ -230,7 +230,7 @@
>      }
>
>  def archive(repo, dest, node, kind, decode=True, matchfn=None,
> -            prefix=None, mtime=None, subrepos=False):
> +            prefix='', mtime=None, subrepos=False):
>      '''create archive of repo as it was at node.
>
>      dest can be name of directory, name of archive file, or file
> diff --git a/tests/test-subrepo-deep-nested-change.t b/tests/test-subrepo-deep-nested-change.t
> --- a/tests/test-subrepo-deep-nested-change.t
> +++ b/tests/test-subrepo-deep-nested-change.t
> @@ -419,4 +419,47 @@
>    A a.dat
>    A a.txt
>
> +  $ cat <<EOF >> $HGRCPATH
> +  > [extdiff]
> +  > cmd.falabala = echo
> +  > opts.falabala = diffing
> +  > EOF
> +
> +Interaction with extdiff, largefiles and subrepos
> +
> +  $ hg --config extensions.extdiff= extdiff
> +  diff -Npru cloned.6e907bf12afc/.hglf/a.dat cloned/.hglf/a.dat
> +  --- cloned.6e907bf12afc/.hglf/a.dat	1970-01-01 00:00:00 +0000
> +  +++ cloned/.hglf/a.dat	* (glob)
> +  @@ -0,0 +1 @@
> +  +
> +  [1]
> +
> +  $ hg --config extensions.extdiff= extdiff -r .^
> +  diff -Npru cloned.1f79fbad5d0f/.hglf/a.dat cloned/.hglf/a.dat
> +  --- cloned.1f79fbad5d0f/.hglf/a.dat	1970-01-01 00:00:00 +0000
> +  +++ cloned/.hglf/a.dat	* (glob)
> +  @@ -0,0 +1 @@
> +  +
> +  diff -Npru cloned.1f79fbad5d0f/.hglf/foo/bar/large.dat cloned/.hglf/foo/bar/large.dat
> +  --- cloned.1f79fbad5d0f/.hglf/foo/bar/large.dat	1970-01-01 00:00:00 +0000
> +  +++ cloned/.hglf/foo/bar/large.dat	* (glob)
> +  @@ -0,0 +1 @@
> +  +2f6933b5ee0f5fdd823d9717d8729f3c2523811b
> +  diff -Npru cloned.1f79fbad5d0f/foo/bar/abc cloned/foo/bar/abc
> +  --- cloned.1f79fbad5d0f/foo/bar/abc	* (glob)
> +  +++ cloned/foo/bar/abc	* (glob)
> +  @@ -0,0 +1 @@
> +  +changed
> +  [1]
> +
> +  $ hg --config extensions.extdiff= extdiff -r 0 -r .^
> +  diff -Npru cloned.7f491f53a367/.hgsubstate cloned.1f79fbad5d0f/.hgsubstate
> +  --- cloned.7f491f53a367/.hgsubstate	* (glob)
> +  +++ cloned.1f79fbad5d0f/.hgsubstate	* (glob)
> +  @@ -1 +1 @@
> +  -fc3b4ce2696f7741438c79207583768f2ce6b0dd sub1
> +  +7a36fa02b66e61f27f3d4a822809f159479b8ab2 sub1
> +  [1]
> +
>    $ cd ..
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
Matt Harbison - Feb. 10, 2015, 12:34 a.m.
On Mon, 09 Feb 2015 18:01:12 -0500, Augie Fackler <raf@durin42.com> wrote:

> On Sun, Feb 08, 2015 at 11:01:36PM -0500, Matt Harbison wrote:
>> # HG changeset patch
>> # User Matt Harbison <matt_harbison@yahoo.com>
>> # Date 1342054131 14400
>> #      Wed Jul 11 20:48:51 2012 -0400
>> # Node ID 6abceaa1a49f82cebd3a4f141f69558e2bb3cec4
>> # Parent  ff5caa8dfd993680d9602ca6ebb14da9de10d5f4
>> extdiff: use archiver to take snapshots of committed revisions
>
> This seems like a decent approach.
>
>>
>> [This is the proof of concept that Mathias asked for.  The fix for file
>> archiving from the internal API maybe should be a separate commit.]
>
> Yes, I think it should.
>
>>
>> There should be no visible functional differences, other than the  
>> largefile
>> standins are no longer included in the non working copy snapshots.   
>> That's
>> probably not a big deal, and proper largefile support can still be  
>> added.
>>
>
> I don't speak largefiles. Does this mean that the phony files won't be
> included, but the actual largefile content will? For extdiff, that's
> arguably the right behavior?

Yes, that's ultimately where it is going, but this only partially gets  
there.  The cases where the snapshot is of the working directory are still  
handled by the old code, which does stage the phony files.  For snapshots  
of all other revisions, archive is used, and archive already knows to  
DTRT.  Adding subrepo support is conceptually separate from adding  
largefile support, except that archive supports both, so we get the other  
for "free".

It may be worth making that conditional though by patching on a --large  
option: files that are hundreds of MB probably aren't very diff-able, but  
will slow the time to get the diff program opened.  If the phony file  
shows up, that's at least a clue to the user that the largefile changed.

>> This
>> is the first step to make -S work.  The full (deep) working copy  
>> snapshot needs
>> to be handled prior to that.  This could probably be improved in the  
>> future by
>> excluding .hgsub and .hgsubstate from status, since that is really just  
>> private
>> bookkeeping info.
>
> It probably makes sense to keep extdiff and 'hg diff' output at least
> somewhat related?

That's a good point.  I was second guessing what I wrote after I sent it  
because even though the file isn't very meaningful to the user, it is at  
least a clue that the subrepo changed.

--Matt
Matt Harbison - Feb. 10, 2015, 1:59 a.m.
On Mon, 09 Feb 2015 02:18:16 -0500, Mathias De Maré  
<mathias.demare@gmail.com> wrote:

> On Mon, Feb 9, 2015 at 5:01 AM, Matt Harbison <mharbison72@gmail.com>  
> wrote:
>
>> # HG changeset patch
>> # User Matt Harbison <matt_harbison@yahoo.com>
>> # Date 1342054131 14400
>> #      Wed Jul 11 20:48:51 2012 -0400
>> # Node ID 6abceaa1a49f82cebd3a4f141f69558e2bb3cec4
>> # Parent  ff5caa8dfd993680d9602ca6ebb14da9de10d5f4
>> extdiff: use archiver to take snapshots of committed revisions
>>
>> [This is the proof of concept that Mathias asked for.  The fix for file
>> archiving from the internal API maybe should be a separate commit.]
>>
>
> Thanks! :-)
>
> How do you want to proceed with this? Do you want to continue with these
> patches yourself, or is it something you don't have time to continue  
> with?

I've got 4 or 5 patches waiting for the current backlog to break free, but  
I've got time to work on it.  You've motivated me to look at this again  
:-).  The first thing I'd like to do is figure out what is going to be the  
least amount of headache for largefiles.

It's looking like maybe tweaking archive is the way to go (see below).  If  
that's the case, I'll need your help tweaking the git implementation.  But  
I'd like someone familiar with subrepo design to chime in before I wander  
too far off into the weeds.

>>
>> There should be no visible functional differences, other than the  
>> largefile
>> standins are no longer included in the non working copy snapshots.   
>> That's
>> probably not a big deal, and proper largefile support can still be  
>> added.
>> This
>> is the first step to make -S work.  The full (deep) working copy  
>> snapshot
>> needs
>> to be handled prior to that.  This could probably be improved in the
>> future by
>> excluding .hgsub and .hgsubstate from status, since that is really just
>> private
>> bookkeeping info.
>>
>> diff --git a/hgext/extdiff.py b/hgext/extdiff.py
>> --- a/hgext/extdiff.py
>> +++ b/hgext/extdiff.py
>> @@ -63,6 +63,7 @@
>>  from mercurial.i18n import _
>>  from mercurial.node import short, nullid
>>  from mercurial import cmdutil, scmutil, util, commands, encoding,
>> filemerge
>> +from mercurial import archival
>>  import os, shlex, shutil, tempfile, re
>>
>>  cmdtable = {}
>> @@ -80,31 +81,44 @@
>>          dirname = '%s.%s' % (dirname, short(node))
>>      base = os.path.join(tmproot, dirname)
>>      os.mkdir(base)
>> +    fns_and_mtime = []
>> +
>>      if node is not None:
>>          ui.note(_('making snapshot of %d files from rev %s\n') %
>>                  (len(files), short(node)))
>> +
>> +        # Use archive to build the snapshot for committed nodes.  (It
>> aborts if
>> +        # the list is empty.)
>> +        if files:
>> +            repo.ui.setconfig("ui", "archivemeta", False)
>> +
>> +            archival.archive(repo, base, node, 'files',
>> +                             matchfn=scmutil.matchfiles(repo, files))
>>      else:
>>          ui.note(_('making snapshot of %d files from working  
>> directory\n')
>> %
>>              (len(files)))
>> -    wopener = scmutil.opener(base)
>> -    fns_and_mtime = []
>> -    ctx = repo[node]
>> -    for fn in sorted(files):
>> -        wfn = util.pconvert(fn)
>> -        if wfn not in ctx:
>> -            # File doesn't exist; could be a bogus modify
>> -            continue
>> -        ui.note('  %s\n' % wfn)
>> -        dest = os.path.join(base, wfn)
>> -        fctx = ctx[wfn]
>> -        data = repo.wwritedata(wfn, fctx.data())
>> -        if 'l' in fctx.flags():
>> -            wopener.symlink(data, wfn)
>> -        else:
>> -            wopener.write(wfn, data)
>> -            if 'x' in fctx.flags():
>> -                util.setflags(dest, False, True)
>> -        if node is None:
>> +
>> +        # TODO: Use filesystem routines to duplicate the relevant parts
>> of the
>> +        #       working directory instead of this (archive doesn't work
>> for
>> +        #       wctx).  This will allow any subrepo type and largefiles
>> to work
>>
> It looks like util.copyfile should be able to do this.
>
>> +        wopener = scmutil.opener(base)
>> +        ctx = repo[node]
>> +        for fn in sorted(files):
>> +            wfn = util.pconvert(fn)
>> +            if wfn not in ctx:
>> +                # File doesn't exist; could be a bogus modify
>> +                continue
>>
> I've been wondering about this: what exactly does this bogus modify mean?
> Is this still something that could happen, or a leftover from the past?
> Could we ignore it and just copy all the relevant files without worrying
> about this?

In a previous life, this comment mentioned 'new file after a merge?', and  
originated here:

changeset:   3330:49966b5ab16f
parent:      3322:a1aad25ccc3e
user:        Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date:        Wed Oct 11 16:35:09 2006 +0200
summary:     fix traceback of extdiff after a merge

I assume this is referencing an added (but not yet committed) file but I  
don't understand why that would be a problem.

But I wonder if util.copyfile() is a no-go because the data is currently  
written out through repo.wwritedata() (which archive also does).   
copyfile() obviously doesn't.  I know nothing about the filtering scheme  
it does.

>> +            ui.note('  %s\n' % wfn)
>> +            dest = os.path.join(base, wfn)
>> +            fctx = ctx[wfn]
>> +            data = repo.wwritedata(wfn, fctx.data())
>> +            if 'l' in fctx.flags():
>> +                wopener.symlink(data, wfn)
>> +            else:
>> +                wopener.write(wfn, data)
>> +                if 'x' in fctx.flags():
>> +                    util.setflags(dest, False, True)
>>
> The checking of the flags will no longer be necessary if we can use
> something like copyfile.
>
Mads Kiilerich - Feb. 26, 2015, 12:06 a.m.
On 02/09/2015 05:01 AM, Matt Harbison wrote:
> # HG changeset patch
> # User Matt Harbison <matt_harbison@yahoo.com>
> # Date 1342054131 14400
> #      Wed Jul 11 20:48:51 2012 -0400
> # Node ID 6abceaa1a49f82cebd3a4f141f69558e2bb3cec4
> # Parent  ff5caa8dfd993680d9602ca6ebb14da9de10d5f4
> extdiff: use archiver to take snapshots of committed revisions
>
> [This is the proof of concept that Mathias asked for.  The fix for file
> archiving from the internal API maybe should be a separate commit.]
>
> There should be no visible functional differences, other than the largefile
> standins are no longer included in the non working copy snapshots.  That's
> probably not a big deal, and proper largefile support can still be added.

I think "proper largefile support" is pretty much the standin handling 
we already have. It is quite important to not just leave them out; an 
unreliable diff is bad. Being able to see that a largefile changed is 
often quite valuable.

Largefiles are in general so large that it could be a problem to copy 
them out to temp files just for diffing ... and they are often binary 
and not really diff-able anyway.

I can however also see the use case for tools that can compare images or 
archives so I guess it would be nice to have both options somehow ...

/Mads
Matt Harbison - Feb. 26, 2015, 2:15 a.m.
On Wed, 25 Feb 2015 19:06:04 -0500, Mads Kiilerich <mads@kiilerich.com>  
wrote:

> On 02/09/2015 05:01 AM, Matt Harbison wrote:
>> # HG changeset patch
>> # User Matt Harbison <matt_harbison@yahoo.com>
>> # Date 1342054131 14400
>> #      Wed Jul 11 20:48:51 2012 -0400
>> # Node ID 6abceaa1a49f82cebd3a4f141f69558e2bb3cec4
>> # Parent  ff5caa8dfd993680d9602ca6ebb14da9de10d5f4
>> extdiff: use archiver to take snapshots of committed revisions
>>
>> [This is the proof of concept that Mathias asked for.  The fix for file
>> archiving from the internal API maybe should be a separate commit.]
>>
>> There should be no visible functional differences, other than the  
>> largefile
>> standins are no longer included in the non working copy snapshots.   
>> That's
>> probably not a big deal, and proper largefile support can still be  
>> added.
>
> I think "proper largefile support" is pretty much the standin handling  
> we already have. It is quite important to not just leave them out; an  
> unreliable diff is bad. Being able to see that a largefile changed is  
> often quite valuable.
>
> Largefiles are in general so large that it could be a problem to copy  
> them out to temp files just for diffing ... and they are often binary  
> and not really diff-able anyway.
>
> I can however also see the use case for tools that can compare images or  
> archives so I guess it would be nice to have both options somehow ...
>
> /Mads

I can see how it would be useful to conditionalize it.  I'm thinking wrap  
the extdiff command and its manufactured deriviatives to add '--large',  
and set lfstatus on the repo if it is specified.  The status call would  
DTRT without changes, but the archival.archive() override in largefiles  
would have to be adjusted to look at it, and its existing two callers  
adjusted appropriately.  Somehow the setting needs to be propagated to  
subrepos too.

--Matt

Patch

diff --git a/hgext/extdiff.py b/hgext/extdiff.py
--- a/hgext/extdiff.py
+++ b/hgext/extdiff.py
@@ -63,6 +63,7 @@ 
 from mercurial.i18n import _
 from mercurial.node import short, nullid
 from mercurial import cmdutil, scmutil, util, commands, encoding, filemerge
+from mercurial import archival
 import os, shlex, shutil, tempfile, re
 
 cmdtable = {}
@@ -80,31 +81,44 @@ 
         dirname = '%s.%s' % (dirname, short(node))
     base = os.path.join(tmproot, dirname)
     os.mkdir(base)
+    fns_and_mtime = []
+
     if node is not None:
         ui.note(_('making snapshot of %d files from rev %s\n') %
                 (len(files), short(node)))
+
+        # Use archive to build the snapshot for committed nodes.  (It aborts if
+        # the list is empty.)
+        if files:
+            repo.ui.setconfig("ui", "archivemeta", False)
+
+            archival.archive(repo, base, node, 'files',
+                             matchfn=scmutil.matchfiles(repo, files))
     else:
         ui.note(_('making snapshot of %d files from working directory\n') %
             (len(files)))
-    wopener = scmutil.opener(base)
-    fns_and_mtime = []
-    ctx = repo[node]
-    for fn in sorted(files):
-        wfn = util.pconvert(fn)
-        if wfn not in ctx:
-            # File doesn't exist; could be a bogus modify
-            continue
-        ui.note('  %s\n' % wfn)
-        dest = os.path.join(base, wfn)
-        fctx = ctx[wfn]
-        data = repo.wwritedata(wfn, fctx.data())
-        if 'l' in fctx.flags():
-            wopener.symlink(data, wfn)
-        else:
-            wopener.write(wfn, data)
-            if 'x' in fctx.flags():
-                util.setflags(dest, False, True)
-        if node is None:
+
+        # TODO: Use filesystem routines to duplicate the relevant parts of the
+        #       working directory instead of this (archive doesn't work for
+        #       wctx).  This will allow any subrepo type and largefiles to work
+        wopener = scmutil.opener(base)
+        ctx = repo[node]
+        for fn in sorted(files):
+            wfn = util.pconvert(fn)
+            if wfn not in ctx:
+                # File doesn't exist; could be a bogus modify
+                continue
+            ui.note('  %s\n' % wfn)
+            dest = os.path.join(base, wfn)
+            fctx = ctx[wfn]
+            data = repo.wwritedata(wfn, fctx.data())
+            if 'l' in fctx.flags():
+                wopener.symlink(data, wfn)
+            else:
+                wopener.write(wfn, data)
+                if 'x' in fctx.flags():
+                    util.setflags(dest, False, True)
+
             fns_and_mtime.append((dest, repo.wjoin(fn),
                                   os.lstat(dest).st_mtime))
     return dirname, fns_and_mtime
diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
--- a/hgext/largefiles/overrides.py
+++ b/hgext/largefiles/overrides.py
@@ -851,7 +851,7 @@ 
         repo._lfcommithooks.pop()
 
 def overridearchive(orig, repo, dest, node, kind, decode=True, matchfn=None,
-            prefix=None, mtime=None, subrepos=None):
+            prefix='', mtime=None, subrepos=None):
     # No need to lock because we are only reading history and
     # largefile caches, neither of which are modified.
     lfcommands.cachelfiles(repo.ui, repo, node)
diff --git a/mercurial/archival.py b/mercurial/archival.py
--- a/mercurial/archival.py
+++ b/mercurial/archival.py
@@ -230,7 +230,7 @@ 
     }
 
 def archive(repo, dest, node, kind, decode=True, matchfn=None,
-            prefix=None, mtime=None, subrepos=False):
+            prefix='', mtime=None, subrepos=False):
     '''create archive of repo as it was at node.
 
     dest can be name of directory, name of archive file, or file
diff --git a/tests/test-subrepo-deep-nested-change.t b/tests/test-subrepo-deep-nested-change.t
--- a/tests/test-subrepo-deep-nested-change.t
+++ b/tests/test-subrepo-deep-nested-change.t
@@ -419,4 +419,47 @@ 
   A a.dat
   A a.txt
 
+  $ cat <<EOF >> $HGRCPATH
+  > [extdiff]
+  > cmd.falabala = echo
+  > opts.falabala = diffing
+  > EOF
+
+Interaction with extdiff, largefiles and subrepos
+
+  $ hg --config extensions.extdiff= extdiff
+  diff -Npru cloned.6e907bf12afc/.hglf/a.dat cloned/.hglf/a.dat
+  --- cloned.6e907bf12afc/.hglf/a.dat	1970-01-01 00:00:00 +0000
+  +++ cloned/.hglf/a.dat	* (glob)
+  @@ -0,0 +1 @@
+  +
+  [1]
+
+  $ hg --config extensions.extdiff= extdiff -r .^
+  diff -Npru cloned.1f79fbad5d0f/.hglf/a.dat cloned/.hglf/a.dat
+  --- cloned.1f79fbad5d0f/.hglf/a.dat	1970-01-01 00:00:00 +0000
+  +++ cloned/.hglf/a.dat	* (glob)
+  @@ -0,0 +1 @@
+  +
+  diff -Npru cloned.1f79fbad5d0f/.hglf/foo/bar/large.dat cloned/.hglf/foo/bar/large.dat
+  --- cloned.1f79fbad5d0f/.hglf/foo/bar/large.dat	1970-01-01 00:00:00 +0000
+  +++ cloned/.hglf/foo/bar/large.dat	* (glob)
+  @@ -0,0 +1 @@
+  +2f6933b5ee0f5fdd823d9717d8729f3c2523811b
+  diff -Npru cloned.1f79fbad5d0f/foo/bar/abc cloned/foo/bar/abc
+  --- cloned.1f79fbad5d0f/foo/bar/abc	* (glob)
+  +++ cloned/foo/bar/abc	* (glob)
+  @@ -0,0 +1 @@
+  +changed
+  [1]
+
+  $ hg --config extensions.extdiff= extdiff -r 0 -r .^
+  diff -Npru cloned.7f491f53a367/.hgsubstate cloned.1f79fbad5d0f/.hgsubstate
+  --- cloned.7f491f53a367/.hgsubstate	* (glob)
+  +++ cloned.1f79fbad5d0f/.hgsubstate	* (glob)
+  @@ -1 +1 @@
+  -fc3b4ce2696f7741438c79207583768f2ce6b0dd sub1
+  +7a36fa02b66e61f27f3d4a822809f159479b8ab2 sub1
+  [1]
+
   $ cd ..