Submitter | Katsunori FUJIWARA
Date | March 5, 2014, 12:12 p.m.
Message ID | <655cacfc3311c174c995.1394021526@juju>
Permalink | /patch/3848/
State | Superseded
Comments
Thanks for these patches. They are a nice improvement.

On 03/05/2014 01:12 PM, FUJIWARA Katsunori wrote:
> # HG changeset patch
> # User FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
> # Date 1394019743 -32400
> #      Wed Mar 05 20:42:23 2014 +0900
> # Node ID 655cacfc3311c174c995e4bab13755b498cb43dc
> # Parent  779ceb84f4f782d32dfe47f6684107c08d2f6142
> largefiles: centralize the logic to get outgoing largefiles
>
> Before this patch, "overrides.getoutgoinglfiles()" (called by
> "overrideoutgoing()" and "overridesummary()") and "lfilesrepo.push()"
> implement similar logic to get outgoing largefiles separately.

Please elaborate a bit. It took me some time to figure the following out
(feel free to use it):

The difference between these two was that one returned the name of the
standin across all revisions, the other returned the largefile hashes
across all standins.

> This patch centralizes the logic to get outgoing largefiles in
> "lfutil.getlfilestoupload()".
>
> diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py
> --- a/hgext/largefiles/lfutil.py
> +++ b/hgext/largefiles/lfutil.py
> @@ -15,6 +15,7 @@
>
>  from mercurial import dirstate, httpconnection, match as match_, util, scmutil
>  from mercurial.i18n import _
> +from mercurial import node

Please extend the existing 'from mercurial import' list ... or at least
group them together.

>  shortname = '.hglf'
>  shortnameslash = shortname + '/'
> @@ -365,3 +366,31 @@
>          if f[0] not in filelist:
>              filelist.append(f[0])
>      return filelist
> +
> +def getlfilestoupload(repo, missing, mapfunc=None):
> +    toupload = set()
> +    if not missing:
> +        return toupload

Does this check make any difference? Looping over an empty list is free.

> +    if not mapfunc:
> +        mapfunc = lambda f, ctx: f

I do not think this mapfunc thing is pretty.

In the end, it would be nice to tell in the UI which largefile we are
pulling/pushing. Not only the hash, but also the largefile name (or one
of them).

Currently outgoing and summary will list all largefiles in outgoing
changesets, no matter if they really need pushing or not. Arguably it
should only list the largefiles that really are outgoing. For that we
need the hashes.

So how about letting this function return a dict mapping from hash to a
filename that uses it? For now the two users of this function can just
look at either keys or values.
/Mads
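A minimal sketch of the dict-returning variant suggested here might look
as follows; the exact shape is an assumption for illustration (the merge
handling from the patch is elided), not something settled in this thread:

    def getlfilestoupload(repo, missing):
        """Map each outgoing largefile hash to one standin that uses it."""
        toupload = {}
        for n in missing:
            ctx = repo[n]
            for f in set(ctx.files()):
                if isstandin(f) and f in ctx:
                    # a standin's content is the hash of the largefile
                    toupload[ctx[f].data().strip()] = f
        return toupload

With such a mapping, outgoing/summary could report sorted(toupload.values())
while push could upload set(toupload) (the hashes), as hinted above.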
At Thu, 06 Mar 2014 15:48:20 +0100, Mads Kiilerich wrote:
>
> Thanks for these patches. They are a nice improvement.
>
> On 03/05/2014 01:12 PM, FUJIWARA Katsunori wrote:
> > # HG changeset patch
> > # User FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
> > # Date 1394019743 -32400
> > #      Wed Mar 05 20:42:23 2014 +0900
> > # Node ID 655cacfc3311c174c995e4bab13755b498cb43dc
> > # Parent  779ceb84f4f782d32dfe47f6684107c08d2f6142
> > largefiles: centralize the logic to get outgoing largefiles
> >
> > Before this patch, "overrides.getoutgoinglfiles()" (called by
> > "overrideoutgoing()" and "overridesummary()") and "lfilesrepo.push()"
> > implement similar logic to get outgoing largefiles separately.
>
> Please elaborate a bit. It took me some time to figure the following out
> (feel free to use it):
>
> The difference between these two was that one returned the name of the
> standin across all revisions, the other returned the largefile hashes
> across all standins.

I'll try to explain more clearly.

> > This patch centralizes the logic to get outgoing largefiles in
> > "lfutil.getlfilestoupload()".
> >
> > diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py
> > --- a/hgext/largefiles/lfutil.py
> > +++ b/hgext/largefiles/lfutil.py
> > @@ -15,6 +15,7 @@
> >
> >  from mercurial import dirstate, httpconnection, match as match_, util, scmutil
> >  from mercurial.i18n import _
> > +from mercurial import node
>
> Please extend the existing 'from mercurial import' list ... or at least
> group them together.

OK, I'll insert the new import line before "from mercurial.i18n import _",
because the "from mercurial import" line seems too long to be extended.

> >  shortname = '.hglf'
> >  shortnameslash = shortname + '/'
> > @@ -365,3 +366,31 @@
> >          if f[0] not in filelist:
> >              filelist.append(f[0])
> >      return filelist
> > +
> > +def getlfilestoupload(repo, missing, mapfunc=None):
> > +    toupload = set()
> > +    if not missing:
> > +        return toupload
>
> Does this check make any difference? Looping over an empty list is free.

"missing" will be bound to "None" if there are no outgoing revisions, and
that causes an exception when looping. Should I explain it in a comment?

> > +    if not mapfunc:
> > +        mapfunc = lambda f, ctx: f
>
> I do not think this mapfunc thing is pretty.
>
> In the end, it would be nice to tell in the UI which largefile we are
> pulling/pushing. Not only the hash, but also the largefile name (or one
> of them).
>
> Currently outgoing and summary will list all largefiles in outgoing
> changesets, no matter if they really need pushing or not. Arguably it
> should only list the largefiles that really are outgoing. For that we
> need the hashes.

Do you mean that "getlfilestoupload()" should also return hashes, to
support the future improvement of outgoing/summary (listing or counting
only the really outgoing largefiles)?

> So how about letting this function return a dict mapping from hash to a
> filename that uses it? For now the two users of this function can just
> look at either keys or values.

I chose this implementation (passing a map function, if needed) to avoid
the performance impact below when there are many outgoing largefiles:

- always returning hashes may slow down outgoing/summary with meaningless
  "ctx[f].data().strip()" calls
- mapping "filename/context => hash" on the result of
  "getlfilestoupload()" means scanning the (maybe large) list again

If these are not so serious and/or you intend to improve outgoing/summary
in the future as I guessed above, I'll change "getlfilestoupload()" as
you mentioned.

> /Mads
----------------------------------------------------------------------
[FUJIWARA Katsunori]                             foozy@lares.dti.ne.jp
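To make the trade-off above concrete, the two call sites in this series
use the helper like this (condensed from the patch below; variable names
as in the patch):

    # overrides.getoutgoinglfiles(): wants standin names, default mapfunc
    toupload = sorted(lfutil.getlfilestoupload(repo, o))

    # lfilesrepo.push(): wants largefile hashes read from the standins
    mapfunc = lambda f, ctx: ctx[f].data().strip()
    toupload = lfutil.getlfilestoupload(self, o, mapfunc)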
On 03/07/2014 03:47 PM, FUJIWARA Katsunori wrote:
> At Thu, 06 Mar 2014 15:48:20 +0100, Mads Kiilerich wrote:
>>>  shortname = '.hglf'
>>>  shortnameslash = shortname + '/'
>>> @@ -365,3 +366,31 @@
>>>          if f[0] not in filelist:
>>>              filelist.append(f[0])
>>>      return filelist
>>> +
>>> +def getlfilestoupload(repo, missing, mapfunc=None):
>>> +    toupload = set()
>>> +    if not missing:
>>> +        return toupload
>> Does this check make any difference? Looping over an empty list is free.
> "missing" will be bound to "None" if there are no outgoing revisions, and
> that causes an exception when looping.

Really? It seems like this function in both places replaces a 'for n in
o' loop?

(Btw: it would be nice to have a docstring that says what 'missing' is
and that the function returns a set of mapfunc results.)

>>> +    if not mapfunc:
>>> +        mapfunc = lambda f, ctx: f
>> I do not think this mapfunc thing is pretty.
>>
>> In the end, it would be nice to tell in the UI which largefile we are
>> pulling/pushing. Not only the hash, but also the largefile name (or one
>> of them).
>>
>> Currently outgoing and summary will list all largefiles in outgoing
>> changesets, no matter if they really need pushing or not. Arguably it
>> should only list the largefiles that really are outgoing. For that we
>> need the hashes.
> Do you mean that "getlfilestoupload()" should also return hashes, to
> support the future improvement of outgoing/summary (listing or counting
> only the really outgoing largefiles)?

Yes, I think so. Outgoing/summary does not show all revisions - it only
shows the revisions that are outgoing. Currently outgoing/summary shows
all largefiles in these revisions, no matter if they are outgoing or not
(if I understand it correctly) - that seems like a bug. Instead it
should only show the largefiles that actually will be pushed. There
could be different ways of indicating that. Showing the involved
largefile names could be one. Showing the actual number of largefiles
(unique hashes) could be another, and yet another could be to show the
total amount of bytes.

What do you think about this?

>> So how about letting this function return a dict mapping from hash to a
>> filename that uses it? For now the two users of this function can just
>> look at either keys or values.
> I chose this implementation (passing a map function, if needed) to avoid
> the performance impact below when there are many outgoing largefiles:
>
> - always returning hashes may slow down outgoing/summary with meaningless
>   "ctx[f].data().strip()" calls

A largefile is still a big thing and expensive to shuffle around due to
its size (and the general design of what it is). My gut feeling is that
an extra call to .data() will be a small extra cost in the greater
picture ... especially considering that it (and an lstat call to the
remote location) is essential for actually giving a useful result for
outgoing/summary.

> - mapping "filename/context => hash" on the result of
>   "getlfilestoupload()" means scanning the (maybe large) list again

Sorry, I don't understand that.

My thought would be that the function should return a "hash =>
one-of-the-filenames" mapping. I don't think there would be a need for
any scanning ... or how it would be a problem.

> If these are not so serious and/or you intend to improve outgoing/summary
> in the future as I guessed above, I'll change "getlfilestoupload()" as
> you mentioned.

I don't have any particular plans for that. There are lots of bigger
problems with largefiles - this nice series is addressing one of them.
Anyway: These were just my thoughts. If you still prefer the way you
proposed to do these changes then it is fine with me.

/Mads
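For reference, the docstring requested above could be as small as the
following; the wording is only a suggestion, not something from the
series:

    def getlfilestoupload(repo, missing, mapfunc=None):
        """Return a set describing the largefiles touched by the outgoing
        changelog nodes in 'missing'.

        By default the set contains standin filenames; if mapfunc(f, ctx)
        is given, it contains the mapfunc results instead - for example
        the largefile hashes when mapfunc reads the standin contents.
        """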
At Fri, 07 Mar 2014 20:53:26 +0100, Mads Kiilerich wrote:
>
> On 03/07/2014 03:47 PM, FUJIWARA Katsunori wrote:
> > At Thu, 06 Mar 2014 15:48:20 +0100, Mads Kiilerich wrote:
> >>>  shortname = '.hglf'
> >>>  shortnameslash = shortname + '/'
> >>> @@ -365,3 +366,31 @@
> >>>          if f[0] not in filelist:
> >>>              filelist.append(f[0])
> >>>      return filelist
> >>> +
> >>> +def getlfilestoupload(repo, missing, mapfunc=None):
> >>> +    toupload = set()
> >>> +    if not missing:
> >>> +        return toupload
> >> Does this check make any difference? Looping over an empty list is free.
> > "missing" will be bound to "None" if there are no outgoing revisions, and
> > that causes an exception when looping.
>
> Really? It seems like this function in both places replaces a 'for n in
> o' loop?

More exactly, "missing" will be bound to "None" if (1) there are no
outgoing revisions, and (2) "getlfilestoupload()" is invoked with the
value returned by "hg._outgoing()". But it seems that there is no
specific reason for "hg._outgoing()" to return "None" instead of an
empty list (returning "None" seems to come from 8888e56ac417).

So, I'll clean these code paths up in revised series.

> (Btw: it would be nice to have a docstring that says what 'missing' is
> and that the function returns a set of mapfunc results.)
>
> >>> +    if not mapfunc:
> >>> +        mapfunc = lambda f, ctx: f
> >> I do not think this mapfunc thing is pretty.
> >>
> >> In the end, it would be nice to tell in the UI which largefile we are
> >> pulling/pushing. Not only the hash, but also the largefile name (or one
> >> of them).
> >>
> >> Currently outgoing and summary will list all largefiles in outgoing
> >> changesets, no matter if they really need pushing or not. Arguably it
> >> should only list the largefiles that really are outgoing. For that we
> >> need the hashes.
> > Do you mean that "getlfilestoupload()" should also return hashes, to
> > support the future improvement of outgoing/summary (listing or counting
> > only the really outgoing largefiles)?
>
> Yes, I think so. Outgoing/summary does not show all revisions - it only
> shows the revisions that are outgoing. Currently outgoing/summary shows
> all largefiles in these revisions, no matter if they are outgoing or not
> (if I understand it correctly) - that seems like a bug. Instead it
> should only show the largefiles that actually will be pushed. There
> could be different ways of indicating that. Showing the involved
> largefile names could be one. Showing the actual number of largefiles
> (unique hashes) could be another, and yet another could be to show the
> total amount of bytes.
>
> What do you think about this?

It sounds good. How about the messages below?

    largefiles: NN entities to upload for MM largefiles
    path/to/file
    .....

(only for "hg outgoing") .... and below for "hg outgoing --large
--debug"?

    largefiles: NN entities to upload for MM largefiles
    path/to/file (hash)
    path/to/file (hash1 hash2 hash3)    # for multiple changes
    ....

"NN" is the number of unique hashes, and "MM" is the number of paths of
largefiles, to be uploaded.

To keep patches small and simple, I plan to achieve it in the steps
below.

(1) refactor to avoid redundant discovery

    it just centralizes the logic to check, like this series.

(2) check entities which may be uploaded

    get unique hashes of largefiles which may be uploaded to the remote,
    and show detailed messages like the above.

    In this step, the existence of largefiles on the remote side is
    still not checked: the messages above may show greater NN/MM than
    the exact ones.

(3) check entities which should be uploaded exactly

    in addition to (2), check the existence of each largefile on the
    remote side to show exact numbers in outgoing/summary messages.

    This involves "exists" wire access and increases remote access cost.

The patch for (3) can be dropped individually, if the wire access seems
not to be reasonable in performance terms.

----------------------------------------------------------------------
[FUJIWARA Katsunori]                             foozy@lares.dti.ne.jp
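A rough sketch of how step (2) could compute NN and MM from a
"largefile path => hashes to upload" mapping; the helper name, the
mapping shape and the ui calls are illustrative assumptions, not code
from this series:

    from mercurial.i18n import _

    def showuploadsummary(ui, touploadhashes, debug=False):
        # touploadhashes: assumed {largefile path: set of hashes to upload}
        allhashes = set()
        for hashes in touploadhashes.values():
            allhashes.update(hashes)
        ui.status(_('largefiles: %d entities to upload for %d largefiles\n')
                  % (len(allhashes), len(touploadhashes)))
        for path in sorted(touploadhashes):
            if debug:
                ui.status('%s (%s)\n'
                          % (path, ' '.join(sorted(touploadhashes[path]))))
            else:
                ui.status('%s\n' % path)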
On 03/11/2014 11:35 AM, FUJIWARA Katsunori wrote:
> So, I'll clean these code paths up in revised series.
Hi - any news on this one? I would love to get it in before the next freeze.
/Mads
Patch
diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py
--- a/hgext/largefiles/lfutil.py
+++ b/hgext/largefiles/lfutil.py
@@ -15,6 +15,7 @@
 
 from mercurial import dirstate, httpconnection, match as match_, util, scmutil
 from mercurial.i18n import _
+from mercurial import node
 
 shortname = '.hglf'
 shortnameslash = shortname + '/'
@@ -365,3 +366,31 @@
         if f[0] not in filelist:
             filelist.append(f[0])
     return filelist
+
+def getlfilestoupload(repo, missing, mapfunc=None):
+    toupload = set()
+    if not missing:
+        return toupload
+    if not mapfunc:
+        mapfunc = lambda f, ctx: f
+    for n in missing:
+        parents = [p for p in repo.changelog.parents(n) if p != node.nullid]
+        ctx = repo[n]
+        files = set(ctx.files())
+        if len(parents) == 2:
+            mc = ctx.manifest()
+            mp1 = ctx.parents()[0].manifest()
+            mp2 = ctx.parents()[1].manifest()
+            for f in mp1:
+                if f not in mc:
+                    files.add(f)
+            for f in mp2:
+                if f not in mc:
+                    files.add(f)
+            for f in mc:
+                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+                    files.add(f)
+        toupload = toupload.union(set([mapfunc(f, ctx)
+                                       for f in files
+                                       if isstandin(f) and f in ctx]))
+    return toupload
diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
--- a/hgext/largefiles/overrides.py
+++ b/hgext/largefiles/overrides.py
@@ -12,7 +12,7 @@
 import copy
 
 from mercurial import hg, commands, util, cmdutil, scmutil, match as match_, \
-    node, archival, error, merge, discovery, pathutil, revset
+    archival, error, merge, discovery, pathutil, revset
 from mercurial.i18n import _
 from mercurial.node import hex
 from hgext import rebase
@@ -999,27 +999,7 @@
     if opts.get('newest_first'):
         o.reverse()
 
-    toupload = set()
-    for n in o:
-        parents = [p for p in repo.changelog.parents(n) if p != node.nullid]
-        ctx = repo[n]
-        files = set(ctx.files())
-        if len(parents) == 2:
-            mc = ctx.manifest()
-            mp1 = ctx.parents()[0].manifest()
-            mp2 = ctx.parents()[1].manifest()
-            for f in mp1:
-                if f not in mc:
-                    files.add(f)
-            for f in mp2:
-                if f not in mc:
-                    files.add(f)
-            for f in mc:
-                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
-                    files.add(f)
-        toupload = toupload.union(
-            set([f for f in files if lfutil.isstandin(f) and f in ctx]))
-    return sorted(toupload)
+    return sorted(lfutil.getlfilestoupload(repo, o))
 
 def overrideoutgoing(orig, ui, repo, dest=None, **opts):
     result = orig(ui, repo, dest, **opts)
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -11,7 +11,6 @@
 import os
 
 from mercurial import error, manifest, match as match_, util, discovery
-from mercurial import node as node_
 from mercurial.i18n import _
 from mercurial import localrepo
 
@@ -418,32 +417,9 @@
             outgoing = discovery.findcommonoutgoing(repo, remote.peer(),
                                                     force=force)
             if outgoing.missing:
-                toupload = set()
                 o = self.changelog.nodesbetween(outgoing.missing, revs)[0]
-                for n in o:
-                    parents = [p for p in self.changelog.parents(n)
-                               if p != node_.nullid]
-                    ctx = self[n]
-                    files = set(ctx.files())
-                    if len(parents) == 2:
-                        mc = ctx.manifest()
-                        mp1 = ctx.parents()[0].manifest()
-                        mp2 = ctx.parents()[1].manifest()
-                        for f in mp1:
-                            if f not in mc:
-                                files.add(f)
-                        for f in mp2:
-                            if f not in mc:
-                                files.add(f)
-                        for f in mc:
-                            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f,
-                                None):
-                                files.add(f)
-
-                    toupload = toupload.union(
-                        set([ctx[f].data().strip()
-                             for f in files
-                             if lfutil.isstandin(f) and f in ctx]))
+                mapfunc = lambda f, ctx: ctx[f].data().strip()
+                toupload = lfutil.getlfilestoupload(self, o, mapfunc)
                 lfcommands.uploadlfiles(ui, self, remote, toupload)
         return super(lfilesrepo, self).push(remote, force=force, revs=revs,
             newbranch=newbranch)
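As an aside, the manifest comparison for merge changesets in
getlfilestoupload() can be read as the following standalone sketch; the
helper name is made up for illustration, but the logic mirrors the patch:

    def fileschangedinmerge(ctx):
        # ctx.files() may not report everything for a merge changeset, so
        # also compare the merge manifest against both parent manifests
        mc = ctx.manifest()
        mp1 = ctx.parents()[0].manifest()
        mp2 = ctx.parents()[1].manifest()
        files = set(ctx.files())
        # files present in a parent but dropped in the merge result
        files.update(f for f in mp1 if f not in mc)
        files.update(f for f in mp2 if f not in mc)
        # files whose merge result differs from at least one parent
        files.update(f for f in mc
                     if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None))
        return files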