From patchwork Fri Nov 7 08:12:11 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [3,of,3,stable] changegroup: sparsely populate fnodes From: Gregory Szorc X-Patchwork-Id: 6631 Message-Id: <1c646d31cdd16a0f7ddd.1415347931@gps-mbp.local> To: mercurial-devel@selenic.com Date: Fri, 07 Nov 2014 00:12:11 -0800 # HG changeset patch # User Gregory Szorc # Date 1415342900 28800 # Thu Nov 06 22:48:20 2014 -0800 # Node ID 1c646d31cdd16a0f7dddaf869bc0ea59d7c0c25c # Parent 87945ee4a50d8e639a737d72f8e5c1e8a45441da changegroup: sparsely populate fnodes Previously, fnodes had a key and an empty dict value for every element in changedfiles. This is somewhat wasteful: empty dicts in CPython consume a lot more memory than you would expect - 280 bytes each. On mozilla-central, which has ~190,000 files/fnodes keys, the previous loop populating fnodes allocated 91,924 KB of memory, most of that for the empty dicts. Instead, create each file's dict on demand, the first time one of its nodes is recorded. With this patch in place, our peak RSS during a mozilla-central clone drops: before: 364,356 KB after: 326,008 KB delta: -38,348 KB When combined with the previous patch, the total peak RSS decrease is now 190,116 KB. diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -343,10 +343,8 @@ class cg1packer(object): reorder=reorder): yield chunk progress(msgbundling, None) - for f in changedfiles: - fnodes[f] = {} # Callback for the manifest, used to collect linkrevs for filelog # revisions. # Returns the linkrev node (collected in lookupcl). @@ -357,9 +355,9 @@ class cg1packer(object): for f, n in mdata.iteritems(): if f in changedfiles: # record the first changeset introducing this filelog # version - fnodes[f].setdefault(n, clnode) + fnodes.setdefault(f, {}).setdefault(n, clnode) return clnode mfnodes = self.prune(mf, mfs, commonrevs, source) for chunk in self.group(mfnodes, mf, lookupmf, units=_('manifests'),