Submitter | Augie Fackler |
---|---|
Date | Jan. 8, 2015, 8:35 p.m. |
Message ID | <07da98fd38a1269c899b.1420749300@arthedain.pit.corp.google.com> |
Download | mbox | patch |
Permalink | /patch/7390/ |
State | Changes Requested |
Headers | show |
Comments
I just spotted a defect in this change, and will be mailing a resend soon. Sigh. On Thu, Jan 8, 2015 at 3:35 PM, Augie Fackler <raf@durin42.com> wrote: > # HG changeset patch > # User Augie Fackler <augie@google.com> > # Date 1420746668 18000 > # Thu Jan 08 14:51:08 2015 -0500 > # Node ID 07da98fd38a1269c899b3c81fbe23dbc807511d0 > # Parent 1165b1c87091d12209a3346834be5c1e3db8a9e4 > manifest: use lazymanifest instead of manifestdict > > Before any of my related changes on mozilla-central: > > perfmanifest tip > ! wall 0.268805 comb 0.260000 user 0.260000 sys 0.000000 (best of 37) > perftags > ! result: 162 > ! wall 0.007099 comb 0.000000 user 0.000000 sys 0.000000 (best of 401) > perfstatus > ! wall 0.415680 comb 0.420000 user 0.260000 sys 0.160000 (best of 24) > hgperf export tip > ! wall 0.142118 comb 0.140000 user 0.140000 sys 0.000000 (best of 67) > > after all of my changes on mozilla-central: > > ./hg: > perfmanifest tip > ! wall 0.232640 comb 0.230000 user 0.220000 sys 0.010000 (best of 43) > perftags > ! result: 162 > ! wall 0.007057 comb 0.010000 user 0.000000 sys 0.010000 (best of 395) > perfstatus > ! wall 0.415503 comb 0.420000 user 0.280000 sys 0.140000 (best of 24) > hgperf export tip > ! wall 0.025096 comb 0.030000 user 0.030000 sys 0.000000 (best of 102) > > so it's no real change in performance on perf{manifest,tags,status}, > but is a huge win on 'hgperf export tip'. > > This should open the door to doing fast hashing on a per-directory > basis for the manifest type, which is the current plan for supporting > narrow-clone-friendly manifests. > > There's a little performance work that could still be done here: > fastdelta() could be done significantly more intelligently by using > the internal state of the lazymanifest type in C, but that seems like > good future work. > > diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py > --- a/hgext/largefiles/reposetup.py > +++ b/hgext/largefiles/reposetup.py > @@ -35,17 +35,18 @@ def reposetup(ui, repo): > def __getitem__(self, changeid): > ctx = super(lfilesrepo, self).__getitem__(changeid) > if self.unfiltered().lfstatus: > - class lfilesmanifestdict(manifest.manifestdict): > + class lfilesmanifest(manifest.lazymanifest): > def __contains__(self, filename): > - orig = super(lfilesmanifestdict, self).__contains__ > + orig = super(lfilesmanifest, self).__contains__ > return orig(filename) or orig(lfutil.standin(filename)) > + > class lfilesctx(ctx.__class__): > def files(self): > filenames = super(lfilesctx, self).files() > return [lfutil.splitstandin(f) or f for f in filenames] > def manifest(self): > man1 = super(lfilesctx, self).manifest() > - man1.__class__ = lfilesmanifestdict > + man1.__class__ = lfilesmanifest > return man1 > def filectx(self, path, fileid=None, filelog=None): > orig = super(lfilesctx, self).filectx > diff --git a/mercurial/manifest.c b/mercurial/manifest.c > --- a/mercurial/manifest.c > +++ b/mercurial/manifest.c > @@ -673,13 +673,14 @@ static lazymanifest *lzm_filtercopy(lazy > return tmp; > } > > -static PyObject *lzm_diff(lazymanifest * self, lazymanifest * other) > +static PyObject *lzm_diff(lazymanifest * self, PyObject * args) > { > - if (!PyObject_IsInstance((PyObject *)other, > - (PyObject *)&lazymanifestType)) { > - PyErr_Format(PyExc_TypeError, "other must be a lazymanifest"); > + lazymanifest * other; > + PyObject * pyclean = NULL; > + if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) { > return NULL; > } > + bool clean = (pyclean == NULL) ? false : PyObject_IsTrue(pyclean); > PyObject *emptyTup = NULL, *ret = NULL; > PyObject *es = PyString_FromString(""); > if (es == NULL) { > @@ -772,6 +773,8 @@ static PyObject *lzm_diff(lazymanifest * > } > PyDict_SetItem(ret, key, outer); > Py_DECREF(outer); > + } else if (clean) { > + PyDict_SetItem(ret, key, Py_None); > } > sneedle++; > oneedle++; > @@ -792,7 +795,7 @@ static PyMethodDef lzm_methods[] = { > "Make a copy of this lazymanifest."}, > {"filtercopy", (PyCFunction)lzm_filtercopy, METH_O, > "Make a copy of this manifest filtered by matchfn."}, > - {"diff", (PyCFunction)lzm_diff, METH_O, > + {"diff", (PyCFunction)lzm_diff, METH_VARARGS, > "Compare this lazymanifest to another one."}, > {"text", (PyCFunction)lzm_text, METH_NOARGS, > "Encode this manifest to text."}, > diff --git a/mercurial/manifest.py b/mercurial/manifest.py > --- a/mercurial/manifest.py > +++ b/mercurial/manifest.py > @@ -9,35 +9,40 @@ from i18n import _ > import mdiff, parsers, error, revlog, util > import array, struct > > -class manifestdict(dict): > - def __init__(self, mapping=None, flags=None): > - if mapping is None: > - mapping = {} > - if flags is None: > - flags = {} > - dict.__init__(self, mapping) > - self._flags = flags > - def __setitem__(self, k, v): > - assert v is not None > - dict.__setitem__(self, k, v) > - def flags(self, f): > - return self._flags.get(f, "") > - def setflag(self, f, flags): > - """Set the flags (symlink, executable) for path f.""" > - self._flags[f] = flags > - def copy(self): > - return manifestdict(self, dict.copy(self._flags)) > +class lazymanifest(object): > + def __init__(self, data): > + self._lm = parsers.lazymanifest(data) > + > + def __getitem__(self, key): > + return self._lm[key][0] > + > + def __len__(self): > + return len(self._lm) > + > + def __setitem__(self, key, node): > + self._lm[key] = node, self.flags(key, '') > + > + def __contains__(self, key): > + return key in self._lm > + > + def __delitem__(self, key): > + del self._lm[key] > + > + def keys(self): > + return [x[0] for x in self._lm] > + > + def iterkeys(self): > + return iter(self.keys()) > + > def intersectfiles(self, files): > - '''make a new manifestdict with the intersection of self with files > + '''make a new lazymanifest with the intersection of self with files > > The algorithm assumes that files is much smaller than self.''' > - ret = manifestdict() > + ret = lazymanifest('') > + lm = self._lm > for fn in files: > - if fn in self: > - ret[fn] = self[fn] > - flags = self._flags.get(fn, None) > - if flags: > - ret._flags[fn] = flags > + if fn in lm: > + ret._lm[fn] = self._lm[fn] > return ret > > def matches(self, match): > @@ -50,11 +55,9 @@ class manifestdict(dict): > (not match.anypats() and util.all(fn in self for fn in files))): > return self.intersectfiles(files) > > - mf = self.copy() > - for fn in mf.keys(): > - if not match(fn): > - del mf[fn] > - return mf > + lm = lazymanifest('') > + lm._lm = self._lm.filtercopy(match) > + return lm > > def diff(self, m2, clean=False): > '''Finds changes between the current manifest and m2. > @@ -71,35 +74,36 @@ class manifestdict(dict): > the nodeid will be None and the flags will be the empty > string. > ''' > - diff = {} > + return self._lm.diff(m2._lm, clean) > > - for fn, n1 in self.iteritems(): > - fl1 = self._flags.get(fn, '') > - n2 = m2.get(fn, None) > - fl2 = m2._flags.get(fn, '') > - if n2 is None: > - fl2 = '' > - if n1 != n2 or fl1 != fl2: > - diff[fn] = ((n1, fl1), (n2, fl2)) > - elif clean: > - diff[fn] = None > + def setflag(self, key, flag): > + self._lm[key] = self[key], flag > > - for fn, n2 in m2.iteritems(): > - if fn not in self: > - fl2 = m2._flags.get(fn, '') > - diff[fn] = ((None, ''), (n2, fl2)) > + def get(self, key, default=None): > + try: > + return self._lm[key][0] > + except KeyError: > + return default > > - return diff > + def flags(self, key, default=''): > + try: > + return self._lm[key][1] > + except KeyError: > + return default > + > + def __iter__(self): > + return (x[0] for x in self._lm) > + > + def copy(self): > + c = lazymanifest('') > + c._lm = self._lm.copy() > + return c > + > + def iteritems(self): > + return (x[:2] for x in self._lm) > > def text(self): > - """Get the full data of this manifest as a bytestring.""" > - fl = sorted(self) > - _checkforbidden(fl) > - > - hex, flags = revlog.hex, self.flags > - # if this is changed to support newlines in filenames, > - # be sure to check the templates/ dir again (especially *-raw.tmpl) > - return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl) > + return self._lm.text() > > def fastdelta(self, base, changes): > """Given a base manifest text as an array.array and a list of changes > @@ -119,7 +123,8 @@ class manifestdict(dict): > # bs will either be the index of the item or the insert point > start, end = _msearch(addbuf, f, start) > if not todelete: > - l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f)) > + h, fl = self._lm[f] > + l = "%s\0%s%s\n" % (f, revlog.hex(h), fl) > else: > if start == end: > # item we want to delete was not found, error out > @@ -213,11 +218,6 @@ def _addlistdelta(addlist, x): > + content for start, end, content in x) > return deltatext, newaddlist > > -def _parse(lines): > - mfdict = manifestdict() > - parsers.parse_manifest(mfdict, mfdict._flags, lines) > - return mfdict > - > class manifest(revlog.revlog): > def __init__(self, opener): > # we expect to deal with not more than four revs at a time, > @@ -227,7 +227,8 @@ class manifest(revlog.revlog): > > def readdelta(self, node): > r = self.rev(node) > - return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r))) > + data = mdiff.patchtext(self.revdiff(self.deltaparent(r), r)) > + return lazymanifest(data) > > def readfast(self, node): > '''use the faster of readdelta or read''' > @@ -239,12 +240,12 @@ class manifest(revlog.revlog): > > def read(self, node): > if node == revlog.nullid: > - return manifestdict() # don't upset local cache > + return lazymanifest('') # don't upset local cache > if node in self._mancache: > return self._mancache[node][0] > text = self.revision(node) > arraytext = array.array('c', text) > - mapping = _parse(text) > + mapping = lazymanifest(text) > self._mancache[node] = (mapping, arraytext) > return mapping > > @@ -255,12 +256,10 @@ class manifest(revlog.revlog): > mapping = self._mancache[node][0] > return mapping.get(f), mapping.flags(f) > text = self.revision(node) > - start, end = _msearch(text, f) > - if start == end: > + try: > + return lazymanifest(text)._lm[f] > + except KeyError: > return None, None > - l = text[start:end] > - f, n = l.split('\0') > - return revlog.bin(n[:40]), n[40:-1] > > def add(self, map, transaction, link, p1, p2, added, removed): > if p1 in self._mancache: > diff --git a/tests/test-manifest.py b/tests/test-manifest.py > --- a/tests/test-manifest.py > +++ b/tests/test-manifest.py > @@ -5,6 +5,7 @@ import itertools > import silenttestrunner > > from mercurial import parsers > +from mercurial import manifest as manifestmod > > HASH_1 = '1' * 40 > HASH_2 = 'f' * 40 > @@ -211,5 +212,14 @@ class testmanifest(unittest.TestCase): > self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m)) > self.assertEqual(len(m), len(list(m))) > > + def testIntersectFiles(self): > + m = manifestmod.lazymanifest(A_HUGE_MANIFEST) > + m2 = m.intersectfiles(['file1', 'file200', 'file300']) > + w = ('file1\0%sx\n' > + 'file200\0%sl\n' > + 'file300\0%s\n') % (HASH_2, HASH_1, HASH_1) > + self.assertEqual(w, m2.text()) > + > + > if __name__ == '__main__': > silenttestrunner.main(__name__) > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel
On 1/8/15 12:35 PM, Augie Fackler wrote: > # HG changeset patch > # User Augie Fackler <augie@google.com> > # Date 1420746668 18000 > # Thu Jan 08 14:51:08 2015 -0500 > # Node ID 07da98fd38a1269c899b3c81fbe23dbc807511d0 > # Parent 1165b1c87091d12209a3346834be5c1e3db8a9e4 > manifest: use lazymanifest instead of manifestdict > > Before any of my related changes on mozilla-central: > > perfmanifest tip > ! wall 0.268805 comb 0.260000 user 0.260000 sys 0.000000 (best of 37) > perftags > ! result: 162 > ! wall 0.007099 comb 0.000000 user 0.000000 sys 0.000000 (best of 401) > perfstatus > ! wall 0.415680 comb 0.420000 user 0.260000 sys 0.160000 (best of 24) > hgperf export tip > ! wall 0.142118 comb 0.140000 user 0.140000 sys 0.000000 (best of 67) > > after all of my changes on mozilla-central: > > ./hg: > perfmanifest tip > ! wall 0.232640 comb 0.230000 user 0.220000 sys 0.010000 (best of 43) > perftags > ! result: 162 > ! wall 0.007057 comb 0.010000 user 0.000000 sys 0.010000 (best of 395) > perfstatus > ! wall 0.415503 comb 0.420000 user 0.280000 sys 0.140000 (best of 24) > hgperf export tip > ! wall 0.025096 comb 0.030000 user 0.030000 sys 0.000000 (best of 102) > > so it's no real change in performance on perf{manifest,tags,status}, > but is a huge win on 'hgperf export tip'. > Very nice! Is there any performance difference for arbitrary manifest diffs? Like what happens in merge.update for hg update/rebase/etc?
Patch
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py --- a/hgext/largefiles/reposetup.py +++ b/hgext/largefiles/reposetup.py @@ -35,17 +35,18 @@ def reposetup(ui, repo): def __getitem__(self, changeid): ctx = super(lfilesrepo, self).__getitem__(changeid) if self.unfiltered().lfstatus: - class lfilesmanifestdict(manifest.manifestdict): + class lfilesmanifest(manifest.lazymanifest): def __contains__(self, filename): - orig = super(lfilesmanifestdict, self).__contains__ + orig = super(lfilesmanifest, self).__contains__ return orig(filename) or orig(lfutil.standin(filename)) + class lfilesctx(ctx.__class__): def files(self): filenames = super(lfilesctx, self).files() return [lfutil.splitstandin(f) or f for f in filenames] def manifest(self): man1 = super(lfilesctx, self).manifest() - man1.__class__ = lfilesmanifestdict + man1.__class__ = lfilesmanifest return man1 def filectx(self, path, fileid=None, filelog=None): orig = super(lfilesctx, self).filectx diff --git a/mercurial/manifest.c b/mercurial/manifest.c --- a/mercurial/manifest.c +++ b/mercurial/manifest.c @@ -673,13 +673,14 @@ static lazymanifest *lzm_filtercopy(lazy return tmp; } -static PyObject *lzm_diff(lazymanifest * self, lazymanifest * other) +static PyObject *lzm_diff(lazymanifest * self, PyObject * args) { - if (!PyObject_IsInstance((PyObject *)other, - (PyObject *)&lazymanifestType)) { - PyErr_Format(PyExc_TypeError, "other must be a lazymanifest"); + lazymanifest * other; + PyObject * pyclean = NULL; + if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) { return NULL; } + bool clean = (pyclean == NULL) ? false : PyObject_IsTrue(pyclean); PyObject *emptyTup = NULL, *ret = NULL; PyObject *es = PyString_FromString(""); if (es == NULL) { @@ -772,6 +773,8 @@ static PyObject *lzm_diff(lazymanifest * } PyDict_SetItem(ret, key, outer); Py_DECREF(outer); + } else if (clean) { + PyDict_SetItem(ret, key, Py_None); } sneedle++; oneedle++; @@ -792,7 +795,7 @@ static PyMethodDef lzm_methods[] = { "Make a copy of this lazymanifest."}, {"filtercopy", (PyCFunction)lzm_filtercopy, METH_O, "Make a copy of this manifest filtered by matchfn."}, - {"diff", (PyCFunction)lzm_diff, METH_O, + {"diff", (PyCFunction)lzm_diff, METH_VARARGS, "Compare this lazymanifest to another one."}, {"text", (PyCFunction)lzm_text, METH_NOARGS, "Encode this manifest to text."}, diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -9,35 +9,40 @@ from i18n import _ import mdiff, parsers, error, revlog, util import array, struct -class manifestdict(dict): - def __init__(self, mapping=None, flags=None): - if mapping is None: - mapping = {} - if flags is None: - flags = {} - dict.__init__(self, mapping) - self._flags = flags - def __setitem__(self, k, v): - assert v is not None - dict.__setitem__(self, k, v) - def flags(self, f): - return self._flags.get(f, "") - def setflag(self, f, flags): - """Set the flags (symlink, executable) for path f.""" - self._flags[f] = flags - def copy(self): - return manifestdict(self, dict.copy(self._flags)) +class lazymanifest(object): + def __init__(self, data): + self._lm = parsers.lazymanifest(data) + + def __getitem__(self, key): + return self._lm[key][0] + + def __len__(self): + return len(self._lm) + + def __setitem__(self, key, node): + self._lm[key] = node, self.flags(key, '') + + def __contains__(self, key): + return key in self._lm + + def __delitem__(self, key): + del self._lm[key] + + def keys(self): + return [x[0] for x in self._lm] + + def iterkeys(self): + return iter(self.keys()) + def intersectfiles(self, files): - '''make a new manifestdict with the intersection of self with files + '''make a new lazymanifest with the intersection of self with files The algorithm assumes that files is much smaller than self.''' - ret = manifestdict() + ret = lazymanifest('') + lm = self._lm for fn in files: - if fn in self: - ret[fn] = self[fn] - flags = self._flags.get(fn, None) - if flags: - ret._flags[fn] = flags + if fn in lm: + ret._lm[fn] = self._lm[fn] return ret def matches(self, match): @@ -50,11 +55,9 @@ class manifestdict(dict): (not match.anypats() and util.all(fn in self for fn in files))): return self.intersectfiles(files) - mf = self.copy() - for fn in mf.keys(): - if not match(fn): - del mf[fn] - return mf + lm = lazymanifest('') + lm._lm = self._lm.filtercopy(match) + return lm def diff(self, m2, clean=False): '''Finds changes between the current manifest and m2. @@ -71,35 +74,36 @@ class manifestdict(dict): the nodeid will be None and the flags will be the empty string. ''' - diff = {} + return self._lm.diff(m2._lm, clean) - for fn, n1 in self.iteritems(): - fl1 = self._flags.get(fn, '') - n2 = m2.get(fn, None) - fl2 = m2._flags.get(fn, '') - if n2 is None: - fl2 = '' - if n1 != n2 or fl1 != fl2: - diff[fn] = ((n1, fl1), (n2, fl2)) - elif clean: - diff[fn] = None + def setflag(self, key, flag): + self._lm[key] = self[key], flag - for fn, n2 in m2.iteritems(): - if fn not in self: - fl2 = m2._flags.get(fn, '') - diff[fn] = ((None, ''), (n2, fl2)) + def get(self, key, default=None): + try: + return self._lm[key][0] + except KeyError: + return default - return diff + def flags(self, key, default=''): + try: + return self._lm[key][1] + except KeyError: + return default + + def __iter__(self): + return (x[0] for x in self._lm) + + def copy(self): + c = lazymanifest('') + c._lm = self._lm.copy() + return c + + def iteritems(self): + return (x[:2] for x in self._lm) def text(self): - """Get the full data of this manifest as a bytestring.""" - fl = sorted(self) - _checkforbidden(fl) - - hex, flags = revlog.hex, self.flags - # if this is changed to support newlines in filenames, - # be sure to check the templates/ dir again (especially *-raw.tmpl) - return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl) + return self._lm.text() def fastdelta(self, base, changes): """Given a base manifest text as an array.array and a list of changes @@ -119,7 +123,8 @@ class manifestdict(dict): # bs will either be the index of the item or the insert point start, end = _msearch(addbuf, f, start) if not todelete: - l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f)) + h, fl = self._lm[f] + l = "%s\0%s%s\n" % (f, revlog.hex(h), fl) else: if start == end: # item we want to delete was not found, error out @@ -213,11 +218,6 @@ def _addlistdelta(addlist, x): + content for start, end, content in x) return deltatext, newaddlist -def _parse(lines): - mfdict = manifestdict() - parsers.parse_manifest(mfdict, mfdict._flags, lines) - return mfdict - class manifest(revlog.revlog): def __init__(self, opener): # we expect to deal with not more than four revs at a time, @@ -227,7 +227,8 @@ class manifest(revlog.revlog): def readdelta(self, node): r = self.rev(node) - return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r))) + data = mdiff.patchtext(self.revdiff(self.deltaparent(r), r)) + return lazymanifest(data) def readfast(self, node): '''use the faster of readdelta or read''' @@ -239,12 +240,12 @@ class manifest(revlog.revlog): def read(self, node): if node == revlog.nullid: - return manifestdict() # don't upset local cache + return lazymanifest('') # don't upset local cache if node in self._mancache: return self._mancache[node][0] text = self.revision(node) arraytext = array.array('c', text) - mapping = _parse(text) + mapping = lazymanifest(text) self._mancache[node] = (mapping, arraytext) return mapping @@ -255,12 +256,10 @@ class manifest(revlog.revlog): mapping = self._mancache[node][0] return mapping.get(f), mapping.flags(f) text = self.revision(node) - start, end = _msearch(text, f) - if start == end: + try: + return lazymanifest(text)._lm[f] + except KeyError: return None, None - l = text[start:end] - f, n = l.split('\0') - return revlog.bin(n[:40]), n[40:-1] def add(self, map, transaction, link, p1, p2, added, removed): if p1 in self._mancache: diff --git a/tests/test-manifest.py b/tests/test-manifest.py --- a/tests/test-manifest.py +++ b/tests/test-manifest.py @@ -5,6 +5,7 @@ import itertools import silenttestrunner from mercurial import parsers +from mercurial import manifest as manifestmod HASH_1 = '1' * 40 HASH_2 = 'f' * 40 @@ -211,5 +212,14 @@ class testmanifest(unittest.TestCase): self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m)) self.assertEqual(len(m), len(list(m))) + def testIntersectFiles(self): + m = manifestmod.lazymanifest(A_HUGE_MANIFEST) + m2 = m.intersectfiles(['file1', 'file200', 'file300']) + w = ('file1\0%sx\n' + 'file200\0%sl\n' + 'file300\0%s\n') % (HASH_2, HASH_1, HASH_1) + self.assertEqual(w, m2.text()) + + if __name__ == '__main__': silenttestrunner.main(__name__)