Patchwork [3,of,4,lazy-manifest] manifest: use lazymanifest instead of manifestdict

login
register
mail settings
Submitter Augie Fackler
Date Jan. 8, 2015, 8:35 p.m.
Message ID <07da98fd38a1269c899b.1420749300@arthedain.pit.corp.google.com>
Download mbox | patch
Permalink /patch/7390/
State Changes Requested
Headers show

Comments

Augie Fackler - Jan. 8, 2015, 8:35 p.m.
# HG changeset patch
# User Augie Fackler <augie@google.com>
# Date 1420746668 18000
#      Thu Jan 08 14:51:08 2015 -0500
# Node ID 07da98fd38a1269c899b3c81fbe23dbc807511d0
# Parent  1165b1c87091d12209a3346834be5c1e3db8a9e4
manifest: use lazymanifest instead of manifestdict

Before any of my related changes on mozilla-central:

perfmanifest tip
! wall 0.268805 comb 0.260000 user 0.260000 sys 0.000000 (best of 37)
perftags
! result: 162
! wall 0.007099 comb 0.000000 user 0.000000 sys 0.000000 (best of 401)
perfstatus
! wall 0.415680 comb 0.420000 user 0.260000 sys 0.160000 (best of 24)
hgperf export tip
! wall 0.142118 comb 0.140000 user 0.140000 sys 0.000000 (best of 67)

after all of my changes on mozilla-central:

./hg:
perfmanifest tip
! wall 0.232640 comb 0.230000 user 0.220000 sys 0.010000 (best of 43)
perftags
! result: 162
! wall 0.007057 comb 0.010000 user 0.000000 sys 0.010000 (best of 395)
perfstatus
! wall 0.415503 comb 0.420000 user 0.280000 sys 0.140000 (best of 24)
hgperf export tip
! wall 0.025096 comb 0.030000 user 0.030000 sys 0.000000 (best of 102)

so it's no real change in performance on perf{manifest,tags,status},
but is a huge win on 'hgperf export tip'.

This should open the door to doing fast hashing on a per-directory
basis for the manifest type, which is the current plan for supporting
narrow-clone-friendly manifests.

There's a little performance work that could still be done here:
fastdelta() could be done significantly more intelligently by using
the internal state of the lazymanifest type in C, but that seems like
good future work.
Augie Fackler - Jan. 8, 2015, 8:51 p.m.
I just spotted a defect in this change, and will be mailing a resend soon. Sigh.

On Thu, Jan 8, 2015 at 3:35 PM, Augie Fackler <raf@durin42.com> wrote:
> # HG changeset patch
> # User Augie Fackler <augie@google.com>
> # Date 1420746668 18000
> #      Thu Jan 08 14:51:08 2015 -0500
> # Node ID 07da98fd38a1269c899b3c81fbe23dbc807511d0
> # Parent  1165b1c87091d12209a3346834be5c1e3db8a9e4
> manifest: use lazymanifest instead of manifestdict
>
> Before any of my related changes on mozilla-central:
>
> perfmanifest tip
> ! wall 0.268805 comb 0.260000 user 0.260000 sys 0.000000 (best of 37)
> perftags
> ! result: 162
> ! wall 0.007099 comb 0.000000 user 0.000000 sys 0.000000 (best of 401)
> perfstatus
> ! wall 0.415680 comb 0.420000 user 0.260000 sys 0.160000 (best of 24)
> hgperf export tip
> ! wall 0.142118 comb 0.140000 user 0.140000 sys 0.000000 (best of 67)
>
> after all of my changes on mozilla-central:
>
> ./hg:
> perfmanifest tip
> ! wall 0.232640 comb 0.230000 user 0.220000 sys 0.010000 (best of 43)
> perftags
> ! result: 162
> ! wall 0.007057 comb 0.010000 user 0.000000 sys 0.010000 (best of 395)
> perfstatus
> ! wall 0.415503 comb 0.420000 user 0.280000 sys 0.140000 (best of 24)
> hgperf export tip
> ! wall 0.025096 comb 0.030000 user 0.030000 sys 0.000000 (best of 102)
>
> so it's no real change in performance on perf{manifest,tags,status},
> but is a huge win on 'hgperf export tip'.
>
> This should open the door to doing fast hashing on a per-directory
> basis for the manifest type, which is the current plan for supporting
> narrow-clone-friendly manifests.
>
> There's a little performance work that could still be done here:
> fastdelta() could be done significantly more intelligently by using
> the internal state of the lazymanifest type in C, but that seems like
> good future work.
>
> diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
> --- a/hgext/largefiles/reposetup.py
> +++ b/hgext/largefiles/reposetup.py
> @@ -35,17 +35,18 @@ def reposetup(ui, repo):
>          def __getitem__(self, changeid):
>              ctx = super(lfilesrepo, self).__getitem__(changeid)
>              if self.unfiltered().lfstatus:
> -                class lfilesmanifestdict(manifest.manifestdict):
> +                class lfilesmanifest(manifest.lazymanifest):
>                      def __contains__(self, filename):
> -                        orig = super(lfilesmanifestdict, self).__contains__
> +                        orig = super(lfilesmanifest, self).__contains__
>                          return orig(filename) or orig(lfutil.standin(filename))
> +
>                  class lfilesctx(ctx.__class__):
>                      def files(self):
>                          filenames = super(lfilesctx, self).files()
>                          return [lfutil.splitstandin(f) or f for f in filenames]
>                      def manifest(self):
>                          man1 = super(lfilesctx, self).manifest()
> -                        man1.__class__ = lfilesmanifestdict
> +                        man1.__class__ = lfilesmanifest
>                          return man1
>                      def filectx(self, path, fileid=None, filelog=None):
>                          orig = super(lfilesctx, self).filectx
> diff --git a/mercurial/manifest.c b/mercurial/manifest.c
> --- a/mercurial/manifest.c
> +++ b/mercurial/manifest.c
> @@ -673,13 +673,14 @@ static lazymanifest *lzm_filtercopy(lazy
>         return tmp;
>  }
>
> -static PyObject *lzm_diff(lazymanifest * self, lazymanifest * other)
> +static PyObject *lzm_diff(lazymanifest * self, PyObject * args)
>  {
> -       if (!PyObject_IsInstance((PyObject *)other,
> -                                (PyObject *)&lazymanifestType)) {
> -               PyErr_Format(PyExc_TypeError, "other must be a lazymanifest");
> +       lazymanifest * other;
> +       PyObject * pyclean = NULL;
> +       if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
>                 return NULL;
>         }
> +       bool clean = (pyclean == NULL) ? false : PyObject_IsTrue(pyclean);
>         PyObject *emptyTup = NULL, *ret = NULL;
>         PyObject *es = PyString_FromString("");
>         if (es == NULL) {
> @@ -772,6 +773,8 @@ static PyObject *lzm_diff(lazymanifest *
>                                 }
>                                 PyDict_SetItem(ret, key, outer);
>                                 Py_DECREF(outer);
> +                       } else if (clean) {
> +                               PyDict_SetItem(ret, key, Py_None);
>                         }
>                         sneedle++;
>                         oneedle++;
> @@ -792,7 +795,7 @@ static PyMethodDef lzm_methods[] = {
>          "Make a copy of this lazymanifest."},
>         {"filtercopy", (PyCFunction)lzm_filtercopy, METH_O,
>          "Make a copy of this manifest filtered by matchfn."},
> -       {"diff", (PyCFunction)lzm_diff, METH_O,
> +       {"diff", (PyCFunction)lzm_diff, METH_VARARGS,
>          "Compare this lazymanifest to another one."},
>         {"text", (PyCFunction)lzm_text, METH_NOARGS,
>          "Encode this manifest to text."},
> diff --git a/mercurial/manifest.py b/mercurial/manifest.py
> --- a/mercurial/manifest.py
> +++ b/mercurial/manifest.py
> @@ -9,35 +9,40 @@ from i18n import _
>  import mdiff, parsers, error, revlog, util
>  import array, struct
>
> -class manifestdict(dict):
> -    def __init__(self, mapping=None, flags=None):
> -        if mapping is None:
> -            mapping = {}
> -        if flags is None:
> -            flags = {}
> -        dict.__init__(self, mapping)
> -        self._flags = flags
> -    def __setitem__(self, k, v):
> -        assert v is not None
> -        dict.__setitem__(self, k, v)
> -    def flags(self, f):
> -        return self._flags.get(f, "")
> -    def setflag(self, f, flags):
> -        """Set the flags (symlink, executable) for path f."""
> -        self._flags[f] = flags
> -    def copy(self):
> -        return manifestdict(self, dict.copy(self._flags))
> +class lazymanifest(object):
> +    def __init__(self, data):
> +        self._lm = parsers.lazymanifest(data)
> +
> +    def __getitem__(self, key):
> +        return self._lm[key][0]
> +
> +    def __len__(self):
> +        return len(self._lm)
> +
> +    def __setitem__(self, key, node):
> +        self._lm[key] = node, self.flags(key, '')
> +
> +    def __contains__(self, key):
> +        return key in self._lm
> +
> +    def __delitem__(self, key):
> +        del self._lm[key]
> +
> +    def keys(self):
> +        return [x[0] for x in self._lm]
> +
> +    def iterkeys(self):
> +        return iter(self.keys())
> +
>      def intersectfiles(self, files):
> -        '''make a new manifestdict with the intersection of self with files
> +        '''make a new lazymanifest with the intersection of self with files
>
>          The algorithm assumes that files is much smaller than self.'''
> -        ret = manifestdict()
> +        ret = lazymanifest('')
> +        lm = self._lm
>          for fn in files:
> -            if fn in self:
> -                ret[fn] = self[fn]
> -                flags = self._flags.get(fn, None)
> -                if flags:
> -                    ret._flags[fn] = flags
> +            if fn in lm:
> +                ret._lm[fn] = self._lm[fn]
>          return ret
>
>      def matches(self, match):
> @@ -50,11 +55,9 @@ class manifestdict(dict):
>              (not match.anypats() and util.all(fn in self for fn in files))):
>              return self.intersectfiles(files)
>
> -        mf = self.copy()
> -        for fn in mf.keys():
> -            if not match(fn):
> -                del mf[fn]
> -        return mf
> +        lm = lazymanifest('')
> +        lm._lm = self._lm.filtercopy(match)
> +        return lm
>
>      def diff(self, m2, clean=False):
>          '''Finds changes between the current manifest and m2.
> @@ -71,35 +74,36 @@ class manifestdict(dict):
>          the nodeid will be None and the flags will be the empty
>          string.
>          '''
> -        diff = {}
> +        return self._lm.diff(m2._lm, clean)
>
> -        for fn, n1 in self.iteritems():
> -            fl1 = self._flags.get(fn, '')
> -            n2 = m2.get(fn, None)
> -            fl2 = m2._flags.get(fn, '')
> -            if n2 is None:
> -                fl2 = ''
> -            if n1 != n2 or fl1 != fl2:
> -                diff[fn] = ((n1, fl1), (n2, fl2))
> -            elif clean:
> -                diff[fn] = None
> +    def setflag(self, key, flag):
> +        self._lm[key] = self[key], flag
>
> -        for fn, n2 in m2.iteritems():
> -            if fn not in self:
> -                fl2 = m2._flags.get(fn, '')
> -                diff[fn] = ((None, ''), (n2, fl2))
> +    def get(self, key, default=None):
> +        try:
> +            return self._lm[key][0]
> +        except KeyError:
> +            return default
>
> -        return diff
> +    def flags(self, key, default=''):
> +        try:
> +            return self._lm[key][1]
> +        except KeyError:
> +            return default
> +
> +    def __iter__(self):
> +        return (x[0] for x in self._lm)
> +
> +    def copy(self):
> +        c = lazymanifest('')
> +        c._lm = self._lm.copy()
> +        return c
> +
> +    def iteritems(self):
> +        return (x[:2] for x in self._lm)
>
>      def text(self):
> -        """Get the full data of this manifest as a bytestring."""
> -        fl = sorted(self)
> -        _checkforbidden(fl)
> -
> -        hex, flags = revlog.hex, self.flags
> -        # if this is changed to support newlines in filenames,
> -        # be sure to check the templates/ dir again (especially *-raw.tmpl)
> -        return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
> +        return self._lm.text()
>
>      def fastdelta(self, base, changes):
>          """Given a base manifest text as an array.array and a list of changes
> @@ -119,7 +123,8 @@ class manifestdict(dict):
>              # bs will either be the index of the item or the insert point
>              start, end = _msearch(addbuf, f, start)
>              if not todelete:
> -                l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
> +                h, fl = self._lm[f]
> +                l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
>              else:
>                  if start == end:
>                      # item we want to delete was not found, error out
> @@ -213,11 +218,6 @@ def _addlistdelta(addlist, x):
>                     + content for start, end, content in x)
>      return deltatext, newaddlist
>
> -def _parse(lines):
> -    mfdict = manifestdict()
> -    parsers.parse_manifest(mfdict, mfdict._flags, lines)
> -    return mfdict
> -
>  class manifest(revlog.revlog):
>      def __init__(self, opener):
>          # we expect to deal with not more than four revs at a time,
> @@ -227,7 +227,8 @@ class manifest(revlog.revlog):
>
>      def readdelta(self, node):
>          r = self.rev(node)
> -        return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
> +        data = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
> +        return lazymanifest(data)
>
>      def readfast(self, node):
>          '''use the faster of readdelta or read'''
> @@ -239,12 +240,12 @@ class manifest(revlog.revlog):
>
>      def read(self, node):
>          if node == revlog.nullid:
> -            return manifestdict() # don't upset local cache
> +            return lazymanifest('') # don't upset local cache
>          if node in self._mancache:
>              return self._mancache[node][0]
>          text = self.revision(node)
>          arraytext = array.array('c', text)
> -        mapping = _parse(text)
> +        mapping = lazymanifest(text)
>          self._mancache[node] = (mapping, arraytext)
>          return mapping
>
> @@ -255,12 +256,10 @@ class manifest(revlog.revlog):
>              mapping = self._mancache[node][0]
>              return mapping.get(f), mapping.flags(f)
>          text = self.revision(node)
> -        start, end = _msearch(text, f)
> -        if start == end:
> +        try:
> +            return lazymanifest(text)._lm[f]
> +        except KeyError:
>              return None, None
> -        l = text[start:end]
> -        f, n = l.split('\0')
> -        return revlog.bin(n[:40]), n[40:-1]
>
>      def add(self, map, transaction, link, p1, p2, added, removed):
>          if p1 in self._mancache:
> diff --git a/tests/test-manifest.py b/tests/test-manifest.py
> --- a/tests/test-manifest.py
> +++ b/tests/test-manifest.py
> @@ -5,6 +5,7 @@ import itertools
>  import silenttestrunner
>
>  from mercurial import parsers
> +from mercurial import manifest as manifestmod
>
>  HASH_1 = '1' * 40
>  HASH_2 = 'f' * 40
> @@ -211,5 +212,14 @@ class testmanifest(unittest.TestCase):
>          self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m))
>          self.assertEqual(len(m), len(list(m)))
>
> +    def testIntersectFiles(self):
> +        m = manifestmod.lazymanifest(A_HUGE_MANIFEST)
> +        m2 = m.intersectfiles(['file1', 'file200', 'file300'])
> +        w = ('file1\0%sx\n'
> +             'file200\0%sl\n'
> +             'file300\0%s\n') % (HASH_2, HASH_1, HASH_1)
> +        self.assertEqual(w, m2.text())
> +
> +
>  if __name__ == '__main__':
>      silenttestrunner.main(__name__)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
Durham Goode - Jan. 9, 2015, 9:30 p.m.
On 1/8/15 12:35 PM, Augie Fackler wrote:
> # HG changeset patch
> # User Augie Fackler <augie@google.com>
> # Date 1420746668 18000
> #      Thu Jan 08 14:51:08 2015 -0500
> # Node ID 07da98fd38a1269c899b3c81fbe23dbc807511d0
> # Parent  1165b1c87091d12209a3346834be5c1e3db8a9e4
> manifest: use lazymanifest instead of manifestdict
>
> Before any of my related changes on mozilla-central:
>
> perfmanifest tip
> ! wall 0.268805 comb 0.260000 user 0.260000 sys 0.000000 (best of 37)
> perftags
> ! result: 162
> ! wall 0.007099 comb 0.000000 user 0.000000 sys 0.000000 (best of 401)
> perfstatus
> ! wall 0.415680 comb 0.420000 user 0.260000 sys 0.160000 (best of 24)
> hgperf export tip
> ! wall 0.142118 comb 0.140000 user 0.140000 sys 0.000000 (best of 67)
>
> after all of my changes on mozilla-central:
>
> ./hg:
> perfmanifest tip
> ! wall 0.232640 comb 0.230000 user 0.220000 sys 0.010000 (best of 43)
> perftags
> ! result: 162
> ! wall 0.007057 comb 0.010000 user 0.000000 sys 0.010000 (best of 395)
> perfstatus
> ! wall 0.415503 comb 0.420000 user 0.280000 sys 0.140000 (best of 24)
> hgperf export tip
> ! wall 0.025096 comb 0.030000 user 0.030000 sys 0.000000 (best of 102)
>
> so it's no real change in performance on perf{manifest,tags,status},
> but is a huge win on 'hgperf export tip'.
>
Very nice! Is there any performance difference for arbitrary manifest 
diffs?  Like what happens in merge.update for hg update/rebase/etc?

Patch

diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -35,17 +35,18 @@  def reposetup(ui, repo):
         def __getitem__(self, changeid):
             ctx = super(lfilesrepo, self).__getitem__(changeid)
             if self.unfiltered().lfstatus:
-                class lfilesmanifestdict(manifest.manifestdict):
+                class lfilesmanifest(manifest.lazymanifest):
                     def __contains__(self, filename):
-                        orig = super(lfilesmanifestdict, self).__contains__
+                        orig = super(lfilesmanifest, self).__contains__
                         return orig(filename) or orig(lfutil.standin(filename))
+
                 class lfilesctx(ctx.__class__):
                     def files(self):
                         filenames = super(lfilesctx, self).files()
                         return [lfutil.splitstandin(f) or f for f in filenames]
                     def manifest(self):
                         man1 = super(lfilesctx, self).manifest()
-                        man1.__class__ = lfilesmanifestdict
+                        man1.__class__ = lfilesmanifest
                         return man1
                     def filectx(self, path, fileid=None, filelog=None):
                         orig = super(lfilesctx, self).filectx
diff --git a/mercurial/manifest.c b/mercurial/manifest.c
--- a/mercurial/manifest.c
+++ b/mercurial/manifest.c
@@ -673,13 +673,14 @@  static lazymanifest *lzm_filtercopy(lazy
 	return tmp;
 }
 
-static PyObject *lzm_diff(lazymanifest * self, lazymanifest * other)
+static PyObject *lzm_diff(lazymanifest * self, PyObject * args)
 {
-	if (!PyObject_IsInstance((PyObject *)other,
-				 (PyObject *)&lazymanifestType)) {
-		PyErr_Format(PyExc_TypeError, "other must be a lazymanifest");
+	lazymanifest * other;
+	PyObject * pyclean = NULL;
+	if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
 		return NULL;
 	}
+	bool clean = (pyclean == NULL) ? false : PyObject_IsTrue(pyclean);
 	PyObject *emptyTup = NULL, *ret = NULL;
 	PyObject *es = PyString_FromString("");
 	if (es == NULL) {
@@ -772,6 +773,8 @@  static PyObject *lzm_diff(lazymanifest *
 				}
 				PyDict_SetItem(ret, key, outer);
 				Py_DECREF(outer);
+			} else if (clean) {
+				PyDict_SetItem(ret, key, Py_None);
 			}
 			sneedle++;
 			oneedle++;
@@ -792,7 +795,7 @@  static PyMethodDef lzm_methods[] = {
 	 "Make a copy of this lazymanifest."},
 	{"filtercopy", (PyCFunction)lzm_filtercopy, METH_O,
 	 "Make a copy of this manifest filtered by matchfn."},
-	{"diff", (PyCFunction)lzm_diff, METH_O,
+	{"diff", (PyCFunction)lzm_diff, METH_VARARGS,
 	 "Compare this lazymanifest to another one."},
 	{"text", (PyCFunction)lzm_text, METH_NOARGS,
 	 "Encode this manifest to text."},
diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -9,35 +9,40 @@  from i18n import _
 import mdiff, parsers, error, revlog, util
 import array, struct
 
-class manifestdict(dict):
-    def __init__(self, mapping=None, flags=None):
-        if mapping is None:
-            mapping = {}
-        if flags is None:
-            flags = {}
-        dict.__init__(self, mapping)
-        self._flags = flags
-    def __setitem__(self, k, v):
-        assert v is not None
-        dict.__setitem__(self, k, v)
-    def flags(self, f):
-        return self._flags.get(f, "")
-    def setflag(self, f, flags):
-        """Set the flags (symlink, executable) for path f."""
-        self._flags[f] = flags
-    def copy(self):
-        return manifestdict(self, dict.copy(self._flags))
+class lazymanifest(object):
+    def __init__(self, data):
+        self._lm = parsers.lazymanifest(data)
+
+    def __getitem__(self, key):
+        return self._lm[key][0]
+
+    def __len__(self):
+        return len(self._lm)
+
+    def __setitem__(self, key, node):
+        self._lm[key] = node, self.flags(key, '')
+
+    def __contains__(self, key):
+        return key in self._lm
+
+    def __delitem__(self, key):
+        del self._lm[key]
+
+    def keys(self):
+        return [x[0] for x in self._lm]
+
+    def iterkeys(self):
+        return iter(self.keys())
+
     def intersectfiles(self, files):
-        '''make a new manifestdict with the intersection of self with files
+        '''make a new lazymanifest with the intersection of self with files
 
         The algorithm assumes that files is much smaller than self.'''
-        ret = manifestdict()
+        ret = lazymanifest('')
+        lm = self._lm
         for fn in files:
-            if fn in self:
-                ret[fn] = self[fn]
-                flags = self._flags.get(fn, None)
-                if flags:
-                    ret._flags[fn] = flags
+            if fn in lm:
+                ret._lm[fn] = self._lm[fn]
         return ret
 
     def matches(self, match):
@@ -50,11 +55,9 @@  class manifestdict(dict):
             (not match.anypats() and util.all(fn in self for fn in files))):
             return self.intersectfiles(files)
 
-        mf = self.copy()
-        for fn in mf.keys():
-            if not match(fn):
-                del mf[fn]
-        return mf
+        lm = lazymanifest('')
+        lm._lm = self._lm.filtercopy(match)
+        return lm
 
     def diff(self, m2, clean=False):
         '''Finds changes between the current manifest and m2.
@@ -71,35 +74,36 @@  class manifestdict(dict):
         the nodeid will be None and the flags will be the empty
         string.
         '''
-        diff = {}
+        return self._lm.diff(m2._lm, clean)
 
-        for fn, n1 in self.iteritems():
-            fl1 = self._flags.get(fn, '')
-            n2 = m2.get(fn, None)
-            fl2 = m2._flags.get(fn, '')
-            if n2 is None:
-                fl2 = ''
-            if n1 != n2 or fl1 != fl2:
-                diff[fn] = ((n1, fl1), (n2, fl2))
-            elif clean:
-                diff[fn] = None
+    def setflag(self, key, flag):
+        self._lm[key] = self[key], flag
 
-        for fn, n2 in m2.iteritems():
-            if fn not in self:
-                fl2 = m2._flags.get(fn, '')
-                diff[fn] = ((None, ''), (n2, fl2))
+    def get(self, key, default=None):
+        try:
+            return self._lm[key][0]
+        except KeyError:
+            return default
 
-        return diff
+    def flags(self, key, default=''):
+        try:
+            return self._lm[key][1]
+        except KeyError:
+            return default
+
+    def __iter__(self):
+        return (x[0] for x in self._lm)
+
+    def copy(self):
+        c = lazymanifest('')
+        c._lm = self._lm.copy()
+        return c
+
+    def iteritems(self):
+        return (x[:2] for x in self._lm)
 
     def text(self):
-        """Get the full data of this manifest as a bytestring."""
-        fl = sorted(self)
-        _checkforbidden(fl)
-
-        hex, flags = revlog.hex, self.flags
-        # if this is changed to support newlines in filenames,
-        # be sure to check the templates/ dir again (especially *-raw.tmpl)
-        return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
+        return self._lm.text()
 
     def fastdelta(self, base, changes):
         """Given a base manifest text as an array.array and a list of changes
@@ -119,7 +123,8 @@  class manifestdict(dict):
             # bs will either be the index of the item or the insert point
             start, end = _msearch(addbuf, f, start)
             if not todelete:
-                l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
+                h, fl = self._lm[f]
+                l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
             else:
                 if start == end:
                     # item we want to delete was not found, error out
@@ -213,11 +218,6 @@  def _addlistdelta(addlist, x):
                    + content for start, end, content in x)
     return deltatext, newaddlist
 
-def _parse(lines):
-    mfdict = manifestdict()
-    parsers.parse_manifest(mfdict, mfdict._flags, lines)
-    return mfdict
-
 class manifest(revlog.revlog):
     def __init__(self, opener):
         # we expect to deal with not more than four revs at a time,
@@ -227,7 +227,8 @@  class manifest(revlog.revlog):
 
     def readdelta(self, node):
         r = self.rev(node)
-        return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
+        data = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
+        return lazymanifest(data)
 
     def readfast(self, node):
         '''use the faster of readdelta or read'''
@@ -239,12 +240,12 @@  class manifest(revlog.revlog):
 
     def read(self, node):
         if node == revlog.nullid:
-            return manifestdict() # don't upset local cache
+            return lazymanifest('') # don't upset local cache
         if node in self._mancache:
             return self._mancache[node][0]
         text = self.revision(node)
         arraytext = array.array('c', text)
-        mapping = _parse(text)
+        mapping = lazymanifest(text)
         self._mancache[node] = (mapping, arraytext)
         return mapping
 
@@ -255,12 +256,10 @@  class manifest(revlog.revlog):
             mapping = self._mancache[node][0]
             return mapping.get(f), mapping.flags(f)
         text = self.revision(node)
-        start, end = _msearch(text, f)
-        if start == end:
+        try:
+            return lazymanifest(text)._lm[f]
+        except KeyError:
             return None, None
-        l = text[start:end]
-        f, n = l.split('\0')
-        return revlog.bin(n[:40]), n[40:-1]
 
     def add(self, map, transaction, link, p1, p2, added, removed):
         if p1 in self._mancache:
diff --git a/tests/test-manifest.py b/tests/test-manifest.py
--- a/tests/test-manifest.py
+++ b/tests/test-manifest.py
@@ -5,6 +5,7 @@  import itertools
 import silenttestrunner
 
 from mercurial import parsers
+from mercurial import manifest as manifestmod
 
 HASH_1 = '1' * 40
 HASH_2 = 'f' * 40
@@ -211,5 +212,14 @@  class testmanifest(unittest.TestCase):
         self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m))
         self.assertEqual(len(m), len(list(m)))
 
+    def testIntersectFiles(self):
+        m = manifestmod.lazymanifest(A_HUGE_MANIFEST)
+        m2 = m.intersectfiles(['file1', 'file200', 'file300'])
+        w = ('file1\0%sx\n'
+             'file200\0%sl\n'
+             'file300\0%s\n') % (HASH_2, HASH_1, HASH_1)
+        self.assertEqual(w, m2.text())
+
+
 if __name__ == '__main__':
     silenttestrunner.main(__name__)