Submitter | Durham Goode |
---|---|
Date | Sept. 20, 2016, 11:47 p.m. |
Message ID | <69b91c12d904f329eff6.1474415273@dev111.prn1.facebook.com> |
Download | mbox | patch |
Permalink | /patch/16705/ |
State | Accepted |
Headers | show |
Comments
On Tue, Sep 20, 2016 at 4:47 PM, Durham Goode <durham@fb.com> wrote: > # HG changeset patch > # User Durham Goode <durham@fb.com> > # Date 1474415164 25200 > # Tue Sep 20 16:46:04 2016 -0700 > # Node ID 69b91c12d904f329eff6618ac43f5cfef631e8dc > # Parent 52a206d772021194ab4356aeba2c73650851a624 > manifest: change manifestlog mancache to be directory based > > In the last patch we added a get() function that allows fetching directory level > treemanifestctxs. It didn't handle caching at directory level though, so we need to > change our mancache to support multiple directories. > > diff --git a/mercurial/manifest.py b/mercurial/manifest.py > --- a/mercurial/manifest.py > +++ b/mercurial/manifest.py > @@ -1021,9 +1021,16 @@ class manifestlog(object): > usetreemanifest = opts.get('treemanifest', usetreemanifest) > self._treeinmem = usetreemanifest > > + # A dictionary containing the cache for each directory nit: "dictionary" doesn't add much. I'd rather say what is cached ([tree]manifestctx instances, right?). > + self._dirmancache = {} > + > # We'll separate this into it's own cache once oldmanifest is no longer > # used > - self._mancache = repo.manifest._mancache > + self._dirmancache[''] = repo.manifest._mancache IIUC, the root manifest's cache will be shared, but subdirectory logs will not be? I think the cache is not used very frequently, so maybe it's not a big deal? Do you know a good test case for it? Rebase? We're looking for a case where both manifest._mancache and manifestlog._dirmancache will be used, right? Perhaps there are no such cases so it doesn't matter? So it's probably fine, just wanted to point it out in case you can think of problem that I did not see. > + > + # A future patch makes this use the same config value as the existing > + # mancache > + self.cachesize = 4 > > @property > def _revlog(self): > @@ -1039,6 +1046,14 @@ class manifestlog(object): > """Retrieves the manifest instance for the given node. Throws a KeyError > if not found. > """ > + if node in self._dirmancache.get(dir, ()): > + cachemf = self._dirmancache[dir][node] > + # The old manifest may put non-ctx manifests in the cache, so > + # skip those since they don't implement the full api. > + if (isinstance(cachemf, manifestctx) or > + isinstance(cachemf, treemanifestctx)): > + return cachemf > + > if dir: > if self._revlog._treeondisk: > m = treemanifestctx(self._revlog.dirlog(dir), dir, node) > @@ -1047,20 +1062,17 @@ class manifestlog(object): > _("cannot ask for manifest directory '%s' in a flat " > "manifest") % dir) > else: > - if node in self._mancache: > - cachemf = self._mancache[node] > - # The old manifest may put non-ctx manifests in the cache, so > - # skip those since they don't implement the full api. > - if (isinstance(cachemf, manifestctx) or > - isinstance(cachemf, treemanifestctx)): > - return cachemf > - > if self._treeinmem: > m = treemanifestctx(self._revlog, '', node) > else: > m = manifestctx(self._revlog, node) > - if node != revlog.nullid: > - self._mancache[node] = m > + > + if node != revlog.nullid: > + mancache = self._dirmancache.get(dir) > + if not mancache: > + mancache = util.lrucachedict(self.cachesize) > + self._dirmancache[dir] = mancache > + mancache[node] = m > return m > > def add(self, m, transaction, link, p1, p2, added, removed): > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
On 9/21/16 1:31 PM, Martin von Zweigbergk wrote: > On Tue, Sep 20, 2016 at 4:47 PM, Durham Goode <durham@fb.com> wrote: >> # HG changeset patch >> # User Durham Goode <durham@fb.com> >> # Date 1474415164 25200 >> # Tue Sep 20 16:46:04 2016 -0700 >> # Node ID 69b91c12d904f329eff6618ac43f5cfef631e8dc >> # Parent 52a206d772021194ab4356aeba2c73650851a624 >> manifest: change manifestlog mancache to be directory based >> >> In the last patch we added a get() function that allows fetching directory level >> treemanifestctxs. It didn't handle caching at directory level though, so we need to >> change our mancache to support multiple directories. >> >> diff --git a/mercurial/manifest.py b/mercurial/manifest.py >> --- a/mercurial/manifest.py >> +++ b/mercurial/manifest.py >> @@ -1021,9 +1021,16 @@ class manifestlog(object): >> usetreemanifest = opts.get('treemanifest', usetreemanifest) >> self._treeinmem = usetreemanifest >> >> + # A dictionary containing the cache for each directory > nit: "dictionary" doesn't add much. I'd rather say what is cached > ([tree]manifestctx instances, right?). Will fix > >> + self._dirmancache = {} >> + >> # We'll separate this into it's own cache once oldmanifest is no longer >> # used >> - self._mancache = repo.manifest._mancache >> + self._dirmancache[''] = repo.manifest._mancache > IIUC, the root manifest's cache will be shared, but subdirectory logs > will not be? I think the cache is not used very frequently, so maybe > it's not a big deal? Do you know a good test case for it? Rebase? > We're looking for a case where both manifest._mancache and > manifestlog._dirmancache will be used, right? Perhaps there are no > such cases so it doesn't matter? So it's probably fine, just wanted to > point it out in case you can think of problem that I did not see. It's a bit trickier to share the directory level mancache since the directory level revlog may not have even been constructed yet when we construct the ctx (since it's not constructed until we call ctx.read()). I intend to get the entire manifest refactor in asap once the we unfreeze, and by the end the manifest class and it's cache will all be gone, so this is a very temporary measure.
Patch
diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -1021,9 +1021,16 @@ class manifestlog(object): usetreemanifest = opts.get('treemanifest', usetreemanifest) self._treeinmem = usetreemanifest + # A dictionary containing the cache for each directory + self._dirmancache = {} + # We'll separate this into it's own cache once oldmanifest is no longer # used - self._mancache = repo.manifest._mancache + self._dirmancache[''] = repo.manifest._mancache + + # A future patch makes this use the same config value as the existing + # mancache + self.cachesize = 4 @property def _revlog(self): @@ -1039,6 +1046,14 @@ class manifestlog(object): """Retrieves the manifest instance for the given node. Throws a KeyError if not found. """ + if node in self._dirmancache.get(dir, ()): + cachemf = self._dirmancache[dir][node] + # The old manifest may put non-ctx manifests in the cache, so + # skip those since they don't implement the full api. + if (isinstance(cachemf, manifestctx) or + isinstance(cachemf, treemanifestctx)): + return cachemf + if dir: if self._revlog._treeondisk: m = treemanifestctx(self._revlog.dirlog(dir), dir, node) @@ -1047,20 +1062,17 @@ class manifestlog(object): _("cannot ask for manifest directory '%s' in a flat " "manifest") % dir) else: - if node in self._mancache: - cachemf = self._mancache[node] - # The old manifest may put non-ctx manifests in the cache, so - # skip those since they don't implement the full api. - if (isinstance(cachemf, manifestctx) or - isinstance(cachemf, treemanifestctx)): - return cachemf - if self._treeinmem: m = treemanifestctx(self._revlog, '', node) else: m = manifestctx(self._revlog, node) - if node != revlog.nullid: - self._mancache[node] = m + + if node != revlog.nullid: + mancache = self._dirmancache.get(dir) + if not mancache: + mancache = util.lrucachedict(self.cachesize) + self._dirmancache[dir] = mancache + mancache[node] = m return m def add(self, m, transaction, link, p1, p2, added, removed):