Submitter | Gregory Szorc |
---|---|
Date | July 2, 2017, 3:52 a.m. |
Message ID | <79e1b61f921b81b59d79.1498967532@ubuntu-vm-main> |
Download | mbox | patch |
Permalink | /patch/21911/ |
State | Accepted |
Headers | show |
Comments
On Sat, 01 Jul 2017 20:52:12 -0700, Gregory Szorc wrote: > # HG changeset patch > # User Gregory Szorc <gregory.szorc@gmail.com> > # Date 1498967479 25200 > # Sat Jul 01 20:51:19 2017 -0700 > # Node ID 79e1b61f921b81b59d791c26a92120875020b2b3 > # Parent 6d678ab1b10d0fddc73003d21aa3c7ec43194e2e > localrepo: cache types for filtered repos (issue5043) [...] > This patch adds a cache of of the dynamically generated repoview/filter > types on the localrepo object. Since we only generate each type > once, we cap the amount of memory that can leak to something > reasonable. > > After this change, `hg convert` no longer leaks memory on every > revision. The process will likely grow memory usage over time due > to e.g. larger manifests. But there are no leaks. > > diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py > --- a/mercurial/localrepo.py > +++ b/mercurial/localrepo.py > @@ -400,6 +400,9 @@ class localrepository(object): > # post-dirstate-status hooks > self._postdsstatus = [] > > + # Cache of types representing filtered repos. > + self._filteredrepotypes = weakref.WeakKeyDictionary() > + > # generic mapping between names and nodes > self.names = namespaces.namespaces() > > @@ -501,11 +504,21 @@ class localrepository(object): > > def filtered(self, name): > """Return a filtered version of a repository""" > - # build a new class with the mixin and the current class > - # (possibly subclass of the repo) > - class filteredrepo(repoview.repoview, self.unfiltered().__class__): > - pass > - return filteredrepo(self, name) > + # Python <3.4 easily leaks types via __mro__. See > + # https://bugs.python.org/issue17950. We cache dynamically > + # created types so this method doesn't leak on every > + # invocation. > + > + key = self.unfiltered().__class__ > + if key not in self._filteredrepotypes: > + # Build a new type with the repoview mixin and the base > + # class of this repo. Give it a name containing the > + # filter name to aid debugging. > + bases = (repoview.repoview, key) > + cls = type('%sfilteredrepo' % name, bases, {}) > + self._filteredrepotypes[key] = cls I think this may still leak a proxy class in hgweb-type applications. Do we need a type object per repository instance?
> On Jul 3, 2017, at 06:03, Yuya Nishihara <yuya@tcha.org> wrote: > >> On Sat, 01 Jul 2017 20:52:12 -0700, Gregory Szorc wrote: >> # HG changeset patch >> # User Gregory Szorc <gregory.szorc@gmail.com> >> # Date 1498967479 25200 >> # Sat Jul 01 20:51:19 2017 -0700 >> # Node ID 79e1b61f921b81b59d791c26a92120875020b2b3 >> # Parent 6d678ab1b10d0fddc73003d21aa3c7ec43194e2e >> localrepo: cache types for filtered repos (issue5043) > > [...] > >> This patch adds a cache of of the dynamically generated repoview/filter >> types on the localrepo object. Since we only generate each type >> once, we cap the amount of memory that can leak to something >> reasonable. >> >> After this change, `hg convert` no longer leaks memory on every >> revision. The process will likely grow memory usage over time due >> to e.g. larger manifests. But there are no leaks. >> >> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py >> --- a/mercurial/localrepo.py >> +++ b/mercurial/localrepo.py >> @@ -400,6 +400,9 @@ class localrepository(object): >> # post-dirstate-status hooks >> self._postdsstatus = [] >> >> + # Cache of types representing filtered repos. >> + self._filteredrepotypes = weakref.WeakKeyDictionary() >> + >> # generic mapping between names and nodes >> self.names = namespaces.namespaces() >> >> @@ -501,11 +504,21 @@ class localrepository(object): >> >> def filtered(self, name): >> """Return a filtered version of a repository""" >> - # build a new class with the mixin and the current class >> - # (possibly subclass of the repo) >> - class filteredrepo(repoview.repoview, self.unfiltered().__class__): >> - pass >> - return filteredrepo(self, name) >> + # Python <3.4 easily leaks types via __mro__. See >> + # https://bugs.python.org/issue17950. We cache dynamically >> + # created types so this method doesn't leak on every >> + # invocation. >> + >> + key = self.unfiltered().__class__ >> + if key not in self._filteredrepotypes: >> + # Build a new type with the repoview mixin and the base >> + # class of this repo. Give it a name containing the >> + # filter name to aid debugging. >> + bases = (repoview.repoview, key) >> + cls = type('%sfilteredrepo' % name, bases, {}) >> + self._filteredrepotypes[key] = cls > > I think this may still leak a proxy class in hgweb-type applications. Do > we need a type object per repository instance? I don't follow. Where's the leak? If you are proposing a module-level cache, I *think* that will work. As long as weak refs are used, cache entries should go away when the last underlying repo referencing them does.
Excerpts from Gregory Szorc's message of 2017-07-03 08:30:10 -0700: > > I think this may still leak a proxy class in hgweb-type applications. Do > > we need a type object per repository instance? > > I don't follow. Where's the leak? > > If you are proposing a module-level cache, I *think* that will work. As > long as weak refs are used, cache entries should go away when the last > underlying repo referencing them does. I think module-level is better. I tried to express that in comments for V1 but it might be unclear at that time. Since we only have limited (4?) filters, weakref could be optional.
On Mon, 3 Jul 2017 08:30:10 -0700, Gregory Szorc wrote: > > On Jul 3, 2017, at 06:03, Yuya Nishihara <yuya@tcha.org> wrote: > >> On Sat, 01 Jul 2017 20:52:12 -0700, Gregory Szorc wrote: > >> # HG changeset patch > >> # User Gregory Szorc <gregory.szorc@gmail.com> > >> # Date 1498967479 25200 > >> # Sat Jul 01 20:51:19 2017 -0700 > >> # Node ID 79e1b61f921b81b59d791c26a92120875020b2b3 > >> # Parent 6d678ab1b10d0fddc73003d21aa3c7ec43194e2e > >> localrepo: cache types for filtered repos (issue5043) > > > > [...] > > > >> This patch adds a cache of of the dynamically generated repoview/filter > >> types on the localrepo object. Since we only generate each type > >> once, we cap the amount of memory that can leak to something > >> reasonable. > >> > >> After this change, `hg convert` no longer leaks memory on every > >> revision. The process will likely grow memory usage over time due > >> to e.g. larger manifests. But there are no leaks. > >> > >> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py > >> --- a/mercurial/localrepo.py > >> +++ b/mercurial/localrepo.py > >> @@ -400,6 +400,9 @@ class localrepository(object): > >> # post-dirstate-status hooks > >> self._postdsstatus = [] > >> > >> + # Cache of types representing filtered repos. > >> + self._filteredrepotypes = weakref.WeakKeyDictionary() > >> + > >> # generic mapping between names and nodes > >> self.names = namespaces.namespaces() > >> > >> @@ -501,11 +504,21 @@ class localrepository(object): > >> > >> def filtered(self, name): > >> """Return a filtered version of a repository""" > >> - # build a new class with the mixin and the current class > >> - # (possibly subclass of the repo) > >> - class filteredrepo(repoview.repoview, self.unfiltered().__class__): > >> - pass > >> - return filteredrepo(self, name) > >> + # Python <3.4 easily leaks types via __mro__. See > >> + # https://bugs.python.org/issue17950. We cache dynamically > >> + # created types so this method doesn't leak on every > >> + # invocation. > >> + > >> + key = self.unfiltered().__class__ > >> + if key not in self._filteredrepotypes: > >> + # Build a new type with the repoview mixin and the base > >> + # class of this repo. Give it a name containing the > >> + # filter name to aid debugging. > >> + bases = (repoview.repoview, key) > >> + cls = type('%sfilteredrepo' % name, bases, {}) > >> + self._filteredrepotypes[key] = cls > > > > I think this may still leak a proxy class in hgweb-type applications. Do > > we need a type object per repository instance? > > I don't follow. Where's the leak? IIUC, dynamically-created classes aren't deleted (or the deletion is delayed until GC run) because they are referenced by another class. So they would have longer lifetime than the repo instance. If that isn't the case, I think per-instance cache is okay.
Excerpts from Yuya Nishihara's message of 2017-07-04 01:01:18 +0900: > IIUC, dynamically-created classes aren't deleted (or the deletion is > delayed until GC run) because they are referenced by another class. So > they would have longer lifetime than the repo instance. It's not deleted because klass.__mro__ references klass so it's a reference cycle. There are other cycles but one __mro__ is enough. That applies to all classes regardless where they get created. > If that isn't the case, I think per-instance cache is okay.
On Sat, Jul 01, 2017 at 08:52:12PM -0700, Gregory Szorc wrote: > # HG changeset patch > # User Gregory Szorc <gregory.szorc@gmail.com> > # Date 1498967479 25200 > # Sat Jul 01 20:51:19 2017 -0700 > # Node ID 79e1b61f921b81b59d791c26a92120875020b2b3 > # Parent 6d678ab1b10d0fddc73003d21aa3c7ec43194e2e > localrepo: cache types for filtered repos (issue5043) queued, thanks (ugh)
Patch
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -400,6 +400,9 @@ class localrepository(object): # post-dirstate-status hooks self._postdsstatus = [] + # Cache of types representing filtered repos. + self._filteredrepotypes = weakref.WeakKeyDictionary() + # generic mapping between names and nodes self.names = namespaces.namespaces() @@ -501,11 +504,21 @@ class localrepository(object): def filtered(self, name): """Return a filtered version of a repository""" - # build a new class with the mixin and the current class - # (possibly subclass of the repo) - class filteredrepo(repoview.repoview, self.unfiltered().__class__): - pass - return filteredrepo(self, name) + # Python <3.4 easily leaks types via __mro__. See + # https://bugs.python.org/issue17950. We cache dynamically + # created types so this method doesn't leak on every + # invocation. + + key = self.unfiltered().__class__ + if key not in self._filteredrepotypes: + # Build a new type with the repoview mixin and the base + # class of this repo. Give it a name containing the + # filter name to aid debugging. + bases = (repoview.repoview, key) + cls = type('%sfilteredrepo' % name, bases, {}) + self._filteredrepotypes[key] = cls + + return self._filteredrepotypes[key](self, name) @repofilecache('bookmarks', 'bookmarks.current') def _bookmarks(self): diff --git a/mercurial/statichttprepo.py b/mercurial/statichttprepo.py --- a/mercurial/statichttprepo.py +++ b/mercurial/statichttprepo.py @@ -165,6 +165,8 @@ class statichttprepository(localrepo.loc self.encodepats = None self.decodepats = None self._transref = None + # Cache of types representing filtered repos. + self._filteredrepotypes = {} def _restrictcapabilities(self, caps): caps = super(statichttprepository, self)._restrictcapabilities(caps)