Submitter | Gregory Szorc |
---|---|
Date | Sept. 9, 2015, 11:36 p.m. |
Message ID | <2a9ae40cdbbb6126b246.1441841793@gps-mbp.local> |
Download | mbox | patch |
Permalink | /patch/10453/ |
State | Accepted |
Headers | show |
Comments
On Wed, Sep 09, 2015 at 04:36:33PM -0700, Gregory Szorc wrote: > # HG changeset patch > # User Gregory Szorc <gregory.szorc@gmail.com> > # Date 1440294204 25200 > # Sat Aug 22 18:43:24 2015 -0700 > # Node ID 2a9ae40cdbbb6126b246c8496c24a3c325d6afd6 > # Parent f4ebdbb128db525cfb5adf60b5e88f43fa9bc962 > hgweb: use separate repo instances per thread Queued these, awesome work. > > Before this change, multiple threads/requests could share a > localrepository instance. This meant that all of localrepository needed > to be thread safe. Many bugs have been reported telling us that > localrepository isn't actually thread safe. > > While making localrepository thread safe is a noble cause, it is a lot > of work. And there is little gain from doing so. Due to Python's GIL, > only 1 thread may be processing Python code at a time. The benefits > to multi-threaded servers are marginal. > > Thread safety would be a lot of work for little gain. So, we're not > going to even attempt it. > > This patch establishes a pool of repos in hgweb. When a request arrives, > we obtain the most recently used repository from the pool or create a > new one if none is available. When the request has finished, we put that > repo back in the pool. > > We start with a pool size of 1. For servers using a single thread, the > pool will only ever be of size 1. For multi-threaded servers, the pool > size will grow to the max number of simultaneous requests the server > processes. > > No logic for pruning the pool has been implemented. We assume server > operators either limit the number of threads to something they can > handle or restart the Mercurial process after a certain amount of > requests or time has passed. > > diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py > --- a/mercurial/hgweb/hgweb_mod.py > +++ b/mercurial/hgweb/hgweb_mod.py > @@ -5,8 +5,9 @@ > # > # This software may be used and distributed according to the terms of the > # GNU General Public License version 2 or any later version. > > +import contextlib > import os > from mercurial import ui, hg, hook, error, encoding, templater, util, repoview > from mercurial.templatefilters import websub > from common import get_stat, ErrorResponse, permhooks, caching > @@ -207,24 +208,46 @@ class hgweb(object): > # displaying bundling progress bar while serving feel wrong and may > # break some wsgi implementation. > r.ui.setconfig('progress', 'disable', 'true', 'hgweb') > r.baseui.setconfig('progress', 'disable', 'true', 'hgweb') > - self._repo = hg.cachedlocalrepo(self._webifyrepo(r)) > + self._repos = [hg.cachedlocalrepo(self._webifyrepo(r))] > + self._lastrepo = self._repos[0] > hook.redirect(True) > self.reponame = name > > def _webifyrepo(self, repo): > repo = getwebview(repo) > self.websubtable = webutil.getwebsubs(repo) > return repo > > - def _getrepo(self): > - r, created = self._repo.fetch() > - if created: > - r = self._webifyrepo(r) > + @contextlib.contextmanager > + def _obtainrepo(self): > + """Obtain a repo unique to the caller. > > - self.mtime = self._repo.mtime > - return r > + Internally we maintain a stack of cachedlocalrepo instances > + to be handed out. If one is available, we pop it and return it, > + ensuring it is up to date in the process. If one is not available, > + we clone the most recently used repo instance and return it. > + > + It is currently possible for the stack to grow without bounds > + if the server allows infinite threads. However, servers should > + have a thread limit, thus establishing our limit. > + """ > + if self._repos: > + cached = self._repos.pop() > + r, created = cached.fetch() > + if created: > + r = self._webifyrepo(r) > + else: > + cached = self._lastrepo.copy() > + r, created = cached.fetch() > + > + self._lastrepo = cached > + self.mtime = cached.mtime > + try: > + yield r > + finally: > + self._repos.append(cached) > > def run(self): > """Start a server from CGI environment. > > @@ -250,9 +273,12 @@ class hgweb(object): > > This is typically only called by Mercurial. External consumers > should be using instances of this class as the WSGI application. > """ > - repo = self._getrepo() > + with self._obtainrepo() as repo: > + return self._runwsgi(req, repo) > + > + def _runwsgi(self, req, repo): > rctx = requestcontext(self, repo) > > # This state is global across all threads. > encoding.encoding = rctx.config('web', 'encoding', encoding.encoding) > diff --git a/tests/test-hgweb-non-interactive.t b/tests/test-hgweb-non-interactive.t > --- a/tests/test-hgweb-non-interactive.t > +++ b/tests/test-hgweb-non-interactive.t > @@ -63,9 +63,10 @@ by the WSGI standard and strictly implem > > print errors.getvalue() > > print '---- OS.ENVIRON wsgi variables' > > print sorted([x for x in os.environ if x.startswith('wsgi')]) > > print '---- request.ENVIRON wsgi variables' > - > print sorted([x for x in i._getrepo().ui.environ if x.startswith('wsgi')]) > + > with i._obtainrepo() as repo: > + > print sorted([x for x in repo.ui.environ if x.startswith('wsgi')]) > > EOF > $ python request.py > ---- STATUS > 200 Script output follows > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > https://selenic.com/mailman/listinfo/mercurial-devel
Patch
diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py --- a/mercurial/hgweb/hgweb_mod.py +++ b/mercurial/hgweb/hgweb_mod.py @@ -5,8 +5,9 @@ # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. +import contextlib import os from mercurial import ui, hg, hook, error, encoding, templater, util, repoview from mercurial.templatefilters import websub from common import get_stat, ErrorResponse, permhooks, caching @@ -207,24 +208,46 @@ class hgweb(object): # displaying bundling progress bar while serving feel wrong and may # break some wsgi implementation. r.ui.setconfig('progress', 'disable', 'true', 'hgweb') r.baseui.setconfig('progress', 'disable', 'true', 'hgweb') - self._repo = hg.cachedlocalrepo(self._webifyrepo(r)) + self._repos = [hg.cachedlocalrepo(self._webifyrepo(r))] + self._lastrepo = self._repos[0] hook.redirect(True) self.reponame = name def _webifyrepo(self, repo): repo = getwebview(repo) self.websubtable = webutil.getwebsubs(repo) return repo - def _getrepo(self): - r, created = self._repo.fetch() - if created: - r = self._webifyrepo(r) + @contextlib.contextmanager + def _obtainrepo(self): + """Obtain a repo unique to the caller. - self.mtime = self._repo.mtime - return r + Internally we maintain a stack of cachedlocalrepo instances + to be handed out. If one is available, we pop it and return it, + ensuring it is up to date in the process. If one is not available, + we clone the most recently used repo instance and return it. + + It is currently possible for the stack to grow without bounds + if the server allows infinite threads. However, servers should + have a thread limit, thus establishing our limit. + """ + if self._repos: + cached = self._repos.pop() + r, created = cached.fetch() + if created: + r = self._webifyrepo(r) + else: + cached = self._lastrepo.copy() + r, created = cached.fetch() + + self._lastrepo = cached + self.mtime = cached.mtime + try: + yield r + finally: + self._repos.append(cached) def run(self): """Start a server from CGI environment. @@ -250,9 +273,12 @@ class hgweb(object): This is typically only called by Mercurial. External consumers should be using instances of this class as the WSGI application. """ - repo = self._getrepo() + with self._obtainrepo() as repo: + return self._runwsgi(req, repo) + + def _runwsgi(self, req, repo): rctx = requestcontext(self, repo) # This state is global across all threads. encoding.encoding = rctx.config('web', 'encoding', encoding.encoding) diff --git a/tests/test-hgweb-non-interactive.t b/tests/test-hgweb-non-interactive.t --- a/tests/test-hgweb-non-interactive.t +++ b/tests/test-hgweb-non-interactive.t @@ -63,9 +63,10 @@ by the WSGI standard and strictly implem > print errors.getvalue() > print '---- OS.ENVIRON wsgi variables' > print sorted([x for x in os.environ if x.startswith('wsgi')]) > print '---- request.ENVIRON wsgi variables' - > print sorted([x for x in i._getrepo().ui.environ if x.startswith('wsgi')]) + > with i._obtainrepo() as repo: + > print sorted([x for x in repo.ui.environ if x.startswith('wsgi')]) > EOF $ python request.py ---- STATUS 200 Script output follows