Patchwork D2792: hgweb: port archive command to modern response API

login
register
mail settings
Submitter phabricator
Date March 11, 2018, 5:24 a.m.
Message ID <differential-rev-PHID-DREV-urgo5v4ganbkmcswuu4z-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/29289/
State Superseded
Headers show

Comments

phabricator - March 11, 2018, 5:24 a.m.
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Well, I tried to go with PEP 3333's recommendations and only allow
  our WSGI application to emit data via a response generator.
  Unfortunately, the "archive" command calls into the zipfile and
  tarfile modules and these operator on file objects and must send
  their data to an object with write(). There's no easy way turn
  these write() calls into a generator.
  
  So, we teach our response type how to expose a file object like
  object that can be used to write() output. We try to keep the API
  consistent with how things work currently: callers must call a
  setbody*(), then sendresponse() to trigger sending of headers,
  and only then can they get a handle on the object to perform
  writing.
  
  This required overloading the return value of @webcommand functions
  even more. Fortunately, we're almost completely ported off the
  legacy API. So we should be able to simplify matters in the near
  future.
  
  A test relying on this functionality has also been updated to use
  the new API.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2792

AFFECTED FILES
  hgext/keyword.py
  mercurial/hgweb/hgweb_mod.py
  mercurial/hgweb/request.py
  mercurial/hgweb/webcommands.py
  tests/hgweberror.py

CHANGE DETAILS




To: indygreg, #hg-reviewers
Cc: mercurial-devel
phabricator - March 16, 2018, 4:38 p.m.
yuja added inline comments.

INLINE COMMENTS

> webcommands.py:1220
> +    web.res.setbodywillwrite()
> +    assert list(web.res.sendresponse()) == []
> +

`assert` may be deleted by `-O`, but this one is important.

Can you send a followup?

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2792

To: indygreg, #hg-reviewers, durin42
Cc: yuja, mercurial-devel

Patch

diff --git a/tests/hgweberror.py b/tests/hgweberror.py
--- a/tests/hgweberror.py
+++ b/tests/hgweberror.py
@@ -10,9 +10,12 @@ 
     '''Dummy web command that raises an uncaught Exception.'''
 
     # Simulate an error after partial response.
-    if 'partialresponse' in req.req.qsparams:
-        req.respond(200, 'text/plain')
-        req.write('partial content\n')
+    if 'partialresponse' in web.req.qsparams:
+        web.res.status = b'200 Script output follows'
+        web.res.headers[b'Content-Type'] = b'text/plain'
+        web.res.setbodywillwrite()
+        list(web.res.sendresponse())
+        web.res.getbodyfile().write(b'partial content\n')
 
     raise AttributeError('I am an uncaught error!')
 
diff --git a/mercurial/hgweb/webcommands.py b/mercurial/hgweb/webcommands.py
--- a/mercurial/hgweb/webcommands.py
+++ b/mercurial/hgweb/webcommands.py
@@ -19,14 +19,10 @@ 
     ErrorResponse,
     HTTP_FORBIDDEN,
     HTTP_NOT_FOUND,
-    HTTP_OK,
     get_contact,
     paritygen,
     staticfile,
 )
-from . import (
-    request as requestmod,
-)
 
 from .. import (
     archival,
@@ -64,7 +60,9 @@ 
     The function can return the ``requestcontext.res`` instance to signal
     that it wants to use this object to generate the response. If an iterable
     is returned, the ``wsgirequest`` instance will be used and the returned
-    content will constitute the response body.
+    content will constitute the response body. ``True`` can be returned to
+    indicate that the function already sent output and the caller doesn't
+    need to do anything more to send the response.
 
     Usage:
 
@@ -1210,21 +1208,24 @@ 
                     'file(s) not found: %s' % file)
 
     mimetype, artype, extension, encoding = web.archivespecs[type_]
-    headers = [
-        ('Content-Disposition', 'attachment; filename=%s%s' % (name, extension))
-        ]
+
+    web.res.headers['Content-Type'] = mimetype
+    web.res.headers['Content-Disposition'] = 'attachment; filename=%s%s' % (
+        name, extension)
+
     if encoding:
-        headers.append(('Content-Encoding', encoding))
-    req.headers.extend(headers)
-    req.respond(HTTP_OK, mimetype)
+        web.res.headers['Content-Encoding'] = encoding
 
-    bodyfh = requestmod.offsettrackingwriter(req.write)
+    web.res.setbodywillwrite()
+    assert list(web.res.sendresponse()) == []
+
+    bodyfh = web.res.getbodyfile()
 
     archival.archive(web.repo, bodyfh, cnode, artype, prefix=name,
                      matchfn=match,
                      subrepos=web.configbool("web", "archivesubrepos"))
-    return []
 
+    return True
 
 @webcommand('static')
 def static(web, req, tmpl):
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -351,22 +351,42 @@ 
 
         self._bodybytes = None
         self._bodygen = None
+        self._bodywillwrite = False
         self._started = False
+        self._bodywritefn = None
+
+    def _verifybody(self):
+        if (self._bodybytes is not None or self._bodygen is not None
+            or self._bodywillwrite):
+            raise error.ProgrammingError('cannot define body multiple times')
 
     def setbodybytes(self, b):
         """Define the response body as static bytes."""
-        if self._bodybytes is not None or self._bodygen is not None:
-            raise error.ProgrammingError('cannot define body multiple times')
-
+        self._verifybody()
         self._bodybytes = b
         self.headers['Content-Length'] = '%d' % len(b)
 
     def setbodygen(self, gen):
         """Define the response body as a generator of bytes."""
-        if self._bodybytes is not None or self._bodygen is not None:
-            raise error.ProgrammingError('cannot define body multiple times')
+        self._verifybody()
+        self._bodygen = gen
+
+    def setbodywillwrite(self):
+        """Signal an intent to use write() to emit the response body.
+
+        **This is the least preferred way to send a body.**
 
-        self._bodygen = gen
+        It is preferred for WSGI applications to emit a generator of chunks
+        constituting the response body. However, some consumers can't emit
+        data this way. So, WSGI provides a way to obtain a ``write(data)``
+        function that can be used to synchronously perform an unbuffered
+        write.
+
+        Calling this function signals an intent to produce the body in this
+        manner.
+        """
+        self._verifybody()
+        self._bodywillwrite = True
 
     def sendresponse(self):
         """Send the generated response to the client.
@@ -384,7 +404,8 @@ 
         if not self.status:
             raise error.ProgrammingError('status line not defined')
 
-        if self._bodybytes is None and self._bodygen is None:
+        if (self._bodybytes is None and self._bodygen is None
+            and not self._bodywillwrite):
             raise error.ProgrammingError('response body not defined')
 
         # Various HTTP clients (notably httplib) won't read the HTTP response
@@ -434,15 +455,40 @@ 
                 if not chunk:
                     break
 
-        self._startresponse(pycompat.sysstr(self.status), self.headers.items())
+        write = self._startresponse(pycompat.sysstr(self.status),
+                                    self.headers.items())
+
         if self._bodybytes:
             yield self._bodybytes
         elif self._bodygen:
             for chunk in self._bodygen:
                 yield chunk
+        elif self._bodywillwrite:
+            self._bodywritefn = write
         else:
             error.ProgrammingError('do not know how to send body')
 
+    def getbodyfile(self):
+        """Obtain a file object like object representing the response body.
+
+        For this to work, you must call ``setbodywillwrite()`` and then
+        ``sendresponse()`` first. ``sendresponse()`` is a generator and the
+        function won't run to completion unless the generator is advanced. The
+        generator yields not items. The easiest way to consume it is with
+        ``list(res.sendresponse())``, which should resolve to an empty list -
+        ``[]``.
+        """
+        if not self._bodywillwrite:
+            raise error.ProgrammingError('must call setbodywillwrite() first')
+
+        if not self._started:
+            raise error.ProgrammingError('must call sendresponse() first; did '
+                                         'you remember to consume it since it '
+                                         'is a generator?')
+
+        assert self._bodywritefn
+        return offsettrackingwriter(self._bodywritefn)
+
 class wsgirequest(object):
     """Higher-level API for a WSGI request.
 
diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -408,10 +408,11 @@ 
 
                 if content is res:
                     return res.sendresponse()
-
-                wsgireq.respond(HTTP_OK, ctype)
-
-            return content
+                elif content is True:
+                    return []
+                else:
+                    wsgireq.respond(HTTP_OK, ctype)
+                    return content
 
         except (error.LookupError, error.RepoLookupError) as err:
             wsgireq.respond(HTTP_NOT_FOUND, ctype)
diff --git a/hgext/keyword.py b/hgext/keyword.py
--- a/hgext/keyword.py
+++ b/hgext/keyword.py
@@ -624,6 +624,9 @@ 
         res = orig(web, req, tmpl)
         if res is web.res:
             res = res.sendresponse()
+        elif res is True:
+            return
+
         for chunk in res:
             yield chunk
     finally: