Patchwork D2772: hgweb: parse and store POST form data

login
register
mail settings
Submitter phabricator
Date March 10, 2018, 1:23 a.m.
Message ID <differential-rev-PHID-DREV-ttw3dmkribcvkpdwz47r-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/29227/
State Superseded
Headers show

Comments

phabricator - March 10, 2018, 1:23 a.m.
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This is essentially a port of what wsgirequest.__init__ was doing
  inline via cgi.parse().
  
  Our version is better because - unlike cgi.parse() - we keep the
  query string and POST data separate. We do still expose a unified
  dict for convenience, however. But consumers can now be explicit
  about whether a parameter should be fetched from the query string
  or form data.
  
  Because we can only read from POST data once, we had to support
  passing in previously resolved values to support hgwebdir's ugly
  hack of having to construct a new parsedrequest type.
  
  The only consumer we update to use the new API is wsgirequest.
  
  Also, the normalization of values is explicitly not performed in
  the new code. This is because the intent of our request object is
  to represent the raw request with minimal transformation.
  
  With this commit, all meaningful request components are now
  reflected on our parsedrequest type!

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2772

AFFECTED FILES
  mercurial/hgweb/hgwebdir_mod.py
  mercurial/hgweb/request.py

CHANGE DETAILS




To: indygreg, #hg-reviewers
Cc: mercurial-devel
phabricator - March 10, 2018, 3:56 a.m.
indygreg abandoned this revision.
indygreg added a subscriber: durin42.
indygreg added a comment.


  @durin42 thinks we don't need this feature. So I'll submit a patch to remove it instead.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2772

To: indygreg, #hg-reviewers
Cc: durin42, mercurial-devel

Patch

diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -96,8 +96,15 @@ 
     headers = attr.ib()
     # Request body input stream.
     bodyfh = attr.ib()
+    # Like ``querystringlist`` and ``querystringdict`` but for form data
+    # submitted on POST requests decoded from well-known content types.
+    postformlist = attr.ib()
+    postformdict = attr.ib()
 
-def parserequestfromenv(env, bodyfh):
+    # All "form" parameters. A combination of query string and POST form data.
+    params = attr.ib()
+
+def parserequestfromenv(env, bodyfh, previousformdata=None):
     """Parse URL components from environment variables.
 
     WSGI defines request attributes via environment variables. This function
@@ -220,6 +227,49 @@ 
     #if 'Content-Length' in headers:
     #    bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
 
+    # Form data is kinda wonky. It can come from request bodies, which we can
+    # only read once. Since hgwebdir may construct a new parsedrequest, we
+    # allow existing form data to be passed in to this function. That's kinda
+    # hacky.
+    if previousformdata is None:
+        if env['REQUEST_METHOD'] == 'POST':
+            # This is based on cgi.parse(), but without the hacky parts (like
+            # merging QUERY_STRING and setting QUERY_STRING as a side-effect).
+            ct, params = cgi.parse_header(env.get('CONTENT_TYPE', ''))
+            if ct == 'multipart/form-data':
+                # We don't have a way to preserve order. So we normalize to a
+                # list for consistency with x-www-form-urlencoded.
+                postformlist = []
+                for k, l in cgi.parse_multipart(bodyfh, params).iteritems():
+                    for v in l:
+                        postformlist.append((k, v))
+            elif ct == 'application/x-www-form-urlencoded':
+                cl = int(headers['Content-Length'])
+                postformlist = util.urlreq.parseqsl(bodyfh.read(cl),
+                                                    keep_blank_values=True)
+            else:
+                postformlist = []
+
+            postformdict = {}
+            for k, v in postformdict:
+                if k in postformdict:
+                    postformdict[k].append(v)
+                else:
+                    postformdict[k] = [v]
+        else:
+            postformlist = []
+            postformdict = {}
+
+        # Now that we have the raw post data. Merge in query string data to
+        # provide a unified interface.
+        formdict = {k: list(v) for k, v in postformdict.iteritems()}
+        for k, l in querystringdict.iteritems():
+            if k not in formdict:
+                formdict[k] = []
+            formdict[k].extend(l)
+    else:
+        postformlist, postformdict, formdict = previousformdata
+
     return parsedrequest(method=env['REQUEST_METHOD'],
                          url=fullurl, baseurl=baseurl,
                          advertisedurl=advertisedfullurl,
@@ -231,7 +281,9 @@ 
                          querystringlist=querystringlist,
                          querystringdict=querystringdict,
                          headers=headers,
-                         bodyfh=bodyfh)
+                         bodyfh=bodyfh,
+                         postformlist=postformlist, postformdict=postformdict,
+                         params=formdict)
 
 class wsgirequest(object):
     """Higher-level API for a WSGI request.
@@ -258,15 +310,12 @@ 
         self.multiprocess = wsgienv[r'wsgi.multiprocess']
         self.run_once = wsgienv[r'wsgi.run_once']
         self.env = wsgienv
-        self.form = normalize(cgi.parse(inp,
-                                        self.env,
-                                        keep_blank_values=1))
+        self.req = parserequestfromenv(wsgienv, inp)
+        self.form = normalize(self.req.params)
         self._start_response = start_response
         self.server_write = None
         self.headers = []
 
-        self.req = parserequestfromenv(wsgienv, inp)
-
     def respond(self, status, type, filename=None, body=None):
         if not isinstance(type, str):
             type = pycompat.sysstr(type)
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -293,8 +293,14 @@ 
                     # variable.
                     # TODO this is kind of hacky and we should have a better
                     # way of doing this than with REPO_NAME side-effects.
+                    previousdata = (
+                        wsgireq.req.postformlist,
+                        wsgireq.req.postformdict,
+                        wsgireq.req.params,
+                    )
                     wsgireq.req = requestmod.parserequestfromenv(
-                        wsgireq.env, wsgireq.req.bodyfh)
+                        wsgireq.env, wsgireq.req.bodyfh,
+                        previousformdata=previousdata)
                     try:
                         # ensure caller gets private copy of ui
                         repo = hg.repository(self.ui.copy(), real)