From patchwork Thu Jun 25 15:12:57 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [v2] hgweb: encode WSGI environment like OS environment From: Manuel Jacob X-Patchwork-Id: 46570 Message-Id: <5adbe0742aac22c15d04.1593097977@tmp> To: mercurial-devel@mercurial-scm.org Date: Thu, 25 Jun 2020 17:12:57 +0200 # HG changeset patch # User Manuel Jacob # Date 1593049567 -7200 # Thu Jun 25 03:46:07 2020 +0200 # Branch stable # Node ID 5adbe0742aac22c15d04d143a08b854c77e4ea7c # Parent 9a3cc406efed4d1b11ccf91ede87fe8615672902 # EXP-Topic cgi_env_encoding hgweb: encode WSGI environment like OS environment Previously, the WSGI environment keys and values were encoded using latin-1. This resulted in a crash if a WSGI environment key or value could not be encoded using latin-1. On Unix, the OS environment is byte-based. Therefore we should do the reverse of what Python does for os.environ. On Windows, there’s no native byte-based OS environment. Therefore we should do the same as what mercurial.encoding does with the OS environment. diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py --- a/mercurial/hgweb/request.py +++ b/mercurial/hgweb/request.py @@ -8,10 +8,13 @@ from __future__ import absolute_import +import os + # import wsgiref.validate from ..thirdparty import attr from .. import ( + encoding, error, pycompat, util, @@ -162,10 +165,17 @@ # strings on Python 3 must be between \00000-\000FF. We deal with bytes # in Mercurial, so mass convert string keys and values to bytes. if pycompat.ispy3: + def tobytes(s): if not isinstance(s, str): return s - return s.encode('latin-1') + if pycompat.iswindows: + # This is what mercurial.encoding does for os.environ on Windows. + return encoding.strtolocal(s) + else: + # This is what is documented to be used for os.environ on Unix. + return os.fsencode(s) + env = {tobytes(k): tobytes(v) for k, v in pycompat.iteritems(env)} # Some hosting solutions are emulating hgwebdir, and dispatching directly diff --git a/tests/test-wsgirequest.py b/tests/test-wsgirequest.py --- a/tests/test-wsgirequest.py +++ b/tests/test-wsgirequest.py @@ -3,7 +3,7 @@ import unittest from mercurial.hgweb import request as requestmod -from mercurial import error +from mercurial import error, pycompat DEFAULT_ENV = { 'REQUEST_METHOD': 'GET', @@ -432,6 +432,18 @@ self.assertEqual(r.dispatchpath, b'path1/path2') self.assertEqual(r.reponame, b'repo') + def testenvencoding(self): + if pycompat.iswindows: + # On Windows, we can't generally know which non-ASCII characters + # are supported. + r = parse(DEFAULT_ENV, extra={'foo': 'bar'}) + self.assertEqual(r.rawenv[b'foo'], b'bar') + else: + # Unix is byte-based. Therefore we test all possible bytes. + b = b''.join(pycompat.bytechr(i) for i in range(256)) + r = parse(DEFAULT_ENV, extra={'foo': pycompat.fsdecode(b)}) + self.assertEqual(r.rawenv[b'foo'], b) + if __name__ == '__main__': import silenttestrunner