From patchwork Sun Feb 10 10:56:46 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [2,of,2,V4] hgweb: teach archive how to handle file patterns From: Angel Ezquerra X-Patchwork-Id: 917 Message-Id: To: mercurial-devel@selenic.com Date: Sun, 10 Feb 2013 11:56:46 +0100 # HG changeset patch # User Angel Ezquerra # Date 1360493525 -3600 # Node ID fb655ad16f6675265da9d472ded7140a223fb283 # Parent be3e96a41d0f4b7a1f1dd443f5261d6eeb66626a hgweb: teach archive how to handle file patterns The archive web command now takes into account the "file" request entry, if one is provided. The provided "file" is processed as a "path" pattern by default, which makes it easy to only archive a certain file or directory. However, it is possible to specify a different type of pattern, such as relglob by specifying it explicitly on the query URL. Note that only "safe" patterns are allowed. Safe patterns are 'path', 'relpath', 'glog' and 'relglob'. Other pattern types are not allowed because they could be expensive to calculate. With this change hgweb can to process requests such as: 1. http://mercurial.selenic.com/hg/archive/tip.zip/mercurial/templates This will download all files on the mercurial/templates directory as a zip file 2. http://mercurial.selenic.com/hg/archive/tip.tar.gz/relglob:*.py This will download all *.py files in the repository into a tar.gz file. An so forth. Note that this is a first step to add support for downloading directories from the web interface. Currently the only way to use this feature is by manually constructing the URL that you want to download. We will have to modify the archiveentry map entry on the different templates so that it adds the current folder path to the archive links. This revision also adds a two tests for this feature to test-archive.t. The first tests the selective archive feature and the second tests that the server rejects "unsafe" patterns. 
diff --git a/mercurial/hgweb/webcommands.py b/mercurial/hgweb/webcommands.py --- a/mercurial/hgweb/webcommands.py +++ b/mercurial/hgweb/webcommands.py @@ -803,6 +803,17 @@ if cnode == key or key == 'tip': arch_version = short(cnode) name = "%s-%s" % (reponame, arch_version) + + ctx = webutil.changectx(web.repo, req) + pats = [] + file = req.form.get('file', None) + defaultpat = 'path' + if file: + pats = [req.form['file'][0]] + if not scmutil.patsaresafe(pats, defaultpat): + msg = 'Archive pattern not allowed: %s' % pats[0] + raise ErrorResponse(HTTP_FORBIDDEN, msg) + mimetype, artype, extension, encoding = web.archive_specs[type_] headers = [ ('Content-Disposition', 'attachment; filename=%s%s' % (name, extension)) @@ -812,9 +823,9 @@ req.headers.extend(headers) req.respond(HTTP_OK, mimetype) - ctx = webutil.changectx(web.repo, req) + matchfn = scmutil.match(ctx, pats, default=defaultpat) archival.archive(web.repo, req, cnode, artype, prefix=name, - matchfn=scmutil.match(ctx, []), + matchfn=matchfn, subrepos=web.configbool("web", "archivesubrepos")) return [] diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py --- a/mercurial/scmutil.py +++ b/mercurial/scmutil.py @@ -682,6 +682,16 @@ return l +def patsaresafe(pats, defaultpattype): + '''return True if every pattern in pats has a safe type (path, relpath, glob or relglob); a pattern without a "type:" prefix is checked as defaultpattype''' + for pat in pats: + pattype = defaultpattype + if ':' in pat: + pattype = pat.split(':')[0] + if pattype.lower() not in ('path', 'relpath', 'glob', 'relglob'): + return False + return True + def expandpats(pats): if not util.expandglobs: return list(pats) diff --git a/tests/test-archive.t b/tests/test-archive.t --- a/tests/test-archive.t +++ b/tests/test-archive.t @@ -100,6 +100,13 @@ testing: test-archive-2c0277f05ed4/baz/bletch OK testing: test-archive-2c0277f05ed4/foo OK No errors detected in compressed data of archive.zip. 
+ $ python getarchive.py "$TIP" gz baz | gunzip | tar tf - 2>/dev/null + test-archive-2c0277f05ed4/baz/bletch + +test that we reject unsafe patterns + + $ python getarchive.py "$TIP" gz relre:baz + HTTP Error 403: Archive pattern not allowed: relre:baz $ "$TESTDIR/killdaemons.py" $DAEMON_PIDS