Patchwork [V3] hgweb: restrict usage of regular expressions in search

login
register
mail settings
Submitter Alexander Plavin
Date Aug. 16, 2013, 9 p.m.
Message ID <0cf9f8749e3d031259a6.1376686806@debian-alexander.dolgopa>
Download mbox | patch
Permalink /patch/2196/
State Deferred
Headers show

Comments

Alexander Plavin - Aug. 16, 2013, 9 p.m.
# HG changeset patch
# User Alexander Plavin <alexander@plav.in>
# Date 1376650882 -14400
#      Fri Aug 16 15:01:22 2013 +0400
# Node ID 0cf9f8749e3d031259a6c3ff131b4945d1dc3eeb
# Parent  d7684354b9a2755149fc8b9740d2770634d3185e
hgweb: restrict usage of regular expressions in search

If the search query has strings defining revset regular expressions
(those starting with 're:'), revset syntax is disabled. It eliminates the
possibility of ReDoS.
Alexander Plavin - Aug. 16, 2013, 9:02 p.m.
2013/8/17 Alexander Plavin <alexander@plav.in>:
> # HG changeset patch
> # User Alexander Plavin <alexander@plav.in>
> # Date 1376650882 -14400
> #      Fri Aug 16 15:01:22 2013 +0400
> # Node ID 0cf9f8749e3d031259a6c3ff131b4945d1dc3eeb
> # Parent  d7684354b9a2755149fc8b9740d2770634d3185e
> hgweb: restrict usage of regular expressions in search
>
> If the search query has strings defining revset regular expressions
> (those starting with 're:'), revset syntax is disabled. It eliminates the
> possibility of ReDoS.
>
> diff -r d7684354b9a2 -r 0cf9f8749e3d mercurial/hgweb/webcommands.py
> --- a/mercurial/hgweb/webcommands.py    Wed Aug 07 01:16:14 2013 +0400
> +++ b/mercurial/hgweb/webcommands.py    Fri Aug 16 15:01:22 2013 +0400
> @@ -9,7 +9,7 @@
>  import webutil
>  from mercurial import error, encoding, archival, templater, templatefilters
>  from mercurial.node import short, hex, nullid
> -from mercurial.util import binary
> +from mercurial.util import binary, any
>  from common import paritygen, staticfile, get_contact, ErrorResponse
>  from common import HTTP_OK, HTTP_FORBIDDEN, HTTP_NOT_FOUND
>  from mercurial import graphmod, patch
> @@ -175,6 +175,10 @@
>              # no revset syntax used
>              return 'kw'
>
> +        if any((token, (value or '')[:3]) == ('string', 're:')

Now 'any' is used legitimately here, since it is imported at the top of
the file. However, I don't know what to do about the check-code test,
which still fails here because of the use of 'any'.

> +               for token, value, pos in revset.tokenize(revdef)):
> +            return 'kw'
> +
>          mfunc = revset.match(None, revdef)
>          try:
>              # try running against empty subset
> diff -r d7684354b9a2 -r 0cf9f8749e3d tests/test-hgweb-commands.t
> --- a/tests/test-hgweb-commands.t       Wed Aug 07 01:16:14 2013 +0400
> +++ b/tests/test-hgweb-commands.t       Fri Aug 16 15:01:22 2013 +0400
> @@ -632,6 +632,56 @@
>
>
>
> +  $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT 'log?rev=user("test")&style=raw'
> +  200 Script output follows
> +
> +
> +  # HG changesets search
> +  # Node ID cad8025a2e87f88c06259790adfa15acb4080123
> +  # Query "user("test")"
> +
> +  changeset:   cad8025a2e87f88c06259790adfa15acb4080123
> +  revision:    3
> +  user:        test
> +  date:        Thu, 01 Jan 1970 00:00:00 +0000
> +  summary:     branch commit with null character: \x00 (esc)
> +  branch:      unstable
> +  tag:         tip
> +  bookmark:    something
> +
> +  changeset:   1d22e65f027e5a0609357e7d8e7508cd2ba5d2fe
> +  revision:    2
> +  user:        test
> +  date:        Thu, 01 Jan 1970 00:00:00 +0000
> +  summary:     branch
> +  branch:      stable
> +
> +  changeset:   a4f92ed23982be056b9852de5dfe873eaac7f0de
> +  revision:    1
> +  user:        test
> +  date:        Thu, 01 Jan 1970 00:00:00 +0000
> +  summary:     Added tag 1.0 for changeset 2ef0ac749a14
> +  branch:      default
> +
> +  changeset:   2ef0ac749a14e4f57a5a822464a0902c6f7f448f
> +  revision:    0
> +  user:        test
> +  date:        Thu, 01 Jan 1970 00:00:00 +0000
> +  summary:     base
> +  tag:         1.0
> +  bookmark:    anotherthing
> +
> +
> +  $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT 'log?rev=user("re:test")&style=raw'
> +  200 Script output follows
> +
> +
> +  # HG changesets search
> +  # Node ID cad8025a2e87f88c06259790adfa15acb4080123
> +  # Query "user("re:test")"
> +
> +
> +
>  File-related
>
>    $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT 'file/1/foo/?style=raw'
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel

Patch

diff -r d7684354b9a2 -r 0cf9f8749e3d mercurial/hgweb/webcommands.py
--- a/mercurial/hgweb/webcommands.py	Wed Aug 07 01:16:14 2013 +0400
+++ b/mercurial/hgweb/webcommands.py	Fri Aug 16 15:01:22 2013 +0400
@@ -9,7 +9,7 @@ 
 import webutil
 from mercurial import error, encoding, archival, templater, templatefilters
 from mercurial.node import short, hex, nullid
-from mercurial.util import binary
+from mercurial.util import binary, any
 from common import paritygen, staticfile, get_contact, ErrorResponse
 from common import HTTP_OK, HTTP_FORBIDDEN, HTTP_NOT_FOUND
 from mercurial import graphmod, patch
@@ -175,6 +175,10 @@ 
             # no revset syntax used
             return 'kw'
 
+        if any((token, (value or '')[:3]) == ('string', 're:')
+               for token, value, pos in revset.tokenize(revdef)):
+            return 'kw'
+
         mfunc = revset.match(None, revdef)
         try:
             # try running against empty subset
diff -r d7684354b9a2 -r 0cf9f8749e3d tests/test-hgweb-commands.t
--- a/tests/test-hgweb-commands.t	Wed Aug 07 01:16:14 2013 +0400
+++ b/tests/test-hgweb-commands.t	Fri Aug 16 15:01:22 2013 +0400
@@ -632,6 +632,56 @@ 
   
   
 
+  $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT 'log?rev=user("test")&style=raw'
+  200 Script output follows
+  
+  
+  # HG changesets search
+  # Node ID cad8025a2e87f88c06259790adfa15acb4080123
+  # Query "user("test")"
+  
+  changeset:   cad8025a2e87f88c06259790adfa15acb4080123
+  revision:    3
+  user:        test
+  date:        Thu, 01 Jan 1970 00:00:00 +0000
+  summary:     branch commit with null character: \x00 (esc)
+  branch:      unstable
+  tag:         tip
+  bookmark:    something
+  
+  changeset:   1d22e65f027e5a0609357e7d8e7508cd2ba5d2fe
+  revision:    2
+  user:        test
+  date:        Thu, 01 Jan 1970 00:00:00 +0000
+  summary:     branch
+  branch:      stable
+  
+  changeset:   a4f92ed23982be056b9852de5dfe873eaac7f0de
+  revision:    1
+  user:        test
+  date:        Thu, 01 Jan 1970 00:00:00 +0000
+  summary:     Added tag 1.0 for changeset 2ef0ac749a14
+  branch:      default
+  
+  changeset:   2ef0ac749a14e4f57a5a822464a0902c6f7f448f
+  revision:    0
+  user:        test
+  date:        Thu, 01 Jan 1970 00:00:00 +0000
+  summary:     base
+  tag:         1.0
+  bookmark:    anotherthing
+  
+  
+  $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT 'log?rev=user("re:test")&style=raw'
+  200 Script output follows
+  
+  
+  # HG changesets search
+  # Node ID cad8025a2e87f88c06259790adfa15acb4080123
+  # Query "user("re:test")"
+  
+  
+
 File-related
 
   $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT 'file/1/foo/?style=raw'