new file mode 100644
@@ -0,0 +1,95 @@
+# filterlang.py - a simple language to select files
+#
+# Copyright 2017 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import itertools
+from mercurial import (
+ error,
+ parser,
+ util,
+)
+
# Operator table handed to parser.parser() by _parse(). Derived from
# mercurial/revsetlang.py and cut down to what this language needs: boolean
# operators and parentheses.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    '(': (21, None, ('group', 1, ')'), None, None),
    '!': (10, None, ('not', 10), None, None),
    '&': (5, None, None, ('and', 5), None),
    '|': (4, None, None, ('or', 4), None),
    # the remaining token types all have binding strength 0
    ')': (0, None, None, None, None),
    'symbol': (0, 'symbol', None, None, None),
    'end': (0, None, None, None, None),
}
+
+def _tokenize(text):
+ text = memoryview(text) # make slice zero-copy
+ special = ' ()&|!'
+ pos = 0
+ l = len(text)
+ while pos < l:
+ symbol = ''.join(itertools.takewhile(lambda ch: ch not in special,
+ text[pos:]))
+ if symbol:
+ yield ('symbol', symbol, pos)
+ pos += len(symbol)
+ else: # special char
+ if text[pos] != ' ': # ignore space silently
+ yield (text[pos], None, pos)
+ pos += 1
+ yield ('end', None, pos)
+
def _parse(text):
    """parse the filter specification "text" and return its tree

    The tokens from _tokenize() are fed to a parser.parser built from the
    "elements" table. error.ParseError('invalid token', pos) is raised when
    the position returned by the parser differs from len(text).
    """
    p = parser.parser(elements)
    tree, consumed = p.parse(_tokenize(text))
    if consumed == len(text):
        return tree
    raise error.ParseError('invalid token', consumed)
+
+def _compile(tree):
+ op = tree[0]
+ if op == 'symbol':
+ name = tree[1]
+ op = name[0]
+ if op == '>': # size greater than test, ex. ">4M"
+ size = util.sizetoint(name[1:])
+ return lambda n, s: s > size
+ elif op == '.': # file extension test, ex. ".tar.gz"
+ return lambda n, s: n.endswith(name)
+ elif op == '/': # directory or full path test
+ p = name[1:].rstrip('/') # prefix
+ pl = len(p)
+ f = lambda n, s: n.startswith(p) and (len(n) == pl or n[pl] == '/')
+ return f
+ elif name == 'always': # always true
+ return lambda n, s: True
+ else:
+ raise error.ParseError('invalid symbol', name)
+ elif op in ['or', 'and']:
+ funcs = [_compile(t) for t in tree[1:]]
+ summary = {'or': any, 'and': all}[op]
+ return lambda n, s: summary(f(n, s) for f in funcs)
+ elif op == 'not':
+ return lambda n, s: not _compile(tree[1])(n, s)
+ elif op == 'group':
+ return _compile(tree[1])
+ else:
+ raise error.ProgrammingError('illegal tree: %r' % tree)
+
def compile(text):
    """build a function (path, size) -> bool from a filter specification

    The specification "text" is made of tests combined with the logic
    operators "&", "|", "!" and parentheses. The available tests are:

    - ".extname": file extension test
    - ">size": size test
    - "/dir/subdir": directory test
    - "always": always true; use "always" or "!always" if no test is wanted

    For example, "(.php & >10MB) | .zip | (/bin & !/bin/README)" selects all
    php files whose size is greater than 10 MB, all files whose name ends
    with ".zip", and all files under "bin" in the repo root except for
    "bin/README".
    """
    tree = _parse(text)
    return _compile(tree)
new file mode 100644
@@ -0,0 +1,30 @@
+import os
+import sys
+
+# make it runnable directly without run-tests.py
+sys.path[0:0] = [os.path.join(os.path.dirname(__file__), '..')]
+
+from lfs import filterlang
+
def check(text, truecases, falsecases):
    """compile "text" and print a line for every case with a wrong result

    Each case is a tuple of arguments for the compiled filter. Cases in
    "truecases" are expected to give a true result, cases in "falsecases" a
    false one. Nothing is printed when all cases behave as expected.
    """
    matches = filterlang.compile(text)
    for case in truecases:
        if matches(*case):
            continue
        print('unexpected: %r should include %r' % (text, case))
    for case in falsecases:
        if not matches(*case):
            continue
        print('unexpected: %r should exclude %r' % (text, case))
+
# Each entry is (filter text, cases that must match, cases that must not),
# where a case is a (path, size) pair. Entries are checked in order.
for spec, included, excluded in [
    # constant filters and stacked negation / grouping
    ('always',
     [('a.php', 123), ('b.txt', 0)],
     []),
    ('!!!!((!(!!always)))',
     [],
     [('a.php', 123), ('b.txt', 0)]),

    # grouping changes how "&" and "|" combine
    ('/a & (.b | .c)',
     [('a/b.b', 0), ('a/c.c', 0)],
     [('b/c.c', 0)]),
    ('(/a & .b) | .c',
     [('a/b.b', 0), ('a/c.c', 0), ('b/c.c', 0)],
     []),

    # every kind of test joined with "|"
    ('!!.bin | >20B | /bin | !>10 | !always',
     [('a.bin', 11), ('b.txt', 21), ('bin/abc', 11)],
     [('a.notbin', 11), ('b.txt', 11), ('bin2/abc', 11)]),

    ('(.php & >10KB) | .zip | (/bin & !/bin/README) | >1M',
     [('a.php', 15000), ('a.zip', 0), ('bin/a', 0), ('bin/README', 1e7)],
     [('a.php', 5000), ('b.zip2', 0), ('t/bin/a', 0), ('bin/README', 1)]),
]:
    check(spec, included, excluded)