Patchwork [1,of,3] contrib: add a codemod script to write coreconfigitem

login
register
mail settings
Submitter Jun Wu
Date July 13, 2017, 1:22 a.m.
Message ID <695702ea1caedaeeed9a.1499908966@x1c>
Download mbox | patch
Permalink /patch/22285/
State Accepted
Headers show

Comments

Jun Wu - July 13, 2017, 1:22 a.m.
# HG changeset patch
# User Jun Wu <quark@fb.com>
# Date 1499891115 25200
#      Wed Jul 12 13:25:15 2017 -0700
# Node ID 695702ea1caedaeeed9a6d63473c0e338adf35f4
# Parent  26e4ba058215e536d3827befbea99ff6203d35f8
# Available At https://bitbucket.org/quark-zju/hg-draft
#              hg pull https://bitbucket.org/quark-zju/hg-draft -r 695702ea1cae
contrib: add a codemod script to write coreconfigitem

It seems possible to automate the coreconfigitem migration. I have tried
RedBaron [1], which seems easy to use and suitable for this use case. The
script is kept in contrib in case we want to re-run it in the future to
cover new config options. The [web] section is ignored for now since its
usage of config is a bit weird and we would have issues like check-config
complaining about undocumented web.* options after the codemod.

Note that the script currently only works for core hg code and not for
extension code (it writes to mercurial/configitems.py). This gives other
people a chance to learn this area and improve the codemod script.

[1]: http://redbaron.pycqa.org/
Augie Fackler - July 14, 2017, 6:36 p.m.
On Wed, Jul 12, 2017 at 06:22:46PM -0700, Jun Wu wrote:
> # HG changeset patch
> # User Jun Wu <quark@fb.com>
> # Date 1499891115 25200
> #      Wed Jul 12 13:25:15 2017 -0700
> # Node ID 695702ea1caedaeeed9a6d63473c0e338adf35f4
> # Parent  26e4ba058215e536d3827befbea99ff6203d35f8
> # Available At https://bitbucket.org/quark-zju/hg-draft
> #              hg pull https://bitbucket.org/quark-zju/hg-draft -r 695702ea1cae
> contrib: add a codemod script to write coreconfigitem
>
> The coreconfigitem migration seems possible to be automatized. I have tried
> RedBaron [1] which seems easy to use and suitable for this usecase. The
> script is kept in contrib in case we want to re-run it in the future to
> cover new config options. [web] section is ignored now since its usage of
> config is a bit weird and we will have issues like check-config complaining
> undocumented web.* after codemod.
>
> Note that the script only works for core hg code and does not work for
> extension code now (it writes to mercurial/configitems.py). This gives other
> people a chance to learn this area and improve the codemod script.
>
> [1]: http://redbaron.pycqa.org/
>
> diff --git a/contrib/codemod/codemod_configitems.py b/contrib/codemod/codemod_configitems.py
> new file mode 100755
> --- /dev/null
> +++ b/contrib/codemod/codemod_configitems.py

This is going to seem highly nitpicky, but can we just call this
"contrib/codemod/configitems.py" instead of stuttering the codemod
part?

> @@ -0,0 +1,182 @@
> +#!/usr/bin/env python
> +# codemod_configitems.py - codemod tool to fill configitems
> +#
> +# Copyright 2017 Facebook, Inc.
> +#
> +# This software may be used and distributed according to the terms of the
> +# GNU General Public License version 2 or any later version.
> +from __future__ import absolute_import, print_function
> +
> +import os
> +import sys
> +
> +import redbaron
> +
> +def readpath(path):
> +    with open(path) as f:
> +        return f.read()
> +
> +def writepath(path, content):
> +    with open(path, 'w') as f:
> +        f.write(content)
> +
> +_configmethods = {'config', 'configbool', 'configint', 'configbytes',
> +                  'configlist', 'configdate'}
> +
> +def extractstring(rnode):
> +    """get the string from a RedBaron string or call_argument node"""
> +    while rnode.type != 'string':
> +        rnode = rnode.value
> +    return rnode.value[1:-1]  # unquote, "'str'" -> "str"
> +
> +def uiconfigitems(red):
> +    """match *.ui.config* pattern, yield (node, method, args, section, name)"""
> +    for node in red.find_all('atomtrailers'):
> +        entry = None
> +        try:
> +            obj = node[-3].value
> +            method = node[-2].value
> +            args = node[-1]
> +            section = args[0].value
> +            name = args[1].value
> +            if (obj in ('ui', 'self') and method in _configmethods
> +                and section.type == 'string' and name.type == 'string'):
> +                entry = (node, method, args, extractstring(section),
> +                         extractstring(name))
> +        except Exception:
> +            pass
> +        else:
> +            if entry:
> +                yield entry
> +
> +def coreconfigitems(red):
> +    """match coreconfigitem(...) pattern, yield (node, args, section, name)"""
> +    for node in red.find_all('atomtrailers'):
> +        entry = None
> +        try:
> +            args = node[1]
> +            section = args[0].value
> +            name = args[1].value
> +            if (node[0].value == 'coreconfigitem' and section.type == 'string'
> +                and name.type == 'string'):
> +                entry = (node, args, extractstring(section),
> +                         extractstring(name))
> +        except Exception:
> +            pass
> +        else:
> +            if entry:
> +                yield entry
> +
> +def registercoreconfig(cfgred, section, name, defaultrepr):
> +    """insert coreconfigitem to cfgred AST
> +
> +    section and name are plain string, defaultrepr is a string
> +    """
> +    # find a place to insert the "coreconfigitem" item
> +    entries = list(coreconfigitems(cfgred))
> +    for node, args, nodesection, nodename in reversed(entries):
> +        if (nodesection, nodename) < (section, name):
> +            # insert after this entry
> +            node.insert_after(
> +                'coreconfigitem(%r, %r,\n'
> +                '    default=%s,\n'
> +                ')' % (section, name, defaultrepr))
> +            return
> +
> +def main(argv):
> +    if not argv:
> +        print('Usage: codemod_configitems.py FILES\n'
> +              'For example, FILES could be "{hgext,mercurial}/*/**.py"')
> +    dirname = os.path.dirname
> +    reporoot = dirname(dirname(dirname(os.path.abspath(__file__))))
> +
> +    # register configitems to this destination
> +    cfgpath = os.path.join(reporoot, 'mercurial', 'configitems.py')
> +    cfgred = redbaron.RedBaron(readpath(cfgpath))
> +
> +    # state about what to do
> +    registered = set((s, n) for n, a, s, n in coreconfigitems(cfgred))
> +    toregister = {} # {(section, name): defaultrepr}
> +    coreconfigs = set() # {(section, name)}, whether it's used in core
> +
> +    # first loop: scan all files before taking any action
> +    for i, path in enumerate(argv):
> +        print('(%d/%d) scanning %s' % (i + 1, len(argv), path))
> +        iscore = ('mercurial' in path) and ('hgext' not in path)
> +        red = redbaron.RedBaron(readpath(path))
> +        # find all repo.ui.config* and ui.config* calls, and collect their
> +        # section, name and default value information.
> +        for node, method, args, section, name in uiconfigitems(red):
> +            if section == 'web':
> +                # [web] section has some weirdness, ignore them for now
> +                continue
> +            defaultrepr = None
> +            key = (section, name)
> +            if len(args) == 2:
> +                if key in registered:
> +                    continue
> +                if method == 'configlist':
> +                    defaultrepr = 'list'
> +                elif method == 'configbool':
> +                    defaultrepr = 'False'
> +                else:
> +                    defaultrepr = 'None'
> +            elif len(args) >= 3 and (args[2].target is None or
> +                                     args[2].target.value == 'default'):
> +                # try to understand the "default" value
> +                dnode = args[2].value
> +                if dnode.type == 'name':
> +                    if dnode.value in {'None', 'True', 'False'}:
> +                        defaultrepr = dnode.value
> +                elif dnode.type == 'string':
> +                    defaultrepr = repr(dnode.value[1:-1])
> +                elif dnode.type in ('int', 'float'):
> +                    defaultrepr = dnode.value
> +            # inconsistent default
> +            if key in toregister and toregister[key] != defaultrepr:
> +                defaultrepr = None
> +            # interesting to rewrite
> +            if key not in registered:
> +                if defaultrepr is None:
> +                    print('[note] %s: %s.%s: unsupported default'
> +                          % (path, section, name))
> +                    registered.add(key) # skip checking it again
> +                else:
> +                    toregister[key] = defaultrepr
> +                    if iscore:
> +                        coreconfigs.add(key)
> +
> +    # second loop: rewrite files given "toregister" result
> +    for path in argv:
> +        # reconstruct redbaron - trade CPU for memory
> +        red = redbaron.RedBaron(readpath(path))
> +        changed = False
> +        for node, method, args, section, name in uiconfigitems(red):
> +            key = (section, name)
> +            defaultrepr = toregister.get(key)
> +            if defaultrepr is None or key not in coreconfigs:
> +                continue
> +            if len(args) >= 3 and (args[2].target is None or
> +                                   args[2].target.value == 'default'):
> +                try:
> +                    del args[2]
> +                    changed = True
> +                except Exception:
> +                    # redbaron fails to do the rewrite due to indentation
> +                    # see https://github.com/PyCQA/redbaron/issues/100
> +                    print('[warn] %s: %s.%s: default needs manual removal'
> +                          % (path, section, name))
> +            if key not in registered:
> +                print('registering %s.%s' % (section, name))
> +                registercoreconfig(cfgred, section, name, defaultrepr)
> +                registered.add(key)
> +        if changed:
> +            print('updating %s' % path)
> +            writepath(path, red.dumps())
> +
> +    if toregister:
> +        print('updating configitems.py')
> +        writepath(cfgpath, cfgred.dumps())
> +
> +if __name__ == "__main__":
> +    sys.exit(main(sys.argv[1:]))
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Patch

diff --git a/contrib/codemod/codemod_configitems.py b/contrib/codemod/codemod_configitems.py
new file mode 100755
--- /dev/null
+++ b/contrib/codemod/codemod_configitems.py
@@ -0,0 +1,182 @@ 
+#!/usr/bin/env python
+# codemod_configitems.py - codemod tool to fill configitems
+#
+# Copyright 2017 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import, print_function
+
+import ast
+import os
+import sys
+
+import redbaron
+
+def readpath(path):
+    """return the entire content of the file at path"""
+    with open(path) as fobj:
+        content = fobj.read()
+    return content
+
+def writepath(path, content):
+    """replace the content of the file at path with content"""
+    out = open(path, 'w')
+    try:
+        out.write(content)
+    finally:
+        out.close()
+
+# names of the ui.config* accessor methods that uiconfigitems() matches
+_configmethods = {
+    'config',
+    'configbool',
+    'configbytes',
+    'configdate',
+    'configint',
+    'configlist',
+}
+
+def extractstring(rnode):
+    """get the string from a RedBaron string or call_argument node
+
+    wrapper nodes are unwrapped through their "value" attribute until a
+    node of type 'string' is reached. The source text of that node is then
+    evaluated as a Python literal, so every quoting style ('s', "s",
+    '''s''') and any escape sequence is decoded properly instead of
+    blindly stripping one character from each end.
+    """
+    while rnode.type != 'string':
+        rnode = rnode.value
+    return ast.literal_eval(rnode.value)  # unquote, "'str'" -> "str"
+
+def uiconfigitems(red):
+    """find *.ui.config*(section, name, ...) calls in a RedBaron tree
+
+    yield (node, method, args, section, name) for every atomtrailers node
+    whose last three parts are an object named "ui" or "self", one of the
+    methods in _configmethods, and a call whose first two arguments are
+    string literals. section and name are yielded unquoted.
+    """
+    for node in red.find_all('atomtrailers'):
+        try:
+            obj, method, args = node[-3].value, node[-2].value, node[-1]
+            section, name = args[0].value, args[1].value
+            matched = (obj in ('ui', 'self')
+                       and method in _configmethods
+                       and section.type == 'string'
+                       and name.type == 'string')
+            if matched:
+                found = (node, method, args, extractstring(section),
+                         extractstring(name))
+            else:
+                found = None
+        except Exception:
+            # a node without the expected shape is simply not a match
+            continue
+        if found:
+            yield found
+
+def coreconfigitems(red):
+    """find coreconfigitem(section, name, ...) calls in a RedBaron tree
+
+    yield (node, args, section, name) for every atomtrailers node that
+    starts with the name "coreconfigitem" followed by a call whose first
+    two arguments are string literals. section and name are yielded
+    unquoted.
+    """
+    for node in red.find_all('atomtrailers'):
+        try:
+            args = node[1]
+            section, name = args[0].value, args[1].value
+            if (node[0].value != 'coreconfigitem'
+                or section.type != 'string'
+                or name.type != 'string'):
+                continue
+            found = (node, args, extractstring(section),
+                     extractstring(name))
+        except Exception:
+            # a node without the expected shape is simply not a match
+            continue
+        yield found
+
+def registercoreconfig(cfgred, section, name, defaultrepr):
+    """insert coreconfigitem to cfgred AST
+
+    section and name are plain string, defaultrepr is a string holding the
+    source code of the default value.
+
+    The new item is inserted right after the last existing item that sorts
+    before (section, name). If there is no such item, it is inserted before
+    the first existing item, or appended to cfgred when cfgred has no
+    coreconfigitem at all, so a registration is never silently dropped.
+    """
+    code = ('coreconfigitem(%r, %r,\n'
+            '    default=%s,\n'
+            ')' % (section, name, defaultrepr))
+    # find a place to insert the "coreconfigitem" item
+    entries = list(coreconfigitems(cfgred))
+    for node, args, nodesection, nodename in reversed(entries):
+        if (nodesection, nodename) < (section, name):
+            # insert after this entry
+            node.insert_after(code)
+            return
+    if entries:
+        # the new item sorts before everything registered so far
+        entries[0][0].insert_before(code)
+    else:
+        # nothing registered yet, put it at the end of the module
+        cfgred.append(code)
+
+def _hasdefaultarg(args):
+    """check whether the third call argument carries the default value
+
+    this is the case when it is passed positionally or as "default=...".
+    """
+    return len(args) >= 3 and (args[2].target is None or
+                               args[2].target.value == 'default')
+
+def main(argv):
+    """move defaults of ui.config* calls into mercurial/configitems.py
+
+    argv is the list of paths of the Python files to process. All files are
+    scanned first to collect a default value per (section, name). Then the
+    default argument is removed from the matching calls and a
+    coreconfigitem is registered for every config that is used in core and
+    has a single, understood default.
+
+    Returns 1 if no path was given, None otherwise.
+    """
+    if not argv:
+        print('Usage: codemod_configitems.py FILES\n'
+              'For example, FILES could be "{hgext,mercurial}/*/**.py"')
+        # nothing to do; report the usage error through the exit status
+        return 1
+    dirname = os.path.dirname
+    reporoot = dirname(dirname(dirname(os.path.abspath(__file__))))
+
+    # register configitems to this destination
+    cfgpath = os.path.join(reporoot, 'mercurial', 'configitems.py')
+    cfgred = redbaron.RedBaron(readpath(cfgpath))
+
+    # state about what to do
+    registered = set((section, name) for _node, _args, section, name
+                     in coreconfigitems(cfgred))
+    toregister = {} # {(section, name): defaultrepr}
+    coreconfigs = set() # {(section, name)}, whether it's used in core
+    cfgchanged = False # whether a coreconfigitem got added to cfgred
+
+    # first loop: scan all files before taking any action
+    for i, path in enumerate(argv):
+        print('(%d/%d) scanning %s' % (i + 1, len(argv), path))
+        iscore = ('mercurial' in path) and ('hgext' not in path)
+        red = redbaron.RedBaron(readpath(path))
+        # find all repo.ui.config* and ui.config* calls, and collect their
+        # section, name and default value information.
+        for node, method, args, section, name in uiconfigitems(red):
+            if section == 'web':
+                # [web] section has some weirdness, ignore them for now
+                continue
+            defaultrepr = None
+            key = (section, name)
+            if len(args) == 2:
+                if key in registered:
+                    continue
+                # no explicit default, use the implicit one of the method
+                if method == 'configlist':
+                    defaultrepr = 'list'
+                elif method == 'configbool':
+                    defaultrepr = 'False'
+                else:
+                    defaultrepr = 'None'
+            elif _hasdefaultarg(args):
+                # try to understand the "default" value
+                dnode = args[2].value
+                if dnode.type == 'name':
+                    if dnode.value in {'None', 'True', 'False'}:
+                        defaultrepr = dnode.value
+                elif dnode.type == 'string':
+                    # evaluate the literal before calling repr() so escape
+                    # sequences are not escaped a second time
+                    defaultrepr = repr(ast.literal_eval(dnode.value))
+                elif dnode.type in ('int', 'float'):
+                    defaultrepr = dnode.value
+            # inconsistent default
+            if key in toregister and toregister[key] != defaultrepr:
+                defaultrepr = None
+            # interesting to rewrite
+            if key not in registered:
+                if defaultrepr is None:
+                    print('[note] %s: %s.%s: unsupported default'
+                          % (path, section, name))
+                    registered.add(key) # skip checking it again
+                    # forget what was collected so far, otherwise the
+                    # second loop would strip defaults from call sites
+                    # without ever registering the item
+                    toregister.pop(key, None)
+                    coreconfigs.discard(key)
+                else:
+                    toregister[key] = defaultrepr
+                    if iscore:
+                        coreconfigs.add(key)
+
+    # second loop: rewrite files given "toregister" result
+    for path in argv:
+        # reconstruct redbaron - trade CPU for memory
+        red = redbaron.RedBaron(readpath(path))
+        changed = False
+        for node, method, args, section, name in uiconfigitems(red):
+            key = (section, name)
+            defaultrepr = toregister.get(key)
+            if defaultrepr is None or key not in coreconfigs:
+                continue
+            if _hasdefaultarg(args):
+                try:
+                    del args[2]
+                    changed = True
+                except Exception:
+                    # redbaron fails to do the rewrite due to indentation
+                    # see https://github.com/PyCQA/redbaron/issues/100
+                    print('[warn] %s: %s.%s: default needs manual removal'
+                          % (path, section, name))
+            if key not in registered:
+                print('registering %s.%s' % (section, name))
+                registercoreconfig(cfgred, section, name, defaultrepr)
+                registered.add(key)
+                cfgchanged = True
+        if changed:
+            print('updating %s' % path)
+            writepath(path, red.dumps())
+
+    # only touch configitems.py when something was actually registered
+    if cfgchanged:
+        print('updating configitems.py')
+        writepath(cfgpath, cfgred.dumps())
+
+# run the codemod on the paths given on the command line; the return value
+# of main() is passed to sys.exit()
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))