Patchwork D5045: hgdemandimport: remove unused functionality from vendored modules

Submitter phabricator
Date Oct. 13, 2018, 7:28 a.m.
Message ID <differential-rev-PHID-DREV-nus73o5beiffednyzioa-req@phab.mercurial-scm.org>
Permalink /patch/35829/
State New

Comments

phabricator - Oct. 13, 2018, 7:28 a.m.
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We'll never use this. It is just bloating the files.
  
  The PSF license section 3 stipulates that derived works...
  "include in any such work a brief summary of the changes made to Python."
  While I think commit messages and version control count, I'm going one
  step further and documenting things inline.
  
  This change makes test-check-module-imports.t happy.
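
  For context, the interface the vendored tokenize copy still exposes after
  these removals is the tokenize(readline) generator described in the
  docstring the patch drops; open(), generate_tokens() and the command-line
  main() go away. A minimal sketch of that generator follows, shown with the
  stdlib module the vendored copy mirrors (illustrative only; the patch does
  not show how hgdemandimport itself calls it):

    import io
    import tokenize

    source = b"import os\nx = 1\n"
    # tokenize(readline) yields TokenInfo tuples:
    #   (type, string, (start row, col), (end row, col), source line)
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))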

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5045

AFFECTED FILES
  hgdemandimport/py3token.py
  hgdemandimport/py3tokenize.py

CHANGE DETAILS

To: indygreg, #hg-reviewers
Cc: mercurial-devel

Patch

diff --git a/hgdemandimport/py3tokenize.py b/hgdemandimport/py3tokenize.py
--- a/hgdemandimport/py3tokenize.py
+++ b/hgdemandimport/py3tokenize.py
@@ -1,4 +1,4 @@ 
-# Vendored copy of Lib/tokenize.py from cpython.
+# Vendored copy of Lib/tokenize.py from cpython with modifications.
 
 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 Python Software Foundation; All
@@ -53,40 +53,22 @@ 
 # agrees to be bound by the terms and conditions of this License
 # Agreement.
 
-"""Tokenization help for Python programs.
-
-tokenize(readline) is a generator that breaks a stream of bytes into
-Python tokens.  It decodes the bytes according to PEP-0263 for
-determining source file encoding.
-
-It accepts a readline-like method which is called repeatedly to get the
-next line of input (or b"" for EOF).  It generates 5-tuples with these
-members:
-
-    the token type (see token.py)
-    the token (a string)
-    the starting (row, column) indices of the token (a 2-tuple of ints)
-    the ending (row, column) indices of the token (a 2-tuple of ints)
-    the original line (string)
-
-It is designed to match the working of the Python tokenizer exactly, except
-that it produces COMMENT tokens for comments and gives type OP for all
-operators.  Additionally, all token lists start with an ENCODING token
-which tells you which encoding was used to decode the bytes stream.
-"""
+# Changes from official Python source code:
+#
+# * Removed main() and related functionality.
+# * Removed generate_tokens().
+# * Removed open().
+# * Removed module docstring.
 
 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
                'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
                'Michael Foord')
-from builtins import open as _builtin_open
 from codecs import lookup, BOM_UTF8
 import collections
-from io import TextIOWrapper
 from itertools import chain
 import itertools as _itertools
 import re
-import sys
 from token import *
 
 cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
@@ -495,22 +477,6 @@ 
     return default, [first, second]
 
 
-def open(filename):
-    """Open a file in read only mode using the encoding detected by
-    detect_encoding().
-    """
-    buffer = _builtin_open(filename, 'rb')
-    try:
-        encoding, lines = detect_encoding(buffer.readline)
-        buffer.seek(0)
-        text = TextIOWrapper(buffer, encoding, line_buffering=True)
-        text.mode = 'r'
-        return text
-    except:
-        buffer.close()
-        raise
-
-
 def tokenize(readline):
     """
     The tokenize() generator requires one argument, readline, which
@@ -720,72 +686,3 @@ 
     for indent in indents[1:]:                 # pop remaining indent levels
         yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
-
-
-# An undocumented, backwards compatible, API for all the places in the standard
-# library that expect to be able to use tokenize with strings
-def generate_tokens(readline):
-    return _tokenize(readline, None)
-
-def main():
-    import argparse
-
-    # Helper error handling routines
-    def perror(message):
-        print(message, file=sys.stderr)
-
-    def error(message, filename=None, location=None):
-        if location:
-            args = (filename,) + location + (message,)
-            perror("%s:%d:%d: error: %s" % args)
-        elif filename:
-            perror("%s: error: %s" % (filename, message))
-        else:
-            perror("error: %s" % message)
-        sys.exit(1)
-
-    # Parse the arguments and options
-    parser = argparse.ArgumentParser(prog='python -m tokenize')
-    parser.add_argument(dest='filename', nargs='?',
-                        metavar='filename.py',
-                        help='the file to tokenize; defaults to stdin')
-    parser.add_argument('-e', '--exact', dest='exact', action='store_true',
-                        help='display token names using the exact type')
-    args = parser.parse_args()
-
-    try:
-        # Tokenize the input
-        if args.filename:
-            filename = args.filename
-            with _builtin_open(filename, 'rb') as f:
-                tokens = list(tokenize(f.readline))
-        else:
-            filename = "<stdin>"
-            tokens = _tokenize(sys.stdin.readline, None)
-
-        # Output the tokenization
-        for token in tokens:
-            token_type = token.type
-            if args.exact:
-                token_type = token.exact_type
-            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
-            print("%-20s%-15s%-15r" %
-                  (token_range, tok_name[token_type], token.string))
-    except IndentationError as err:
-        line, column = err.args[1][1:3]
-        error(err.args[0], filename, (line, column))
-    except TokenError as err:
-        line, column = err.args[1]
-        error(err.args[0], filename, (line, column))
-    except SyntaxError as err:
-        error(err, filename)
-    except OSError as err:
-        error(err)
-    except KeyboardInterrupt:
-        print("interrupted\n")
-    except Exception as err:
-        perror("unexpected error: %s" % err)
-        raise
-
-if __name__ == "__main__":
-    main()
diff --git a/hgdemandimport/py3token.py b/hgdemandimport/py3token.py
--- a/hgdemandimport/py3token.py
+++ b/hgdemandimport/py3token.py
@@ -1,4 +1,4 @@ 
-# Vendored copy of Lib/token.py from cpython.
+# Vendored copy of Lib/token.py from cpython with modifications.
 
 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 Python Software Foundation; All
@@ -53,6 +53,10 @@ 
 # agrees to be bound by the terms and conditions of this License
 # Agreement.
 
+# Changes from official Python source code:
+#
+# * _main() and related functionality removed.
+
 """Token constants (from "token.h")."""
 
 __all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
@@ -143,73 +147,3 @@ 
 
 def ISEOF(x):
     return x == ENDMARKER
-
-
-def _main():
-    import re
-    import sys
-    args = sys.argv[1:]
-    inFileName = args and args[0] or "Include/token.h"
-    outFileName = "Lib/token.py"
-    if len(args) > 1:
-        outFileName = args[1]
-    try:
-        fp = open(inFileName)
-    except OSError as err:
-        sys.stdout.write("I/O error: %s\n" % str(err))
-        sys.exit(1)
-    with fp:
-        lines = fp.read().split("\n")
-    prog = re.compile(
-        r"#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
-        re.IGNORECASE)
-    comment_regex = re.compile(
-        r"^\s*/\*\s*(.+?)\s*\*/\s*$",
-        re.IGNORECASE)
-
-    tokens = {}
-    prev_val = None
-    for line in lines:
-        match = prog.match(line)
-        if match:
-            name, val = match.group(1, 2)
-            val = int(val)
-            tokens[val] = {'token': name}          # reverse so we can sort them...
-            prev_val = val
-        else:
-            comment_match = comment_regex.match(line)
-            if comment_match and prev_val is not None:
-                comment = comment_match.group(1)
-                tokens[prev_val]['comment'] = comment
-    keys = sorted(tokens.keys())
-    # load the output skeleton from the target:
-    try:
-        fp = open(outFileName)
-    except OSError as err:
-        sys.stderr.write("I/O error: %s\n" % str(err))
-        sys.exit(2)
-    with fp:
-        format = fp.read().split("\n")
-    try:
-        start = format.index("#--start constants--") + 1
-        end = format.index("#--end constants--")
-    except ValueError:
-        sys.stderr.write("target does not contain format markers")
-        sys.exit(3)
-    lines = []
-    for key in keys:
-        lines.append("%s = %d" % (tokens[key]["token"], key))
-        if "comment" in tokens[key]:
-            lines.append("# %s" % tokens[key]["comment"])
-    format[start:end] = lines
-    try:
-        fp = open(outFileName, 'w')
-    except OSError as err:
-        sys.stderr.write("I/O error: %s\n" % str(err))
-        sys.exit(4)
-    with fp:
-        fp.write("\n".join(format))
-
-
-if __name__ == "__main__":
-    _main()