Patchwork D5051: mercurial: implement custom module importer for Python 2.7 (RFC)

login
register
mail settings
Submitter phabricator
Date Oct. 13, 2018, 7:29 a.m.
Message ID <differential-rev-PHID-DREV-p56g7wkvozbduqgmhyzt-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/35836/
State New
Headers show

Comments

phabricator - Oct. 13, 2018, 7:29 a.m.
indygreg created this revision.
Herald added subscribers: mercurial-devel, mjpieters.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  THIS IS SUPER HACKY. IT DOESN'T INVALIDATE .pyc FILES, WHICH MEANS SOURCE
  CHANGES AREN'T PICKED UP. DO NOT LAND.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5051

AFFECTED FILES
  mercurial/__init__.py

CHANGE DETAILS




To: indygreg, #hg-reviewers
Cc: mjpieters, mercurial-devel

Patch

diff --git a/mercurial/__init__.py b/mercurial/__init__.py
--- a/mercurial/__init__.py
+++ b/mercurial/__init__.py
@@ -297,3 +297,114 @@ 
     if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path):
         # meta_path is used before any implicit finders and before sys.path.
         sys.meta_path.insert(0, hgpathentryfinder())
+
+# Python 2
+else:
+    import binascii
+    import errno
+    import io
+    import imp
+    import marshal
+    import os
+    import struct
+    from hgdemandimport import py3tokenize as tokenize
+
+    rootpath = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
+
+    def replacetokens(tokens, fullname):
+        for i, t in enumerate(tokens):
+            yield t
+
+    # Header to add to bytecode files. This MUST be changed when
+    # ``replacetokens`` or any mechanism that changes semantics of module
+    # loading is changed. Otherwise cached bytecode may get loaded without
+    # the new transformation mechanisms applied.
+    BYTECODEHEADER = b'HG\x00\x01'
+
+    def loadbytecode(path):
+        try:
+            with open(path, 'rb') as fh:
+                data = fh.read()
+        except IOError as e:
+            if e.errno != errno.ENOENT:
+                raise
+            return None
+
+        if data[0:4] != BYTECODEHEADER:
+            return None
+
+        return buffer(data, 4)
+
+    class hgmodulefinder(object):
+        """A sys.meta_path finder that uses a custom module loader."""
+        def find_module(self, fullname, path=None):
+            if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
+                return None
+            if fullname.startswith('mercurial.cext.'):
+                return None
+            if fullname.startswith('mercurial.thirdparty'):
+                return None
+            if fullname.startswith('mercurial.zstd'):
+                return None
+            if fullname.startswith('hgext.fsmonitor.pywartchman'):
+                return None
+
+            pypath = os.path.join(rootpath, *fullname.split('.')) + '.py'
+
+            if not os.path.exists(pypath):
+                return None
+
+            return self
+
+        def load_module(self, fullname):
+            if fullname in sys.modules:
+                return sys.modules[fullname]
+
+            pypath = os.path.join(rootpath, *fullname.split('.')) + '.py'
+            pycpath = pypath + 'c'
+
+            with open(pypath, 'rb') as fh:
+                sourcefh = io.BytesIO(fh.read())
+
+            header = struct.pack('>i', binascii.crc32(sourcefh.getvalue()))
+
+            bytecode = loadbytecode(pycpath)
+
+            if bytecode is not None:
+                if bytecode[0:4] != header:
+                    bytecode = None
+                else:
+                    bytecode = bytecode[4:]
+
+            if bytecode is not None:
+                # TODO need to handle errors.
+                code = marshal.loads(bytecode)
+            else:
+                with open(pypath, 'rb') as fh:
+                    tokens = tokenize.tokenize(fh.readline)
+                    source = tokenize.untokenize(replacetokens(list(tokens),
+                                                               fullname))
+
+                code = compile(source, pypath, 'exec', dont_inherit=True)
+
+                with open(pycpath, 'wb') as fh:
+                    fh.write(BYTECODEHEADER)
+                    fh.write(header)
+                    marshal.dump(code, fh)
+
+            mod = imp.new_module(fullname)
+            sys.modules[fullname] = mod
+
+            mod.__file__ = '<%s>' % fullname
+            mod.__loader__ = self
+
+            if '.' in fullname:
+                mod.__package__ = fullname.rpartition('.')[0]
+            else:
+                mod.__package__ = fullname
+
+            exec(code, mod.__dict__)
+            return mod
+
+    if not any(isinstance(x, hgmodulefinder) for x in sys.meta_path):
+        sys.meta_path.insert(0, hgmodulefinder())