Patchwork D7592: fuzz: add support for fuzzing under either Python 2 or 3

login
register
mail settings
Submitter phabricator
Date Dec. 12, 2019, 12:53 p.m.
Message ID <2a68455b3e625cca8bb425bfde3c2f4a@localhost.localdomain>
Download mbox | patch
Permalink /patch/43735/
State Not Applicable
Headers show

Comments

phabricator - Dec. 12, 2019, 12:53 p.m.
Closed by commit rHG8766728dbce6: fuzz: add support for fuzzing under either Python 2 or 3 (authored by durin42).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7592?vs=18559&id=18630

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7592/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7592

AFFECTED FILES
  contrib/fuzz/dirs.cc
  contrib/fuzz/dirstate.cc
  contrib/fuzz/fm1readmarkers.cc
  contrib/fuzz/fncache.cc
  contrib/fuzz/jsonescapeu8fast.cc
  contrib/fuzz/manifest.cc
  contrib/fuzz/pyutil.cc
  contrib/fuzz/pyutil.h
  contrib/fuzz/revlog.cc

CHANGE DETAILS




To: durin42, #hg-reviewers, pulkit
Cc: mercurial-devel

Patch

diff --git a/contrib/fuzz/revlog.cc b/contrib/fuzz/revlog.cc
--- a/contrib/fuzz/revlog.cc
+++ b/contrib/fuzz/revlog.cc
@@ -9,16 +9,15 @@ 
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import parse_index2
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 for inline in (True, False):
     try:
-        index, cache = parse_index2(data, inline)
+        index, cache = parsers.parse_index2(data, inline)
         index.slicechunktodensity(list(range(len(index))), 0.5, 262144)
         index.stats()
         index.findsnapshots({}, 0)
@@ -35,7 +34,7 @@ 
         # to debug failures.
         # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }
 
diff --git a/contrib/fuzz/pyutil.h b/contrib/fuzz/pyutil.h
--- a/contrib/fuzz/pyutil.h
+++ b/contrib/fuzz/pyutil.h
@@ -1,5 +1,11 @@ 
 #include <Python.h>
 
+#if PY_MAJOR_VERSION >= 3
+#define PYCODETYPE PyObject
+#else
+#define PYCODETYPE PyCodeObject
+#endif
+
 namespace contrib
 {
 
diff --git a/contrib/fuzz/pyutil.cc b/contrib/fuzz/pyutil.cc
--- a/contrib/fuzz/pyutil.cc
+++ b/contrib/fuzz/pyutil.cc
@@ -6,17 +6,26 @@ 
 namespace contrib
 {
 
+#if PY_MAJOR_VERSION >= 3
+#define HG_FUZZER_PY3 1
+PyMODINIT_FUNC PyInit_parsers(void);
+#else
+PyMODINIT_FUNC initparsers(void);
+#endif
+
 static char cpypath[8192] = "\0";
 
 static PyObject *mainmod;
 static PyObject *globals;
 
-/* TODO: use Python 3 for this fuzzing? */
-PyMODINIT_FUNC initparsers(void);
-
 void initpy(const char *cselfpath)
 {
+#ifdef HG_FUZZER_PY3
+	const std::string subdir = "/sanpy/lib/python3.7";
+#else
 	const std::string subdir = "/sanpy/lib/python2.7";
+#endif
+
 	/* HACK ALERT: we need a full Python installation built without
 	   pymalloc and with ASAN, so we dump one in
 	   $OUT/sanpy/lib/python2.7. This helps us wire that up. */
@@ -39,11 +48,24 @@ 
 	setenv("PYTHONNOUSERSITE", "1", 1);
 	/* prevent Python from looking up users in the fuzz environment */
 	setenv("PYTHONUSERBASE", cpypath, 1);
+#ifdef HG_FUZZER_PY3
+	std::wstring wcpypath(pypath.begin(), pypath.end());
+	Py_SetPythonHome(wcpypath.c_str());
+#else
 	Py_SetPythonHome(cpypath);
+#endif
 	Py_InitializeEx(0);
 	mainmod = PyImport_AddModule("__main__");
 	globals = PyModule_GetDict(mainmod);
+
+#ifdef HG_FUZZER_PY3
+	PyObject *mod = PyInit_parsers();
+#else
 	initparsers();
+	PyObject *mod = PyImport_ImportModule("parsers");
+#endif
+
+	PyDict_SetItemString(globals, "parsers", mod);
 }
 
 PyObject *pyglobals()
diff --git a/contrib/fuzz/manifest.cc b/contrib/fuzz/manifest.cc
--- a/contrib/fuzz/manifest.cc
+++ b/contrib/fuzz/manifest.cc
@@ -9,15 +9,14 @@ 
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import lazymanifest
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
-  lm = lazymanifest(mdata)
+  lm = parsers.lazymanifest(mdata)
   # iterate the whole thing, which causes the code to fully parse
   # every line in the manifest
   for e, _, _ in lm.iterentries():
@@ -41,7 +40,7 @@ 
   # to debug failures.
   # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }
 
diff --git a/contrib/fuzz/jsonescapeu8fast.cc b/contrib/fuzz/jsonescapeu8fast.cc
--- a/contrib/fuzz/jsonescapeu8fast.cc
+++ b/contrib/fuzz/jsonescapeu8fast.cc
@@ -11,23 +11,21 @@ 
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import jsonescapeu8fast
-
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
-    jsonescapeu8fast(data, paranoid)
+    parsers.jsonescapeu8fast(data, paranoid)
 except Exception as e:
     pass
     # uncomment this print if you're editing this Python code
     # to debug failures.
     # print(e)
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	if (!code) {
 		std::cerr << "failed to compile Python code!" << std::endl;
 	}
diff --git a/contrib/fuzz/fncache.cc b/contrib/fuzz/fncache.cc
--- a/contrib/fuzz/fncache.cc
+++ b/contrib/fuzz/fncache.cc
@@ -10,29 +10,20 @@ 
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import (
-    isasciistr,
-    asciilower,
-    asciiupper,
-    encodedir,
-    pathencode,
-    lowerencode,
-)
-
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
     for fn in (
-        isasciistr,
-        asciilower,
-        asciiupper,
-        encodedir,
-        pathencode,
-        lowerencode,
+        parsers.isasciistr,
+        parsers.asciilower,
+        parsers.asciiupper,
+        parsers.encodedir,
+        parsers.pathencode,
+        parsers.lowerencode,
     ):
         try:
             fn(data)
@@ -53,7 +44,7 @@ 
     # to debug failures.
     # print(e)
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	if (!code) {
 		std::cerr << "failed to compile Python code!" << std::endl;
 	}
diff --git a/contrib/fuzz/fm1readmarkers.cc b/contrib/fuzz/fm1readmarkers.cc
--- a/contrib/fuzz/fm1readmarkers.cc
+++ b/contrib/fuzz/fm1readmarkers.cc
@@ -9,13 +9,12 @@ 
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import fm1readmarkers
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 def maybeint(s, default):
     try:
         return int(s)
@@ -31,14 +30,14 @@ 
     else:
         offset = stop = 0
     offset, stop = maybeint(offset, 0), maybeint(stop, len(data))
-    fm1readmarkers(data, offset, stop)
+    parsers.fm1readmarkers(data, offset, stop)
 except Exception as e:
     pass
     # uncomment this print if you're editing this Python code
     # to debug failures.
     # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }
 
diff --git a/contrib/fuzz/dirstate.cc b/contrib/fuzz/dirstate.cc
--- a/contrib/fuzz/dirstate.cc
+++ b/contrib/fuzz/dirstate.cc
@@ -9,24 +9,23 @@ 
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import parse_dirstate
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
     dmap = {}
     copymap = {}
-    p = parse_dirstate(dmap, copymap, data)
+    p = parsers.parse_dirstate(dmap, copymap, data)
 except Exception as e:
     pass
     # uncomment this print if you're editing this Python code
     # to debug failures.
     # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }
 
diff --git a/contrib/fuzz/dirs.cc b/contrib/fuzz/dirs.cc
--- a/contrib/fuzz/dirs.cc
+++ b/contrib/fuzz/dirs.cc
@@ -9,16 +9,15 @@ 
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import dirs
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
   files = mdata.split('\n')
-  d = dirs(files)
+  d = parsers.dirs(files)
   list(d)
   'a' in d
   if files:
@@ -29,7 +28,7 @@ 
   # to debug failures.
   # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }