Patchwork D2138: run-tests: report tests that exception occurred in

login
register
mail settings
Submitter phabricator
Date Feb. 11, 2018, 10:57 p.m.
Message ID <differential-rev-PHID-DREV-wxr5fydgvfkkdsfxswrr-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/27590/
State Superseded
Headers show

Comments

phabricator - Feb. 11, 2018, 10:57 p.m.
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We now record the test in which each exception occurred. We put this
  information to use by aggregating the count of failures in each
  test. For each exception, the exception report now prints the total
  number of tests having that exception and, among those tests, the one
  with the fewest failures. The exception list
  is now sorted by (total count, tests impacted, count of failures
  in least failing test).
  
  This allows us to:
  
  - Assess how widespread a failure is. Some exceptions occur a lot in a few tests. Others occur over many tests.
  - Easily run a test exhibiting an exception without having to find a failure in test output.
  - Find and fix low-hanging fruit (e.g. exceptions that are the only failure in a test).
  
  Here's an example of the new output:
  
  199 (4 tests)   /home/gps/src/hg/hgext/blackbox.py:191: %b requires a bytes-like object, or an object that implements __bytes__, not 'str' (test-devel-warnings.t - 1 total)
  142 (19 tests)  /home/gps/src/hg/hgext/mq.py:655: list indices must be integers or slices, not bytes (test-hardlinks.t - 1 total)
  140 (20 tests)  /home/gps/src/hg/mercurial/patch.py:296: string argument expected, got 'bytes' (test-audit-subrepo.t - 1 total)
  101 (15 tests)  /home/gps/src/hg/hgext/convert/convcmd.py:60: encode() argument 1 must be str, not bytes (test-convert-clonebranches.t - 1 total)
  90 (2 tests)    /home/gps/src/hg/hgext/mq.py:456: can't concat str to bytes (test-mq-qqueue.t - 1 total)
  87 (2 tests)    /home/gps/src/hg/mercurial/branchmap.py:380: %b requires a bytes-like object, or an object that implements __bytes__, not 'FileNotFoundError' (test-branches.t - 2 total)
  85 (22 tests)   /home/gps/src/hg/mercurial/sshpeer.py:223: cannot convert 'UUID' object to bytes (test-bundle2-pushback.t - 1 total)
  1 (1 tests)     /home/gps/src/hg/mercurial/formatter.py:254: %b requires a bytes-like object, or an object that implements __bytes__, not 'str' (test-debugextensions.t - 2 total)
  1 (1 tests)     /home/gps/src/hg/hgext/convert/convcmd.py:420: startswith first arg must be str or a tuple of str, not bytes (test-convert-authormap.t - 2 total)
  1 (1 tests)     /home/gps/src/hg/mercurial/revlog.py:797: '>=' not supported between instances of 'NoneType' and 'int' (test-unionrepo.t - 1 total)
  1 (1 tests)     /home/gps/src/hg/hgext/show.py:129: %b requires a bytes-like object, or an object that implements __bytes__, not 'str' (test-show.t - 1 total)

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2138

AFFECTED FILES
  tests/logexceptions.py
  tests/run-tests.py

CHANGE DETAILS




To: indygreg, #hg-reviewers
Cc: mercurial-devel

Patch

diff --git a/tests/run-tests.py b/tests/run-tests.py
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -1052,6 +1052,7 @@ 
         env['PYTHONUSERBASE'] = sysconfig.get_config_var('userbase') or ''
         env['HGEMITWARNINGS'] = '1'
         env['TESTTMP'] = self._testtmp
+        env['TESTNAME'] = self.name
         env['HOME'] = self._testtmp
         # This number should match portneeded in _getport
         for port in xrange(3):
@@ -2135,13 +2136,21 @@ 
             if self._runner.options.exceptions:
                 exceptions = aggregateexceptions(
                     os.path.join(self._runner._outputdir, b'exceptions'))
-                total = sum(exceptions.values())
 
                 self.stream.writeln('Exceptions Report:')
                 self.stream.writeln('%d total from %d frames' %
-                                    (total, len(exceptions)))
-                for (frame, line, exc), count in exceptions.most_common():
-                    self.stream.writeln('%d\t%s: %s' % (count, frame, exc))
+                                    (exceptions['total'],
+                                     len(exceptions['exceptioncounts'])))
+                combined = exceptions['combined']
+                for key in sorted(combined, key=combined.get, reverse=True):
+                    frame, line, exc = key
+                    totalcount, testcount, leastcount, leasttest = combined[key]
+
+                    self.stream.writeln('%d (%d tests)\t%s: %s (%s - %d total)'
+                                        % (totalcount,
+                                           testcount,
+                                           frame, exc,
+                                           leasttest, leastcount))
 
             self.stream.flush()
 
@@ -3012,22 +3021,57 @@ 
                       p.decode("utf-8"))
 
 def aggregateexceptions(path):
-    exceptions = collections.Counter()
+    exceptioncounts = collections.Counter()
+    testsbyfailure = collections.defaultdict(set)
+    failuresbytest = collections.defaultdict(set)
 
     for f in os.listdir(path):
         with open(os.path.join(path, f), 'rb') as fh:
             data = fh.read().split(b'\0')
-            if len(data) != 4:
+            if len(data) != 5:
                 continue
 
-            exc, mainframe, hgframe, hgline = data
+            exc, mainframe, hgframe, hgline, testname = data
             exc = exc.decode('utf-8')
             mainframe = mainframe.decode('utf-8')
             hgframe = hgframe.decode('utf-8')
             hgline = hgline.decode('utf-8')
-            exceptions[(hgframe, hgline, exc)] += 1
-
-    return exceptions
+            testname = testname.decode('utf-8')
+
+            key = (hgframe, hgline, exc)
+            exceptioncounts[key] += 1
+            testsbyfailure[key].add(testname)
+            failuresbytest[testname].add(key)
+
+    # Find test having fewest failures for each failure.
+    leastfailing = {}
+    for key, tests in testsbyfailure.items():
+        fewesttest = None
+        fewestcount = 99999999
+        for test in sorted(tests):
+            if len(failuresbytest[test]) < fewestcount:
+                fewesttest = test
+                fewestcount = len(failuresbytest[test])
+
+        leastfailing[key] = (fewestcount, fewesttest)
+
+    # Create a combined counter so we can sort by total occurrences and
+    # impacted tests.
+    combined = {}
+    for key in exceptioncounts:
+        combined[key] = (exceptioncounts[key],
+                         len(testsbyfailure[key]),
+                         leastfailing[key][0],
+                         leastfailing[key][1])
+
+    return {
+        'exceptioncounts': exceptioncounts,
+        'total': sum(exceptioncounts.values()),
+        'combined': combined,
+        'leastfailing': leastfailing,
+        'byfailure': testsbyfailure,
+        'bytest': failuresbytest,
+    }
 
 if __name__ == '__main__':
     runner = TestRunner()
diff --git a/tests/logexceptions.py b/tests/logexceptions.py
--- a/tests/logexceptions.py
+++ b/tests/logexceptions.py
@@ -65,6 +65,7 @@ 
             primaryframe,
             hgframe,
             hgline,
+            ui.environ[b'TESTNAME'].decode('utf-8', 'replace'),
         ]
         fh.write(b'\0'.join(p.encode('utf-8', 'replace') for p in parts))