Submitter | Yuya Nishihara |
---|---|
Date | Feb. 23, 2018, 2:53 p.m. |
Message ID | <98cfd7926442dc0a649e.1519397598@mimosa> |
Download | mbox | patch |
Permalink | /patch/28277/ |
State | Accepted |
Headers | show |
Comments
On Fri, Feb 23, 2018 at 23:53:18 +0900, Yuya Nishihara wrote: > # HG changeset patch > # User Yuya Nishihara <yuya@tcha.org> > # Date 1519394998 -32400 > # Fri Feb 23 23:09:58 2018 +0900 > # Node ID 98cfd7926442dc0a649e0359455ad6962815bd13 > # Parent b8d0761a85c7421071750de23228415306852d69 > diff: do not split function name if character encoding is unknown > > Only ASCII characters can be split reliably at any byte positions, so let's > just leave long multi-byte sequence long. It's probably less bad than putting > an invalid byte sequence into a diff. > > This doesn't try to split the first ASCII slice from multi-byte sequence > because a combining character may follow. I like it! Thanks, Jeff. > > diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py > --- a/mercurial/mdiff.py > +++ b/mercurial/mdiff.py > @@ -13,6 +13,7 @@ import zlib > > from .i18n import _ > from . import ( > + encoding, > error, > policy, > pycompat, > @@ -348,7 +349,11 @@ def _unidiff(t1, t2, opts=defaultopts): > # alphanumeric char. > for i in xrange(astart - 1, lastpos - 1, -1): > if l1[i][0:1].isalnum(): > - func = ' ' + l1[i].rstrip()[:40] > + func = b' ' + l1[i].rstrip() > + # split long function name if ASCII. otherwise we have no > + # idea where the multi-byte boundary is, so just leave it. > + if encoding.isasciistr(func): > + func = func[:41] > lastfunc[1] = func > break > # by recording this hunk's starting point as the next place to > diff --git a/tests/test-diff-unified.t b/tests/test-diff-unified.t > --- a/tests/test-diff-unified.t > +++ b/tests/test-diff-unified.t > @@ -386,3 +386,73 @@ If [diff] git is set to true, but the us > } > > $ cd .. > + > +Long function names should be abbreviated, but multi-byte character shouldn't > +be broken up > + > + $ hg init longfunc > + $ cd longfunc > + > + >>> with open('a', 'wb') as f: > + ... f.write(b'a' * 39 + b'bb' + b'\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b' 0 b\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b'a' * 39 + b'\xc3\xa0' + b'\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b' 0 a with grave (single code point)\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b'a' * 39 + b'a\xcc\x80' + b'\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b' 0 a with grave (composition)\n') > + ... f.write(b' .\n' * 3) > + $ hg ci -qAm0 > + > + >>> with open('a', 'wb') as f: > + ... f.write(b'a' * 39 + b'bb' + b'\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b' 1 b\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b'a' * 39 + b'\xc3\xa0' + b'\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b' 1 a with grave (single code point)\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b'a' * 39 + b'a\xcc\x80' + b'\n') > + ... f.write(b' .\n' * 3) > + ... f.write(b' 1 a with grave (composition)\n') > + ... f.write(b' .\n' * 3) > + $ hg ci -m1 > + > + $ hg diff -c1 --nodates --show-function > + diff -r 3e92dd6fa812 -r a256341606cb a > + --- a/a > + +++ b/a > + @@ -2,7 +2,7 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab > + . > + . > + . > + - 0 b > + + 1 b > + . > + . > + . > + @@ -10,7 +10,7 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\xc3\xa0 (esc) > + . > + . > + . > + - 0 a with grave (single code point) > + + 1 a with grave (single code point) > + . > + . > + . > + @@ -18,7 +18,7 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\xcc\x80 (esc) > + . > + . > + . > + - 0 a with grave (composition) > + + 1 a with grave (composition) > + . > + . > + . > + > + $ cd ..
On Fri, Feb 23, 2018 at 11:53:18PM +0900, Yuya Nishihara wrote: > # HG changeset patch > # User Yuya Nishihara <yuya@tcha.org> > # Date 1519394998 -32400 > # Fri Feb 23 23:09:58 2018 +0900 > # Node ID 98cfd7926442dc0a649e0359455ad6962815bd13 > # Parent b8d0761a85c7421071750de23228415306852d69 > diff: do not split function name if character encoding is unknown queued, thanks
Patch
diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py --- a/mercurial/mdiff.py +++ b/mercurial/mdiff.py @@ -13,6 +13,7 @@ import zlib from .i18n import _ from . import ( + encoding, error, policy, pycompat, @@ -348,7 +349,11 @@ def _unidiff(t1, t2, opts=defaultopts): # alphanumeric char. for i in xrange(astart - 1, lastpos - 1, -1): if l1[i][0:1].isalnum(): - func = ' ' + l1[i].rstrip()[:40] + func = b' ' + l1[i].rstrip() + # split long function name if ASCII. otherwise we have no + # idea where the multi-byte boundary is, so just leave it. + if encoding.isasciistr(func): + func = func[:41] lastfunc[1] = func break # by recording this hunk's starting point as the next place to diff --git a/tests/test-diff-unified.t b/tests/test-diff-unified.t --- a/tests/test-diff-unified.t +++ b/tests/test-diff-unified.t @@ -386,3 +386,73 @@ If [diff] git is set to true, but the us } $ cd .. + +Long function names should be abbreviated, but multi-byte character shouldn't +be broken up + + $ hg init longfunc + $ cd longfunc + + >>> with open('a', 'wb') as f: + ... f.write(b'a' * 39 + b'bb' + b'\n') + ... f.write(b' .\n' * 3) + ... f.write(b' 0 b\n') + ... f.write(b' .\n' * 3) + ... f.write(b'a' * 39 + b'\xc3\xa0' + b'\n') + ... f.write(b' .\n' * 3) + ... f.write(b' 0 a with grave (single code point)\n') + ... f.write(b' .\n' * 3) + ... f.write(b'a' * 39 + b'a\xcc\x80' + b'\n') + ... f.write(b' .\n' * 3) + ... f.write(b' 0 a with grave (composition)\n') + ... f.write(b' .\n' * 3) + $ hg ci -qAm0 + + >>> with open('a', 'wb') as f: + ... f.write(b'a' * 39 + b'bb' + b'\n') + ... f.write(b' .\n' * 3) + ... f.write(b' 1 b\n') + ... f.write(b' .\n' * 3) + ... f.write(b'a' * 39 + b'\xc3\xa0' + b'\n') + ... f.write(b' .\n' * 3) + ... f.write(b' 1 a with grave (single code point)\n') + ... f.write(b' .\n' * 3) + ... f.write(b'a' * 39 + b'a\xcc\x80' + b'\n') + ... f.write(b' .\n' * 3) + ... f.write(b' 1 a with grave (composition)\n') + ... f.write(b' .\n' * 3) + $ hg ci -m1 + + $ hg diff -c1 --nodates --show-function + diff -r 3e92dd6fa812 -r a256341606cb a + --- a/a + +++ b/a + @@ -2,7 +2,7 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab + . + . + . + - 0 b + + 1 b + . + . + . + @@ -10,7 +10,7 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\xc3\xa0 (esc) + . + . + . + - 0 a with grave (single code point) + + 1 a with grave (single code point) + . + . + . + @@ -18,7 +18,7 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\xcc\x80 (esc) + . + . + . + - 0 a with grave (composition) + + 1 a with grave (composition) + . + . + . + + $ cd ..