Submitter | Maciej Fijalkowski |
---|---|
Date | Sept. 17, 2016, 6:22 p.m. |
Message ID | <7551f1e60b2155462d89.1474136540@brick.arcode.com> |
Download | mbox | patch |
Permalink | /patch/16664/ |
State | Changes Requested |
Headers | show |
Comments
This should fix the issues presented. There is one problem which is that the hash in test-rebase-detach changes. The way I see it is just an RNG phase-shift, but it might be a real bug. Some actual input will be appreciated. On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: > # HG changeset patch > # User Maciej Fijalkowski <fijall@gmail.com> > # Date 1473680234 -7200 > # Mon Sep 12 13:37:14 2016 +0200 > # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc > # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 > lazymanifest: write a more efficient, pypy friendly version of lazymanifest > > diff --git a/mercurial/manifest.py b/mercurial/manifest.py > --- a/mercurial/manifest.py > +++ b/mercurial/manifest.py > @@ -104,69 +104,297 @@ > _checkforbidden(files) > return ''.join(lines) > > -class _lazymanifest(dict): > - """This is the pure implementation of lazymanifest. > - > - It has not been optimized *at all* and is not lazy. > - """ > - > - def __init__(self, data): > - dict.__init__(self) > - for f, n, fl in _parse(data): > - self[f] = n, fl > - > - def __setitem__(self, k, v): > - node, flag = v > - assert node is not None > - if len(node) > 21: > - node = node[:21] # match c implementation behavior > - dict.__setitem__(self, k, (node, flag)) > +class lazymanifestiter(object): > + def __init__(self, lm): > + self.pos = 0 > + self.lm = lm > > def __iter__(self): > - return iter(sorted(dict.keys(self))) > + return self > > - def iterkeys(self): > - return iter(sorted(dict.keys(self))) > + def next(self): > + try: > + data, pos = self.lm._get(self.pos) > + except IndexError: > + raise StopIteration > + if pos == -1: > + self.pos += 1 > + return data[0] > + self.pos += 1 > + zeropos = data.find('\x00', pos) > + return data[pos:zeropos] > > - def iterentries(self): > - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) > +class lazymanifestiterentries(object): > + def __init__(self, lm): > + self.lm = lm > + self.pos = 0 > + > + def __iter__(self): > + return self > + > + def next(self): > + try: > + data, pos = self.lm._get(self.pos) > + except IndexError: > + raise StopIteration > + if pos == -1: > + self.pos += 1 > + return data > + zeropos = data.find('\x00', pos) > + hashval = unhexlify(data, self.lm.extrainfo[self.pos], > + zeropos + 1, 40) > + flags = self.lm._getflags(data, self.pos, zeropos) > + self.pos += 1 > + return (data[pos:zeropos], hashval, flags) > + > +def unhexlify(data, extra, pos, length): > + s = data[pos:pos + length].decode('hex') > + if extra: > + s += chr(extra & 0xff) > + return s > + > +def _cmp(a, b): > + return (a > b) - (a < b) > + > +class _lazymanifest(object): > + def __init__(self, data, positions=None, extrainfo=None, extradata=None): > + if positions is None: > + self.positions = self.findlines(data) > + self.extrainfo = [0] * len(self.positions) > + self.data = data > + self.extradata = [] > + else: > + self.positions = positions[:] > + self.extrainfo = extrainfo[:] > + self.extradata = extradata[:] > + self.data = data > + > + def findlines(self, data): > + if not data: > + return [] > + pos = data.find("\n") > + positions = [0] > + while pos < len(data) - 1 and pos != -1: > + positions.append(pos + 1) > + pos = data.find("\n", pos + 1) > + return positions > + > + def _get(self, index): > + # get the position encoded in pos: > + # positive number is an index in 'data' > + # negative number is in extrapieces > + pos = self.positions[index] > + if pos >= 0: > + return self.data, pos > + return self.extradata[-pos - 1], -1 > + > + def _getkey(self, pos): > + if pos >= 0: > + return self.data[pos:self.data.find('\x00', pos + 1)] > + return self.extradata[-pos - 1][0] > + > + def bsearch(self, key): > + first = 0 > + last = len(self.positions) - 1 > + > + while first <= last: > + midpoint = (first + last)//2 > + nextpos = self.positions[midpoint] > + candidate = self._getkey(nextpos) > + r = _cmp(key, candidate) > + if r == 0: > + return midpoint > + else: > + if r < 0: > + last = midpoint - 1 > + else: > + first = midpoint + 1 > + return -1 > + > + def bsearch2(self, key): > + # same as the above, but will always return the position > + # done for performance reasons > + first = 0 > + last = len(self.positions) - 1 > + > + while first <= last: > + midpoint = (first + last)//2 > + nextpos = self.positions[midpoint] > + candidate = self._getkey(nextpos) > + r = _cmp(key, candidate) > + if r == 0: > + return (midpoint, True) > + else: > + if r < 0: > + last = midpoint - 1 > + else: > + first = midpoint + 1 > + return (first, False) > + > + def __contains__(self, key): > + return self.bsearch(key) != -1 > + > + def _getflags(self, data, needle, pos): > + start = pos + 41 > + end = data.find("\n", start) > + if end == -1: > + end = len(data) - 1 > + if start == end: > + return '' > + return self.data[start:end] > + > + def __getitem__(self, key): > + if not isinstance(key, str): > + raise TypeError("getitem: manifest keys must be a string.") > + needle = self.bsearch(key) > + if needle == -1: > + raise KeyError > + data, pos = self._get(needle) > + if pos == -1: > + return (data[1], data[2]) > + zeropos = data.find('\x00', pos) > + assert 0 <= needle <= len(self.positions) > + assert len(self.extrainfo) == len(self.positions) > + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) > + flags = self._getflags(data, needle, zeropos) > + return (hashval, flags) > + > + def __delitem__(self, key): > + needle, found = self.bsearch2(key) > + if not found: > + raise KeyError > + cur = self.positions[needle] > + self.positions = self.positions[:needle] + self.positions[needle + 1:] > + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] > + if cur >= 0: > + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] > + > + def __setitem__(self, key, value): > + if not isinstance(key, str): > + raise TypeError("setitem: manifest keys must be a string.") > + if not isinstance(value, tuple) or len(value) != 2: > + raise TypeError("Manifest values must be a tuple of (node, flags).") > + hashval = value[0] > + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: > + raise TypeError("node must be a 20-byte string") > + flags = value[1] > + if not isinstance(flags, str) or len(flags) > 1: > + raise TypeError("flags must a 0 or 1 byte string, got %r", flags) > + needle, found = self.bsearch2(key) > + if found: > + # put the item > + pos = self.positions[needle] > + if pos < 0: > + self.extradata[-pos - 1] = (key, value[0], value[1]) > + else: > + # just don't bother > + self.extradata.append((key, value[0], value[1])) > + self.positions[needle] = -len(self.extradata) > + else: > + # not found, put it in with extra positions > + self.extradata.append((key, value[0], value[1])) > + self.positions = (self.positions[:needle] + [-len(self.extradata)] > + + self.positions[needle:]) > + self.extrainfo = (self.extrainfo[:needle] + [0] + > + self.extrainfo[needle:]) > > def copy(self): > - c = _lazymanifest('') > - c.update(self) > - return c > + # XXX call _compact like in C? > + return _lazymanifest(self.data, self.positions, self.extrainfo, > + self.extradata) > + > + def _compact(self): > + # hopefully not called TOO often > + def findnextposition(positions, start, lgt): > + i = start > + while i < len(positions): > + if positions[i] >= 0: > + return positions[i] > + i += 1 > + return lgt > + > + if len(self.extradata) == 0: > + return > + l = [] > + last_cut = 0 > + i = 0 > + offset = 0 > + self.extrainfo = [0] * len(self.positions) > + while i < len(self.positions): > + if self.positions[i] >= 0: > + cur = self.positions[i] > + last_cut = cur > + while True: > + self.positions[i] = offset > + i += 1 > + if i == len(self.positions) or self.positions[i] < 0: > + break > + offset += self.positions[i] - cur > + cur = self.positions[i] > + end_cut = findnextposition(self.positions, i, len(self.data)) > + offset += end_cut - cur > + l.append(self.data[last_cut:end_cut]) > + else: > + while i < len(self.positions) and self.positions[i] < 0: > + cur = self.positions[i] > + t = self.extradata[-cur - 1] > + l.append(self._pack(t)) > + self.positions[i] = offset > + if len(t[1]) > 20: > + self.extrainfo[i] = ord(t[1][21]) > + offset += len(l[-1]) > + i += 1 > + self.data = ''.join(l) > + self.extradata = [] > + > + def _pack(self, d): > + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' > + > + def text(self): > + self._compact() > + return self.data > > def diff(self, m2, clean=False): > '''Finds changes between the current manifest and m2.''' > + # XXX think whether efficiency matters here > diff = {} > > - for fn, e1 in self.iteritems(): > + for fn, e1, flags in self.iterentries(): > if fn not in m2: > - diff[fn] = e1, (None, '') > + diff[fn] = (e1, flags), (None, '') > else: > e2 = m2[fn] > - if e1 != e2: > - diff[fn] = e1, e2 > + if (e1, flags) != e2: > + diff[fn] = (e1, flags), e2 > elif clean: > diff[fn] = None > > - for fn, e2 in m2.iteritems(): > + for fn, e2, flags in m2.iterentries(): > if fn not in self: > - diff[fn] = (None, ''), e2 > + diff[fn] = (None, ''), (e2, flags) > > return diff > > + def iterentries(self): > + return lazymanifestiterentries(self) > + > + def iterkeys(self): > + return lazymanifestiter(self) > + > + def __iter__(self): > + return lazymanifestiter(self) > + > + def __len__(self): > + return len(self.positions) > + > def filtercopy(self, filterfn): > + # XXX should be optimized > c = _lazymanifest('') > for f, n, fl in self.iterentries(): > if filterfn(f): > c[f] = n, fl > return c > > - def text(self): > - """Get the full data of this manifest as a bytestring.""" > - return _textv1(self.iterentries()) > - > try: > _lazymanifest = parsers.lazymanifest > except AttributeError: > diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py > --- a/mercurial/pure/bdiff.py > +++ b/mercurial/pure/bdiff.py > @@ -111,8 +111,8 @@ > def blocks(sa, sb): > a = ffi.new("struct bdiff_line**") > b = ffi.new("struct bdiff_line**") > - ac = ffi.new("char[]", sa) > - bc = ffi.new("char[]", sb) > + ac = ffi.new("char[]", str(sa)) > + bc = ffi.new("char[]", str(sb)) > l = ffi.new("struct bdiff_hunk*") > try: > an = lib.bdiff_splitlines(ac, len(sa), a) > @@ -138,8 +138,8 @@ > def bdiff(sa, sb): > a = ffi.new("struct bdiff_line**") > b = ffi.new("struct bdiff_line**") > - ac = ffi.new("char[]", sa) > - bc = ffi.new("char[]", sb) > + ac = ffi.new("char[]", str(sa)) > + bc = ffi.new("char[]", str(sb)) > l = ffi.new("struct bdiff_hunk*") > try: > an = lib.bdiff_splitlines(ac, len(sa), a)
> On Sep 17, 2016, at 14:23, Maciej Fijalkowski <fijall@gmail.com> wrote: > > This should fix the issues presented. > > There is one problem which is that the hash in test-rebase-detach > changes. The way I see it is just an RNG phase-shift, but it might be > a real bug. Some actual input will be appreciated. I'll get a look at this in the morning America/New_York - my other miscellany occupied me for far too long today and I just don't have brainpower for this tonight. Sorry to keep you waiting. AF > > > > On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: >> # HG changeset patch >> # User Maciej Fijalkowski <fijall@gmail.com> >> # Date 1473680234 -7200 >> # Mon Sep 12 13:37:14 2016 +0200 >> # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc >> # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >> lazymanifest: write a more efficient, pypy friendly version of lazymanifest >> >> diff --git a/mercurial/manifest.py b/mercurial/manifest.py >> --- a/mercurial/manifest.py >> +++ b/mercurial/manifest.py >> @@ -104,69 +104,297 @@ >> _checkforbidden(files) >> return ''.join(lines) >> >> -class _lazymanifest(dict): >> - """This is the pure implementation of lazymanifest. >> - >> - It has not been optimized *at all* and is not lazy. >> - """ >> - >> - def __init__(self, data): >> - dict.__init__(self) >> - for f, n, fl in _parse(data): >> - self[f] = n, fl >> - >> - def __setitem__(self, k, v): >> - node, flag = v >> - assert node is not None >> - if len(node) > 21: >> - node = node[:21] # match c implementation behavior >> - dict.__setitem__(self, k, (node, flag)) >> +class lazymanifestiter(object): >> + def __init__(self, lm): >> + self.pos = 0 >> + self.lm = lm >> >> def __iter__(self): >> - return iter(sorted(dict.keys(self))) >> + return self >> >> - def iterkeys(self): >> - return iter(sorted(dict.keys(self))) >> + def next(self): >> + try: >> + data, pos = self.lm._get(self.pos) >> + except IndexError: >> + raise StopIteration >> + if pos == -1: >> + self.pos += 1 >> + return data[0] >> + self.pos += 1 >> + zeropos = data.find('\x00', pos) >> + return data[pos:zeropos] >> >> - def iterentries(self): >> - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) >> +class lazymanifestiterentries(object): >> + def __init__(self, lm): >> + self.lm = lm >> + self.pos = 0 >> + >> + def __iter__(self): >> + return self >> + >> + def next(self): >> + try: >> + data, pos = self.lm._get(self.pos) >> + except IndexError: >> + raise StopIteration >> + if pos == -1: >> + self.pos += 1 >> + return data >> + zeropos = data.find('\x00', pos) >> + hashval = unhexlify(data, self.lm.extrainfo[self.pos], >> + zeropos + 1, 40) >> + flags = self.lm._getflags(data, self.pos, zeropos) >> + self.pos += 1 >> + return (data[pos:zeropos], hashval, flags) >> + >> +def unhexlify(data, extra, pos, length): >> + s = data[pos:pos + length].decode('hex') >> + if extra: >> + s += chr(extra & 0xff) >> + return s >> + >> +def _cmp(a, b): >> + return (a > b) - (a < b) >> + >> +class _lazymanifest(object): >> + def __init__(self, data, positions=None, extrainfo=None, extradata=None): >> + if positions is None: >> + self.positions = self.findlines(data) >> + self.extrainfo = [0] * len(self.positions) >> + self.data = data >> + self.extradata = [] >> + else: >> + self.positions = positions[:] >> + self.extrainfo = extrainfo[:] >> + self.extradata = extradata[:] >> + self.data = data >> + >> + def findlines(self, data): >> + if not data: >> + return [] >> + pos = data.find("\n") >> + positions = [0] >> + while pos < len(data) - 1 and pos != -1: >> + positions.append(pos + 1) >> + pos = data.find("\n", pos + 1) >> + return positions >> + >> + def _get(self, index): >> + # get the position encoded in pos: >> + # positive number is an index in 'data' >> + # negative number is in extrapieces >> + pos = self.positions[index] >> + if pos >= 0: >> + return self.data, pos >> + return self.extradata[-pos - 1], -1 >> + >> + def _getkey(self, pos): >> + if pos >= 0: >> + return self.data[pos:self.data.find('\x00', pos + 1)] >> + return self.extradata[-pos - 1][0] >> + >> + def bsearch(self, key): >> + first = 0 >> + last = len(self.positions) - 1 >> + >> + while first <= last: >> + midpoint = (first + last)//2 >> + nextpos = self.positions[midpoint] >> + candidate = self._getkey(nextpos) >> + r = _cmp(key, candidate) >> + if r == 0: >> + return midpoint >> + else: >> + if r < 0: >> + last = midpoint - 1 >> + else: >> + first = midpoint + 1 >> + return -1 >> + >> + def bsearch2(self, key): >> + # same as the above, but will always return the position >> + # done for performance reasons >> + first = 0 >> + last = len(self.positions) - 1 >> + >> + while first <= last: >> + midpoint = (first + last)//2 >> + nextpos = self.positions[midpoint] >> + candidate = self._getkey(nextpos) >> + r = _cmp(key, candidate) >> + if r == 0: >> + return (midpoint, True) >> + else: >> + if r < 0: >> + last = midpoint - 1 >> + else: >> + first = midpoint + 1 >> + return (first, False) >> + >> + def __contains__(self, key): >> + return self.bsearch(key) != -1 >> + >> + def _getflags(self, data, needle, pos): >> + start = pos + 41 >> + end = data.find("\n", start) >> + if end == -1: >> + end = len(data) - 1 >> + if start == end: >> + return '' >> + return self.data[start:end] >> + >> + def __getitem__(self, key): >> + if not isinstance(key, str): >> + raise TypeError("getitem: manifest keys must be a string.") >> + needle = self.bsearch(key) >> + if needle == -1: >> + raise KeyError >> + data, pos = self._get(needle) >> + if pos == -1: >> + return (data[1], data[2]) >> + zeropos = data.find('\x00', pos) >> + assert 0 <= needle <= len(self.positions) >> + assert len(self.extrainfo) == len(self.positions) >> + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) >> + flags = self._getflags(data, needle, zeropos) >> + return (hashval, flags) >> + >> + def __delitem__(self, key): >> + needle, found = self.bsearch2(key) >> + if not found: >> + raise KeyError >> + cur = self.positions[needle] >> + self.positions = self.positions[:needle] + self.positions[needle + 1:] >> + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] >> + if cur >= 0: >> + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] >> + >> + def __setitem__(self, key, value): >> + if not isinstance(key, str): >> + raise TypeError("setitem: manifest keys must be a string.") >> + if not isinstance(value, tuple) or len(value) != 2: >> + raise TypeError("Manifest values must be a tuple of (node, flags).") >> + hashval = value[0] >> + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: >> + raise TypeError("node must be a 20-byte string") >> + flags = value[1] >> + if not isinstance(flags, str) or len(flags) > 1: >> + raise TypeError("flags must a 0 or 1 byte string, got %r", flags) >> + needle, found = self.bsearch2(key) >> + if found: >> + # put the item >> + pos = self.positions[needle] >> + if pos < 0: >> + self.extradata[-pos - 1] = (key, value[0], value[1]) >> + else: >> + # just don't bother >> + self.extradata.append((key, value[0], value[1])) >> + self.positions[needle] = -len(self.extradata) >> + else: >> + # not found, put it in with extra positions >> + self.extradata.append((key, value[0], value[1])) >> + self.positions = (self.positions[:needle] + [-len(self.extradata)] >> + + self.positions[needle:]) >> + self.extrainfo = (self.extrainfo[:needle] + [0] + >> + self.extrainfo[needle:]) >> >> def copy(self): >> - c = _lazymanifest('') >> - c.update(self) >> - return c >> + # XXX call _compact like in C? >> + return _lazymanifest(self.data, self.positions, self.extrainfo, >> + self.extradata) >> + >> + def _compact(self): >> + # hopefully not called TOO often >> + def findnextposition(positions, start, lgt): >> + i = start >> + while i < len(positions): >> + if positions[i] >= 0: >> + return positions[i] >> + i += 1 >> + return lgt >> + >> + if len(self.extradata) == 0: >> + return >> + l = [] >> + last_cut = 0 >> + i = 0 >> + offset = 0 >> + self.extrainfo = [0] * len(self.positions) >> + while i < len(self.positions): >> + if self.positions[i] >= 0: >> + cur = self.positions[i] >> + last_cut = cur >> + while True: >> + self.positions[i] = offset >> + i += 1 >> + if i == len(self.positions) or self.positions[i] < 0: >> + break >> + offset += self.positions[i] - cur >> + cur = self.positions[i] >> + end_cut = findnextposition(self.positions, i, len(self.data)) >> + offset += end_cut - cur >> + l.append(self.data[last_cut:end_cut]) >> + else: >> + while i < len(self.positions) and self.positions[i] < 0: >> + cur = self.positions[i] >> + t = self.extradata[-cur - 1] >> + l.append(self._pack(t)) >> + self.positions[i] = offset >> + if len(t[1]) > 20: >> + self.extrainfo[i] = ord(t[1][21]) >> + offset += len(l[-1]) >> + i += 1 >> + self.data = ''.join(l) >> + self.extradata = [] >> + >> + def _pack(self, d): >> + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' >> + >> + def text(self): >> + self._compact() >> + return self.data >> >> def diff(self, m2, clean=False): >> '''Finds changes between the current manifest and m2.''' >> + # XXX think whether efficiency matters here >> diff = {} >> >> - for fn, e1 in self.iteritems(): >> + for fn, e1, flags in self.iterentries(): >> if fn not in m2: >> - diff[fn] = e1, (None, '') >> + diff[fn] = (e1, flags), (None, '') >> else: >> e2 = m2[fn] >> - if e1 != e2: >> - diff[fn] = e1, e2 >> + if (e1, flags) != e2: >> + diff[fn] = (e1, flags), e2 >> elif clean: >> diff[fn] = None >> >> - for fn, e2 in m2.iteritems(): >> + for fn, e2, flags in m2.iterentries(): >> if fn not in self: >> - diff[fn] = (None, ''), e2 >> + diff[fn] = (None, ''), (e2, flags) >> >> return diff >> >> + def iterentries(self): >> + return lazymanifestiterentries(self) >> + >> + def iterkeys(self): >> + return lazymanifestiter(self) >> + >> + def __iter__(self): >> + return lazymanifestiter(self) >> + >> + def __len__(self): >> + return len(self.positions) >> + >> def filtercopy(self, filterfn): >> + # XXX should be optimized >> c = _lazymanifest('') >> for f, n, fl in self.iterentries(): >> if filterfn(f): >> c[f] = n, fl >> return c >> >> - def text(self): >> - """Get the full data of this manifest as a bytestring.""" >> - return _textv1(self.iterentries()) >> - >> try: >> _lazymanifest = parsers.lazymanifest >> except AttributeError: >> diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py >> --- a/mercurial/pure/bdiff.py >> +++ b/mercurial/pure/bdiff.py >> @@ -111,8 +111,8 @@ >> def blocks(sa, sb): >> a = ffi.new("struct bdiff_line**") >> b = ffi.new("struct bdiff_line**") >> - ac = ffi.new("char[]", sa) >> - bc = ffi.new("char[]", sb) >> + ac = ffi.new("char[]", str(sa)) >> + bc = ffi.new("char[]", str(sb)) >> l = ffi.new("struct bdiff_hunk*") >> try: >> an = lib.bdiff_splitlines(ac, len(sa), a) >> @@ -138,8 +138,8 @@ >> def bdiff(sa, sb): >> a = ffi.new("struct bdiff_line**") >> b = ffi.new("struct bdiff_line**") >> - ac = ffi.new("char[]", sa) >> - bc = ffi.new("char[]", sb) >> + ac = ffi.new("char[]", str(sa)) >> + bc = ffi.new("char[]", str(sb)) >> l = ffi.new("struct bdiff_hunk*") >> try: >> an = lib.bdiff_splitlines(ac, len(sa), a) > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
On Sat, Sep 17, 2016 at 08:23:36PM +0200, Maciej Fijalkowski wrote: > This should fix the issues presented. > > There is one problem which is that the hash in test-rebase-detach > changes. The way I see it is just an RNG phase-shift, but it might be > a real bug. Some actual input will be appreciated. It's not possibly RNG related - there's no RNG that goes into the sha1 of a node. I've added --debug to the failure in test-rebase-detach and compared them between cpython and pypy: cpython: @@ -366,11 +366,24 @@ $ hg log --rev tip --debug - changeset: 8:9472f4b1d736 + changeset: 8:9472f4b1d7366902d0098aa1401e3f170000cc56 tag: tip + phase: draft + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 + parent: -1:0000000000000000000000000000000000000000 + manifest: 8:8dbdcf066a97239fde2d0dba19fcb1e699fa66d2 user: test date: Thu Jan 01 00:00:00 1970 +0000 - summary: Collapsed revision + files: F + files+: E + extra: branch=default + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 + description: + Collapsed revision + * I + * Merge + * J + pypy: @@ -366,11 +366,24 @@ $ hg log --rev tip --debug - changeset: 8:9472f4b1d736 + changeset: 8:122ceff3b303b13e5ca323742a8ebe589b9f8066 tag: tip + phase: draft + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 + parent: -1:0000000000000000000000000000000000000000 + manifest: 8:09f1dc369d2667dcaeb9cf828b0fcb76bda29f33 user: test date: Thu Jan 01 00:00:00 1970 +0000 - summary: Collapsed revision + files: F + files+: E + extra: branch=default + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 + description: + Collapsed revision + * I + * Merge + * J + Which means the manifest is hashing out differently. That suggests there's a subtle bug in how the manifest is hashing out in the pypy version. Adding a `hg debugdata -m 8` (which prints raw manifest data), and this is what I get on cpython: $ hg debugdata -m 8 + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) on pypy: $ hg debugdata -m 8 + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) + F\x0022bfcfd62a21a3287edbd4d656218d0f525ed76a (esc) + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) So you've got a subtle bug here where the compaction of the lazymanifest isn't working right - probably around merging manifests somehow. Does this give you enough to get started? (I'd probably start debugging this by littering _lazymanifest._compact and _lazymanifest.text with asserts until I managed to trip over the un-sorted lines, and then backtrack from there to figure out how they managed to survive.) > > > > On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: > > # HG changeset patch > > # User Maciej Fijalkowski <fijall@gmail.com> > > # Date 1473680234 -7200 > > # Mon Sep 12 13:37:14 2016 +0200 > > # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc > > # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 > > lazymanifest: write a more efficient, pypy friendly version of lazymanifest > > > > diff --git a/mercurial/manifest.py b/mercurial/manifest.py > > --- a/mercurial/manifest.py > > +++ b/mercurial/manifest.py > > @@ -104,69 +104,297 @@ > > _checkforbidden(files) > > return ''.join(lines) > > > > -class _lazymanifest(dict): > > - """This is the pure implementation of lazymanifest. > > - > > - It has not been optimized *at all* and is not lazy. > > - """ > > - > > - def __init__(self, data): > > - dict.__init__(self) > > - for f, n, fl in _parse(data): > > - self[f] = n, fl > > - > > - def __setitem__(self, k, v): > > - node, flag = v > > - assert node is not None > > - if len(node) > 21: > > - node = node[:21] # match c implementation behavior > > - dict.__setitem__(self, k, (node, flag)) > > +class lazymanifestiter(object): > > + def __init__(self, lm): > > + self.pos = 0 > > + self.lm = lm > > > > def __iter__(self): > > - return iter(sorted(dict.keys(self))) > > + return self > > > > - def iterkeys(self): > > - return iter(sorted(dict.keys(self))) > > + def next(self): > > + try: > > + data, pos = self.lm._get(self.pos) > > + except IndexError: > > + raise StopIteration > > + if pos == -1: > > + self.pos += 1 > > + return data[0] > > + self.pos += 1 > > + zeropos = data.find('\x00', pos) > > + return data[pos:zeropos] > > > > - def iterentries(self): > > - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) > > +class lazymanifestiterentries(object): > > + def __init__(self, lm): > > + self.lm = lm > > + self.pos = 0 > > + > > + def __iter__(self): > > + return self > > + > > + def next(self): > > + try: > > + data, pos = self.lm._get(self.pos) > > + except IndexError: > > + raise StopIteration > > + if pos == -1: > > + self.pos += 1 > > + return data > > + zeropos = data.find('\x00', pos) > > + hashval = unhexlify(data, self.lm.extrainfo[self.pos], > > + zeropos + 1, 40) > > + flags = self.lm._getflags(data, self.pos, zeropos) > > + self.pos += 1 > > + return (data[pos:zeropos], hashval, flags) > > + > > +def unhexlify(data, extra, pos, length): > > + s = data[pos:pos + length].decode('hex') > > + if extra: > > + s += chr(extra & 0xff) > > + return s > > + > > +def _cmp(a, b): > > + return (a > b) - (a < b) > > + > > +class _lazymanifest(object): > > + def __init__(self, data, positions=None, extrainfo=None, extradata=None): > > + if positions is None: > > + self.positions = self.findlines(data) > > + self.extrainfo = [0] * len(self.positions) > > + self.data = data > > + self.extradata = [] > > + else: > > + self.positions = positions[:] > > + self.extrainfo = extrainfo[:] > > + self.extradata = extradata[:] > > + self.data = data > > + > > + def findlines(self, data): > > + if not data: > > + return [] > > + pos = data.find("\n") > > + positions = [0] > > + while pos < len(data) - 1 and pos != -1: > > + positions.append(pos + 1) > > + pos = data.find("\n", pos + 1) > > + return positions > > + > > + def _get(self, index): > > + # get the position encoded in pos: > > + # positive number is an index in 'data' > > + # negative number is in extrapieces > > + pos = self.positions[index] > > + if pos >= 0: > > + return self.data, pos > > + return self.extradata[-pos - 1], -1 > > + > > + def _getkey(self, pos): > > + if pos >= 0: > > + return self.data[pos:self.data.find('\x00', pos + 1)] > > + return self.extradata[-pos - 1][0] > > + > > + def bsearch(self, key): > > + first = 0 > > + last = len(self.positions) - 1 > > + > > + while first <= last: > > + midpoint = (first + last)//2 > > + nextpos = self.positions[midpoint] > > + candidate = self._getkey(nextpos) > > + r = _cmp(key, candidate) > > + if r == 0: > > + return midpoint > > + else: > > + if r < 0: > > + last = midpoint - 1 > > + else: > > + first = midpoint + 1 > > + return -1 > > + > > + def bsearch2(self, key): > > + # same as the above, but will always return the position > > + # done for performance reasons > > + first = 0 > > + last = len(self.positions) - 1 > > + > > + while first <= last: > > + midpoint = (first + last)//2 > > + nextpos = self.positions[midpoint] > > + candidate = self._getkey(nextpos) > > + r = _cmp(key, candidate) > > + if r == 0: > > + return (midpoint, True) > > + else: > > + if r < 0: > > + last = midpoint - 1 > > + else: > > + first = midpoint + 1 > > + return (first, False) > > + > > + def __contains__(self, key): > > + return self.bsearch(key) != -1 > > + > > + def _getflags(self, data, needle, pos): > > + start = pos + 41 > > + end = data.find("\n", start) > > + if end == -1: > > + end = len(data) - 1 > > + if start == end: > > + return '' > > + return self.data[start:end] > > + > > + def __getitem__(self, key): > > + if not isinstance(key, str): > > + raise TypeError("getitem: manifest keys must be a string.") > > + needle = self.bsearch(key) > > + if needle == -1: > > + raise KeyError > > + data, pos = self._get(needle) > > + if pos == -1: > > + return (data[1], data[2]) > > + zeropos = data.find('\x00', pos) > > + assert 0 <= needle <= len(self.positions) > > + assert len(self.extrainfo) == len(self.positions) > > + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) > > + flags = self._getflags(data, needle, zeropos) > > + return (hashval, flags) > > + > > + def __delitem__(self, key): > > + needle, found = self.bsearch2(key) > > + if not found: > > + raise KeyError > > + cur = self.positions[needle] > > + self.positions = self.positions[:needle] + self.positions[needle + 1:] > > + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] > > + if cur >= 0: > > + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] > > + > > + def __setitem__(self, key, value): > > + if not isinstance(key, str): > > + raise TypeError("setitem: manifest keys must be a string.") > > + if not isinstance(value, tuple) or len(value) != 2: > > + raise TypeError("Manifest values must be a tuple of (node, flags).") > > + hashval = value[0] > > + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: > > + raise TypeError("node must be a 20-byte string") > > + flags = value[1] > > + if not isinstance(flags, str) or len(flags) > 1: > > + raise TypeError("flags must a 0 or 1 byte string, got %r", flags) > > + needle, found = self.bsearch2(key) > > + if found: > > + # put the item > > + pos = self.positions[needle] > > + if pos < 0: > > + self.extradata[-pos - 1] = (key, value[0], value[1]) > > + else: > > + # just don't bother > > + self.extradata.append((key, value[0], value[1])) > > + self.positions[needle] = -len(self.extradata) > > + else: > > + # not found, put it in with extra positions > > + self.extradata.append((key, value[0], value[1])) > > + self.positions = (self.positions[:needle] + [-len(self.extradata)] > > + + self.positions[needle:]) > > + self.extrainfo = (self.extrainfo[:needle] + [0] + > > + self.extrainfo[needle:]) > > > > def copy(self): > > - c = _lazymanifest('') > > - c.update(self) > > - return c > > + # XXX call _compact like in C? > > + return _lazymanifest(self.data, self.positions, self.extrainfo, > > + self.extradata) > > + > > + def _compact(self): > > + # hopefully not called TOO often > > + def findnextposition(positions, start, lgt): > > + i = start > > + while i < len(positions): > > + if positions[i] >= 0: > > + return positions[i] > > + i += 1 > > + return lgt > > + > > + if len(self.extradata) == 0: > > + return > > + l = [] > > + last_cut = 0 > > + i = 0 > > + offset = 0 > > + self.extrainfo = [0] * len(self.positions) > > + while i < len(self.positions): > > + if self.positions[i] >= 0: > > + cur = self.positions[i] > > + last_cut = cur > > + while True: > > + self.positions[i] = offset > > + i += 1 > > + if i == len(self.positions) or self.positions[i] < 0: > > + break > > + offset += self.positions[i] - cur > > + cur = self.positions[i] > > + end_cut = findnextposition(self.positions, i, len(self.data)) > > + offset += end_cut - cur > > + l.append(self.data[last_cut:end_cut]) > > + else: > > + while i < len(self.positions) and self.positions[i] < 0: > > + cur = self.positions[i] > > + t = self.extradata[-cur - 1] > > + l.append(self._pack(t)) > > + self.positions[i] = offset > > + if len(t[1]) > 20: > > + self.extrainfo[i] = ord(t[1][21]) > > + offset += len(l[-1]) > > + i += 1 > > + self.data = ''.join(l) > > + self.extradata = [] > > + > > + def _pack(self, d): > > + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' > > + > > + def text(self): > > + self._compact() > > + return self.data > > > > def diff(self, m2, clean=False): > > '''Finds changes between the current manifest and m2.''' > > + # XXX think whether efficiency matters here > > diff = {} > > > > - for fn, e1 in self.iteritems(): > > + for fn, e1, flags in self.iterentries(): > > if fn not in m2: > > - diff[fn] = e1, (None, '') > > + diff[fn] = (e1, flags), (None, '') > > else: > > e2 = m2[fn] > > - if e1 != e2: > > - diff[fn] = e1, e2 > > + if (e1, flags) != e2: > > + diff[fn] = (e1, flags), e2 > > elif clean: > > diff[fn] = None > > > > - for fn, e2 in m2.iteritems(): > > + for fn, e2, flags in m2.iterentries(): > > if fn not in self: > > - diff[fn] = (None, ''), e2 > > + diff[fn] = (None, ''), (e2, flags) > > > > return diff > > > > + def iterentries(self): > > + return lazymanifestiterentries(self) > > + > > + def iterkeys(self): > > + return lazymanifestiter(self) > > + > > + def __iter__(self): > > + return lazymanifestiter(self) > > + > > + def __len__(self): > > + return len(self.positions) > > + > > def filtercopy(self, filterfn): > > + # XXX should be optimized > > c = _lazymanifest('') > > for f, n, fl in self.iterentries(): > > if filterfn(f): > > c[f] = n, fl > > return c > > > > - def text(self): > > - """Get the full data of this manifest as a bytestring.""" > > - return _textv1(self.iterentries()) > > - > > try: > > _lazymanifest = parsers.lazymanifest > > except AttributeError: > > diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py > > --- a/mercurial/pure/bdiff.py > > +++ b/mercurial/pure/bdiff.py > > @@ -111,8 +111,8 @@ > > def blocks(sa, sb): > > a = ffi.new("struct bdiff_line**") > > b = ffi.new("struct bdiff_line**") > > - ac = ffi.new("char[]", sa) > > - bc = ffi.new("char[]", sb) > > + ac = ffi.new("char[]", str(sa)) > > + bc = ffi.new("char[]", str(sb)) > > l = ffi.new("struct bdiff_hunk*") > > try: > > an = lib.bdiff_splitlines(ac, len(sa), a) > > @@ -138,8 +138,8 @@ > > def bdiff(sa, sb): > > a = ffi.new("struct bdiff_line**") > > b = ffi.new("struct bdiff_line**") > > - ac = ffi.new("char[]", sa) > > - bc = ffi.new("char[]", sb) > > + ac = ffi.new("char[]", str(sa)) > > + bc = ffi.new("char[]", str(sb)) > > l = ffi.new("struct bdiff_hunk*") > > try: > > an = lib.bdiff_splitlines(ac, len(sa), a) > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
I must say (and you are going to say that those are idle complaints) that asserts and prints are far inferior than a proper debugger which is surprisingly hard to use in the case of mercurial tests On Tue, Sep 20, 2016 at 4:39 PM, Augie Fackler <raf@durin42.com> wrote: > On Sat, Sep 17, 2016 at 08:23:36PM +0200, Maciej Fijalkowski wrote: >> This should fix the issues presented. >> >> There is one problem which is that the hash in test-rebase-detach >> changes. The way I see it is just an RNG phase-shift, but it might be >> a real bug. Some actual input will be appreciated. > > It's not possibly RNG related - there's no RNG that goes into the sha1 > of a node. I've added --debug to the failure in test-rebase-detach and > compared them between cpython and pypy: > > cpython: > @@ -366,11 +366,24 @@ > > > $ hg log --rev tip --debug > - changeset: 8:9472f4b1d736 > + changeset: 8:9472f4b1d7366902d0098aa1401e3f170000cc56 > tag: tip > + phase: draft > + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 > + parent: -1:0000000000000000000000000000000000000000 > + manifest: 8:8dbdcf066a97239fde2d0dba19fcb1e699fa66d2 > user: test > date: Thu Jan 01 00:00:00 1970 +0000 > - summary: Collapsed revision > + files: F > + files+: E > + extra: branch=default > + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 > + description: > + Collapsed revision > + * I > + * Merge > + * J > + > > > pypy: > @@ -366,11 +366,24 @@ > > > $ hg log --rev tip --debug > - changeset: 8:9472f4b1d736 > + changeset: 8:122ceff3b303b13e5ca323742a8ebe589b9f8066 > tag: tip > + phase: draft > + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 > + parent: -1:0000000000000000000000000000000000000000 > + manifest: 8:09f1dc369d2667dcaeb9cf828b0fcb76bda29f33 > user: test > date: Thu Jan 01 00:00:00 1970 +0000 > - summary: Collapsed revision > + files: F > + files+: E > + extra: branch=default > + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 > + description: > + Collapsed revision > + * I > + * Merge > + * J > + > > Which means the manifest is hashing out differently. That suggests > there's a subtle bug in how the manifest is hashing out in the pypy > version. Adding a `hg debugdata -m 8` (which prints raw manifest > data), and this is what I get on cpython: > > $ hg debugdata -m 8 > + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) > + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) > + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) > + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) > > on pypy: > $ hg debugdata -m 8 > + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) > + F\x0022bfcfd62a21a3287edbd4d656218d0f525ed76a (esc) > + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) > + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) > + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) > > > So you've got a subtle bug here where the compaction of the > lazymanifest isn't working right - probably around merging manifests > somehow. Does this give you enough to get started? > > (I'd probably start debugging this by littering _lazymanifest._compact > and _lazymanifest.text with asserts until I managed to trip over the > un-sorted lines, and then backtrack from there to figure out how they > managed to survive.) > >> >> >> >> On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: >> > # HG changeset patch >> > # User Maciej Fijalkowski <fijall@gmail.com> >> > # Date 1473680234 -7200 >> > # Mon Sep 12 13:37:14 2016 +0200 >> > # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc >> > # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >> > lazymanifest: write a more efficient, pypy friendly version of lazymanifest >> > >> > diff --git a/mercurial/manifest.py b/mercurial/manifest.py >> > --- a/mercurial/manifest.py >> > +++ b/mercurial/manifest.py >> > @@ -104,69 +104,297 @@ >> > _checkforbidden(files) >> > return ''.join(lines) >> > >> > -class _lazymanifest(dict): >> > - """This is the pure implementation of lazymanifest. >> > - >> > - It has not been optimized *at all* and is not lazy. >> > - """ >> > - >> > - def __init__(self, data): >> > - dict.__init__(self) >> > - for f, n, fl in _parse(data): >> > - self[f] = n, fl >> > - >> > - def __setitem__(self, k, v): >> > - node, flag = v >> > - assert node is not None >> > - if len(node) > 21: >> > - node = node[:21] # match c implementation behavior >> > - dict.__setitem__(self, k, (node, flag)) >> > +class lazymanifestiter(object): >> > + def __init__(self, lm): >> > + self.pos = 0 >> > + self.lm = lm >> > >> > def __iter__(self): >> > - return iter(sorted(dict.keys(self))) >> > + return self >> > >> > - def iterkeys(self): >> > - return iter(sorted(dict.keys(self))) >> > + def next(self): >> > + try: >> > + data, pos = self.lm._get(self.pos) >> > + except IndexError: >> > + raise StopIteration >> > + if pos == -1: >> > + self.pos += 1 >> > + return data[0] >> > + self.pos += 1 >> > + zeropos = data.find('\x00', pos) >> > + return data[pos:zeropos] >> > >> > - def iterentries(self): >> > - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) >> > +class lazymanifestiterentries(object): >> > + def __init__(self, lm): >> > + self.lm = lm >> > + self.pos = 0 >> > + >> > + def __iter__(self): >> > + return self >> > + >> > + def next(self): >> > + try: >> > + data, pos = self.lm._get(self.pos) >> > + except IndexError: >> > + raise StopIteration >> > + if pos == -1: >> > + self.pos += 1 >> > + return data >> > + zeropos = data.find('\x00', pos) >> > + hashval = unhexlify(data, self.lm.extrainfo[self.pos], >> > + zeropos + 1, 40) >> > + flags = self.lm._getflags(data, self.pos, zeropos) >> > + self.pos += 1 >> > + return (data[pos:zeropos], hashval, flags) >> > + >> > +def unhexlify(data, extra, pos, length): >> > + s = data[pos:pos + length].decode('hex') >> > + if extra: >> > + s += chr(extra & 0xff) >> > + return s >> > + >> > +def _cmp(a, b): >> > + return (a > b) - (a < b) >> > + >> > +class _lazymanifest(object): >> > + def __init__(self, data, positions=None, extrainfo=None, extradata=None): >> > + if positions is None: >> > + self.positions = self.findlines(data) >> > + self.extrainfo = [0] * len(self.positions) >> > + self.data = data >> > + self.extradata = [] >> > + else: >> > + self.positions = positions[:] >> > + self.extrainfo = extrainfo[:] >> > + self.extradata = extradata[:] >> > + self.data = data >> > + >> > + def findlines(self, data): >> > + if not data: >> > + return [] >> > + pos = data.find("\n") >> > + positions = [0] >> > + while pos < len(data) - 1 and pos != -1: >> > + positions.append(pos + 1) >> > + pos = data.find("\n", pos + 1) >> > + return positions >> > + >> > + def _get(self, index): >> > + # get the position encoded in pos: >> > + # positive number is an index in 'data' >> > + # negative number is in extrapieces >> > + pos = self.positions[index] >> > + if pos >= 0: >> > + return self.data, pos >> > + return self.extradata[-pos - 1], -1 >> > + >> > + def _getkey(self, pos): >> > + if pos >= 0: >> > + return self.data[pos:self.data.find('\x00', pos + 1)] >> > + return self.extradata[-pos - 1][0] >> > + >> > + def bsearch(self, key): >> > + first = 0 >> > + last = len(self.positions) - 1 >> > + >> > + while first <= last: >> > + midpoint = (first + last)//2 >> > + nextpos = self.positions[midpoint] >> > + candidate = self._getkey(nextpos) >> > + r = _cmp(key, candidate) >> > + if r == 0: >> > + return midpoint >> > + else: >> > + if r < 0: >> > + last = midpoint - 1 >> > + else: >> > + first = midpoint + 1 >> > + return -1 >> > + >> > + def bsearch2(self, key): >> > + # same as the above, but will always return the position >> > + # done for performance reasons >> > + first = 0 >> > + last = len(self.positions) - 1 >> > + >> > + while first <= last: >> > + midpoint = (first + last)//2 >> > + nextpos = self.positions[midpoint] >> > + candidate = self._getkey(nextpos) >> > + r = _cmp(key, candidate) >> > + if r == 0: >> > + return (midpoint, True) >> > + else: >> > + if r < 0: >> > + last = midpoint - 1 >> > + else: >> > + first = midpoint + 1 >> > + return (first, False) >> > + >> > + def __contains__(self, key): >> > + return self.bsearch(key) != -1 >> > + >> > + def _getflags(self, data, needle, pos): >> > + start = pos + 41 >> > + end = data.find("\n", start) >> > + if end == -1: >> > + end = len(data) - 1 >> > + if start == end: >> > + return '' >> > + return self.data[start:end] >> > + >> > + def __getitem__(self, key): >> > + if not isinstance(key, str): >> > + raise TypeError("getitem: manifest keys must be a string.") >> > + needle = self.bsearch(key) >> > + if needle == -1: >> > + raise KeyError >> > + data, pos = self._get(needle) >> > + if pos == -1: >> > + return (data[1], data[2]) >> > + zeropos = data.find('\x00', pos) >> > + assert 0 <= needle <= len(self.positions) >> > + assert len(self.extrainfo) == len(self.positions) >> > + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) >> > + flags = self._getflags(data, needle, zeropos) >> > + return (hashval, flags) >> > + >> > + def __delitem__(self, key): >> > + needle, found = self.bsearch2(key) >> > + if not found: >> > + raise KeyError >> > + cur = self.positions[needle] >> > + self.positions = self.positions[:needle] + self.positions[needle + 1:] >> > + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] >> > + if cur >= 0: >> > + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] >> > + >> > + def __setitem__(self, key, value): >> > + if not isinstance(key, str): >> > + raise TypeError("setitem: manifest keys must be a string.") >> > + if not isinstance(value, tuple) or len(value) != 2: >> > + raise TypeError("Manifest values must be a tuple of (node, flags).") >> > + hashval = value[0] >> > + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: >> > + raise TypeError("node must be a 20-byte string") >> > + flags = value[1] >> > + if not isinstance(flags, str) or len(flags) > 1: >> > + raise TypeError("flags must a 0 or 1 byte string, got %r", flags) >> > + needle, found = self.bsearch2(key) >> > + if found: >> > + # put the item >> > + pos = self.positions[needle] >> > + if pos < 0: >> > + self.extradata[-pos - 1] = (key, value[0], value[1]) >> > + else: >> > + # just don't bother >> > + self.extradata.append((key, value[0], value[1])) >> > + self.positions[needle] = -len(self.extradata) >> > + else: >> > + # not found, put it in with extra positions >> > + self.extradata.append((key, value[0], value[1])) >> > + self.positions = (self.positions[:needle] + [-len(self.extradata)] >> > + + self.positions[needle:]) >> > + self.extrainfo = (self.extrainfo[:needle] + [0] + >> > + self.extrainfo[needle:]) >> > >> > def copy(self): >> > - c = _lazymanifest('') >> > - c.update(self) >> > - return c >> > + # XXX call _compact like in C? >> > + return _lazymanifest(self.data, self.positions, self.extrainfo, >> > + self.extradata) >> > + >> > + def _compact(self): >> > + # hopefully not called TOO often >> > + def findnextposition(positions, start, lgt): >> > + i = start >> > + while i < len(positions): >> > + if positions[i] >= 0: >> > + return positions[i] >> > + i += 1 >> > + return lgt >> > + >> > + if len(self.extradata) == 0: >> > + return >> > + l = [] >> > + last_cut = 0 >> > + i = 0 >> > + offset = 0 >> > + self.extrainfo = [0] * len(self.positions) >> > + while i < len(self.positions): >> > + if self.positions[i] >= 0: >> > + cur = self.positions[i] >> > + last_cut = cur >> > + while True: >> > + self.positions[i] = offset >> > + i += 1 >> > + if i == len(self.positions) or self.positions[i] < 0: >> > + break >> > + offset += self.positions[i] - cur >> > + cur = self.positions[i] >> > + end_cut = findnextposition(self.positions, i, len(self.data)) >> > + offset += end_cut - cur >> > + l.append(self.data[last_cut:end_cut]) >> > + else: >> > + while i < len(self.positions) and self.positions[i] < 0: >> > + cur = self.positions[i] >> > + t = self.extradata[-cur - 1] >> > + l.append(self._pack(t)) >> > + self.positions[i] = offset >> > + if len(t[1]) > 20: >> > + self.extrainfo[i] = ord(t[1][21]) >> > + offset += len(l[-1]) >> > + i += 1 >> > + self.data = ''.join(l) >> > + self.extradata = [] >> > + >> > + def _pack(self, d): >> > + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' >> > + >> > + def text(self): >> > + self._compact() >> > + return self.data >> > >> > def diff(self, m2, clean=False): >> > '''Finds changes between the current manifest and m2.''' >> > + # XXX think whether efficiency matters here >> > diff = {} >> > >> > - for fn, e1 in self.iteritems(): >> > + for fn, e1, flags in self.iterentries(): >> > if fn not in m2: >> > - diff[fn] = e1, (None, '') >> > + diff[fn] = (e1, flags), (None, '') >> > else: >> > e2 = m2[fn] >> > - if e1 != e2: >> > - diff[fn] = e1, e2 >> > + if (e1, flags) != e2: >> > + diff[fn] = (e1, flags), e2 >> > elif clean: >> > diff[fn] = None >> > >> > - for fn, e2 in m2.iteritems(): >> > + for fn, e2, flags in m2.iterentries(): >> > if fn not in self: >> > - diff[fn] = (None, ''), e2 >> > + diff[fn] = (None, ''), (e2, flags) >> > >> > return diff >> > >> > + def iterentries(self): >> > + return lazymanifestiterentries(self) >> > + >> > + def iterkeys(self): >> > + return lazymanifestiter(self) >> > + >> > + def __iter__(self): >> > + return lazymanifestiter(self) >> > + >> > + def __len__(self): >> > + return len(self.positions) >> > + >> > def filtercopy(self, filterfn): >> > + # XXX should be optimized >> > c = _lazymanifest('') >> > for f, n, fl in self.iterentries(): >> > if filterfn(f): >> > c[f] = n, fl >> > return c >> > >> > - def text(self): >> > - """Get the full data of this manifest as a bytestring.""" >> > - return _textv1(self.iterentries()) >> > - >> > try: >> > _lazymanifest = parsers.lazymanifest >> > except AttributeError: >> > diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py >> > --- a/mercurial/pure/bdiff.py >> > +++ b/mercurial/pure/bdiff.py >> > @@ -111,8 +111,8 @@ >> > def blocks(sa, sb): >> > a = ffi.new("struct bdiff_line**") >> > b = ffi.new("struct bdiff_line**") >> > - ac = ffi.new("char[]", sa) >> > - bc = ffi.new("char[]", sb) >> > + ac = ffi.new("char[]", str(sa)) >> > + bc = ffi.new("char[]", str(sb)) >> > l = ffi.new("struct bdiff_hunk*") >> > try: >> > an = lib.bdiff_splitlines(ac, len(sa), a) >> > @@ -138,8 +138,8 @@ >> > def bdiff(sa, sb): >> > a = ffi.new("struct bdiff_line**") >> > b = ffi.new("struct bdiff_line**") >> > - ac = ffi.new("char[]", sa) >> > - bc = ffi.new("char[]", sb) >> > + ac = ffi.new("char[]", str(sa)) >> > + bc = ffi.new("char[]", str(sb)) >> > l = ffi.new("struct bdiff_hunk*") >> > try: >> > an = lib.bdiff_splitlines(ac, len(sa), a) >> _______________________________________________ >> Mercurial-devel mailing list >> Mercurial-devel@mercurial-scm.org >> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
./run-tests.py -l -d test.t If you have a breakpoint set, it should just work (unless server processes detached from stdin are involved). > On Sep 25, 2016, at 00:13, Maciej Fijalkowski <fijall@gmail.com> wrote: > > I must say (and you are going to say that those are idle complaints) > that asserts and prints are far inferior than a proper debugger which > is surprisingly hard to use in the case of mercurial tests > >> On Tue, Sep 20, 2016 at 4:39 PM, Augie Fackler <raf@durin42.com> wrote: >>> On Sat, Sep 17, 2016 at 08:23:36PM +0200, Maciej Fijalkowski wrote: >>> This should fix the issues presented. >>> >>> There is one problem which is that the hash in test-rebase-detach >>> changes. The way I see it is just an RNG phase-shift, but it might be >>> a real bug. Some actual input will be appreciated. >> >> It's not possibly RNG related - there's no RNG that goes into the sha1 >> of a node. I've added --debug to the failure in test-rebase-detach and >> compared them between cpython and pypy: >> >> cpython: >> @@ -366,11 +366,24 @@ >> >> >> $ hg log --rev tip --debug >> - changeset: 8:9472f4b1d736 >> + changeset: 8:9472f4b1d7366902d0098aa1401e3f170000cc56 >> tag: tip >> + phase: draft >> + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 >> + parent: -1:0000000000000000000000000000000000000000 >> + manifest: 8:8dbdcf066a97239fde2d0dba19fcb1e699fa66d2 >> user: test >> date: Thu Jan 01 00:00:00 1970 +0000 >> - summary: Collapsed revision >> + files: F >> + files+: E >> + extra: branch=default >> + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 >> + description: >> + Collapsed revision >> + * I >> + * Merge >> + * J >> + >> >> >> pypy: >> @@ -366,11 +366,24 @@ >> >> >> $ hg log --rev tip --debug >> - changeset: 8:9472f4b1d736 >> + changeset: 8:122ceff3b303b13e5ca323742a8ebe589b9f8066 >> tag: tip >> + phase: draft >> + parent: 7:02de42196ebee42ef284b6780a87cdc96e8eaab6 >> + parent: -1:0000000000000000000000000000000000000000 >> + manifest: 8:09f1dc369d2667dcaeb9cf828b0fcb76bda29f33 >> user: test >> date: Thu Jan 01 00:00:00 1970 +0000 >> - summary: Collapsed revision >> + files: F >> + files+: E >> + extra: branch=default >> + extra: rebase_source=9427d4d5af81c393e6b630129712cde8cdad5605 >> + description: >> + Collapsed revision >> + * I >> + * Merge >> + * J >> + >> >> Which means the manifest is hashing out differently. That suggests >> there's a subtle bug in how the manifest is hashing out in the pypy >> version. Adding a `hg debugdata -m 8` (which prints raw manifest >> data), and this is what I get on cpython: >> >> $ hg debugdata -m 8 >> + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) >> + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) >> + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) >> + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) >> >> on pypy: >> $ hg debugdata -m 8 >> + A\x0045f17b21388f07b8939b22052e5f3776e5246388 (esc) >> + F\x0022bfcfd62a21a3287edbd4d656218d0f525ed76a (esc) >> + E\x00c3b9643004a8d1c7f39b0025cefab53dc8f7dc12 (esc) >> + F\x00293a00dc38ac20f042c67ba8534eedc1b6a7ae15 (esc) >> + H\x008500189e74a9e0475e822093bc7db0d631aeb0b4 (esc) >> >> >> So you've got a subtle bug here where the compaction of the >> lazymanifest isn't working right - probably around merging manifests >> somehow. Does this give you enough to get started? >> >> (I'd probably start debugging this by littering _lazymanifest._compact >> and _lazymanifest.text with asserts until I managed to trip over the >> un-sorted lines, and then backtrack from there to figure out how they >> managed to survive.) >> >>> >>> >>> >>>> On Sat, Sep 17, 2016 at 8:22 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: >>>> # HG changeset patch >>>> # User Maciej Fijalkowski <fijall@gmail.com> >>>> # Date 1473680234 -7200 >>>> # Mon Sep 12 13:37:14 2016 +0200 >>>> # Node ID 7551f1e60b2155462d89a9571eec065e9f67debc >>>> # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >>>> lazymanifest: write a more efficient, pypy friendly version of lazymanifest >>>> >>>> diff --git a/mercurial/manifest.py b/mercurial/manifest.py >>>> --- a/mercurial/manifest.py >>>> +++ b/mercurial/manifest.py >>>> @@ -104,69 +104,297 @@ >>>> _checkforbidden(files) >>>> return ''.join(lines) >>>> >>>> -class _lazymanifest(dict): >>>> - """This is the pure implementation of lazymanifest. >>>> - >>>> - It has not been optimized *at all* and is not lazy. >>>> - """ >>>> - >>>> - def __init__(self, data): >>>> - dict.__init__(self) >>>> - for f, n, fl in _parse(data): >>>> - self[f] = n, fl >>>> - >>>> - def __setitem__(self, k, v): >>>> - node, flag = v >>>> - assert node is not None >>>> - if len(node) > 21: >>>> - node = node[:21] # match c implementation behavior >>>> - dict.__setitem__(self, k, (node, flag)) >>>> +class lazymanifestiter(object): >>>> + def __init__(self, lm): >>>> + self.pos = 0 >>>> + self.lm = lm >>>> >>>> def __iter__(self): >>>> - return iter(sorted(dict.keys(self))) >>>> + return self >>>> >>>> - def iterkeys(self): >>>> - return iter(sorted(dict.keys(self))) >>>> + def next(self): >>>> + try: >>>> + data, pos = self.lm._get(self.pos) >>>> + except IndexError: >>>> + raise StopIteration >>>> + if pos == -1: >>>> + self.pos += 1 >>>> + return data[0] >>>> + self.pos += 1 >>>> + zeropos = data.find('\x00', pos) >>>> + return data[pos:zeropos] >>>> >>>> - def iterentries(self): >>>> - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) >>>> +class lazymanifestiterentries(object): >>>> + def __init__(self, lm): >>>> + self.lm = lm >>>> + self.pos = 0 >>>> + >>>> + def __iter__(self): >>>> + return self >>>> + >>>> + def next(self): >>>> + try: >>>> + data, pos = self.lm._get(self.pos) >>>> + except IndexError: >>>> + raise StopIteration >>>> + if pos == -1: >>>> + self.pos += 1 >>>> + return data >>>> + zeropos = data.find('\x00', pos) >>>> + hashval = unhexlify(data, self.lm.extrainfo[self.pos], >>>> + zeropos + 1, 40) >>>> + flags = self.lm._getflags(data, self.pos, zeropos) >>>> + self.pos += 1 >>>> + return (data[pos:zeropos], hashval, flags) >>>> + >>>> +def unhexlify(data, extra, pos, length): >>>> + s = data[pos:pos + length].decode('hex') >>>> + if extra: >>>> + s += chr(extra & 0xff) >>>> + return s >>>> + >>>> +def _cmp(a, b): >>>> + return (a > b) - (a < b) >>>> + >>>> +class _lazymanifest(object): >>>> + def __init__(self, data, positions=None, extrainfo=None, extradata=None): >>>> + if positions is None: >>>> + self.positions = self.findlines(data) >>>> + self.extrainfo = [0] * len(self.positions) >>>> + self.data = data >>>> + self.extradata = [] >>>> + else: >>>> + self.positions = positions[:] >>>> + self.extrainfo = extrainfo[:] >>>> + self.extradata = extradata[:] >>>> + self.data = data >>>> + >>>> + def findlines(self, data): >>>> + if not data: >>>> + return [] >>>> + pos = data.find("\n") >>>> + positions = [0] >>>> + while pos < len(data) - 1 and pos != -1: >>>> + positions.append(pos + 1) >>>> + pos = data.find("\n", pos + 1) >>>> + return positions >>>> + >>>> + def _get(self, index): >>>> + # get the position encoded in pos: >>>> + # positive number is an index in 'data' >>>> + # negative number is in extrapieces >>>> + pos = self.positions[index] >>>> + if pos >= 0: >>>> + return self.data, pos >>>> + return self.extradata[-pos - 1], -1 >>>> + >>>> + def _getkey(self, pos): >>>> + if pos >= 0: >>>> + return self.data[pos:self.data.find('\x00', pos + 1)] >>>> + return self.extradata[-pos - 1][0] >>>> + >>>> + def bsearch(self, key): >>>> + first = 0 >>>> + last = len(self.positions) - 1 >>>> + >>>> + while first <= last: >>>> + midpoint = (first + last)//2 >>>> + nextpos = self.positions[midpoint] >>>> + candidate = self._getkey(nextpos) >>>> + r = _cmp(key, candidate) >>>> + if r == 0: >>>> + return midpoint >>>> + else: >>>> + if r < 0: >>>> + last = midpoint - 1 >>>> + else: >>>> + first = midpoint + 1 >>>> + return -1 >>>> + >>>> + def bsearch2(self, key): >>>> + # same as the above, but will always return the position >>>> + # done for performance reasons >>>> + first = 0 >>>> + last = len(self.positions) - 1 >>>> + >>>> + while first <= last: >>>> + midpoint = (first + last)//2 >>>> + nextpos = self.positions[midpoint] >>>> + candidate = self._getkey(nextpos) >>>> + r = _cmp(key, candidate) >>>> + if r == 0: >>>> + return (midpoint, True) >>>> + else: >>>> + if r < 0: >>>> + last = midpoint - 1 >>>> + else: >>>> + first = midpoint + 1 >>>> + return (first, False) >>>> + >>>> + def __contains__(self, key): >>>> + return self.bsearch(key) != -1 >>>> + >>>> + def _getflags(self, data, needle, pos): >>>> + start = pos + 41 >>>> + end = data.find("\n", start) >>>> + if end == -1: >>>> + end = len(data) - 1 >>>> + if start == end: >>>> + return '' >>>> + return self.data[start:end] >>>> + >>>> + def __getitem__(self, key): >>>> + if not isinstance(key, str): >>>> + raise TypeError("getitem: manifest keys must be a string.") >>>> + needle = self.bsearch(key) >>>> + if needle == -1: >>>> + raise KeyError >>>> + data, pos = self._get(needle) >>>> + if pos == -1: >>>> + return (data[1], data[2]) >>>> + zeropos = data.find('\x00', pos) >>>> + assert 0 <= needle <= len(self.positions) >>>> + assert len(self.extrainfo) == len(self.positions) >>>> + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) >>>> + flags = self._getflags(data, needle, zeropos) >>>> + return (hashval, flags) >>>> + >>>> + def __delitem__(self, key): >>>> + needle, found = self.bsearch2(key) >>>> + if not found: >>>> + raise KeyError >>>> + cur = self.positions[needle] >>>> + self.positions = self.positions[:needle] + self.positions[needle + 1:] >>>> + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] >>>> + if cur >= 0: >>>> + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] >>>> + >>>> + def __setitem__(self, key, value): >>>> + if not isinstance(key, str): >>>> + raise TypeError("setitem: manifest keys must be a string.") >>>> + if not isinstance(value, tuple) or len(value) != 2: >>>> + raise TypeError("Manifest values must be a tuple of (node, flags).") >>>> + hashval = value[0] >>>> + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: >>>> + raise TypeError("node must be a 20-byte string") >>>> + flags = value[1] >>>> + if not isinstance(flags, str) or len(flags) > 1: >>>> + raise TypeError("flags must a 0 or 1 byte string, got %r", flags) >>>> + needle, found = self.bsearch2(key) >>>> + if found: >>>> + # put the item >>>> + pos = self.positions[needle] >>>> + if pos < 0: >>>> + self.extradata[-pos - 1] = (key, value[0], value[1]) >>>> + else: >>>> + # just don't bother >>>> + self.extradata.append((key, value[0], value[1])) >>>> + self.positions[needle] = -len(self.extradata) >>>> + else: >>>> + # not found, put it in with extra positions >>>> + self.extradata.append((key, value[0], value[1])) >>>> + self.positions = (self.positions[:needle] + [-len(self.extradata)] >>>> + + self.positions[needle:]) >>>> + self.extrainfo = (self.extrainfo[:needle] + [0] + >>>> + self.extrainfo[needle:]) >>>> >>>> def copy(self): >>>> - c = _lazymanifest('') >>>> - c.update(self) >>>> - return c >>>> + # XXX call _compact like in C? >>>> + return _lazymanifest(self.data, self.positions, self.extrainfo, >>>> + self.extradata) >>>> + >>>> + def _compact(self): >>>> + # hopefully not called TOO often >>>> + def findnextposition(positions, start, lgt): >>>> + i = start >>>> + while i < len(positions): >>>> + if positions[i] >= 0: >>>> + return positions[i] >>>> + i += 1 >>>> + return lgt >>>> + >>>> + if len(self.extradata) == 0: >>>> + return >>>> + l = [] >>>> + last_cut = 0 >>>> + i = 0 >>>> + offset = 0 >>>> + self.extrainfo = [0] * len(self.positions) >>>> + while i < len(self.positions): >>>> + if self.positions[i] >= 0: >>>> + cur = self.positions[i] >>>> + last_cut = cur >>>> + while True: >>>> + self.positions[i] = offset >>>> + i += 1 >>>> + if i == len(self.positions) or self.positions[i] < 0: >>>> + break >>>> + offset += self.positions[i] - cur >>>> + cur = self.positions[i] >>>> + end_cut = findnextposition(self.positions, i, len(self.data)) >>>> + offset += end_cut - cur >>>> + l.append(self.data[last_cut:end_cut]) >>>> + else: >>>> + while i < len(self.positions) and self.positions[i] < 0: >>>> + cur = self.positions[i] >>>> + t = self.extradata[-cur - 1] >>>> + l.append(self._pack(t)) >>>> + self.positions[i] = offset >>>> + if len(t[1]) > 20: >>>> + self.extrainfo[i] = ord(t[1][21]) >>>> + offset += len(l[-1]) >>>> + i += 1 >>>> + self.data = ''.join(l) >>>> + self.extradata = [] >>>> + >>>> + def _pack(self, d): >>>> + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' >>>> + >>>> + def text(self): >>>> + self._compact() >>>> + return self.data >>>> >>>> def diff(self, m2, clean=False): >>>> '''Finds changes between the current manifest and m2.''' >>>> + # XXX think whether efficiency matters here >>>> diff = {} >>>> >>>> - for fn, e1 in self.iteritems(): >>>> + for fn, e1, flags in self.iterentries(): >>>> if fn not in m2: >>>> - diff[fn] = e1, (None, '') >>>> + diff[fn] = (e1, flags), (None, '') >>>> else: >>>> e2 = m2[fn] >>>> - if e1 != e2: >>>> - diff[fn] = e1, e2 >>>> + if (e1, flags) != e2: >>>> + diff[fn] = (e1, flags), e2 >>>> elif clean: >>>> diff[fn] = None >>>> >>>> - for fn, e2 in m2.iteritems(): >>>> + for fn, e2, flags in m2.iterentries(): >>>> if fn not in self: >>>> - diff[fn] = (None, ''), e2 >>>> + diff[fn] = (None, ''), (e2, flags) >>>> >>>> return diff >>>> >>>> + def iterentries(self): >>>> + return lazymanifestiterentries(self) >>>> + >>>> + def iterkeys(self): >>>> + return lazymanifestiter(self) >>>> + >>>> + def __iter__(self): >>>> + return lazymanifestiter(self) >>>> + >>>> + def __len__(self): >>>> + return len(self.positions) >>>> + >>>> def filtercopy(self, filterfn): >>>> + # XXX should be optimized >>>> c = _lazymanifest('') >>>> for f, n, fl in self.iterentries(): >>>> if filterfn(f): >>>> c[f] = n, fl >>>> return c >>>> >>>> - def text(self): >>>> - """Get the full data of this manifest as a bytestring.""" >>>> - return _textv1(self.iterentries()) >>>> - >>>> try: >>>> _lazymanifest = parsers.lazymanifest >>>> except AttributeError: >>>> diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py >>>> --- a/mercurial/pure/bdiff.py >>>> +++ b/mercurial/pure/bdiff.py >>>> @@ -111,8 +111,8 @@ >>>> def blocks(sa, sb): >>>> a = ffi.new("struct bdiff_line**") >>>> b = ffi.new("struct bdiff_line**") >>>> - ac = ffi.new("char[]", sa) >>>> - bc = ffi.new("char[]", sb) >>>> + ac = ffi.new("char[]", str(sa)) >>>> + bc = ffi.new("char[]", str(sb)) >>>> l = ffi.new("struct bdiff_hunk*") >>>> try: >>>> an = lib.bdiff_splitlines(ac, len(sa), a) >>>> @@ -138,8 +138,8 @@ >>>> def bdiff(sa, sb): >>>> a = ffi.new("struct bdiff_line**") >>>> b = ffi.new("struct bdiff_line**") >>>> - ac = ffi.new("char[]", sa) >>>> - bc = ffi.new("char[]", sb) >>>> + ac = ffi.new("char[]", str(sa)) >>>> + bc = ffi.new("char[]", str(sb)) >>>> l = ffi.new("struct bdiff_hunk*") >>>> try: >>>> an = lib.bdiff_splitlines(ac, len(sa), a) >>> _______________________________________________ >>> Mercurial-devel mailing list >>> Mercurial-devel@mercurial-scm.org >>> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Excerpts from Maciej Fijalkowski's message of 2016-09-25 09:13:29 +0200: > a proper debugger which is surprisingly hard to use in the case of > mercurial tests If you mean ipdb cannot be used together with run-tests.py, it could be solved by changing its I/O to /dev/tty, like: from IPython.core.debugger import Pdb originit = Pdb.__init__ def pdbinit(*args, **kwargs): fin = open('/dev/tty', 'r') fout = open('/dev/tty', 'w') originit(*args, stdin=fin, stdout=fout, **kwargs) Pdb.__init__ = pdbinit import ipdb; ipdb.set_trace()
cool, thanks guys! I've sent a new version that should pass all the tests. On Sun, Sep 25, 2016 at 1:03 PM, Jun Wu <quark@fb.com> wrote: > Excerpts from Maciej Fijalkowski's message of 2016-09-25 09:13:29 +0200: >> a proper debugger which is surprisingly hard to use in the case of >> mercurial tests > > If you mean ipdb cannot be used together with run-tests.py, it could be > solved by changing its I/O to /dev/tty, like: > > from IPython.core.debugger import Pdb > originit = Pdb.__init__ > def pdbinit(*args, **kwargs): > fin = open('/dev/tty', 'r') > fout = open('/dev/tty', 'w') > originit(*args, stdin=fin, stdout=fout, **kwargs) > Pdb.__init__ = pdbinit > import ipdb; ipdb.set_trace()
Patch
diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -104,69 +104,297 @@ _checkforbidden(files) return ''.join(lines) -class _lazymanifest(dict): - """This is the pure implementation of lazymanifest. - - It has not been optimized *at all* and is not lazy. - """ - - def __init__(self, data): - dict.__init__(self) - for f, n, fl in _parse(data): - self[f] = n, fl - - def __setitem__(self, k, v): - node, flag = v - assert node is not None - if len(node) > 21: - node = node[:21] # match c implementation behavior - dict.__setitem__(self, k, (node, flag)) +class lazymanifestiter(object): + def __init__(self, lm): + self.pos = 0 + self.lm = lm def __iter__(self): - return iter(sorted(dict.keys(self))) + return self - def iterkeys(self): - return iter(sorted(dict.keys(self))) + def next(self): + try: + data, pos = self.lm._get(self.pos) + except IndexError: + raise StopIteration + if pos == -1: + self.pos += 1 + return data[0] + self.pos += 1 + zeropos = data.find('\x00', pos) + return data[pos:zeropos] - def iterentries(self): - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) +class lazymanifestiterentries(object): + def __init__(self, lm): + self.lm = lm + self.pos = 0 + + def __iter__(self): + return self + + def next(self): + try: + data, pos = self.lm._get(self.pos) + except IndexError: + raise StopIteration + if pos == -1: + self.pos += 1 + return data + zeropos = data.find('\x00', pos) + hashval = unhexlify(data, self.lm.extrainfo[self.pos], + zeropos + 1, 40) + flags = self.lm._getflags(data, self.pos, zeropos) + self.pos += 1 + return (data[pos:zeropos], hashval, flags) + +def unhexlify(data, extra, pos, length): + s = data[pos:pos + length].decode('hex') + if extra: + s += chr(extra & 0xff) + return s + +def _cmp(a, b): + return (a > b) - (a < b) + +class _lazymanifest(object): + def __init__(self, data, positions=None, extrainfo=None, extradata=None): + if positions is None: + self.positions = self.findlines(data) + self.extrainfo = [0] * len(self.positions) + self.data = data + self.extradata = [] + else: + self.positions = positions[:] + self.extrainfo = extrainfo[:] + self.extradata = extradata[:] + self.data = data + + def findlines(self, data): + if not data: + return [] + pos = data.find("\n") + positions = [0] + while pos < len(data) - 1 and pos != -1: + positions.append(pos + 1) + pos = data.find("\n", pos + 1) + return positions + + def _get(self, index): + # get the position encoded in pos: + # positive number is an index in 'data' + # negative number is in extrapieces + pos = self.positions[index] + if pos >= 0: + return self.data, pos + return self.extradata[-pos - 1], -1 + + def _getkey(self, pos): + if pos >= 0: + return self.data[pos:self.data.find('\x00', pos + 1)] + return self.extradata[-pos - 1][0] + + def bsearch(self, key): + first = 0 + last = len(self.positions) - 1 + + while first <= last: + midpoint = (first + last)//2 + nextpos = self.positions[midpoint] + candidate = self._getkey(nextpos) + r = _cmp(key, candidate) + if r == 0: + return midpoint + else: + if r < 0: + last = midpoint - 1 + else: + first = midpoint + 1 + return -1 + + def bsearch2(self, key): + # same as the above, but will always return the position + # done for performance reasons + first = 0 + last = len(self.positions) - 1 + + while first <= last: + midpoint = (first + last)//2 + nextpos = self.positions[midpoint] + candidate = self._getkey(nextpos) + r = _cmp(key, candidate) + if r == 0: + return (midpoint, True) + else: + if r < 0: + last = midpoint - 1 + else: + first = midpoint + 1 + return (first, False) + + def __contains__(self, key): + return self.bsearch(key) != -1 + + def _getflags(self, data, needle, pos): + start = pos + 41 + end = data.find("\n", start) + if end == -1: + end = len(data) - 1 + if start == end: + return '' + return self.data[start:end] + + def __getitem__(self, key): + if not isinstance(key, str): + raise TypeError("getitem: manifest keys must be a string.") + needle = self.bsearch(key) + if needle == -1: + raise KeyError + data, pos = self._get(needle) + if pos == -1: + return (data[1], data[2]) + zeropos = data.find('\x00', pos) + assert 0 <= needle <= len(self.positions) + assert len(self.extrainfo) == len(self.positions) + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) + flags = self._getflags(data, needle, zeropos) + return (hashval, flags) + + def __delitem__(self, key): + needle, found = self.bsearch2(key) + if not found: + raise KeyError + cur = self.positions[needle] + self.positions = self.positions[:needle] + self.positions[needle + 1:] + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] + if cur >= 0: + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] + + def __setitem__(self, key, value): + if not isinstance(key, str): + raise TypeError("setitem: manifest keys must be a string.") + if not isinstance(value, tuple) or len(value) != 2: + raise TypeError("Manifest values must be a tuple of (node, flags).") + hashval = value[0] + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: + raise TypeError("node must be a 20-byte string") + flags = value[1] + if not isinstance(flags, str) or len(flags) > 1: + raise TypeError("flags must a 0 or 1 byte string, got %r", flags) + needle, found = self.bsearch2(key) + if found: + # put the item + pos = self.positions[needle] + if pos < 0: + self.extradata[-pos - 1] = (key, value[0], value[1]) + else: + # just don't bother + self.extradata.append((key, value[0], value[1])) + self.positions[needle] = -len(self.extradata) + else: + # not found, put it in with extra positions + self.extradata.append((key, value[0], value[1])) + self.positions = (self.positions[:needle] + [-len(self.extradata)] + + self.positions[needle:]) + self.extrainfo = (self.extrainfo[:needle] + [0] + + self.extrainfo[needle:]) def copy(self): - c = _lazymanifest('') - c.update(self) - return c + # XXX call _compact like in C? + return _lazymanifest(self.data, self.positions, self.extrainfo, + self.extradata) + + def _compact(self): + # hopefully not called TOO often + def findnextposition(positions, start, lgt): + i = start + while i < len(positions): + if positions[i] >= 0: + return positions[i] + i += 1 + return lgt + + if len(self.extradata) == 0: + return + l = [] + last_cut = 0 + i = 0 + offset = 0 + self.extrainfo = [0] * len(self.positions) + while i < len(self.positions): + if self.positions[i] >= 0: + cur = self.positions[i] + last_cut = cur + while True: + self.positions[i] = offset + i += 1 + if i == len(self.positions) or self.positions[i] < 0: + break + offset += self.positions[i] - cur + cur = self.positions[i] + end_cut = findnextposition(self.positions, i, len(self.data)) + offset += end_cut - cur + l.append(self.data[last_cut:end_cut]) + else: + while i < len(self.positions) and self.positions[i] < 0: + cur = self.positions[i] + t = self.extradata[-cur - 1] + l.append(self._pack(t)) + self.positions[i] = offset + if len(t[1]) > 20: + self.extrainfo[i] = ord(t[1][21]) + offset += len(l[-1]) + i += 1 + self.data = ''.join(l) + self.extradata = [] + + def _pack(self, d): + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' + + def text(self): + self._compact() + return self.data def diff(self, m2, clean=False): '''Finds changes between the current manifest and m2.''' + # XXX think whether efficiency matters here diff = {} - for fn, e1 in self.iteritems(): + for fn, e1, flags in self.iterentries(): if fn not in m2: - diff[fn] = e1, (None, '') + diff[fn] = (e1, flags), (None, '') else: e2 = m2[fn] - if e1 != e2: - diff[fn] = e1, e2 + if (e1, flags) != e2: + diff[fn] = (e1, flags), e2 elif clean: diff[fn] = None - for fn, e2 in m2.iteritems(): + for fn, e2, flags in m2.iterentries(): if fn not in self: - diff[fn] = (None, ''), e2 + diff[fn] = (None, ''), (e2, flags) return diff + def iterentries(self): + return lazymanifestiterentries(self) + + def iterkeys(self): + return lazymanifestiter(self) + + def __iter__(self): + return lazymanifestiter(self) + + def __len__(self): + return len(self.positions) + def filtercopy(self, filterfn): + # XXX should be optimized c = _lazymanifest('') for f, n, fl in self.iterentries(): if filterfn(f): c[f] = n, fl return c - def text(self): - """Get the full data of this manifest as a bytestring.""" - return _textv1(self.iterentries()) - try: _lazymanifest = parsers.lazymanifest except AttributeError: diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py --- a/mercurial/pure/bdiff.py +++ b/mercurial/pure/bdiff.py @@ -111,8 +111,8 @@ def blocks(sa, sb): a = ffi.new("struct bdiff_line**") b = ffi.new("struct bdiff_line**") - ac = ffi.new("char[]", sa) - bc = ffi.new("char[]", sb) + ac = ffi.new("char[]", str(sa)) + bc = ffi.new("char[]", str(sb)) l = ffi.new("struct bdiff_hunk*") try: an = lib.bdiff_splitlines(ac, len(sa), a) @@ -138,8 +138,8 @@ def bdiff(sa, sb): a = ffi.new("struct bdiff_line**") b = ffi.new("struct bdiff_line**") - ac = ffi.new("char[]", sa) - bc = ffi.new("char[]", sb) + ac = ffi.new("char[]", str(sa)) + bc = ffi.new("char[]", str(sb)) l = ffi.new("struct bdiff_hunk*") try: an = lib.bdiff_splitlines(ac, len(sa), a)