Submitter | Maciej Fijalkowski |
---|---|
Date | Sept. 12, 2016, 12:36 p.m. |
Message ID | <a43b7edeb6d66afc5c4e.1473683779@brick.arcode.com> |
Download | mbox | patch |
Permalink | /patch/16590/ |
State | Accepted |
Headers | show |
Comments
On Mon, Sep 12, 2016 at 02:36:19PM +0200, Maciej Fijalkowski wrote: > # HG changeset patch > # User Maciej Fijalkowski <fijall@gmail.com> > # Date 1473680234 -7200 > # Mon Sep 12 13:37:14 2016 +0200 > # Node ID a43b7edeb6d66afc5c4eab9e4c8bb35b94cbfa97 > # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 > lazymanifest: write a more efficient, pypy friendly version of lazymanifest Queued this. Looks like a clear improvement over what was there before, even with the comments that have questions. > > diff --git a/mercurial/manifest.py b/mercurial/manifest.py > --- a/mercurial/manifest.py > +++ b/mercurial/manifest.py > @@ -104,69 +104,298 @@ > _checkforbidden(files) > return ''.join(lines) > > -class _lazymanifest(dict): > - """This is the pure implementation of lazymanifest. > - > - It has not been optimized *at all* and is not lazy. > - """ > - > - def __init__(self, data): > - dict.__init__(self) > - for f, n, fl in _parse(data): > - self[f] = n, fl > - > - def __setitem__(self, k, v): > - node, flag = v > - assert node is not None > - if len(node) > 21: > - node = node[:21] # match c implementation behavior > - dict.__setitem__(self, k, (node, flag)) > +class LazyManifestIter(object): > + def __init__(self, lm): > + self.pos = 0 > + self.lm = lm > > def __iter__(self): > - return iter(sorted(dict.keys(self))) > + return self > > - def iterkeys(self): > - return iter(sorted(dict.keys(self))) > + def next(self): > + try: > + data, pos = self.lm._get(self.pos) > + except IndexError: > + raise StopIteration > + if pos == -1: > + self.pos += 1 > + return data[0] > + self.pos += 1 > + zeropos = data.find('\x00', pos) > + return data[pos:zeropos] > > - def iterentries(self): > - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) > +class LazyManifestIterEntries(object): > + def __init__(self, lm): > + self.lm = lm > + self.pos = 0 > + > + def __iter__(self): > + return self > + > + def next(self): > + try: > + data, pos = self.lm._get(self.pos) > + except IndexError: > + raise StopIteration > + if pos == -1: > + self.pos += 1 > + return data > + zeropos = data.find('\x00', pos) > + hashval = unhexlify(data, self.lm.extrainfo[self.pos], > + zeropos + 1, 40) > + flags = self.lm._getflags(data, self.pos, zeropos) > + self.pos += 1 > + return (data[pos:zeropos], hashval, flags) > + > +def unhexlify(data, extra, pos, length): > + s = data[pos:pos + length].decode('hex') > + if extra: > + s += chr(extra & 0xff) > + return s > + > +def _cmp(a, b): > + return (a > b) - (a < b) > + > +class _lazymanifest(object): > + def __init__(self, data, positions=None, extrainfo=None, extradata=None): > + if positions is None: > + self.positions = self.findlines(data) > + self.extrainfo = [0] * len(self.positions) > + self.data = data > + self.extradata = [] > + else: > + self.positions = positions[:] > + self.extrainfo = extrainfo[:] > + self.extradata = extradata[:] > + self.data = data > + > + def findlines(self, data): > + if not data: > + return [] > + pos = data.find("\n") > + positions = [0] > + while pos < len(data) - 1 and pos != -1: > + positions.append(pos + 1) > + pos = data.find("\n", pos + 1) > + return positions > + > + def _get(self, index): > + # get the position encoded in pos: > + # positive number is an index in 'data' > + # negative number is in extrapieces > + pos = self.positions[index] > + if pos >= 0: > + return self.data, pos > + return self.extradata[-pos - 1], -1 > + > + def _getkey(self, pos): > + if pos >= 0: > + return self.data[pos:self.data.find('\x00', pos + 1)] > + return self.extradata[-pos - 1][0] > + > + def bsearch(self, key): > + first = 0 > + last = len(self.positions) - 1 > + > + while first <= last: > + midpoint = (first + last)//2 > + nextpos = self.positions[midpoint] > + candidate = self._getkey(nextpos) > + r = _cmp(key, candidate) > + if r == 0: > + return midpoint > + else: > + if r < 0: > + last = midpoint - 1 > + else: > + first = midpoint + 1 > + return -1 > + > + def bsearch2(self, key): > + # same as the above, but will always return the position > + # done for performance reasons > + first = 0 > + last = len(self.positions) - 1 > + > + while first <= last: > + midpoint = (first + last)//2 > + nextpos = self.positions[midpoint] > + candidate = self._getkey(nextpos) > + r = _cmp(key, candidate) > + if r == 0: > + return (midpoint, True) > + else: > + if r < 0: > + last = midpoint - 1 > + else: > + first = midpoint + 1 > + return (first, False) > + > + def __contains__(self, key): > + return self.bsearch(key) != -1 > + > + def _getflags(self, data, needle, pos): > + start = pos + 41 > + if needle < 0 or needle == len(self.positions) - 1: > + end = len(data) - 1 > + else: > + end = data.find("\n", start) > + if start == end: > + return '' > + return self.data[start:end] > + > + def __getitem__(self, key): > + if not isinstance(key, str): > + raise TypeError("setitem: manifest keys must be a string.") > + needle = self.bsearch(key) > + if needle == -1: > + raise KeyError > + data, pos = self._get(needle) > + if pos == -1: > + return (data[1], data[2]) > + zeropos = data.find('\x00', pos) > + assert 0 <= needle <= len(self.positions) > + assert len(self.extrainfo) == len(self.positions) > + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) > + flags = self._getflags(data, needle, zeropos) > + return (hashval, flags) > + > + def __delitem__(self, key): > + needle, found = self.bsearch2(key) > + if not found: > + raise KeyError > + cur = self.positions[needle] > + self.positions = self.positions[:needle] + self.positions[needle + 1:] > + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] > + if cur >= 0: > + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] > + > + def __setitem__(self, key, value): > + if not isinstance(key, str): > + raise TypeError("setitem: manifest keys must be a string.") > + if not isinstance(value, tuple) or len(value) != 2: > + raise TypeError("Manifest values must be a tuple of (node, flags).") > + hashval = value[0] > + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: > + raise TypeError("node must be a 20-byte string") > + flags = value[1] > + if not isinstance(flags, str) or len(flags) > 1: > + raise TypeError("flags must a 0 or 1 byte string, got %r", flags) > + needle, found = self.bsearch2(key) > + if found: > + # put the item > + pos = self.positions[needle] > + if pos < 0: > + self.extradata[-pos - 1] = (key, value[0], value[1]) > + else: > + # just don't bother > + self.extradata.append((key, value[0], value[1])) > + self.positions[needle] = -len(self.extradata) > + else: > + # not found, put it in with extra positions > + self.extradata.append((key, value[0], value[1])) > + self.positions = (self.positions[:needle] + [-len(self.extradata)] > + + self.positions[needle:]) > + self.extrainfo = (self.extrainfo[:needle] + [0] + > + self.extrainfo[needle:]) > > def copy(self): > - c = _lazymanifest('') > - c.update(self) > - return c > + # XXX call _compact like in C? > + return _lazymanifest(self.data, self.positions, self.extrainfo, > + self.extradata) > + > + def _compact(self): > + # hopefully not called TOO often > + def findnextposition(positions, start, lgt): > + i = start > + while i < len(positions): > + if positions[i] >= 0: > + return positions[i] > + i += 1 > + return lgt > + > + if len(self.extradata) == 0: > + return > + l = [] > + last_cut = 0 > + i = 0 > + offset = 0 > + self.extrainfo = [0] * len(self.positions) > + while i < len(self.positions): > + if self.positions[i] >= 0: > + cur = self.positions[i] > + last_cut = cur > + while True: > + self.positions[i] = offset > + i += 1 > + if i == len(self.positions) or self.positions[i] < 0: > + break > + offset += self.positions[i] - cur > + cur = self.positions[i] > + end_cut = findnextposition(self.positions, i, len(self.data)) > + offset += end_cut - cur > + l.append(self.data[last_cut:end_cut]) > + else: > + while i < len(self.positions) and self.positions[i] < 0: > + cur = self.positions[i] > + t = self.extradata[-cur - 1] > + l.append(self._pack(t)) > + self.positions[i] = offset > + if len(t[1]) > 20: > + self.extrainfo[i] = ord(t[1][21]) > + offset += len(l[-1]) > + i += 1 > + self.data = ''.join(l) > + self.extradata = [] > + > + def _pack(self, d): > + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' > + > + def text(self): > + self._compact() > + return self.data > > def diff(self, m2, clean=False): > '''Finds changes between the current manifest and m2.''' > + # XXX think whether efficiency matters here > diff = {} > > - for fn, e1 in self.iteritems(): > + for fn, e1, flags in self.iterentries(): > if fn not in m2: > - diff[fn] = e1, (None, '') > + diff[fn] = (e1, flags), (None, '') > else: > e2 = m2[fn] > - if e1 != e2: > - diff[fn] = e1, e2 > + if (e1, flags) != e2: > + diff[fn] = (e1, flags), e2 > elif clean: > diff[fn] = None > > - for fn, e2 in m2.iteritems(): > + for fn, e2, flags in m2.iterentries(): > if fn not in self: > - diff[fn] = (None, ''), e2 > + diff[fn] = (None, ''), (e2, flags) > > return diff > > + def iterentries(self): > + return LazyManifestIterEntries(self) > + > + def iterkeys(self): > + return LazyManifestIter(self) > + > + def __iter__(self): > + return LazyManifestIter(self) > + > + def __len__(self): > + return len(self.positions) > + > def filtercopy(self, filterfn): > + # XXX can be optimized > c = _lazymanifest('') > for f, n, fl in self.iterentries(): > if filterfn(f): > c[f] = n, fl > return c > > - def text(self): > - """Get the full data of this manifest as a bytestring.""" > - return _textv1(self.iterentries()) > - > try: > _lazymanifest = parsers.lazymanifest > except AttributeError: > diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py > --- a/mercurial/pure/bdiff.py > +++ b/mercurial/pure/bdiff.py > @@ -111,8 +111,8 @@ > def blocks(sa, sb): > a = ffi.new("struct bdiff_line**") > b = ffi.new("struct bdiff_line**") > - ac = ffi.new("char[]", sa) > - bc = ffi.new("char[]", sb) > + ac = ffi.new("char[]", str(sa)) > + bc = ffi.new("char[]", str(sb)) > l = ffi.new("struct bdiff_hunk*") > try: > an = lib.bdiff_splitlines(ac, len(sa), a) > @@ -138,8 +138,8 @@ > def bdiff(sa, sb): > a = ffi.new("struct bdiff_line**") > b = ffi.new("struct bdiff_line**") > - ac = ffi.new("char[]", sa) > - bc = ffi.new("char[]", sb) > + ac = ffi.new("char[]", str(sa)) > + bc = ffi.new("char[]", str(sb)) > l = ffi.new("struct bdiff_hunk*") > try: > an = lib.bdiff_splitlines(ac, len(sa), a) > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
On 09/12/2016 03:13 PM, Augie Fackler wrote: > On Mon, Sep 12, 2016 at 02:36:19PM +0200, Maciej Fijalkowski wrote: >> # HG changeset patch >> # User Maciej Fijalkowski <fijall@gmail.com> >> # Date 1473680234 -7200 >> # Mon Sep 12 13:37:14 2016 +0200 >> # Node ID a43b7edeb6d66afc5c4eab9e4c8bb35b94cbfa97 >> # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >> lazymanifest: write a more efficient, pypy friendly version of lazymanifest > > Queued this. Looks like a clear improvement over what was there > before, even with the comments that have questions. The lack of documentation makes it a bit hard to review but it seems overall correct, The LazyManifestIter and LazyManifestIterEntries class name are wrong as we don't use CamelCase (so lazymanifestiter and lazymanifestiterentries). There is also a small error in the __getitem__ error message (says setitem) However, tests do not pass with this patch so I've dropped it. Failed test-remove.t: output changed Failed test-subrepo-deep-nested-change.t: output changed Failed test-backout.t: output changed Failed test-rebase-detach.t: output changed Failed test-transplant.t: output changed Failed test-manifest.py: output changed and returned error code 1 Failed test-update-issue1456.t: output changed
On 09/13/2016 12:47 PM, Pierre-Yves David wrote: > > > On 09/12/2016 03:13 PM, Augie Fackler wrote: >> On Mon, Sep 12, 2016 at 02:36:19PM +0200, Maciej Fijalkowski wrote: >>> # HG changeset patch >>> # User Maciej Fijalkowski <fijall@gmail.com> >>> # Date 1473680234 -7200 >>> # Mon Sep 12 13:37:14 2016 +0200 >>> # Node ID a43b7edeb6d66afc5c4eab9e4c8bb35b94cbfa97 >>> # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >>> lazymanifest: write a more efficient, pypy friendly version of >>> lazymanifest >> >> Queued this. Looks like a clear improvement over what was there >> before, even with the comments that have questions. > > The lack of documentation makes it a bit hard to review but it seems > overall correct, The LazyManifestIter and LazyManifestIterEntries class > name are wrong as we don't use CamelCase (so lazymanifestiter and > lazymanifestiterentries). There is also a small error in the __getitem__ > error message (says setitem) > > However, tests do not pass with this patch so I've dropped it. fijal, you can `hg pull https://www.mercurial-scm.org/repo/users/marmoute/mercurial-dropped/ -r bbc5c39158c8` get the version with the style and message version highlighted above.
On 09/13/2016 12:52 PM, Pierre-Yves David wrote: > > > On 09/13/2016 12:47 PM, Pierre-Yves David wrote: >> >> >> On 09/12/2016 03:13 PM, Augie Fackler wrote: >>> On Mon, Sep 12, 2016 at 02:36:19PM +0200, Maciej Fijalkowski wrote: >>>> # HG changeset patch >>>> # User Maciej Fijalkowski <fijall@gmail.com> >>>> # Date 1473680234 -7200 >>>> # Mon Sep 12 13:37:14 2016 +0200 >>>> # Node ID a43b7edeb6d66afc5c4eab9e4c8bb35b94cbfa97 >>>> # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >>>> lazymanifest: write a more efficient, pypy friendly version of >>>> lazymanifest >>> >>> Queued this. Looks like a clear improvement over what was there >>> before, even with the comments that have questions. >> >> The lack of documentation makes it a bit hard to review but it seems >> overall correct, The LazyManifestIter and LazyManifestIterEntries class >> name are wrong as we don't use CamelCase (so lazymanifestiter and >> lazymanifestiterentries). There is also a small error in the __getitem__ >> error message (says setitem) >> >> However, tests do not pass with this patch so I've dropped it. > > fijal, you can `hg pull > https://www.mercurial-scm.org/repo/users/marmoute/mercurial-dropped/ -r > bbc5c39158c8` get the version with the style and message version > highlighted above. Gah, make this -r bbc5c39158c8 because I cannot copy paste properly :-/
Hi When I'm trying to do that, I get that: https://paste.pound-python.org/show/HZsFNvYPt8EAH2ugETpx/ and then it hangs (for both system hg and local hg) On Tue, Sep 13, 2016 at 12:59 PM, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote: > > > On 09/13/2016 12:52 PM, Pierre-Yves David wrote: >> >> >> >> On 09/13/2016 12:47 PM, Pierre-Yves David wrote: >>> >>> >>> >>> On 09/12/2016 03:13 PM, Augie Fackler wrote: >>>> >>>> On Mon, Sep 12, 2016 at 02:36:19PM +0200, Maciej Fijalkowski wrote: >>>>> >>>>> # HG changeset patch >>>>> # User Maciej Fijalkowski <fijall@gmail.com> >>>>> # Date 1473680234 -7200 >>>>> # Mon Sep 12 13:37:14 2016 +0200 >>>>> # Node ID a43b7edeb6d66afc5c4eab9e4c8bb35b94cbfa97 >>>>> # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >>>>> lazymanifest: write a more efficient, pypy friendly version of >>>>> lazymanifest >>>> >>>> >>>> Queued this. Looks like a clear improvement over what was there >>>> before, even with the comments that have questions. >>> >>> >>> The lack of documentation makes it a bit hard to review but it seems >>> overall correct, The LazyManifestIter and LazyManifestIterEntries class >>> name are wrong as we don't use CamelCase (so lazymanifestiter and >>> lazymanifestiterentries). There is also a small error in the __getitem__ >>> error message (says setitem) >>> >>> However, tests do not pass with this patch so I've dropped it. >> >> >> fijal, you can `hg pull >> https://www.mercurial-scm.org/repo/users/marmoute/mercurial-dropped/ -r >> bbc5c39158c8` get the version with the style and message version >> highlighted above. > > > Gah, make this -r bbc5c39158c8 because I cannot copy paste properly :-/ > > -- > Pierre-Yves David
On 09/17/2016 11:17 AM, Maciej Fijalkowski wrote: > Hi > > When I'm trying to do that, I get that: > > https://paste.pound-python.org/show/HZsFNvYPt8EAH2ugETpx/ > > and then it hangs (for both system hg and local hg) Can you add --debug to get a bit more data?
https://paste.pound-python.org/show/JB0RSPFycVzWS8X6C7hc/ On Sat, Sep 17, 2016 at 12:25 PM, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote: > > > On 09/17/2016 11:17 AM, Maciej Fijalkowski wrote: >> >> Hi >> >> When I'm trying to do that, I get that: >> >> https://paste.pound-python.org/show/HZsFNvYPt8EAH2ugETpx/ >> >> and then it hangs (for both system hg and local hg) > > > Can you add --debug to get a bit more data? > > -- > Pierre-Yves David
ok, I think at the end it is that: received listkey for "obsolete": 16746270 bytes and it takes a while, but there is no progress nothing which seems like it's hanging On Sat, Sep 17, 2016 at 1:44 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: > https://paste.pound-python.org/show/JB0RSPFycVzWS8X6C7hc/ > > On Sat, Sep 17, 2016 at 12:25 PM, Pierre-Yves David > <pierre-yves.david@ens-lyon.org> wrote: >> >> >> On 09/17/2016 11:17 AM, Maciej Fijalkowski wrote: >>> >>> Hi >>> >>> When I'm trying to do that, I get that: >>> >>> https://paste.pound-python.org/show/HZsFNvYPt8EAH2ugETpx/ >>> >>> and then it hangs (for both system hg and local hg) >> >> >> Can you add --debug to get a bit more data? >> >> -- >> Pierre-Yves David
managed to pull that. How do I incorporate my fixes for the problems now? On Sat, Sep 17, 2016 at 1:45 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: > ok, I think at the end it is that: > > received listkey for "obsolete": 16746270 bytes > > and it takes a while, but there is no progress nothing which seems > like it's hanging > > On Sat, Sep 17, 2016 at 1:44 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: >> https://paste.pound-python.org/show/JB0RSPFycVzWS8X6C7hc/ >> >> On Sat, Sep 17, 2016 at 12:25 PM, Pierre-Yves David >> <pierre-yves.david@ens-lyon.org> wrote: >>> >>> >>> On 09/17/2016 11:17 AM, Maciej Fijalkowski wrote: >>>> >>>> Hi >>>> >>>> When I'm trying to do that, I get that: >>>> >>>> https://paste.pound-python.org/show/HZsFNvYPt8EAH2ugETpx/ >>>> >>>> and then it hangs (for both system hg and local hg) >>> >>> >>> Can you add --debug to get a bit more data? >>> >>> -- >>> Pierre-Yves David
I have fixed the problem with the test that you listed. I'm still completely unable to finish the test run - with -j it randomly hangs and does nothing with no extra output On Sat, Sep 17, 2016 at 1:47 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: > managed to pull that. How do I incorporate my fixes for the problems now? > > On Sat, Sep 17, 2016 at 1:45 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: >> ok, I think at the end it is that: >> >> received listkey for "obsolete": 16746270 bytes >> >> and it takes a while, but there is no progress nothing which seems >> like it's hanging >> >> On Sat, Sep 17, 2016 at 1:44 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: >>> https://paste.pound-python.org/show/JB0RSPFycVzWS8X6C7hc/ >>> >>> On Sat, Sep 17, 2016 at 12:25 PM, Pierre-Yves David >>> <pierre-yves.david@ens-lyon.org> wrote: >>>> >>>> >>>> On 09/17/2016 11:17 AM, Maciej Fijalkowski wrote: >>>>> >>>>> Hi >>>>> >>>>> When I'm trying to do that, I get that: >>>>> >>>>> https://paste.pound-python.org/show/HZsFNvYPt8EAH2ugETpx/ >>>>> >>>>> and then it hangs (for both system hg and local hg) >>>> >>>> >>>> Can you add --debug to get a bit more data? >>>> >>>> -- >>>> Pierre-Yves David
On 09/17/2016 01:45 PM, Maciej Fijalkowski wrote: > ok, I think at the end it is that: > > received listkey for "obsolete": 16746270 bytes > > and it takes a while, but there is no progress nothing which seems > like it's hanging This is strange, This means you are using the old (Very inefficient) way to pull obsolescence marker. What version of Mercurial and evolve are you using? Cheer,
The one that comes with os x or homebrew I'll update to a newer one I promise On 28 Sep 2016 2:09 PM, "Pierre-Yves David" <pierre-yves.david@ens-lyon.org> wrote: > > > On 09/17/2016 01:45 PM, Maciej Fijalkowski wrote: > >> ok, I think at the end it is that: >> >> received listkey for "obsolete": 16746270 bytes >> >> and it takes a while, but there is no progress nothing which seems >> like it's hanging >> > > This is strange, This means you are using the old (Very inefficient) way > to pull obsolescence marker. What version of Mercurial and evolve are you > using? > > Cheer, > > -- > Pierre-Yves David >
Patch
diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -104,69 +104,298 @@ _checkforbidden(files) return ''.join(lines) -class _lazymanifest(dict): - """This is the pure implementation of lazymanifest. - - It has not been optimized *at all* and is not lazy. - """ - - def __init__(self, data): - dict.__init__(self) - for f, n, fl in _parse(data): - self[f] = n, fl - - def __setitem__(self, k, v): - node, flag = v - assert node is not None - if len(node) > 21: - node = node[:21] # match c implementation behavior - dict.__setitem__(self, k, (node, flag)) +class LazyManifestIter(object): + def __init__(self, lm): + self.pos = 0 + self.lm = lm def __iter__(self): - return iter(sorted(dict.keys(self))) + return self - def iterkeys(self): - return iter(sorted(dict.keys(self))) + def next(self): + try: + data, pos = self.lm._get(self.pos) + except IndexError: + raise StopIteration + if pos == -1: + self.pos += 1 + return data[0] + self.pos += 1 + zeropos = data.find('\x00', pos) + return data[pos:zeropos] - def iterentries(self): - return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) +class LazyManifestIterEntries(object): + def __init__(self, lm): + self.lm = lm + self.pos = 0 + + def __iter__(self): + return self + + def next(self): + try: + data, pos = self.lm._get(self.pos) + except IndexError: + raise StopIteration + if pos == -1: + self.pos += 1 + return data + zeropos = data.find('\x00', pos) + hashval = unhexlify(data, self.lm.extrainfo[self.pos], + zeropos + 1, 40) + flags = self.lm._getflags(data, self.pos, zeropos) + self.pos += 1 + return (data[pos:zeropos], hashval, flags) + +def unhexlify(data, extra, pos, length): + s = data[pos:pos + length].decode('hex') + if extra: + s += chr(extra & 0xff) + return s + +def _cmp(a, b): + return (a > b) - (a < b) + +class _lazymanifest(object): + def __init__(self, data, positions=None, extrainfo=None, extradata=None): + if positions is None: + self.positions = self.findlines(data) + self.extrainfo = [0] * len(self.positions) + self.data = data + self.extradata = [] + else: + self.positions = positions[:] + self.extrainfo = extrainfo[:] + self.extradata = extradata[:] + self.data = data + + def findlines(self, data): + if not data: + return [] + pos = data.find("\n") + positions = [0] + while pos < len(data) - 1 and pos != -1: + positions.append(pos + 1) + pos = data.find("\n", pos + 1) + return positions + + def _get(self, index): + # get the position encoded in pos: + # positive number is an index in 'data' + # negative number is in extrapieces + pos = self.positions[index] + if pos >= 0: + return self.data, pos + return self.extradata[-pos - 1], -1 + + def _getkey(self, pos): + if pos >= 0: + return self.data[pos:self.data.find('\x00', pos + 1)] + return self.extradata[-pos - 1][0] + + def bsearch(self, key): + first = 0 + last = len(self.positions) - 1 + + while first <= last: + midpoint = (first + last)//2 + nextpos = self.positions[midpoint] + candidate = self._getkey(nextpos) + r = _cmp(key, candidate) + if r == 0: + return midpoint + else: + if r < 0: + last = midpoint - 1 + else: + first = midpoint + 1 + return -1 + + def bsearch2(self, key): + # same as the above, but will always return the position + # done for performance reasons + first = 0 + last = len(self.positions) - 1 + + while first <= last: + midpoint = (first + last)//2 + nextpos = self.positions[midpoint] + candidate = self._getkey(nextpos) + r = _cmp(key, candidate) + if r == 0: + return (midpoint, True) + else: + if r < 0: + last = midpoint - 1 + else: + first = midpoint + 1 + return (first, False) + + def __contains__(self, key): + return self.bsearch(key) != -1 + + def _getflags(self, data, needle, pos): + start = pos + 41 + if needle < 0 or needle == len(self.positions) - 1: + end = len(data) - 1 + else: + end = data.find("\n", start) + if start == end: + return '' + return self.data[start:end] + + def __getitem__(self, key): + if not isinstance(key, str): + raise TypeError("setitem: manifest keys must be a string.") + needle = self.bsearch(key) + if needle == -1: + raise KeyError + data, pos = self._get(needle) + if pos == -1: + return (data[1], data[2]) + zeropos = data.find('\x00', pos) + assert 0 <= needle <= len(self.positions) + assert len(self.extrainfo) == len(self.positions) + hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) + flags = self._getflags(data, needle, zeropos) + return (hashval, flags) + + def __delitem__(self, key): + needle, found = self.bsearch2(key) + if not found: + raise KeyError + cur = self.positions[needle] + self.positions = self.positions[:needle] + self.positions[needle + 1:] + self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] + if cur >= 0: + self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] + + def __setitem__(self, key, value): + if not isinstance(key, str): + raise TypeError("setitem: manifest keys must be a string.") + if not isinstance(value, tuple) or len(value) != 2: + raise TypeError("Manifest values must be a tuple of (node, flags).") + hashval = value[0] + if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: + raise TypeError("node must be a 20-byte string") + flags = value[1] + if not isinstance(flags, str) or len(flags) > 1: + raise TypeError("flags must a 0 or 1 byte string, got %r", flags) + needle, found = self.bsearch2(key) + if found: + # put the item + pos = self.positions[needle] + if pos < 0: + self.extradata[-pos - 1] = (key, value[0], value[1]) + else: + # just don't bother + self.extradata.append((key, value[0], value[1])) + self.positions[needle] = -len(self.extradata) + else: + # not found, put it in with extra positions + self.extradata.append((key, value[0], value[1])) + self.positions = (self.positions[:needle] + [-len(self.extradata)] + + self.positions[needle:]) + self.extrainfo = (self.extrainfo[:needle] + [0] + + self.extrainfo[needle:]) def copy(self): - c = _lazymanifest('') - c.update(self) - return c + # XXX call _compact like in C? + return _lazymanifest(self.data, self.positions, self.extrainfo, + self.extradata) + + def _compact(self): + # hopefully not called TOO often + def findnextposition(positions, start, lgt): + i = start + while i < len(positions): + if positions[i] >= 0: + return positions[i] + i += 1 + return lgt + + if len(self.extradata) == 0: + return + l = [] + last_cut = 0 + i = 0 + offset = 0 + self.extrainfo = [0] * len(self.positions) + while i < len(self.positions): + if self.positions[i] >= 0: + cur = self.positions[i] + last_cut = cur + while True: + self.positions[i] = offset + i += 1 + if i == len(self.positions) or self.positions[i] < 0: + break + offset += self.positions[i] - cur + cur = self.positions[i] + end_cut = findnextposition(self.positions, i, len(self.data)) + offset += end_cut - cur + l.append(self.data[last_cut:end_cut]) + else: + while i < len(self.positions) and self.positions[i] < 0: + cur = self.positions[i] + t = self.extradata[-cur - 1] + l.append(self._pack(t)) + self.positions[i] = offset + if len(t[1]) > 20: + self.extrainfo[i] = ord(t[1][21]) + offset += len(l[-1]) + i += 1 + self.data = ''.join(l) + self.extradata = [] + + def _pack(self, d): + return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' + + def text(self): + self._compact() + return self.data def diff(self, m2, clean=False): '''Finds changes between the current manifest and m2.''' + # XXX think whether efficiency matters here diff = {} - for fn, e1 in self.iteritems(): + for fn, e1, flags in self.iterentries(): if fn not in m2: - diff[fn] = e1, (None, '') + diff[fn] = (e1, flags), (None, '') else: e2 = m2[fn] - if e1 != e2: - diff[fn] = e1, e2 + if (e1, flags) != e2: + diff[fn] = (e1, flags), e2 elif clean: diff[fn] = None - for fn, e2 in m2.iteritems(): + for fn, e2, flags in m2.iterentries(): if fn not in self: - diff[fn] = (None, ''), e2 + diff[fn] = (None, ''), (e2, flags) return diff + def iterentries(self): + return LazyManifestIterEntries(self) + + def iterkeys(self): + return LazyManifestIter(self) + + def __iter__(self): + return LazyManifestIter(self) + + def __len__(self): + return len(self.positions) + def filtercopy(self, filterfn): + # XXX can be optimized c = _lazymanifest('') for f, n, fl in self.iterentries(): if filterfn(f): c[f] = n, fl return c - def text(self): - """Get the full data of this manifest as a bytestring.""" - return _textv1(self.iterentries()) - try: _lazymanifest = parsers.lazymanifest except AttributeError: diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py --- a/mercurial/pure/bdiff.py +++ b/mercurial/pure/bdiff.py @@ -111,8 +111,8 @@ def blocks(sa, sb): a = ffi.new("struct bdiff_line**") b = ffi.new("struct bdiff_line**") - ac = ffi.new("char[]", sa) - bc = ffi.new("char[]", sb) + ac = ffi.new("char[]", str(sa)) + bc = ffi.new("char[]", str(sb)) l = ffi.new("struct bdiff_hunk*") try: an = lib.bdiff_splitlines(ac, len(sa), a) @@ -138,8 +138,8 @@ def bdiff(sa, sb): a = ffi.new("struct bdiff_line**") b = ffi.new("struct bdiff_line**") - ac = ffi.new("char[]", sa) - bc = ffi.new("char[]", sb) + ac = ffi.new("char[]", str(sa)) + bc = ffi.new("char[]", str(sb)) l = ffi.new("struct bdiff_hunk*") try: an = lib.bdiff_splitlines(ac, len(sa), a)