Submitter | Pierre-Yves David |
---|---|
Date | March 28, 2014, 2:14 a.m. |
Message ID | <1fbb708cc3821bbb247f.1395972882@marginatus.alto.octopoid.net> |
Download | mbox | patch |
Permalink | /patch/4098/ |
State | Superseded |
Commit | ddd56f3eb786f47e3e62320a4ad3f3c56b9b7ac4 |
Headers | show |
Comments
pierre-yves.david@ens-lyon.org writes: > # HG changeset patch > # User Pierre-Yves David <pierre-yves.david@fb.com> > # Date 1395297375 25200 > # Wed Mar 19 23:36:15 2014 -0700 > # Node ID 1fbb708cc3821bbb247f409f26ff2a940ca297c5 > # Parent e7c977d822a8c7921510c034e250ed52233fff41 > bundle2: support for bundling and unbundling payload > > We add the ability to bundle and unbundle a payload in parts. The payload is the > actual binary data of the part. It is used to convey all the applicative data. > For now we stick to very simple implementation with all the data fit in single > chunk. This open the door to some bundle2 testing usage. This will be improved before > bundle2 get used for real. We need to be able to stream the payload in multiple > part to exchange any changegroup efficiently. This simple version will do for > now. > > Bundling and unbundling are done in the same changeset because the test for > parts is less modular. However, the result is not too complex. > > diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py > --- a/mercurial/bundle2.py > +++ b/mercurial/bundle2.py > @@ -87,12 +87,17 @@ Binary format is as follow > :typename: alphanumerical part name > :option: we do not support option yet this denoted by two 16 bites zero. > > :payload: > > - The current payload is a 32bit integer with a value of 0. This is > - considered an "empty" payload. > + payload is a series of `<chunksize><chunkdata>`. > + > + `chunksize` is a 32 bits integer, `chunkdata` are plain bytes (as much as > + `chunksize` says)` The payload part is concluded by a zero size chunk. > + > + The current implementation always produces either zero or one chunk. > + This is an implementation limitation that will ultimatly be lifted. > """ > > import util > import struct > import urllib > @@ -107,10 +112,11 @@ from i18n import _ > _magicstring = 'HG20' > > _fstreamparamsize = '>H' > _fpartheadersize = '>H' > _fparttypesize = '>B' > +_fpayloadsize = '>I' > > class bundle20(object): > """represent an outgoing bundle2 container > > Use the `addparam` method to add stream level parameter. and `addpart` to > @@ -254,15 +260,21 @@ class unbundle20(object): > _offset[0] = offset + size > return data > typesize = _unpack(_fparttypesize, fromheader(1))[0] > parttype = fromheader(typesize) > self.ui.debug('part type: "%s"\n' % parttype) > - current = part(parttype) > assert fromheader(2) == '\0\0' # no option for now > self.ui.debug('part parameters: 0\n') > - assert self._readexact(4) == '\0\0\0\0' #empty payload > - self.ui.debug('payload chunk size: 0\n') > + payload = [] > + payloadsize = self._unpack(_fpayloadsize)[0] > + self.ui.debug('payload chunk size: %i\n' % payloadsize) > + while payloadsize: > + payload.append(self._readexact(payloadsize)) > + payloadsize = self._unpack(_fpayloadsize)[0] > + self.ui.debug('payload chunk size: %i\n' % payloadsize) > + payload = ''.join(payload) > + current = part(parttype, data=payload) > return current > > > class part(object): > """A bundle2 part contains application level payload > @@ -282,9 +294,14 @@ class part(object): > '\0\0', # No option support for now. > ] > headerchunk = ''.join(header) > yield _pack(_fpartheadersize, len(headerchunk)) > yield headerchunk > - # force empty part for now > - yield '\0\0\0\0' > + # we only support fixed size data now. > + # This will be improved in the future. > + yield _pack(_fpayloadsize, len(self.data)) > + yield self.data > + if len(self.data): > + # end of payload > + yield _pack(_fpayloadsize, 0) so we only mark the ned of the part when we have a len(self.data) > 0 because otherwise the self.data itself will terminate the part. shouldn't we rather go by if len(self.data): yield _pack(_fpayloadsize, len(self.data) yield self.data yield _pack(_fpayloadsize, 0) and make it explizit to always return 0 in the end.
On 03/28/2014 11:25 AM, David Soria Parra wrote: > pierre-yves.david@ens-lyon.org writes: > >> # HG changeset patch >> # User Pierre-Yves David <pierre-yves.david@fb.com> >> # Date 1395297375 25200 >> # Wed Mar 19 23:36:15 2014 -0700 >> # Node ID 1fbb708cc3821bbb247f409f26ff2a940ca297c5 >> # Parent e7c977d822a8c7921510c034e250ed52233fff41 >> bundle2: support for bundling and unbundling payload >> >> We add the ability to bundle and unbundle a payload in parts. The payload is the >> actual binary data of the part. It is used to convey all the applicative data. >> For now we stick to very simple implementation with all the data fit in single >> chunk. This open the door to some bundle2 testing usage. This will be improved before >> bundle2 get used for real. We need to be able to stream the payload in multiple >> part to exchange any changegroup efficiently. This simple version will do for >> now. >> >> Bundling and unbundling are done in the same changeset because the test for >> parts is less modular. However, the result is not too complex. >> >> diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py >> --- a/mercurial/bundle2.py >> +++ b/mercurial/bundle2.py >> @@ -87,12 +87,17 @@ Binary format is as follow >> :typename: alphanumerical part name >> :option: we do not support option yet this denoted by two 16 bites zero. >> >> :payload: >> >> - The current payload is a 32bit integer with a value of 0. This is >> - considered an "empty" payload. >> + payload is a series of `<chunksize><chunkdata>`. >> + >> + `chunksize` is a 32 bits integer, `chunkdata` are plain bytes (as much as >> + `chunksize` says)` The payload part is concluded by a zero size chunk. >> + >> + The current implementation always produces either zero or one chunk. >> + This is an implementation limitation that will ultimatly be lifted. >> """ >> >> import util >> import struct >> import urllib >> @@ -107,10 +112,11 @@ from i18n import _ >> _magicstring = 'HG20' >> >> _fstreamparamsize = '>H' >> _fpartheadersize = '>H' >> _fparttypesize = '>B' >> +_fpayloadsize = '>I' >> >> class bundle20(object): >> """represent an outgoing bundle2 container >> >> Use the `addparam` method to add stream level parameter. and `addpart` to >> @@ -254,15 +260,21 @@ class unbundle20(object): >> _offset[0] = offset + size >> return data >> typesize = _unpack(_fparttypesize, fromheader(1))[0] >> parttype = fromheader(typesize) >> self.ui.debug('part type: "%s"\n' % parttype) >> - current = part(parttype) >> assert fromheader(2) == '\0\0' # no option for now >> self.ui.debug('part parameters: 0\n') >> - assert self._readexact(4) == '\0\0\0\0' #empty payload >> - self.ui.debug('payload chunk size: 0\n') >> + payload = [] >> + payloadsize = self._unpack(_fpayloadsize)[0] >> + self.ui.debug('payload chunk size: %i\n' % payloadsize) >> + while payloadsize: >> + payload.append(self._readexact(payloadsize)) >> + payloadsize = self._unpack(_fpayloadsize)[0] >> + self.ui.debug('payload chunk size: %i\n' % payloadsize) >> + payload = ''.join(payload) >> + current = part(parttype, data=payload) >> return current >> >> >> class part(object): >> """A bundle2 part contains application level payload >> @@ -282,9 +294,14 @@ class part(object): >> '\0\0', # No option support for now. >> ] >> headerchunk = ''.join(header) >> yield _pack(_fpartheadersize, len(headerchunk)) >> yield headerchunk >> - # force empty part for now >> - yield '\0\0\0\0' >> + # we only support fixed size data now. >> + # This will be improved in the future. >> + yield _pack(_fpayloadsize, len(self.data)) >> + yield self.data >> + if len(self.data): >> + # end of payload >> + yield _pack(_fpayloadsize, 0) > > so we only mark the ned of the part when we have a len(self.data) > 0 > because otherwise the self.data itself will terminate the part. > shouldn't we rather go by > > if len(self.data): > yield _pack(_fpayloadsize, len(self.data) > yield self.data > yield _pack(_fpayloadsize, 0) > > and make it explizit to always return 0 in the end. Yop, your version is better. I;m not too concerned about this since we'll hve to rewrite this part to add support for streaming.
Patch
diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py --- a/mercurial/bundle2.py +++ b/mercurial/bundle2.py @@ -87,12 +87,17 @@ Binary format is as follow :typename: alphanumerical part name :option: we do not support option yet this denoted by two 16 bites zero. :payload: - The current payload is a 32bit integer with a value of 0. This is - considered an "empty" payload. + payload is a series of `<chunksize><chunkdata>`. + + `chunksize` is a 32 bits integer, `chunkdata` are plain bytes (as much as + `chunksize` says)` The payload part is concluded by a zero size chunk. + + The current implementation always produces either zero or one chunk. + This is an implementation limitation that will ultimatly be lifted. """ import util import struct import urllib @@ -107,10 +112,11 @@ from i18n import _ _magicstring = 'HG20' _fstreamparamsize = '>H' _fpartheadersize = '>H' _fparttypesize = '>B' +_fpayloadsize = '>I' class bundle20(object): """represent an outgoing bundle2 container Use the `addparam` method to add stream level parameter. and `addpart` to @@ -254,15 +260,21 @@ class unbundle20(object): _offset[0] = offset + size return data typesize = _unpack(_fparttypesize, fromheader(1))[0] parttype = fromheader(typesize) self.ui.debug('part type: "%s"\n' % parttype) - current = part(parttype) assert fromheader(2) == '\0\0' # no option for now self.ui.debug('part parameters: 0\n') - assert self._readexact(4) == '\0\0\0\0' #empty payload - self.ui.debug('payload chunk size: 0\n') + payload = [] + payloadsize = self._unpack(_fpayloadsize)[0] + self.ui.debug('payload chunk size: %i\n' % payloadsize) + while payloadsize: + payload.append(self._readexact(payloadsize)) + payloadsize = self._unpack(_fpayloadsize)[0] + self.ui.debug('payload chunk size: %i\n' % payloadsize) + payload = ''.join(payload) + current = part(parttype, data=payload) return current class part(object): """A bundle2 part contains application level payload @@ -282,9 +294,14 @@ class part(object): '\0\0', # No option support for now. ] headerchunk = ''.join(header) yield _pack(_fpartheadersize, len(headerchunk)) yield headerchunk - # force empty part for now - yield '\0\0\0\0' + # we only support fixed size data now. + # This will be improved in the future. + yield _pack(_fpayloadsize, len(self.data)) + yield self.data + if len(self.data): + # end of payload + yield _pack(_fpayloadsize, 0) diff --git a/tests/test-bundle2.t b/tests/test-bundle2.t --- a/tests/test-bundle2.t +++ b/tests/test-bundle2.t @@ -13,10 +13,15 @@ Create an extension to test bundle2 API > from mercurial import util > from mercurial import bundle2 > cmdtable = {} > command = cmdutil.command(cmdtable) > + > ELEPHANTSSONG = """Patali Dirapata, Cromda Cromda Ripalo, Pata Pata, Ko Ko Ko + > Bokoro Dipoulito, Rondi Rondi Pepino, Pata Pata, Ko Ko Ko + > Emana Karassoli, Loucra Loucra Ponponto, Pata Pata, Ko Ko Ko.""" + > assert len(ELEPHANTSSONG) == 178 # future test say 178 bytes, trust it. + > > @command('bundle2', > [('', 'param', [], 'stream level parameter'), > ('', 'parts', False, 'include some arbitrary parts to the bundle'),], > '[OUTPUTFILE]') > def cmdbundle2(ui, repo, path=None, **opts): @@ -33,10 +38,12 @@ Create an extension to test bundle2 API > part = bundle2.part('test:empty') > bundler.addpart(part) > # add a second one to make sure we handle multiple parts > part = bundle2.part('test:empty') > bundler.addpart(part) + > part = bundle2.part('test:song', data=ELEPHANTSSONG) + > bundler.addpart(part) > > if path is None: > file = sys.stdout > else: > file = open(path, 'w') @@ -60,10 +67,11 @@ Create an extension to test bundle2 API > ui.write(' %s\n' % value) > parts = list(unbundler) > ui.write('parts count: %i\n' % len(parts)) > for p in parts: > ui.write(' :%s:\n' % p.type) + > ui.write(' payload: %i bytes\n' % len(p.data)) > EOF $ cat >> $HGRCPATH << EOF > [extensions] > bundle2=$TESTTMP/bundle2.py > EOF @@ -236,23 +244,30 @@ Test part start emission of HG20 stream bundle parameter: start of parts bundle part: "test:empty" bundle part: "test:empty" + bundle part: "test:song" end of bundle $ cat ../parts.hg2 HG20\x00\x00\x00\r (esc) test:empty\x00\x00\x00\x00\x00\x00\x00\r (esc) - test:empty\x00\x00\x00\x00\x00\x00\x00\x00 (no-eol) (esc) + test:empty\x00\x00\x00\x00\x00\x00\x00\x0c test:song\x00\x00\x00\x00\x00\xb2Patali Dirapata, Cromda Cromda Ripalo, Pata Pata, Ko Ko Ko (esc) + Bokoro Dipoulito, Rondi Rondi Pepino, Pata Pata, Ko Ko Ko + Emana Karassoli, Loucra Loucra Ponponto, Pata Pata, Ko Ko Ko.\x00\x00\x00\x00\x00\x00 (no-eol) (esc) $ hg unbundle2 < ../parts.hg2 options count: 0 - parts count: 2 + parts count: 3 :test:empty: + payload: 0 bytes :test:empty: + payload: 0 bytes + :test:song: + payload: 178 bytes $ hg unbundle2 --debug < ../parts.hg2 start processing of HG20 stream reading bundle2 stream parameters options count: 0 @@ -263,10 +278,19 @@ Test part payload chunk size: 0 part header size: 13 part type: "test:empty" part parameters: 0 payload chunk size: 0 + part header size: 12 + part type: "test:song" + part parameters: 0 + payload chunk size: 178 + payload chunk size: 0 part header size: 0 end of bundle2 stream - parts count: 2 + parts count: 3 :test:empty: + payload: 0 bytes :test:empty: + payload: 0 bytes + :test:song: + payload: 178 bytes