Submitter | Boris Feld |
---|---|
Date | July 16, 2018, 6:50 p.m. |
Message ID | <6ed641517622f53dd511.1531767021@FB-lair> |
Download | mbox | patch |
Permalink | /patch/32868/ |
State | Accepted |
Headers | show |
Comments
On Mon, Jul 16, 2018 at 11:50 AM, Boris Feld <boris.feld@octobus.net> wrote: > # HG changeset patch > # User Paul Morelle <paul.morelle@octobus.net> > # Date 1528143798 -7200 > # Mon Jun 04 22:23:18 2018 +0200 > # Node ID 6ed641517622f53dd51193c3ae3930c66fc18078 > # Parent cc76692f401d0063cd5dfaf46f0fca1563a85fae > # EXP-Topic write-for-sparse-read > # Available At https://bitbucket.org/octobus/mercurial-devel/ > # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r > 6ed641517622 > sparse-revlog: new requirement enabled with format.sparse-revlog > Queued this series, thanks. I did flip the order of parts 1 and 2 when landing so all the sparse commits are grouped. I'm reasonably convinced this code won't have a negative impact on repos that haven't opted into the feature. So it feels safe to land just before feature freeze. > > The meaning of the new 'sparse-revlog' requirement is that the revlogs are > allowed to contain wider delta chains with larger holes between the > interesting > chunks. These sparse delta chains should be read in several chunks to > avoid a > potential explosion of memory usage. > > Former version won't know how to read a delta chain in several chunks. They > would keep reading them in a single read, and therefore would be subject > to the > potential memory explosion. Hence this new requirement: only versions > having > support of sparse-revlog reading should be allowed to read such a revlog. > > Implementation of this new algorithm and tools to enable or disable the > requirement will follow in the next changesets. 
> > diff --git a/mercurial/configitems.py b/mercurial/configitems.py > --- a/mercurial/configitems.py > +++ b/mercurial/configitems.py > @@ -652,6 +652,9 @@ coreconfigitem('format', 'maxchainlen', > coreconfigitem('format', 'obsstore-version', > default=None, > ) > +coreconfigitem('format', 'sparse-revlog', > + default=False, > +) > coreconfigitem('format', 'usefncache', > default=True, > ) > diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py > --- a/mercurial/localrepo.py > +++ b/mercurial/localrepo.py > @@ -354,6 +354,15 @@ class locallegacypeer(localpeer): > # clients. > REVLOGV2_REQUIREMENT = 'exp-revlogv2.0' > > +# A repository with the sparserevlog feature will have delta chains that > +# can spread over a larger span. Sparse reading cuts these large spans > into > +# pieces, so that each piece isn't too big. > +# Without the sparserevlog capability, reading from the repository could > use > +# huge amounts of memory, because the whole span would be read at once, > +# including all the intermediate revisions that aren't pertinent for the > chain. > +# This is why once a repository has enabled sparse-read, it becomes > required. > +SPARSEREVLOG_REQUIREMENT = 'sparserevlog' > + > # Functions receiving (ui, features) that extensions can register to > impact > # the ability to load repositories with custom requirements. Only > # functions defined in loaded extensions are called. 
> @@ -376,6 +385,7 @@ class localrepository(object): > 'generaldelta', > 'treemanifest', > REVLOGV2_REQUIREMENT, > + SPARSEREVLOG_REQUIREMENT, > } > _basesupported = supportedformats | { > 'store', > @@ -678,6 +688,8 @@ class localrepository(object): > self.svfs.options['with-sparse-read'] = withsparseread > self.svfs.options['sparse-read-density-threshold'] = > srdensitythres > self.svfs.options['sparse-read-min-gap-size'] = srmingapsize > + sparserevlog = SPARSEREVLOG_REQUIREMENT in self.requirements > + self.svfs.options['sparse-revlog'] = sparserevlog > > for r in self.requirements: > if r.startswith('exp-compression-'): > @@ -2370,6 +2382,9 @@ def newreporequirements(repo): > requirements.add('generaldelta') > if ui.configbool('experimental', 'treemanifest'): > requirements.add('treemanifest') > + # experimental config: format.sparse-revlog > + if ui.configbool('format', 'sparse-revlog'): > + requirements.add(SPARSEREVLOG_REQUIREMENT) > > revlogv2 = ui.config('experimental', 'revlogv2') > if revlogv2 == 'enable-unstable-format-and-corrupt-my-data': > diff --git a/mercurial/revlog.py b/mercurial/revlog.py > --- a/mercurial/revlog.py > +++ b/mercurial/revlog.py > @@ -895,6 +895,7 @@ class revlog(object): > self._compengine = 'zlib' > self._maxdeltachainspan = -1 > self._withsparseread = False > + self._sparserevlog = False > self._srdensitythreshold = 0.50 > self._srmingapsize = 262144 > > @@ -923,7 +924,10 @@ class revlog(object): > self._maxdeltachainspan = opts['maxdeltachainspan'] > if mmaplargeindex and 'mmapindexthreshold' in opts: > mmapindexthreshold = opts['mmapindexthreshold'] > - self._withsparseread = bool(opts.get('with-sparse-read', > False)) > + self._sparserevlog = bool(opts.get('sparse-revlog', False)) > + withsparseread = bool(opts.get('with-sparse-read', False)) > + # sparse-revlog forces sparse-read > + self._withsparseread = self._sparserevlog or withsparseread > if 'sparse-read-density-threshold' in opts: > self._srdensitythreshold = 
opts['sparse-read-density-threshold'] > if 'sparse-read-min-gap-size' in opts: >
Patch
diff --git a/mercurial/configitems.py b/mercurial/configitems.py --- a/mercurial/configitems.py +++ b/mercurial/configitems.py @@ -652,6 +652,9 @@ coreconfigitem('format', 'maxchainlen', coreconfigitem('format', 'obsstore-version', default=None, ) +coreconfigitem('format', 'sparse-revlog', + default=False, +) coreconfigitem('format', 'usefncache', default=True, ) diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -354,6 +354,15 @@ class locallegacypeer(localpeer): # clients. REVLOGV2_REQUIREMENT = 'exp-revlogv2.0' +# A repository with the sparserevlog feature will have delta chains that +# can spread over a larger span. Sparse reading cuts these large spans into +# pieces, so that each piece isn't too big. +# Without the sparserevlog capability, reading from the repository could use +# huge amounts of memory, because the whole span would be read at once, +# including all the intermediate revisions that aren't pertinent for the chain. +# This is why once a repository has enabled sparse-read, it becomes required. +SPARSEREVLOG_REQUIREMENT = 'sparserevlog' + # Functions receiving (ui, features) that extensions can register to impact # the ability to load repositories with custom requirements. Only # functions defined in loaded extensions are called. 
@@ -376,6 +385,7 @@ class localrepository(object): 'generaldelta', 'treemanifest', REVLOGV2_REQUIREMENT, + SPARSEREVLOG_REQUIREMENT, } _basesupported = supportedformats | { 'store', @@ -678,6 +688,8 @@ class localrepository(object): self.svfs.options['with-sparse-read'] = withsparseread self.svfs.options['sparse-read-density-threshold'] = srdensitythres self.svfs.options['sparse-read-min-gap-size'] = srmingapsize + sparserevlog = SPARSEREVLOG_REQUIREMENT in self.requirements + self.svfs.options['sparse-revlog'] = sparserevlog for r in self.requirements: if r.startswith('exp-compression-'): @@ -2370,6 +2382,9 @@ def newreporequirements(repo): requirements.add('generaldelta') if ui.configbool('experimental', 'treemanifest'): requirements.add('treemanifest') + # experimental config: format.sparse-revlog + if ui.configbool('format', 'sparse-revlog'): + requirements.add(SPARSEREVLOG_REQUIREMENT) revlogv2 = ui.config('experimental', 'revlogv2') if revlogv2 == 'enable-unstable-format-and-corrupt-my-data': diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -895,6 +895,7 @@ class revlog(object): self._compengine = 'zlib' self._maxdeltachainspan = -1 self._withsparseread = False + self._sparserevlog = False self._srdensitythreshold = 0.50 self._srmingapsize = 262144 @@ -923,7 +924,10 @@ class revlog(object): self._maxdeltachainspan = opts['maxdeltachainspan'] if mmaplargeindex and 'mmapindexthreshold' in opts: mmapindexthreshold = opts['mmapindexthreshold'] - self._withsparseread = bool(opts.get('with-sparse-read', False)) + self._sparserevlog = bool(opts.get('sparse-revlog', False)) + withsparseread = bool(opts.get('with-sparse-read', False)) + # sparse-revlog forces sparse-read + self._withsparseread = self._sparserevlog or withsparseread if 'sparse-read-density-threshold' in opts: self._srdensitythreshold = opts['sparse-read-density-threshold'] if 'sparse-read-min-gap-size' in opts: