From patchwork Tue Mar 1 16:07:47 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [1,of,2] merge: separate read-only mergestate into separate module From: Simon Farnsworth X-Patchwork-Id: 13505 Message-Id: <37fe1f9d08245f7540cb.1456848467@simonfar-macbookpro.local> To: Date: Tue, 1 Mar 2016 16:07:47 +0000 # HG changeset patch # User Simon Farnsworth # Date 1456843967 0 # Tue Mar 01 14:52:47 2016 +0000 # Node ID 37fe1f9d08245f7540cb6137c312ae30dbcde688 # Parent c7f89ad87baef87f00c507545dfd4cc824bc3131 merge: separate read-only mergestate into separate module This is code motion to prepare for revset predicates that read the mergestate; without this code motion, there would be an import loop. diff --git a/mercurial/merge.py b/mercurial/merge.py --- a/mercurial/merge.py +++ b/mercurial/merge.py @@ -14,7 +14,6 @@ from .i18n import _ from .node import ( - bin, hex, nullhex, nullid, @@ -25,6 +24,7 @@ destutil, error, filemerge, + mergestate as mergestatemod, obsolete, scmutil, subrepo, @@ -33,52 +33,8 @@ ) _pack = struct.pack -_unpack = struct.unpack -def _droponode(data): - # used for compatibility for v1 - bits = data.split('\0') - bits = bits[:-2] + bits[-1:] - return '\0'.join(bits) - -class mergestate(object): - '''track 3-way merge state of individual files - - The merge state is stored on disk when needed. Two files are used: one with - an old format (version 1), and one with a new format (version 2). Version 2 - stores a superset of the data in version 1, including new kinds of records - in the future. For more about the new format, see the documentation for - `_readrecordsv2`. - - Each record can contain arbitrary content, and has an associated type. This - `type` should be a letter. If `type` is uppercase, the record is mandatory: - versions of Mercurial that don't support it should abort. If `type` is - lowercase, the record can be safely ignored. - - Currently known records: - - L: the node of the "local" part of the merge (hexified version) - O: the node of the "other" part of the merge (hexified version) - F: a file to be merged entry - C: a change/delete or delete/change conflict - D: a file that the external merge driver will merge internally - (experimental) - m: the external merge driver defined for this merge plus its run state - (experimental) - f: a (filename, dictonary) tuple of optional values for a given file - X: unsupported mandatory record type (used in tests) - x: unsupported advisory record type (used in tests) - - Merge driver run states (experimental): - u: driver-resolved files unmarked -- needs to be run next time we're about - to resolve or commit - m: driver-resolved files marked -- only needs to be run before commit - s: success/skipped -- does not need to be run any more - - ''' - statepathv1 = 'merge/state' - statepathv2 = 'merge/state2' - +class mergestate(mergestatemod.mergestatereadonly): @staticmethod def clean(repo, node=None, other=None): """Initialize a brand new merge state, removing any existing state on @@ -94,13 +50,6 @@ ms._read() return ms - def __init__(self, repo): - """Initialize the merge state. - - Do not use this directly! Instead call read() or clean().""" - self._repo = repo - self._dirty = False - def reset(self, node=None, other=None): self._state = {} self._stateextras = {} @@ -121,211 +70,6 @@ self._results = {} self._dirty = False - def _read(self): - """Analyse each record content to restore a serialized state from disk - - This function process "record" entry produced by the de-serialization - of on disk file. - """ - self._state = {} - self._stateextras = {} - self._local = None - self._other = None - for var in ('localctx', 'otherctx'): - if var in vars(self): - delattr(self, var) - self._readmergedriver = None - self._mdstate = 's' - unsupported = set() - records = self._readrecords() - for rtype, record in records: - if rtype == 'L': - self._local = bin(record) - elif rtype == 'O': - self._other = bin(record) - elif rtype == 'm': - bits = record.split('\0', 1) - mdstate = bits[1] - if len(mdstate) != 1 or mdstate not in 'ums': - # the merge driver should be idempotent, so just rerun it - mdstate = 'u' - - self._readmergedriver = bits[0] - self._mdstate = mdstate - elif rtype in 'FDC': - bits = record.split('\0') - self._state[bits[0]] = bits[1:] - elif rtype == 'f': - filename, rawextras = record.split('\0', 1) - extraparts = rawextras.split('\0') - extras = {} - i = 0 - while i < len(extraparts): - extras[extraparts[i]] = extraparts[i + 1] - i += 2 - - self._stateextras[filename] = extras - elif not rtype.islower(): - unsupported.add(rtype) - self._results = {} - self._dirty = False - - if unsupported: - raise error.UnsupportedMergeRecords(unsupported) - - def _readrecords(self): - """Read merge state from disk and return a list of record (TYPE, data) - - We read data from both v1 and v2 files and decide which one to use. - - V1 has been used by version prior to 2.9.1 and contains less data than - v2. We read both versions and check if no data in v2 contradicts - v1. If there is not contradiction we can safely assume that both v1 - and v2 were written at the same time and use the extract data in v2. If - there is contradiction we ignore v2 content as we assume an old version - of Mercurial has overwritten the mergestate file and left an old v2 - file around. - - returns list of record [(TYPE, data), ...]""" - v1records = self._readrecordsv1() - v2records = self._readrecordsv2() - if self._v1v2match(v1records, v2records): - return v2records - else: - # v1 file is newer than v2 file, use it - # we have to infer the "other" changeset of the merge - # we cannot do better than that with v1 of the format - mctx = self._repo[None].parents()[-1] - v1records.append(('O', mctx.hex())) - # add place holder "other" file node information - # nobody is using it yet so we do no need to fetch the data - # if mctx was wrong `mctx[bits[-2]]` may fails. - for idx, r in enumerate(v1records): - if r[0] == 'F': - bits = r[1].split('\0') - bits.insert(-2, '') - v1records[idx] = (r[0], '\0'.join(bits)) - return v1records - - def _v1v2match(self, v1records, v2records): - oldv2 = set() # old format version of v2 record - for rec in v2records: - if rec[0] == 'L': - oldv2.add(rec) - elif rec[0] == 'F': - # drop the onode data (not contained in v1) - oldv2.add(('F', _droponode(rec[1]))) - for rec in v1records: - if rec not in oldv2: - return False - else: - return True - - def _readrecordsv1(self): - """read on disk merge state for version 1 file - - returns list of record [(TYPE, data), ...] - - Note: the "F" data from this file are one entry short - (no "other file node" entry) - """ - records = [] - try: - f = self._repo.vfs(self.statepathv1) - for i, l in enumerate(f): - if i == 0: - records.append(('L', l[:-1])) - else: - records.append(('F', l[:-1])) - f.close() - except IOError as err: - if err.errno != errno.ENOENT: - raise - return records - - def _readrecordsv2(self): - """read on disk merge state for version 2 file - - This format is a list of arbitrary records of the form: - - [type][length][content] - - `type` is a single character, `length` is a 4 byte integer, and - `content` is an arbitrary byte sequence of length `length`. - - Mercurial versions prior to 3.7 have a bug where if there are - unsupported mandatory merge records, attempting to clear out the merge - state with hg update --clean or similar aborts. The 't' record type - works around that by writing out what those versions treat as an - advisory record, but later versions interpret as special: the first - character is the 'real' record type and everything onwards is the data. - - Returns list of records [(TYPE, data), ...].""" - records = [] - try: - f = self._repo.vfs(self.statepathv2) - data = f.read() - off = 0 - end = len(data) - while off < end: - rtype = data[off] - off += 1 - length = _unpack('>I', data[off:(off + 4)])[0] - off += 4 - record = data[off:(off + length)] - off += length - if rtype == 't': - rtype, record = record[0], record[1:] - records.append((rtype, record)) - f.close() - except IOError as err: - if err.errno != errno.ENOENT: - raise - return records - - @util.propertycache - def mergedriver(self): - # protect against the following: - # - A configures a malicious merge driver in their hgrc, then - # pauses the merge - # - A edits their hgrc to remove references to the merge driver - # - A gives a copy of their entire repo, including .hg, to B - # - B inspects .hgrc and finds it to be clean - # - B then continues the merge and the malicious merge driver - # gets invoked - configmergedriver = self._repo.ui.config('experimental', 'mergedriver') - if (self._readmergedriver is not None - and self._readmergedriver != configmergedriver): - raise error.ConfigError( - _("merge driver changed since merge started"), - hint=_("revert merge driver change or abort merge")) - - return configmergedriver - - @util.propertycache - def localctx(self): - if self._local is None: - raise RuntimeError("localctx accessed but self._local isn't set") - return self._repo[self._local] - - @util.propertycache - def otherctx(self): - if self._other is None: - raise RuntimeError("otherctx accessed but self._other isn't set") - return self._repo[self._other] - - def active(self): - """Whether mergestate is active. - - Returns True if there appears to be mergestate. This is a rough proxy - for "is a merge in progress." - """ - # Check local variables before looking at filesystem for performance - # reasons. - return bool(self._local) or bool(self._state) or \ - self._repo.vfs.exists(self.statepathv1) or \ - self._repo.vfs.exists(self.statepathv2) - def commit(self): """Write current state on disk (if necessary)""" if self._dirty: @@ -369,7 +113,7 @@ f.write(hex(self._local) + '\n') for rtype, data in irecords: if rtype == 'F': - f.write('%s\n' % _droponode(data)) + f.write('%s\n' % mergestatemod._droponode(data)) f.close() def _writerecordsv2(self, records): @@ -408,39 +152,10 @@ self._stateextras[fd] = { 'ancestorlinknode' : hex(fca.node()) } self._dirty = True - def __contains__(self, dfile): - return dfile in self._state - - def __getitem__(self, dfile): - return self._state[dfile][0] - - def __iter__(self): - return iter(sorted(self._state)) - - def files(self): - return self._state.keys() - def mark(self, dfile, state): self._state[dfile][0] = state self._dirty = True - def mdstate(self): - return self._mdstate - - def unresolved(self): - """Obtain the paths of unresolved files.""" - - for f, entry in self._state.items(): - if entry[0] == 'u': - yield f - - def driverresolved(self): - """Obtain the paths of driver-resolved files.""" - - for f, entry in self._state.items(): - if entry[0] == 'd': - yield f - def extras(self, filename): return self._stateextras.setdefault(filename, {}) @@ -534,34 +249,6 @@ Returns the exit code of the merge.""" return self._resolve(False, dfile, wctx, labels=labels)[1] - - def counts(self): - """return counts for updated, merged and removed files in this - session""" - updated, merged, removed = 0, 0, 0 - for r, action in self._results.itervalues(): - if r is None: - updated += 1 - elif r == 0: - if action == 'r': - removed += 1 - else: - merged += 1 - return updated, merged, removed - - def unresolvedcount(self): - """get unresolved count for this merge (persistent)""" - return len([True for f, entry in self._state.iteritems() - if entry[0] == 'u']) - - def actions(self): - """return lists of actions to perform on the dirstate""" - actions = {'r': [], 'f': [], 'a': [], 'am': [], 'g': []} - for f, (r, action) in self._results.iteritems(): - if action is not None: - actions[action].append((f, None, "merge result")) - return actions - def recordactions(self): """record remove/add/get actions in the dirstate""" branchmerge = self._repo.dirstate.p2() != nullid diff --git a/mercurial/mergestate.py b/mercurial/mergestate.py new file mode 100644 --- /dev/null +++ b/mercurial/mergestate.py @@ -0,0 +1,343 @@ +# mergestate.py - on-disk merge state for Mercurial +# +# Copyright 2006, 2007 Matt Mackall +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import absolute_import + +import errno +import struct + +from .i18n import _ +from .node import bin +from . import ( + error, + util, +) + +_unpack = struct.unpack + +def _droponode(data): + # used for compatibility for v1 + bits = data.split('\0') + bits = bits[:-2] + bits[-1:] + return '\0'.join(bits) + +class mergestatereadonly(object): + '''track 3-way merge state of individual files + + The merge state is stored on disk when needed. Two files are used: one with + an old format (version 1), and one with a new format (version 2). Version 2 + stores a superset of the data in version 1, including new kinds of records + in the future. For more about the new format, see the documentation for + `_readrecordsv2`. + + Each record can contain arbitrary content, and has an associated type. This + `type` should be a letter. If `type` is uppercase, the record is mandatory: + versions of Mercurial that don't support it should abort. If `type` is + lowercase, the record can be safely ignored. + + Currently known records: + + L: the node of the "local" part of the merge (hexified version) + O: the node of the "other" part of the merge (hexified version) + F: a file to be merged entry + C: a change/delete or delete/change conflict + D: a file that the external merge driver will merge internally + (experimental) + m: the external merge driver defined for this merge plus its run state + (experimental) + f: a (filename, dictonary) tuple of optional values for a given file + X: unsupported mandatory record type (used in tests) + x: unsupported advisory record type (used in tests) + + Merge driver run states (experimental): + u: driver-resolved files unmarked -- needs to be run next time we're about + to resolve or commit + m: driver-resolved files marked -- only needs to be run before commit + s: success/skipped -- does not need to be run any more + + ''' + @staticmethod + def read(repo): + """Initialize the merge state, reading it from disk.""" + ms = mergestatereadonly(repo) + ms._read() + return ms + + def __init__(self, repo): + """Initialize the merge state. + + Do not use this directly! Instead call read() or clean().""" + self._repo = repo + self._dirty = False + + statepathv1 = 'merge/state' + statepathv2 = 'merge/state2' + + def _read(self): + """Analyse each record content to restore a serialized state from disk + + This function process "record" entry produced by the de-serialization + of on disk file. + """ + self._state = {} + self._stateextras = {} + self._local = None + self._other = None + for var in ('localctx', 'otherctx'): + if var in vars(self): + delattr(self, var) + self._readmergedriver = None + self._mdstate = 's' + unsupported = set() + records = self._readrecords() + for rtype, record in records: + if rtype == 'L': + self._local = bin(record) + elif rtype == 'O': + self._other = bin(record) + elif rtype == 'm': + bits = record.split('\0', 1) + mdstate = bits[1] + if len(mdstate) != 1 or mdstate not in 'ums': + # the merge driver should be idempotent, so just rerun it + mdstate = 'u' + + self._readmergedriver = bits[0] + self._mdstate = mdstate + elif rtype in 'FDC': + bits = record.split('\0') + self._state[bits[0]] = bits[1:] + elif rtype == 'f': + filename, rawextras = record.split('\0', 1) + extraparts = rawextras.split('\0') + extras = {} + i = 0 + while i < len(extraparts): + extras[extraparts[i]] = extraparts[i + 1] + i += 2 + + self._stateextras[filename] = extras + elif not rtype.islower(): + unsupported.add(rtype) + self._results = {} + self._dirty = False + + if unsupported: + raise error.UnsupportedMergeRecords(unsupported) + + def _readrecords(self): + """Read merge state from disk and return a list of record (TYPE, data) + + We read data from both v1 and v2 files and decide which one to use. + + V1 has been used by version prior to 2.9.1 and contains less data than + v2. We read both versions and check if no data in v2 contradicts + v1. If there is not contradiction we can safely assume that both v1 + and v2 were written at the same time and use the extract data in v2. If + there is contradiction we ignore v2 content as we assume an old version + of Mercurial has overwritten the mergestate file and left an old v2 + file around. + + returns list of record [(TYPE, data), ...]""" + v1records = self._readrecordsv1() + v2records = self._readrecordsv2() + if self._v1v2match(v1records, v2records): + return v2records + else: + # v1 file is newer than v2 file, use it + # we have to infer the "other" changeset of the merge + # we cannot do better than that with v1 of the format + mctx = self._repo[None].parents()[-1] + v1records.append(('O', mctx.hex())) + # add place holder "other" file node information + # nobody is using it yet so we do no need to fetch the data + # if mctx was wrong `mctx[bits[-2]]` may fails. + for idx, r in enumerate(v1records): + if r[0] == 'F': + bits = r[1].split('\0') + bits.insert(-2, '') + v1records[idx] = (r[0], '\0'.join(bits)) + return v1records + + def _v1v2match(self, v1records, v2records): + oldv2 = set() # old format version of v2 record + for rec in v2records: + if rec[0] == 'L': + oldv2.add(rec) + elif rec[0] == 'F': + # drop the onode data (not contained in v1) + oldv2.add(('F', _droponode(rec[1]))) + for rec in v1records: + if rec not in oldv2: + return False + else: + return True + + def _readrecordsv1(self): + """read on disk merge state for version 1 file + + returns list of record [(TYPE, data), ...] + + Note: the "F" data from this file are one entry short + (no "other file node" entry) + """ + records = [] + try: + f = self._repo.vfs(self.statepathv1) + for i, l in enumerate(f): + if i == 0: + records.append(('L', l[:-1])) + else: + records.append(('F', l[:-1])) + f.close() + except IOError as err: + if err.errno != errno.ENOENT: + raise + return records + + def _readrecordsv2(self): + """read on disk merge state for version 2 file + + This format is a list of arbitrary records of the form: + + [type][length][content] + + `type` is a single character, `length` is a 4 byte integer, and + `content` is an arbitrary byte sequence of length `length`. + + Mercurial versions prior to 3.7 have a bug where if there are + unsupported mandatory merge records, attempting to clear out the merge + state with hg update --clean or similar aborts. The 't' record type + works around that by writing out what those versions treat as an + advisory record, but later versions interpret as special: the first + character is the 'real' record type and everything onwards is the data. + + Returns list of records [(TYPE, data), ...].""" + records = [] + try: + f = self._repo.vfs(self.statepathv2) + data = f.read() + off = 0 + end = len(data) + while off < end: + rtype = data[off] + off += 1 + length = _unpack('>I', data[off:(off + 4)])[0] + off += 4 + record = data[off:(off + length)] + off += length + if rtype == 't': + rtype, record = record[0], record[1:] + records.append((rtype, record)) + f.close() + except IOError as err: + if err.errno != errno.ENOENT: + raise + return records + + @util.propertycache + def mergedriver(self): + # protect against the following: + # - A configures a malicious merge driver in their hgrc, then + # pauses the merge + # - A edits their hgrc to remove references to the merge driver + # - A gives a copy of their entire repo, including .hg, to B + # - B inspects .hgrc and finds it to be clean + # - B then continues the merge and the malicious merge driver + # gets invoked + configmergedriver = self._repo.ui.config('experimental', 'mergedriver') + if (self._readmergedriver is not None + and self._readmergedriver != configmergedriver): + raise error.ConfigError( + _("merge driver changed since merge started"), + hint=_("revert merge driver change or abort merge")) + + return configmergedriver + + @util.propertycache + def localctx(self): + if self._local is None: + raise RuntimeError("localctx accessed but self._local isn't set") + return self._repo[self._local] + + @util.propertycache + def otherctx(self): + if self._other is None: + raise RuntimeError("otherctx accessed but self._other isn't set") + return self._repo[self._other] + + def active(self): + """Whether mergestate is active. + + Returns True if there appears to be mergestate. This is a rough proxy + for "is a merge in progress." + """ + # Check local variables before looking at filesystem for performance + # reasons. + return bool(self._local) or bool(self._state) or \ + self._repo.vfs.exists(self.statepathv1) or \ + self._repo.vfs.exists(self.statepathv2) + + def __contains__(self, dfile): + return dfile in self._state + + def __getitem__(self, dfile): + return self._state[dfile][0] + + def __iter__(self): + return iter(sorted(self._state)) + + def files(self): + return self._state.keys() + + def mdstate(self): + return self._mdstate + + def unresolved(self): + """Obtain the paths of unresolved files.""" + + for f, entry in self._state.items(): + if entry[0] == 'u': + yield f + + def driverresolved(self): + """Obtain the paths of driver-resolved files.""" + + for f, entry in self._state.items(): + if entry[0] == 'd': + yield f + + def extras(self, filename): + return self._stateextras.get(filename, {}) + + def counts(self): + """return counts for updated, merged and removed files in this + session""" + updated, merged, removed = 0, 0, 0 + for r, action in self._results.itervalues(): + if r is None: + updated += 1 + elif r == 0: + if action == 'r': + removed += 1 + else: + merged += 1 + return updated, merged, removed + + def unresolvedcount(self): + """get unresolved count for this merge (persistent)""" + return len([True for f, entry in self._state.iteritems() + if entry[0] == 'u']) + + def actions(self): + """return lists of actions to perform on the dirstate""" + actions = {'r': [], 'f': [], 'a': [], 'am': [], 'g': []} + for f, (r, action) in self._results.iteritems(): + if action is not None: + actions[action].append((f, None, "merge result")) + return actions +