Submitter | Alastair Houghton |
---|---|
Date | Dec. 10, 2012, 9:50 a.m. |
Message ID | <dd56046b73acdb976ff9.1355133029@lithium.coriolis-systems.com> |
Download | mbox | patch |
Permalink | /patch/38/ |
State | Changes Requested |
Headers | show |
Comments
On Mon, Dec 10, 2012 at 09:50:29AM +0000, Alastair Houghton wrote: > # HG changeset patch > # User Alastair Houghton <alastair at coriolis-systems.com> > # Date 1355130961 0 > # Node ID dd56046b73acdb976ff98e91469c36494b1ddd94 > # Parent 40374059d227850ec2f5fb4f21a1b619136e2a6a > patch: improve handling of filenames containing spaces on import (issue3723) > > When reading git-format patches, it's possible that we will come across > a line like: > > diff --git a/foo b/foo.txt b/foo b/foo.txt > > If the git-format diff in question is a rename or a copy, all is fine, > because we can reliably read the filenames from subsequent git headers. > However, if it turns out to be some other kind of diff, we might need > to extract the filename(s) from a line in this format; this is especially > the case when dealing with a binary diff, as git's binary diff format > doesn't repeat the filenames at all. > > Fortunately, there is a heuristic we can use to render the above > parseable, namely that the two filenames must be the same (since > otherwise, we're looking at a rename or a copy and we already covered > that case). > > This patch fixes iterhunks(), readgitpatch() and diffstatdata() to use > this approach. Bellow is some comment on your patches, some of them impact code that you just move around. Feel free to either sent a prelude patch that clean that up or ignore them. This is not a LGTM as the whole function is still pretty confusing to me. I'll try to get a second deeper pass soon. > diff --git a/mercurial/patch.py b/mercurial/patch.py > --- a/mercurial/patch.py > +++ b/mercurial/patch.py > @@ -15,6 +15,7 @@ > import context > > gitre = re.compile('diff --git a/(.*) b/(.*)') > +gitre_no_namechange = re.compile(r'diff --git a/(.*) b/\1$') You want a ^ in front of this regex. Otherwise pathological case forged by some leprechaun might still bite you' note: I love how greedy a/(.*) ensure consistency of b/\1$ here. > > class PatchError(Exception): > pass > @@ -270,7 +271,8 @@ > 'islink' is True if the file is a symlink and 'isexec' is True if > the file is executable. Otherwise, 'mode' is None. > """ > - def __init__(self, path): > + def __init__(self, path, diffline=None): > + self.diffline = diffline > self.path = path > self.oldpath = None > self.mode = None > @@ -283,13 +285,26 @@ > self.mode = (islink, isexec) > > def copy(self): > - other = patchmeta(self.path) > + other = patchmeta(self.path, diffline=self.diffline) > other.oldpath = self.oldpath > other.mode = self.mode > other.op = self.op > other.binary = self.binary > return other > > + def reparse(self): > + if self.op == 'RENAME' or self.op == 'COPY': > + return True > + > + # reparse the diffline with a stricter regex > + m = gitre_no_namechange.match(self.diffline) > + if not m: > + return False > + > + self.path = m.group(1) > + > + return True > + What's the purpose of this function ? can you add a small docstring to it ? > def _ispatchinga(self, afile): > if afile == '/dev/null': > return self.op == 'ADD' > @@ -317,16 +332,16 @@ > if line.startswith('diff --git'): > m = gitre.match(line) > if m: > - if gp: > + if gp and gp.reparse(): > gitpatches.append(gp) > dst = m.group(2) > - gp = patchmeta(dst) > + gp = patchmeta(dst, diffline=line) > elif gp: > if line.startswith('--- '): > - gitpatches.append(gp) > + if gp.reparse(): > + gitpatches.append(gp) > gp = None > - continue > - if line.startswith('rename from '): > + elif line.startswith('rename from '): > gp.op = 'RENAME' > gp.oldpath = line[12:] > elif line.startswith('rename to '): > @@ -345,7 +360,11 @@ > gp.setmode(int(line[-6:], 8)) > elif line.startswith('GIT binary patch'): > gp.binary = True > - if gp: > + if gp.reparse(): > + gitpatches.append(gp) > + gp = None > + > + if gp and gp.reparse(): > gitpatches.append(gp) > > return gitpatches > @@ -1189,6 +1208,11 @@ > > # our states > BFILE = 1 > + > + scanninggit = False > + diffline = None > + gitop = None > + I would add some comment that explain what those state are about and how they impact the process. > context = None > lr = linereader(fp) > > @@ -1196,6 +1220,51 @@ > x = lr.readline() > if not x: > break > + > + if scanninggit: > + # git format patch headers end at --- or GIT binary patch > + if x.startswith('---') or x.startswith('GIT binary patch'): > + scanninggit = False > + > + ok = True > + if gitop != 'RENAME' and gitop != 'COPY': nitpick: if gitop not in ('RENAME', 'COPY'): > + # use the stricter regex > + m = gitre_no_namechange.match(diffline) > + if m: > + afile = 'a/' + m.group(1) > + bfile = 'b/' + m.group(1) > + else: > + ok = False > + > + if ok: > + while gitpatches \ > + and not gitpatches[-1].ispatching(afile, bfile): > + gp = gitpatches.pop() > + yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, > + gp.copy()) Sound like a for loop with a break and an else clause. (yep, it looks like code movement, but that also a good opportunity > + if not gitpatches: > + raise PatchError(_('failed to synchronize metadata ' > + 'for "%s"') > + % afile[2:]) > + gp = gitpatches[-1] > + newfile = True > + elif x.startswith('rename from '): > + gitop = 'RENAME' > + afile = 'a/' + x[12:].rstrip(' \r\n') > + elif x.startswith('rename to '): > + bfile = 'b/' + x[10:].rstrip(' \r\n') > + elif x.startswith('copy from '): > + gitop = 'COPY' > + afile = 'a/' + x[10:].rstrip(' \r\n') > + elif x.startswith('copy to '): > + bfile = 'b/' + x[8:].rstrip(' \r\n') if looks like adding the 'a/' and 'b/' prefix later would help readability here (applies to `gitre_no_namechange` handling too) Is that possible ? > + > + if newfile: > + newfile = False > + emitfile = True > + state = BFILE > + hunknum = 0 > + > if state == BFILE and ( > (not context and x[0] == '@') > or (context is not False and x.startswith('***************')) > @@ -1216,25 +1285,26 @@ > yield 'file', (afile, bfile, h, gp and gp.copy() or None) > yield 'hunk', h > elif x.startswith('diff --git'): > - m = gitre.match(x.rstrip(' \r\n')) > + xs = x.rstrip(' \r\n') > + m = gitre.match(xs) > if not m: > continue > + > if gitpatches is None: > # scan whole input for git metadata > gitpatches = scangitpatch(lr, x) > yield 'git', [g.copy() for g in gitpatches > if g.op in ('COPY', 'RENAME')] > gitpatches.reverse() > + > + # defer processing of git metadata until we know for certain > + # the names of the files (we need to see the metadata for this > + # patch before we really know what they are) > + scanninggit = True > + diffline = xs > afile = 'a/' + m.group(1) > bfile = 'b/' + m.group(2) > - while gitpatches and not gitpatches[-1].ispatching(afile, bfile): > - gp = gitpatches.pop() > - yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy()) > - if not gitpatches: > - raise PatchError(_('failed to synchronize metadata for "%s"') > - % afile[2:]) > - gp = gitpatches[-1] > - newfile = True > + gitop = 'MODIFY' > elif x.startswith('---'): > # check for a unified diff > l2 = lr.readline() > @@ -1261,11 +1331,6 @@ > afile = parsefilename(x) > bfile = parsefilename(l2) > > - if newfile: > - newfile = False > - emitfile = True > - state = BFILE > - hunknum = 0 > > while gitpatches: > gp = gitpatches.pop() > @@ -1818,10 +1883,18 @@ > # set numbers to 0 anyway when starting new file > adds, removes, isbinary = 0, 0, False > if line.startswith('diff --git'): > - filename = gitre.search(line).group(1) > + m = gitre_no_namechange.search(line) > + if m: > + filename = m.group(1) > + else: > + filename = None > elif line.startswith('diff -r'): > # format: "diff -r ... -r ... filename" > filename = diffre.search(line).group(1) > + elif line.startswith('rename from '): > + filename = line[12:].rstrip(' \r\n') > + elif line.startswith('copy from '): > + filename = line[10:].rstrip(' \r\n') I already met those "line.startswith('rename from ')" can they be factored ? > elif line.startswith('+') and not line.startswith('+++ '): > adds += 1 > elif line.startswith('-') and not line.startswith('--- '): > @@ -1829,6 +1902,7 @@ > elif (line.startswith('GIT binary patch') or > line.startswith('Binary file')): > isbinary = True > + > addresult() > return results > > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel at selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel
On Dec 10, 2012, at 3:50 AM, Alastair Houghton wrote: > # HG changeset patch > # User Alastair Houghton <alastair at coriolis-systems.com> > # Date 1355130961 0 > # Node ID dd56046b73acdb976ff98e91469c36494b1ddd94 > # Parent 40374059d227850ec2f5fb4f21a1b619136e2a6a > patch: improve handling of filenames containing spaces on import (issue3723) > [...] > + def reparse(self): > + if self.op == 'RENAME' or self.op == 'COPY': > + return True > + > + # reparse the diffline with a stricter regex > + m = gitre_no_namechange.match(self.diffline) > + if not m: > + return False > + > + self.path = m.group(1) > + > + return True Would be nice to have a docstring describing what the return values mean here. > @@ -317,16 +332,16 @@ > if line.startswith('diff --git'): > m = gitre.match(line) > if m: > - if gp: > + if gp and gp.reparse(): > gitpatches.append(gp) > dst = m.group(2) > - gp = patchmeta(dst) > + gp = patchmeta(dst, diffline=line) > elif gp: > if line.startswith('--- '): > - gitpatches.append(gp) > + if gp.reparse(): > + gitpatches.append(gp) > gp = None > - continue > - if line.startswith('rename from '): > + elif line.startswith('rename from '): These last three lines look like an unrelated stylistic change. Do they actually change the control flow? > @@ -1829,6 +1902,7 @@ > elif (line.startswith('GIT binary patch') or > line.startswith('Binary file')): > isbinary = True > + Unrelated whitespace change. pacem in terris / ??? / ?????? / ????????? / ?? Kevin R. Bullock
Patch
diff --git a/mercurial/patch.py b/mercurial/patch.py --- a/mercurial/patch.py +++ b/mercurial/patch.py @@ -15,6 +15,7 @@ import context gitre = re.compile('diff --git a/(.*) b/(.*)') +gitre_no_namechange = re.compile(r'diff --git a/(.*) b/\1$') class PatchError(Exception): pass @@ -270,7 +271,8 @@ 'islink' is True if the file is a symlink and 'isexec' is True if the file is executable. Otherwise, 'mode' is None. """ - def __init__(self, path): + def __init__(self, path, diffline=None): + self.diffline = diffline self.path = path self.oldpath = None self.mode = None @@ -283,13 +285,26 @@ self.mode = (islink, isexec) def copy(self): - other = patchmeta(self.path) + other = patchmeta(self.path, diffline=self.diffline) other.oldpath = self.oldpath other.mode = self.mode other.op = self.op other.binary = self.binary return other + def reparse(self): + if self.op == 'RENAME' or self.op == 'COPY': + return True + + # reparse the diffline with a stricter regex + m = gitre_no_namechange.match(self.diffline) + if not m: + return False + + self.path = m.group(1) + + return True + def _ispatchinga(self, afile): if afile == '/dev/null': return self.op == 'ADD' @@ -317,16 +332,16 @@ if line.startswith('diff --git'): m = gitre.match(line) if m: - if gp: + if gp and gp.reparse(): gitpatches.append(gp) dst = m.group(2) - gp = patchmeta(dst) + gp = patchmeta(dst, diffline=line) elif gp: if line.startswith('--- '): - gitpatches.append(gp) + if gp.reparse(): + gitpatches.append(gp) gp = None - continue - if line.startswith('rename from '): + elif line.startswith('rename from '): gp.op = 'RENAME' gp.oldpath = line[12:] elif line.startswith('rename to '): @@ -345,7 +360,11 @@ gp.setmode(int(line[-6:], 8)) elif line.startswith('GIT binary patch'): gp.binary = True - if gp: + if gp.reparse(): + gitpatches.append(gp) + gp = None + + if gp and gp.reparse(): gitpatches.append(gp) return gitpatches @@ -1189,6 +1208,11 @@ # our states BFILE = 1 + + scanninggit = False + diffline = None + gitop = None + context = None lr = linereader(fp) @@ -1196,6 +1220,51 @@ x = lr.readline() if not x: break + + if scanninggit: + # git format patch headers end at --- or GIT binary patch + if x.startswith('---') or x.startswith('GIT binary patch'): + scanninggit = False + + ok = True + if gitop != 'RENAME' and gitop != 'COPY': + # use the stricter regex + m = gitre_no_namechange.match(diffline) + if m: + afile = 'a/' + m.group(1) + bfile = 'b/' + m.group(1) + else: + ok = False + + if ok: + while gitpatches \ + and not gitpatches[-1].ispatching(afile, bfile): + gp = gitpatches.pop() + yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, + gp.copy()) + if not gitpatches: + raise PatchError(_('failed to synchronize metadata ' + 'for "%s"') + % afile[2:]) + gp = gitpatches[-1] + newfile = True + elif x.startswith('rename from '): + gitop = 'RENAME' + afile = 'a/' + x[12:].rstrip(' \r\n') + elif x.startswith('rename to '): + bfile = 'b/' + x[10:].rstrip(' \r\n') + elif x.startswith('copy from '): + gitop = 'COPY' + afile = 'a/' + x[10:].rstrip(' \r\n') + elif x.startswith('copy to '): + bfile = 'b/' + x[8:].rstrip(' \r\n') + + if newfile: + newfile = False + emitfile = True + state = BFILE + hunknum = 0 + if state == BFILE and ( (not context and x[0] == '@') or (context is not False and x.startswith('***************')) @@ -1216,25 +1285,26 @@ yield 'file', (afile, bfile, h, gp and gp.copy() or None) yield 'hunk', h elif x.startswith('diff --git'): - m = gitre.match(x.rstrip(' \r\n')) + xs = x.rstrip(' \r\n') + m = gitre.match(xs) if not m: continue + if gitpatches is None: # scan whole input for git metadata gitpatches = scangitpatch(lr, x) yield 'git', [g.copy() for g in gitpatches if g.op in ('COPY', 'RENAME')] gitpatches.reverse() + + # defer processing of git metadata until we know for certain + # the names of the files (we need to see the metadata for this + # patch before we really know what they are) + scanninggit = True + diffline = xs afile = 'a/' + m.group(1) bfile = 'b/' + m.group(2) - while gitpatches and not gitpatches[-1].ispatching(afile, bfile): - gp = gitpatches.pop() - yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy()) - if not gitpatches: - raise PatchError(_('failed to synchronize metadata for "%s"') - % afile[2:]) - gp = gitpatches[-1] - newfile = True + gitop = 'MODIFY' elif x.startswith('---'): # check for a unified diff l2 = lr.readline() @@ -1261,11 +1331,6 @@ afile = parsefilename(x) bfile = parsefilename(l2) - if newfile: - newfile = False - emitfile = True - state = BFILE - hunknum = 0 while gitpatches: gp = gitpatches.pop() @@ -1818,10 +1883,18 @@ # set numbers to 0 anyway when starting new file adds, removes, isbinary = 0, 0, False if line.startswith('diff --git'): - filename = gitre.search(line).group(1) + m = gitre_no_namechange.search(line) + if m: + filename = m.group(1) + else: + filename = None elif line.startswith('diff -r'): # format: "diff -r ... -r ... filename" filename = diffre.search(line).group(1) + elif line.startswith('rename from '): + filename = line[12:].rstrip(' \r\n') + elif line.startswith('copy from '): + filename = line[10:].rstrip(' \r\n') elif line.startswith('+') and not line.startswith('+++ '): adds += 1 elif line.startswith('-') and not line.startswith('--- '): @@ -1829,6 +1902,7 @@ elif (line.startswith('GIT binary patch') or line.startswith('Binary file')): isbinary = True + addresult() return results