Patchwork D7733: hgext: initial version of fastexport extension

login
register
mail settings
Submitter phabricator
Date Dec. 27, 2019, 8:12 p.m.
Message ID <differential-rev-PHID-DREV-azzdo6yrrvi3ofgfy6i5-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/44062/
State New
Headers show

Comments

phabricator - Dec. 27, 2019, 8:12 p.m.
joerg.sonnenberger created this revision.
Herald added subscribers: mercurial-devel, mjpieters.
Herald added a reviewer: hg-reviewers.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7733

AFFECTED FILES
  hgext/fastexport.py
  tests/test-fastexport.t
  tests/test-help.t

CHANGE DETAILS




To: joerg.sonnenberger, #hg-reviewers
Cc: mjpieters, mercurial-devel
phabricator - Jan. 8, 2020, 7:30 p.m.
durin42 added a comment.
durin42 accepted this revision as: durin42.


  I only did a light pass, but looks good to me.
  
  Any plans for a fast-export importer as well?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7733/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7733

To: joerg.sonnenberger, #hg-reviewers, durin42
Cc: durin42, mjpieters, mercurial-devel

Patch

diff --git a/tests/test-help.t b/tests/test-help.t
--- a/tests/test-help.t
+++ b/tests/test-help.t
@@ -364,6 +364,7 @@ 
        eol           automatically manage newlines in repository files
        extdiff       command to allow external programs to compare revisions
        factotum      http authentication with factotum
+       fastexport    export repositories as git fast-import stream
        githelp       try mapping git commands to Mercurial commands
        gpg           commands to sign and verify changesets
        hgk           browse the repository in a graphical way
diff --git a/tests/test-fastexport.t b/tests/test-fastexport.t
new file mode 100644
--- /dev/null
+++ b/tests/test-fastexport.t
@@ -0,0 +1,709 @@ 
+  $ cat >> $HGRCPATH << EOF
+  > [extensions]
+  > fastexport=
+  > EOF
+
+  $ hg init
+
+  $ hg debugbuilddag -mon '+2:tbase @name1 +3:thead1 <tbase @name2 +4:thead2 @both /thead1 +2:tmaintip'
+
+  $ hg up -r 10
+  13 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg rm nf10
+  $ hg commit -u debugbuilddag --date 'Thu Jan 01 00:00:12 1970 +0000' -m r12
+  created new head
+  $ hg up -r 11
+  4 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg merge -r 12
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+  $ hg commit -m debugbuilddag --date 'Thu Jan 01 00:00:13 1970 +0000'
+
+  $ hg log -G
+  @    changeset:   13:e5c379648af4
+  |\   branch:      both
+  | |  tag:         tip
+  | |  parent:      11:2cbd52c10e88
+  | |  parent:      12:4f31c9604af6
+  | |  user:        test
+  | |  date:        Thu Jan 01 00:00:13 1970 +0000
+  | |  summary:     debugbuilddag
+  | |
+  | o  changeset:   12:4f31c9604af6
+  | |  branch:      both
+  | |  parent:      10:9220596cb068
+  | |  user:        debugbuilddag
+  | |  date:        Thu Jan 01 00:00:12 1970 +0000
+  | |  summary:     r12
+  | |
+  o |  changeset:   11:2cbd52c10e88
+  |/   branch:      both
+  |    tag:         tmaintip
+  |    user:        debugbuilddag
+  |    date:        Thu Jan 01 00:00:11 1970 +0000
+  |    summary:     r11
+  |
+  o  changeset:   10:9220596cb068
+  |  branch:      both
+  |  user:        debugbuilddag
+  |  date:        Thu Jan 01 00:00:10 1970 +0000
+  |  summary:     r10
+  |
+  o    changeset:   9:0767d147d86e
+  |\   branch:      both
+  | |  parent:      8:0d0219415f18
+  | |  parent:      4:e8bc3a6ab9ae
+  | |  user:        debugbuilddag
+  | |  date:        Thu Jan 01 00:00:09 1970 +0000
+  | |  summary:     r9
+  | |
+  | o  changeset:   8:0d0219415f18
+  | |  branch:      name2
+  | |  tag:         thead2
+  | |  user:        debugbuilddag
+  | |  date:        Thu Jan 01 00:00:08 1970 +0000
+  | |  summary:     r8
+  | |
+  | o  changeset:   7:82c6c8b3ac68
+  | |  branch:      name2
+  | |  user:        debugbuilddag
+  | |  date:        Thu Jan 01 00:00:07 1970 +0000
+  | |  summary:     r7
+  | |
+  | o  changeset:   6:94093a13175f
+  | |  branch:      name2
+  | |  user:        debugbuilddag
+  | |  date:        Thu Jan 01 00:00:06 1970 +0000
+  | |  summary:     r6
+  | |
+  | o  changeset:   5:4baee2f72e9e
+  | |  branch:      name2
+  | |  parent:      1:bf4022f1addd
+  | |  user:        debugbuilddag
+  | |  date:        Thu Jan 01 00:00:05 1970 +0000
+  | |  summary:     r5
+  | |
+  o |  changeset:   4:e8bc3a6ab9ae
+  | |  branch:      name1
+  | |  tag:         thead1
+  | |  user:        debugbuilddag
+  | |  date:        Thu Jan 01 00:00:04 1970 +0000
+  | |  summary:     r4
+  | |
+  o |  changeset:   3:46148e496a8a
+  | |  branch:      name1
+  | |  user:        debugbuilddag
+  | |  date:        Thu Jan 01 00:00:03 1970 +0000
+  | |  summary:     r3
+  | |
+  o |  changeset:   2:29863c4219cd
+  |/   branch:      name1
+  |    user:        debugbuilddag
+  |    date:        Thu Jan 01 00:00:02 1970 +0000
+  |    summary:     r2
+  |
+  o  changeset:   1:bf4022f1addd
+  |  tag:         tbase
+  |  user:        debugbuilddag
+  |  date:        Thu Jan 01 00:00:01 1970 +0000
+  |  summary:     r1
+  |
+  o  changeset:   0:ae6ae30a671b
+     user:        debugbuilddag
+     date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     r0
+  
+
+  $ hg fastexport > fastexport.blob
+  $ cat fastexport.blob
+  blob
+  mark :1
+  data 65
+  0 r0
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+  10
+  11
+  12
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :2
+  data 3
+  r0
+  
+  commit refs/heads/default
+  mark :3
+  committer "debugbuilddag" <debugbuilddag> 0 -0000
+  data 2
+  r0
+  M 644 :1 mf
+  M 644 :2 nf0
+  M 644 :2 of
+  
+  blob
+  mark :4
+  data 68
+  0 r0
+  1
+  2 r1
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+  10
+  11
+  12
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :5
+  data 3
+  r1
+  
+  blob
+  mark :6
+  data 3
+  r1
+  
+  commit refs/heads/default
+  mark :7
+  committer "debugbuilddag" <debugbuilddag> 1 -0000
+  data 2
+  r1
+  from :3
+  M 644 :4 mf
+  M 644 :5 nf1
+  M 644 :6 of
+  
+  blob
+  mark :8
+  data 71
+  0 r0
+  1
+  2 r1
+  3
+  4 r2
+  5
+  6
+  7
+  8
+  9
+  10
+  11
+  12
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :9
+  data 3
+  r2
+  
+  blob
+  mark :10
+  data 3
+  r2
+  
+  commit refs/heads/name1
+  mark :11
+  committer "debugbuilddag" <debugbuilddag> 2 -0000
+  data 2
+  r2
+  from :7
+  M 644 :8 mf
+  M 644 :9 nf2
+  M 644 :10 of
+  
+  blob
+  mark :12
+  data 74
+  0 r0
+  1
+  2 r1
+  3
+  4 r2
+  5
+  6 r3
+  7
+  8
+  9
+  10
+  11
+  12
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :13
+  data 3
+  r3
+  
+  blob
+  mark :14
+  data 3
+  r3
+  
+  commit refs/heads/name1
+  mark :15
+  committer "debugbuilddag" <debugbuilddag> 3 -0000
+  data 2
+  r3
+  from :11
+  M 644 :12 mf
+  M 644 :13 nf3
+  M 644 :14 of
+  
+  blob
+  mark :16
+  data 77
+  0 r0
+  1
+  2 r1
+  3
+  4 r2
+  5
+  6 r3
+  7
+  8 r4
+  9
+  10
+  11
+  12
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :17
+  data 3
+  r4
+  
+  blob
+  mark :18
+  data 3
+  r4
+  
+  commit refs/heads/name1
+  mark :19
+  committer "debugbuilddag" <debugbuilddag> 4 -0000
+  data 2
+  r4
+  from :15
+  M 644 :16 mf
+  M 644 :17 nf4
+  M 644 :18 of
+  
+  blob
+  mark :20
+  data 71
+  0 r0
+  1
+  2 r1
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+  10 r5
+  11
+  12
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :21
+  data 3
+  r5
+  
+  blob
+  mark :22
+  data 3
+  r5
+  
+  commit refs/heads/name2
+  mark :23
+  committer "debugbuilddag" <debugbuilddag> 5 -0000
+  data 2
+  r5
+  from :7
+  M 644 :20 mf
+  M 644 :21 nf5
+  M 644 :22 of
+  
+  blob
+  mark :24
+  data 74
+  0 r0
+  1
+  2 r1
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+  10 r5
+  11
+  12 r6
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :25
+  data 3
+  r6
+  
+  blob
+  mark :26
+  data 3
+  r6
+  
+  commit refs/heads/name2
+  mark :27
+  committer "debugbuilddag" <debugbuilddag> 6 -0000
+  data 2
+  r6
+  from :23
+  M 644 :24 mf
+  M 644 :25 nf6
+  M 644 :26 of
+  
+  blob
+  mark :28
+  data 77
+  0 r0
+  1
+  2 r1
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+  10 r5
+  11
+  12 r6
+  13
+  14 r7
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :29
+  data 3
+  r7
+  
+  blob
+  mark :30
+  data 3
+  r7
+  
+  commit refs/heads/name2
+  mark :31
+  committer "debugbuilddag" <debugbuilddag> 7 -0000
+  data 2
+  r7
+  from :27
+  M 644 :28 mf
+  M 644 :29 nf7
+  M 644 :30 of
+  
+  blob
+  mark :32
+  data 80
+  0 r0
+  1
+  2 r1
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+  10 r5
+  11
+  12 r6
+  13
+  14 r7
+  15
+  16 r8
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :33
+  data 3
+  r8
+  
+  blob
+  mark :34
+  data 3
+  r8
+  
+  commit refs/heads/name2
+  mark :35
+  committer "debugbuilddag" <debugbuilddag> 8 -0000
+  data 2
+  r8
+  from :31
+  M 644 :32 mf
+  M 644 :33 nf8
+  M 644 :34 of
+  
+  blob
+  mark :36
+  data 92
+  0 r0
+  1
+  2 r1
+  3
+  4 r2
+  5
+  6 r3
+  7
+  8 r4
+  9
+  10 r5
+  11
+  12 r6
+  13
+  14 r7
+  15
+  16 r8
+  17
+  18 r9
+  19
+  20
+  21
+  22
+  23
+  
+  blob
+  mark :37
+  data 3
+  r9
+  
+  blob
+  mark :38
+  data 3
+  r9
+  
+  commit refs/heads/both
+  mark :39
+  committer "debugbuilddag" <debugbuilddag> 9 -0000
+  data 2
+  r9
+  from :35
+  merge :19
+  M 644 :36 mf
+  M 644 :9 nf2
+  M 644 :13 nf3
+  M 644 :17 nf4
+  M 644 :37 nf9
+  M 644 :38 of
+  
+  blob
+  mark :40
+  data 96
+  0 r0
+  1
+  2 r1
+  3
+  4 r2
+  5
+  6 r3
+  7
+  8 r4
+  9
+  10 r5
+  11
+  12 r6
+  13
+  14 r7
+  15
+  16 r8
+  17
+  18 r9
+  19
+  20 r10
+  21
+  22
+  23
+  
+  blob
+  mark :41
+  data 4
+  r10
+  
+  blob
+  mark :42
+  data 4
+  r10
+  
+  commit refs/heads/both
+  mark :43
+  committer "debugbuilddag" <debugbuilddag> 10 -0000
+  data 3
+  r10
+  from :39
+  M 644 :40 mf
+  M 644 :41 nf10
+  M 644 :42 of
+  
+  blob
+  mark :44
+  data 100
+  0 r0
+  1
+  2 r1
+  3
+  4 r2
+  5
+  6 r3
+  7
+  8 r4
+  9
+  10 r5
+  11
+  12 r6
+  13
+  14 r7
+  15
+  16 r8
+  17
+  18 r9
+  19
+  20 r10
+  21
+  22 r11
+  23
+  
+  blob
+  mark :45
+  data 4
+  r11
+  
+  blob
+  mark :46
+  data 4
+  r11
+  
+  commit refs/heads/both
+  mark :47
+  committer "debugbuilddag" <debugbuilddag> 11 -0000
+  data 3
+  r11
+  from :43
+  M 644 :44 mf
+  M 644 :45 nf11
+  M 644 :46 of
+  
+  commit refs/heads/both
+  mark :48
+  committer "debugbuilddag" <debugbuilddag> 12 -0000
+  data 3
+  r12
+  from :43
+  D nf10
+  
+  commit refs/heads/both
+  mark :49
+  committer "test" <test> 13 -0000
+  data 13
+  debugbuilddag
+  from :47
+  merge :48
+  D nf10
+  
diff --git a/hgext/fastexport.py b/hgext/fastexport.py
new file mode 100644
--- /dev/null
+++ b/hgext/fastexport.py
@@ -0,0 +1,234 @@ 
+# Copyright 2019 Joerg Sonnenberger <joerg@bec.de>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+'''export repositories as git fast-import stream'''
+from __future__ import absolute_import
+import re
+
+from mercurial.i18n import _
+from mercurial.node import (
+    hex,
+    nullrev,
+)
+from mercurial.utils import (
+    stringutil,
+)
+from mercurial import (
+    cmdutil,
+    commands,
+    destutil,
+    error,
+    formatter,
+    graphmod,
+    logcmdutil,
+    phases,
+    pycompat,
+    registrar,
+    revset,
+    revsetlang,
+    scmutil,
+)
+from .convert import convcmd
+
+# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
+# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
+# be specifying the version(s) of Mercurial they are tested with, or
+# leave the attribute unspecified.
+testedwith = 'ships-with-hg-core'
+
+cmdtable = {}
+command = registrar.command(cmdtable)
+
+# Characters that may not appear in the person / email part of a git identity.
+GIT_PERSON_PROHIBITED = re.compile('[<>\n"]')
+GIT_EMAIL_PROHIBITED = re.compile('[<> \n]')
+
+def convert_to_git_user(authormap, user, rev):
+    '''Return the git committer identity for a Mercurial user string.
+
+    ``user`` is looked up in ``authormap`` (unmapped users are used as-is)
+    and split into a person and an email part.  The result is
+    ``"person" <email>``, or just ``<email>`` when the person part is empty.
+
+    Raises error.Abort, naming ``rev``, when either part contains a
+    character rejected by GIT_PERSON_PROHIBITED / GIT_EMAIL_PROHIBITED.
+    '''
+    mapped_user = authormap.get(user, user)
+    user_person = stringutil.person(mapped_user)
+    user_email = stringutil.email(mapped_user)
+    # search() rather than match(): both patterns are a single character
+    # class, so match() would only ever test the first character.
+    if (GIT_EMAIL_PROHIBITED.search(user_email) or
+        GIT_PERSON_PROHIBITED.search(user_person)):
+        raise error.Abort(_('Unable to parse user into person and '
+                            'email for revision %s') % rev)
+    if user_person:
+        return '"%s" <%s>' % (user_person, user_email)
+    else:
+        return '<%s>' % user_email
+
+def convert_to_git_date(date):
+    '''Format a ``(timestamp, utcoff)`` pair as ``<timestamp> <+|-><hhmm>``.
+
+    ``utcoff`` is in seconds; a negative value is written with a ``+`` sign
+    and any other value with ``-``.
+
+    Raises error.Abort when ``utcoff`` is not a whole number of minutes.
+    '''
+    timestamp, utcoff = date
+    # NOTE(review): a zero offset is written as "-0000", which is what
+    # tests/test-fastexport.t expects; confirm that is preferred to "+0000".
+    tzsign = "+" if utcoff < 0 else "-"
+    if utcoff % 60 != 0:
+        raise error.Abort(_('UTC offset in %s is not an integer '
+                            'number of minutes') % (date,))
+    utcoff = abs(utcoff) // 60
+    tzh = utcoff // 60
+    tzmin = utcoff % 60
+    return '%d %s%02d%02d' % (int(timestamp), tzsign, tzh, tzmin)
+
+def convert_to_git_ref(branch):
+    '''Return the ``refs/heads/`` ref name used for ``branch``.'''
+    # XXX filter/map depending on git restrictions
+    return 'refs/heads/%s' % branch
+
+def write_data(buf, data, skip_newline):
+    '''Append a ``data`` command carrying ``data`` to the list ``buf``.
+
+    A trailing newline is appended after the payload unless ``skip_newline``
+    is true and ``data`` already ends with one.
+    '''
+    buf.append('data %d\n' % len(data))
+    buf.append(data)
+    if not skip_newline or data[-1:] != '\n':
+        buf.append('\n')
+
+def export_commit(ui, repo, rev, marks, authormap):
+    '''Write revision ``rev`` of ``repo`` to ``ui`` as fast-import commands.
+
+    A ``blob`` command is written for each file of the revision whose file
+    node is not yet in ``marks``, followed by one ``commit`` command.
+    ``marks`` maps hex file nodes and changeset ids to mark numbers and is
+    extended in place; ``authormap`` is used to remap the committer.
+
+    The revision is skipped with a warning when it is already in ``marks``
+    or when one of its parents is not.
+    '''
+    ctx = repo[rev]
+    revid = ctx.hex()
+    if revid in marks:
+        ui.warn(_('warning: revision %s already exported, skipped\n') % revid)
+        return
+    parents = [p for p in ctx.parents() if p.rev() != nullrev]
+    for p in parents:
+        if p.hex() not in marks:
+            ui.warn(_('warning: parent %s of %s has not been exported, '
+                      'skipped\n') % (p, revid))
+            return
+    for fname in ctx.files():
+        if fname not in ctx:
+            continue
+        filectx = ctx.filectx(fname)
+        # node.hex() instead of the Python-2-only str.encode('hex')
+        filerev = hex(filectx.filenode())
+        if filerev not in marks:
+            mark = len(marks) + 1
+            marks[filerev] = mark
+            data = filectx.data()
+            buf = ['blob\n', 'mark :%d\n' % mark]
+            write_data(buf, data, False)
+            ui.write(*buf, keepprogressbar=True)
+            del buf
+
+    mark = len(marks) + 1
+    marks[revid] = mark
+    user = convert_to_git_user(authormap, ctx.user(), revid)
+    date = convert_to_git_date(ctx.date())
+    ref = convert_to_git_ref(ctx.branch())
+    description = ctx.description()
+    buf = ['commit %s\n' % ref,
+           'mark :%d\n' % mark,
+           'committer %s %s\n' % (user, date),
+          ]
+    write_data(buf, description, True)
+    if parents:
+        buf.append('from :%d\n' % marks[parents[0].hex()])
+    if len(parents) == 2:
+        buf.append('merge :%d\n' % marks[parents[1].hex()])
+        # a merge lists every file that differs from its first parent
+        files = ctx.manifest().diff(parents[0].manifest())
+    else:
+        files = ctx.repo().changelog.readfiles(ctx.node())
+    for fname in sorted(files):
+        if fname not in ctx:
+            buf.append('D %s\n' % fname)
+        else:
+            filectx = ctx.filectx(fname)
+            fileperm = "755" if filectx.isexec() else "644"
+            buf.append('M %s :%d %s\n' % (fileperm,
+                                          marks[hex(filectx.filenode())],
+                                          fname))
+    buf.append('\n')
+    ui.write(*buf, keepprogressbar=True)
+    del buf
+
+isrev = re.compile('^[0-9a-f]{40}$')
+
+@command('fastexport', [
+    ('r', 'rev', [],
+     _('revisions to export'), _('REV')),
+    ('i', 'import-marks', '',
+     _('old marker file to read'), _('FILE')),
+    ('e', 'export-marks', '',
+     _('new marker file to write'), _('FILE')),
+    ('A', 'authormap', '',
+     _('remap usernames using this file'), _('FILE')),
+    ], _('[OPTION]... [REV]...'),
+    helpcategory=command.CATEGORY_IMPORT_EXPORT)
+def fastexport(ui, repo, *revs, **opts):
+    '''export repository as git fast-import stream
+
+    All revisions are exported unless REV arguments or --rev are given.
+    Revisions are written in ascending revision order.
+
+    With --import-marks, the listed objects are treated as already exported.
+    With --export-marks, the ids of all exported objects are written to FILE,
+    one per line in mark order.
+    '''
+    opts = pycompat.byteskwargs(opts)
+
+    revs += tuple(opts.get('rev', []))
+    revs = scmutil.revrange(repo, revs or [':'])
+    if not revs:
+        raise error.Abort(_('no revisions matched'))
+    authorfile = opts.get('authormap')
+    if authorfile:
+        authormap = convcmd.readauthormap(ui, authorfile)
+    else:
+        authormap = {}
+
+    import_marks = opts.get('import_marks')
+    marks = {}
+    if import_marks:
+        with open(import_marks) as import_marks_file:
+            for line in import_marks_file:
+                line = line.strip()
+                if not isrev.match(line) or line in marks:
+                    raise error.Abort(_('Corrupted marker file'))
+                marks[line] = len(marks) + 1
+
+    revs.sort()
+    with ui.makeprogress(_('exporting'), unit=_('revisions'),
+                         total=len(revs)) as progress:
+        for rev in revs:
+            export_commit(ui, repo, rev, marks, authormap)
+            progress.increment()
+
+    export_marks = opts.get('export_marks')
+    if export_marks:
+        with open(export_marks, 'w') as export_marks_file:
+            # marks are numbered 1..len(marks), so sorting by mark number
+            # reproduces the order in which they were assigned
+            for k in sorted(marks, key=marks.get):
+                export_marks_file.write(k + '\n')