Patchwork [1,of,1] Decode UTF-8 e-mail headers

login
register
mail settings
Submitter funman@videolan.org
Date Nov. 12, 2013, 10:13 a.m.
Message ID <fb293d62a1e6e75a640a.1384251198@fat>
Download mbox | patch
Permalink /patch/2932/
State Superseded, archived
Headers show

Comments

funman@videolan.org - Nov. 12, 2013, 10:13 a.m.

Patch

# HG changeset patch
# User Rafaël Carré <funman@videolan.org>
# Date 1384251185 -3600
#      Tue Nov 12 11:13:05 2013 +0100
# Branch stable
# Node ID fb293d62a1e6e75a640a15006e014dbf2ffb96e3
# Parent  ba6486076429e5c20d910b8a5d4f8acf1e9dc1b1
Decode UTF-8 e-mail headers

diff -r ba6486076429 -r fb293d62a1e6 mercurial/patch.py
--- a/mercurial/patch.py	Wed Nov 06 10:20:18 2013 -0800
+++ b/mercurial/patch.py	Tue Nov 12 11:13:05 2013 +0100
@@ -12,6 +12,7 @@ 
 # load. This was not a problem on Python 2.7.
 import email.Generator
 import email.Parser
+import email.header
 
 from i18n import _
 from node import hex, short
@@ -162,6 +163,29 @@ 
     Any item in the returned tuple can be None. If filename is None,
     fileobj did not contain a patch. Caller must unlink filename when done.'''
 
+    def email_decode(h):
+        '''Decode ?=UTF-8? from e-mail headers.'''
+        if h is None:
+            return None
+        res = ''
+        pairs = email.header.decode_header(h)
+        if pairs is None:
+            return None
+        n = len(pairs)
+        pair = 0
+        for p in pairs:
+            pair += 1
+            if p[1] == 'utf-8':
+                res += p[0]
+                if pair < n:
+                    res += ' '
+            elif p[1] is None:
+                res += p[0]
+                if pair < n:
+                    res += ' '
+
+        return encoding.tolocal(res)
+
     # attempt to detect the start of a patch
     # (this heuristic is borrowed from quilt)
     diffre = re.compile(r'^(?:Index:[ \t]|diff[ \t]|RCS file: |'
@@ -174,8 +198,8 @@ 
     try:
         msg = email.Parser.Parser().parse(fileobj)
 
-        subject = msg['Subject']
-        user = msg['From']
+        subject = email_decode(msg['Subject'])
+        user = email_decode(msg['From'])
         if not subject and not user:
             # Not an email, restore parsed headers if any
             subject = '\n'.join(': '.join(h) for h in msg.items()) + '\n'
diff -r ba6486076429 -r fb293d62a1e6 tests/test-import.t
--- a/tests/test-import.t	Wed Nov 06 10:20:18 2013 -0800
+++ b/tests/test-import.t	Tue Nov 12 11:13:05 2013 +0100
@@ -1156,3 +1156,19 @@ 
   line
 
   $ cd ..
+
+  $ cat > utf8.patch <<EOF
+  > From: =?UTF-8?q?=C3=AB?=
+  > Subject: patch
+  > diff --git /dev/null b/a
+  > --- /dev/null
+  > +++ b/a
+  > @@ -0,0 +1,1 @@
+  > +a
+  > EOF
+  $ hg init utf
+  $ cd utf
+  $ hg import ../utf8.patch
+  applying ../utf8.patch
+  $ hg log | grep ^user -
+  user:        ë