Submitter | Augie Fackler |
---|---|
Date | Oct. 17, 2014, 10:21 p.m. |
Message ID | <b5ad46ae0e6a49525fb9.1413584484@130.17.16.172.in-addr.arpa> |
Download | mbox | patch |
Permalink | /patch/6394/ |
State | Accepted |
Headers | show |
Comments
Perhaps worth adding in-flight: perflog before: ! wall 8.229293 comb 8.210000 user 8.170000 sys 0.040000 (best of 3) perflog after: ! wall 7.663732 comb 7.640000 user 7.600000 sys 0.040000 (best of 3) On Oct 17, 2014, at 6:21 PM, Augie Fackler <raf@durin42.com> wrote: > # HG changeset patch > # User Augie Fackler <raf@durin42.com> > # Date 1413568330 14400 > # Fri Oct 17 13:52:10 2014 -0400 > # Node ID b5ad46ae0e6a49525fb9d8268a61c517a271763e > # Parent 149fc8a44184fbeddd9d9602e3860455e92c1301 > i18n: cache the result of every gettext call > > In looking at profiler output for 'hg log' on mozilla-central, I > noticed we spent a _huge_ amount of time in gettext relative to what > it's doing. Caching provides a roughly 15% performance improvement > even on repositories as small as hg. > > == hg repo on linux == > > Before: > % cumulative self > time seconds seconds name > 5.05 0.19 0.19 i18n.py:62:gettext > 4.84 0.18 0.18 revlog.py:88:decompress > 2.95 0.17 0.11 changelog.py:201:node > 2.32 0.09 0.09 ui.py:577:write > 2.11 0.08 0.08 i18n.py:72:gettext > 2.11 0.08 0.08 obsolete.py:196:_fm0readmarkers > 1.89 0.07 0.07 obsolete.py:569:_load > 1.68 0.63 0.06 localrepo.py:29:__get__ > > real 0m4.026s > user 0m3.993s > sys 0m0.034s > > After: > % cumulative self > time seconds seconds name > 8.05 0.26 0.26 revlog.py:88:decompress > 2.68 0.22 0.09 color.py:395:write > 2.20 0.07 0.07 obsolete.py:196:_fm0readmarkers > 1.95 0.06 0.06 obsolete.py:174:_fm0readmarkers > 1.95 0.06 0.06 ui.py:577:write > 1.95 0.06 0.06 util.py:1228:datestr > 1.71 0.06 0.06 utf_8.py:16:decode > 1.71 0.06 0.06 revlog.py:273:__len__ > > real 0m3.519s > user 0m3.447s > sys 0m0.073s > > > == mozilla-central repo on linux == > Before: > % cumulative self > time seconds seconds name > 7.72 2.35 2.35 revlog.py:88:decompress > 4.46 1.36 1.36 i18n.py:62:gettext > 2.22 0.67 0.67 i18n.py:72:gettext > 2.19 1.14 0.67 changelog.py:201:node > 2.16 0.66 0.66 ui.py:577:write > 1.96 0.60 0.60 utf_8.py:16:decode > 1.93 1.97 0.59 color.py:395:write > 1.85 0.81 0.56 changelog.py:136:tip > > real 0m30.822s > user 0m30.660s > sys 0m0.149s > > After: > % cumulative self > time seconds seconds name > 9.82 2.49 2.49 revlog.py:88:decompress > 2.67 1.31 0.68 localrepo.py:29:__get__ > 2.57 0.65 0.65 utf_8.py:16:decode > 2.48 1.01 0.63 changelog.py:201:node > 2.10 0.82 0.53 changelog.py:136:tip > 2.01 0.51 0.51 ui.py:577:write > 1.91 0.49 0.49 util.py:1232:datestr > 1.85 1.65 0.47 color.py:395:write > > real 0m25.619s > user 0m25.446s > sys 0m0.166s > > == cpython repo on os x = > Before: > % cumulative self > time seconds seconds name > 5.05 1.35 1.35 cmdutil.py:982:_show > 4.59 1.22 1.22 revlog.py:274:__len__ > 3.98 1.06 1.06 i18n.py:62:gettext > 3.91 1.04 1.04 revlog.py:1016:revision > 3.68 0.98 0.98 revlog.py:337:parents > 3.45 0.92 0.92 revlog.py:88:decompress > 2.91 0.78 0.78 revlog.py:309:rev > 2.62 0.70 0.70 revlog.py:1033:revision > > real 0m30.414s > user 0m28.145s > sys 0m0.541s > > > > After: > % cumulative self > time seconds seconds name > 7.98 1.66 1.66 cmdutil.py:982:_show > 6.83 1.42 1.42 changelog.py:46:decodeextra > 5.18 1.08 1.08 revlog.py:274:__len__ > 3.94 0.82 0.82 revlog.py:1016:revision > 3.41 0.71 0.71 revlog.py:309:rev > 3.32 0.69 0.69 revlog.py:88:decompress > 2.99 0.63 0.62 revlog.py:1033:revision > 2.69 0.56 0.56 revlog.py:341:start > > real 0m22.811s > user 0m21.883s > sys 0m0.397s > > diff --git a/mercurial/i18n.py b/mercurial/i18n.py > --- a/mercurial/i18n.py > +++ b/mercurial/i18n.py > @@ -41,6 +41,16 @@ def setdatapath(datapath): > global _ugettext > _ugettext = t.ugettext > > +def _cachegettext(fn): > + cache = {} > + def inner(message): > + if message not in cache: > + cache[message] = fn(message) > + return cache[message] > + return inner > + > +_msgcache = {} > + > def gettext(message): > """Translate message. > > @@ -55,24 +65,26 @@ def gettext(message): > if message is None or not _ugettext: > return message > > - if type(message) is unicode: > - # goofy unicode docstrings in test > - paragraphs = message.split(u'\n\n') > - else: > - paragraphs = [p.decode("ascii") for p in message.split('\n\n')] > - # Be careful not to translate the empty string -- it holds the > - # meta data of the .po file. > - u = u'\n\n'.join([p and _ugettext(p) or '' for p in paragraphs]) > - try: > - # encoding.tolocal cannot be used since it will first try to > - # decode the Unicode string. Calling u.decode(enc) really > - # means u.encode(sys.getdefaultencoding()).decode(enc). Since > - # the Python encoding defaults to 'ascii', this fails if the > - # translated string use non-ASCII characters. > - return u.encode(encoding.encoding, "replace") > - except LookupError: > - # An unknown encoding results in a LookupError. > - return message > + if message not in _msgcache: > + if type(message) is unicode: > + # goofy unicode docstrings in test > + paragraphs = message.split(u'\n\n') > + else: > + paragraphs = [p.decode("ascii") for p in message.split('\n\n')] > + # Be careful not to translate the empty string -- it holds the > + # meta data of the .po file. > + u = u'\n\n'.join([p and _ugettext(p) or '' for p in paragraphs]) > + try: > + # encoding.tolocal cannot be used since it will first try to > + # decode the Unicode string. Calling u.decode(enc) really > + # means u.encode(sys.getdefaultencoding()).decode(enc). Since > + # the Python encoding defaults to 'ascii', this fails if the > + # translated string use non-ASCII characters. > + _msgcache[message] = u.encode(encoding.encoding, "replace") > + except LookupError: > + # An unknown encoding results in a LookupError. > + _msgcache[message] = message > + return _msgcache[message] > > def _plain(): > if 'HGPLAIN' not in os.environ and 'HGPLAINEXCEPT' not in os.environ: > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel
On Fri, 2014-10-17 at 18:21 -0400, Augie Fackler wrote: > # HG changeset patch > # User Augie Fackler <raf@durin42.com> > # Date 1413568330 14400 > # Fri Oct 17 13:52:10 2014 -0400 > # Node ID b5ad46ae0e6a49525fb9d8268a61c517a271763e > # Parent 149fc8a44184fbeddd9d9602e3860455e92c1301 > i18n: cache the result of every gettext call Queued for default.. > +def _cachegettext(fn): > + cache = {} > + def inner(message): > + if message not in cache: > + cache[message] = fn(message) > + return cache[message] > + return inner ...without the dead code.
On Oct 17, 2014, at 6:30 PM, Matt Mackall <mpm@selenic.com> wrote: > On Fri, 2014-10-17 at 18:21 -0400, Augie Fackler wrote: >> # HG changeset patch >> # User Augie Fackler <raf@durin42.com> >> # Date 1413568330 14400 >> # Fri Oct 17 13:52:10 2014 -0400 >> # Node ID b5ad46ae0e6a49525fb9d8268a61c517a271763e >> # Parent 149fc8a44184fbeddd9d9602e3860455e92c1301 >> i18n: cache the result of every gettext call > > Queued for default.. > >> +def _cachegettext(fn): >> + cache = {} >> + def inner(message): >> + if message not in cache: >> + cache[message] = fn(message) >> + return cache[message] >> + return inner > > ...without the dead code. deeerp. Sorry about that. > > -- > Mathematics is the supreme nostalgia of our time. > >
On 10/17/2014 03:30 PM, Matt Mackall wrote: > On Fri, 2014-10-17 at 18:21 -0400, Augie Fackler wrote: >> # HG changeset patch >> # User Augie Fackler <raf@durin42.com> >> # Date 1413568330 14400 >> # Fri Oct 17 13:52:10 2014 -0400 >> # Node ID b5ad46ae0e6a49525fb9d8268a61c517a271763e >> # Parent 149fc8a44184fbeddd9d9602e3860455e92c1301 >> i18n: cache the result of every gettext call > > Queued for default.. Note: I'm to repro the speedup but this is not super obvious so far. mostly move my mozilla-central log from 31s to 28s. This is a speedup. But not as big as the advertised one.
Patch
diff --git a/mercurial/i18n.py b/mercurial/i18n.py --- a/mercurial/i18n.py +++ b/mercurial/i18n.py @@ -41,6 +41,16 @@ def setdatapath(datapath): global _ugettext _ugettext = t.ugettext +def _cachegettext(fn): + cache = {} + def inner(message): + if message not in cache: + cache[message] = fn(message) + return cache[message] + return inner + +_msgcache = {} + def gettext(message): """Translate message. @@ -55,24 +65,26 @@ def gettext(message): if message is None or not _ugettext: return message - if type(message) is unicode: - # goofy unicode docstrings in test - paragraphs = message.split(u'\n\n') - else: - paragraphs = [p.decode("ascii") for p in message.split('\n\n')] - # Be careful not to translate the empty string -- it holds the - # meta data of the .po file. - u = u'\n\n'.join([p and _ugettext(p) or '' for p in paragraphs]) - try: - # encoding.tolocal cannot be used since it will first try to - # decode the Unicode string. Calling u.decode(enc) really - # means u.encode(sys.getdefaultencoding()).decode(enc). Since - # the Python encoding defaults to 'ascii', this fails if the - # translated string use non-ASCII characters. - return u.encode(encoding.encoding, "replace") - except LookupError: - # An unknown encoding results in a LookupError. - return message + if message not in _msgcache: + if type(message) is unicode: + # goofy unicode docstrings in test + paragraphs = message.split(u'\n\n') + else: + paragraphs = [p.decode("ascii") for p in message.split('\n\n')] + # Be careful not to translate the empty string -- it holds the + # meta data of the .po file. + u = u'\n\n'.join([p and _ugettext(p) or '' for p in paragraphs]) + try: + # encoding.tolocal cannot be used since it will first try to + # decode the Unicode string. Calling u.decode(enc) really + # means u.encode(sys.getdefaultencoding()).decode(enc). Since + # the Python encoding defaults to 'ascii', this fails if the + # translated string use non-ASCII characters. + _msgcache[message] = u.encode(encoding.encoding, "replace") + except LookupError: + # An unknown encoding results in a LookupError. + _msgcache[message] = message + return _msgcache[message] def _plain(): if 'HGPLAIN' not in os.environ and 'HGPLAINEXCEPT' not in os.environ: