Patchwork [2,of,2] util: drop statmtimesec

login
register
mail settings
Submitter Matt Mackall
Date Nov. 19, 2015, 8:53 p.m.
Message ID <641f529440ad6c9c5bf8.1447966408@ruin.waste.org>
Download mbox | patch
Permalink /patch/11531/
State Accepted
Commit 448cbdab58837dc1257a77cf3d7495db59ab964f
Delegated to: Yuya Nishihara
Headers show

Comments

Matt Mackall - Nov. 19, 2015, 8:53 p.m.
# HG changeset patch
# User Matt Mackall <mpm@selenic.com>
# Date 1447960517 21600
#      Thu Nov 19 13:15:17 2015 -0600
# Node ID 641f529440ad6c9c5bf82455b083f69c4dc1bc39
# Parent  94e44c088d5990336219b7d3ca7ea717dd68b823
util: drop statmtimesec

We've globablly forced stat to return integer times which agrees with
our extension code, so this is no longer needed.

This speeds up status on mozilla-central substantially:

$ hg perfstatus
! wall 0.190179 comb 0.180000 user 0.120000 sys 0.060000 (best of 53)
$ hg perfstatus
! wall 0.275729 comb 0.270000 user 0.210000 sys 0.060000 (best of 36)
Laurent Charignon - Nov. 19, 2015, 8:59 p.m.
That looks good to me, I will let Bryan, Augie or Pierre-Yves have another look.

Thanks,

Laurent

> On Nov 19, 2015, at 12:53 PM, Matt Mackall <mpm@selenic.com> wrote:
> 
> # HG changeset patch
> # User Matt Mackall <mpm@selenic.com>
> # Date 1447960517 21600
> #      Thu Nov 19 13:15:17 2015 -0600
> # Node ID 641f529440ad6c9c5bf82455b083f69c4dc1bc39
> # Parent  94e44c088d5990336219b7d3ca7ea717dd68b823
> util: drop statmtimesec
> 
> We've globablly forced stat to return integer times which agrees with
> our extension code, so this is no longer needed.
> 
> This speeds up status on mozilla-central substantially:
> 
> $ hg perfstatus
> ! wall 0.190179 comb 0.180000 user 0.120000 sys 0.060000 (best of 53)
> $ hg perfstatus
> ! wall 0.275729 comb 0.270000 user 0.210000 sys 0.060000 (best of 36)
> 
> diff -r 94e44c088d59 -r 641f529440ad mercurial/context.py
> --- a/mercurial/context.py	Thu Nov 19 13:21:24 2015 -0600
> +++ b/mercurial/context.py	Thu Nov 19 13:15:17 2015 -0600
> @@ -1705,7 +1705,7 @@
>     def date(self):
>         t, tz = self._changectx.date()
>         try:
> -            return (util.statmtimesec(self._repo.wvfs.lstat(self._path)), tz)
> +            return (self._repo.wvfs.lstat(self._path).st_mtime, tz)
>         except OSError as err:
>             if err.errno != errno.ENOENT:
>                 raise
> diff -r 94e44c088d59 -r 641f529440ad mercurial/dirstate.py
> --- a/mercurial/dirstate.py	Thu Nov 19 13:21:24 2015 -0600
> +++ b/mercurial/dirstate.py	Thu Nov 19 13:15:17 2015 -0600
> @@ -31,7 +31,7 @@
>     '''Get "now" timestamp on filesystem'''
>     tmpfd, tmpname = vfs.mkstemp()
>     try:
> -        return util.statmtimesec(os.fstat(tmpfd))
> +        return os.fstat(tmpfd).st_mtime
>     finally:
>         os.close(tmpfd)
>         vfs.unlink(tmpname)
> @@ -471,7 +471,7 @@
>     def normal(self, f):
>         '''Mark a file normal and clean.'''
>         s = os.lstat(self._join(f))
> -        mtime = util.statmtimesec(s)
> +        mtime = s.st_mtime
>         self._addpath(f, 'n', s.st_mode,
>                       s.st_size & _rangemask, mtime & _rangemask)
>         if f in self._copymap:
> @@ -704,7 +704,7 @@
>     def _writedirstate(self, st):
>         # use the modification time of the newly created temporary file as the
>         # filesystem's notion of 'now'
> -        now = util.statmtimesec(util.fstat(st)) & _rangemask
> +        now = util.fstat(st).st_mtime & _rangemask
>         st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
>         st.close()
>         self._lastnormaltime = 0
> @@ -1078,16 +1078,15 @@
>             if not st and state in "nma":
>                 dadd(fn)
>             elif state == 'n':
> -                mtime = util.statmtimesec(st)
>                 if (size >= 0 and
>                     ((size != st.st_size and size != st.st_size & _rangemask)
>                      or ((mode ^ st.st_mode) & 0o100 and checkexec))
>                     or size == -2 # other parent
>                     or fn in copymap):
>                     madd(fn)
> -                elif time != mtime and time != mtime & _rangemask:
> +                elif time != st.st_mtime and time != st.st_mtime & _rangemask:
>                     ladd(fn)
> -                elif mtime == lastnormaltime:
> +                elif st.st_mtime == lastnormaltime:
>                     # fn may have just been marked as normal and it may have
>                     # changed in the same second without changing its size.
>                     # This can happen if we quickly do multiple commits.
> diff -r 94e44c088d59 -r 641f529440ad mercurial/util.py
> --- a/mercurial/util.py	Thu Nov 19 13:21:24 2015 -0600
> +++ b/mercurial/util.py	Thu Nov 19 13:15:17 2015 -0600
> @@ -19,7 +19,6 @@
> import errno, shutil, sys, tempfile, traceback
> import re as remod
> import os, time, datetime, calendar, textwrap, signal, collections
> -import stat
> import imp, socket, urllib
> import gc
> import bz2
> @@ -926,20 +925,6 @@
>     except AttributeError:
>         return os.stat(fp.name)
> 
> -def statmtimesec(st):
> -    """Get mtime as integer of seconds
> -
> -    'int(st.st_mtime)' cannot be used because st.st_mtime is computed as
> -    'sec + 1e-9 * nsec' and double-precision floating-point type is too narrow
> -    to represent nanoseconds. If 'nsec' is close to 1 sec, 'int(st.st_mtime)'
> -    can be 'sec + 1'. (issue4836)
> -    """
> -    try:
> -        return st[stat.ST_MTIME]
> -    except (TypeError, IndexError):
> -        # osutil.stat doesn't allow index access and its st_mtime is int
> -        return st.st_mtime
> -
> # File system features
> 
> def checkcase(path):
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> https://selenic.com/mailman/listinfo/mercurial-devel
Bryan O'Sullivan - Nov. 19, 2015, 9:14 p.m.
On Thu, Nov 19, 2015 at 3:59 PM, Laurent Charignon <lcharignon@fb.com>
wrote:

> That looks good to me, I will let Bryan, Augie or Pierre-Yves have another
> look.
>

Ship it!
Yuya Nishihara - Nov. 19, 2015, 11:04 p.m.
On Thu, 19 Nov 2015 16:14:48 -0500, Bryan O'Sullivan wrote:
> On Thu, Nov 19, 2015 at 3:59 PM, Laurent Charignon <lcharignon@fb.com>
> wrote:
> 
> > That looks good to me, I will let Bryan, Augie or Pierre-Yves have another
> > look.
> >
> 
> Ship it!

Doesn't this disable the nsec filecache accuracy?
I'll check it later.
Pierre-Yves David - Nov. 20, 2015, 7 a.m.
On 11/19/2015 03:04 PM, Yuya Nishihara wrote:
> On Thu, 19 Nov 2015 16:14:48 -0500, Bryan O'Sullivan wrote:
>> On Thu, Nov 19, 2015 at 3:59 PM, Laurent Charignon <lcharignon@fb.com>
>> wrote:
>>
>>> That looks good to me, I will let Bryan, Augie or Pierre-Yves have another
>>> look.
>>>
>>
>> Ship it!
>
> Doesn't this disable the nsec filecache accuracy?
> I'll check it later.

Can you elaborate on your concern here? I'll wait on your feedback to 
move forward with this patch.
Yuya Nishihara - Nov. 20, 2015, 12:38 p.m.
On Thu, 19 Nov 2015 23:00:00 -0800, Pierre-Yves David wrote:
> On 11/19/2015 03:04 PM, Yuya Nishihara wrote:
> > On Thu, 19 Nov 2015 16:14:48 -0500, Bryan O'Sullivan wrote:
> >> On Thu, Nov 19, 2015 at 3:59 PM, Laurent Charignon <lcharignon@fb.com>
> >> wrote:
> >>
> >>> That looks good to me, I will let Bryan, Augie or Pierre-Yves have another
> >>> look.
> >>>
> >>
> >> Ship it!
> >
> > Doesn't this disable the nsec filecache accuracy?
> > I'll check it later.
> 
> Can you elaborate on your concern here? I'll wait on your feedback to
> move forward with this patch.

Matt already pointed out it. (I didn't read the commit message this morning,
sorry.)

"This has some impact on our file cache validation code in that it lowers
timestamp resolution. But as we still have to deal with low-resolution
filesystems, we're not relying on this anyway."

Well, it's true our file cache has a problem on HFS+ or ext3, but float mtime
can avoid problems on modern filesystems now. And we haven't come up with an
alternative solution yet.

So, instead of disabling float timestamps globally, perhaps we can reimplement
osutil.stat() and lstat() and use them where integer timestamps are necessary.
I'll try it.
Pierre-Yves David - Nov. 22, 2015, 1:53 a.m.
On 11/20/2015 04:38 AM, Yuya Nishihara wrote:
> On Thu, 19 Nov 2015 23:00:00 -0800, Pierre-Yves David wrote:
>> On 11/19/2015 03:04 PM, Yuya Nishihara wrote:
>>> On Thu, 19 Nov 2015 16:14:48 -0500, Bryan O'Sullivan wrote:
>>>> On Thu, Nov 19, 2015 at 3:59 PM, Laurent Charignon <lcharignon@fb.com>
>>>> wrote:
>>>>
>>>>> That looks good to me, I will let Bryan, Augie or Pierre-Yves have another
>>>>> look.
>>>>>
>>>>
>>>> Ship it!
>>>
>>> Doesn't this disable the nsec filecache accuracy?
>>> I'll check it later.
>>
>> Can you elaborate on your concern here? I'll wait on your feedback to
>> move forward with this patch.
>
> Matt already pointed out it. (I didn't read the commit message this morning,
> sorry.)
>
> "This has some impact on our file cache validation code in that it lowers
> timestamp resolution. But as we still have to deal with low-resolution
> filesystems, we're not relying on this anyway."
>
> Well, it's true our file cache has a problem on HFS+ or ext3, but float mtime
> can avoid problems on modern filesystems now. And we haven't come up with an
> alternative solution yet.
>
> So, instead of disabling float timestamps globally, perhaps we can reimplement
> osutil.stat() and lstat() and use them where integer timestamps are necessary.
> I'll try it.

I'm leaning toward yuya opinion here. The Brian approach is simple and 
effective. The only issue I'm aware of is its compatibility with older 
version of the binary. I think it would be easy to work around that 
limitation.

The main reservation I've with the Matt series is that it regress 
something that is suboptimal but we do not have a better alternative 
ready anyway. It also change some python global state which is somewhat 
scary.
Yuya Nishihara - Nov. 22, 2015, 3:56 p.m.
On Sat, 21 Nov 2015 17:53:01 -0800, Pierre-Yves David wrote:
> On 11/20/2015 04:38 AM, Yuya Nishihara wrote:
> > Matt already pointed out it. (I didn't read the commit message this morning,
> > sorry.)
> >
> > "This has some impact on our file cache validation code in that it lowers
> > timestamp resolution. But as we still have to deal with low-resolution
> > filesystems, we're not relying on this anyway."
> >
> > Well, it's true our file cache has a problem on HFS+ or ext3, but float mtime
> > can avoid problems on modern filesystems now. And we haven't come up with an
> > alternative solution yet.
> >
> > So, instead of disabling float timestamps globally, perhaps we can reimplement
> > osutil.stat() and lstat() and use them where integer timestamps are necessary.
> > I'll try it.
> 
> I'm leaning toward yuya opinion here. The Brian approach is simple and 
> effective. The only issue I'm aware of is its compatibility with older 
> version of the binary. I think it would be easy to work around that 
> limitation.

The other issue is that third-party extensions (read hgwatchman) are likely
to fake a stat_result object by namedtuple or similar type. It has different
indexes than stat_result.

Another workaround is to wrap a stat_result to make .st_mtime return an
integer timestamp. Assuming that we use osutil.listdir(stat=True) in hot loops,
this won't cause notable performance degradation.

  st = intstatresult(os.stat(f))
  st.st_mtime # => int

> The main reservation I've with the Matt series is that it regress 
> something that is suboptimal but we do not have a better alternative 
> ready anyway. It also change some python global state which is somewhat 
> scary.

Patch

diff -r 94e44c088d59 -r 641f529440ad mercurial/context.py
--- a/mercurial/context.py	Thu Nov 19 13:21:24 2015 -0600
+++ b/mercurial/context.py	Thu Nov 19 13:15:17 2015 -0600
@@ -1705,7 +1705,7 @@ 
     def date(self):
         t, tz = self._changectx.date()
         try:
-            return (util.statmtimesec(self._repo.wvfs.lstat(self._path)), tz)
+            return (self._repo.wvfs.lstat(self._path).st_mtime, tz)
         except OSError as err:
             if err.errno != errno.ENOENT:
                 raise
diff -r 94e44c088d59 -r 641f529440ad mercurial/dirstate.py
--- a/mercurial/dirstate.py	Thu Nov 19 13:21:24 2015 -0600
+++ b/mercurial/dirstate.py	Thu Nov 19 13:15:17 2015 -0600
@@ -31,7 +31,7 @@ 
     '''Get "now" timestamp on filesystem'''
     tmpfd, tmpname = vfs.mkstemp()
     try:
-        return util.statmtimesec(os.fstat(tmpfd))
+        return os.fstat(tmpfd).st_mtime
     finally:
         os.close(tmpfd)
         vfs.unlink(tmpname)
@@ -471,7 +471,7 @@ 
     def normal(self, f):
         '''Mark a file normal and clean.'''
         s = os.lstat(self._join(f))
-        mtime = util.statmtimesec(s)
+        mtime = s.st_mtime
         self._addpath(f, 'n', s.st_mode,
                       s.st_size & _rangemask, mtime & _rangemask)
         if f in self._copymap:
@@ -704,7 +704,7 @@ 
     def _writedirstate(self, st):
         # use the modification time of the newly created temporary file as the
         # filesystem's notion of 'now'
-        now = util.statmtimesec(util.fstat(st)) & _rangemask
+        now = util.fstat(st).st_mtime & _rangemask
         st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
         st.close()
         self._lastnormaltime = 0
@@ -1078,16 +1078,15 @@ 
             if not st and state in "nma":
                 dadd(fn)
             elif state == 'n':
-                mtime = util.statmtimesec(st)
                 if (size >= 0 and
                     ((size != st.st_size and size != st.st_size & _rangemask)
                      or ((mode ^ st.st_mode) & 0o100 and checkexec))
                     or size == -2 # other parent
                     or fn in copymap):
                     madd(fn)
-                elif time != mtime and time != mtime & _rangemask:
+                elif time != st.st_mtime and time != st.st_mtime & _rangemask:
                     ladd(fn)
-                elif mtime == lastnormaltime:
+                elif st.st_mtime == lastnormaltime:
                     # fn may have just been marked as normal and it may have
                     # changed in the same second without changing its size.
                     # This can happen if we quickly do multiple commits.
diff -r 94e44c088d59 -r 641f529440ad mercurial/util.py
--- a/mercurial/util.py	Thu Nov 19 13:21:24 2015 -0600
+++ b/mercurial/util.py	Thu Nov 19 13:15:17 2015 -0600
@@ -19,7 +19,6 @@ 
 import errno, shutil, sys, tempfile, traceback
 import re as remod
 import os, time, datetime, calendar, textwrap, signal, collections
-import stat
 import imp, socket, urllib
 import gc
 import bz2
@@ -926,20 +925,6 @@ 
     except AttributeError:
         return os.stat(fp.name)
 
-def statmtimesec(st):
-    """Get mtime as integer of seconds
-
-    'int(st.st_mtime)' cannot be used because st.st_mtime is computed as
-    'sec + 1e-9 * nsec' and double-precision floating-point type is too narrow
-    to represent nanoseconds. If 'nsec' is close to 1 sec, 'int(st.st_mtime)'
-    can be 'sec + 1'. (issue4836)
-    """
-    try:
-        return st[stat.ST_MTIME]
-    except (TypeError, IndexError):
-        # osutil.stat doesn't allow index access and its st_mtime is int
-        return st.st_mtime
-
 # File system features
 
 def checkcase(path):