Patchwork [V3] util: move checknlink away from the dependency of a hard-coded filename

login
register
mail settings
Submitter Tony Tung
Date Aug. 25, 2016, 6:11 p.m.
Message ID <625f3bb4833b47c4d357.1472148694@andromeda.local>
Download mbox | patch
Permalink /patch/16421/
State Accepted
Headers show

Comments

Tony Tung - Aug. 25, 2016, 6:11 p.m.
# HG changeset patch
# User Tony Tung <tonytung@merly.org>
# Date 1472148645 25200
#      Thu Aug 25 11:10:45 2016 -0700
# Node ID 625f3bb4833b47c4d35754fced91f37b077f7866
# Parent  2efc5a05d80a6d4253767f2ce0c2fb062ba83cb6
util: move checknlink away from the dependency of a hard-coded filename

This somewhat insulates us against bugs in checknlink when a stale file
left behind could result in checknlink always returning False.
Yuya Nishihara - Aug. 26, 2016, 3:31 p.m.
On Thu, 25 Aug 2016 11:11:34 -0700, ttung@fb.com wrote:
> # HG changeset patch
> # User Tony Tung <tonytung@merly.org>
> # Date 1472148645 25200
> #      Thu Aug 25 11:10:45 2016 -0700
> # Node ID 625f3bb4833b47c4d35754fced91f37b077f7866
> # Parent  2efc5a05d80a6d4253767f2ce0c2fb062ba83cb6
> util: move checknlink away from the dependency of a hard-coded filename

Queued this, thanks.
Adrian Buehlmann - Aug. 26, 2016, 5:10 p.m.
On 2016-08-25 20:11, ttung@fb.com wrote:
> # HG changeset patch
> # User Tony Tung <tonytung@merly.org>
> # Date 1472148645 25200
> #      Thu Aug 25 11:10:45 2016 -0700
> # Node ID 625f3bb4833b47c4d35754fced91f37b077f7866
> # Parent  2efc5a05d80a6d4253767f2ce0c2fb062ba83cb6
> util: move checknlink away from the dependency of a hard-coded filename
> 
> This somewhat insulates us against bugs in checknlink when a stale file
> left behind could result in checknlink always returning False.
> 
> diff --git a/mercurial/util.py b/mercurial/util.py
> --- a/mercurial/util.py
> +++ b/mercurial/util.py
> @@ -1305,38 +1305,53 @@
>  def checknlink(testfile):
>      '''check whether hardlink count reporting works properly'''
>  
> -    # testfile may be open, so we need a separate file for checking to
> -    # work around issue2543 (or testfile may get lost on Samba shares)
> -    f1 = testfile + ".hgtmp1"
> -    if os.path.lexists(f1):
> -        return False
> +    f1 = {}
> +    f2 = {}
> +
>      try:
> -        posixfile(f1, 'w').close()
> -    except IOError:
> -        try:
> -            os.unlink(f1)
> -        except OSError:
> -            pass
> -        return False
> -
> -    f2 = testfile + ".hgtmp2"
> -    fd = None
> -    try:
> -        oslink(f1, f2)
> +        dirpath, filename = os.path.split(testfile)
> +
> +        # testfile may be open, so we need a separate file for checking to
> +        # work around issue2543 (or testfile may get lost on Samba shares)
> +        f1['fd'], f1['path'] = tempfile.mkstemp(prefix=filename, dir=dirpath)
> +        os.close(f1['fd'])
> +        del f1['fd']
> +
> +        f2['fd'], f2['path'] = tempfile.mkstemp(prefix=filename, dir=dirpath)
> +        os.close(f2['fd'])
> +        del f2['fd']
> +
> +        # there's a small race condition that another file can jam itself in
> +        # between the time we remove f2 and the time we create the hard link.
> +        # in the unlikely scenario that happens, we'll treat it as nlink being
> +        # unreliable.
> +        os.unlink(f2['path'])
> +        oslink(f1['path'], f2['path'])
>          # nlinks() may behave differently for files on Windows shares if
>          # the file is open.
> -        fd = posixfile(f2)
> -        return nlinks(f2) > 1
> -    except OSError:
> +        f2['handle'] = posixfile(f2['path'])
> +        return nlinks(f2['path']) > 1
> +    except EnvironmentError:
>          return False
>      finally:
> -        if fd is not None:
> -            fd.close()
> -        for f in (f1, f2):
> -            try:
> -                os.unlink(f)
> -            except OSError:
> -                pass
> +        for fi in (f1, f2):
> +            if 'fd' in fi:
> +                try:
> +                    os.close(fi['fd'])
> +                except EnvironmentError:
> +                    pass
> +
> +            if 'handle' in fi:
> +                try:
> +                    fi['handle'].close()
> +                except EnvironmentError:
> +                    pass
> +
> +            if 'path' in fi:
> +                try:
> +                    os.unlink(fi['path'])
> +                except EnvironmentError:
> +                    pass

If I'm not mistaken, this will try to unlink file f1 while hardlinked
alias name f2 still has an open handle (opened using posixfile).

This may not be a problem with posixfile, but Windows can have problems,
if a hardlinked file is deleted while it is still in open() state under
an alias name (see [1]).

I would thus recommend closing all files before unlinking.

I'd additionally prefer doing cleanup things in reverse order, i.e.
close 'handle' on f2 first.

So, I think I'd prefer doing the cleanup this way:

        # close files
        for fi in (f2, f1): # intentionally in reverse order
            if 'handle' in fi:
                try:
                    fi['handle'].close()
                except EnvironmentError:
                    pass
            if 'fd' in fi:
                try:
                    os.close(fi['fd'])
                except EnvironmentError:
                    pass

        # unlink files
        for fi in (f2, f1):  # intentionally in reverse order
            if 'path' in fi:
                try:
                    os.unlink(fi['path'])
                except EnvironmentError:
                    pass

>  
>  def endswithsep(path):
>      '''Check path ends with os.sep or os.altsep.'''

[1] https://www.mercurial-scm.org/wiki/UnlinkingFilesOnWindows
Yuya Nishihara - Aug. 27, 2016, 1:29 p.m.
On Fri, 26 Aug 2016 19:10:16 +0200, Adrian Buehlmann wrote:
> On 2016-08-25 20:11, ttung@fb.com wrote:
> > # HG changeset patch
> > # User Tony Tung <tonytung@merly.org>
> > # Date 1472148645 25200
> > #      Thu Aug 25 11:10:45 2016 -0700
> > # Node ID 625f3bb4833b47c4d35754fced91f37b077f7866
> > # Parent  2efc5a05d80a6d4253767f2ce0c2fb062ba83cb6
> > util: move checknlink away from the dependency of a hard-coded filename

> If I'm not mistaken, this will try to unlink file f1 while hardlinked
> alias name f2 still has an open handle (opened using posixfile).
> 
> This may not be a problem with posixfile, but Windows can have problems,
> if a hardlinked file is deleted while it is still in open() state under
> an alias name (see [1]).

Ah, good catch.

Tony, I'll drop this from the committed repo, so please make sure you have
a copy of this revision before pulling from there.

> I would thus recommend closing all files before unlinking.
> 
> I'd additionally prefer doing cleanup things in reverse order, i.e.
> close 'handle' on f2 first.
> 
> So, I think I'd prefer doing the cleanup this way:
> 
>         # close files
>         for fi in (f2, f1): # intentionally in reverse order
>             if 'handle' in fi:
>                 try:
>                     fi['handle'].close()
>                 except EnvironmentError:
>                     pass
>             if 'fd' in fi:
>                 try:
>                     os.close(fi['fd'])
>                 except EnvironmentError:
>                     pass
> 
>         # unlink files
>         for fi in (f2, f1):  # intentionally in reverse order
>             if 'path' in fi:
>                 try:
>                     os.unlink(fi['path'])
>                 except EnvironmentError:
>                     pass

Perhaps we were trying to be too smart. Since raw fds are closed immediately,
what we need in the finally block is just to close f2 object, then unlink
f1 and f2.

Patch

diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -1305,38 +1305,53 @@ 
 def checknlink(testfile):
     '''check whether hardlink count reporting works properly'''
 
-    # testfile may be open, so we need a separate file for checking to
-    # work around issue2543 (or testfile may get lost on Samba shares)
-    f1 = testfile + ".hgtmp1"
-    if os.path.lexists(f1):
-        return False
+    f1 = {}
+    f2 = {}
+
     try:
-        posixfile(f1, 'w').close()
-    except IOError:
-        try:
-            os.unlink(f1)
-        except OSError:
-            pass
-        return False
-
-    f2 = testfile + ".hgtmp2"
-    fd = None
-    try:
-        oslink(f1, f2)
+        dirpath, filename = os.path.split(testfile)
+
+        # testfile may be open, so we need a separate file for checking to
+        # work around issue2543 (or testfile may get lost on Samba shares)
+        f1['fd'], f1['path'] = tempfile.mkstemp(prefix=filename, dir=dirpath)
+        os.close(f1['fd'])
+        del f1['fd']
+
+        f2['fd'], f2['path'] = tempfile.mkstemp(prefix=filename, dir=dirpath)
+        os.close(f2['fd'])
+        del f2['fd']
+
+        # there's a small race condition that another file can jam itself in
+        # between the time we remove f2 and the time we create the hard link.
+        # in the unlikely scenario that happens, we'll treat it as nlink being
+        # unreliable.
+        os.unlink(f2['path'])
+        oslink(f1['path'], f2['path'])
         # nlinks() may behave differently for files on Windows shares if
         # the file is open.
-        fd = posixfile(f2)
-        return nlinks(f2) > 1
-    except OSError:
+        f2['handle'] = posixfile(f2['path'])
+        return nlinks(f2['path']) > 1
+    except EnvironmentError:
         return False
     finally:
-        if fd is not None:
-            fd.close()
-        for f in (f1, f2):
-            try:
-                os.unlink(f)
-            except OSError:
-                pass
+        for fi in (f1, f2):
+            if 'fd' in fi:
+                try:
+                    os.close(fi['fd'])
+                except EnvironmentError:
+                    pass
+
+            if 'handle' in fi:
+                try:
+                    fi['handle'].close()
+                except EnvironmentError:
+                    pass
+
+            if 'path' in fi:
+                try:
+                    os.unlink(fi['path'])
+                except EnvironmentError:
+                    pass
 
 def endswithsep(path):
     '''Check path ends with os.sep or os.altsep.'''