Patchwork [v2] osutil: add darwin-only version of os.listdir using cffi

login
register
mail settings
Submitter Maciej Fijalkowski
Date July 12, 2016, 5:30 p.m.
Message ID <e932d21fdf2c8efe3480.1468344607@brick.arcode.com>
Download mbox | patch
Permalink /patch/15804/
State Changes Requested
Headers show

Comments

Maciej Fijalkowski - July 12, 2016, 5:30 p.m.
# HG changeset patch
# User Maciej Fijalkowski <fijall@gmail.com>
# Date 1468227908 -7200
#      Mon Jul 11 11:05:08 2016 +0200
# Node ID e932d21fdf2c8efe34807f3aa5081e98b71b523d
# Parent  065fb34034a3849f07ca836ce2f4eb17c7f543db
osutil: add darwin-only version of os.listdir using cffi
Maciej Fijalkowski - July 14, 2016, 7:47 a.m.
hello, did anyone have a chance to look at that one?

On Tue, Jul 12, 2016 at 7:30 PM, Maciej Fijalkowski <fijall@gmail.com> wrote:
> # HG changeset patch
> # User Maciej Fijalkowski <fijall@gmail.com>
> # Date 1468227908 -7200
> #      Mon Jul 11 11:05:08 2016 +0200
> # Node ID e932d21fdf2c8efe34807f3aa5081e98b71b523d
> # Parent  065fb34034a3849f07ca836ce2f4eb17c7f543db
> osutil: add darwin-only version of os.listdir using cffi
>
> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/mercurial/build_osutil_cffi.py    Mon Jul 11 11:05:08 2016 +0200
> @@ -0,0 +1,76 @@
> +from __future__ import absolute_import
> +
> +import cffi
> +
> +ffi = cffi.FFI()
> +ffi.set_source("_osutil_cffi", """
> +#include <sys/attr.h>
> +#include <sys/vnode.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <time.h>
> +""", include_dirs=['mercurial'])
> +ffi.cdef('''
> +
> +typedef uint32_t attrgroup_t;
> +
> +typedef struct attrlist {
> +    uint16_t     bitmapcount; /* number of attr. bit sets in list */
> +    uint16_t   reserved;    /* (to maintain 4-byte alignment) */
> +    attrgroup_t commonattr;  /* common attribute group */
> +    attrgroup_t volattr;     /* volume attribute group */
> +    attrgroup_t dirattr;     /* directory attribute group */
> +    attrgroup_t fileattr;    /* file attribute group */
> +    attrgroup_t forkattr;    /* fork attribute group */
> +    ...;
> +};
> +
> +typedef struct attribute_set {
> +    ...;
> +} attribute_set_t;
> +
> +typedef struct attrreference {
> +    ...;
> +} attrreference_t;
> +
> +typedef int ... time_t;
> +typedef int ... off_t;
> +
> +typedef struct timespec {
> +    time_t tv_sec;
> +    ...;
> +};
> +
> +int getattrlist(const char* path, struct attrlist * attrList, void * attrBuf,
> +                size_t attrBufSize, unsigned int options);
> +
> +int getattrlistbulk(int dirfd, struct attrlist * attrList, void * attrBuf,
> +                    size_t attrBufSize, uint64_t options);
> +
> +#define ATTR_BIT_MAP_COUNT ...
> +#define ATTR_CMN_NAME ...
> +#define ATTR_CMN_OBJTYPE ...
> +#define ATTR_CMN_MODTIME ...
> +#define ATTR_CMN_ACCESSMASK ...
> +#define ATTR_CMN_ERROR ...
> +#define ATTR_CMN_RETURNED_ATTRS ...
> +#define ATTR_FILE_DATALENGTH ...
> +
> +#define VREG ...
> +#define VDIR ...
> +#define VLNK ...
> +#define VBLK ...
> +#define VCHR ...
> +#define VFIFO ...
> +#define VSOCK ...
> +
> +#define S_IFMT ...
> +
> +int open(const char *path, int oflag, int perm);
> +int close(int);
> +
> +#define O_RDONLY ...
> +''')
> +
> +if __name__ == '__main__':
> +    ffi.compile()
> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py
> --- a/mercurial/pure/osutil.py  Mon Jul 11 10:44:18 2016 +0200
> +++ b/mercurial/pure/osutil.py  Mon Jul 11 11:05:08 2016 +0200
> @@ -14,6 +14,10 @@
>  import stat as statmod
>  import sys
>
> +from . import policy
> +modulepolicy = policy.policy
> +policynocffi = policy.policynocffi
> +
>  def _mode_to_kind(mode):
>      if statmod.S_ISREG(mode):
>          return statmod.S_IFREG
> @@ -31,7 +35,7 @@
>          return statmod.S_IFSOCK
>      return mode
>
> -def listdir(path, stat=False, skip=None):
> +def listdirpure(path, stat=False, skip=None):
>      '''listdir(path, stat=False) -> list_of_tuples
>
>      Return a sorted list containing information about the entries
> @@ -61,6 +65,97 @@
>              result.append((fn, _mode_to_kind(st.st_mode)))
>      return result
>
> +ffi = None
> +if modulepolicy not in policynocffi and sys.platform == 'darwin':
> +    try:
> +        from _osutil_cffi import ffi, lib
> +    except ImportError:
> +        if modulepolicy == 'cffi': # strict cffi import
> +            raise
> +        listdir = listdirpure
> +
> +if sys.platform == 'darwin' and ffi is not None:
> +    listdir_batch_size = 4096
> +
> +    attrkinds = [None] * 20
> +
> +    attrkinds[lib.VREG] = statmod.S_IFREG
> +    attrkinds[lib.VDIR] = statmod.S_IFDIR
> +    attrkinds[lib.VLNK] = statmod.S_IFLNK
> +    attrkinds[lib.VBLK] = statmod.S_IFBLK
> +    attrkinds[lib.VCHR] = statmod.S_IFCHR
> +    attrkinds[lib.VFIFO] = statmod.S_IFIFO
> +    attrkinds[lib.VSOCK] = statmod.S_IFSOCK
> +
> +    class stat_res(object):
> +        def __init__(self, st_mode, st_mtime, st_size):
> +            self.st_mode = st_mode
> +            self.st_mtime = st_mtime
> +            self.st_size = st_size
> +
> +    tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
> +    buf = ffi.new("char[]", listdir_batch_size)
> +
> +    def listdirinternal(dfd, req, stat, skip):
> +        ret = []
> +        while True:
> +            r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
> +            if r == 0:
> +                break
> +            if r == -1:
> +                raise OSError(ffi.errno, os.strerror(ffi.errno))
> +            cur = int(ffi.cast("intptr_t", buf))
> +            for i in range(r):
> +                lgt = ffi.cast("uint32_t*", cur)[0]
> +                c = cur + 4 + ffi.sizeof("attribute_set_t")
> +                ofs = ffi.cast("uint32_t*", c)[0]
> +                str_lgt = ffi.cast("uint32_t*", c)[1]
> +                name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1))
> +                c += ffi.sizeof("attrreference_t")
> +                tp = attrkinds[ffi.cast("uint8_t*", c)[0]]
> +                c += ffi.sizeof("uint32_t")
> +                if name == "." or name == "..":
> +                    continue
> +                if skip == name and tp == statmod.S_ISDIR:
> +                    return []
> +                if stat:
> +                    c1 = c + tv_sec_ofs
> +                    mtime = ffi.cast("time_t*", c1)[0]
> +                    c += ffi.sizeof("struct timespec")
> +                    mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp
> +                    c += ffi.sizeof("uint32_t")
> +                    size = ffi.cast("off_t*", c)[0]
> +                    ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
> +                                st_size=size)))
> +                else:
> +                    ret.append((name, tp))
> +                cur += lgt
> +        return ret
> +
> +    def listdir(path, stat=False, skip=None):
> +        req = ffi.new("struct attrlist*")
> +        req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
> +        req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE |
> +                          lib.ATTR_CMN_RETURNED_ATTRS |
> +                          lib.ATTR_CMN_ACCESSMASK |
> +                          lib.ATTR_CMN_MODTIME)
> +        req.fileattr = lib.ATTR_FILE_DATALENGTH
> +        dfd = lib.open(path, lib.O_RDONLY, 0)
> +        if dfd == -1:
> +            raise OSError(ffi.errno, os.strerror(ffi.errno))
> +
> +        try:
> +            ret = listdirinternal(dfd, req, stat, skip)
> +        finally:
> +            try:
> +                lib.close(dfd)
> +            except BaseException:
> +                pass # we ignore all the errors from closing, not
> +                # much we can do about that
> +        return ret
> +else:
> +    listdir = listdirpure
> +
>  if os.name != 'nt':
>      posixfile = open
>
> diff -r 065fb34034a3 -r e932d21fdf2c setup.py
> --- a/setup.py  Mon Jul 11 10:44:18 2016 +0200
> +++ b/setup.py  Mon Jul 11 11:05:08 2016 +0200
> @@ -320,6 +320,9 @@
>          elif self.distribution.cffi:
>              exts = []
>              # cffi modules go here
> +            if sys.platform == 'darwin':
> +                from mercurial import build_osutil_cffi
> +                exts.append(build_osutil_cffi.ffi.distutils_extension())
>              self.distribution.ext_modules = exts
>          else:
>              h = os.path.join(get_python_inc(), 'Python.h')
Jun Wu - July 14, 2016, 2:22 p.m.
I did a quick look and the direction looks good to me. I think this needs
some eyes from people more familiar with OSX.

Excerpts from Maciej Fijalkowski's message of 2016-07-12 19:30:07 +0200:
> # HG changeset patch
> # User Maciej Fijalkowski <fijall@gmail.com>
> # Date 1468227908 -7200
> #      Mon Jul 11 11:05:08 2016 +0200
> # Node ID e932d21fdf2c8efe34807f3aa5081e98b71b523d
> # Parent  065fb34034a3849f07ca836ce2f4eb17c7f543db
> osutil: add darwin-only version of os.listdir using cffi
> 
> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py

This seems to be the first file under "mercurial" related to build. Thus
looks a bit strange. I think the existing pattern is to put these just in
setup.py.

> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py
> --- a/mercurial/pure/osutil.py    Mon Jul 11 10:44:18 2016 +0200
> +++ b/mercurial/pure/osutil.py    Mon Jul 11 11:05:08 2016 +0200
> @@ -14,6 +14,10 @@
>  import stat as statmod
>  import sys
>  
> +from . import policy
> +modulepolicy = policy.policy
> +policynocffi = policy.policynocffi
> +
>  def _mode_to_kind(mode):
>      if statmod.S_ISREG(mode):
>          return statmod.S_IFREG
> @@ -31,7 +35,7 @@
>          return statmod.S_IFSOCK
>      return mode
>  
> -def listdir(path, stat=False, skip=None):
> +def listdirpure(path, stat=False, skip=None):
>      '''listdir(path, stat=False) -> list_of_tuples
>  
>      Return a sorted list containing information about the entries
> @@ -61,6 +65,97 @@
>              result.append((fn, _mode_to_kind(st.st_mode)))
>      return result
>  
> +ffi = None
> +if modulepolicy not in policynocffi and sys.platform == 'darwin':
> +    try:
> +        from _osutil_cffi import ffi, lib

Is _osutil_cffi in future patches?

> +    except ImportError:
> +        if modulepolicy == 'cffi': # strict cffi import
> +            raise

> +        listdir = listdirpure

This line is unnecessary.

> +if sys.platform == 'darwin' and ffi is not None:
> +    listdir_batch_size = 4096
> +
> +    attrkinds = [None] * 20

Better to add a comment on how the number 20 is chosen.

> +
> +    attrkinds[lib.VREG] = statmod.S_IFREG
> +    attrkinds[lib.VDIR] = statmod.S_IFDIR
> +    attrkinds[lib.VLNK] = statmod.S_IFLNK
> +    attrkinds[lib.VBLK] = statmod.S_IFBLK
> +    attrkinds[lib.VCHR] = statmod.S_IFCHR
> +    attrkinds[lib.VFIFO] = statmod.S_IFIFO
> +    attrkinds[lib.VSOCK] = statmod.S_IFSOCK
> +
> +    class stat_res(object):
> +        def __init__(self, st_mode, st_mtime, st_size):
> +            self.st_mode = st_mode
> +            self.st_mtime = st_mtime
> +            self.st_size = st_size
> +
> +    tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
> +    buf = ffi.new("char[]", listdir_batch_size)
> +
> +    def listdirinternal(dfd, req, stat, skip):
> +        ret = []
> +        while True:
> +            r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
> +            if r == 0:
> +                break
> +            if r == -1:
> +                raise OSError(ffi.errno, os.strerror(ffi.errno))
> +            cur = int(ffi.cast("intptr_t", buf))
> +            for i in range(r):
> +                lgt = ffi.cast("uint32_t*", cur)[0]
> +                c = cur + 4 + ffi.sizeof("attribute_set_t")

Could we define the "val_attrs_t" struct in the ffi module to avoid manual
pointer operations here? Will it hurt perf?

     typedef struct val_attrs {
         uint32_t          length;
         attribute_set_t   returned;
         uint32_t          error;
         attrreference_t   name_info;
         char              *name;
         fsobj_type_t      obj_type;
     } val_attrs_t;

> +                ofs = ffi.cast("uint32_t*", c)[0]
> +                str_lgt = ffi.cast("uint32_t*", c)[1]
> +                name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1))
> +                c += ffi.sizeof("attrreference_t")
> +                tp = attrkinds[ffi.cast("uint8_t*", c)[0]]
> +                c += ffi.sizeof("uint32_t")
> +                if name == "." or name == "..":
> +                    continue
> +                if skip == name and tp == statmod.S_ISDIR:
> +                    return []
> +                if stat:
> +                    c1 = c + tv_sec_ofs
> +                    mtime = ffi.cast("time_t*", c1)[0]
> +                    c += ffi.sizeof("struct timespec")
> +                    mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp
> +                    c += ffi.sizeof("uint32_t")
> +                    size = ffi.cast("off_t*", c)[0]
> +                    ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
> +                                st_size=size)))
> +                else:
> +                    ret.append((name, tp))
> +                cur += lgt
> +        return ret
> +
> +    def listdir(path, stat=False, skip=None):
> +        req = ffi.new("struct attrlist*")
> +        req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
> +        req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE |
> +                          lib.ATTR_CMN_RETURNED_ATTRS |
> +                          lib.ATTR_CMN_ACCESSMASK |
> +                          lib.ATTR_CMN_MODTIME)
> +        req.fileattr = lib.ATTR_FILE_DATALENGTH
> +        dfd = lib.open(path, lib.O_RDONLY, 0)
> +        if dfd == -1:
> +            raise OSError(ffi.errno, os.strerror(ffi.errno))
> +
> +        try:
> +            ret = listdirinternal(dfd, req, stat, skip)
> +        finally:
> +            try:
> +                lib.close(dfd)
> +            except BaseException:
> +                pass # we ignore all the errors from closing, not
> +                # much we can do about that
> +        return ret
> +else:
> +    listdir = listdirpure
> +
>  if os.name != 'nt':
>      posixfile = open
>  
> diff -r 065fb34034a3 -r e932d21fdf2c setup.py
> --- a/setup.py    Mon Jul 11 10:44:18 2016 +0200
> +++ b/setup.py    Mon Jul 11 11:05:08 2016 +0200
> @@ -320,6 +320,9 @@
>          elif self.distribution.cffi:
>              exts = []
>              # cffi modules go here
> +            if sys.platform == 'darwin':
> +                from mercurial import build_osutil_cffi
> +                exts.append(build_osutil_cffi.ffi.distutils_extension())
>              self.distribution.ext_modules = exts
>          else:
>              h = os.path.join(get_python_inc(), 'Python.h')
Maciej Fijalkowski - July 15, 2016, 7:58 a.m.
On Thu, Jul 14, 2016 at 4:22 PM, Jun Wu <quark@fb.com> wrote:
> I did a quick look and the direction looks good to me. I think this needs
> some eyes from people more familiar with OSX.
>
> Excerpts from Maciej Fijalkowski's message of 2016-07-12 19:30:07 +0200:
>> # HG changeset patch
>> # User Maciej Fijalkowski <fijall@gmail.com>
>> # Date 1468227908 -7200
>> #      Mon Jul 11 11:05:08 2016 +0200
>> # Node ID e932d21fdf2c8efe34807f3aa5081e98b71b523d
>> # Parent  065fb34034a3849f07ca836ce2f4eb17c7f543db
>> osutil: add darwin-only version of os.listdir using cffi
>>
>> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py
>
> This seems to be the first file under "mercurial" related to build. Thus
> looks a bit strange. I think the existing pattern is to put these just in
> setup.py.

I would prefer for setup.py not to grow indefinitely (it's already
huge) which directory you want it?

>
>> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py
>> --- a/mercurial/pure/osutil.py    Mon Jul 11 10:44:18 2016 +0200
>> +++ b/mercurial/pure/osutil.py    Mon Jul 11 11:05:08 2016 +0200
>> @@ -14,6 +14,10 @@
>>  import stat as statmod
>>  import sys
>>
>> +from . import policy
>> +modulepolicy = policy.policy
>> +policynocffi = policy.policynocffi
>> +
>>  def _mode_to_kind(mode):
>>      if statmod.S_ISREG(mode):
>>          return statmod.S_IFREG
>> @@ -31,7 +35,7 @@
>>          return statmod.S_IFSOCK
>>      return mode
>>
>> -def listdir(path, stat=False, skip=None):
>> +def listdirpure(path, stat=False, skip=None):
>>      '''listdir(path, stat=False) -> list_of_tuples
>>
>>      Return a sorted list containing information about the entries
>> @@ -61,6 +65,97 @@
>>              result.append((fn, _mode_to_kind(st.st_mode)))
>>      return result
>>
>> +ffi = None
>> +if modulepolicy not in policynocffi and sys.platform == 'darwin':
>> +    try:
>> +        from _osutil_cffi import ffi, lib
>
> Is _osutil_cffi in future patches?

It's the effect of running build_osutil_cffi

>
>> +    except ImportError:
>> +        if modulepolicy == 'cffi': # strict cffi import
>> +            raise
>
>> +        listdir = listdirpure
>
> This line is unnecessary.

indeed

>
>> +if sys.platform == 'darwin' and ffi is not None:
>> +    listdir_batch_size = 4096
>> +
>> +    attrkinds = [None] * 20
>
> Better to add a comment on how the number 20 is chosen.

noted

>
>> +
>> +    attrkinds[lib.VREG] = statmod.S_IFREG
>> +    attrkinds[lib.VDIR] = statmod.S_IFDIR
>> +    attrkinds[lib.VLNK] = statmod.S_IFLNK
>> +    attrkinds[lib.VBLK] = statmod.S_IFBLK
>> +    attrkinds[lib.VCHR] = statmod.S_IFCHR
>> +    attrkinds[lib.VFIFO] = statmod.S_IFIFO
>> +    attrkinds[lib.VSOCK] = statmod.S_IFSOCK
>> +
>> +    class stat_res(object):
>> +        def __init__(self, st_mode, st_mtime, st_size):
>> +            self.st_mode = st_mode
>> +            self.st_mtime = st_mtime
>> +            self.st_size = st_size
>> +
>> +    tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
>> +    buf = ffi.new("char[]", listdir_batch_size)
>> +
>> +    def listdirinternal(dfd, req, stat, skip):
>> +        ret = []
>> +        while True:
>> +            r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
>> +            if r == 0:
>> +                break
>> +            if r == -1:
>> +                raise OSError(ffi.errno, os.strerror(ffi.errno))
>> +            cur = int(ffi.cast("intptr_t", buf))
>> +            for i in range(r):
>> +                lgt = ffi.cast("uint32_t*", cur)[0]
>> +                c = cur + 4 + ffi.sizeof("attribute_set_t")
>
> Could we define the "val_attrs_t" struct in the ffi module to avoid manual
> pointer operations here? Will it hurt perf?
>
>      typedef struct val_attrs {
>          uint32_t          length;
>          attribute_set_t   returned;
>          uint32_t          error;
>          attrreference_t   name_info;
>          char              *name;
>          fsobj_type_t      obj_type;
>      } val_attrs_t;

cffi does not support __attribute__(packed), so we can't define it
like that (maybe there is a better way)

>
>> +                ofs = ffi.cast("uint32_t*", c)[0]
>> +                str_lgt = ffi.cast("uint32_t*", c)[1]
>> +                name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1))
>> +                c += ffi.sizeof("attrreference_t")
>> +                tp = attrkinds[ffi.cast("uint8_t*", c)[0]]
>> +                c += ffi.sizeof("uint32_t")
>> +                if name == "." or name == "..":
>> +                    continue
>> +                if skip == name and tp == statmod.S_ISDIR:
>> +                    return []
>> +                if stat:
>> +                    c1 = c + tv_sec_ofs
>> +                    mtime = ffi.cast("time_t*", c1)[0]
>> +                    c += ffi.sizeof("struct timespec")
>> +                    mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp
>> +                    c += ffi.sizeof("uint32_t")
>> +                    size = ffi.cast("off_t*", c)[0]
>> +                    ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
>> +                                st_size=size)))
>> +                else:
>> +                    ret.append((name, tp))
>> +                cur += lgt
>> +        return ret
>> +
>> +    def listdir(path, stat=False, skip=None):
>> +        req = ffi.new("struct attrlist*")
>> +        req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
>> +        req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE |
>> +                          lib.ATTR_CMN_RETURNED_ATTRS |
>> +                          lib.ATTR_CMN_ACCESSMASK |
>> +                          lib.ATTR_CMN_MODTIME)
>> +        req.fileattr = lib.ATTR_FILE_DATALENGTH
>> +        dfd = lib.open(path, lib.O_RDONLY, 0)
>> +        if dfd == -1:
>> +            raise OSError(ffi.errno, os.strerror(ffi.errno))
>> +
>> +        try:
>> +            ret = listdirinternal(dfd, req, stat, skip)
>> +        finally:
>> +            try:
>> +                lib.close(dfd)
>> +            except BaseException:
>> +                pass # we ignore all the errors from closing, not
>> +                # much we can do about that
>> +        return ret
>> +else:
>> +    listdir = listdirpure
>> +
>>  if os.name != 'nt':
>>      posixfile = open
>>
>> diff -r 065fb34034a3 -r e932d21fdf2c setup.py
>> --- a/setup.py    Mon Jul 11 10:44:18 2016 +0200
>> +++ b/setup.py    Mon Jul 11 11:05:08 2016 +0200
>> @@ -320,6 +320,9 @@
>>          elif self.distribution.cffi:
>>              exts = []
>>              # cffi modules go here
>> +            if sys.platform == 'darwin':
>> +                from mercurial import build_osutil_cffi
>> +                exts.append(build_osutil_cffi.ffi.distutils_extension())
>>              self.distribution.ext_modules = exts
>>          else:
>>              h = os.path.join(get_python_inc(), 'Python.h')
Jun Wu - July 15, 2016, 11:11 a.m.
Excerpts from Maciej Fijalkowski's message of 2016-07-15 09:58:59 +0200:
> I would prefer for setup.py not to grow indefinitely (it's already
> huge) which directory you want it?

I think reviewers with push access can answer this question better.
Personally I feel you will get less objections and move faster if just use
setup.py (scope the logic in a function or class).

> cffi does not support __attribute__(packed), so we can't define it
> like that (maybe there is a better way)

Seems it supports now?
https://bitbucket.org/cffi/cffi/commits/c5e17441bc96
Augie Fackler - July 15, 2016, 5:05 p.m.
On Fri, Jul 15, 2016 at 09:58:59AM +0200, Maciej Fijalkowski wrote:
> On Thu, Jul 14, 2016 at 4:22 PM, Jun Wu <quark@fb.com> wrote:
> > I did a quick look and the direction looks good to me. I think this needs
> > some eyes from people more familiar with OSX.
> >
> > Excerpts from Maciej Fijalkowski's message of 2016-07-12 19:30:07 +0200:
> >> # HG changeset patch
> >> # User Maciej Fijalkowski <fijall@gmail.com>
> >> # Date 1468227908 -7200
> >> #      Mon Jul 11 11:05:08 2016 +0200
> >> # Node ID e932d21fdf2c8efe34807f3aa5081e98b71b523d
> >> # Parent  065fb34034a3849f07ca836ce2f4eb17c7f543db
> >> osutil: add darwin-only version of os.listdir using cffi
> >>
> >> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py
> >
> > This seems to be the first file under "mercurial" related to build. Thus
> > looks a bit strange. I think the existing pattern is to put these just in
> > setup.py.
>
> I would prefer for setup.py not to grow indefinitely (it's already
> huge) which directory you want it?

Maybe call it setup_osutil_cffi.py and put it in the repo root. It
definitely doesn't belong in mercurial/ as a namespace.

>
> >
> >> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py
> >> --- a/mercurial/pure/osutil.py    Mon Jul 11 10:44:18 2016 +0200
> >> +++ b/mercurial/pure/osutil.py    Mon Jul 11 11:05:08 2016 +0200
> >> @@ -14,6 +14,10 @@
> >>  import stat as statmod
> >>  import sys
> >>
> >> +from . import policy
> >> +modulepolicy = policy.policy
> >> +policynocffi = policy.policynocffi
> >> +
> >>  def _mode_to_kind(mode):
> >>      if statmod.S_ISREG(mode):
> >>          return statmod.S_IFREG
> >> @@ -31,7 +35,7 @@
> >>          return statmod.S_IFSOCK
> >>      return mode
> >>
> >> -def listdir(path, stat=False, skip=None):
> >> +def listdirpure(path, stat=False, skip=None):
> >>      '''listdir(path, stat=False) -> list_of_tuples
> >>
> >>      Return a sorted list containing information about the entries
> >> @@ -61,6 +65,97 @@
> >>              result.append((fn, _mode_to_kind(st.st_mode)))
> >>      return result
> >>
> >> +ffi = None
> >> +if modulepolicy not in policynocffi and sys.platform == 'darwin':
> >> +    try:
> >> +        from _osutil_cffi import ffi, lib
> >
> > Is _osutil_cffi in future patches?
>
> It's the effect of running build_osutil_cffi
>
> >
> >> +    except ImportError:
> >> +        if modulepolicy == 'cffi': # strict cffi import
> >> +            raise
> >
> >> +        listdir = listdirpure
> >
> > This line is unnecessary.
>
> indeed
>
> >
> >> +if sys.platform == 'darwin' and ffi is not None:
> >> +    listdir_batch_size = 4096
> >> +
> >> +    attrkinds = [None] * 20
> >
> > Better to add a comment on how the number 20 is chosen.
>
> noted
>
> >
> >> +
> >> +    attrkinds[lib.VREG] = statmod.S_IFREG
> >> +    attrkinds[lib.VDIR] = statmod.S_IFDIR
> >> +    attrkinds[lib.VLNK] = statmod.S_IFLNK
> >> +    attrkinds[lib.VBLK] = statmod.S_IFBLK
> >> +    attrkinds[lib.VCHR] = statmod.S_IFCHR
> >> +    attrkinds[lib.VFIFO] = statmod.S_IFIFO
> >> +    attrkinds[lib.VSOCK] = statmod.S_IFSOCK
> >> +
> >> +    class stat_res(object):
> >> +        def __init__(self, st_mode, st_mtime, st_size):
> >> +            self.st_mode = st_mode
> >> +            self.st_mtime = st_mtime
> >> +            self.st_size = st_size
> >> +
> >> +    tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
> >> +    buf = ffi.new("char[]", listdir_batch_size)
> >> +
> >> +    def listdirinternal(dfd, req, stat, skip):
> >> +        ret = []
> >> +        while True:
> >> +            r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
> >> +            if r == 0:
> >> +                break
> >> +            if r == -1:
> >> +                raise OSError(ffi.errno, os.strerror(ffi.errno))
> >> +            cur = int(ffi.cast("intptr_t", buf))
> >> +            for i in range(r):
> >> +                lgt = ffi.cast("uint32_t*", cur)[0]
> >> +                c = cur + 4 + ffi.sizeof("attribute_set_t")
> >
> > Could we define the "val_attrs_t" struct in the ffi module to avoid manual
> > pointer operations here? Will it hurt perf?
> >
> >      typedef struct val_attrs {
> >          uint32_t          length;
> >          attribute_set_t   returned;
> >          uint32_t          error;
> >          attrreference_t   name_info;
> >          char              *name;
> >          fsobj_type_t      obj_type;
> >      } val_attrs_t;
>
> cffi does not support __attribute__(packed), so we can't define it
> like that (maybe there is a better way)
>
> >
> >> +                ofs = ffi.cast("uint32_t*", c)[0]
> >> +                str_lgt = ffi.cast("uint32_t*", c)[1]
> >> +                name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1))
> >> +                c += ffi.sizeof("attrreference_t")
> >> +                tp = attrkinds[ffi.cast("uint8_t*", c)[0]]
> >> +                c += ffi.sizeof("uint32_t")
> >> +                if name == "." or name == "..":
> >> +                    continue
> >> +                if skip == name and tp == statmod.S_ISDIR:
> >> +                    return []
> >> +                if stat:
> >> +                    c1 = c + tv_sec_ofs
> >> +                    mtime = ffi.cast("time_t*", c1)[0]
> >> +                    c += ffi.sizeof("struct timespec")
> >> +                    mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp
> >> +                    c += ffi.sizeof("uint32_t")
> >> +                    size = ffi.cast("off_t*", c)[0]
> >> +                    ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
> >> +                                st_size=size)))
> >> +                else:
> >> +                    ret.append((name, tp))
> >> +                cur += lgt
> >> +        return ret
> >> +
> >> +    def listdir(path, stat=False, skip=None):
> >> +        req = ffi.new("struct attrlist*")
> >> +        req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
> >> +        req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE |
> >> +                          lib.ATTR_CMN_RETURNED_ATTRS |
> >> +                          lib.ATTR_CMN_ACCESSMASK |
> >> +                          lib.ATTR_CMN_MODTIME)
> >> +        req.fileattr = lib.ATTR_FILE_DATALENGTH
> >> +        dfd = lib.open(path, lib.O_RDONLY, 0)
> >> +        if dfd == -1:
> >> +            raise OSError(ffi.errno, os.strerror(ffi.errno))
> >> +
> >> +        try:
> >> +            ret = listdirinternal(dfd, req, stat, skip)
> >> +        finally:
> >> +            try:
> >> +                lib.close(dfd)
> >> +            except BaseException:
> >> +                pass # we ignore all the errors from closing, not
> >> +                # much we can do about that
> >> +        return ret
> >> +else:
> >> +    listdir = listdirpure
> >> +
> >>  if os.name != 'nt':
> >>      posixfile = open
> >>
> >> diff -r 065fb34034a3 -r e932d21fdf2c setup.py
> >> --- a/setup.py    Mon Jul 11 10:44:18 2016 +0200
> >> +++ b/setup.py    Mon Jul 11 11:05:08 2016 +0200
> >> @@ -320,6 +320,9 @@
> >>          elif self.distribution.cffi:
> >>              exts = []
> >>              # cffi modules go here
> >> +            if sys.platform == 'darwin':
> >> +                from mercurial import build_osutil_cffi
> >> +                exts.append(build_osutil_cffi.ffi.distutils_extension())
> >>              self.distribution.ext_modules = exts
> >>          else:
> >>              h = os.path.join(get_python_inc(), 'Python.h')
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Maciej Fijalkowski - July 18, 2016, 11:10 a.m.
On Fri, Jul 15, 2016 at 1:11 PM, Jun Wu <quark@fb.com> wrote:
> Excerpts from Maciej Fijalkowski's message of 2016-07-15 09:58:59 +0200:
>> I would prefer for setup.py not to grow indefinitely (it's already
>> huge) which directory you want it?
>
> I think reviewers with push access can answer this question better.
> Personally I feel you will get less objections and move faster if just use
> setup.py (scope the logic in a function or class).
>
>> cffi does not support __attribute__(packed), so we can't define it
>> like that (maybe there is a better way)
>
> Seems it supports now?
> https://bitbucket.org/cffi/cffi/commits/c5e17441bc96

I found a way around  it, but I would vastly prefer not to rely on
features that would require e.g. not-yet-released version of pypy
Jun Wu - July 18, 2016, 11:34 a.m.
The cdef(packed=True) feature has been in pypy since 2.4 years ago:
https://bitbucket.org/pypy/pypy/commits/8d8d72389794

And I can confirm it exists in pypy 5.3.1.

Excerpts from Maciej Fijalkowski's message of 2016-07-18 13:10:07 +0200:
> I found a way around  it, but I would vastly prefer not to rely on
> features that would require e.g. not-yet-released version of pypy

Patch

diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/build_osutil_cffi.py	Mon Jul 11 11:05:08 2016 +0200
@@ -0,0 +1,76 @@ 
+from __future__ import absolute_import
+
+import cffi
+
+ffi = cffi.FFI()
+ffi.set_source("_osutil_cffi", """
+#include <sys/attr.h>
+#include <sys/vnode.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <time.h>
+""", include_dirs=['mercurial'])
+ffi.cdef('''
+
+typedef uint32_t attrgroup_t;
+
+typedef struct attrlist {
+    uint16_t     bitmapcount; /* number of attr. bit sets in list */
+    uint16_t   reserved;    /* (to maintain 4-byte alignment) */
+    attrgroup_t commonattr;  /* common attribute group */
+    attrgroup_t volattr;     /* volume attribute group */
+    attrgroup_t dirattr;     /* directory attribute group */
+    attrgroup_t fileattr;    /* file attribute group */
+    attrgroup_t forkattr;    /* fork attribute group */
+    ...;
+};
+
+typedef struct attribute_set {
+    ...;
+} attribute_set_t;
+
+typedef struct attrreference {
+    ...;
+} attrreference_t;
+
+typedef int ... time_t;
+typedef int ... off_t;
+
+typedef struct timespec {
+    time_t tv_sec;
+    ...;
+};
+
+int getattrlist(const char* path, struct attrlist * attrList, void * attrBuf,
+                size_t attrBufSize, unsigned int options);
+
+int getattrlistbulk(int dirfd, struct attrlist * attrList, void * attrBuf,
+                    size_t attrBufSize, uint64_t options);
+
+#define ATTR_BIT_MAP_COUNT ...
+#define ATTR_CMN_NAME ...
+#define ATTR_CMN_OBJTYPE ...
+#define ATTR_CMN_MODTIME ...
+#define ATTR_CMN_ACCESSMASK ...
+#define ATTR_CMN_ERROR ...
+#define ATTR_CMN_RETURNED_ATTRS ...
+#define ATTR_FILE_DATALENGTH ...
+
+#define VREG ...
+#define VDIR ...
+#define VLNK ...
+#define VBLK ...
+#define VCHR ...
+#define VFIFO ...
+#define VSOCK ...
+
+#define S_IFMT ...
+
+int open(const char *path, int oflag, int perm);
+int close(int);
+
+#define O_RDONLY ...
+''')
+
+if __name__ == '__main__':
+    ffi.compile()
diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py
--- a/mercurial/pure/osutil.py	Mon Jul 11 10:44:18 2016 +0200
+++ b/mercurial/pure/osutil.py	Mon Jul 11 11:05:08 2016 +0200
@@ -14,6 +14,10 @@ 
 import stat as statmod
 import sys
 
+from . import policy
+modulepolicy = policy.policy
+policynocffi = policy.policynocffi
+
 def _mode_to_kind(mode):
     if statmod.S_ISREG(mode):
         return statmod.S_IFREG
@@ -31,7 +35,7 @@ 
         return statmod.S_IFSOCK
     return mode
 
-def listdir(path, stat=False, skip=None):
+def listdirpure(path, stat=False, skip=None):
     '''listdir(path, stat=False) -> list_of_tuples
 
     Return a sorted list containing information about the entries
@@ -61,6 +65,97 @@ 
             result.append((fn, _mode_to_kind(st.st_mode)))
     return result
 
+ffi = None
+if modulepolicy not in policynocffi and sys.platform == 'darwin':
+    try:
+        from _osutil_cffi import ffi, lib
+    except ImportError:
+        if modulepolicy == 'cffi': # strict cffi import
+            raise
+        listdir = listdirpure
+
+if sys.platform == 'darwin' and ffi is not None:
+    listdir_batch_size = 4096
+
+    attrkinds = [None] * 20
+
+    attrkinds[lib.VREG] = statmod.S_IFREG
+    attrkinds[lib.VDIR] = statmod.S_IFDIR
+    attrkinds[lib.VLNK] = statmod.S_IFLNK
+    attrkinds[lib.VBLK] = statmod.S_IFBLK
+    attrkinds[lib.VCHR] = statmod.S_IFCHR
+    attrkinds[lib.VFIFO] = statmod.S_IFIFO
+    attrkinds[lib.VSOCK] = statmod.S_IFSOCK
+
+    class stat_res(object):
+        def __init__(self, st_mode, st_mtime, st_size):
+            self.st_mode = st_mode
+            self.st_mtime = st_mtime
+            self.st_size = st_size
+
+    tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
+    buf = ffi.new("char[]", listdir_batch_size)
+
+    def listdirinternal(dfd, req, stat, skip):
+        ret = []
+        while True:
+            r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
+            if r == 0:
+                break
+            if r == -1:
+                raise OSError(ffi.errno, os.strerror(ffi.errno))
+            cur = int(ffi.cast("intptr_t", buf))
+            for i in range(r):
+                lgt = ffi.cast("uint32_t*", cur)[0]
+                c = cur + 4 + ffi.sizeof("attribute_set_t")
+                ofs = ffi.cast("uint32_t*", c)[0]
+                str_lgt = ffi.cast("uint32_t*", c)[1]
+                name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1))
+                c += ffi.sizeof("attrreference_t")
+                tp = attrkinds[ffi.cast("uint8_t*", c)[0]]
+                c += ffi.sizeof("uint32_t")
+                if name == "." or name == "..":
+                    continue
+                if skip == name and tp == statmod.S_ISDIR:
+                    return []
+                if stat:
+                    c1 = c + tv_sec_ofs
+                    mtime = ffi.cast("time_t*", c1)[0]
+                    c += ffi.sizeof("struct timespec")
+                    mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp
+                    c += ffi.sizeof("uint32_t")
+                    size = ffi.cast("off_t*", c)[0]
+                    ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
+                                st_size=size)))
+                else:
+                    ret.append((name, tp))
+                cur += lgt
+        return ret
+
+    def listdir(path, stat=False, skip=None):
+        req = ffi.new("struct attrlist*")
+        req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
+        req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE |
+                          lib.ATTR_CMN_RETURNED_ATTRS |
+                          lib.ATTR_CMN_ACCESSMASK |
+                          lib.ATTR_CMN_MODTIME)
+        req.fileattr = lib.ATTR_FILE_DATALENGTH
+        dfd = lib.open(path, lib.O_RDONLY, 0)
+        if dfd == -1:
+            raise OSError(ffi.errno, os.strerror(ffi.errno))
+
+        try:
+            ret = listdirinternal(dfd, req, stat, skip)
+        finally:
+            try:
+                lib.close(dfd)
+            except BaseException:
+                pass # we ignore all the errors from closing, not
+                # much we can do about that
+        return ret
+else:
+    listdir = listdirpure
+
 if os.name != 'nt':
     posixfile = open
 
diff -r 065fb34034a3 -r e932d21fdf2c setup.py
--- a/setup.py	Mon Jul 11 10:44:18 2016 +0200
+++ b/setup.py	Mon Jul 11 11:05:08 2016 +0200
@@ -320,6 +320,9 @@ 
         elif self.distribution.cffi:
             exts = []
             # cffi modules go here
+            if sys.platform == 'darwin':
+                from mercurial import build_osutil_cffi
+                exts.append(build_osutil_cffi.ffi.distutils_extension())
             self.distribution.ext_modules = exts
         else:
             h = os.path.join(get_python_inc(), 'Python.h')