Submitter | Maciej Fijalkowski |
---|---|
Date | July 12, 2016, 5:30 p.m. |
Message ID | <e932d21fdf2c8efe3480.1468344607@brick.arcode.com> |
Download | mbox | patch |
Permalink | /patch/15804/ |
State | Changes Requested |
Headers | show |
Comments
hello, did anyone have a chance to look at that one? On Tue, Jul 12, 2016 at 7:30 PM, Maciej Fijalkowski <fijall@gmail.com> wrote: > # HG changeset patch > # User Maciej Fijalkowski <fijall@gmail.com> > # Date 1468227908 -7200 > # Mon Jul 11 11:05:08 2016 +0200 > # Node ID e932d21fdf2c8efe34807f3aa5081e98b71b523d > # Parent 065fb34034a3849f07ca836ce2f4eb17c7f543db > osutil: add darwin-only version of os.listdir using cffi > > diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/mercurial/build_osutil_cffi.py Mon Jul 11 11:05:08 2016 +0200 > @@ -0,0 +1,76 @@ > +from __future__ import absolute_import > + > +import cffi > + > +ffi = cffi.FFI() > +ffi.set_source("_osutil_cffi", """ > +#include <sys/attr.h> > +#include <sys/vnode.h> > +#include <unistd.h> > +#include <fcntl.h> > +#include <time.h> > +""", include_dirs=['mercurial']) > +ffi.cdef(''' > + > +typedef uint32_t attrgroup_t; > + > +typedef struct attrlist { > + uint16_t bitmapcount; /* number of attr. bit sets in list */ > + uint16_t reserved; /* (to maintain 4-byte alignment) */ > + attrgroup_t commonattr; /* common attribute group */ > + attrgroup_t volattr; /* volume attribute group */ > + attrgroup_t dirattr; /* directory attribute group */ > + attrgroup_t fileattr; /* file attribute group */ > + attrgroup_t forkattr; /* fork attribute group */ > + ...; > +}; > + > +typedef struct attribute_set { > + ...; > +} attribute_set_t; > + > +typedef struct attrreference { > + ...; > +} attrreference_t; > + > +typedef int ... time_t; > +typedef int ... off_t; > + > +typedef struct timespec { > + time_t tv_sec; > + ...; > +}; > + > +int getattrlist(const char* path, struct attrlist * attrList, void * attrBuf, > + size_t attrBufSize, unsigned int options); > + > +int getattrlistbulk(int dirfd, struct attrlist * attrList, void * attrBuf, > + size_t attrBufSize, uint64_t options); > + > +#define ATTR_BIT_MAP_COUNT ... > +#define ATTR_CMN_NAME ... > +#define ATTR_CMN_OBJTYPE ... > +#define ATTR_CMN_MODTIME ... > +#define ATTR_CMN_ACCESSMASK ... > +#define ATTR_CMN_ERROR ... > +#define ATTR_CMN_RETURNED_ATTRS ... > +#define ATTR_FILE_DATALENGTH ... > + > +#define VREG ... > +#define VDIR ... > +#define VLNK ... > +#define VBLK ... > +#define VCHR ... > +#define VFIFO ... > +#define VSOCK ... > + > +#define S_IFMT ... > + > +int open(const char *path, int oflag, int perm); > +int close(int); > + > +#define O_RDONLY ... > +''') > + > +if __name__ == '__main__': > + ffi.compile() > diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py > --- a/mercurial/pure/osutil.py Mon Jul 11 10:44:18 2016 +0200 > +++ b/mercurial/pure/osutil.py Mon Jul 11 11:05:08 2016 +0200 > @@ -14,6 +14,10 @@ > import stat as statmod > import sys > > +from . import policy > +modulepolicy = policy.policy > +policynocffi = policy.policynocffi > + > def _mode_to_kind(mode): > if statmod.S_ISREG(mode): > return statmod.S_IFREG > @@ -31,7 +35,7 @@ > return statmod.S_IFSOCK > return mode > > -def listdir(path, stat=False, skip=None): > +def listdirpure(path, stat=False, skip=None): > '''listdir(path, stat=False) -> list_of_tuples > > Return a sorted list containing information about the entries > @@ -61,6 +65,97 @@ > result.append((fn, _mode_to_kind(st.st_mode))) > return result > > +ffi = None > +if modulepolicy not in policynocffi and sys.platform == 'darwin': > + try: > + from _osutil_cffi import ffi, lib > + except ImportError: > + if modulepolicy == 'cffi': # strict cffi import > + raise > + listdir = listdirpure > + > +if sys.platform == 'darwin' and ffi is not None: > + listdir_batch_size = 4096 > + > + attrkinds = [None] * 20 > + > + attrkinds[lib.VREG] = statmod.S_IFREG > + attrkinds[lib.VDIR] = statmod.S_IFDIR > + attrkinds[lib.VLNK] = statmod.S_IFLNK > + attrkinds[lib.VBLK] = statmod.S_IFBLK > + attrkinds[lib.VCHR] = statmod.S_IFCHR > + attrkinds[lib.VFIFO] = statmod.S_IFIFO > + attrkinds[lib.VSOCK] = statmod.S_IFSOCK > + > + class stat_res(object): > + def __init__(self, st_mode, st_mtime, st_size): > + self.st_mode = st_mode > + self.st_mtime = st_mtime > + self.st_size = st_size > + > + tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec") > + buf = ffi.new("char[]", listdir_batch_size) > + > + def listdirinternal(dfd, req, stat, skip): > + ret = [] > + while True: > + r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0) > + if r == 0: > + break > + if r == -1: > + raise OSError(ffi.errno, os.strerror(ffi.errno)) > + cur = int(ffi.cast("intptr_t", buf)) > + for i in range(r): > + lgt = ffi.cast("uint32_t*", cur)[0] > + c = cur + 4 + ffi.sizeof("attribute_set_t") > + ofs = ffi.cast("uint32_t*", c)[0] > + str_lgt = ffi.cast("uint32_t*", c)[1] > + name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1)) > + c += ffi.sizeof("attrreference_t") > + tp = attrkinds[ffi.cast("uint8_t*", c)[0]] > + c += ffi.sizeof("uint32_t") > + if name == "." or name == "..": > + continue > + if skip == name and tp == statmod.S_ISDIR: > + return [] > + if stat: > + c1 = c + tv_sec_ofs > + mtime = ffi.cast("time_t*", c1)[0] > + c += ffi.sizeof("struct timespec") > + mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp > + c += ffi.sizeof("uint32_t") > + size = ffi.cast("off_t*", c)[0] > + ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime, > + st_size=size))) > + else: > + ret.append((name, tp)) > + cur += lgt > + return ret > + > + def listdir(path, stat=False, skip=None): > + req = ffi.new("struct attrlist*") > + req.bitmapcount = lib.ATTR_BIT_MAP_COUNT > + req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE | > + lib.ATTR_CMN_RETURNED_ATTRS | > + lib.ATTR_CMN_ACCESSMASK | > + lib.ATTR_CMN_MODTIME) > + req.fileattr = lib.ATTR_FILE_DATALENGTH > + dfd = lib.open(path, lib.O_RDONLY, 0) > + if dfd == -1: > + raise OSError(ffi.errno, os.strerror(ffi.errno)) > + > + try: > + ret = listdirinternal(dfd, req, stat, skip) > + finally: > + try: > + lib.close(dfd) > + except BaseException: > + pass # we ignore all the errors from closing, not > + # much we can do about that > + return ret > +else: > + listdir = listdirpure > + > if os.name != 'nt': > posixfile = open > > diff -r 065fb34034a3 -r e932d21fdf2c setup.py > --- a/setup.py Mon Jul 11 10:44:18 2016 +0200 > +++ b/setup.py Mon Jul 11 11:05:08 2016 +0200 > @@ -320,6 +320,9 @@ > elif self.distribution.cffi: > exts = [] > # cffi modules go here > + if sys.platform == 'darwin': > + from mercurial import build_osutil_cffi > + exts.append(build_osutil_cffi.ffi.distutils_extension()) > self.distribution.ext_modules = exts > else: > h = os.path.join(get_python_inc(), 'Python.h')
I did a quick look and the direction looks good to me. I think this needs some eyes from people more familiar with OSX. Excerpts from Maciej Fijalkowski's message of 2016-07-12 19:30:07 +0200: > # HG changeset patch > # User Maciej Fijalkowski <fijall@gmail.com> > # Date 1468227908 -7200 > # Mon Jul 11 11:05:08 2016 +0200 > # Node ID e932d21fdf2c8efe34807f3aa5081e98b71b523d > # Parent 065fb34034a3849f07ca836ce2f4eb17c7f543db > osutil: add darwin-only version of os.listdir using cffi > > diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py This seems to be the first file under "mercurial" related to build. Thus looks a bit strange. I think the existing pattern is to put these just in setup.py. > diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py > --- a/mercurial/pure/osutil.py Mon Jul 11 10:44:18 2016 +0200 > +++ b/mercurial/pure/osutil.py Mon Jul 11 11:05:08 2016 +0200 > @@ -14,6 +14,10 @@ > import stat as statmod > import sys > > +from . import policy > +modulepolicy = policy.policy > +policynocffi = policy.policynocffi > + > def _mode_to_kind(mode): > if statmod.S_ISREG(mode): > return statmod.S_IFREG > @@ -31,7 +35,7 @@ > return statmod.S_IFSOCK > return mode > > -def listdir(path, stat=False, skip=None): > +def listdirpure(path, stat=False, skip=None): > '''listdir(path, stat=False) -> list_of_tuples > > Return a sorted list containing information about the entries > @@ -61,6 +65,97 @@ > result.append((fn, _mode_to_kind(st.st_mode))) > return result > > +ffi = None > +if modulepolicy not in policynocffi and sys.platform == 'darwin': > + try: > + from _osutil_cffi import ffi, lib Is _osutil_cffi in future patches? > + except ImportError: > + if modulepolicy == 'cffi': # strict cffi import > + raise > + listdir = listdirpure This line is unnecessary. > +if sys.platform == 'darwin' and ffi is not None: > + listdir_batch_size = 4096 > + > + attrkinds = [None] * 20 Better to add a comment on how the number 20 is chosen. > + > + attrkinds[lib.VREG] = statmod.S_IFREG > + attrkinds[lib.VDIR] = statmod.S_IFDIR > + attrkinds[lib.VLNK] = statmod.S_IFLNK > + attrkinds[lib.VBLK] = statmod.S_IFBLK > + attrkinds[lib.VCHR] = statmod.S_IFCHR > + attrkinds[lib.VFIFO] = statmod.S_IFIFO > + attrkinds[lib.VSOCK] = statmod.S_IFSOCK > + > + class stat_res(object): > + def __init__(self, st_mode, st_mtime, st_size): > + self.st_mode = st_mode > + self.st_mtime = st_mtime > + self.st_size = st_size > + > + tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec") > + buf = ffi.new("char[]", listdir_batch_size) > + > + def listdirinternal(dfd, req, stat, skip): > + ret = [] > + while True: > + r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0) > + if r == 0: > + break > + if r == -1: > + raise OSError(ffi.errno, os.strerror(ffi.errno)) > + cur = int(ffi.cast("intptr_t", buf)) > + for i in range(r): > + lgt = ffi.cast("uint32_t*", cur)[0] > + c = cur + 4 + ffi.sizeof("attribute_set_t") Could we define the "val_attrs_t" struct in the ffi module to avoid manual pointer operations here? Will it hurt perf? typedef struct val_attrs { uint32_t length; attribute_set_t returned; uint32_t error; attrreference_t name_info; char *name; fsobj_type_t obj_type; } val_attrs_t; > + ofs = ffi.cast("uint32_t*", c)[0] > + str_lgt = ffi.cast("uint32_t*", c)[1] > + name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1)) > + c += ffi.sizeof("attrreference_t") > + tp = attrkinds[ffi.cast("uint8_t*", c)[0]] > + c += ffi.sizeof("uint32_t") > + if name == "." or name == "..": > + continue > + if skip == name and tp == statmod.S_ISDIR: > + return [] > + if stat: > + c1 = c + tv_sec_ofs > + mtime = ffi.cast("time_t*", c1)[0] > + c += ffi.sizeof("struct timespec") > + mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp > + c += ffi.sizeof("uint32_t") > + size = ffi.cast("off_t*", c)[0] > + ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime, > + st_size=size))) > + else: > + ret.append((name, tp)) > + cur += lgt > + return ret > + > + def listdir(path, stat=False, skip=None): > + req = ffi.new("struct attrlist*") > + req.bitmapcount = lib.ATTR_BIT_MAP_COUNT > + req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE | > + lib.ATTR_CMN_RETURNED_ATTRS | > + lib.ATTR_CMN_ACCESSMASK | > + lib.ATTR_CMN_MODTIME) > + req.fileattr = lib.ATTR_FILE_DATALENGTH > + dfd = lib.open(path, lib.O_RDONLY, 0) > + if dfd == -1: > + raise OSError(ffi.errno, os.strerror(ffi.errno)) > + > + try: > + ret = listdirinternal(dfd, req, stat, skip) > + finally: > + try: > + lib.close(dfd) > + except BaseException: > + pass # we ignore all the errors from closing, not > + # much we can do about that > + return ret > +else: > + listdir = listdirpure > + > if os.name != 'nt': > posixfile = open > > diff -r 065fb34034a3 -r e932d21fdf2c setup.py > --- a/setup.py Mon Jul 11 10:44:18 2016 +0200 > +++ b/setup.py Mon Jul 11 11:05:08 2016 +0200 > @@ -320,6 +320,9 @@ > elif self.distribution.cffi: > exts = [] > # cffi modules go here > + if sys.platform == 'darwin': > + from mercurial import build_osutil_cffi > + exts.append(build_osutil_cffi.ffi.distutils_extension()) > self.distribution.ext_modules = exts > else: > h = os.path.join(get_python_inc(), 'Python.h')
On Thu, Jul 14, 2016 at 4:22 PM, Jun Wu <quark@fb.com> wrote: > I did a quick look and the direction looks good to me. I think this needs > some eyes from people more familiar with OSX. > > Excerpts from Maciej Fijalkowski's message of 2016-07-12 19:30:07 +0200: >> # HG changeset patch >> # User Maciej Fijalkowski <fijall@gmail.com> >> # Date 1468227908 -7200 >> # Mon Jul 11 11:05:08 2016 +0200 >> # Node ID e932d21fdf2c8efe34807f3aa5081e98b71b523d >> # Parent 065fb34034a3849f07ca836ce2f4eb17c7f543db >> osutil: add darwin-only version of os.listdir using cffi >> >> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py > > This seems to be the first file under "mercurial" related to build. Thus > looks a bit strange. I think the existing pattern is to put these just in > setup.py. I would prefer for setup.py not to grow indefinitely (it's already huge) which directory you want it? > >> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py >> --- a/mercurial/pure/osutil.py Mon Jul 11 10:44:18 2016 +0200 >> +++ b/mercurial/pure/osutil.py Mon Jul 11 11:05:08 2016 +0200 >> @@ -14,6 +14,10 @@ >> import stat as statmod >> import sys >> >> +from . import policy >> +modulepolicy = policy.policy >> +policynocffi = policy.policynocffi >> + >> def _mode_to_kind(mode): >> if statmod.S_ISREG(mode): >> return statmod.S_IFREG >> @@ -31,7 +35,7 @@ >> return statmod.S_IFSOCK >> return mode >> >> -def listdir(path, stat=False, skip=None): >> +def listdirpure(path, stat=False, skip=None): >> '''listdir(path, stat=False) -> list_of_tuples >> >> Return a sorted list containing information about the entries >> @@ -61,6 +65,97 @@ >> result.append((fn, _mode_to_kind(st.st_mode))) >> return result >> >> +ffi = None >> +if modulepolicy not in policynocffi and sys.platform == 'darwin': >> + try: >> + from _osutil_cffi import ffi, lib > > Is _osutil_cffi in future patches? It's the effect of running build_osutil_cffi > >> + except ImportError: >> + if modulepolicy == 'cffi': # strict cffi import >> + raise > >> + listdir = listdirpure > > This line is unnecessary. indeed > >> +if sys.platform == 'darwin' and ffi is not None: >> + listdir_batch_size = 4096 >> + >> + attrkinds = [None] * 20 > > Better to add a comment on how the number 20 is chosen. noted > >> + >> + attrkinds[lib.VREG] = statmod.S_IFREG >> + attrkinds[lib.VDIR] = statmod.S_IFDIR >> + attrkinds[lib.VLNK] = statmod.S_IFLNK >> + attrkinds[lib.VBLK] = statmod.S_IFBLK >> + attrkinds[lib.VCHR] = statmod.S_IFCHR >> + attrkinds[lib.VFIFO] = statmod.S_IFIFO >> + attrkinds[lib.VSOCK] = statmod.S_IFSOCK >> + >> + class stat_res(object): >> + def __init__(self, st_mode, st_mtime, st_size): >> + self.st_mode = st_mode >> + self.st_mtime = st_mtime >> + self.st_size = st_size >> + >> + tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec") >> + buf = ffi.new("char[]", listdir_batch_size) >> + >> + def listdirinternal(dfd, req, stat, skip): >> + ret = [] >> + while True: >> + r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0) >> + if r == 0: >> + break >> + if r == -1: >> + raise OSError(ffi.errno, os.strerror(ffi.errno)) >> + cur = int(ffi.cast("intptr_t", buf)) >> + for i in range(r): >> + lgt = ffi.cast("uint32_t*", cur)[0] >> + c = cur + 4 + ffi.sizeof("attribute_set_t") > > Could we define the "val_attrs_t" struct in the ffi module to avoid manual > pointer operations here? Will it hurt perf? > > typedef struct val_attrs { > uint32_t length; > attribute_set_t returned; > uint32_t error; > attrreference_t name_info; > char *name; > fsobj_type_t obj_type; > } val_attrs_t; cffi does not support __attribute__(packed), so we can't define it like that (maybe there is a better way) > >> + ofs = ffi.cast("uint32_t*", c)[0] >> + str_lgt = ffi.cast("uint32_t*", c)[1] >> + name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1)) >> + c += ffi.sizeof("attrreference_t") >> + tp = attrkinds[ffi.cast("uint8_t*", c)[0]] >> + c += ffi.sizeof("uint32_t") >> + if name == "." or name == "..": >> + continue >> + if skip == name and tp == statmod.S_ISDIR: >> + return [] >> + if stat: >> + c1 = c + tv_sec_ofs >> + mtime = ffi.cast("time_t*", c1)[0] >> + c += ffi.sizeof("struct timespec") >> + mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp >> + c += ffi.sizeof("uint32_t") >> + size = ffi.cast("off_t*", c)[0] >> + ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime, >> + st_size=size))) >> + else: >> + ret.append((name, tp)) >> + cur += lgt >> + return ret >> + >> + def listdir(path, stat=False, skip=None): >> + req = ffi.new("struct attrlist*") >> + req.bitmapcount = lib.ATTR_BIT_MAP_COUNT >> + req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE | >> + lib.ATTR_CMN_RETURNED_ATTRS | >> + lib.ATTR_CMN_ACCESSMASK | >> + lib.ATTR_CMN_MODTIME) >> + req.fileattr = lib.ATTR_FILE_DATALENGTH >> + dfd = lib.open(path, lib.O_RDONLY, 0) >> + if dfd == -1: >> + raise OSError(ffi.errno, os.strerror(ffi.errno)) >> + >> + try: >> + ret = listdirinternal(dfd, req, stat, skip) >> + finally: >> + try: >> + lib.close(dfd) >> + except BaseException: >> + pass # we ignore all the errors from closing, not >> + # much we can do about that >> + return ret >> +else: >> + listdir = listdirpure >> + >> if os.name != 'nt': >> posixfile = open >> >> diff -r 065fb34034a3 -r e932d21fdf2c setup.py >> --- a/setup.py Mon Jul 11 10:44:18 2016 +0200 >> +++ b/setup.py Mon Jul 11 11:05:08 2016 +0200 >> @@ -320,6 +320,9 @@ >> elif self.distribution.cffi: >> exts = [] >> # cffi modules go here >> + if sys.platform == 'darwin': >> + from mercurial import build_osutil_cffi >> + exts.append(build_osutil_cffi.ffi.distutils_extension()) >> self.distribution.ext_modules = exts >> else: >> h = os.path.join(get_python_inc(), 'Python.h')
Excerpts from Maciej Fijalkowski's message of 2016-07-15 09:58:59 +0200: > I would prefer for setup.py not to grow indefinitely (it's already > huge) which directory you want it? I think reviewers with push access can answer this question better. Personally I feel you will get less objections and move faster if just use setup.py (scope the logic in a function or class). > cffi does not support __attribute__(packed), so we can't define it > like that (maybe there is a better way) Seems it supports now? https://bitbucket.org/cffi/cffi/commits/c5e17441bc96
On Fri, Jul 15, 2016 at 09:58:59AM +0200, Maciej Fijalkowski wrote: > On Thu, Jul 14, 2016 at 4:22 PM, Jun Wu <quark@fb.com> wrote: > > I did a quick look and the direction looks good to me. I think this needs > > some eyes from people more familiar with OSX. > > > > Excerpts from Maciej Fijalkowski's message of 2016-07-12 19:30:07 +0200: > >> # HG changeset patch > >> # User Maciej Fijalkowski <fijall@gmail.com> > >> # Date 1468227908 -7200 > >> # Mon Jul 11 11:05:08 2016 +0200 > >> # Node ID e932d21fdf2c8efe34807f3aa5081e98b71b523d > >> # Parent 065fb34034a3849f07ca836ce2f4eb17c7f543db > >> osutil: add darwin-only version of os.listdir using cffi > >> > >> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py > > > > This seems to be the first file under "mercurial" related to build. Thus > > looks a bit strange. I think the existing pattern is to put these just in > > setup.py. > > I would prefer for setup.py not to grow indefinitely (it's already > huge) which directory you want it? Maybe call it setup_osutil_cffi.py and put it in the repo root. It definitely doesn't belong in mercurial/ as a namespace. > > > > >> diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py > >> --- a/mercurial/pure/osutil.py Mon Jul 11 10:44:18 2016 +0200 > >> +++ b/mercurial/pure/osutil.py Mon Jul 11 11:05:08 2016 +0200 > >> @@ -14,6 +14,10 @@ > >> import stat as statmod > >> import sys > >> > >> +from . import policy > >> +modulepolicy = policy.policy > >> +policynocffi = policy.policynocffi > >> + > >> def _mode_to_kind(mode): > >> if statmod.S_ISREG(mode): > >> return statmod.S_IFREG > >> @@ -31,7 +35,7 @@ > >> return statmod.S_IFSOCK > >> return mode > >> > >> -def listdir(path, stat=False, skip=None): > >> +def listdirpure(path, stat=False, skip=None): > >> '''listdir(path, stat=False) -> list_of_tuples > >> > >> Return a sorted list containing information about the entries > >> @@ -61,6 +65,97 @@ > >> result.append((fn, _mode_to_kind(st.st_mode))) > >> return result > >> > >> +ffi = None > >> +if modulepolicy not in policynocffi and sys.platform == 'darwin': > >> + try: > >> + from _osutil_cffi import ffi, lib > > > > Is _osutil_cffi in future patches? > > It's the effect of running build_osutil_cffi > > > > >> + except ImportError: > >> + if modulepolicy == 'cffi': # strict cffi import > >> + raise > > > >> + listdir = listdirpure > > > > This line is unnecessary. > > indeed > > > > >> +if sys.platform == 'darwin' and ffi is not None: > >> + listdir_batch_size = 4096 > >> + > >> + attrkinds = [None] * 20 > > > > Better to add a comment on how the number 20 is chosen. > > noted > > > > >> + > >> + attrkinds[lib.VREG] = statmod.S_IFREG > >> + attrkinds[lib.VDIR] = statmod.S_IFDIR > >> + attrkinds[lib.VLNK] = statmod.S_IFLNK > >> + attrkinds[lib.VBLK] = statmod.S_IFBLK > >> + attrkinds[lib.VCHR] = statmod.S_IFCHR > >> + attrkinds[lib.VFIFO] = statmod.S_IFIFO > >> + attrkinds[lib.VSOCK] = statmod.S_IFSOCK > >> + > >> + class stat_res(object): > >> + def __init__(self, st_mode, st_mtime, st_size): > >> + self.st_mode = st_mode > >> + self.st_mtime = st_mtime > >> + self.st_size = st_size > >> + > >> + tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec") > >> + buf = ffi.new("char[]", listdir_batch_size) > >> + > >> + def listdirinternal(dfd, req, stat, skip): > >> + ret = [] > >> + while True: > >> + r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0) > >> + if r == 0: > >> + break > >> + if r == -1: > >> + raise OSError(ffi.errno, os.strerror(ffi.errno)) > >> + cur = int(ffi.cast("intptr_t", buf)) > >> + for i in range(r): > >> + lgt = ffi.cast("uint32_t*", cur)[0] > >> + c = cur + 4 + ffi.sizeof("attribute_set_t") > > > > Could we define the "val_attrs_t" struct in the ffi module to avoid manual > > pointer operations here? Will it hurt perf? > > > > typedef struct val_attrs { > > uint32_t length; > > attribute_set_t returned; > > uint32_t error; > > attrreference_t name_info; > > char *name; > > fsobj_type_t obj_type; > > } val_attrs_t; > > cffi does not support __attribute__(packed), so we can't define it > like that (maybe there is a better way) > > > > >> + ofs = ffi.cast("uint32_t*", c)[0] > >> + str_lgt = ffi.cast("uint32_t*", c)[1] > >> + name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1)) > >> + c += ffi.sizeof("attrreference_t") > >> + tp = attrkinds[ffi.cast("uint8_t*", c)[0]] > >> + c += ffi.sizeof("uint32_t") > >> + if name == "." or name == "..": > >> + continue > >> + if skip == name and tp == statmod.S_ISDIR: > >> + return [] > >> + if stat: > >> + c1 = c + tv_sec_ofs > >> + mtime = ffi.cast("time_t*", c1)[0] > >> + c += ffi.sizeof("struct timespec") > >> + mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp > >> + c += ffi.sizeof("uint32_t") > >> + size = ffi.cast("off_t*", c)[0] > >> + ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime, > >> + st_size=size))) > >> + else: > >> + ret.append((name, tp)) > >> + cur += lgt > >> + return ret > >> + > >> + def listdir(path, stat=False, skip=None): > >> + req = ffi.new("struct attrlist*") > >> + req.bitmapcount = lib.ATTR_BIT_MAP_COUNT > >> + req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE | > >> + lib.ATTR_CMN_RETURNED_ATTRS | > >> + lib.ATTR_CMN_ACCESSMASK | > >> + lib.ATTR_CMN_MODTIME) > >> + req.fileattr = lib.ATTR_FILE_DATALENGTH > >> + dfd = lib.open(path, lib.O_RDONLY, 0) > >> + if dfd == -1: > >> + raise OSError(ffi.errno, os.strerror(ffi.errno)) > >> + > >> + try: > >> + ret = listdirinternal(dfd, req, stat, skip) > >> + finally: > >> + try: > >> + lib.close(dfd) > >> + except BaseException: > >> + pass # we ignore all the errors from closing, not > >> + # much we can do about that > >> + return ret > >> +else: > >> + listdir = listdirpure > >> + > >> if os.name != 'nt': > >> posixfile = open > >> > >> diff -r 065fb34034a3 -r e932d21fdf2c setup.py > >> --- a/setup.py Mon Jul 11 10:44:18 2016 +0200 > >> +++ b/setup.py Mon Jul 11 11:05:08 2016 +0200 > >> @@ -320,6 +320,9 @@ > >> elif self.distribution.cffi: > >> exts = [] > >> # cffi modules go here > >> + if sys.platform == 'darwin': > >> + from mercurial import build_osutil_cffi > >> + exts.append(build_osutil_cffi.ffi.distutils_extension()) > >> self.distribution.ext_modules = exts > >> else: > >> h = os.path.join(get_python_inc(), 'Python.h') > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
On Fri, Jul 15, 2016 at 1:11 PM, Jun Wu <quark@fb.com> wrote: > Excerpts from Maciej Fijalkowski's message of 2016-07-15 09:58:59 +0200: >> I would prefer for setup.py not to grow indefinitely (it's already >> huge) which directory you want it? > > I think reviewers with push access can answer this question better. > Personally I feel you will get less objections and move faster if just use > setup.py (scope the logic in a function or class). > >> cffi does not support __attribute__(packed), so we can't define it >> like that (maybe there is a better way) > > Seems it supports now? > https://bitbucket.org/cffi/cffi/commits/c5e17441bc96 I found a way around it, but I would vastly prefer not to rely on features that would require e.g. not-yet-released version of pypy
The cdef(packed=True) feature has been in pypy since 2.4 years ago: https://bitbucket.org/pypy/pypy/commits/8d8d72389794 And I can confirm it exists in pypy 5.3.1. Excerpts from Maciej Fijalkowski's message of 2016-07-18 13:10:07 +0200: > I found a way around it, but I would vastly prefer not to rely on > features that would require e.g. not-yet-released version of pypy
Patch
diff -r 065fb34034a3 -r e932d21fdf2c mercurial/build_osutil_cffi.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/build_osutil_cffi.py Mon Jul 11 11:05:08 2016 +0200 @@ -0,0 +1,76 @@ +from __future__ import absolute_import + +import cffi + +ffi = cffi.FFI() +ffi.set_source("_osutil_cffi", """ +#include <sys/attr.h> +#include <sys/vnode.h> +#include <unistd.h> +#include <fcntl.h> +#include <time.h> +""", include_dirs=['mercurial']) +ffi.cdef(''' + +typedef uint32_t attrgroup_t; + +typedef struct attrlist { + uint16_t bitmapcount; /* number of attr. bit sets in list */ + uint16_t reserved; /* (to maintain 4-byte alignment) */ + attrgroup_t commonattr; /* common attribute group */ + attrgroup_t volattr; /* volume attribute group */ + attrgroup_t dirattr; /* directory attribute group */ + attrgroup_t fileattr; /* file attribute group */ + attrgroup_t forkattr; /* fork attribute group */ + ...; +}; + +typedef struct attribute_set { + ...; +} attribute_set_t; + +typedef struct attrreference { + ...; +} attrreference_t; + +typedef int ... time_t; +typedef int ... off_t; + +typedef struct timespec { + time_t tv_sec; + ...; +}; + +int getattrlist(const char* path, struct attrlist * attrList, void * attrBuf, + size_t attrBufSize, unsigned int options); + +int getattrlistbulk(int dirfd, struct attrlist * attrList, void * attrBuf, + size_t attrBufSize, uint64_t options); + +#define ATTR_BIT_MAP_COUNT ... +#define ATTR_CMN_NAME ... +#define ATTR_CMN_OBJTYPE ... +#define ATTR_CMN_MODTIME ... +#define ATTR_CMN_ACCESSMASK ... +#define ATTR_CMN_ERROR ... +#define ATTR_CMN_RETURNED_ATTRS ... +#define ATTR_FILE_DATALENGTH ... + +#define VREG ... +#define VDIR ... +#define VLNK ... +#define VBLK ... +#define VCHR ... +#define VFIFO ... +#define VSOCK ... + +#define S_IFMT ... + +int open(const char *path, int oflag, int perm); +int close(int); + +#define O_RDONLY ... +''') + +if __name__ == '__main__': + ffi.compile() diff -r 065fb34034a3 -r e932d21fdf2c mercurial/pure/osutil.py --- a/mercurial/pure/osutil.py Mon Jul 11 10:44:18 2016 +0200 +++ b/mercurial/pure/osutil.py Mon Jul 11 11:05:08 2016 +0200 @@ -14,6 +14,10 @@ import stat as statmod import sys +from . import policy +modulepolicy = policy.policy +policynocffi = policy.policynocffi + def _mode_to_kind(mode): if statmod.S_ISREG(mode): return statmod.S_IFREG @@ -31,7 +35,7 @@ return statmod.S_IFSOCK return mode -def listdir(path, stat=False, skip=None): +def listdirpure(path, stat=False, skip=None): '''listdir(path, stat=False) -> list_of_tuples Return a sorted list containing information about the entries @@ -61,6 +65,97 @@ result.append((fn, _mode_to_kind(st.st_mode))) return result +ffi = None +if modulepolicy not in policynocffi and sys.platform == 'darwin': + try: + from _osutil_cffi import ffi, lib + except ImportError: + if modulepolicy == 'cffi': # strict cffi import + raise + listdir = listdirpure + +if sys.platform == 'darwin' and ffi is not None: + listdir_batch_size = 4096 + + attrkinds = [None] * 20 + + attrkinds[lib.VREG] = statmod.S_IFREG + attrkinds[lib.VDIR] = statmod.S_IFDIR + attrkinds[lib.VLNK] = statmod.S_IFLNK + attrkinds[lib.VBLK] = statmod.S_IFBLK + attrkinds[lib.VCHR] = statmod.S_IFCHR + attrkinds[lib.VFIFO] = statmod.S_IFIFO + attrkinds[lib.VSOCK] = statmod.S_IFSOCK + + class stat_res(object): + def __init__(self, st_mode, st_mtime, st_size): + self.st_mode = st_mode + self.st_mtime = st_mtime + self.st_size = st_size + + tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec") + buf = ffi.new("char[]", listdir_batch_size) + + def listdirinternal(dfd, req, stat, skip): + ret = [] + while True: + r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0) + if r == 0: + break + if r == -1: + raise OSError(ffi.errno, os.strerror(ffi.errno)) + cur = int(ffi.cast("intptr_t", buf)) + for i in range(r): + lgt = ffi.cast("uint32_t*", cur)[0] + c = cur + 4 + ffi.sizeof("attribute_set_t") + ofs = ffi.cast("uint32_t*", c)[0] + str_lgt = ffi.cast("uint32_t*", c)[1] + name = str(ffi.buffer(ffi.cast("char*", c + ofs), str_lgt - 1)) + c += ffi.sizeof("attrreference_t") + tp = attrkinds[ffi.cast("uint8_t*", c)[0]] + c += ffi.sizeof("uint32_t") + if name == "." or name == "..": + continue + if skip == name and tp == statmod.S_ISDIR: + return [] + if stat: + c1 = c + tv_sec_ofs + mtime = ffi.cast("time_t*", c1)[0] + c += ffi.sizeof("struct timespec") + mode = (ffi.cast("uint32_t*", c)[0] & ~lib.S_IFMT)| tp + c += ffi.sizeof("uint32_t") + size = ffi.cast("off_t*", c)[0] + ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime, + st_size=size))) + else: + ret.append((name, tp)) + cur += lgt + return ret + + def listdir(path, stat=False, skip=None): + req = ffi.new("struct attrlist*") + req.bitmapcount = lib.ATTR_BIT_MAP_COUNT + req.commonattr = (lib.ATTR_CMN_NAME | lib.ATTR_CMN_OBJTYPE | + lib.ATTR_CMN_RETURNED_ATTRS | + lib.ATTR_CMN_ACCESSMASK | + lib.ATTR_CMN_MODTIME) + req.fileattr = lib.ATTR_FILE_DATALENGTH + dfd = lib.open(path, lib.O_RDONLY, 0) + if dfd == -1: + raise OSError(ffi.errno, os.strerror(ffi.errno)) + + try: + ret = listdirinternal(dfd, req, stat, skip) + finally: + try: + lib.close(dfd) + except BaseException: + pass # we ignore all the errors from closing, not + # much we can do about that + return ret +else: + listdir = listdirpure + if os.name != 'nt': posixfile = open diff -r 065fb34034a3 -r e932d21fdf2c setup.py --- a/setup.py Mon Jul 11 10:44:18 2016 +0200 +++ b/setup.py Mon Jul 11 11:05:08 2016 +0200 @@ -320,6 +320,9 @@ elif self.distribution.cffi: exts = [] # cffi modules go here + if sys.platform == 'darwin': + from mercurial import build_osutil_cffi + exts.append(build_osutil_cffi.ffi.distutils_extension()) self.distribution.ext_modules = exts else: h = os.path.join(get_python_inc(), 'Python.h')