Patchwork [2,of,2] osutil: use getdirentriesattr on OS X if possible

login
register
mail settings
Submitter Siddharth Agarwal
Date March 26, 2015, 3:14 a.m.
Message ID <c015024cd8c4f82ed1f5.1427339673@devbig136.prn2.facebook.com>
Download mbox | patch
Permalink /patch/8286/
State Superseded
Headers show

Comments

Siddharth Agarwal - March 26, 2015, 3:14 a.m.
# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1427324131 25200
#      Wed Mar 25 15:55:31 2015 -0700
# Node ID c015024cd8c4f82ed1f581f151f645a2228130af
# Parent  247fb9a8ff9de9026d2b5d58b825da9975a37e5c
osutil: use getdirentriesattr on OS X if possible

This is a significant win for large repositories on OS X, especially with a
cold cache. Unfortunately not all filesystems support getdirentriesattr, which
means that we need to keep the lstat-based implementation around as well.

The below tests are all performed on a Mac with an SSD running OS X 10.9, on a
repository with over 200k files. The results are best of 5 with simulated
best-effort conditions.

The gains with a hot cache are pretty impressive: 'hg status' goes from 5.18
seconds to 3.79 seconds.

However, in the hot cache case, a repository that large will probably already be
using something like hgwatchman [1]. Where this really helps is when the cache
is cold [2]. hg status goes from 31.0 seconds to 9.66.

See http://lists.apple.com/archives/filesystem-dev/2014/Dec/msg00002.html for
some more discussion about this function.

This is based on a patch by Sean Farley <smf@smf.io>.

[1] https://bitbucket.org/facebook/hgwatchman

[2] There appears to be no easy way to clear the file cache (aka "vnodes") on
OS X short of rebooting. purge(8) purportedly does that but in my testing had
little effect. The workaround I came up with was to assume that vnode eviction
was LRU, make sure the kern.maxvnodes sysctl is smaller than the size of the
repository, then make sure we'd always miss the cache by running 'hg status' in
another clone of the repository before running it in the test repository.
Siddharth Agarwal - March 26, 2015, 3:19 a.m.
On 03/25/2015 08:14 PM, Siddharth Agarwal wrote:
> # HG changeset patch
> # User Siddharth Agarwal <sid0@fb.com>
> # Date 1427324131 25200
> #      Wed Mar 25 15:55:31 2015 -0700
> # Node ID c015024cd8c4f82ed1f581f151f645a2228130af
> # Parent  247fb9a8ff9de9026d2b5d58b825da9975a37e5c
> osutil: use getdirentriesattr on OS X if possible

There are a couple of error cases I need to handle properly. I'll send
an updated V2.

>
> This is a significant win for large repositories on OS X, especially with a
> cold cache. Unfortunately not all filesystems support getdirentriesattr, which
> means that we need to keep the lstat-based implementation around as well.
>
> The below tests are all performed on a Mac with an SSD running OS X 10.9, on a
> repository with over 200k files. The results are best of 5 with simulated
> best-effort conditions.
>
> The gains with a hot cache are pretty impressive: 'hg status' goes from 5.18
> seconds to 3.79 seconds.
>
> However, in the hot cache case, a repository that large will probably already be
> using something like hgwatchman [1]. Where this really helps is when the cache
> is cold [2]. hg status goes from 31.0 seconds to 9.66.
>
> See http://lists.apple.com/archives/filesystem-dev/2014/Dec/msg00002.html for
> some more discussion about this function.
>
> This is based on a patch by Sean Farley <smf@smf.io>.
>
> [1] https://bitbucket.org/facebook/hgwatchman
>
> [2] There appears to be no easy way to clear the file cache (aka "vnodes") on
> OS X short of rebooting. purge(8) purportedly does that but in my testing had
> little effect. The workaround I came up with was to assume that vnode eviction
> was LRU, make sure the kern.maxvnodes sysctl is smaller than the size of the
> repository, then make sure we'd always miss the cache by running 'hg status' in
> another clone of the repository before running it in the test repository.
>
> diff --git a/mercurial/osutil.c b/mercurial/osutil.c
> --- a/mercurial/osutil.c
> +++ b/mercurial/osutil.c
> @@ -24,6 +24,11 @@
>  #include <unistd.h>
>  #endif
>  
> +#ifdef __APPLE__
> +#include <sys/attr.h>
> +#include <sys/vnode.h>
> +#endif
> +
>  #include "util.h"
>  
>  /* some platforms lack the PATH_MAX definition (eg. GNU/Hurd) */
> @@ -392,8 +397,171 @@
>  	return ret;
>  }
>  
> +#ifdef __APPLE__
> +
> +typedef struct {
> +	u_int32_t length;
> +	attrreference_t name;
> +	fsobj_type_t obj_type;
> +	struct timespec mtime;
> +#if __LITTLE_ENDIAN__
> +	mode_t access_mask;
> +	uint16_t padding;
> +#else
> +	uint16_t padding;
> +	mode_t access_mask;
> +#endif
> +	off_t size;
> +} __attribute__((packed)) attrbuf_entry;
> +
> +int attrkind(attrbuf_entry *entry)
> +{
> +	switch (entry->obj_type) {
> +	case VREG: return S_IFREG;
> +	case VDIR: return S_IFDIR;
> +	case VLNK: return S_IFLNK;
> +	case VBLK: return S_IFBLK;
> +	case VCHR: return S_IFCHR;
> +	case VFIFO: return S_IFIFO;
> +	case VSOCK: return S_IFSOCK;
> +	}
> +	return -1;
> +}
> +
> +/* get these many entries at a time */
> +#define LISTDIR_BATCH_SIZE 50
> +
> +static PyObject *_listdir_batch(char *path, int pathlen, int keepstat,
> +				char *skip, bool *fallback)
> +{
> +	PyObject *list, *elem, *stat = NULL, *ret = NULL;
> +	char fullpath[PATH_MAX + 10];
> +	int kind, err;
> +	unsigned long index;
> +	int done = 0;
> +	unsigned int count;
> +	attrbuf_entry *entry;
> +	/* from the getattrlist(2) man page: a path can be no longer than
> +	   (NAME_MAX * 3 + 1) bytes. Also, "The getattrlist() function will
> +	   silently truncate attribute data if attrBufSize is too small." So
> +	   pass in a buffer big enough for the worst case. */
> +	char attrbuf[LISTDIR_BATCH_SIZE * (sizeof(attrbuf_entry) + NAME_MAX * 3 + 1)];
> +	unsigned int unused1, unused2;
> +
> +	struct stat st;
> +
> +	/* these must match the attrbuf_entry struct, otherwise you'll end up
> +	   with garbage */
> +	struct attrlist requested_attr = {0};
> +	requested_attr.bitmapcount = ATTR_BIT_MAP_COUNT;
> +	requested_attr.commonattr = (ATTR_CMN_NAME | ATTR_CMN_OBJTYPE |
> +				     ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK);
> +	requested_attr.fileattr = ATTR_FILE_TOTALSIZE;
> +
> +	*fallback = false;
> +
> +	int dfd = -1;
> +
> +	if (pathlen >= PATH_MAX) {
> +		errno = ENAMETOOLONG;
> +		PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
> +		goto error_value;
> +	}
> +
> +	strncpy(fullpath, path, PATH_MAX);
> +	fullpath[pathlen] = '/';
> +
> +	dfd = open(path, O_RDONLY);
> +	if (dfd == -1) {
> +		PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
> +		goto error_value;
> +	}
> +
> +	list = PyList_New(0);
> +	if (!list)
> +		goto error_dir;
> +
> +	do {
> +		count = LISTDIR_BATCH_SIZE;
> +		err = getdirentriesattr(dfd, &requested_attr, &attrbuf,
> +					sizeof(attrbuf), &count, &unused1,
> +					&unused2, 0);
> +		if (err < 0) {
> +			if (errno == ENOTSUP) {
> +				/* We're on a filesystem that doesn't support
> +				   getdirentriesattr.  Fall back to the
> +				   stat-based implementation. */
> +				*fallback = true;
> +			} else
> +				PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
> +			goto error;
> +		} else
> +			done = err;
> +
> +		entry = (attrbuf_entry *)attrbuf;
> +
> +		for (index = 0; index < count; index++) {
> +			char *filename = ((char *)&entry->name) +
> +				entry->name.attr_dataoffset;
> +
> +			if (!strcmp(filename, ".") || !strcmp(filename, ".."))
> +				continue;
> +
> +			kind = attrkind(entry);
> +
> +			/* quit early? */
> +			if (skip && kind == S_IFDIR && !strcmp(filename, skip)) {
> +				ret = PyList_New(0);
> +				goto error;
> +			}
> +
> +			if (keepstat) {
> +				/* from the getattrlist(2) man page: "Only the
> +				   permission bits ... are valid". */
> +				st.st_mode = (entry->access_mask & ~S_IFMT) | kind;
> +				st.st_mtime = entry->mtime.tv_sec;
> +				st.st_size = entry->size;
> +				stat = makestat(&st);
> +				if (!stat)
> +					goto error;
> +				elem = Py_BuildValue("siN", filename, kind, stat);
> +			} else
> +				elem = Py_BuildValue("si", filename, kind);
> +			if (!elem)
> +				goto error;
> +			stat = NULL;
> +
> +			PyList_Append(list, elem);
> +			Py_DECREF(elem);
> +
> +			entry = (attrbuf_entry *)((char *)entry + entry->length);
> +		}
> +	} while (err == 0 && !done);
> +
> +	ret = list;
> +	Py_INCREF(ret);
> +
> +error:
> +	Py_DECREF(list);
> +	Py_XDECREF(stat);
> +error_dir:
> +	close(dfd);
> +error_value:
> +	return ret;
> +}
> +
> +#endif /* __APPLE__ */
> +
>  static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
>  {
> +#ifdef __APPLE__
> +	PyObject *ret;
> +	bool fallback = false;
> +
> +	ret = _listdir_batch(path, pathlen, keepstat, skip, &fallback);
> +	if (ret != NULL || !fallback)
> +		return ret;
> +#endif
>  	return _listdir_stat(path, pathlen, keepstat, skip);
>  }
>  
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel

Patch

diff --git a/mercurial/osutil.c b/mercurial/osutil.c
--- a/mercurial/osutil.c
+++ b/mercurial/osutil.c
@@ -24,6 +24,11 @@ 
 #include <unistd.h>
 #endif
 
+#ifdef __APPLE__
+#include <sys/attr.h>
+#include <sys/vnode.h>
+#endif
+
 #include "util.h"
 
 /* some platforms lack the PATH_MAX definition (eg. GNU/Hurd) */
@@ -392,8 +397,171 @@ 
 	return ret;
 }
 
+#ifdef __APPLE__
+
+typedef struct {
+	u_int32_t length;
+	attrreference_t name;
+	fsobj_type_t obj_type;
+	struct timespec mtime;
+#if __LITTLE_ENDIAN__
+	mode_t access_mask;
+	uint16_t padding;
+#else
+	uint16_t padding;
+	mode_t access_mask;
+#endif
+	off_t size;
+} __attribute__((packed)) attrbuf_entry;
+
+int attrkind(attrbuf_entry *entry)
+{
+	switch (entry->obj_type) {
+	case VREG: return S_IFREG;
+	case VDIR: return S_IFDIR;
+	case VLNK: return S_IFLNK;
+	case VBLK: return S_IFBLK;
+	case VCHR: return S_IFCHR;
+	case VFIFO: return S_IFIFO;
+	case VSOCK: return S_IFSOCK;
+	}
+	return -1;
+}
+
+/* get these many entries at a time */
+#define LISTDIR_BATCH_SIZE 50
+
+static PyObject *_listdir_batch(char *path, int pathlen, int keepstat,
+				char *skip, bool *fallback)
+{
+	PyObject *list, *elem, *stat = NULL, *ret = NULL;
+	char fullpath[PATH_MAX + 10];
+	int kind, err;
+	unsigned long index;
+	int done = 0;
+	unsigned int count;
+	attrbuf_entry *entry;
+	/* from the getattrlist(2) man page: a path can be no longer than
+	   (NAME_MAX * 3 + 1) bytes. Also, "The getattrlist() function will
+	   silently truncate attribute data if attrBufSize is too small." So
+	   pass in a buffer big enough for the worst case. */
+	char attrbuf[LISTDIR_BATCH_SIZE * (sizeof(attrbuf_entry) + NAME_MAX * 3 + 1)];
+	unsigned int unused1, unused2;
+
+	struct stat st;
+
+	/* these must match the attrbuf_entry struct, otherwise you'll end up
+	   with garbage */
+	struct attrlist requested_attr = {0};
+	requested_attr.bitmapcount = ATTR_BIT_MAP_COUNT;
+	requested_attr.commonattr = (ATTR_CMN_NAME | ATTR_CMN_OBJTYPE |
+				     ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK);
+	requested_attr.fileattr = ATTR_FILE_TOTALSIZE;
+
+	*fallback = false;
+
+	int dfd = -1;
+
+	if (pathlen >= PATH_MAX) {
+		errno = ENAMETOOLONG;
+		PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
+		goto error_value;
+	}
+
+	strncpy(fullpath, path, PATH_MAX);
+	fullpath[pathlen] = '/';
+
+	dfd = open(path, O_RDONLY);
+	if (dfd == -1) {
+		PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
+		goto error_value;
+	}
+
+	list = PyList_New(0);
+	if (!list)
+		goto error_dir;
+
+	do {
+		count = LISTDIR_BATCH_SIZE;
+		err = getdirentriesattr(dfd, &requested_attr, &attrbuf,
+					sizeof(attrbuf), &count, &unused1,
+					&unused2, 0);
+		if (err < 0) {
+			if (errno == ENOTSUP) {
+				/* We're on a filesystem that doesn't support
+				   getdirentriesattr.  Fall back to the
+				   stat-based implementation. */
+				*fallback = true;
+			} else
+				PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
+			goto error;
+		} else
+			done = err;
+
+		entry = (attrbuf_entry *)attrbuf;
+
+		for (index = 0; index < count; index++) {
+			char *filename = ((char *)&entry->name) +
+				entry->name.attr_dataoffset;
+
+			if (!strcmp(filename, ".") || !strcmp(filename, ".."))
+				continue;
+
+			kind = attrkind(entry);
+
+			/* quit early? */
+			if (skip && kind == S_IFDIR && !strcmp(filename, skip)) {
+				ret = PyList_New(0);
+				goto error;
+			}
+
+			if (keepstat) {
+				/* from the getattrlist(2) man page: "Only the
+				   permission bits ... are valid". */
+				st.st_mode = (entry->access_mask & ~S_IFMT) | kind;
+				st.st_mtime = entry->mtime.tv_sec;
+				st.st_size = entry->size;
+				stat = makestat(&st);
+				if (!stat)
+					goto error;
+				elem = Py_BuildValue("siN", filename, kind, stat);
+			} else
+				elem = Py_BuildValue("si", filename, kind);
+			if (!elem)
+				goto error;
+			stat = NULL;
+
+			PyList_Append(list, elem);
+			Py_DECREF(elem);
+
+			entry = (attrbuf_entry *)((char *)entry + entry->length);
+		}
+	} while (err == 0 && !done);
+
+	ret = list;
+	Py_INCREF(ret);
+
+error:
+	Py_DECREF(list);
+	Py_XDECREF(stat);
+error_dir:
+	close(dfd);
+error_value:
+	return ret;
+}
+
+#endif /* __APPLE__ */
+
 static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
 {
+#ifdef __APPLE__
+	PyObject *ret;
+	bool fallback = false;
+
+	ret = _listdir_batch(path, pathlen, keepstat, skip, &fallback);
+	if (ret != NULL || !fallback)
+		return ret;
+#endif
 	return _listdir_stat(path, pathlen, keepstat, skip);
 }