Patchwork [2,of,2,V2] osutil: use getdirentriesattr on OS X if possible

login
register
mail settings
Submitter Siddharth Agarwal
Date March 26, 2015, 5:01 a.m.
Message ID <4a644f3fd340147e492f.1427346115@devbig136.prn2.facebook.com>
Download mbox | patch
Permalink /patch/8290/
State Accepted
Commit 05ccfe6763f11a1903bf69f4ba6a9580199d831d
Headers show

Comments

Siddharth Agarwal - March 26, 2015, 5:01 a.m.
# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1427324131 25200
#      Wed Mar 25 15:55:31 2015 -0700
# Node ID 4a644f3fd340147e492ff60e73f80c579fc85b20
# Parent  247fb9a8ff9de9026d2b5d58b825da9975a37e5c
osutil: use getdirentriesattr on OS X if possible

This is a significant win for large repositories on OS X, especially with a
cold cache. Unfortunately we need to keep the lstat-based implementation around
for two reasons:

- Not all filesystems support this call.
- There's an edge case in which it's best to fall back to avoid a retry loop.
  More about this in the comments.

The below tests are all performed on a Mac with an SSD running OS X 10.9, on a
repository with over 200k files. The results are best of 5 with simulated
best-effort conditions.

The gains with a hot cache are pretty impressive: 'hg status' goes from 5.18
seconds to 3.79 seconds.

However, a repository that large will probably already be using something like
hgwatchman [1], which helps much more (for this repo, 'hg status' with
hgwatchman is approximately 1 second). Where this really helps is when the
cache is cold [2]: hg status goes from 31.0 seconds to 9.66.

See http://lists.apple.com/archives/filesystem-dev/2014/Dec/msg00002.html for
some more discussion about this function.

This is based on a patch by Sean Farley <sean@farley.io>.

[1] https://bitbucket.org/facebook/hgwatchman

[2] There appears to be no easy way to clear the file cache (aka "vnodes") on
OS X short of rebooting. purge(8) purportedly does that but in my testing had
little effect. The workaround I came up with was to assume that vnode eviction
was LRU, make sure the kern.maxvnodes sysctl is smaller than the size of the
repository, then make sure we'd always miss the cache by running 'hg status' in
another clone of the repository before running it in the test repository.
Matt Mackall - March 26, 2015, 8:32 p.m.
On Wed, 2015-03-25 at 22:01 -0700, Siddharth Agarwal wrote:
> # HG changeset patch
> # User Siddharth Agarwal <sid0@fb.com>
> # Date 1427324131 25200
> #      Wed Mar 25 15:55:31 2015 -0700
> # Node ID 4a644f3fd340147e492ff60e73f80c579fc85b20
> # Parent  247fb9a8ff9de9026d2b5d58b825da9975a37e5c
> osutil: use getdirentriesattr on OS X if possible

These are queued for default, thanks.

Patch

diff --git a/mercurial/osutil.c b/mercurial/osutil.c
--- a/mercurial/osutil.c
+++ b/mercurial/osutil.c
@@ -24,6 +24,11 @@ 
 #include <unistd.h>
 #endif
 
+#ifdef __APPLE__
+#include <sys/attr.h>
+#include <sys/vnode.h>
+#endif
+
 #include "util.h"
 
 /* some platforms lack the PATH_MAX definition (eg. GNU/Hurd) */
@@ -392,8 +397,195 @@ 
 	return ret;
 }
 
+#ifdef __APPLE__
+
+typedef struct {
+	u_int32_t length;
+	attrreference_t name;
+	fsobj_type_t obj_type;
+	struct timespec mtime;
+#if __LITTLE_ENDIAN__
+	mode_t access_mask;
+	uint16_t padding;
+#else
+	uint16_t padding;
+	mode_t access_mask;
+#endif
+	off_t size;
+} __attribute__((packed)) attrbuf_entry;
+
+int attrkind(attrbuf_entry *entry)
+{
+	switch (entry->obj_type) {
+	case VREG: return S_IFREG;
+	case VDIR: return S_IFDIR;
+	case VLNK: return S_IFLNK;
+	case VBLK: return S_IFBLK;
+	case VCHR: return S_IFCHR;
+	case VFIFO: return S_IFIFO;
+	case VSOCK: return S_IFSOCK;
+	}
+	return -1;
+}
+
+/* get these many entries at a time */
+#define LISTDIR_BATCH_SIZE 50
+
+static PyObject *_listdir_batch(char *path, int pathlen, int keepstat,
+				char *skip, bool *fallback)
+{
+	PyObject *list, *elem, *stat = NULL, *ret = NULL;
+	int kind, err;
+	unsigned long index;
+	unsigned int count, old_state, new_state;
+	bool state_seen = false;
+	attrbuf_entry *entry;
+	/* from the getattrlist(2) man page: a path can be no longer than
+	   (NAME_MAX * 3 + 1) bytes. Also, "The getattrlist() function will
+	   silently truncate attribute data if attrBufSize is too small." So
+	   pass in a buffer big enough for the worst case. */
+	char attrbuf[LISTDIR_BATCH_SIZE * (sizeof(attrbuf_entry) + NAME_MAX * 3 + 1)];
+	unsigned int basep_unused;
+
+	struct stat st;
+	int dfd = -1;
+
+	/* these must match the attrbuf_entry struct, otherwise you'll end up
+	   with garbage */
+	struct attrlist requested_attr = {0};
+	requested_attr.bitmapcount = ATTR_BIT_MAP_COUNT;
+	requested_attr.commonattr = (ATTR_CMN_NAME | ATTR_CMN_OBJTYPE |
+				     ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK);
+	requested_attr.fileattr = ATTR_FILE_TOTALSIZE;
+
+	*fallback = false;
+
+	if (pathlen >= PATH_MAX) {
+		errno = ENAMETOOLONG;
+		PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
+		goto error_value;
+	}
+
+	dfd = open(path, O_RDONLY);
+	if (dfd == -1) {
+		PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
+		goto error_value;
+	}
+
+	list = PyList_New(0);
+	if (!list)
+		goto error_dir;
+
+	do {
+		count = LISTDIR_BATCH_SIZE;
+		err = getdirentriesattr(dfd, &requested_attr, &attrbuf,
+					sizeof(attrbuf), &count, &basep_unused,
+					&new_state, 0);
+		if (err < 0) {
+			if (errno == ENOTSUP) {
+				/* We're on a filesystem that doesn't support
+				   getdirentriesattr. Fall back to the
+				   stat-based implementation. */
+				*fallback = true;
+			} else
+				PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
+			goto error;
+		}
+
+		if (!state_seen) {
+			old_state = new_state;
+			state_seen = true;
+		} else if (old_state != new_state) {
+			/* There's an edge case with getdirentriesattr. Consider
+			   the following initial list of files:
+
+			   a
+			   b
+			   <--
+			   c
+			   d
+
+			   If the iteration is paused at the arrow, and b is
+			   deleted before it is resumed, getdirentriesattr will
+			   not return d at all!  Ordinarily we're expected to
+			   restart the iteration from the beginning. To avoid
+			   getting stuck in a retry loop here, fall back to
+			   stat. */
+			*fallback = true;
+			goto error;
+		}
+
+		entry = (attrbuf_entry *)attrbuf;
+
+		for (index = 0; index < count; index++) {
+			char *filename = ((char *)&entry->name) +
+				entry->name.attr_dataoffset;
+
+			if (!strcmp(filename, ".") || !strcmp(filename, ".."))
+				continue;
+
+			kind = attrkind(entry);
+			if (kind == -1) {
+				PyErr_Format(PyExc_OSError,
+					     "unknown object type %u for file "
+					     "%s%s!",
+					     entry->obj_type, path, filename);
+				goto error;
+			}
+
+			/* quit early? */
+			if (skip && kind == S_IFDIR && !strcmp(filename, skip)) {
+				ret = PyList_New(0);
+				goto error;
+			}
+
+			if (keepstat) {
+				/* from the getattrlist(2) man page: "Only the
+				   permission bits ... are valid". */
+				st.st_mode = (entry->access_mask & ~S_IFMT) | kind;
+				st.st_mtime = entry->mtime.tv_sec;
+				st.st_size = entry->size;
+				stat = makestat(&st);
+				if (!stat)
+					goto error;
+				elem = Py_BuildValue("siN", filename, kind, stat);
+			} else
+				elem = Py_BuildValue("si", filename, kind);
+			if (!elem)
+				goto error;
+			stat = NULL;
+
+			PyList_Append(list, elem);
+			Py_DECREF(elem);
+
+			entry = (attrbuf_entry *)((char *)entry + entry->length);
+		}
+	} while (err == 0);
+
+	ret = list;
+	Py_INCREF(ret);
+
+error:
+	Py_DECREF(list);
+	Py_XDECREF(stat);
+error_dir:
+	close(dfd);
+error_value:
+	return ret;
+}
+
+#endif /* __APPLE__ */
+
 static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
 {
+#ifdef __APPLE__
+	PyObject *ret;
+	bool fallback = false;
+
+	ret = _listdir_batch(path, pathlen, keepstat, skip, &fallback);
+	if (ret != NULL || !fallback)
+		return ret;
+#endif
 	return _listdir_stat(path, pathlen, keepstat, skip);
 }