Patchwork osutil: use getdirentriesattr for HFS+ (DO NOT COMMIT)

login
register
mail settings
Submitter Sean Farley
Date Sept. 20, 2014, 2:04 p.m.
Message ID <a816acfcb4ae39280905.1411221856@1.0.0.127.in-addr.arpa>
Download mbox | patch
Permalink /patch/5901/
State Deferred
Headers show

Comments

Sean Farley - Sept. 20, 2014, 2:04 p.m.
# HG changeset patch
# User Sean Farley <sean.michael.farley@gmail.com>
# Date 1411220690 25200
#      Sat Sep 20 06:44:50 2014 -0700
# Node ID a816acfcb4ae39280905073a06dee60a2635a793
# Parent  ca854cd4a26a8770fbc22b4d7ee1ac6823b682a5
osutil: use getdirentriesattr for HFS+ (DO NOT COMMIT)

This is a proof of concept patch that uses getdirentriesattr to do a batch call
of fileson OS X in the hopes of speeding up 'hg status'. Unfortunately, the
tests show that this adds no speed at all.

I hope I am doing something really obviously slow. If not, then I hope this
patch serves as a warning to others thinking of using getdirentriesattr.
Matt Mackall - Oct. 2, 2014, 7:09 p.m.
On Sat, 2014-09-20 at 07:04 -0700, Sean Farley wrote:
> # HG changeset patch
> # User Sean Farley <sean.michael.farley@gmail.com>
> # Date 1411220690 25200
> #      Sat Sep 20 06:44:50 2014 -0700
> # Node ID a816acfcb4ae39280905073a06dee60a2635a793
> # Parent  ca854cd4a26a8770fbc22b4d7ee1ac6823b682a5
> osutil: use getdirentriesattr for HFS+ (DO NOT COMMIT)

Doesn't look obviously wrong. I think we'll need to find an actual Mac
filesystem expert (aka a BSD kernel filesystem hacker employed by
Apple).
Augie Fackler - Oct. 2, 2014, 10:02 p.m.
On Thu, Oct 2, 2014 at 3:09 PM, Matt Mackall <mpm@selenic.com> wrote:
> On Sat, 2014-09-20 at 07:04 -0700, Sean Farley wrote:
>> # HG changeset patch
>> # User Sean Farley <sean.michael.farley@gmail.com>
>> # Date 1411220690 25200
>> #      Sat Sep 20 06:44:50 2014 -0700
>> # Node ID a816acfcb4ae39280905073a06dee60a2635a793
>> # Parent  ca854cd4a26a8770fbc22b4d7ee1ac6823b682a5
>> osutil: use getdirentriesattr for HFS+ (DO NOT COMMIT)
>
> Doesn't look obviously wrong. I think we'll need to find an actual Mac
> filesystem expert (aka a BSD kernel filesystem hacker employed by
> Apple).


According to a friend at Apple,
https://lists.apple.com/mailman/listinfo/filesystem-dev may contain
such filesystems experts. I recommend continuing the quest there.

Patch

diff --git a/mercurial/osutil.c b/mercurial/osutil.c
--- a/mercurial/osutil.c
+++ b/mercurial/osutil.c
@@ -17,10 +17,11 @@ 
 #ifdef _WIN32
 #include <windows.h>
 #include <io.h>
 #else
 #include <dirent.h>
+#include <sys/attr.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 #endif
 
@@ -284,32 +285,65 @@  static PyObject *makestat(const struct s
 	if (stat)
 		memcpy(&((struct listdir_stat *)stat)->st, st, sizeof(*st));
 	return stat;
 }
 
+typedef struct attrbuff {
+	u_int32_t           length;
+	attrreference_t     name;
+	struct timespec     modTime;
+#if __LITTLE_ENDIAN__
+	mode_t              accessMask;
+	short               padding;
+#else
+	short               padding;
+	mode_t              accessMask;
+#endif
+	off_t               fileLogicalSize;
+} __attribute__ ((packed)) attrbuff;
+
 static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
 {
 	PyObject *list, *elem, *stat, *ret = NULL;
 	char fullpath[PATH_MAX + 10];
 	int kind, err;
 	struct stat st;
-	struct dirent *ent;
+
+	unsigned long  index;
+	unsigned short done = 0;
+	/* get 50 files at a time */
+	unsigned int   batch = 50;
+	unsigned int   count;
+	attrbuff *thisEntry;
+	char permAttributes[batch * (sizeof(attrbuff) + 64)];
+	/* the next two variables are junk; we don't use them */
+	unsigned int   junkBaseP;
+	unsigned int   newState;
+
+
+	/* initialize the struct that tells getdirentries which attributes to
+	   retrieve; these must match the attrbuff or else memory will be overwritten
+	   at the wrong place and you'll end up with garbage */
+	struct attrlist attrList = { 0 };
+	memset(&attrList, 0, sizeof(attrList));
+	attrList.bitmapcount = ATTR_BIT_MAP_COUNT;
+	attrList.commonattr  = ATTR_CMN_NAME | ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK;
+	attrList.fileattr = ATTR_FILE_TOTALSIZE;
+
 	DIR *dir;
-#ifdef AT_SYMLINK_NOFOLLOW
 	int dfd = -1;
-#endif
 
 	if (pathlen >= PATH_MAX) {
 		errno = ENAMETOOLONG;
 		PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
 		goto error_value;
 	}
 	strncpy(fullpath, path, PATH_MAX);
 	fullpath[pathlen] = '/';
 
+	dfd = open(path, O_RDONLY, 0);
 #ifdef AT_SYMLINK_NOFOLLOW
-	dfd = open(path, O_RDONLY);
 	if (dfd == -1) {
 		PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
 		goto error_value;
 	}
 	dir = fdopendir(dfd);
@@ -323,70 +357,89 @@  static PyObject *_listdir(char *path, in
 
 	list = PyList_New(0);
 	if (!list)
 		goto error_list;
 
-	while ((ent = readdir(dir))) {
-		if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
+	do {
+		count = batch;
+		err = getdirentriesattr(dfd, &attrList, &permAttributes, sizeof(permAttributes), &count, &junkBaseP, &newState, 0);
+		if (err < 0) {
+			printf("error with getdirentriesattr for '%s'. %d/%s\n", path, err, strerror(err));
+			goto error_dir;
+		} else {
+			done = err;
+		}
+		thisEntry = (attrbuff *) permAttributes;
+
+	for (index = 0; index < count; index++) {
+
+		char *nameStart = ((char *) &thisEntry->name) + thisEntry->name.attr_dataoffset;
+		if (!strcmp(nameStart, ".") || !strcmp(nameStart, ".."))
 			continue;
 
-		kind = entkind(ent);
-		if (kind == -1 || keepstat) {
+		if (keepstat) {
 #ifdef AT_SYMLINK_NOFOLLOW
 			err = fstatat(dfd, ent->d_name, &st,
 				      AT_SYMLINK_NOFOLLOW);
 #else
-			strncpy(fullpath + pathlen + 1, ent->d_name,
+			strncpy(fullpath + pathlen + 1, nameStart,
 				PATH_MAX - pathlen);
 			fullpath[PATH_MAX] = 0;
-			err = lstat(fullpath, &st);
+			/* Copy the getdirentries info into st so that the code churn is low */
+			st.st_mode = thisEntry->accessMask;
+			st.st_mtime = thisEntry->modTime.tv_sec;
+			st.st_size = thisEntry->fileLogicalSize;
 #endif
 			if (err == -1) {
 				/* race with file deletion? */
 				if (errno == ENOENT)
 					continue;
-				strncpy(fullpath + pathlen + 1, ent->d_name,
+				strncpy(fullpath + pathlen + 1, nameStart,
 					PATH_MAX - pathlen);
 				fullpath[PATH_MAX] = 0;
 				PyErr_SetFromErrnoWithFilename(PyExc_OSError,
 							       fullpath);
 				goto error;
 			}
 			kind = st.st_mode & S_IFMT;
 		}
 
 		/* quit early? */
-		if (skip && kind == S_IFDIR && !strcmp(ent->d_name, skip)) {
+		if (skip && kind == S_IFDIR && !strcmp(nameStart, skip)) {
 			ret = PyList_New(0);
 			goto error;
 		}
 
 		if (keepstat) {
 			stat = makestat(&st);
 			if (!stat)
 				goto error;
-			elem = Py_BuildValue("siN", ent->d_name, kind, stat);
+			elem = Py_BuildValue("siN", nameStart, kind, stat);
 		} else
-			elem = Py_BuildValue("si", ent->d_name, kind);
+			elem = Py_BuildValue("si", nameStart, kind);
 		if (!elem)
 			goto error;
 
 		PyList_Append(list, elem);
 		Py_DECREF(elem);
+
+		thisEntry = (attrbuff*)((char*)thisEntry + thisEntry->length);
+
 	}
+	} while ( err == 0 && ! done );
 
 	ret = list;
 	Py_INCREF(ret);
 
 error:
 	Py_DECREF(list);
 error_list:
 	closedir(dir);
 error_dir:
-#ifdef AT_SYMLINK_NOFOLLOW
+
 	close(dfd);
-#endif
+
 error_value:
 	return ret;
 }
 
 static PyObject *statfiles(PyObject *self, PyObject *args)