Patchwork [4,of,6,RFC] radixlink: add pure C implementation for radixlink read operation

login
register
mail settings
Submitter Jun Wu
Date May 22, 2017, 1:31 a.m.
Message ID <23ea47ebaf6e9f0d711b.1495416671@x1c>
Download mbox | patch
Permalink /patch/20812/
State Changes Requested
Headers show

Comments

Jun Wu - May 22, 2017, 1:31 a.m.
# HG changeset patch
# User Jun Wu <quark@fb.com>
# Date 1495404843 25200
#      Sun May 21 15:14:03 2017 -0700
# Node ID 23ea47ebaf6e9f0d711b90d7677cf4628c8761c6
# Parent  929ea30d953466dc04f0e5d8ca8cd9ed06ca2310
# Available At https://bitbucket.org/quark-zju/hg-draft
#              hg pull https://bitbucket.org/quark-zju/hg-draft -r 23ea47ebaf6e
radixlink: add pure C implementation for radixlink read operation
Augie Fackler - May 24, 2017, 9:21 p.m.
On Sun, May 21, 2017 at 06:31:11PM -0700, Jun Wu wrote:
> # HG changeset patch
> # User Jun Wu <quark@fb.com>
> # Date 1495404843 25200
> #      Sun May 21 15:14:03 2017 -0700
> # Node ID 23ea47ebaf6e9f0d711b90d7677cf4628c8761c6
> # Parent  929ea30d953466dc04f0e5d8ca8cd9ed06ca2310
> # Available At https://bitbucket.org/quark-zju/hg-draft
> #              hg pull https://bitbucket.org/quark-zju/hg-draft -r 23ea47ebaf6e
> radixlink: add pure C implementation for radixlink read operation
>
> diff --git a/mercurial/cext/radixlink.c b/mercurial/cext/radixlink.c
> new file mode 100644
> --- /dev/null
> +++ b/mercurial/cext/radixlink.c
> @@ -0,0 +1,122 @@
> +/* radixlink.c - on-disk radixtree index pointing to linked list data
> + *
> + * Copyright 2017 Facebook, Inc.
> + *
> + * This software may be used and distributed according to the terms of the
> + * GNU General Public License version 2 or any later version. */
> +
> +#include <stdlib.h>
> +#include "radixlink.h"
> +
> +/* safe read utilities (assuming "return -1" is error reporting) */

nit: could these be static inline and let the compiler do the work of
making it efficient? I'd kind of rather that than macros...

> +#define read8(dest, rlbuf, offset) \
> +	if (rlbuf->size < (offset) + 1) return -1; \
> +	else dest = rlbuf->buf[(offset)];
> +#define read16(dest, rlbuf, offset) \
> +	if (rlbuf->size < (offset) + 2) return -1; \
> +	else dest = getbeuint16((const char *)(rlbuf->buf + (offset)));
> +#define read32(dest, rlbuf, offset) \
> +	if (rlbuf->size < (offset) + 4) return -1; \
> +	else dest = getbe32((const char *)(rlbuf->buf + (offset)));
> +
> +#define getb16(dest, key, boffset) { \
> +	uint8_t _v = key[(boffset) / 2]; \
> +	dest = ((boffset) & 1) ? (_v & 0xf) : (_v >> 4); }
> +
> +static const uint8_t RADIX_TYPE = 0;
> +static const uint8_t LEAF_TYPE = 1;

[...]
Jun Wu - May 24, 2017, 9:27 p.m.
Excerpts from Augie Fackler's message of 2017-05-24 17:21:48 -0400:
> [...]
> 
> nit: could these be static inline and let the compiler do the work of
> making it efficient? I'd kind of rather that than macros...

They are mainly to make the code shorter and easier to read. Compare:

  uint8_t x;
  read8(x, data, offset);

with:

  uint8_t x;
  if (tryreadoffset(...) == -1) {
    return -1;
  }

I guess we prefer the more explicit (longer) way. I can replace macros.

> [...]
Augie Fackler - May 24, 2017, 9:30 p.m.
> On May 24, 2017, at 17:27, Jun Wu <quark@fb.com> wrote:
> 
> Excerpts from Augie Fackler's message of 2017-05-24 17:21:48 -0400:
>> [...]
>> 
>> nit: could these be static inline and let the compiler do the work of
>> making it efficient? I'd kind of rather that than macros...
> 
> They are mainly to make the code shorter and easier to read. Compare:
> 
>  uint8_t x;
>  read8(x, data, offset);
> 
> with:
> 
>  uint8_t x;
>  if (tryreadoffset(...) == -1) {
>    return -1;
>  }
> 
> I guess we prefer the more explicit (longer) way. I can replace macros.

In a perfect world shorter would always be better, but yeah, I'd rather avoid the macros I guess.

> 
>> [...]

Patch

diff --git a/mercurial/cext/radixlink.c b/mercurial/cext/radixlink.c
new file mode 100644
--- /dev/null
+++ b/mercurial/cext/radixlink.c
@@ -0,0 +1,122 @@ 
+/* radixlink.c - on-disk radixtree index pointing to linked list data
+ *
+ * Copyright 2017 Facebook, Inc.
+ *
+ * This software may be used and distributed according to the terms of the
+ * GNU General Public License version 2 or any later version. */
+
+#include <stdlib.h>
+#include "radixlink.h"
+
+/* safe read utilities (assuming "return -1" is error reporting) */
+#define read8(dest, rlbuf, offset) \
+	if (rlbuf->size < (offset) + 1) return -1; \
+	else dest = rlbuf->buf[(offset)];
+#define read16(dest, rlbuf, offset) \
+	if (rlbuf->size < (offset) + 2) return -1; \
+	else dest = getbeuint16((const char *)(rlbuf->buf + (offset)));
+#define read32(dest, rlbuf, offset) \
+	if (rlbuf->size < (offset) + 4) return -1; \
+	else dest = getbe32((const char *)(rlbuf->buf + (offset)));
+
+#define getb16(dest, key, boffset) { \
+	uint8_t _v = key[(boffset) / 2]; \
+	dest = ((boffset) & 1) ? (_v & 0xf) : (_v >> 4); }
+
+static const uint8_t RADIX_TYPE = 0;
+static const uint8_t LEAF_TYPE = 1;
+
+static int comparebkey(size_t boff, const uint8_t k[], size_t klen, /* bytes */
+		       const uint8_t b[], size_t blen /* base16 */)
+{
+	if (klen * 2 - boff != blen)
+		return -1;
+	for (size_t i = 0; i < blen; ++i) {
+		uint8_t v;
+		getb16(v, k, boff);
+		boff += 1;
+		if (v != b[i])
+			return -1;
+	}
+	return 0;
+}
+
+static int follow(radixlink_buffer_t *index, const uint8_t key[], size_t keylen,
+		  uint32_t *pioff, size_t *pboff, uint32_t *ppoff)
+{
+	uint8_t type;
+	read8(type, index, *pioff);
+	if (type == LEAF_TYPE) {
+		size_t koff = (*pioff) + 1 + 4;
+		uint16_t klen;
+		int c;
+		read16(klen, index, koff);
+		koff += 2;
+		if (index->size < koff + klen)
+			return -1;
+		c = comparebkey(*pboff, key, keylen, index->buf + koff, klen);
+		if (c == 0) /* matched */
+			*pboff = keylen * 2;
+		else /* not matched */
+			*pioff = 0;
+		return 0;
+	} else if (type == RADIX_TYPE) {
+		uint32_t nextioff;
+		uint8_t v;
+		uint32_t poff;
+		getb16(v, key, *pboff);
+		poff = *pioff + 1 + v * 4;
+		read32(nextioff, index, poff);
+		*pioff = nextioff;
+		*pboff += 1;
+		*ppoff = poff;
+		return 0;
+	}
+	return -1;
+}
+
+static int readlinkoffset(radixlink_buffer_t *index, uint32_t *pioff,
+			  uint32_t *ploff)
+{
+	uint8_t type;
+	uint32_t ioff = *pioff;
+	read8(type, index, ioff);
+	if (type != LEAF_TYPE)
+		return -1;
+	ioff += 1;
+	read32(*ploff, index, ioff);
+	*pioff = ioff;
+	return 0;
+}
+
+int radixlink_index_find(radixlink_buffer_t *index, const uint8_t key[],
+			 size_t keylen, uint32_t *ploffset)
+{
+	uint32_t ioff = 8;
+	size_t boff = 0;
+	uint32_t poff = 0;
+	while (boff < keylen * 2) {
+		if (follow(index, key, keylen, &ioff, &boff, &poff) != 0)
+			return -1;
+		if (ioff == 0) {
+			*ploffset = 0;
+			return 0;
+		}
+	}
+
+	if (readlinkoffset(index, &ioff, ploffset) != 0)
+		return -1;
+
+	return 0;
+}
+
+int radixlink_link_read(radixlink_buffer_t *link, uint32_t *pvalue,
+			uint32_t *ploffset)
+{
+	uint32_t loff = *ploffset;
+	uint32_t nextloff;
+	read32(nextloff, link, loff);
+	read32(*pvalue, link, loff + 4);
+	*ploffset = nextloff;
+	return 0;
+}
diff --git a/mercurial/cext/radixlink.h b/mercurial/cext/radixlink.h
new file mode 100644
--- /dev/null
+++ b/mercurial/cext/radixlink.h
@@ -0,0 +1,16 @@ 
+#ifndef _HG_RADIXLINK_H_
+#define _HG_RADIXLINK_H_
+
+#include "bitmanipulation.h"
+
+typedef struct {
+	uint8_t *buf;
+	size_t size;
+} radixlink_buffer_t;
+
+int radixlink_index_find(radixlink_buffer_t *index, const uint8_t key[],
+			 size_t keylen, uint32_t *ploffset);
+
+int radixlink_link_read(radixlink_buffer_t *link, uint32_t *pvalue,
+			uint32_t *ploffset);
+#endif