[PATCH] udevd: de-duplicate strings in rules

linux-hotplug.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] udevd: de-duplicate strings in rules
@ 2008-11-11 20:20 Alan Jenkins
  2008-11-11 21:23 ` Kay Sievers
                   ` (9 more replies)
  0 siblings, 10 replies; 11+ messages in thread
From: Alan Jenkins @ 2008-11-11 20:20 UTC (permalink / raw)
  To: linux-hotplug

On my Ubuntu installation this removes 15k of duplicate strings,
using a temporary index of about 25k.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>

diff --git a/udev/udev-rules.c b/udev/udev-rules.c
index 810a863..df5ed53 100644
--- a/udev/udev-rules.c
+++ b/udev/udev-rules.c
@@ -16,6 +16,7 @@
  */
 
 #include <stddef.h>
+#include <limits.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
@@ -30,6 +31,7 @@
 
 #define PREALLOC_TOKEN			2048
 #define PREALLOC_STRBUF			32 * 1024
+#define PREALLOC_TRIE			256
 
 struct uid_gid {
 	unsigned int name_off;
@@ -39,7 +41,16 @@ struct uid_gid {
 	};
 };
 
-/* KEY="", KEY!="", KEY+="", KEY="", KEY:="" */
+#define TRIE_CHILD_MAX 10
+
+struct trie_node {
+	unsigned int value_off;
+	size_t value_len;
+	unsigned char child_cur;
+	unsigned char child_key[TRIE_CHILD_MAX];
+	unsigned short child[TRIE_CHILD_MAX];
+};
+
 struct udev_rules {
 	struct udev *udev;
 	int resolve_names;
@@ -55,6 +66,12 @@ struct udev_rules {
 	size_t buf_max;
 	unsigned int buf_count;
 
+	/* key strings are indexed to avoid wasting space with duplicates */
+	struct trie_node *trie;
+	unsigned short trie_cur;
+	unsigned short trie_max;
+	unsigned short *trie_root;
+
 	/* during rule parsing, we cache uid/gid lookup results */
 	struct uid_gid *uids;
 	unsigned int uids_cur;
@@ -64,6 +81,7 @@ struct udev_rules {
 	unsigned int gids_max;
 };
 
+/* KEY="", KEY!="", KEY+="", KEY="", KEY:="" */
 enum operation_type {
 	OP_UNSET,
 
@@ -392,25 +410,19 @@ static inline void dump_token(struct udev_rules *rules, struct token *token) {}
 static inline void dump_rules(struct udev_rules *rules) {}
 #endif /* DEBUG */
 
-/* we could lookup and return existing strings, or tails of strings */
-static int add_string(struct udev_rules *rules, const char *str)
+static int add_new_string(struct udev_rules *rules, const char *str, size_t bytes)
 {
-	size_t len = strlen(str)+1;
 	int off;
 
-	/* offset 0 is always '\0' */
-	if (str[0] = '\0')
-		return 0;
-
 	/* grow buffer if needed */
-	if (rules->buf_cur + len+1 >= rules->buf_max) {
+	if (rules->buf_cur + bytes+1 >= rules->buf_max) {
 		char *buf;
 		unsigned int add;
 
 		/* double the buffer size */
 		add = rules->buf_max;
-		if (add < len * 8)
-			add = len * 8;
+		if (add < bytes * 8)
+			add = bytes * 8;
 
 		buf = realloc(rules->buf, rules->buf_max + add);
 		if (buf = NULL)
@@ -420,15 +432,128 @@ static int add_string(struct udev_rules *rules, const char *str)
 		rules->buf_max += add;
 	}
 	off = rules->buf_cur;
-	memcpy(&rules->buf[rules->buf_cur], str, len);
-	rules->buf_cur += len;
+	memcpy(&rules->buf[rules->buf_cur], str, bytes);
+	rules->buf_cur += bytes;
 	rules->buf_count++;
 	return off;
 }
 
-static int add_token(struct udev_rules *rules, struct token *token)
+static unsigned char trie_child_slot(struct trie_node *node, char key)
 {
+	unsigned char child_slot;
+
+	for (child_slot = 0; child_slot < node->child_cur; child_slot++) {
+		if (node->child_key[child_slot] = key)
+			break;
+	}
+
+	return child_slot;
+}
+
+static int add_string(struct udev_rules *rules, const char *str)
+{
+	struct trie_node *child;
+	unsigned short child_off;
+	unsigned short node_off;
+	unsigned char key;
+	size_t len;
+	int depth;
+	unsigned int off;
+
+	len = strlen(str);
+
+	/* offset 0 is always '\0' */
+	if (len = 0)
+		return 0;
+
+	/* strings with spaces are probably commands e.g. modprobe,
+	   with unique arguments. */
+	if (strchr(str, ' ') != NULL)
+		return add_new_string(rules, str, len + 1);
+
+	/* descend root - start from last character of str */
+	key = str[len - 1];
+	node_off = rules->trie_root[key];
+	depth = 0;
+
+	/* descend suffix trie */
+	if (node_off != 0) {
+		while (1) {
+			struct trie_node *node = &rules->trie[node_off];
+			unsigned char child_slot;
+
+			depth++;
+			off = node->value_off + node->value_len - len;
+
+			/* match against current node */
+			if (depth = len ||
+			    (node->value_len >= len &&
+			    memcmp(&rules->buf[off], str, len) = 0))
+			{
+				return off;
+			}
 
+			/* lookup child node */
+			key = str[len - 1 - depth];
+			child_slot = trie_child_slot(node, key);
+
+			if(child_slot = node->child_cur)
+				break;
+
+			node_off = node->child[child_slot];
+		}
+	}
+
+	/* string not found, add it */
+	off = add_new_string(rules, str, len + 1);
+
+	/* grow trie storage if needed */
+	if (rules->trie_cur >= rules->trie_max) {
+		struct trie_node *trie;
+		unsigned short add;
+
+		/* double the buffer size */
+		add = rules->trie_max;
+		if (add < 8)
+			add = 8;
+
+		trie = realloc(rules->trie, (rules->trie_max + add) * sizeof(struct trie_node));
+		if (trie = NULL)
+			return -1;
+		dbg(rules->udev, "extend string index nodes from %u to %u\n", rules->trie_max, rules->trie_max + add);
+		rules->trie = trie;
+		rules->trie_max += add;
+	}
+
+	/* insert new child node */
+	child_off = rules->trie_cur;
+	if (depth = 0) {
+		rules->trie_root[key] = child_off;
+	} else {
+		struct trie_node *parent = &rules->trie[node_off];
+		unsigned char child_slot = parent->child_cur;
+
+		/* no space in parent, we can't index this string, nevermind */
+		if (child_slot = TRIE_CHILD_MAX)
+			return off;
+
+		parent->child[child_slot] = child_off;
+		parent->child_key[child_slot] = key;
+		parent->child_cur = child_slot + 1;
+	}
+
+	/* allocate and construct the child node */
+	rules->trie_cur++;
+	child = &rules->trie[child_off];
+	memset(child, 0x00, sizeof(struct trie_node));
+	child->value_off = off;
+	child->value_len = len;
+
+	return off;
+}
+
+static int add_token(struct udev_rules *rules, struct token *token)
+{
 	/* grow buffer if needed */
 	if (rules->token_cur+1 >= rules->token_max) {
 		struct token *tokens;
@@ -1607,9 +1732,16 @@ struct udev_rules *udev_rules_new(struct udev *udev, int resolve_names)
 	if (rules->buf = NULL)
 		return NULL;
 	rules->buf_max = PREALLOC_STRBUF;
+	rules->trie = malloc(PREALLOC_TRIE * sizeof(struct trie_node));
+	if (rules->trie = NULL)
+		return NULL;
+	rules->trie_max = PREALLOC_TRIE;
+	rules->trie_root = calloc(UCHAR_MAX + 1, sizeof(unsigned short));
 	/* offset 0 is always '\0' */
 	rules->buf[0] = '\0';
 	rules->buf_cur = 1;
+	/* offset 0 is reserved for the null trie node */
+	rules->trie_cur = 1;
 	dbg(udev, "prealloc %zu bytes tokens (%u * %zu bytes), %zu bytes buffer\n",
 	    rules->token_max * sizeof(struct token), rules->token_max, sizeof(struct token), rules->buf_max);
 
@@ -1725,6 +1857,16 @@ struct udev_rules *udev_rules_new(struct udev *udev, int resolve_names)
 	}
 	info(udev, "shrunk to %zu bytes tokens (%u * %zu bytes), %zu bytes buffer\n",
 	     rules->token_max * sizeof(struct token), rules->token_max, sizeof(struct token), rules->buf_max);
+	info(udev, "used %zu bytes of string index nodes (%hu * %zu bytes)\n",
+	     rules->trie_cur * sizeof(struct trie_node), rules->trie_cur, sizeof(struct trie_node));
+
+	/* cleanup trie */
+	free(rules->trie);
+	rules->trie = NULL;
+	rules->trie_cur = 0;
+	rules->trie_max = 0;
+	free(rules->trie_root);
+	rules->trie_root = NULL;
 
 	/* cleanup uid/gid cache */
 	free(rules->uids);
@@ -1746,6 +1888,8 @@ void udev_rules_unref(struct udev_rules *rules)
 		return;
 	free(rules->tokens);
 	free(rules->buf);
+	free(rules->trie);
+	free(rules->trie_root);
 	free(rules->uids);
 	free(rules->gids);
 	free(rules);



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
@ 2008-11-11 21:23 ` Kay Sievers
  2008-11-12  5:11 ` Kay Sievers
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kay Sievers @ 2008-11-11 21:23 UTC (permalink / raw)
  To: linux-hotplug

On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
> On my Ubuntu installation this removes 15k of duplicate strings,
> using a temporary index of about 25k.

Great. That looks nice.

Thats's the diff of the rule dump before and after the patch:
  ...
  -[] udev_rules_new: shrunk to 64896 bytes tokens (5408 * 12 bytes),
57298 bytes buffer
  -[] dump_rules: dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
  +[] udev_rules_new: shrunk to 64896 bytes tokens (5408 * 12 bytes),
18204 bytes buffer
  +[] udev_rules_new: used 40512 bytes of string index nodes (844 * 48 bytes)
  +[] dump_rules: dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
  ...

Applied.

Kay

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
  2008-11-11 21:23 ` Kay Sievers
@ 2008-11-12  5:11 ` Kay Sievers
  2008-11-12 16:50 ` Alan Jenkins
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kay Sievers @ 2008-11-12  5:11 UTC (permalink / raw)
  To: linux-hotplug

On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@vrfy.org> wrote:
> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>> On my Ubuntu installation this removes 15k of duplicate strings,
>> using a temporary index of about 25k.
>
> Great. That looks nice.
>
> Thats's the diff of the rule dump before and after the patch:
>  ...
>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings

I split the nodes and the childs in two independent arrays, so we got
rid of the limit of 10 childs per node. I've got ~200 fully uses slots
with the huge rules set here. Unlimited childs in the index removes
another 3 kB of duplicates, and the temporary index seems also a bit
smaller:
  shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
  used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
child links)

Would be great, if you can check if it still works for you as expected. :)

Thanks,
Kay

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
  2008-11-11 21:23 ` Kay Sievers
  2008-11-12  5:11 ` Kay Sievers
@ 2008-11-12 16:50 ` Alan Jenkins
  2008-11-12 18:00 ` Kay Sievers
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Alan Jenkins @ 2008-11-12 16:50 UTC (permalink / raw)
  To: linux-hotplug

Kay Sievers wrote:
> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@vrfy.org> wrote:
>   
>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>     
>>> On my Ubuntu installation this removes 15k of duplicate strings,
>>> using a temporary index of about 25k.
>>>       
>> Great. That looks nice.
>>
>> Thats's the diff of the rule dump before and after the patch:
>>  ...
>>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
>>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
>>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
>>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
>>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
>>     
>
> I split the nodes and the childs in two independent arrays, so we got
> rid of the limit of 10 childs per node. I've got ~200 fully uses slots
> with the huge rules set here. Unlimited childs in the index removes
> another 3 kB of duplicates, and the temporary index seems also a bit
> smaller:
>   shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
>   used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
> child links)
>
> Would be great, if you can check if it still works for you as expected. :)
>   

Did you have a particular reason to keep the trie_root array?  Now
there's no fixed limit on children, you could just use trie[1] as the
root node.  Remove the special case for depth = 0.  And initialize it's
value and length to 0, then you can remove the special case for len = 0.

Alan

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
                   ` (2 preceding siblings ...)
  2008-11-12 16:50 ` Alan Jenkins
@ 2008-11-12 18:00 ` Kay Sievers
  2008-11-12 18:05 ` Alan Jenkins
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kay Sievers @ 2008-11-12 18:00 UTC (permalink / raw)
  To: linux-hotplug

On Wed, Nov 12, 2008 at 17:50, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
> Kay Sievers wrote:
>> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@vrfy.org> wrote:
>>
>>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>
>>>> On my Ubuntu installation this removes 15k of duplicate strings,
>>>> using a temporary index of about 25k.
>>>>
>>> Great. That looks nice.
>>>
>>> Thats's the diff of the rule dump before and after the patch:
>>>  ...
>>>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
>>>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
>>>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
>>>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
>>>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
>>>
>>
>> I split the nodes and the childs in two independent arrays, so we got
>> rid of the limit of 10 childs per node. I've got ~200 fully uses slots
>> with the huge rules set here. Unlimited childs in the index removes
>> another 3 kB of duplicates, and the temporary index seems also a bit
>> smaller:
>>   shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
>>   used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
>> child links)
>>
>> Would be great, if you can check if it still works for you as expected. :)
>
> Did you have a particular reason to keep the trie_root array?  Now
> there's no fixed limit on children, you could just use trie[1] as the
> root node.  Remove the special case for depth = 0.  And initialize it's
> value and length to 0, then you can remove the special case for len = 0.

No special reason, I thought about that too, but it was already 5am,
and I was unable to think it through. :)

Sounds nice to do that, did you try already, have a patch?

Thanks,
Kay

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
                   ` (3 preceding siblings ...)
  2008-11-12 18:00 ` Kay Sievers
@ 2008-11-12 18:05 ` Alan Jenkins
  2008-11-12 18:20 ` Kay Sievers
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Alan Jenkins @ 2008-11-12 18:05 UTC (permalink / raw)
  To: linux-hotplug

Kay Sievers wrote:
> On Wed, Nov 12, 2008 at 17:50, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>   
>> Kay Sievers wrote:
>>     
>>> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@vrfy.org> wrote:
>>>
>>>       
>>>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>>
>>>>         
>>>>> On my Ubuntu installation this removes 15k of duplicate strings,
>>>>> using a temporary index of about 25k.
>>>>>
>>>>>           
>>>> Great. That looks nice.
>>>>
>>>> Thats's the diff of the rule dump before and after the patch:
>>>>  ...
>>>>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
>>>>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
>>>>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
>>>>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
>>>>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
>>>>
>>>>         
>>> I split the nodes and the childs in two independent arrays, so we got
>>> rid of the limit of 10 childs per node. I've got ~200 fully uses slots
>>> with the huge rules set here. Unlimited childs in the index removes
>>> another 3 kB of duplicates, and the temporary index seems also a bit
>>> smaller:
>>>   shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
>>>   used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
>>> child links)
>>>
>>> Would be great, if you can check if it still works for you as expected. :)
>>>       
>> Did you have a particular reason to keep the trie_root array?  Now
>> there's no fixed limit on children, you could just use trie[1] as the
>> root node.  Remove the special case for depth = 0.  And initialize it's
>> value and length to 0, then you can remove the special case for len = 0.
>>     
>
> No special reason, I thought about that too, but it was already 5am,
> and I was unable to think it through. :)
>
> Sounds nice to do that, did you try already, have a patch?
>   
No, sorry :).


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
                   ` (4 preceding siblings ...)
  2008-11-12 18:05 ` Alan Jenkins
@ 2008-11-12 18:20 ` Kay Sievers
  2008-11-12 20:48 ` Alan Jenkins
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kay Sievers @ 2008-11-12 18:20 UTC (permalink / raw)
  To: linux-hotplug

On Wed, Nov 12, 2008 at 19:05, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
> Kay Sievers wrote:
>> On Wed, Nov 12, 2008 at 17:50, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>> Kay Sievers wrote:
>>>> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@vrfy.org> wrote:
>>>>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>>>
>>>>>> On my Ubuntu installation this removes 15k of duplicate strings,
>>>>>> using a temporary index of about 25k.
>>>>>>
>>>>> Great. That looks nice.
>>>>>
>>>>> Thats's the diff of the rule dump before and after the patch:
>>>>>  ...
>>>>>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
>>>>>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
>>>>>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
>>>>>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
>>>>>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
>>>>>
>>>> I split the nodes and the childs in two independent arrays, so we got
>>>> rid of the limit of 10 childs per node. I've got ~200 fully uses slots
>>>> with the huge rules set here. Unlimited childs in the index removes
>>>> another 3 kB of duplicates, and the temporary index seems also a bit
>>>> smaller:
>>>>   shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
>>>>   used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
>>>> child links)
>>>>
>>>> Would be great, if you can check if it still works for you as expected. :)
>>>>
>>> Did you have a particular reason to keep the trie_root array?  Now
>>> there's no fixed limit on children, you could just use trie[1] as the
>>> root node.  Remove the special case for depth = 0.  And initialize it's
>>> value and length to 0, then you can remove the special case for len = 0.
>>>
>>
>> No special reason, I thought about that too, but it was already 5am,
>> and I was unable to think it through. :)
>>
>> Sounds nice to do that, did you try already, have a patch?
>>
> No, sorry :).

Ah, now by looking at it, maybe the then needed linear search for the
key in the root is not as good as the plain root array index?

Kay

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
                   ` (5 preceding siblings ...)
  2008-11-12 18:20 ` Kay Sievers
@ 2008-11-12 20:48 ` Alan Jenkins
  2008-11-12 21:37 ` Kay Sievers
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Alan Jenkins @ 2008-11-12 20:48 UTC (permalink / raw)
  To: linux-hotplug

Kay Sievers wrote:
> On Wed, Nov 12, 2008 at 19:05, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>   
>> Kay Sievers wrote:
>>     
>>> On Wed, Nov 12, 2008 at 17:50, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>       
>>>> Kay Sievers wrote:
>>>>         
>>>>> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@vrfy.org> wrote:
>>>>>           
>>>>>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>>>>
>>>>>>             
>>>>>>> On my Ubuntu installation this removes 15k of duplicate strings,
>>>>>>> using a temporary index of about 25k.
>>>>>>>
>>>>>>>               
>>>>>> Great. That looks nice.
>>>>>>
>>>>>> Thats's the diff of the rule dump before and after the patch:
>>>>>>  ...
>>>>>>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
>>>>>>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
>>>>>>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
>>>>>>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
>>>>>>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
>>>>>>
>>>>>>             
>>>>> I split the nodes and the childs in two independent arrays, so we got
>>>>> rid of the limit of 10 childs per node. I've got ~200 fully uses slots
>>>>> with the huge rules set here. Unlimited childs in the index removes
>>>>> another 3 kB of duplicates, and the temporary index seems also a bit
>>>>> smaller:
>>>>>   shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
>>>>>   used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
>>>>> child links)
>>>>>
>>>>> Would be great, if you can check if it still works for you as expected. :)
>>>>>
>>>>>           
>>>> Did you have a particular reason to keep the trie_root array?  Now
>>>> there's no fixed limit on children, you could just use trie[1] as the
>>>> root node.  Remove the special case for depth = 0.  And initialize it's
>>>> value and length to 0, then you can remove the special case for len = 0.
>>>>
>>>>         
>>> No special reason, I thought about that too, but it was already 5am,
>>> and I was unable to think it through. :)
>>>
>>> Sounds nice to do that, did you try already, have a patch?
>>>
>>>       
>> No, sorry :).
>>     
>
> Ah, now by looking at it, maybe the then needed linear search for the
> key in the root is not as good as the plain root array index?
>   
Mmm.  Ok, without the root array add_string() takes twice as long, which 
increases the total rules-loading time by 10%.  (user time measured by 
cachegrind).  Let's leave it.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
                   ` (6 preceding siblings ...)
  2008-11-12 20:48 ` Alan Jenkins
@ 2008-11-12 21:37 ` Kay Sievers
  2008-11-13 10:12 ` Alan Jenkins
  2008-11-13 19:03 ` Kay Sievers
  9 siblings, 0 replies; 11+ messages in thread
From: Kay Sievers @ 2008-11-12 21:37 UTC (permalink / raw)
  To: linux-hotplug

On Wed, 2008-11-12 at 20:48 +0000, Alan Jenkins wrote:
> Kay Sievers wrote:
> > On Wed, Nov 12, 2008 at 19:05, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:   
> >> Kay Sievers wrote:
> >>> On Wed, Nov 12, 2008 at 17:50, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
> >>>> Kay Sievers wrote:
> >>>>> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@vrfy.org> wrote:
> >>>>>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
> >>>>>>
> >>>>>>> On my Ubuntu installation this removes 15k of duplicate strings,
> >>>>>>> using a temporary index of about 25k.
> >>>>>>>               
> >>>>>> Great. That looks nice.
> >>>>>>
> >>>>>> Thats's the diff of the rule dump before and after the patch:
> >>>>>>  ...
> >>>>>>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
> >>>>>>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
> >>>>>>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
> >>>>>>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
> >>>>>>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
> >>>>>>
> >>>>>>             
> >>>>> I split the nodes and the childs in two independent arrays, so we got
> >>>>> rid of the limit of 10 childs per node. I've got ~200 fully uses slots
> >>>>> with the huge rules set here. Unlimited childs in the index removes
> >>>>> another 3 kB of duplicates, and the temporary index seems also a bit
> >>>>> smaller:
> >>>>>   shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
> >>>>>   used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
> >>>>> child links)
> >>>>>
> >>>>> Would be great, if you can check if it still works for you as expected. :)
> >>>>>
> >>>>>           
> >>>> Did you have a particular reason to keep the trie_root array?  Now
> >>>> there's no fixed limit on children, you could just use trie[1] as the
> >>>> root node.  Remove the special case for depth = 0.  And initialize it's
> >>>> value and length to 0, then you can remove the special case for len = 0.
> >>>>
> >>>>         
> >>> No special reason, I thought about that too, but it was already 5am,
> >>> and I was unable to think it through. :)
> >>>
> >>> Sounds nice to do that, did you try already, have a patch?
> >>>
> >>>       
> >> No, sorry :).
> >>     
> >
> > Ah, now by looking at it, maybe the then needed linear search for the
> > key in the root is not as good as the plain root array index?
> >   
> Mmm.  Ok, without the root array add_string() takes twice as long, which 
> increases the total rules-loading time by 10%.  (user time measured by 
> cachegrind).  Let's leave it.

Hmm, now I liked the idea. :)

How about this? It has only a single array again, and no root, and no
child limits. Seems to work fine, but, it looks somehow too simple
now. :)

  rules use 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
  temporary index used 21520 bytes (1076 * 20 bytes)


diff --git a/udev/udev-rules.c b/udev/udev-rules.c
index 1f28e4f..80c61b5 100644
--- a/udev/udev-rules.c
+++ b/udev/udev-rules.c
@@ -41,17 +41,16 @@ struct uid_gid {
 	};
 };
 
-struct trie_child {
-	unsigned int next_idx;
-	unsigned int node_idx;
-	unsigned char key;
-};
-
 struct trie_node {
+	/* this nodes child list */
 	unsigned int child_idx;
+	/* the next child of our parent node's child list */
+	unsigned int next_child_idx;
+	/* this nodes last child (shortcut) */
 	unsigned int last_child_idx;
 	unsigned int value_off;
 	unsigned short value_len;
+	unsigned char key;
 };
 
 struct udev_rules {
@@ -70,13 +69,9 @@ struct udev_rules {
 	unsigned int buf_count;
 
 	/* during rule parsing, strings are indexed to find duplicates */
-	unsigned int *trie_root;
 	struct trie_node *trie_nodes;
 	unsigned int trie_nodes_cur;
 	unsigned int trie_nodes_max;
-	struct trie_child *trie_childs;
-	unsigned int trie_childs_cur;
-	unsigned int trie_childs_max;
 
 	/* during rule parsing, uid/gid lookup results are cached */
 	struct uid_gid *uids;
@@ -453,6 +448,7 @@ static int add_string(struct udev_rules *rules, const char *str)
 	unsigned short len;
 	unsigned int depth;
 	unsigned int off;
+	struct trie_node *parent;
 
 	len = strlen(str);
 
@@ -460,37 +456,31 @@ static int add_string(struct udev_rules *rules, const char *str)
 	if (len = 0)
 		return 0;
 
-	/* descend root - start from last character of str */
+	/* walk trie, start from last character of str to find matching tails */
+	node_idx = 0;
 	key = str[len - 1];
-	node_idx = rules->trie_root[key];
-	depth = 0;
-
-	/* descend suffix trie */
-	if (node_idx > 0) {
-		while (1) {
-			struct trie_node *node;
-			unsigned int child_idx;
-
-			node = &rules->trie_nodes[node_idx];
-			depth++;
-			off = node->value_off + node->value_len - len;
-
-			/* match against current node */
-			if (depth = len || (node->value_len >= len && memcmp(&rules->buf[off], str, len) = 0))
-				return off;
-
-			/* lookup child node */
-			key = str[len - 1 - depth];
-			child_idx = node->child_idx;
-			while (child_idx > 0) {
-				if (rules->trie_childs[child_idx].key = key)
-					break;
-				child_idx = rules->trie_childs[child_idx].next_idx;
-			}
-			if (child_idx = 0)
+	for (depth = 0; depth <= len; depth++) {
+		struct trie_node *node;
+		unsigned int child_idx;
+
+		node = &rules->trie_nodes[node_idx];
+		off = node->value_off + node->value_len - len;
+
+		/* match against current node */
+		if (depth = len || (node->value_len >= len && memcmp(&rules->buf[off], str, len) = 0))
+			return off;
+
+		/* lookup child node */
+		key = str[len - 1 - depth];
+		child_idx = node->child_idx;
+		while (child_idx > 0) {
+			if (rules->trie_nodes[child_idx].key = key)
 				break;
-			node_idx = rules->trie_childs[child_idx].node_idx;
+			child_idx = rules->trie_nodes[child_idx].next_child_idx;
 		}
+		if (child_idx = 0)
+			break;
+		node_idx = child_idx;
 	}
 
 	/* string not found, add it */
@@ -515,25 +505,6 @@ static int add_string(struct udev_rules *rules, const char *str)
 		rules->trie_nodes_max += add;
 	}
 
-	/* grow trie childs if needed */
-	if (rules->trie_childs_cur >= rules->trie_childs_max) {
-		struct trie_child *childs;
-		unsigned int add;
-
-		/* double the buffer size */
-		add = rules->trie_childs_max;
-		if (add < 8)
-			add = 8;
-
-		childs = realloc(rules->trie_childs, (rules->trie_childs_max + add) * sizeof(struct trie_child));
-		if (childs = NULL)
-			return -1;
-		dbg(rules->udev, "extend trie childs from %u to %u\n",
-		    rules->trie_childs_max, rules->trie_childs_max + add);
-		rules->trie_childs = childs;
-		rules->trie_childs_max += add;
-	}
-
 	/* get new node */
 	new_node_idx = rules->trie_nodes_cur;
 	rules->trie_nodes_cur++;
@@ -542,36 +513,20 @@ static int add_string(struct udev_rules *rules, const char *str)
 	new_node->value_len = len;
 	new_node->child_idx = 0;
 	new_node->last_child_idx = 0;
+	new_node->next_child_idx = 0;
+	new_node->key = key;
 
-	if (depth = 0) {
-		/* add node to root */
-		rules->trie_root[key] = new_node_idx;
+	/* append child link to list of childs of parent */
+	parent = &rules->trie_nodes[node_idx];
+	if (parent->child_idx = 0) {
+		parent->child_idx = new_node_idx;
 	} else {
-		/* add node to parent */
-		struct trie_node *parent;
-		struct trie_child *new_child;
-		unsigned int new_child_idx;
-
-		/* get new child link for list of childs of parent */
-		new_child_idx = rules->trie_childs_cur;
-		rules->trie_childs_cur++;
-		new_child = &rules->trie_childs[new_child_idx];
-		new_child->next_idx = 0;
-		new_child->node_idx = new_node_idx;
-		new_child->key = key;
-
-		/* append child link to list of childs of parent */
-		parent = &rules->trie_nodes[node_idx];
-		if (parent->child_idx = 0) {
-			parent->child_idx = new_child_idx;
-		} else {
-			struct trie_child *last_child;
+		struct trie_node *last_child;
 
-			last_child = &rules->trie_childs[parent->last_child_idx];
-			last_child->next_idx = new_child_idx;
-		}
-		parent->last_child_idx = new_child_idx;
+		last_child = &rules->trie_nodes[parent->last_child_idx];
+		last_child->next_child_idx = new_node_idx;
 	}
+	parent->last_child_idx = new_node_idx;
 	return off;
 }
 
@@ -1766,18 +1721,10 @@ struct udev_rules *udev_rules_new(struct udev *udev, int resolve_names)
 	if (rules->trie_nodes = NULL)
 		return NULL;
 	rules->trie_nodes_max = PREALLOC_TRIE;
-	/* offset 0 is reserved for the null trie node */
+	/* offset 0 is the trie root */
+	memset(rules->trie_nodes, 0x00, sizeof(struct trie_node));
 	rules->trie_nodes_cur = 1;
 
-	rules->trie_childs = malloc(PREALLOC_TRIE * sizeof(struct trie_child));
-	if (rules->trie_childs = NULL)
-		return NULL;
-	rules->trie_childs_max = PREALLOC_TRIE;
-	/* offset 0 is reserved for the null child node */
-	rules->trie_childs_cur = 1;
-
-	rules->trie_root = calloc(UCHAR_MAX + 1, sizeof(unsigned short));
-
 	if (udev_get_rules_path(udev) != NULL) {
 		/* custom rules location for testing */
 		add_matching_files(udev, &file_list, udev_get_rules_path(udev), ".rules");
@@ -1890,22 +1837,15 @@ struct udev_rules *udev_rules_new(struct udev *udev, int resolve_names)
 	}
 	info(udev, "rules use %zu bytes tokens (%u * %zu bytes), %zu bytes buffer\n",
 	     rules->token_max * sizeof(struct token), rules->token_max, sizeof(struct token), rules->buf_max);
-	info(udev, "temporary index used %zu bytes (%u * %zu bytes nodes, %u * %zu bytes child links)\n",
-	     rules->trie_nodes_cur * sizeof(struct trie_node) + rules->trie_childs_cur * sizeof(struct trie_child),
-	     rules->trie_nodes_cur, sizeof(struct trie_node),
-	     rules->trie_childs_cur, sizeof(struct trie_child));
+	info(udev, "temporary index used %zu bytes (%u * %zu bytes)\n",
+	     rules->trie_nodes_cur * sizeof(struct trie_node),
+	     rules->trie_nodes_cur, sizeof(struct trie_node));
 
 	/* cleanup trie */
 	free(rules->trie_nodes);
 	rules->trie_nodes = NULL;
 	rules->trie_nodes_cur = 0;
 	rules->trie_nodes_max = 0;
-	free(rules->trie_childs);
-	rules->trie_childs = NULL;
-	rules->trie_childs_cur = 0;
-	rules->trie_childs_max = 0;
-	free(rules->trie_root);
-	rules->trie_root = NULL;
 
 	/* cleanup uid/gid cache */
 	free(rules->uids);
@@ -1928,8 +1868,6 @@ void udev_rules_unref(struct udev_rules *rules)
 	free(rules->tokens);
 	free(rules->buf);
 	free(rules->trie_nodes);
-	free(rules->trie_childs);
-	free(rules->trie_root);
 	free(rules->uids);
 	free(rules->gids);
 	free(rules);




^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
                   ` (7 preceding siblings ...)
  2008-11-12 21:37 ` Kay Sievers
@ 2008-11-13 10:12 ` Alan Jenkins
  2008-11-13 19:03 ` Kay Sievers
  9 siblings, 0 replies; 11+ messages in thread
From: Alan Jenkins @ 2008-11-13 10:12 UTC (permalink / raw)
  To: linux-hotplug

Kay Sievers wrote:
> On Wed, 2008-11-12 at 20:48 +0000, Alan Jenkins wrote:
>   
>> Kay Sievers wrote:
>>     
>>> On Wed, Nov 12, 2008 at 19:05, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:   
>>>       
>>>> Kay Sievers wrote:
>>>>         
>>>>> On Wed, Nov 12, 2008 at 17:50, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>>>           
>>>>>> Kay Sievers wrote:
>>>>>>             
>>>>>>> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@vrfy.org> wrote:
>>>>>>>               
>>>>>>>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>>>>>>
>>>>>>>>                 
>>>>>>>>> On my Ubuntu installation this removes 15k of duplicate strings,
>>>>>>>>> using a temporary index of about 25k.
>>>>>>>>>               
>>>>>>>>>                   
>>>>>>>> Great. That looks nice.
>>>>>>>>
>>>>>>>> Thats's the diff of the rule dump before and after the patch:
>>>>>>>>  ...
>>>>>>>>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
>>>>>>>>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
>>>>>>>>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
>>>>>>>>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
>>>>>>>>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
>>>>>>>>
>>>>>>>>             
>>>>>>>>                 
>>>>>>> I split the nodes and the childs in two independent arrays, so we got
>>>>>>> rid of the limit of 10 childs per node. I've got ~200 fully uses slots
>>>>>>> with the huge rules set here. Unlimited childs in the index removes
>>>>>>> another 3 kB of duplicates, and the temporary index seems also a bit
>>>>>>> smaller:
>>>>>>>   shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
>>>>>>>   used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
>>>>>>> child links)
>>>>>>>
>>>>>>> Would be great, if you can check if it still works for you as expected. :)
>>>>>>>
>>>>>>>           
>>>>>>>               
>>>>>> Did you have a particular reason to keep the trie_root array?  Now
>>>>>> there's no fixed limit on children, you could just use trie[1] as the
>>>>>> root node.  Remove the special case for depth = 0.  And initialize it's
>>>>>> value and length to 0, then you can remove the special case for len = 0.
>>>>>>
>>>>>>         
>>>>>>             
>>>>> No special reason, I thought about that too, but it was already 5am,
>>>>> and I was unable to think it through. :)
>>>>>
>>>>> Sounds nice to do that, did you try already, have a patch?
>>>>>
>>>>>       
>>>>>           
>>>> No, sorry :).
>>>>     
>>>>         
>>> Ah, now by looking at it, maybe the then needed linear search for the
>>> key in the root is not as good as the plain root array index?
>>>   
>>>       
>> Mmm.  Ok, without the root array add_string() takes twice as long, which 
>> increases the total rules-loading time by 10%.  (user time measured by 
>> cachegrind).  Let's leave it.
>>     
>
> Hmm, now I liked the idea. :)
>
> How about this? It has only a single array again, and no root, and no
> child limits. Seems to work fine, but, it looks somehow too simple
> now. :)
>   

Simple is good.  It's no faster, but I shouldn't care about 10% load
time - because the total is only 0.01 seconds.  What matters is that you
can understand it, and rewriting it yourself won't hurt :).

As I say, the len=0 special case is now redundant.  If you remove it,
it'll get handled by the root node which represents the empty string.

> @@ -460,37 +456,31 @@ static int add_string(struct udev_rules *rules, const char *str)
>  	if (len = 0)
>  		return 0;

Thanks
Alan

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] udevd: de-duplicate strings in rules
  2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
                   ` (8 preceding siblings ...)
  2008-11-13 10:12 ` Alan Jenkins
@ 2008-11-13 19:03 ` Kay Sievers
  9 siblings, 0 replies; 11+ messages in thread
From: Kay Sievers @ 2008-11-13 19:03 UTC (permalink / raw)
  To: linux-hotplug

On Thu, Nov 13, 2008 at 11:12, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
> Kay Sievers wrote:
>> On Wed, 2008-11-12 at 20:48 +0000, Alan Jenkins wrote:
>>> Kay Sievers wrote:
>>>> On Wed, Nov 12, 2008 at 19:05, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>>> Kay Sievers wrote:
>>>>>> On Wed, Nov 12, 2008 at 17:50, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>>>>> Kay Sievers wrote:
>>>>>>>> On Tue, Nov 11, 2008 at 22:23, Kay Sievers <kay.sievers@vrfy.org> wrote:
>>>>>>>>> On Tue, Nov 11, 2008 at 21:20, Alan Jenkins <alan-jenkins@tuffmail.co.uk> wrote:
>>>>>>>>>
>>>>>>>>>> On my Ubuntu installation this removes 15k of duplicate strings,
>>>>>>>>>> using a temporary index of about 25k.
>>>>>>>>>>
>>>>>>>>> Great. That looks nice.
>>>>>>>>>
>>>>>>>>> Thats's the diff of the rule dump before and after the patch:
>>>>>>>>>  ...
>>>>>>>>>  -[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 57298 bytes buffer
>>>>>>>>>  -[] dumping 5408 (64896 bytes) tokens, 5818 (57298 bytes) strings
>>>>>>>>>  +[] shrunk to 64896 bytes tokens (5408 * 12 bytes), 18204 bytes buffer
>>>>>>>>>  +[] used 40512 bytes of string index nodes (844 * 48 bytes)
>>>>>>>>>  +[] dumping 5408 (64896 bytes) tokens, 1369 (18204 bytes) strings
>>>>>>>>>
>>>>>>>> I split the nodes and the childs in two independent arrays, so we got
>>>>>>>> rid of the limit of 10 childs per node. I've got ~200 fully uses slots
>>>>>>>> with the huge rules set here. Unlimited childs in the index removes
>>>>>>>> another 3 kB of duplicates, and the temporary index seems also a bit
>>>>>>>> smaller:
>>>>>>>>   shrunk to 64896 bytes tokens (5408 * 12 bytes), 15324 bytes buffer
>>>>>>>>   used 29456 bytes for index (1076 * 16 bytes nodes, 1020 * 12 bytes
>>>>>>>> child links)
>>>>>>>>
>>>>>>>> Would be great, if you can check if it still works for you as expected. :)
>>>>>>>>
>>>>>>> Did you have a particular reason to keep the trie_root array?  Now
>>>>>>> there's no fixed limit on children, you could just use trie[1] as the
>>>>>>> root node.  Remove the special case for depth = 0.  And initialize it's
>>>>>>> value and length to 0, then you can remove the special case for len = 0.
>>>>>>>
>>>>>> No special reason, I thought about that too, but it was already 5am,
>>>>>> and I was unable to think it through. :)
>>>>>>
>>>>>> Sounds nice to do that, did you try already, have a patch?
>>>>>>
>>>>> No, sorry :).
>>>>>
>>>> Ah, now by looking at it, maybe the then needed linear search for the
>>>> key in the root is not as good as the plain root array index?
>>>>
>>> Mmm.  Ok, without the root array add_string() takes twice as long, which
>>> increases the total rules-loading time by 10%.  (user time measured by
>>> cachegrind).  Let's leave it.
>>
>> Hmm, now I liked the idea. :)
>>
>> How about this? It has only a single array again, and no root, and no
>> child limits. Seems to work fine, but, it looks somehow too simple
>> now. :)
>
> Simple is good.  It's no faster, but I shouldn't care about 10% load
> time - because the total is only 0.01 seconds.

Yeah, it's 10% of almost nothing here too. So I think it's fine.

> What matters is that you
> can understand it, and rewriting it yourself won't hurt :).

Heh.

> As I say, the len=0 special case is now redundant.  If you remove it,
> it'll get handled by the root node which represents the empty string.
>
>> @@ -460,37 +456,31 @@ static int add_string(struct udev_rules *rules, const char *str)
>>       if (len = 0)
>>               return 0;

Removed it.

Thanks,
Kay

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-11-13 19:03 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-11 20:20 [PATCH] udevd: de-duplicate strings in rules Alan Jenkins
2008-11-11 21:23 ` Kay Sievers
2008-11-12  5:11 ` Kay Sievers
2008-11-12 16:50 ` Alan Jenkins
2008-11-12 18:00 ` Kay Sievers
2008-11-12 18:05 ` Alan Jenkins
2008-11-12 18:20 ` Kay Sievers
2008-11-12 20:48 ` Alan Jenkins
2008-11-12 21:37 ` Kay Sievers
2008-11-13 10:12 ` Alan Jenkins
2008-11-13 19:03 ` Kay Sievers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).