All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
@ 2011-05-02 12:28 Dimitris Papastamos
  2011-05-02 12:28 ` [PATCH 2/2 v2] ASoC: soc-cache: cache a pointer to the last accessed rbtree block Dimitris Papastamos
                   ` (2 more replies)
  0 siblings, 3 replies; 36+ messages in thread
From: Dimitris Papastamos @ 2011-05-02 12:28 UTC (permalink / raw)
  To: Mark Brown, Liam Girdwood; +Cc: alsa-devel, patches, Liam Girdwood

This patch prepares the ground for the actual rbtree optimization patch
which will save a pointer to the last accessed register block that was used
in either the read() or write() functions.

Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
no two nodes with overlapping blocks.  Currently there is no check in the code
to scream in case that ever happens.  Each block has a base and top register,
all others lie in between these registers.  Note that variable length blocks
aren't supported.  So if you have an interval [8, 15] and only some of those
registers actually exist on the device, the block will have the non-existent
registers as zero.  There is also no way of reporting that any of those
non-existent registers were accessed/modified.

The larger the block size, the more probable it is that one of the
managed registers is non-zero, and therefore the node will need to be
allocated at initialization time and waste space.

If register N is accessed and it is not part of any of the current
blocks in the rbtree, a new node is created with a base register
which is floor(N / RBTREE_BLOCK_NUM) * RBTREE_BLOCK_NUM and a top
register as base_register + RBTREE_BLOCK_NUM - 1.  All other registers
in the block are initialized as expected and the node is inserted into
the tree.

Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
---
 sound/soc/soc-cache.c |  177 +++++++++++++++++++++++++++++++++++--------------
 1 files changed, 128 insertions(+), 49 deletions(-)

diff --git a/sound/soc/soc-cache.c b/sound/soc/soc-cache.c
index a217db2..c518b6e 100644
--- a/sound/soc/soc-cache.c
+++ b/sound/soc/soc-cache.c
@@ -593,32 +593,58 @@ static unsigned int snd_soc_get_cache_val(const void *base, unsigned int idx,
 	return -1;
 }
 
-struct snd_soc_rbtree_node {
-	struct rb_node node;
+struct snd_soc_rbtree_reg_blk {
 	unsigned int reg;
 	unsigned int value;
 	unsigned int defval;
 } __attribute__ ((packed));
 
+#define RBTREE_BLOCK_NUM 32
+struct snd_soc_rbtree_node {
+	struct rb_node node;
+	struct snd_soc_rbtree_reg_blk block[RBTREE_BLOCK_NUM];
+	unsigned int blklen;
+} __attribute__ ((packed));
+
 struct snd_soc_rbtree_ctx {
 	struct rb_root root;
 };
 
+static inline int snd_soc_rbtree_block_count(void)
+{
+	return RBTREE_BLOCK_NUM;
+}
+
+static inline struct snd_soc_rbtree_reg_blk *snd_soc_rbtree_base_block(
+	struct snd_soc_rbtree_node *rbnode)
+{
+	return &rbnode->block[0];
+}
+
+static inline struct snd_soc_rbtree_reg_blk *snd_soc_rbtree_top_block(
+	struct snd_soc_rbtree_node *rbnode)
+{
+	return &rbnode->block[snd_soc_rbtree_block_count() - 1];
+}
+
 static struct snd_soc_rbtree_node *snd_soc_rbtree_lookup(
 	struct rb_root *root, unsigned int reg)
 {
 	struct rb_node *node;
 	struct snd_soc_rbtree_node *rbnode;
+	struct snd_soc_rbtree_reg_blk *baseblk, *topblk;
 
 	node = root->rb_node;
 	while (node) {
 		rbnode = container_of(node, struct snd_soc_rbtree_node, node);
-		if (rbnode->reg < reg)
-			node = node->rb_left;
-		else if (rbnode->reg > reg)
-			node = node->rb_right;
-		else
+		baseblk = snd_soc_rbtree_base_block(rbnode);
+		topblk = snd_soc_rbtree_top_block(rbnode);
+		if (reg >= baseblk->reg && reg <= topblk->reg)
 			return rbnode;
+		else if (reg > topblk->reg)
+			node = node->rb_right;
+		else if (reg < baseblk->reg)
+			node = node->rb_left;
 	}
 
 	return NULL;
@@ -629,19 +655,28 @@ static int snd_soc_rbtree_insert(struct rb_root *root,
 {
 	struct rb_node **new, *parent;
 	struct snd_soc_rbtree_node *rbnode_tmp;
+	struct snd_soc_rbtree_reg_blk *baseblk_tmp, *topblk_tmp;
+	struct snd_soc_rbtree_reg_blk *baseblk;
 
 	parent = NULL;
 	new = &root->rb_node;
 	while (*new) {
 		rbnode_tmp = container_of(*new, struct snd_soc_rbtree_node,
 					  node);
+		/* base and top blocks of the current rbnode */
+		baseblk_tmp = snd_soc_rbtree_base_block(rbnode_tmp);
+		topblk_tmp = snd_soc_rbtree_top_block(rbnode_tmp);
+		/* base block of the rbnode to be added */
+		baseblk = snd_soc_rbtree_base_block(rbnode);
 		parent = *new;
-		if (rbnode_tmp->reg < rbnode->reg)
-			new = &((*new)->rb_left);
-		else if (rbnode_tmp->reg > rbnode->reg)
-			new = &((*new)->rb_right);
-		else
+		/* if this block has already been inserted, just return */
+		if (baseblk->reg >= baseblk_tmp->reg &&
+		    baseblk->reg <= topblk_tmp->reg)
 			return 0;
+		else if (baseblk->reg > topblk_tmp->reg)
+			new = &((*new)->rb_right);
+		else if (baseblk->reg < baseblk_tmp->reg)
+			new = &((*new)->rb_left);
 	}
 
 	/* insert the node into the rbtree */
@@ -656,26 +691,31 @@ static int snd_soc_rbtree_cache_sync(struct snd_soc_codec *codec)
 	struct snd_soc_rbtree_ctx *rbtree_ctx;
 	struct rb_node *node;
 	struct snd_soc_rbtree_node *rbnode;
-	unsigned int val;
+	struct snd_soc_rbtree_reg_blk *regblk;
+	unsigned int val, i;
 	int ret;
 
 	rbtree_ctx = codec->reg_cache;
+	/* for each node sync all its blocks */
 	for (node = rb_first(&rbtree_ctx->root); node; node = rb_next(node)) {
 		rbnode = rb_entry(node, struct snd_soc_rbtree_node, node);
-		if (rbnode->value == rbnode->defval)
-			continue;
-		WARN_ON(codec->writable_register &&
-			codec->writable_register(codec, rbnode->reg));
-		ret = snd_soc_cache_read(codec, rbnode->reg, &val);
-		if (ret)
-			return ret;
-		codec->cache_bypass = 1;
-		ret = snd_soc_write(codec, rbnode->reg, val);
-		codec->cache_bypass = 0;
-		if (ret)
-			return ret;
-		dev_dbg(codec->dev, "Synced register %#x, value = %#x\n",
-			rbnode->reg, val);
+		for (i = 0; i < rbnode->blklen; ++i) {
+			regblk = &rbnode->block[i];
+			if (regblk->value == regblk->defval)
+				continue;
+			WARN_ON(codec->writable_register &&
+				codec->writable_register(codec, regblk->reg));
+			ret = snd_soc_cache_read(codec, regblk->reg, &val);
+			if (ret)
+				return ret;
+			codec->cache_bypass = 1;
+			ret = snd_soc_write(codec, regblk->reg, val);
+			codec->cache_bypass = 0;
+			if (ret)
+				return ret;
+			dev_dbg(codec->dev, "Synced register %#x, value = %#x\n",
+				regblk->reg, val);
+		}
 	}
 
 	return 0;
@@ -686,27 +726,40 @@ static int snd_soc_rbtree_cache_write(struct snd_soc_codec *codec,
 {
 	struct snd_soc_rbtree_ctx *rbtree_ctx;
 	struct snd_soc_rbtree_node *rbnode;
+	struct snd_soc_rbtree_reg_blk *regblk;
+	struct snd_soc_rbtree_reg_blk *baseblk;
+	unsigned int base_reg;
+	int blkcount, i;
 
 	rbtree_ctx = codec->reg_cache;
 	rbnode = snd_soc_rbtree_lookup(&rbtree_ctx->root, reg);
 	if (rbnode) {
-		if (rbnode->value == value)
+		baseblk = snd_soc_rbtree_base_block(rbnode);
+		regblk = &rbnode->block[reg - baseblk->reg];
+		if (regblk->value == value)
 			return 0;
-		rbnode->value = value;
+		regblk->value = value;
 	} else {
 		/* bail out early, no need to create the rbnode yet */
 		if (!value)
 			return 0;
 		/*
-		 * for uninitialized registers whose value is changed
-		 * from the default zero, create an rbnode and insert
-		 * it into the tree.
+		 * if the value of any of the uninitialized registers in a
+		 * block is changed from the default zero, create an rbnode
+		 * and insert it into the tree.  Make sure to initialize all
+		 * the registers in the block.
 		 */
 		rbnode = kzalloc(sizeof *rbnode, GFP_KERNEL);
 		if (!rbnode)
 			return -ENOMEM;
-		rbnode->reg = reg;
-		rbnode->value = value;
+		blkcount = snd_soc_rbtree_block_count();
+		base_reg = (reg / blkcount) * blkcount;
+		for (i = 0; i < blkcount; ++i) {
+			regblk = &rbnode->block[i];
+			regblk->reg = base_reg + i;
+			if (regblk->reg == reg)
+				regblk->value = value;
+		}
 		snd_soc_rbtree_insert(&rbtree_ctx->root, rbnode);
 	}
 
@@ -718,11 +771,13 @@ static int snd_soc_rbtree_cache_read(struct snd_soc_codec *codec,
 {
 	struct snd_soc_rbtree_ctx *rbtree_ctx;
 	struct snd_soc_rbtree_node *rbnode;
+	struct snd_soc_rbtree_reg_blk *baseblk;
 
 	rbtree_ctx = codec->reg_cache;
 	rbnode = snd_soc_rbtree_lookup(&rbtree_ctx->root, reg);
 	if (rbnode) {
-		*value = rbnode->value;
+		baseblk = snd_soc_rbtree_base_block(rbnode);
+		*value = rbnode->block[reg - baseblk->reg].value;
 	} else {
 		/* uninitialized registers default to 0 */
 		*value = 0;
@@ -762,10 +817,13 @@ static int snd_soc_rbtree_cache_init(struct snd_soc_codec *codec)
 {
 	struct snd_soc_rbtree_node *rbtree_node;
 	struct snd_soc_rbtree_ctx *rbtree_ctx;
-	unsigned int val;
+	unsigned int reg, val;
 	unsigned int word_size;
-	int i;
+	int i, j;
 	int ret;
+	int blkcount;
+	int numblocks;
+	int flag;
 
 	codec->reg_cache = kmalloc(sizeof *rbtree_ctx, GFP_KERNEL);
 	if (!codec->reg_cache)
@@ -777,28 +835,49 @@ static int snd_soc_rbtree_cache_init(struct snd_soc_codec *codec)
 	if (!codec->reg_def_copy)
 		return 0;
 
-	/*
-	 * populate the rbtree with the initialized registers.  All other
-	 * registers will be inserted when they are first modified.
-	 */
 	word_size = codec->driver->reg_word_size;
-	for (i = 0; i < codec->driver->reg_cache_size; ++i) {
-		val = snd_soc_get_cache_val(codec->reg_def_copy, i, word_size);
-		if (!val)
+	blkcount = snd_soc_rbtree_block_count();
+	numblocks = (codec->driver->reg_cache_size - 1 + blkcount) / blkcount;
+	for (i = 0; i < numblocks; ++i) {
+		/* check if a block is comprised of uninitialized registers only */
+		for (j = 0, flag = 0; j < blkcount; ++j) {
+			reg = i * blkcount + j;
+			if (reg >= codec->driver->reg_cache_size)
+				break;
+			val = snd_soc_get_cache_val(codec->reg_def_copy,
+						    reg, word_size);
+			if (val) {
+				flag = 1;
+				break;
+			}
+		}
+		/* ignore this block until one of its registers is modified */
+		if (!flag)
 			continue;
 		rbtree_node = kzalloc(sizeof *rbtree_node, GFP_KERNEL);
 		if (!rbtree_node) {
 			ret = -ENOMEM;
-			snd_soc_cache_exit(codec);
-			break;
+			goto err;
+		}
+		/* initialize the register block */
+		for (j = 0; j < blkcount; ++j) {
+			reg = i * blkcount + j;
+			if (reg >= codec->driver->reg_cache_size)
+				break;
+			val = snd_soc_get_cache_val(codec->reg_def_copy, reg, word_size);
+			rbtree_node->block[j].reg = reg;
+			rbtree_node->block[j].value = val;
+			rbtree_node->block[j].defval = val;
 		}
-		rbtree_node->reg = i;
-		rbtree_node->value = val;
-		rbtree_node->defval = val;
+		rbtree_node->blklen = j;
 		snd_soc_rbtree_insert(&rbtree_ctx->root, rbtree_node);
 	}
 
 	return 0;
+
+err:
+	snd_soc_cache_exit(codec);
+	return ret;
 }
 
 #ifdef CONFIG_SND_SOC_CACHE_LZO
-- 
1.7.5

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 2/2 v2] ASoC: soc-cache: cache a pointer to the last accessed rbtree block
  2011-05-02 12:28 [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression Dimitris Papastamos
@ 2011-05-02 12:28 ` Dimitris Papastamos
  2011-05-02 14:29 ` [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression Takashi Iwai
  2011-05-03 11:21 ` Mark Brown
  2 siblings, 0 replies; 36+ messages in thread
From: Dimitris Papastamos @ 2011-05-02 12:28 UTC (permalink / raw)
  To: Mark Brown, Liam Girdwood; +Cc: alsa-devel, patches, Liam Girdwood

Whenever we are doing a read or a write through the rbtree code, we'll
cache a pointer to the register block.  To avoid looking up the register
everytime we do a read or a write, we first check if it can be found in
the cached register block, otherwise we go through the slow path and in the
end we cache the pointer to the register block.

Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
---
 sound/soc/soc-cache.c |   34 ++++++++++++++++++++++++++++++++--
 1 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/sound/soc/soc-cache.c b/sound/soc/soc-cache.c
index c518b6e..09048ea 100644
--- a/sound/soc/soc-cache.c
+++ b/sound/soc/soc-cache.c
@@ -608,6 +608,7 @@ struct snd_soc_rbtree_node {
 
 struct snd_soc_rbtree_ctx {
 	struct rb_root root;
+	struct snd_soc_rbtree_node *cached_rbnode;
 };
 
 static inline int snd_soc_rbtree_block_count(void)
@@ -727,11 +728,25 @@ static int snd_soc_rbtree_cache_write(struct snd_soc_codec *codec,
 	struct snd_soc_rbtree_ctx *rbtree_ctx;
 	struct snd_soc_rbtree_node *rbnode;
 	struct snd_soc_rbtree_reg_blk *regblk;
-	struct snd_soc_rbtree_reg_blk *baseblk;
+	struct snd_soc_rbtree_reg_blk *baseblk, *topblk;
 	unsigned int base_reg;
 	int blkcount, i;
 
 	rbtree_ctx = codec->reg_cache;
+	/* handle cached write */
+	rbnode = rbtree_ctx->cached_rbnode;
+	if (rbnode) {
+		baseblk = snd_soc_rbtree_base_block(rbnode);
+		topblk = snd_soc_rbtree_top_block(rbnode);
+		if (reg >= baseblk->reg && reg <= topblk->reg) {
+			regblk = &rbnode->block[reg - baseblk->reg];
+			if (regblk->value == value)
+				return 0;
+			regblk->value = value;
+			return 0;
+		}
+	}
+	/* if not cached, look it up in the rbtree and cache it */
 	rbnode = snd_soc_rbtree_lookup(&rbtree_ctx->root, reg);
 	if (rbnode) {
 		baseblk = snd_soc_rbtree_base_block(rbnode);
@@ -739,6 +754,7 @@ static int snd_soc_rbtree_cache_write(struct snd_soc_codec *codec,
 		if (regblk->value == value)
 			return 0;
 		regblk->value = value;
+		rbtree_ctx->cached_rbnode = rbnode;
 	} else {
 		/* bail out early, no need to create the rbnode yet */
 		if (!value)
@@ -761,6 +777,7 @@ static int snd_soc_rbtree_cache_write(struct snd_soc_codec *codec,
 				regblk->value = value;
 		}
 		snd_soc_rbtree_insert(&rbtree_ctx->root, rbnode);
+		rbtree_ctx->cached_rbnode = rbnode;
 	}
 
 	return 0;
@@ -771,13 +788,25 @@ static int snd_soc_rbtree_cache_read(struct snd_soc_codec *codec,
 {
 	struct snd_soc_rbtree_ctx *rbtree_ctx;
 	struct snd_soc_rbtree_node *rbnode;
-	struct snd_soc_rbtree_reg_blk *baseblk;
+	struct snd_soc_rbtree_reg_blk *baseblk, *topblk;
 
 	rbtree_ctx = codec->reg_cache;
+	/* handle cached read */
+	rbnode = rbtree_ctx->cached_rbnode;
+	if (rbnode) {
+		baseblk = snd_soc_rbtree_base_block(rbnode);
+		topblk = snd_soc_rbtree_top_block(rbnode);
+		if (reg >= baseblk->reg && reg <= topblk->reg) {
+			*value = rbnode->block[reg - baseblk->reg].value;
+			return 0;
+		}
+	}
+	/* if not cached, look it up in the rbtree and cache it */
 	rbnode = snd_soc_rbtree_lookup(&rbtree_ctx->root, reg);
 	if (rbnode) {
 		baseblk = snd_soc_rbtree_base_block(rbnode);
 		*value = rbnode->block[reg - baseblk->reg].value;
+		rbtree_ctx->cached_rbnode = rbnode;
 	} else {
 		/* uninitialized registers default to 0 */
 		*value = 0;
@@ -831,6 +860,7 @@ static int snd_soc_rbtree_cache_init(struct snd_soc_codec *codec)
 
 	rbtree_ctx = codec->reg_cache;
 	rbtree_ctx->root = RB_ROOT;
+	rbtree_ctx->cached_rbnode = NULL;
 
 	if (!codec->reg_def_copy)
 		return 0;
-- 
1.7.5

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-02 12:28 [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression Dimitris Papastamos
  2011-05-02 12:28 ` [PATCH 2/2 v2] ASoC: soc-cache: cache a pointer to the last accessed rbtree block Dimitris Papastamos
@ 2011-05-02 14:29 ` Takashi Iwai
  2011-05-02 14:40   ` Dimitris Papastamos
  2011-05-02 14:42   ` Dimitris Papastamos
  2011-05-03 11:21 ` Mark Brown
  2 siblings, 2 replies; 36+ messages in thread
From: Takashi Iwai @ 2011-05-02 14:29 UTC (permalink / raw)
  To: Dimitris Papastamos
  Cc: alsa-devel, Mark Brown, Liam Girdwood, Liam Girdwood, patches

At Mon,  2 May 2011 13:28:28 +0100,
Dimitris Papastamos wrote:
> 
> This patch prepares the ground for the actual rbtree optimization patch
> which will save a pointer to the last accessed register block that was used
> in either the read() or write() functions.
> 
> Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
> no two nodes with overlapping blocks.  Currently there is no check in the code
> to scream in case that ever happens.  Each block has a base and top register,
> all others lie in between these registers.  Note that variable length blocks
> aren't supported.  So if you have an interval [8, 15] and only some of those
> registers actually exist on the device, the block will have the non-existent
> registers as zero.  There is also no way of reporting that any of those
> non-existent registers were accessed/modified.
> 
> The larger the block size, the more probable it is that one of the
> managed registers is non-zero, and therefore the node will need to be
> allocated at initialization time and waste space.
> 
> If register N is accessed and it is not part of any of the current
> blocks in the rbtree, a new node is created with a base register
> which is floor(N / RBTREE_BLOCK_NUM) * RBTREE_BLOCK_NUM and a top
> register as base_register + RBTREE_BLOCK_NUM - 1.  All other registers
> in the block are initialized as expected and the node is inserted into
> the tree.
> 
> Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>

Looking through the patch, I wonder whether it gives a performance
gain enough for the additional complexity.  Did you measure somehow?


thanks,

Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-02 14:29 ` [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression Takashi Iwai
@ 2011-05-02 14:40   ` Dimitris Papastamos
  2011-05-02 14:46     ` Takashi Iwai
  2011-05-02 14:42   ` Dimitris Papastamos
  1 sibling, 1 reply; 36+ messages in thread
From: Dimitris Papastamos @ 2011-05-02 14:40 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: alsa-devel, Mark Brown, Liam Girdwood, Liam Girdwood, patches

On Mon, May 02, 2011 at 04:29:01PM +0200, Takashi Iwai wrote:
> At Mon,  2 May 2011 13:28:28 +0100,
> Dimitris Papastamos wrote:
> > 
> > This patch prepares the ground for the actual rbtree optimization patch
> > which will save a pointer to the last accessed register block that was used
> > in either the read() or write() functions.
> > 
> > Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
> > no two nodes with overlapping blocks.  Currently there is no check in the code
> > to scream in case that ever happens.  Each block has a base and top register,
> > all others lie in between these registers.  Note that variable length blocks
> > aren't supported.  So if you have an interval [8, 15] and only some of those
> > registers actually exist on the device, the block will have the non-existent
> > registers as zero.  There is also no way of reporting that any of those
> > non-existent registers were accessed/modified.
> > 
> > The larger the block size, the more probable it is that one of the
> > managed registers is non-zero, and therefore the node will need to be
> > allocated at initialization time and waste space.
> > 
> > If register N is accessed and it is not part of any of the current
> > blocks in the rbtree, a new node is created with a base register
> > which is floor(N / RBTREE_BLOCK_NUM) * RBTREE_BLOCK_NUM and a top
> > register as base_register + RBTREE_BLOCK_NUM - 1.  All other registers
> > in the block are initialized as expected and the node is inserted into
> > the tree.
> > 
> > Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
> 
> Looking through the patch, I wonder whether it gives a performance
> gain enough for the additional complexity.  Did you measure somehow?

This patch itself does not provide a performance benefit.  The next
patch in line which caches the last used block will provide a
performance benefit.  I have planted some cache hit/miss counters in my
debug code to measure this.  Ideally it'd be nice for the codec
drivers/machine drivers to be able to tune the block size at init time.

Thanks,
Dimitris

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-02 14:29 ` [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression Takashi Iwai
  2011-05-02 14:40   ` Dimitris Papastamos
@ 2011-05-02 14:42   ` Dimitris Papastamos
  2011-05-02 14:47     ` Takashi Iwai
  1 sibling, 1 reply; 36+ messages in thread
From: Dimitris Papastamos @ 2011-05-02 14:42 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: alsa-devel, Mark Brown, Liam Girdwood, Liam Girdwood, patches

On Mon, May 02, 2011 at 04:29:01PM +0200, Takashi Iwai wrote:
> At Mon,  2 May 2011 13:28:28 +0100,
> Dimitris Papastamos wrote:
> > 
> > This patch prepares the ground for the actual rbtree optimization patch
> > which will save a pointer to the last accessed register block that was used
> > in either the read() or write() functions.
> > 
> > Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
> > no two nodes with overlapping blocks.  Currently there is no check in the code
> > to scream in case that ever happens.  Each block has a base and top register,
> > all others lie in between these registers.  Note that variable length blocks
> > aren't supported.  So if you have an interval [8, 15] and only some of those
> > registers actually exist on the device, the block will have the non-existent
> > registers as zero.  There is also no way of reporting that any of those
> > non-existent registers were accessed/modified.
> > 
> > The larger the block size, the more probable it is that one of the
> > managed registers is non-zero, and therefore the node will need to be
> > allocated at initialization time and waste space.
> > 
> > If register N is accessed and it is not part of any of the current
> > blocks in the rbtree, a new node is created with a base register
> > which is floor(N / RBTREE_BLOCK_NUM) * RBTREE_BLOCK_NUM and a top
> > register as base_register + RBTREE_BLOCK_NUM - 1.  All other registers
> > in the block are initialized as expected and the node is inserted into
> > the tree.
> > 
> > Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
> 
> Looking through the patch, I wonder whether it gives a performance
> gain enough for the additional complexity.  Did you measure somehow?

This patch might still provide a performance benefit by itself since by
increasing the block size we decrease the number of nodes in the rbtree
that need to be looked up everytime we touch the cache.

Thanks,
Dimitris

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-02 14:40   ` Dimitris Papastamos
@ 2011-05-02 14:46     ` Takashi Iwai
  2011-05-02 14:51       ` Dimitris Papastamos
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-02 14:46 UTC (permalink / raw)
  To: Dimitris Papastamos
  Cc: alsa-devel, Mark Brown, Liam Girdwood, Liam Girdwood, patches

At Mon, 2 May 2011 15:40:20 +0100,
Dimitris Papastamos wrote:
> 
> On Mon, May 02, 2011 at 04:29:01PM +0200, Takashi Iwai wrote:
> > At Mon,  2 May 2011 13:28:28 +0100,
> > Dimitris Papastamos wrote:
> > > 
> > > This patch prepares the ground for the actual rbtree optimization patch
> > > which will save a pointer to the last accessed register block that was used
> > > in either the read() or write() functions.
> > > 
> > > Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
> > > no two nodes with overlapping blocks.  Currently there is no check in the code
> > > to scream in case that ever happens.  Each block has a base and top register,
> > > all others lie in between these registers.  Note that variable length blocks
> > > aren't supported.  So if you have an interval [8, 15] and only some of those
> > > registers actually exist on the device, the block will have the non-existent
> > > registers as zero.  There is also no way of reporting that any of those
> > > non-existent registers were accessed/modified.
> > > 
> > > The larger the block size, the more probable it is that one of the
> > > managed registers is non-zero, and therefore the node will need to be
> > > allocated at initialization time and waste space.
> > > 
> > > If register N is accessed and it is not part of any of the current
> > > blocks in the rbtree, a new node is created with a base register
> > > which is floor(N / RBTREE_BLOCK_NUM) * RBTREE_BLOCK_NUM and a top
> > > register as base_register + RBTREE_BLOCK_NUM - 1.  All other registers
> > > in the block are initialized as expected and the node is inserted into
> > > the tree.
> > > 
> > > Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
> > 
> > Looking through the patch, I wonder whether it gives a performance
> > gain enough for the additional complexity.  Did you measure somehow?
> 
> This patch itself does not provide a performance benefit.  The next
> patch in line which caches the last used block will provide a
> performance benefit.

Yeah, sure, it was a question to this series of patches :)

>  I have planted some cache hit/miss counters in my
> debug code to measure this.  Ideally it'd be nice for the codec
> drivers/machine drivers to be able to tune the block size at init time.

For such a performance improvement change, always the objective
measurement is preferred.  If only hit/miss matters, a hash table with
a reasonable size can be often a better option (and much simpler).


thanks,

Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-02 14:42   ` Dimitris Papastamos
@ 2011-05-02 14:47     ` Takashi Iwai
  2011-05-02 14:58       ` Dimitris Papastamos
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-02 14:47 UTC (permalink / raw)
  To: Dimitris Papastamos
  Cc: alsa-devel, Mark Brown, Liam Girdwood, Liam Girdwood, patches

At Mon, 2 May 2011 15:42:56 +0100,
Dimitris Papastamos wrote:
> 
> On Mon, May 02, 2011 at 04:29:01PM +0200, Takashi Iwai wrote:
> > At Mon,  2 May 2011 13:28:28 +0100,
> > Dimitris Papastamos wrote:
> > > 
> > > This patch prepares the ground for the actual rbtree optimization patch
> > > which will save a pointer to the last accessed register block that was used
> > > in either the read() or write() functions.
> > > 
> > > Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
> > > no two nodes with overlapping blocks.  Currently there is no check in the code
> > > to scream in case that ever happens.  Each block has a base and top register,
> > > all others lie in between these registers.  Note that variable length blocks
> > > aren't supported.  So if you have an interval [8, 15] and only some of those
> > > registers actually exist on the device, the block will have the non-existent
> > > registers as zero.  There is also no way of reporting that any of those
> > > non-existent registers were accessed/modified.
> > > 
> > > The larger the block size, the more probable it is that one of the
> > > managed registers is non-zero, and therefore the node will need to be
> > > allocated at initialization time and waste space.
> > > 
> > > If register N is accessed and it is not part of any of the current
> > > blocks in the rbtree, a new node is created with a base register
> > > which is floor(N / RBTREE_BLOCK_NUM) * RBTREE_BLOCK_NUM and a top
> > > register as base_register + RBTREE_BLOCK_NUM - 1.  All other registers
> > > in the block are initialized as expected and the node is inserted into
> > > the tree.
> > > 
> > > Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
> > 
> > Looking through the patch, I wonder whether it gives a performance
> > gain enough for the additional complexity.  Did you measure somehow?
> 
> This patch might still provide a performance benefit by itself since by
> increasing the block size we decrease the number of nodes in the rbtree
> that need to be looked up everytime we touch the cache.

Right.  But does it buy so much, in comparison with the additional
complexity of the code?  That's my primary question.


thanks,

Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-02 14:46     ` Takashi Iwai
@ 2011-05-02 14:51       ` Dimitris Papastamos
  0 siblings, 0 replies; 36+ messages in thread
From: Dimitris Papastamos @ 2011-05-02 14:51 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: alsa-devel, Mark Brown, Liam Girdwood, Liam Girdwood, patches

On Mon, May 02, 2011 at 04:46:00PM +0200, Takashi Iwai wrote:
> At Mon, 2 May 2011 15:40:20 +0100,
> Dimitris Papastamos wrote:
> > 
> > On Mon, May 02, 2011 at 04:29:01PM +0200, Takashi Iwai wrote:
> > > At Mon,  2 May 2011 13:28:28 +0100,
> > > Dimitris Papastamos wrote:
> > > > 
> > > > This patch prepares the ground for the actual rbtree optimization patch
> > > > which will save a pointer to the last accessed register block that was used
> > > > in either the read() or write() functions.
> > > > 
> > > > Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
> > > > no two nodes with overlapping blocks.  Currently there is no check in the code
> > > > to scream in case that ever happens.  Each block has a base and top register,
> > > > all others lie in between these registers.  Note that variable length blocks
> > > > aren't supported.  So if you have an interval [8, 15] and only some of those
> > > > registers actually exist on the device, the block will have the non-existent
> > > > registers as zero.  There is also no way of reporting that any of those
> > > > non-existent registers were accessed/modified.
> > > > 
> > > > The larger the block size, the more probable it is that one of the
> > > > managed registers is non-zero, and therefore the node will need to be
> > > > allocated at initialization time and waste space.
> > > > 
> > > > If register N is accessed and it is not part of any of the current
> > > > blocks in the rbtree, a new node is created with a base register
> > > > which is floor(N / RBTREE_BLOCK_NUM) * RBTREE_BLOCK_NUM and a top
> > > > register as base_register + RBTREE_BLOCK_NUM - 1.  All other registers
> > > > in the block are initialized as expected and the node is inserted into
> > > > the tree.
> > > > 
> > > > Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
> > > 
> > > Looking through the patch, I wonder whether it gives a performance
> > > gain enough for the additional complexity.  Did you measure somehow?
> > 
> > This patch itself does not provide a performance benefit.  The next
> > patch in line which caches the last used block will provide a
> > performance benefit.
> 
> Yeah, sure, it was a question to this series of patches :)
> 
> >  I have planted some cache hit/miss counters in my
> > debug code to measure this.  Ideally it'd be nice for the codec
> > drivers/machine drivers to be able to tune the block size at init time.
> 
> For such a performance improvement change, always the objective
> measurement is preferred.  If only hit/miss matters, a hash table with
> a reasonable size can be often a better option (and much simpler).

The original idea was to use an rbtree to decrease memory footprint, by
only allocating nodes for registers that are non-zero.  The same idea
has been progressed further to use a block of registers.  So a node is
only allocated if there is at least one register in the block that is
non-zero.  This patch series hopes to make the entire process less CPU
intensive but to still keep the low memory footprint.

Thanks,
Dimitris

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-02 14:47     ` Takashi Iwai
@ 2011-05-02 14:58       ` Dimitris Papastamos
  2011-05-03  9:43         ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Dimitris Papastamos @ 2011-05-02 14:58 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: patches, alsa-devel, Mark Brown, Liam Girdwood, Liam Girdwood

On Mon, May 02, 2011 at 04:47:41PM +0200, Takashi Iwai wrote:
> At Mon, 2 May 2011 15:42:56 +0100,
> Dimitris Papastamos wrote:
> > 
> > On Mon, May 02, 2011 at 04:29:01PM +0200, Takashi Iwai wrote:
> > > At Mon,  2 May 2011 13:28:28 +0100,
> > > Dimitris Papastamos wrote:
> > > > 
> > > > This patch prepares the ground for the actual rbtree optimization patch
> > > > which will save a pointer to the last accessed register block that was used
> > > > in either the read() or write() functions.
> > > > 
> > > > Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
> > > > no two nodes with overlapping blocks.  Currently there is no check in the code
> > > > to scream in case that ever happens.  Each block has a base and top register,
> > > > all others lie in between these registers.  Note that variable length blocks
> > > > aren't supported.  So if you have an interval [8, 15] and only some of those
> > > > registers actually exist on the device, the block will have the non-existent
> > > > registers as zero.  There is also no way of reporting that any of those
> > > > non-existent registers were accessed/modified.
> > > > 
> > > > The larger the block size, the more probable it is that one of the
> > > > managed registers is non-zero, and therefore the node will need to be
> > > > allocated at initialization time and waste space.
> > > > 
> > > > If register N is accessed and it is not part of any of the current
> > > > blocks in the rbtree, a new node is created with a base register
> > > > which is floor(N / RBTREE_BLOCK_NUM) * RBTREE_BLOCK_NUM and a top
> > > > register as base_register + RBTREE_BLOCK_NUM - 1.  All other registers
> > > > in the block are initialized as expected and the node is inserted into
> > > > the tree.
> > > > 
> > > > Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
> > > 
> > > Looking through the patch, I wonder whether it gives a performance
> > > gain enough for the additional complexity.  Did you measure somehow?
> > 
> > This patch might still provide a performance benefit by itself since by
> > increasing the block size we decrease the number of nodes in the rbtree
> > that need to be looked up everytime we touch the cache.
> 
> Right.  But does it buy so much, in comparison with the additional
> complexity of the code?  That's my primary question.

For some codecs, the register maps group together registers of similar
functionality.  With an optimal block size or near optimal, there will
be only one rbtree look up if we continually touch any of those
registers.  This helps primarily when we sync the register map as well
as during initialization.  I think the added code complexity is worth
it.  I could perhaps send patches incrementally to simplify this
further.

Thanks,
Dimitris

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-02 14:58       ` Dimitris Papastamos
@ 2011-05-03  9:43         ` Mark Brown
  2011-05-03 10:38           ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03  9:43 UTC (permalink / raw)
  To: Dimitris Papastamos
  Cc: Takashi Iwai, alsa-devel, patches, Liam Girdwood, Liam Girdwood

On Mon, May 02, 2011 at 03:58:14PM +0100, Dimitris Papastamos wrote:
> On Mon, May 02, 2011 at 04:47:41PM +0200, Takashi Iwai wrote:
> > Dimitris Papastamos wrote:

> > Right.  But does it buy so much, in comparison with the additional
> > complexity of the code?  That's my primary question.

> For some codecs, the register maps group together registers of similar
> functionality.  With an optimal block size or near optimal, there will
> be only one rbtree look up if we continually touch any of those
> registers.  This helps primarily when we sync the register map as well
> as during initialization.  I think the added code complexity is worth
> it.  I could perhaps send patches incrementally to simplify this
> further.

The other thing which Dimitris didn't mention here but which is much
more of a win is that for operations which work on large numbers of
registers like register cache restore having the registers grouped
together then if they're grouped together then you can normally do a
block I/O.  For the common case of I2C CODECs this means you can save
transmitting both the I2C device address and the register address for
the second and subsequent registers in a block you'll usually get more
than 100% bandwidth saving.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03  9:43         ` Mark Brown
@ 2011-05-03 10:38           ` Takashi Iwai
  2011-05-03 10:47             ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 10:38 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 10:43:20 +0100,
Mark Brown wrote:
> 
> On Mon, May 02, 2011 at 03:58:14PM +0100, Dimitris Papastamos wrote:
> > On Mon, May 02, 2011 at 04:47:41PM +0200, Takashi Iwai wrote:
> > > Dimitris Papastamos wrote:
> 
> > > Right.  But does it buy so much, in comparison with the additional
> > > complexity of the code?  That's my primary question.
> 
> > For some codecs, the register maps group together registers of similar
> > functionality.  With an optimal block size or near optimal, there will
> > be only one rbtree look up if we continually touch any of those
> > registers.  This helps primarily when we sync the register map as well
> > as during initialization.  I think the added code complexity is worth
> > it.  I could perhaps send patches incrementally to simplify this
> > further.
> 
> The other thing which Dimitris didn't mention here but which is much
> more of a win is that for operations which work on large numbers of
> registers like register cache restore having the registers grouped
> together then if they're grouped together then you can normally do a
> block I/O.  For the common case of I2C CODECs this means you can save
> transmitting both the I2C device address and the register address for
> the second and subsequent registers in a block you'll usually get more
> than 100% bandwidth saving.

So... when the low-level stuff updates the registers in a block
manner, it'll update caches of several registers at once, too.
If I understand correctly, the patches help for reducing the CPU usage
because the registers looked up in such a case tend to be adjacent.
Or am I missing other scenario?

Then my primary question is whether this CPU usage is really heavy.
If such an improvement becomes really a big win, something is often
wrong in the first stand-point; e.g. it means that RB-tree lookup
can be too heavy, and some other method should be considered.


thanks,

Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 10:38           ` Takashi Iwai
@ 2011-05-03 10:47             ` Mark Brown
  2011-05-03 10:50               ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 10:47 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 12:38:46PM +0200, Takashi Iwai wrote:
> Mark Brown wrote:

> > The other thing which Dimitris didn't mention here but which is much
> > more of a win is that for operations which work on large numbers of
> > registers like register cache restore having the registers grouped
> > together then if they're grouped together then you can normally do a
> > block I/O.  For the common case of I2C CODECs this means you can save

> So... when the low-level stuff updates the registers in a block
> manner, it'll update caches of several registers at once, too.
> If I understand correctly, the patches help for reducing the CPU usage
> because the registers looked up in such a case tend to be adjacent.
> Or am I missing other scenario?

This isn't about CPU usage, it's about I/O bandwidth which is a big
concern in situations like resume where you can be bringing the device
back up from cold.  I'd also expect better cache performance and lower
memory usage.

> Then my primary question is whether this CPU usage is really heavy.
> If such an improvement becomes really a big win, something is often
> wrong in the first stand-point; e.g. it means that RB-tree lookup
> can be too heavy, and some other method should be considered.

CPU usage isn't really that much of an issue; we need to burn an awful
lot of CPU for it to take longer than the I/O takes.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 10:47             ` Mark Brown
@ 2011-05-03 10:50               ` Takashi Iwai
  2011-05-03 11:02                 ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 10:50 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 11:47:02 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 12:38:46PM +0200, Takashi Iwai wrote:
> > Mark Brown wrote:
> 
> > > The other thing which Dimitris didn't mention here but which is much
> > > more of a win is that for operations which work on large numbers of
> > > registers like register cache restore having the registers grouped
> > > together then if they're grouped together then you can normally do a
> > > block I/O.  For the common case of I2C CODECs this means you can save
> 
> > So... when the low-level stuff updates the registers in a block
> > manner, it'll update caches of several registers at once, too.
> > If I understand correctly, the patches help for reducing the CPU usage
> > because the registers looked up in such a case tend to be adjacent.
> > Or am I missing other scenario?
> 
> This isn't about CPU usage, it's about I/O bandwidth which is a big
> concern in situations like resume where you can be bringing the device
> back up from cold.

Hm, but how do these patches achieve it?  I see no change in the I/O
access side.

> I'd also expect better cache performance and lower memory usage.

Pretty depends on the register mapping, I suppose.

> > Then my primary question is whether this CPU usage is really heavy.
> > If such an improvement becomes really a big win, something is often
> > wrong in the first stand-point; e.g. it means that RB-tree lookup
> > can be too heavy, and some other method should be considered.
> 
> CPU usage isn't really that much of an issue; we need to burn an awful
> lot of CPU for it to take longer than the I/O takes.

Yes, that's why I've been asking it...


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 10:50               ` Takashi Iwai
@ 2011-05-03 11:02                 ` Mark Brown
  2011-05-03 12:25                   ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 11:02 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 12:50:03PM +0200, Takashi Iwai wrote:
> Mark Brown wrote:

> > This isn't about CPU usage, it's about I/O bandwidth which is a big
> > concern in situations like resume where you can be bringing the device
> > back up from cold.

> Hm, but how do these patches achieve it?  I see no change in the I/O
> access side.

There's none directly but we need to get the data into blocks before we
can do bulk I/O (or do complicated gather bulk I/O).

> > CPU usage isn't really that much of an issue; we need to burn an awful
> > lot of CPU for it to take longer than the I/O takes.

> Yes, that's why I've been asking it...

We're talking I2C here so you're looking at 400kHz contended buses for
the I/O, though of course optimisations in CPU usage are also useful.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-02 12:28 [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression Dimitris Papastamos
  2011-05-02 12:28 ` [PATCH 2/2 v2] ASoC: soc-cache: cache a pointer to the last accessed rbtree block Dimitris Papastamos
  2011-05-02 14:29 ` [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression Takashi Iwai
@ 2011-05-03 11:21 ` Mark Brown
  2011-05-03 11:24   ` Dimitris Papastamos
  2 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 11:21 UTC (permalink / raw)
  To: Dimitris Papastamos; +Cc: alsa-devel, patches, Liam Girdwood, Liam Girdwood

On Mon, May 02, 2011 at 01:28:28PM +0100, Dimitris Papastamos wrote:

> Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
> no two nodes with overlapping blocks.  Currently there is no check in the code
> to scream in case that ever happens.  Each block has a base and top register,
> all others lie in between these registers.  Note that variable length blocks
> aren't supported.  So if you have an interval [8, 15] and only some of those
> registers actually exist on the device, the block will have the non-existent
> registers as zero.  There is also no way of reporting that any of those
> non-existent registers were accessed/modified.

As we discussed verbally I'd really rather see this support variable
length blocks and figure out (ideally at init time) which registers
exist.  This avoids all the fiddling around working out what to set the
block size to which seems a lot clearer and cleaner.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 11:21 ` Mark Brown
@ 2011-05-03 11:24   ` Dimitris Papastamos
  2011-05-03 11:26     ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Dimitris Papastamos @ 2011-05-03 11:24 UTC (permalink / raw)
  To: Mark Brown; +Cc: alsa-devel, patches, Liam Girdwood, Liam Girdwood

On Tue, May 03, 2011 at 12:21:25PM +0100, Mark Brown wrote:
> On Mon, May 02, 2011 at 01:28:28PM +0100, Dimitris Papastamos wrote:
> 
> > Each node manages a block of up to RBTREE_BLOCK_NUM registers.  There can be
> > no two nodes with overlapping blocks.  Currently there is no check in the code
> > to scream in case that ever happens.  Each block has a base and top register,
> > all others lie in between these registers.  Note that variable length blocks
> > aren't supported.  So if you have an interval [8, 15] and only some of those
> > registers actually exist on the device, the block will have the non-existent
> > registers as zero.  There is also no way of reporting that any of those
> > non-existent registers were accessed/modified.
> 
> As we discussed verbally I'd really rather see this support variable
> length blocks and figure out (ideally at init time) which registers
> exist.  This avoids all the fiddling around working out what to set the
> block size to which seems a lot clearer and cleaner.

Yes, I will look into that.  The block size will still need to be to set
to determine the maximum size of the variable length block.  Otherwise
there is no way to determine if we need to allocate a new node for the
rbtree or not.

Thanks,
Dimitris

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 11:24   ` Dimitris Papastamos
@ 2011-05-03 11:26     ` Mark Brown
  2011-05-03 11:46       ` Dimitris Papastamos
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 11:26 UTC (permalink / raw)
  To: Dimitris Papastamos; +Cc: alsa-devel, patches, Liam Girdwood, Liam Girdwood

On Tue, May 03, 2011 at 12:24:24PM +0100, Dimitris Papastamos wrote:

> Yes, I will look into that.  The block size will still need to be to set
> to determine the maximum size of the variable length block.  Otherwise
> there is no way to determine if we need to allocate a new node for the
> rbtree or not.

I don't think that's required - if we just allocate blocks containing
only contiguous registers we don't need to worry about it, the decision
is just based on if there's an adjacent register we know about it.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 11:26     ` Mark Brown
@ 2011-05-03 11:46       ` Dimitris Papastamos
  2011-05-03 13:44         ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Dimitris Papastamos @ 2011-05-03 11:46 UTC (permalink / raw)
  To: Mark Brown; +Cc: alsa-devel, patches, Liam Girdwood, Liam Girdwood

On Tue, May 03, 2011 at 12:26:51PM +0100, Mark Brown wrote:
> On Tue, May 03, 2011 at 12:24:24PM +0100, Dimitris Papastamos wrote:
> 
> > Yes, I will look into that.  The block size will still need to be to set
> > to determine the maximum size of the variable length block.  Otherwise
> > there is no way to determine if we need to allocate a new node for the
> > rbtree or not.
> 
> I don't think that's required - if we just allocate blocks containing
> only contiguous registers we don't need to worry about it, the decision
> is just based on if there's an adjacent register we know about it.

So at init time, we populate the tree based on the defaults register
map.  Each block contains only contiguous registers.  If there exist
two nodes A and B, containing adjacent blocks meaning that the last
register in A's block is one before the first register in B's block, we can
eliminate one node and merge the two blocks into the other node.

We can do this yes.  One possible problem is that if we start merging
blocks like that, in the long run we might end-up having one node with a
very large block.  This will happen for example if we write to the
entire register map.  In that case looking up a register will require
O(n) time in the worst case where n is the number of registers.  If we
however have a constant limit k on the block size, we can cut this down to
O(klogn) in the worst case.

Of course we can also avoid merging blocks altogether, and just consider
blocks as full.

Thanks,
Dimitris

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 11:02                 ` Mark Brown
@ 2011-05-03 12:25                   ` Takashi Iwai
  2011-05-03 13:02                     ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 12:25 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 12:02:06 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 12:50:03PM +0200, Takashi Iwai wrote:
> > Mark Brown wrote:
> 
> > > This isn't about CPU usage, it's about I/O bandwidth which is a big
> > > concern in situations like resume where you can be bringing the device
> > > back up from cold.
> 
> > Hm, but how do these patches achieve it?  I see no change in the I/O
> > access side.
> 
> There's none directly but we need to get the data into blocks before we
> can do bulk I/O (or do complicated gather bulk I/O).

So, this is the preliminary work for implementing the bulk I/O?
If so, it's worth to consider once whether implementing in the rb-tree
cache code is the right choice.  Can it be implemented in the cache
management core, since you'll need an API anyway for getting the bulk
register array via cache manager?

It can be that implementing in each cache backend code is the best
choice in the end, but an overlook from a high place is always good
before going forward.


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 12:25                   ` Takashi Iwai
@ 2011-05-03 13:02                     ` Mark Brown
  2011-05-03 13:18                       ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 13:02 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 02:25:12PM +0200, Takashi Iwai wrote:

> So, this is the preliminary work for implementing the bulk I/O?
> If so, it's worth to consider once whether implementing in the rb-tree
> cache code is the right choice.  Can it be implemented in the cache
> management core, since you'll need an API anyway for getting the bulk
> register array via cache manager?

The whole point of the caches is to be responsible for abstracting out
the in-memory layout of the data.  The core already provides information
that the caches can use about the register format and what registers are
present.

The other cache structures we have at the minute are fine since they
store everything as flat arrays anyway.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 13:02                     ` Mark Brown
@ 2011-05-03 13:18                       ` Takashi Iwai
  2011-05-03 13:24                         ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 13:18 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 14:02:59 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 02:25:12PM +0200, Takashi Iwai wrote:
> 
> > So, this is the preliminary work for implementing the bulk I/O?
> > If so, it's worth to consider once whether implementing in the rb-tree
> > cache code is the right choice.  Can it be implemented in the cache
> > management core, since you'll need an API anyway for getting the bulk
> > register array via cache manager?
> 
> The whole point of the caches is to be responsible for abstracting out
> the in-memory layout of the data.  The core already provides information
> that the caches can use about the register format and what registers are
> present.

Yes, but the core doesn't give how linearly it's stored although its
storing order influences on the bulk I/O behavior.  In other words,
the patches try to put registers partly linearly in magical blocks.
But it doesn't guarantee whether the cache block is aligned to what
hardware prefers since it's behind the scene.


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 13:18                       ` Takashi Iwai
@ 2011-05-03 13:24                         ` Mark Brown
  2011-05-03 13:32                           ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 13:24 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 03:18:47PM +0200, Takashi Iwai wrote:

> Yes, but the core doesn't give how linearly it's stored although its
> storing order influences on the bulk I/O behavior.  In other words,
> the patches try to put registers partly linearly in magical blocks.
> But it doesn't guarantee whether the cache block is aligned to what
> hardware prefers since it's behind the scene.

Eh?  I don't follow what you're saying at all, sorry.

The wire formats used by controllers are nailed down by the APIs for the
relevant buses and the rest of the register I/O infrastructure.  The
only extra bit the cache needs to worry about is the set of registers
which are present in a given chip and the cache already has information
about that (which is what I was telling Dimitris he should use in my
direct reply to the patch).

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 13:24                         ` Mark Brown
@ 2011-05-03 13:32                           ` Takashi Iwai
  2011-05-03 13:51                             ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 13:32 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 14:24:20 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 03:18:47PM +0200, Takashi Iwai wrote:
> 
> > Yes, but the core doesn't give how linearly it's stored although its
> > storing order influences on the bulk I/O behavior.  In other words,
> > the patches try to put registers partly linearly in magical blocks.
> > But it doesn't guarantee whether the cache block is aligned to what
> > hardware prefers since it's behind the scene.
> 
> Eh?  I don't follow what you're saying at all, sorry.
> 
> The wire formats used by controllers are nailed down by the APIs for the
> relevant buses and the rest of the register I/O infrastructure.  The
> only extra bit the cache needs to worry about is the set of registers
> which are present in a given chip and the cache already has information
> about that (which is what I was telling Dimitris he should use in my
> direct reply to the patch).

Hrm, then I don't understand why changing the cache management changes
the bulk I/O behavior as you described.  What's missing there?


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 11:46       ` Dimitris Papastamos
@ 2011-05-03 13:44         ` Mark Brown
  0 siblings, 0 replies; 36+ messages in thread
From: Mark Brown @ 2011-05-03 13:44 UTC (permalink / raw)
  To: Dimitris Papastamos; +Cc: alsa-devel, patches, Liam Girdwood, Liam Girdwood

On Tue, May 03, 2011 at 12:46:18PM +0100, Dimitris Papastamos wrote:

> We can do this yes.  One possible problem is that if we start merging
> blocks like that, in the long run we might end-up having one node with a
> very large block.  This will happen for example if we write to the
> entire register map.  In that case looking up a register will require
> O(n) time in the worst case where n is the number of registers.  If we
> however have a constant limit k on the block size, we can cut this down to
> O(klogn) in the worst case.

Summarising offline discussion for the list: if we use an array for the
data and realloc it if we need to change the node size then we keep the
O(klogn) for these cases

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 13:32                           ` Takashi Iwai
@ 2011-05-03 13:51                             ` Mark Brown
  2011-05-03 14:07                               ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 13:51 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 03:32:47PM +0200, Takashi Iwai wrote:

> Hrm, then I don't understand why changing the cache management changes
> the bulk I/O behavior as you described.  What's missing there?

If we can't get the data laid out in a contiguous array in memory then
we have to gather the data for transmit in the I/O code which is
painful and wasteful.  As I say for other cache formats this isn't an
issue.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 13:51                             ` Mark Brown
@ 2011-05-03 14:07                               ` Takashi Iwai
  2011-05-03 14:27                                 ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 14:07 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 14:51:28 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 03:32:47PM +0200, Takashi Iwai wrote:
> 
> > Hrm, then I don't understand why changing the cache management changes
> > the bulk I/O behavior as you described.  What's missing there?
> 
> If we can't get the data laid out in a contiguous array in memory then
> we have to gather the data for transmit in the I/O code which is
> painful and wasteful.

But it's just a matter of CPU usage to look for the caches.
Well, what I don't understand is the text you wrote:

| > So... when the low-level stuff updates the registers in a block
| > manner, it'll update caches of several registers at once, too.
| > If I understand correctly, the patches help for reducing the CPU usage
| > because the registers looked up in such a case tend to be adjacent.
| > Or am I missing other scenario?
|
| This isn't about CPU usage, it's about I/O bandwidth which is a big
| concern in situations like resume where you can be bringing the device
| back up from cold.

How does the contiguous memory layout inside the cache management
reduce the I/O bandwidth...?


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 14:07                               ` Takashi Iwai
@ 2011-05-03 14:27                                 ` Mark Brown
  2011-05-03 15:22                                   ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 14:27 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 04:07:42PM +0200, Takashi Iwai wrote:
> Mark Brown wrote:

> > If we can't get the data laid out in a contiguous array in memory then
> > we have to gather the data for transmit in the I/O code which is
> > painful and wasteful.

> But it's just a matter of CPU usage to look for the caches.
> Well, what I don't understand is the text you wrote:

No, as I said in my initial reply the big win is I/O bandwidth from
block writes.  There will also be a memory and consequent dcache win
from reducing the size of the data structure but that is likely to be
dwarfed by any write coalescing.

> | This isn't about CPU usage, it's about I/O bandwidth which is a big
> | concern in situations like resume where you can be bringing the device
> | back up from cold.

> How does the contiguous memory layout inside the cache management
> reduce the I/O bandwidth...?

If the data is laid out contiguously then we can easily send it to the
chip in one block.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 14:27                                 ` Mark Brown
@ 2011-05-03 15:22                                   ` Takashi Iwai
  2011-05-03 15:24                                     ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 15:22 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 15:27:29 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 04:07:42PM +0200, Takashi Iwai wrote:
> > Mark Brown wrote:
> 
> > > If we can't get the data laid out in a contiguous array in memory then
> > > we have to gather the data for transmit in the I/O code which is
> > > painful and wasteful.
> 
> > But it's just a matter of CPU usage to look for the caches.
> > Well, what I don't understand is the text you wrote:
> 
> No, as I said in my initial reply the big win is I/O bandwidth from
> block writes.

This is a mysterious part.  See below.

>  There will also be a memory and consequent dcache win
> from reducing the size of the data structure but that is likely to be
> dwarfed by any write coalescing.

Yes, but this can also bloat depending on the register map, too :)

> > | This isn't about CPU usage, it's about I/O bandwidth which is a big
> > | concern in situations like resume where you can be bringing the device
> > | back up from cold.
> 
> > How does the contiguous memory layout inside the cache management
> > reduce the I/O bandwidth...?
> 
> If the data is laid out contiguously then we can easily send it to the
> chip in one block.

This is what I don't understand.  The sync loop is anyway done in the
sorted order in rb-tree.  How can the contiguous array helps to change
the I/O pattern...?


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 15:22                                   ` Takashi Iwai
@ 2011-05-03 15:24                                     ` Mark Brown
  2011-05-03 15:30                                       ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 15:24 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 05:22:12PM +0200, Takashi Iwai wrote:

> This is what I don't understand.  The sync loop is anyway done in the
> sorted order in rb-tree.  How can the contiguous array helps to change
> the I/O pattern...?

You don't need to retransmit the I2C device address or the register
address for every single register.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 15:24                                     ` Mark Brown
@ 2011-05-03 15:30                                       ` Takashi Iwai
  2011-05-03 15:40                                         ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 15:30 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 16:24:05 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 05:22:12PM +0200, Takashi Iwai wrote:
> 
> > This is what I don't understand.  The sync loop is anyway done in the
> > sorted order in rb-tree.  How can the contiguous array helps to change
> > the I/O pattern...?
> 
> You don't need to retransmit the I2C device address or the register
> address for every single register.

Is this behavior achieved really by these patches?  As far as I read
these two patches, they change how values are stored in the rb-tree.
Now it'll be partially an array instead of one-value-per-node.  But,
this doesn't change how snd_soc_read() / snd_soc_write() are called.

Could you give an example which driver would be influenced actually?


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 15:30                                       ` Takashi Iwai
@ 2011-05-03 15:40                                         ` Mark Brown
  2011-05-03 15:47                                           ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 15:40 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 05:30:44PM +0200, Takashi Iwai wrote:
> Mark Brown wrote:

> > You don't need to retransmit the I2C device address or the register
> > address for every single register.

> Is this behavior achieved really by these patches?  As far as I read

As I've repeatedly said these patches only provide a building block for
that, they don't actually do anything to help with I/O by themselves.

> these two patches, they change how values are stored in the rb-tree.
> Now it'll be partially an array instead of one-value-per-node.  But,
> this doesn't change how snd_soc_read() / snd_soc_write() are called.

> Could you give an example which driver would be influenced actually?

There are no drivers that would benefit yet since the register caches
they rely on can't support this yet!

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 15:40                                         ` Mark Brown
@ 2011-05-03 15:47                                           ` Takashi Iwai
  2011-05-03 15:48                                             ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 15:47 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 16:40:43 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 05:30:44PM +0200, Takashi Iwai wrote:
> > Mark Brown wrote:
> 
> > > You don't need to retransmit the I2C device address or the register
> > > address for every single register.
> 
> > Is this behavior achieved really by these patches?  As far as I read
> 
> As I've repeatedly said these patches only provide a building block for
> that, they don't actually do anything to help with I/O by themselves.

Oh, you wrote it only once in the thread, and it wasn't very clear :)

> > these two patches, they change how values are stored in the rb-tree.
> > Now it'll be partially an array instead of one-value-per-node.  But,
> > this doesn't change how snd_soc_read() / snd_soc_write() are called.
> 
> > Could you give an example which driver would be influenced actually?
> 
> There are no drivers that would benefit yet since the register caches
> they rely on can't support this yet!

Now got it.

IMO, it's easier to expose an API that allows the update of an
register array.  The rest is a job of the cache backend stuff. 
As a fallback, it can be a loop of the single read/write.


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 15:47                                           ` Takashi Iwai
@ 2011-05-03 15:48                                             ` Mark Brown
  2011-05-03 15:54                                               ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 15:48 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 05:47:07PM +0200, Takashi Iwai wrote:

> IMO, it's easier to expose an API that allows the update of an
> register array.  The rest is a job of the cache backend stuff. 
> As a fallback, it can be a loop of the single read/write.

We'll want to do that as well, but we still want the actual data
structure underneath to support that.  Since most register maps that
benefit from compression are also sparse rbtree is the common case for
getting a win from this.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 15:48                                             ` Mark Brown
@ 2011-05-03 15:54                                               ` Takashi Iwai
  2011-05-03 15:55                                                 ` Mark Brown
  0 siblings, 1 reply; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 15:54 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 16:48:47 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 05:47:07PM +0200, Takashi Iwai wrote:
> 
> > IMO, it's easier to expose an API that allows the update of an
> > register array.  The rest is a job of the cache backend stuff. 
> > As a fallback, it can be a loop of the single read/write.
> 
> We'll want to do that as well, but we still want the actual data
> structure underneath to support that.  Since most register maps that
> benefit from compression are also sparse rbtree is the common case for
> getting a win from this.

Hm, but iteration in the sorted order is pretty easy and cheap in
rb-tree structure.  It's not necessarily to be exported in an array.


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 15:54                                               ` Takashi Iwai
@ 2011-05-03 15:55                                                 ` Mark Brown
  2011-05-03 16:06                                                   ` Takashi Iwai
  0 siblings, 1 reply; 36+ messages in thread
From: Mark Brown @ 2011-05-03 15:55 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

On Tue, May 03, 2011 at 05:54:21PM +0200, Takashi Iwai wrote:
> Mark Brown wrote:

> > We'll want to do that as well, but we still want the actual data
> > structure underneath to support that.  Since most register maps that
> > benefit from compression are also sparse rbtree is the common case for
> > getting a win from this.

> Hm, but iteration in the sorted order is pretty easy and cheap in
> rb-tree structure.  It's not necessarily to be exported in an array.

I2C and SPI controllers don't typically do gather terribly well, though,
and there is a win from reducing the number of rbtree nodes anyway.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression
  2011-05-03 15:55                                                 ` Mark Brown
@ 2011-05-03 16:06                                                   ` Takashi Iwai
  0 siblings, 0 replies; 36+ messages in thread
From: Takashi Iwai @ 2011-05-03 16:06 UTC (permalink / raw)
  To: Mark Brown
  Cc: Dimitris Papastamos, alsa-devel, patches, Liam Girdwood,
	Liam Girdwood

At Tue, 3 May 2011 16:55:59 +0100,
Mark Brown wrote:
> 
> On Tue, May 03, 2011 at 05:54:21PM +0200, Takashi Iwai wrote:
> > Mark Brown wrote:
> 
> > > We'll want to do that as well, but we still want the actual data
> > > structure underneath to support that.  Since most register maps that
> > > benefit from compression are also sparse rbtree is the common case for
> > > getting a win from this.
> 
> > Hm, but iteration in the sorted order is pretty easy and cheap in
> > rb-tree structure.  It's not necessarily to be exported in an array.
> 
> I2C and SPI controllers don't typically do gather terribly well, though,
> and there is a win from reducing the number of rbtree nodes anyway.

That's true.  But it may also result in holes.
And, this gives more code complexity.  That's my point.

That being said, I see the usefulness of this change, too.  But, also
interested in a bit more quantitative investigation about its gain,
too.


Takashi

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2011-05-03 16:06 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-05-02 12:28 [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression Dimitris Papastamos
2011-05-02 12:28 ` [PATCH 2/2 v2] ASoC: soc-cache: cache a pointer to the last accessed rbtree block Dimitris Papastamos
2011-05-02 14:29 ` [PATCH 1/2 v2] ASoC: soc-cache: block based rbtree compression Takashi Iwai
2011-05-02 14:40   ` Dimitris Papastamos
2011-05-02 14:46     ` Takashi Iwai
2011-05-02 14:51       ` Dimitris Papastamos
2011-05-02 14:42   ` Dimitris Papastamos
2011-05-02 14:47     ` Takashi Iwai
2011-05-02 14:58       ` Dimitris Papastamos
2011-05-03  9:43         ` Mark Brown
2011-05-03 10:38           ` Takashi Iwai
2011-05-03 10:47             ` Mark Brown
2011-05-03 10:50               ` Takashi Iwai
2011-05-03 11:02                 ` Mark Brown
2011-05-03 12:25                   ` Takashi Iwai
2011-05-03 13:02                     ` Mark Brown
2011-05-03 13:18                       ` Takashi Iwai
2011-05-03 13:24                         ` Mark Brown
2011-05-03 13:32                           ` Takashi Iwai
2011-05-03 13:51                             ` Mark Brown
2011-05-03 14:07                               ` Takashi Iwai
2011-05-03 14:27                                 ` Mark Brown
2011-05-03 15:22                                   ` Takashi Iwai
2011-05-03 15:24                                     ` Mark Brown
2011-05-03 15:30                                       ` Takashi Iwai
2011-05-03 15:40                                         ` Mark Brown
2011-05-03 15:47                                           ` Takashi Iwai
2011-05-03 15:48                                             ` Mark Brown
2011-05-03 15:54                                               ` Takashi Iwai
2011-05-03 15:55                                                 ` Mark Brown
2011-05-03 16:06                                                   ` Takashi Iwai
2011-05-03 11:21 ` Mark Brown
2011-05-03 11:24   ` Dimitris Papastamos
2011-05-03 11:26     ` Mark Brown
2011-05-03 11:46       ` Dimitris Papastamos
2011-05-03 13:44         ` Mark Brown

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.