linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context
@ 2010-05-20 19:21 Konstantin Khlebnikov
  2010-05-20 19:21 ` [PATCH 2/2] cfq-iosched: compact io_context radix_tree Konstantin Khlebnikov
  2010-05-21  9:40 ` [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context Jens Axboe
  0 siblings, 2 replies; 5+ messages in thread
From: Konstantin Khlebnikov @ 2010-05-20 19:21 UTC (permalink / raw)
  To: linux-kernel, Jens Axboe; +Cc: Dmitry Monakhov

Remove ->dead_key field from cfq_io_context to shrink its size to 128 bytes.
(64 bytes for 32-bit hosts)

Use the lowest bit of ->key as the dead mark, instead of moving the key to a
separate field. With this, a dead cfq_io_context automatically has
cic->key != cfqd. Thus, io_context's last-hit cache should work without changes.

To check that ->key is in the non-dead state, now compare it with cfqd
instead of checking ->key for a non-NULL value, as was done before.

Also remove the obsolete race protection in cfq_cic_lookup.
This race has been gone since v2.6.24-1728-g4ac845a.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
---
 block/cfq-iosched.c       |   41 ++++++++++++++++++++++++++++-------------
 include/linux/iocontext.h |    1 -
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5f127cf..67745e5 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -431,6 +431,23 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
 	cic->cfqq[is_sync] = cfqq;
 }
 
+#define CIC_DEAD_KEY	1ul
+
+static inline void *cfqd_dead_key(struct cfq_data *cfqd)
+{
+	return (void *)((unsigned long) cfqd | CIC_DEAD_KEY);
+}
+
+static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
+{
+	struct cfq_data *cfqd = cic->key;
+
+	if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
+		return NULL;
+
+	return cfqd;
+}
+
 /*
  * We regard a request as SYNC, if it's either a read or has the SYNC bit
  * set (in which case it could also be direct WRITE).
@@ -2476,11 +2493,12 @@ static void cfq_cic_free(struct cfq_io_context *cic)
 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
 {
 	unsigned long flags;
+	unsigned long dead_key = (unsigned long) cic->key;
 
-	BUG_ON(!cic->dead_key);
+	BUG_ON(!(dead_key & CIC_DEAD_KEY));
 
 	spin_lock_irqsave(&ioc->lock, flags);
-	radix_tree_delete(&ioc->radix_root, cic->dead_key);
+	radix_tree_delete(&ioc->radix_root, dead_key & ~CIC_DEAD_KEY);
 	hlist_del_rcu(&cic->cic_list);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -2539,11 +2557,10 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 	list_del_init(&cic->queue_list);
 
 	/*
-	 * Make sure key == NULL is seen for dead queues
+	 * Make sure dead mark is seen for dead queues
 	 */
 	smp_wmb();
-	cic->dead_key = (unsigned long) cic->key;
-	cic->key = NULL;
+	cic->key = cfqd_dead_key(cfqd);
 
 	if (ioc->ioc_data == cic)
 		rcu_assign_pointer(ioc->ioc_data, NULL);
@@ -2562,7 +2579,7 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 static void cfq_exit_single_io_context(struct io_context *ioc,
 				       struct cfq_io_context *cic)
 {
-	struct cfq_data *cfqd = cic->key;
+	struct cfq_data *cfqd = cic_to_cfqd(cic);
 
 	if (cfqd) {
 		struct request_queue *q = cfqd->queue;
@@ -2575,7 +2592,7 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
 		 * race between exiting task and queue
 		 */
 		smp_read_barrier_depends();
-		if (cic->key)
+		if (cic->key == cfqd)
 			__cfq_exit_single_io_context(cfqd, cic);
 
 		spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2655,7 +2672,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 
 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
 {
-	struct cfq_data *cfqd = cic->key;
+	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
 	unsigned long flags;
 
@@ -2712,7 +2729,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
 {
 	struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
-	struct cfq_data *cfqd = cic->key;
+	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	unsigned long flags;
 	struct request_queue *q;
 
@@ -2849,6 +2866,7 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
 	unsigned long flags;
 
 	WARN_ON(!list_empty(&cic->queue_list));
+	BUG_ON(cic->key != cfqd_dead_key(cfqd));
 
 	spin_lock_irqsave(&ioc->lock, flags);
 
@@ -2866,7 +2884,6 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
 	struct cfq_io_context *cic;
 	unsigned long flags;
-	void *k;
 
 	if (unlikely(!ioc))
 		return NULL;
@@ -2887,9 +2904,7 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 		rcu_read_unlock();
 		if (!cic)
 			break;
-		/* ->key must be copied to avoid race with cfq_exit_queue() */
-		k = cic->key;
-		if (unlikely(!k)) {
+		if (unlikely(cic->key != cfqd)) {
 			cfq_drop_dead_cic(cfqd, ioc, cic);
 			rcu_read_lock();
 			continue;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index a0bb301..64d5291 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -7,7 +7,6 @@
 struct cfq_queue;
 struct cfq_io_context {
 	void *key;
-	unsigned long dead_key;
 
 	struct cfq_queue *cfqq[2];
 


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] cfq-iosched: compact io_context radix_tree
  2010-05-20 19:21 [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context Konstantin Khlebnikov
@ 2010-05-20 19:21 ` Konstantin Khlebnikov
  2010-05-21  9:40 ` [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context Jens Axboe
  1 sibling, 0 replies; 5+ messages in thread
From: Konstantin Khlebnikov @ 2010-05-20 19:21 UTC (permalink / raw)
  To: linux-kernel, Jens Axboe; +Cc: Dmitry Monakhov

Use small consecutive indexes as radix tree keys instead of the sparse cfqd address.

This change reduces the radix tree depth from 11 (6 for 32-bit hosts)
to 1 if the host has <=64 disks under cfq control, or to 0 if there is only one disk.
So, this patch saves 10*560 bytes for each process (5*296 for 32-bit hosts).

For each cfqd, allocate a cic index from the ida.
To unlink a dead cic from the tree without accessing cfqd, store the index in ->key.
(bit 0 -- dead mark, bits 1..31 -- index: ida produces ids in the range 0..2^31-1)

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
---
 block/cfq-iosched.c |   44 +++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 67745e5..6c44abb 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -63,6 +63,9 @@ static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
 static struct completion *ioc_gone;
 static DEFINE_SPINLOCK(ioc_gone_lock);
 
+static DEFINE_SPINLOCK(cic_index_lock);
+static DEFINE_IDA(cic_index_ida);
+
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -272,6 +275,7 @@ struct cfq_data {
 	unsigned int cfq_latency;
 	unsigned int cfq_group_isolation;
 
+	unsigned int cic_index;
 	struct list_head cic_list;
 
 	/*
@@ -432,10 +436,11 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
 }
 
 #define CIC_DEAD_KEY	1ul
+#define CIC_DEAD_INDEX_SHIFT	1
 
 static inline void *cfqd_dead_key(struct cfq_data *cfqd)
 {
-	return (void *)((unsigned long) cfqd | CIC_DEAD_KEY);
+	return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
 }
 
 static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
@@ -2498,7 +2503,7 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
 	BUG_ON(!(dead_key & CIC_DEAD_KEY));
 
 	spin_lock_irqsave(&ioc->lock, flags);
-	radix_tree_delete(&ioc->radix_root, dead_key & ~CIC_DEAD_KEY);
+	radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
 	hlist_del_rcu(&cic->cic_list);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -2872,7 +2877,7 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
 
 	BUG_ON(ioc->ioc_data == cic);
 
-	radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd);
+	radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
 	hlist_del_rcu(&cic->cic_list);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -2900,7 +2905,7 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	}
 
 	do {
-		cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
+		cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
 		rcu_read_unlock();
 		if (!cic)
 			break;
@@ -2937,7 +2942,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 
 		spin_lock_irqsave(&ioc->lock, flags);
 		ret = radix_tree_insert(&ioc->radix_root,
-						(unsigned long) cfqd, cic);
+						cfqd->cic_index, cic);
 		if (!ret)
 			hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
 		spin_unlock_irqrestore(&ioc->lock, flags);
@@ -3676,10 +3681,32 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	cfq_shutdown_timer_wq(cfqd);
 
+	spin_lock(&cic_index_lock);
+	ida_remove(&cic_index_ida, cfqd->cic_index);
+	spin_unlock(&cic_index_lock);
+
 	/* Wait for cfqg->blkg->key accessors to exit their grace periods. */
 	call_rcu(&cfqd->rcu, cfq_cfqd_free);
 }
 
+static int cfq_alloc_cic_index(void)
+{
+	int index, error;
+
+	do {
+		if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
+			return -ENOMEM;
+
+		spin_lock(&cic_index_lock);
+		error = ida_get_new(&cic_index_ida, &index);
+		spin_unlock(&cic_index_lock);
+		if (error && error != -EAGAIN)
+			return error;
+	} while (error);
+
+	return index;
+}
+
 static void *cfq_init_queue(struct request_queue *q)
 {
 	struct cfq_data *cfqd;
@@ -3687,10 +3714,16 @@ static void *cfq_init_queue(struct request_queue *q)
 	struct cfq_group *cfqg;
 	struct cfq_rb_root *st;
 
+	i = cfq_alloc_cic_index();
+	if (i < 0)
+		return NULL;
+
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (!cfqd)
 		return NULL;
 
+	cfqd->cic_index = i;
+
 	/* Init root service tree */
 	cfqd->grp_service_tree = CFQ_RB_ROOT;
 
@@ -3952,6 +3985,7 @@ static void __exit cfq_exit(void)
 	 */
 	if (elv_ioc_count_read(cfq_ioc_count))
 		wait_for_completion(&all_gone);
+	ida_destroy(&cic_index_ida);
 	cfq_slab_kill();
 }
 


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context
  2010-05-20 19:21 [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context Konstantin Khlebnikov
  2010-05-20 19:21 ` [PATCH 2/2] cfq-iosched: compact io_context radix_tree Konstantin Khlebnikov
@ 2010-05-21  9:40 ` Jens Axboe
  2010-05-21 14:53   ` Konstantin Khlebnikov
  1 sibling, 1 reply; 5+ messages in thread
From: Jens Axboe @ 2010-05-21  9:40 UTC (permalink / raw)
  To: Konstantin Khlebnikov; +Cc: linux-kernel, Dmitry Monakhov

On Thu, May 20 2010, Konstantin Khlebnikov wrote:
> Remove ->dead_key field from cfq_io_context to shrink its size to 128 bytes.
> (64 bytes for 32-bit hosts)
> 
> Use lower bit in ->key as dead-mark, instead of moving key to separate field.
> After this for dead cfq_io_context we got cic->key != cfqd automatically.
> Thus, io_context's last-hit cache should work without changing.
> 
> Now to check ->key for non-dead state compare it with cfqd,
> instead of checking ->key for non-null value as it was before.
> 
> Plus remove obsolete race protection in cfq_cic_lookup.
> This race gone after v2.6.24-1728-g4ac845a

This, and the second patch, look really good. How much testing have you
done with it?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context
  2010-05-21  9:40 ` [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context Jens Axboe
@ 2010-05-21 14:53   ` Konstantin Khlebnikov
  2010-05-21 17:34     ` Jens Axboe
  0 siblings, 1 reply; 5+ messages in thread
From: Konstantin Khlebnikov @ 2010-05-21 14:53 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel@vger.kernel.org, Dmitry Monakhov

On 21.05.2010 13:40, Jens Axboe wrote:
> On Thu, May 20 2010, Konstantin Khlebnikov wrote:
>> Remove ->dead_key field from cfq_io_context to shrink its size to 128 bytes.
>> (64 bytes for 32-bit hosts)
>>
>> Use lower bit in ->key as dead-mark, instead of moving key to separate field.
>> After this for dead cfq_io_context we got cic->key != cfqd automatically.
>> Thus, io_context's last-hit cache should work without changing.
>>
>> Now to check ->key for non-dead state compare it with cfqd,
>> instead of checking ->key for non-null value as it was before.
>>
>> Plus remove obsolete race protection in cfq_cic_lookup.
>> This race gone after v2.6.24-1728-g4ac845a
>
> This, and the second patch, look really good. How much testing have you
> done with it?
>

I ran multiple fsstress instances while simultaneously switching the io-scheduler
in a loop for several hours; no oopses or leaks were detected.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context
  2010-05-21 14:53   ` Konstantin Khlebnikov
@ 2010-05-21 17:34     ` Jens Axboe
  0 siblings, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2010-05-21 17:34 UTC (permalink / raw)
  To: Konstantin Khlebnikov; +Cc: linux-kernel@vger.kernel.org, Dmitry Monakhov

On Fri, May 21 2010, Konstantin Khlebnikov wrote:
> On 21.05.2010 13:40, Jens Axboe wrote:
>> On Thu, May 20 2010, Konstantin Khlebnikov wrote:
>>> Remove ->dead_key field from cfq_io_context to shrink its size to 128 bytes.
>>> (64 bytes for 32-bit hosts)
>>>
>>> Use lower bit in ->key as dead-mark, instead of moving key to separate field.
>>> After this for dead cfq_io_context we got cic->key != cfqd automatically.
>>> Thus, io_context's last-hit cache should work without changing.
>>>
>>> Now to check ->key for non-dead state compare it with cfqd,
>>> instead of checking ->key for non-null value as it was before.
>>>
>>> Plus remove obsolete race protection in cfq_cic_lookup.
>>> This race gone after v2.6.24-1728-g4ac845a
>>
>> This, and the second patch, look really good. How much testing have you
>> done with it?
>>
>
> I have run multiple fsstress and simultaneously switch io-scheduler in loop
> for several hours, no oopses and leaks detected.

OK, I'll queue it up for later in the 2.6.35 cycle.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-05-21 17:34 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-20 19:21 [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context Konstantin Khlebnikov
2010-05-20 19:21 ` [PATCH 2/2] cfq-iosched: compact io_context radix_tree Konstantin Khlebnikov
2010-05-21  9:40 ` [PATCH 1/2] cfq-iosched: remove dead_key from cfq_io_context Jens Axboe
2010-05-21 14:53   ` Konstantin Khlebnikov
2010-05-21 17:34     ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).