From: Jens Axboe <axboe@suse.de>
To: Norberto Bensa <norberto+linux-kernel@bensa.ath.cx>
Cc: linux-kernel@vger.kernel.org
Subject: Re: Badness in cfq_sort_rr_list at drivers/block/cfq-iosched.c:428
Date: Wed, 15 Sep 2004 08:43:28 +0200 [thread overview]
Message-ID: <20040915064327.GE2304@suse.de> (raw)
In-Reply-To: <200409150235.18367.norberto+linux-kernel@bensa.ath.cx>
On Wed, Sep 15 2004, Norberto Bensa wrote:
> Hello,
>
> Kernel is 2.6.9-rc1-mm5. I just found this on dmesg. Not sure what the box was
> doing at that time:
>
> Badness in cfq_sort_rr_list at drivers/block/cfq-iosched.c:428
> [<c01e3674>] cfq_add_crq_rb+0xe2/0x144
> [<c01e4215>] cfq_enqueue+0x33/0x5f
> [<c01e42ad>] cfq_insert_request+0x6c/0xe3
> [<c01de2d2>] __elv_add_request+0x3f/0x79
> [<c01e0940>] __make_request+0x402/0x449
It's harmless. I've sent this incremental patch to Andrew that fixes
this error and some others.
This update:
- Removes some debugging code and the dprintk() stuff
- Detects when tagging is used or not, defines a threshold of
CFQ_MAX_TAG (default: 4) for when we change from non-tagged to tagged
operations. This affects accounting.
- Fix a bug where the rr_list was sorted badly. Also make sure we always
keep it sorted, and add some logic to detect when to resort. This
could trigger a harmless WARN_ON at line 428.
- Don't set ->service_start when queue is removed from rr_list, this
could starve a queue if it was quickly readded to the rr_list again.
In that case, we never would hit the 1 second interval for aging the
service_used down.
- Cache jiffies in some function instead of re-reading it 2-3 times.
You can fold this in with the cfq-v2 patch if you want.
Signed-off-by: Jens Axboe <axboe@suse.de>
--- drivers/block/cfq-iosched.c~ 2004-09-13 20:06:30.425042983 +0200
+++ drivers/block/cfq-iosched.c 2004-09-13 20:14:33.934118844 +0200
@@ -22,14 +22,6 @@
#include <linux/rbtree.h>
#include <linux/mempool.h>
-#undef CFQ_DEBUG
-
-#ifdef CFQ_DEBUG
-#define dprintk(fmt, args...) printk(KERN_ERR "cfq: " fmt, ##args)
-#else
-#define dprintk(fmt, args...)
-#endif
-
static unsigned long max_elapsed_crq;
static unsigned long max_elapsed_dispatch;
@@ -84,6 +76,11 @@
#define rq_rb_key(rq) (rq)->sector
/*
+ * threshold for switching off non-tag accounting
+ */
+#define CFQ_MAX_TAG (4)
+
+/*
* sort key types and names
*/
enum {
@@ -125,18 +122,21 @@
sector_t last_sector;
+ int rq_in_driver;
+
/*
* tunables, see top of file
*/
unsigned int cfq_quantum;
unsigned int cfq_queued;
- unsigned int cfq_tagged;
unsigned int cfq_fifo_expire_r;
unsigned int cfq_fifo_expire_w;
unsigned int cfq_fifo_batch_expire;
unsigned int cfq_back_penalty;
unsigned int cfq_back_max;
unsigned int find_best_crq;
+
+ unsigned int cfq_tagged;
};
struct cfq_queue {
@@ -170,14 +170,12 @@
unsigned long service_start;
unsigned long service_used;
+ unsigned int max_rate;
+
/* number of requests that have been handed to the driver */
int in_flight;
/* number of currently allocated requests */
int alloc_limit[2];
-
-#ifdef CFQ_DEBUG
- char name[16];
-#endif
};
struct cfq_rq {
@@ -307,7 +305,7 @@
if (blk_barrier_rq(rq))
break;
-
+
if (distance < abs(s1 - rq->sector + rq->nr_sectors)) {
distance = abs(s1 - rq->sector +rq->nr_sectors);
last = rq->sector + rq->nr_sectors;
@@ -404,11 +402,42 @@
cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
}
-static inline void
-cfq_sort_rr_list(struct cfq_queue *cfqq)
+static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
+{
+ struct list_head *head = &cfqq->cfqd->rr_list;
+ struct list_head *next, *prev;
+
+ /*
+ * list might still be ordered
+ */
+ next = cfqq->cfq_list.next;
+ if (next != head) {
+ struct cfq_queue *cnext = list_entry_cfqq(next);
+
+ if (cfqq->service_used > cnext->service_used)
+ return 1;
+ }
+
+ prev = cfqq->cfq_list.prev;
+ if (prev != head) {
+ struct cfq_queue *cprev = list_entry_cfqq(prev);
+
+ if (cfqq->service_used < cprev->service_used)
+ return 1;
+ }
+
+ return 0;
+}
+
+static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
{
struct list_head *entry = &cfqq->cfqd->rr_list;
+ if (!cfqq->on_rr)
+ return;
+ if (!new_queue && !cfq_check_sort_rr_list(cfqq))
+ return;
+
list_del(&cfqq->cfq_list);
/*
@@ -446,21 +475,16 @@
static inline void
cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- BUG_ON(cfqq->on_rr);
-
/*
* it's currently on the empty list
*/
- cfq_sort_rr_list(cfqq);
cfqq->on_rr = 1;
cfqd->busy_queues++;
- /*
- * if the queue is on the empty_list, service_start was the time
- * where it was deleted from the rr_list.
- */
if (time_after(jiffies, cfqq->service_start + cfq_service))
cfqq->service_used >>= 3;
+
+ cfq_sort_rr_list(cfqq, 1);
}
static inline void
@@ -468,7 +492,6 @@
{
list_move(&cfqq->cfq_list, &cfqd->empty_list);
cfqq->on_rr = 0;
- cfqq->service_start = jiffies;
BUG_ON(!cfqd->busy_queues);
cfqd->busy_queues--;
@@ -492,10 +515,8 @@
rb_erase(&crq->rb_node, &cfqq->sort_list);
RB_CLEAR_COLOR(&crq->rb_node);
- if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr) {
- dprintk("moving 0x%p empty_list\n", cfqq);
+ if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr)
cfq_del_cfqq_rr(cfqd, cfqq);
- }
}
}
@@ -541,11 +562,8 @@
rb_insert_color(&crq->rb_node, &cfqq->sort_list);
- if (!cfqq->on_rr) {
+ if (!cfqq->on_rr)
cfq_add_cfqq_rr(cfqd, cfqq);
- dprintk("moving to rr list %d\n", cfqd->busy_queues);
- } else
- dprintk("already on rr list %d\n", cfqd->busy_queues);
/*
* check if this request is a better next-serve candidate
@@ -590,11 +608,30 @@
return NULL;
}
-static void cfq_remove_request(request_queue_t *q, struct request *rq)
+/*
+ * make sure the service time gets corrected on reissue of this request
+ */
+static void cfq_requeue_request(request_queue_t *q, struct request *rq)
{
struct cfq_rq *crq = RQ_DATA(rq);
- dprintk("removing 0x%p\n", rq);
+ if (crq) {
+ struct cfq_queue *cfqq = crq->cfq_queue;
+
+ if (cfqq->cfqd->cfq_tagged) {
+ cfqq->service_used--;
+ cfq_sort_rr_list(cfqq, 0);
+ }
+
+ crq->accounted = 0;
+ cfqq->cfqd->rq_in_driver--;
+ }
+ list_add(&rq->queuelist, &q->queue_head);
+}
+
+static void cfq_remove_request(request_queue_t *q, struct request *rq)
+{
+ struct cfq_rq *crq = RQ_DATA(rq);
if (crq) {
cfq_remove_merge_hints(q, crq);
@@ -730,20 +767,21 @@
struct cfq_data *cfqd = cfqq->cfqd;
const int reads = !list_empty(&cfqq->fifo[0]);
const int writes = !list_empty(&cfqq->fifo[1]);
+ unsigned long now = jiffies;
struct cfq_rq *crq;
- if (jiffies - cfqq->last_fifo_expire < cfqd->cfq_fifo_batch_expire)
+ if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
return NULL;
crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist));
- if (reads && time_after(jiffies, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
- cfqq->last_fifo_expire = jiffies;
+ if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
+ cfqq->last_fifo_expire = now;
return crq;
}
crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist));
- if (writes && time_after(jiffies, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
- cfqq->last_fifo_expire = jiffies;
+ if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
+ cfqq->last_fifo_expire = now;
return crq;
}
@@ -822,7 +860,8 @@
static inline void cfq_account_dispatch(struct cfq_rq *crq)
{
struct cfq_queue *cfqq = crq->cfq_queue;
- unsigned long elapsed = jiffies - crq->queue_start;
+ struct cfq_data *cfqd = cfqq->cfqd;
+ unsigned long now, elapsed;
/*
* accounted bit is necessary since some drivers will call
@@ -831,52 +870,62 @@
if (crq->accounted)
return;
+ now = jiffies;
+ if (cfqq->service_start == ~0UL)
+ cfqq->service_start = now;
+
/*
* on drives with tagged command queueing, command turn-around time
* doesn't necessarily reflect the time spent processing this very
* command inside the drive. so do the accounting differently there,
* by just sorting on the number of requests
*/
- if (cfqq->cfqd->cfq_tagged) {
- if (time_after(jiffies, cfqq->service_start + cfq_service)) {
- cfqq->service_start = jiffies;
+ if (cfqd->cfq_tagged) {
+ if (time_after(now, cfqq->service_start + cfq_service)) {
+ cfqq->service_start = now;
cfqq->service_used /= 10;
}
cfqq->service_used++;
+ cfq_sort_rr_list(cfqq, 0);
}
+ elapsed = now - crq->queue_start;
if (elapsed > max_elapsed_dispatch)
max_elapsed_dispatch = elapsed;
crq->accounted = 1;
- crq->service_start = jiffies;
+ crq->service_start = now;
+
+ if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) {
+ cfqq->cfqd->cfq_tagged = 1;
+ printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG);
+ }
}
static inline void
cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
{
- unsigned long start_val = cfqq->service_used;
+ struct cfq_data *cfqd = cfqq->cfqd;
+
+ WARN_ON(!cfqd->rq_in_driver);
+ cfqd->rq_in_driver--;
- if (!cfqq->cfqd->cfq_tagged) {
- unsigned long duration = jiffies - crq->service_start;
+ if (!cfqd->cfq_tagged) {
+ unsigned long now = jiffies;
+ unsigned long duration = now - crq->service_start;
- if (time_after(jiffies, cfqq->service_start + cfq_service)) {
- cfqq->service_start = jiffies;
+ if (time_after(now, cfqq->service_start + cfq_service)) {
+ cfqq->service_start = now;
cfqq->service_used >>= 3;
}
cfqq->service_used += duration;
+ cfq_sort_rr_list(cfqq, 0);
if (duration > max_elapsed_crq)
max_elapsed_crq = duration;
}
-
- /*
- * make sure list stays properly sorted, but only do so if necessary
- */
- if (cfqq->on_rr && cfqq->service_used != start_val)
- cfq_sort_rr_list(cfqq);
}
static struct request *cfq_next_request(request_queue_t *q)
@@ -913,13 +962,9 @@
{
BUG_ON(!atomic_read(&cfqq->ref));
- dprintk("cfq_put_queue 0x%p, ref\n", atomic_read(&cfqq->ref));
-
if (!atomic_dec_and_test(&cfqq->ref))
return;
- dprintk("killing queue 0x%p/%s\n", cfqq, cfqq->name);
-
BUG_ON(rb_first(&cfqq->sort_list));
BUG_ON(cfqq->on_rr);
@@ -1163,11 +1208,8 @@
hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
atomic_set(&cfqq->ref, 0);
cfqq->cfqd = cfqd;
-#ifdef CFQ_DEBUG
- strncpy(cfqq->name, current->comm, sizeof(cfqq->name)-1);
-#endif
- dprintk("cfqq set up for 0x%p/%s\n", cfqq, cfqq->name);
cfqq->key_type = cfqd->key_type;
+ cfqq->service_start = ~0UL;
}
if (new_cfqq)
@@ -1212,17 +1254,14 @@
switch (where) {
case ELEVATOR_INSERT_BACK:
- dprintk("adding back 0x%p\n", rq);
while (cfq_dispatch_requests(q, cfqd->cfq_quantum))
;
list_add_tail(&rq->queuelist, &q->queue_head);
break;
case ELEVATOR_INSERT_FRONT:
- dprintk("adding front 0x%p\n", rq);
list_add(&rq->queuelist, &q->queue_head);
break;
case ELEVATOR_INSERT_SORT:
- dprintk("adding sort 0x%p\n", rq);
BUG_ON(!blk_fs_request(rq));
cfq_enqueue(cfqd, crq);
break;
@@ -1511,7 +1599,6 @@
cfqd->cfq_back_max = cfq_back_max;
cfqd->cfq_back_penalty = cfq_back_penalty;
- dprintk("cfq on queue 0x%p\n", q);
return 0;
out_spare:
mempool_destroy(cfqd->crq_pool);
@@ -1632,7 +1719,7 @@
}
len += sprintf(page+len, " busy queues total: %d\n", i);
queues = i;
-
+
len += sprintf(page+len, "Empty queue list:\n");
i = 0;
list_for_each(entry, &cfqd->empty_list) {
@@ -1654,7 +1741,6 @@
}
SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum);
SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued);
-SHOW_FUNCTION(cfq_tagged_show, cfqd->cfq_tagged);
SHOW_FUNCTION(cfq_fifo_expire_r_show, cfqd->cfq_fifo_expire_r);
SHOW_FUNCTION(cfq_fifo_expire_w_show, cfqd->cfq_fifo_expire_w);
SHOW_FUNCTION(cfq_fifo_batch_expire_show, cfqd->cfq_fifo_batch_expire);
@@ -1675,7 +1761,6 @@
}
STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX);
STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX);
-STORE_FUNCTION(cfq_tagged_store, &cfqd->cfq_tagged, 0, 1);
STORE_FUNCTION(cfq_fifo_expire_r_store, &cfqd->cfq_fifo_expire_r, 1, UINT_MAX);
STORE_FUNCTION(cfq_fifo_expire_w_store, &cfqd->cfq_fifo_expire_w, 1, UINT_MAX);
STORE_FUNCTION(cfq_fifo_batch_expire_store, &cfqd->cfq_fifo_batch_expire, 0, UINT_MAX);
@@ -1694,11 +1779,6 @@
.show = cfq_queued_show,
.store = cfq_queued_store,
};
-static struct cfq_fs_entry cfq_tagged_entry = {
- .attr = {.name = "tagged", .mode = S_IRUGO | S_IWUSR },
- .show = cfq_tagged_show,
- .store = cfq_tagged_store,
-};
static struct cfq_fs_entry cfq_fifo_expire_r_entry = {
.attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR },
.show = cfq_fifo_expire_r_show,
@@ -1746,7 +1826,6 @@
static struct attribute *default_attrs[] = {
&cfq_quantum_entry.attr,
&cfq_queued_entry.attr,
- &cfq_tagged_entry.attr,
&cfq_fifo_expire_r_entry.attr,
&cfq_fifo_expire_w_entry.attr,
&cfq_fifo_batch_expire_entry.attr,
@@ -1805,6 +1884,7 @@
.elevator_next_req_fn = cfq_next_request,
.elevator_add_req_fn = cfq_insert_request,
.elevator_remove_req_fn = cfq_remove_request,
+ .elevator_requeue_req_fn = cfq_requeue_request,
.elevator_queue_empty_fn = cfq_queue_empty,
.elevator_completed_req_fn = cfq_completed_request,
.elevator_former_req_fn = cfq_former_request,
--
Jens Axboe
prev parent reply other threads:[~2004-09-15 6:45 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-09-15 5:35 Badness in cfq_sort_rr_list at drivers/block/cfq-iosched.c:428 Norberto Bensa
2004-09-15 6:43 ` Jens Axboe [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040915064327.GE2304@suse.de \
--to=axboe@suse.de \
--cc=linux-kernel@vger.kernel.org \
--cc=norberto+linux-kernel@bensa.ath.cx \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.