From: Jens Axboe <jens.axboe@oracle.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: chris.mason@oracle.com, david@fromorbit.com, hch@infradead.org,
akpm@linux-foundation.org, jack@suse.cz,
yanmin_zhang@linux.intel.com, Jens Axboe <jens.axboe@oracle.com>
Subject: [PATCH 10/13] block: add function for waiting for a specific free tag
Date: Mon, 25 May 2009 09:31:02 +0200
Message-ID: <1243236668-3398-20-git-send-email-jens.axboe@oracle.com>
In-Reply-To: <1243236668-3398-1-git-send-email-jens.axboe@oracle.com>
We need this in libata, so that tag usage for internal commands doesn't
race with the block layer's use of the same tags.
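The intended usage in libata is to bracket an internal command with the
two new helpers. Roughly, and simplified from the ata_exec_internal_sg()
change below (locking and error paths elided):

    /* claim the tag in the block layer map, sleeping until it is free */
    if (dev->sdev && dev->sdev->request_queue)
        blk_queue_acquire_tag(dev->sdev->request_queue, tag);

    /* ... set up, issue and wait for the internal command ... */

    /* hand the tag back and wake up anyone waiting on it */
    if (dev->sdev && dev->sdev->request_queue)
        blk_queue_release_tag(dev->sdev->request_queue, tag);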
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
block/blk-tag.c | 99 ++++++++++++++++++++++++++++++++-------------
drivers/ata/libata-core.c | 29 +++++++++----
include/linux/blkdev.h | 3 +
3 files changed, 95 insertions(+), 36 deletions(-)
diff --git a/block/blk-tag.c b/block/blk-tag.c
index e9a7501..208468b 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -149,6 +149,7 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
goto fail;
atomic_set(&tags->refcnt, 1);
+ init_waitqueue_head(&tags->waitq);
return tags;
fail:
kfree(tags);
@@ -264,6 +265,65 @@ int blk_queue_resize_tags(struct request_queue *q, int new_depth)
}
EXPORT_SYMBOL(blk_queue_resize_tags);
+void blk_queue_acquire_tag(struct request_queue *q, int tag)
+{
+ struct blk_queue_tag *bqt;
+
+ if (!blk_queue_tagged(q) || !q->queue_tags)
+ return;
+
+ bqt = q->queue_tags;
+ do {
+ DEFINE_WAIT(wait);
+
+ if (!test_and_set_bit_lock(tag, bqt->tag_map))
+ break;
+
+ /*
+ * Queue on the waitq before re-checking the tag, so that we
+ * can't miss a wakeup from blk_queue_release_tag().
+ */
+ prepare_to_wait(&bqt->waitq, &wait, TASK_UNINTERRUPTIBLE);
+ if (test_and_set_bit_lock(tag, bqt->tag_map)) {
+ spin_unlock_irq(q->queue_lock);
+ schedule();
+ spin_lock_irq(q->queue_lock);
+ finish_wait(&bqt->waitq, &wait);
+ continue;
+ }
+
+ /* got the tag on the re-check, don't loop around again */
+ finish_wait(&bqt->waitq, &wait);
+ break;
+ } while (1);
+}
+
+void blk_queue_release_tag(struct request_queue *q, int tag)
+{
+ struct blk_queue_tag *bqt = q->queue_tags;
+
+ if (!blk_queue_tagged(q) || !bqt)
+ return;
+
+ /*
+ * Normally we store a request pointer in the tag index, but for
+ * blk_queue_acquire_tag() usage, we may not have something specific
+ * assigned to the tag slot. In any case, be safe and clear it.
+ */
+ bqt->tag_index[tag] = NULL;
+
+ if (unlikely(!test_bit(tag, bqt->tag_map))) {
+ printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
+ __func__, tag);
+ return;
+ }
+ /*
+ * The tag_map bit acts as a lock for tag_index[bit], so we need
+ * unlock memory barrier semantics.
+ */
+ clear_bit_unlock(tag, bqt->tag_map);
+
+ /*
+ * We don't need a memory barrier here, since we have the bit lock
+ * ordering above. Otherwise it would need an smp_mb();
+ */
+ if (waitqueue_active(&bqt->waitq))
+ wake_up(&bqt->waitq);
+}
+EXPORT_SYMBOL(blk_queue_release_tag);
+
/**
* blk_queue_end_tag - end tag operations for a request
* @q: the request queue for the device
@@ -285,33 +345,17 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
BUG_ON(tag == -1);
- if (unlikely(tag >= bqt->real_max_depth))
- /*
- * This can happen after tag depth has been reduced.
- * FIXME: how about a warning or info message here?
- */
- return;
-
- list_del_init(&rq->queuelist);
- rq->cmd_flags &= ~REQ_QUEUED;
- rq->tag = -1;
-
- if (unlikely(bqt->tag_index[tag] == NULL))
- printk(KERN_ERR "%s: tag %d is missing\n",
- __func__, tag);
-
- bqt->tag_index[tag] = NULL;
-
- if (unlikely(!test_bit(tag, bqt->tag_map))) {
- printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
- __func__, tag);
- return;
- }
/*
- * The tag_map bit acts as a lock for tag_index[bit], so we need
- * unlock memory barrier semantics.
+ * When the tag depth is being reduced, we don't wait for requests with
+ * higher tags to finish, so a tag >= real_max_depth here isn't an error.
*/
- clear_bit_unlock(tag, bqt->tag_map);
+ if (tag < bqt->real_max_depth) {
+ list_del_init(&rq->queuelist);
+ rq->cmd_flags &= ~REQ_QUEUED;
+ rq->tag = -1;
+
+ blk_queue_release_tag(q, tag);
+ }
}
EXPORT_SYMBOL(blk_queue_end_tag);
@@ -336,8 +380,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
int blk_queue_start_tag(struct request_queue *q, struct request *rq)
{
struct blk_queue_tag *bqt = q->queue_tags;
- unsigned max_depth;
- int tag;
+ int max_depth, tag;
if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
printk(KERN_ERR
@@ -371,7 +414,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
} while (test_and_set_bit_lock(tag, bqt->tag_map));
/*
* We need lock ordering semantics given by test_and_set_bit_lock.
- * See blk_queue_end_tag for details.
+ * See blk_queue_release_tag() for details.
*/
rq->cmd_flags |= REQ_QUEUED;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 74c9055..5e43f2b 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -61,6 +61,7 @@
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
#include <linux/libata.h>
#include <asm/byteorder.h>
#include <linux/cdrom.h>
@@ -1765,15 +1766,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
u32 preempted_sactive, preempted_qc_active;
int preempted_nr_active_links;
DECLARE_COMPLETION_ONSTACK(wait);
- unsigned long flags;
unsigned int err_mask;
int rc;
- spin_lock_irqsave(ap->lock, flags);
+ spin_lock_irq(ap->lock);
/* no internal command while frozen */
if (ap->pflags & ATA_PFLAG_FROZEN) {
- spin_unlock_irqrestore(ap->lock, flags);
+ spin_unlock_irq(ap->lock);
return AC_ERR_SYSTEM;
}
@@ -1789,6 +1789,16 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
else
tag = 0;
+ /*
+ * We could be racing with the block layer freeing this tag, so
+ * wait until it is free there and claim it.
+ */
+ if (dev->sdev && dev->sdev->request_queue)
+ blk_queue_acquire_tag(dev->sdev->request_queue, tag);
+
+ /*
+ * The tag is now ours
+ */
qc = __ata_qc_from_tag(ap, tag);
qc->tag = tag;
@@ -1828,7 +1838,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
ata_qc_issue(qc);
- spin_unlock_irqrestore(ap->lock, flags);
+ spin_unlock_irq(ap->lock);
if (!timeout) {
if (ata_probe_timeout)
@@ -1844,7 +1854,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
ata_port_flush_task(ap);
if (!rc) {
- spin_lock_irqsave(ap->lock, flags);
+ spin_lock_irq(ap->lock);
/* We're racing with irq here. If we lose, the
* following test prevents us from completing the qc
@@ -1864,7 +1874,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
"qc timeout (cmd 0x%x)\n", command);
}
- spin_unlock_irqrestore(ap->lock, flags);
+ spin_unlock_irq(ap->lock);
}
/* do post_internal_cmd */
@@ -1884,11 +1894,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
}
/* finish up */
- spin_lock_irqsave(ap->lock, flags);
+ spin_lock_irq(ap->lock);
*tf = qc->result_tf;
err_mask = qc->err_mask;
+ if (dev->sdev && dev->sdev->request_queue)
+ blk_queue_release_tag(dev->sdev->request_queue, tag);
+
ata_qc_free(qc);
link->active_tag = preempted_tag;
link->sactive = preempted_sactive;
@@ -1911,7 +1924,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
ata_port_probe(ap);
}
- spin_unlock_irqrestore(ap->lock, flags);
+ spin_unlock_irq(ap->lock);
if ((err_mask & AC_ERR_TIMEOUT) && auto_timeout)
ata_internal_cmd_timed_out(dev, command);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ca322da..f2b6b92 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -307,6 +307,7 @@ struct blk_queue_tag {
int max_depth; /* what we will send to device */
int real_max_depth; /* what the array can hold */
atomic_t refcnt; /* map can be shared */
+ wait_queue_head_t waitq; /* for waiting on tags */
};
#define BLK_SCSI_MAX_CMDS (256)
@@ -929,6 +930,8 @@ extern void blk_put_queue(struct request_queue *);
* tag stuff
*/
#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
+extern void blk_queue_acquire_tag(struct request_queue *, int);
+extern void blk_queue_release_tag(struct request_queue *, int);
extern int blk_queue_start_tag(struct request_queue *, struct request *);
extern struct request *blk_queue_find_tag(struct request_queue *, int);
extern void blk_queue_end_tag(struct request_queue *, struct request *);
--
1.6.3.rc0.1.gf800