* [RFC] scsi_debug: locks and delays
@ 2014-06-19 1:53 Douglas Gilbert
2014-06-24 1:02 ` Elliott, Robert (Server Storage)
0 siblings, 1 reply; 2+ messages in thread
From: Douglas Gilbert @ 2014-06-19 1:53 UTC (permalink / raw)
To: SCSI development list
[-- Attachment #1: Type: text/plain, Size: 1001 bytes --]
Currently the scsi_debug driver wraps every queued command
with the host_lock and every mid-level completion callback
(apart from delay=0) with a spinlock.
Attached is a patch against Linus's tree that also applies
to lk 3.15.1 . It attempts to address some of these issues.
ChangeLog
- 'host_lock' option added that simply drops that lock
when host_lock=0 (which is the default)
- allow delay=-1 [delay=1 (jiffy) is still the default]
that uses a tasklet to schedule the response quickly
- for completions (when delay!=0) the callback to the
mid-level is un-(spin)locked
- completions are counted; can be viewed with
cat /proc/scsi/scsi_debug/<host_num>
- delay_override removed from TEST UNIT READY.
This makes 'sg_turs -n 1m -t /dev/bsg/<hctl>' a more
realistic test of command overhead. I get about 100k
IOPS on my laptop.
This patch has been lightly tested. Perhaps someone could
throw a scsi-mq test at it.
Comments welcome.
Doug Gilbert
[-- Attachment #2: sdebug_hlock2.patch --]
[-- Type: text/x-patch, Size: 11020 bytes --]
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 1328a26..6e66fe9 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -42,6 +42,9 @@
#include <linux/scatterlist.h>
#include <linux/blkdev.h>
#include <linux/crc-t10dif.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/atomic.h>
#include <net/checksum.h>
@@ -120,6 +123,7 @@ static const char * scsi_debug_version_date = "20100324";
#define DEF_VIRTUAL_GB 0
#define DEF_VPD_USE_HOSTNO 1
#define DEF_WRITESAME_LENGTH 0xFFFF
+#define DEF_HOST_LOCK 0
/* bit mask values for scsi_debug_opts */
#define SCSI_DEBUG_OPT_NOISE 1
@@ -198,8 +202,10 @@ static unsigned int scsi_debug_unmap_max_desc = DEF_UNMAP_MAX_DESC;
static unsigned int scsi_debug_write_same_length = DEF_WRITESAME_LENGTH;
static bool scsi_debug_removable = DEF_REMOVABLE;
static bool scsi_debug_clustering;
+static bool scsi_debug_host_lock = DEF_HOST_LOCK;
static int scsi_debug_cmnd_count = 0;
+static atomic_t scsi_debug_completions;
#define DEV_READONLY(TGT) (0)
@@ -254,6 +260,7 @@ typedef void (* done_funct_t) (struct scsi_cmnd *);
struct sdebug_queued_cmd {
int in_use;
struct timer_list cmnd_timer;
+ struct tasklet_struct tlet;
done_funct_t done_funct;
struct scsi_cmnd * a_cmnd;
int scsi_result;
@@ -2365,32 +2372,38 @@ static int resp_xdwriteread(struct scsi_cmnd *scp, unsigned long long lba,
return 0;
}
-/* When timer goes off this function is called. */
-static void timer_intr_handler(unsigned long indx)
+/* When timer or tasket goes off this function is called. */
+static void completion_handler(unsigned long indx)
{
struct sdebug_queued_cmd * sqcp;
unsigned long iflags;
+ done_funct_t df;
+ struct scsi_cmnd *scp;
+ atomic_inc(&scsi_debug_completions);
if (indx >= scsi_debug_max_queue) {
- printk(KERN_ERR "scsi_debug:timer_intr_handler: indx too "
+ printk(KERN_ERR "scsi_debug:completion_handler: index too "
"large\n");
return;
}
spin_lock_irqsave(&queued_arr_lock, iflags);
sqcp = &queued_arr[(int)indx];
if (! sqcp->in_use) {
- printk(KERN_ERR "scsi_debug:timer_intr_handler: Unexpected "
- "interrupt\n");
+ printk(KERN_ERR "scsi_debug:completion_handler: Unexpected "
+ "completion\n");
spin_unlock_irqrestore(&queued_arr_lock, iflags);
return;
}
+ scp = sqcp->a_cmnd;
+ df = sqcp->done_funct;
+ if (df && scp)
+ scp->result = sqcp->scsi_result;
sqcp->in_use = 0;
- if (sqcp->done_funct) {
- sqcp->a_cmnd->result = sqcp->scsi_result;
- sqcp->done_funct(sqcp->a_cmnd); /* callback to mid level */
- }
+ sqcp->a_cmnd = NULL;
sqcp->done_funct = NULL;
spin_unlock_irqrestore(&queued_arr_lock, iflags);
+ if (df && scp)
+ df(scp); /* callback to mid level */
}
@@ -2516,7 +2529,10 @@ static int stop_queued_cmnd(struct scsi_cmnd *cmnd)
for (k = 0; k < scsi_debug_max_queue; ++k) {
sqcp = &queued_arr[k];
if (sqcp->in_use && (cmnd == sqcp->a_cmnd)) {
- del_timer_sync(&sqcp->cmnd_timer);
+ if (scsi_debug_delay > 0)
+ del_timer_sync(&sqcp->cmnd_timer);
+ else if (scsi_debug_delay < 0)
+ tasklet_kill(&sqcp->tlet);
sqcp->in_use = 0;
sqcp->a_cmnd = NULL;
break;
@@ -2537,7 +2553,10 @@ static void stop_all_queued(void)
for (k = 0; k < scsi_debug_max_queue; ++k) {
sqcp = &queued_arr[k];
if (sqcp->in_use && sqcp->a_cmnd) {
- del_timer_sync(&sqcp->cmnd_timer);
+ if (scsi_debug_delay > 0)
+ del_timer_sync(&sqcp->cmnd_timer);
+ else if (scsi_debug_delay < 0)
+ tasklet_kill(&sqcp->tlet);
sqcp->in_use = 0;
sqcp->a_cmnd = NULL;
}
@@ -2642,7 +2661,8 @@ static void __init init_all_queued(void)
spin_lock_irqsave(&queued_arr_lock, iflags);
for (k = 0; k < scsi_debug_max_queue; ++k) {
sqcp = &queued_arr[k];
- init_timer(&sqcp->cmnd_timer);
+ if (scsi_debug_delay > 0)
+ init_timer(&sqcp->cmnd_timer);
sqcp->in_use = 0;
sqcp->a_cmnd = NULL;
}
@@ -2720,7 +2740,7 @@ static int schedule_resp(struct scsi_cmnd * cmnd,
(SCSI_SENSE_BUFFERSIZE > SDEBUG_SENSE_LEN) ?
SDEBUG_SENSE_LEN : SCSI_SENSE_BUFFERSIZE);
}
- if (delta_jiff <= 0) {
+ if (delta_jiff == 0) {
if (cmnd)
cmnd->result = scsi_result;
if (done)
@@ -2746,10 +2766,15 @@ static int schedule_resp(struct scsi_cmnd * cmnd,
sqcp->a_cmnd = cmnd;
sqcp->scsi_result = scsi_result;
sqcp->done_funct = done;
- sqcp->cmnd_timer.function = timer_intr_handler;
- sqcp->cmnd_timer.data = k;
- sqcp->cmnd_timer.expires = jiffies + delta_jiff;
- add_timer(&sqcp->cmnd_timer);
+ if (delta_jiff > 0) {
+ sqcp->cmnd_timer.function = completion_handler;
+ sqcp->cmnd_timer.data = k;
+ sqcp->cmnd_timer.expires = jiffies + delta_jiff;
+ add_timer(&sqcp->cmnd_timer);
+ } else {
+ tasklet_init(&sqcp->tlet, completion_handler, k);
+ tasklet_schedule(&sqcp->tlet);
+ }
spin_unlock_irqrestore(&queued_arr_lock, iflags);
if (cmnd)
cmnd->result = 0;
@@ -2773,6 +2798,7 @@ module_param_named(dsense, scsi_debug_dsense, int, S_IRUGO | S_IWUSR);
module_param_named(every_nth, scsi_debug_every_nth, int, S_IRUGO | S_IWUSR);
module_param_named(fake_rw, scsi_debug_fake_rw, int, S_IRUGO | S_IWUSR);
module_param_named(guard, scsi_debug_guard, uint, S_IRUGO);
+module_param_named(host_lock, scsi_debug_host_lock, bool, S_IRUGO | S_IWUSR);
module_param_named(lbpu, scsi_debug_lbpu, int, S_IRUGO);
module_param_named(lbpws, scsi_debug_lbpws, int, S_IRUGO);
module_param_named(lbpws10, scsi_debug_lbpws10, int, S_IRUGO);
@@ -2809,7 +2835,7 @@ MODULE_VERSION(SCSI_DEBUG_VERSION);
MODULE_PARM_DESC(add_host, "0..127 hosts allowed(def=1)");
MODULE_PARM_DESC(ato, "application tag ownership: 0=disk 1=host (def=1)");
MODULE_PARM_DESC(clustering, "when set enables larger transfers (def=0)");
-MODULE_PARM_DESC(delay, "# of jiffies to delay response(def=1)");
+MODULE_PARM_DESC(delay, "response delay in jiffies (def=1); 0:imm, -1:tiny");
MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs(def=8)");
MODULE_PARM_DESC(dif, "data integrity field type: 0-3 (def=0)");
MODULE_PARM_DESC(dix, "data integrity extensions mask (def=0)");
@@ -2817,6 +2843,7 @@ MODULE_PARM_DESC(dsense, "use descriptor sense format(def=0 -> fixed)");
MODULE_PARM_DESC(every_nth, "timeout every nth command(def=0)");
MODULE_PARM_DESC(fake_rw, "fake reads/writes instead of copying (def=0)");
MODULE_PARM_DESC(guard, "protection checksum: 0=crc, 1=ip (def=0)");
+MODULE_PARM_DESC(host_lock, "use host_lock around all commands (def=0)");
MODULE_PARM_DESC(lbpu, "enable LBP, support UNMAP command (def=0)");
MODULE_PARM_DESC(lbpws, "enable LBP, support WRITE SAME(16) with UNMAP bit (def=0)");
MODULE_PARM_DESC(lbpws10, "enable LBP, support WRITE SAME(10) with UNMAP bit (def=0)");
@@ -2881,7 +2908,7 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
"%s [%s]\n"
"num_tgts=%d, shared (ram) size=%d MB, opts=0x%x, "
"every_nth=%d(curr:%d)\n"
- "delay=%d, max_luns=%d, scsi_level=%d\n"
+ "delay=%d, max_luns=%d, scsi_level=%d, completions=%d\n"
"sector_size=%d bytes, cylinders=%d, heads=%d, sectors=%d\n"
"number of aborts=%d, device_reset=%d, bus_resets=%d, "
"host_resets=%d\ndix_reads=%d dix_writes=%d dif_errors=%d\n",
@@ -2889,6 +2916,7 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
scsi_debug_dev_size_mb, scsi_debug_opts, scsi_debug_every_nth,
scsi_debug_cmnd_count, scsi_debug_delay,
scsi_debug_max_luns, scsi_debug_scsi_level,
+ atomic_read(&scsi_debug_completions),
scsi_debug_sector_size, sdebug_cylinders_per, sdebug_heads,
sdebug_sectors_per, num_aborts, num_dev_resets, num_bus_resets,
num_host_resets, dix_reads, dix_writes, dif_errors);
@@ -2907,7 +2935,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf,
char work[20];
if (1 == sscanf(buf, "%10s", work)) {
- if ((1 == sscanf(work, "%d", &delay)) && (delay >= 0)) {
+ if (1 == sscanf(work, "%d", &delay)) {
scsi_debug_delay = delay;
return count;
}
@@ -3234,6 +3262,24 @@ static ssize_t removable_store(struct device_driver *ddp, const char *buf,
}
static DRIVER_ATTR_RW(removable);
+static ssize_t host_lock_show(struct device_driver *ddp, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%d\n", !!scsi_debug_host_lock);
+}
+static ssize_t host_lock_store(struct device_driver *ddp, const char *buf,
+ size_t count)
+{
+ int n;
+
+ if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) {
+ scsi_debug_host_lock = (n > 0);
+ return count;
+ }
+ return -EINVAL;
+}
+static DRIVER_ATTR_RW(host_lock);
+
+
/* Note: The following array creates attribute files in the
/sys/bus/pseudo/drivers/scsi_debug directory. The advantage of these
files (over those found in the /sys/module/scsi_debug/parameters
@@ -3266,6 +3312,7 @@ static struct attribute *sdebug_drv_attrs[] = {
&driver_attr_ato.attr,
&driver_attr_map.attr,
&driver_attr_removable.attr,
+ &driver_attr_host_lock.attr,
NULL,
};
ATTRIBUTE_GROUPS(sdebug_drv);
@@ -3570,7 +3617,7 @@ static void sdebug_remove_adapter(void)
}
static
-int scsi_debug_queuecommand_lck(struct scsi_cmnd *SCpnt, done_funct_t done)
+int scsi_debug_queuecommand_int(struct scsi_cmnd *SCpnt, done_funct_t done)
{
unsigned char *cmd = (unsigned char *) SCpnt->cmnd;
int len, k;
@@ -3678,7 +3725,7 @@ int scsi_debug_queuecommand_lck(struct scsi_cmnd *SCpnt, done_funct_t done)
errsts = check_readiness(SCpnt, 1, devip);
break;
case TEST_UNIT_READY: /* mandatory */
- delay_override = 1;
+ /* delay_override = 1; */
errsts = check_readiness(SCpnt, 0, devip);
break;
case RESERVE:
@@ -3926,7 +3973,20 @@ write:
(delay_override ? 0 : scsi_debug_delay));
}
-static DEF_SCSI_QCMD(scsi_debug_queuecommand)
+static int
+scsi_debug_queuecommand_choose(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
+{
+ unsigned long irq_flags;
+ int rc;
+
+ if (scsi_debug_host_lock) {
+ spin_lock_irqsave(shost->host_lock, irq_flags);
+ rc = scsi_debug_queuecommand_int(cmd, cmd->scsi_done);
+ spin_unlock_irqrestore(shost->host_lock, irq_flags);
+ return rc;
+ } else
+ return scsi_debug_queuecommand_int(cmd, cmd->scsi_done);
+}
static struct scsi_host_template sdebug_driver_template = {
.show_info = scsi_debug_show_info,
@@ -3938,7 +3998,7 @@ static struct scsi_host_template sdebug_driver_template = {
.slave_configure = scsi_debug_slave_configure,
.slave_destroy = scsi_debug_slave_destroy,
.ioctl = scsi_debug_ioctl,
- .queuecommand = scsi_debug_queuecommand,
+ .queuecommand = scsi_debug_queuecommand_choose,
.eh_abort_handler = scsi_debug_abort,
.eh_bus_reset_handler = scsi_debug_bus_reset,
.eh_device_reset_handler = scsi_debug_device_reset,
@@ -4032,7 +4092,7 @@ static int sdebug_driver_probe(struct device * dev)
} else
scsi_scan_host(hpnt);
-
+ atomic_set(&scsi_debug_completions, 0);
return error;
}
^ permalink raw reply related [flat|nested] 2+ messages in thread
* RE: [RFC] scsi_debug: locks and delays
2014-06-19 1:53 [RFC] scsi_debug: locks and delays Douglas Gilbert
@ 2014-06-24 1:02 ` Elliott, Robert (Server Storage)
0 siblings, 0 replies; 2+ messages in thread
From: Elliott, Robert (Server Storage) @ 2014-06-24 1:02 UTC (permalink / raw)
To: dgilbert@interlog.com, SCSI development list
> -----Original Message-----
> From: linux-scsi-owner@vger.kernel.org [mailto:linux-scsi-
> owner@vger.kernel.org] On Behalf Of Douglas Gilbert
> Sent: Wednesday, 18 June, 2014 8:54 PM
> To: SCSI development list
> Subject: [RFC] scsi_debug: locks and delays
>
> Currently the scsi_debug driver wraps every queued command
> with the host_lock and every mid-level completion callback
> (apart from delay=0) with a spinlock.
>
> Attached is a patch against Linus's tree that also applies
> to lk 3.15.1 . It attempts to address some of these issues.
>
> ChangeLog
> - 'host_lock' option added that simply drops that lock
> when host_lock=0 (which is the default)
> - allow delay=-1 [delay=1 (jiffy) is still the default]
> that uses a tasklet to schedule the response quickly
> - for completions (when delay!=0) the callback to the
> mid-level is un-(spin)locked
> - completions are counted; can be viewed with
> cat /proc/scsi/scsi_debug/<host_num>
> - delay_override removed from TEST UNIT READY.
> This makes 'sg_turs -n 1m -t /dev/bsg/<hctl>' a more
> realistic test of command overhead. I get about 100k
> IOPS on my laptop.
>
> This patch has been lightly tested. Perhaps someone could
> throw a scsi-mq test at it.
>
> Comments welcome.
>
> Doug Gilbert
This is indeed much better with scsi-mq than without scsi-mq,
which was exhibiting 90K to 171K IOPS on various systems
when I last tried it.
Using one scsi_debug device in fake_rw mode, 4 KiB random
reads get these results:
236K IOPS 1 thread
1081K IOPS 6 threads
1205K IOPS 6 threads, iodepth_batch=4 iodepth_batch_complete=4
1270K IOPS 6 threads, iodepth_batch=16 iodepth_batch_complete=16
Toying with a few sysfs properties:
* add_random=1 drops from 1270K to 1170K IOPS.
* nomerges=0, 1, or 2 doesn't matter. The average queue size
never exceeds 3 because they complete so fast, so there's not
much searching going on.
I created the device with:
modprobe scsi_debug fake_rw=1 delay=0 physblk_exp=3 dev_size_mb=128
fio options (adjust as needed):
$ cat 1drive_sdah.fio
[global]
direct=1
ioengine=libaio
norandommap
randrepeat=0
bs=4096
iodepth=96
numjobs=1
#numjobs=6
runtime=216000
time_based=1
group_reporting
thread
gtod_reduce=1
#iodepth_batch=4
#iodepth_batch_complete=4
cpus_allowed=0-5
cpus_allowed_policy=split
rw=randread
[4_KiB_RR_drive_ah]
filename=/dev/sdah
---
Rob Elliott HP Server Storage
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2014-06-24 1:03 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-06-19 1:53 [RFC] scsi_debug: locks and delays Douglas Gilbert
2014-06-24 1:02 ` Elliott, Robert (Server Storage)
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox