* [PATCH/RFC] allow userspace to modify scsi command filter on per device basis
@ 2008-06-13 19:33 Adel Gadllah
2008-06-13 19:54 ` Matthew Wilcox
0 siblings, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-06-13 19:33 UTC (permalink / raw)
To: linux-scsi; +Cc: pjones, Jens Axboe
Hi,
The attached patch is based on the older one from Peter which I have
ported with his help to current mainline.
It allows userspace to modiy the scsi command filter via sysfs.
I tested it with my PX-755 and was able to send vendor specific
commands to it as a user after configuring it in.
-----------
From: Peter Jones <pjones@redhat.com>
This patch exports the per-gendisk command filter to user space through
sysfs, so it can be changed by the system administrator.
Signed-off-by: Adel Gadllah <adel.gadllah@gmail.com>
Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d..717733c 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,7 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o
+ blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
+ cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/bsg.c b/block/bsg.c
index f0b7cd3..72f2587 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -44,11 +44,12 @@ struct bsg_device {
char name[BUS_ID_SIZE];
int max_queue;
unsigned long flags;
+ struct blk_scsi_cmd_filter *cmd_filter;
+ mode_t *f_mode;
};
enum {
BSG_F_BLOCK = 1,
- BSG_F_WRITE_PERM = 2,
};
#define BSG_DEFAULT_CMDS 64
@@ -172,7 +173,7 @@ unlock:
}
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_v4 *hdr, int has_write_perm)
+ struct sg_io_v4 *hdr, struct bsg_device *bd)
{
if (hdr->request_len > BLK_MAX_CDB) {
rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -185,7 +186,7 @@ static int blk_fill_sgv4_hdr_rq(struct
request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd, bd->f_mode))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -263,8 +264,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
return ERR_PTR(-ENOMEM);
- ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
- &bd->flags));
+ ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
if (ret)
goto out;
@@ -566,12 +566,23 @@ static inline void bsg_set_block(struct
bsg_device *bd, struct file *file)
set_bit(BSG_F_BLOCK, &bd->flags);
}
-static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file)
+static void bsg_set_cmd_filter(struct bsg_device *bd,
+ struct file *file)
{
- if (file->f_mode & FMODE_WRITE)
- set_bit(BSG_F_WRITE_PERM, &bd->flags);
- else
- clear_bit(BSG_F_WRITE_PERM, &bd->flags);
+ struct inode *inode;
+ struct gendisk *disk;
+
+ if (!file)
+ return;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return;
+
+ disk = inode->i_bdev->bd_disk;
+
+ bd->cmd_filter = &disk->cmd_filter;
+ bd->f_mode = &file->f_mode;
}
/*
@@ -595,6 +606,8 @@ bsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
dprintk("%s: read %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
+
bytes_read = 0;
ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
*ppos = bytes_read;
@@ -668,7 +681,7 @@ bsg_write(struct file *file, const char __user
*buf, size_t count, loff_t *ppos)
dprintk("%s: write %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
- bsg_set_write_perm(bd, file);
+ bsg_set_cmd_filter(bd, file);
bytes_written = 0;
ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -771,7 +784,9 @@ static struct bsg_device *bsg_add_device(struct
inode *inode,
}
bd->queue = rq;
+
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
atomic_set(&bd->ref_count, 1);
mutex_lock(&bsg_mutex);
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
new file mode 100644
index 0000000..50e7154
--- /dev/null
+++ b/block/cmd-filter.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright 2004 Peter M. Jones <pjones@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/genhd.h>
+#include <linux/spinlock.h>
+#include <linux/parser.h>
+#include <linux/capability.h>
+#include <asm/bitops.h>
+
+#include <scsi/scsi.h>
+#include <linux/cdrom.h>
+
+int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode)
+{
+ /* root can do any command. */
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
+ /* if there's no filter set, assume we're filtering everything out */
+ if (!filter)
+ return -EPERM;
+
+ /* Anybody who can open the device can do a read-safe command */
+ if (test_bit(cmd[0], filter->read_ok))
+ return 0;
+
+ /* Write-safe commands require a writable open */
+ if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
+ return 0;
+
+ return -EPERM;
+}
+
+EXPORT_SYMBOL(blk_cmd_filter_verify_command);
+
+int blk_verify_command(struct file *file, unsigned char *cmd)
+{
+ struct gendisk *disk;
+ struct inode *inode;
+
+ if(!file)
+ return -EINVAL;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return -EINVAL;
+
+ disk = inode->i_bdev->bd_disk;
+
+ return blk_cmd_filter_verify_command(&disk->cmd_filter, cmd, &file->f_mode);
+}
+
+EXPORT_SYMBOL(blk_verify_command);
+
+/* and now, the sysfs stuff */
+static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
+ int rw)
+{
+ char *npage = page;
+ unsigned long *okbits;
+ int i;
+
+ if (rw == READ)
+ okbits = filter->read_ok;
+ else
+ okbits = filter->write_ok;
+
+ for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
+ if (test_bit(i, okbits)) {
+ sprintf(npage, "%02x", i);
+ npage += 2;
+ if (i < BLK_SCSI_MAX_CMDS - 1)
+ sprintf(npage++, " ");
+ }
+ }
+
+ if (npage != page)
+ npage += sprintf(npage, "\n");
+
+ return npage - page;
+}
+
+static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter,
char *page)
+{
+ return rcf_cmds_show(filter, page, READ);
+}
+
+static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter
*filter,char *page)
+{
+ return rcf_cmds_show(filter, page, WRITE);
+}
+
+static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count, int rw)
+{
+ ssize_t ret = 0;
+ unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
+ int cmd, status, len;
+ substring_t ss;
+
+ memset(&okbits, 0, sizeof (okbits));
+
+ for (len = strlen(page); len > 0; len -= 3) {
+ if (len < 2)
+ break;
+ ss.from = (char *) page + ret;
+ ss.to = (char *) page + ret + 2;
+ ret+=3;
+ status = match_hex(&ss, &cmd);
+ /* either of these cases means invalid input, so do nothing. */
+ if (status || cmd >= BLK_SCSI_MAX_CMDS)
+ return -EINVAL;
+
+ set_bit(cmd, okbits);
+ }
+
+ if (rw == READ)
+ target_okbits = filter->read_ok;
+ else
+ target_okbits = filter->write_ok;
+
+ memmove(target_okbits, okbits, sizeof (okbits));
+ return count;
+}
+
+static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, READ);
+}
+
+static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, WRITE);
+}
+
+struct rcf_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
+ ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
+};
+
+static struct rcf_sysfs_entry rcf_readcmds_entry = {
+ .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_readcmds_show,
+ .store = rcf_readcmds_store,
+};
+
+static struct rcf_sysfs_entry rcf_writecmds_entry = {
+ .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_writecmds_show,
+ .store = rcf_writecmds_store,
+};
+
+static struct attribute *default_attrs[] = {
+ &rcf_readcmds_entry.attr,
+ &rcf_writecmds_entry.attr,
+ NULL,
+};
+
+#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
+
+static ssize_t
+rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ if (entry->show)
+ return entry->show(filter, page);
+
+ return 0;
+}
+
+static ssize_t
+rcf_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (!entry->store)
+ return -EINVAL;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ return entry->store(filter, page, length);
+}
+
+static struct sysfs_ops rcf_sysfs_ops = {
+ .show = rcf_attr_show,
+ .store = rcf_attr_store,
+};
+
+static struct kobj_type rcf_ktype = {
+ .sysfs_ops = &rcf_sysfs_ops,
+ .default_attrs = default_attrs,
+};
+
+static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
+{
+ /* Basic read-only commands */
+ set_bit(TEST_UNIT_READY, filter->read_ok);
+ set_bit(REQUEST_SENSE, filter->read_ok);
+ set_bit(READ_6, filter->read_ok);
+ set_bit(READ_10, filter->read_ok);
+ set_bit(READ_12, filter->read_ok);
+ set_bit(READ_16, filter->read_ok);
+ set_bit(READ_BUFFER, filter->read_ok);
+ set_bit(READ_DEFECT_DATA, filter->read_ok);
+ set_bit(READ_LONG, filter->read_ok);
+ set_bit(INQUIRY, filter->read_ok);
+ set_bit(MODE_SENSE, filter->read_ok);
+ set_bit(MODE_SENSE_10, filter->read_ok);
+ set_bit(LOG_SENSE, filter->read_ok);
+ set_bit(START_STOP, filter->read_ok);
+ set_bit(GPCMD_VERIFY_10, filter->read_ok);
+ set_bit(VERIFY_16, filter->read_ok);
+ set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
+
+ /* Audio CD commands */
+ set_bit(GPCMD_PLAY_CD, filter->read_ok);
+ set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
+ set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
+ set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
+ set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
+
+ /* CD/DVD data reading */
+ set_bit(GPCMD_READ_CD, filter->read_ok);
+ set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
+ set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
+ set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
+ set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
+ set_bit(GPCMD_READ_HEADER, filter->read_ok);
+ set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
+ set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
+ set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
+ set_bit(GPCMD_REPORT_KEY, filter->read_ok);
+ set_bit(GPCMD_SCAN, filter->read_ok);
+ set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
+ set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
+ set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
+ set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
+ set_bit(GPCMD_SEEK, filter->read_ok);
+ set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
+
+ /* Basic writing commands */
+ set_bit(WRITE_6, filter->write_ok);
+ set_bit(WRITE_10, filter->write_ok);
+ set_bit(WRITE_VERIFY, filter->write_ok);
+ set_bit(WRITE_12, filter->write_ok);
+ set_bit(WRITE_VERIFY_12, filter->write_ok);
+ set_bit(WRITE_16, filter->write_ok);
+ set_bit(WRITE_LONG, filter->write_ok);
+ set_bit(WRITE_LONG_2, filter->write_ok);
+ set_bit(ERASE, filter->write_ok);
+ set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
+ set_bit(MODE_SELECT, filter->write_ok);
+ set_bit(LOG_SELECT, filter->write_ok);
+ set_bit(GPCMD_BLANK, filter->write_ok);
+ set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
+ set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
+ set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
+ set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
+ set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
+ set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
+ set_bit(GPCMD_SEND_EVENT, filter->write_ok);
+ set_bit(GPCMD_SEND_KEY, filter->write_ok);
+ set_bit(GPCMD_SEND_OPC, filter->write_ok);
+ set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
+ set_bit(GPCMD_SET_SPEED, filter->write_ok);
+ set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
+ set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
+ set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+}
+
+int blk_register_filter(struct gendisk *disk)
+{
+ int ret;
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+ struct kobject *parent = kobject_get(disk->holder_dir);
+
+ if(!parent) {
+ return -EBUSY;
+ }
+
+ ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent, "%s", "filter");
+
+ if (ret < 0)
+ return ret;
+
+ rcf_set_defaults(filter);
+ return 0;
+}
+
+void blk_unregister_filter(struct gendisk *disk)
+{
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+
+ kobject_put(&filter->kobj);
+ kobject_put(disk->holder_dir);
+}
+
diff --git a/block/genhd.c b/block/genhd.c
index b922d48..6f45845 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
disk->minors, NULL, exact_match, exact_lock, disk);
register_disk(disk);
blk_register_queue(disk);
+ blk_register_filter(disk);
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
@@ -200,6 +201,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
void unlink_gendisk(struct gendisk *disk)
{
+ blk_unregister_filter(disk);
sysfs_remove_link(&disk->dev.kobj, "bdi");
bdi_unregister(&disk->queue->backing_dev_info);
blk_unregister_queue(disk);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 78199c0..c5b9bcf 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,120 +105,12 @@ static int sg_emulated_host(struct
request_queue *q, int __user *p)
return put_user(1, p);
}
-#define CMD_READ_SAFE 0x01
-#define CMD_WRITE_SAFE 0x02
-#define CMD_WARNED 0x04
-#define safe_for_read(cmd) [cmd] = CMD_READ_SAFE
-#define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE
-
-int blk_verify_command(unsigned char *cmd, int has_write_perm)
-{
- static unsigned char cmd_type[256] = {
-
- /* Basic read-only commands */
- safe_for_read(TEST_UNIT_READY),
- safe_for_read(REQUEST_SENSE),
- safe_for_read(READ_6),
- safe_for_read(READ_10),
- safe_for_read(READ_12),
- safe_for_read(READ_16),
- safe_for_read(READ_BUFFER),
- safe_for_read(READ_DEFECT_DATA),
- safe_for_read(READ_LONG),
- safe_for_read(INQUIRY),
- safe_for_read(MODE_SENSE),
- safe_for_read(MODE_SENSE_10),
- safe_for_read(LOG_SENSE),
- safe_for_read(START_STOP),
- safe_for_read(GPCMD_VERIFY_10),
- safe_for_read(VERIFY_16),
-
- /* Audio CD commands */
- safe_for_read(GPCMD_PLAY_CD),
- safe_for_read(GPCMD_PLAY_AUDIO_10),
- safe_for_read(GPCMD_PLAY_AUDIO_MSF),
- safe_for_read(GPCMD_PLAY_AUDIO_TI),
- safe_for_read(GPCMD_PAUSE_RESUME),
-
- /* CD/DVD data reading */
- safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
- safe_for_read(GPCMD_READ_CD),
- safe_for_read(GPCMD_READ_CD_MSF),
- safe_for_read(GPCMD_READ_DISC_INFO),
- safe_for_read(GPCMD_READ_CDVD_CAPACITY),
- safe_for_read(GPCMD_READ_DVD_STRUCTURE),
- safe_for_read(GPCMD_READ_HEADER),
- safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
- safe_for_read(GPCMD_READ_SUBCHANNEL),
- safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
- safe_for_read(GPCMD_REPORT_KEY),
- safe_for_read(GPCMD_SCAN),
- safe_for_read(GPCMD_GET_CONFIGURATION),
- safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
- safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
- safe_for_read(GPCMD_GET_PERFORMANCE),
- safe_for_read(GPCMD_SEEK),
- safe_for_read(GPCMD_STOP_PLAY_SCAN),
-
- /* Basic writing commands */
- safe_for_write(WRITE_6),
- safe_for_write(WRITE_10),
- safe_for_write(WRITE_VERIFY),
- safe_for_write(WRITE_12),
- safe_for_write(WRITE_VERIFY_12),
- safe_for_write(WRITE_16),
- safe_for_write(WRITE_LONG),
- safe_for_write(WRITE_LONG_2),
- safe_for_write(ERASE),
- safe_for_write(GPCMD_MODE_SELECT_10),
- safe_for_write(MODE_SELECT),
- safe_for_write(LOG_SELECT),
- safe_for_write(GPCMD_BLANK),
- safe_for_write(GPCMD_CLOSE_TRACK),
- safe_for_write(GPCMD_FLUSH_CACHE),
- safe_for_write(GPCMD_FORMAT_UNIT),
- safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
- safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
- safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
- safe_for_write(GPCMD_SEND_EVENT),
- safe_for_write(GPCMD_SEND_KEY),
- safe_for_write(GPCMD_SEND_OPC),
- safe_for_write(GPCMD_SEND_CUE_SHEET),
- safe_for_write(GPCMD_SET_SPEED),
- safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
- safe_for_write(GPCMD_LOAD_UNLOAD),
- safe_for_write(GPCMD_SET_STREAMING),
- };
- unsigned char type = cmd_type[cmd[0]];
-
- /* Anybody who can open the device can do a read-safe command */
- if (type & CMD_READ_SAFE)
- return 0;
-
- /* Write-safe commands just require a writable open.. */
- if ((type & CMD_WRITE_SAFE) && has_write_perm)
- return 0;
-
- /* And root can do any command.. */
- if (capable(CAP_SYS_RAWIO))
- return 0;
-
- if (!type) {
- cmd_type[cmd[0]] = CMD_WARNED;
- printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
- }
-
- /* Otherwise fail it with an "Operation not permitted" */
- return -EPERM;
-}
-EXPORT_SYMBOL_GPL(blk_verify_command);
-
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_hdr *hdr, int has_write_perm)
+ struct sg_io_hdr *hdr, struct file *file)
{
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_verify_command(file, rq->cmd))
return -EPERM;
/*
@@ -287,7 +179,7 @@ static int sg_io(struct file *file, struct request_queue *q,
struct gendisk *bd_disk, struct sg_io_hdr *hdr)
{
unsigned long start_time;
- int writing = 0, ret = 0, has_write_perm = 0;
+ int writing = 0, ret = 0;
struct request *rq;
char sense[SCSI_SENSE_BUFFERSIZE];
struct bio *bio;
@@ -316,10 +208,7 @@ static int sg_io(struct file *file, struct
request_queue *q,
if (!rq)
return -ENOMEM;
- if (file)
- has_write_perm = file->f_mode & FMODE_WRITE;
-
- if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) {
+ if (blk_fill_sghdr_rq(q, rq, hdr, file)) {
blk_put_request(rq);
return -EFAULT;
}
@@ -451,7 +340,7 @@ int sg_scsi_ioctl(struct file *file, struct
request_queue *q,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error;
- err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE);
+ err = blk_verify_command(file, rq->cmd);
if (err)
goto error;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d2a1b71..b4e539b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -676,7 +676,6 @@ extern int blk_execute_rq(struct request_queue *,
struct gendisk *,
struct request *, int);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
-extern int blk_verify_command(unsigned char *, int);
extern void blk_unplug(struct request_queue *q);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -802,6 +801,15 @@ static inline struct request
*blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, sector_t *);
+/*
+* command filter functions
+*/
+extern int blk_verify_command(struct file *file, unsigned char *cmd);
+extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode);
+extern int blk_register_filter(struct gendisk *disk);
+extern void blk_unregister_filter(struct gendisk *disk);
+
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
#define SAFE_MAX_SECTORS 255
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ae7aec3..ab49a77 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -110,6 +110,14 @@ struct hd_struct {
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
#define GENHD_FL_FAIL 64
+#define BLK_SCSI_MAX_CMDS (256)
+#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+
+struct blk_scsi_cmd_filter {
+ unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+ unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+ struct kobject kobj;
+};
struct gendisk {
int major; /* major number of driver */
@@ -120,6 +128,7 @@ struct gendisk {
struct hd_struct **part; /* [indexed by minor] */
struct block_device_operations *fops;
struct request_queue *queue;
+ struct blk_scsi_cmd_filter cmd_filter;
void *private_data;
sector_t capacity;
^ permalink raw reply related [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC] allow userspace to modify scsi command filter on per device basis
2008-06-13 19:33 [PATCH/RFC] allow userspace to modify scsi command filter on per device basis Adel Gadllah
@ 2008-06-13 19:54 ` Matthew Wilcox
2008-06-13 20:22 ` Adel Gadllah
2008-06-14 20:26 ` [PATCH/RFC] allow userspace to modify scsi command filter on per device basis Jens Axboe
0 siblings, 2 replies; 65+ messages in thread
From: Matthew Wilcox @ 2008-06-13 19:54 UTC (permalink / raw)
To: Adel Gadllah; +Cc: linux-scsi, pjones, Jens Axboe
On Fri, Jun 13, 2008 at 09:33:27PM +0200, Adel Gadllah wrote:
> - if (blk_verify_command(rq->cmd, has_write_perm))
> + if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd, bd->f_mode))
Could you wrap to 80 columns?
> +static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
> + const char *page, size_t count, int rw)
> +{
> + ssize_t ret = 0;
> + unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
> + int cmd, status, len;
> + substring_t ss;
> +
> + memset(&okbits, 0, sizeof (okbits));
> +
> + for (len = strlen(page); len > 0; len -= 3) {
> + if (len < 2)
> + break;
> + ss.from = (char *) page + ret;
> + ss.to = (char *) page + ret + 2;
> + ret+=3;
> + status = match_hex(&ss, &cmd);
> + /* either of these cases means invalid input, so do nothing. */
> + if (status || cmd >= BLK_SCSI_MAX_CMDS)
> + return -EINVAL;
> +
> + set_bit(cmd, okbits);
set_bit is atomic. locked ops can be quite painful on some processors.
Since okbits is local, the atomicity isn't necessary and you can simply
use __set_bit.
> +static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
> +{
> + /* Basic read-only commands */
> + set_bit(TEST_UNIT_READY, filter->read_ok);
The set_bit vs __set_bit comment also applies here.
> +int blk_register_filter(struct gendisk *disk)
> +{
> + int ret;
> + struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
> + struct kobject *parent = kobject_get(disk->holder_dir);
> +
> + if(!parent) {
> + return -EBUSY;
> + }
Normal style would be to write
if (!parent)
return -EBUSY;
(though I don't understand why no parent means we're busy)
> +
> + ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent, "%s", "filter");
> +
> + if (ret < 0)
> + return ret;
> +
> + rcf_set_defaults(filter);
Surely we should set the bits before we make the object visible?
> @@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
> disk->minors, NULL, exact_match, exact_lock, disk);
> register_disk(disk);
> blk_register_queue(disk);
> + blk_register_filter(disk);
We don't need to handle errors here? Why not?
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC] allow userspace to modify scsi command filter on per device basis
2008-06-13 19:54 ` Matthew Wilcox
@ 2008-06-13 20:22 ` Adel Gadllah
2008-06-13 20:23 ` Adel Gadllah
2008-06-14 20:26 ` [PATCH/RFC] allow userspace to modify scsi command filter on per device basis Jens Axboe
1 sibling, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-06-13 20:22 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-scsi, pjones, Jens Axboe
2008/6/13 Matthew Wilcox <matthew@wil.cx>:
> On Fri, Jun 13, 2008 at 09:33:27PM +0200, Adel Gadllah wrote:
>> - if (blk_verify_command(rq->cmd, has_write_perm))
>> + if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd, bd->f_mode))
>
> Could you wrap to 80 columns?
yeah fixed all checkpatch.pl errors/warnings in the attached patch.
>> +static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
>> + const char *page, size_t count, int rw)
>> +{
>> + ssize_t ret = 0;
>> + unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
>> + int cmd, status, len;
>> + substring_t ss;
>> +
>> + memset(&okbits, 0, sizeof (okbits));
>> +
>> + for (len = strlen(page); len > 0; len -= 3) {
>> + if (len < 2)
>> + break;
>> + ss.from = (char *) page + ret;
>> + ss.to = (char *) page + ret + 2;
>> + ret+=3;
>> + status = match_hex(&ss, &cmd);
>> + /* either of these cases means invalid input, so do nothing. */
>> + if (status || cmd >= BLK_SCSI_MAX_CMDS)
>> + return -EINVAL;
>> +
>> + set_bit(cmd, okbits);
>
> set_bit is atomic. locked ops can be quite painful on some processors.
> Since okbits is local, the atomicity isn't necessary and you can simply
> use __set_bit.
ok, fixed.
>> +static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
>> +{
>> + /* Basic read-only commands */
>> + set_bit(TEST_UNIT_READY, filter->read_ok);
>
> The set_bit vs __set_bit comment also applies here.
fixed.
>> +int blk_register_filter(struct gendisk *disk)
>> +{
>> + int ret;
>> + struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
>> + struct kobject *parent = kobject_get(disk->holder_dir);
>> +
>> + if(!parent) {
>> + return -EBUSY;
>> + }
>
> Normal style would be to write
>
> if (!parent)
> return -EBUSY;
fixed while adressing the checkpatch.pl stuff.
> (though I don't understand why no parent means we're busy)
changed to -ENODEV;
>> +
>> + ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent, "%s", "filter");
>> +
>> + if (ret < 0)
>> + return ret;
>> +
>> + rcf_set_defaults(filter);
>
> Surely we should set the bits before we make the object visible?
When is the kobject
Does it matter?
>> @@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
>> disk->minors, NULL, exact_match, exact_lock, disk);
>> register_disk(disk);
>> blk_register_queue(disk);
>> + blk_register_filter(disk);
>
> We don't need to handle errors here? Why not?
Good question ... seems nothing in this function cares about errors.
New patch attached.
-----
From: Peter Jones <pjones@redhat.com>
This patch exports the per-gendisk command filter to user space through
sysfs, so it can be changed by the system administrator.
Signed-off-by: Adel Gadllah <adel.gadllah@gmail.com>
Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d..717733c 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,7 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o
+ blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
+ cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/bsg.c b/block/bsg.c
index f0b7cd3..439940c 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -44,11 +44,12 @@ struct bsg_device {
char name[BUS_ID_SIZE];
int max_queue;
unsigned long flags;
+ struct blk_scsi_cmd_filter *cmd_filter;
+ mode_t *f_mode;
};
enum {
BSG_F_BLOCK = 1,
- BSG_F_WRITE_PERM = 2,
};
#define BSG_DEFAULT_CMDS 64
@@ -172,7 +173,7 @@ unlock:
}
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_v4 *hdr, int has_write_perm)
+ struct sg_io_v4 *hdr, struct bsg_device *bd)
{
if (hdr->request_len > BLK_MAX_CDB) {
rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -185,7 +186,8 @@ static int blk_fill_sgv4_hdr_rq(struct
request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
+ bd->f_mode))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -263,8 +265,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
return ERR_PTR(-ENOMEM);
- ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
- &bd->flags));
+ ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
if (ret)
goto out;
@@ -566,12 +567,23 @@ static inline void bsg_set_block(struct
bsg_device *bd, struct file *file)
set_bit(BSG_F_BLOCK, &bd->flags);
}
-static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file)
+static void bsg_set_cmd_filter(struct bsg_device *bd,
+ struct file *file)
{
- if (file->f_mode & FMODE_WRITE)
- set_bit(BSG_F_WRITE_PERM, &bd->flags);
- else
- clear_bit(BSG_F_WRITE_PERM, &bd->flags);
+ struct inode *inode;
+ struct gendisk *disk;
+
+ if (!file)
+ return;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return;
+
+ disk = inode->i_bdev->bd_disk;
+
+ bd->cmd_filter = &disk->cmd_filter;
+ bd->f_mode = &file->f_mode;
}
/*
@@ -595,6 +607,8 @@ bsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
dprintk("%s: read %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
+
bytes_read = 0;
ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
*ppos = bytes_read;
@@ -668,7 +682,7 @@ bsg_write(struct file *file, const char __user
*buf, size_t count, loff_t *ppos)
dprintk("%s: write %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
- bsg_set_write_perm(bd, file);
+ bsg_set_cmd_filter(bd, file);
bytes_written = 0;
ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -771,7 +785,9 @@ static struct bsg_device *bsg_add_device(struct
inode *inode,
}
bd->queue = rq;
+
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
atomic_set(&bd->ref_count, 1);
mutex_lock(&bsg_mutex);
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
new file mode 100644
index 0000000..004c9d9
--- /dev/null
+++ b/block/cmd-filter.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright 2004 Peter M. Jones <pjones@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/genhd.h>
+#include <linux/spinlock.h>
+#include <linux/parser.h>
+#include <linux/capability.h>
+#include <linux/bitops.h>
+
+#include <scsi/scsi.h>
+#include <linux/cdrom.h>
+
+int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode)
+{
+ /* root can do any command. */
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
+ /* if there's no filter set, assume we're filtering everything out */
+ if (!filter)
+ return -EPERM;
+
+ /* Anybody who can open the device can do a read-safe command */
+ if (test_bit(cmd[0], filter->read_ok))
+ return 0;
+
+ /* Write-safe commands require a writable open */
+ if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
+ return 0;
+
+ return -EPERM;
+}
+EXPORT_SYMBOL(blk_cmd_filter_verify_command);
+
+int blk_verify_command(struct file *file, unsigned char *cmd)
+{
+ struct gendisk *disk;
+ struct inode *inode;
+
+ if (!file)
+ return -EINVAL;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return -EINVAL;
+
+ disk = inode->i_bdev->bd_disk;
+
+ return blk_cmd_filter_verify_command(&disk->cmd_filter,
+ cmd, &file->f_mode);
+}
+EXPORT_SYMBOL(blk_verify_command);
+
+/* and now, the sysfs stuff */
+static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
+ int rw)
+{
+ char *npage = page;
+ unsigned long *okbits;
+ int i;
+
+ if (rw == READ)
+ okbits = filter->read_ok;
+ else
+ okbits = filter->write_ok;
+
+ for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
+ if (test_bit(i, okbits)) {
+ sprintf(npage, "%02x", i);
+ npage += 2;
+ if (i < BLK_SCSI_MAX_CMDS - 1)
+ sprintf(npage++, " ");
+ }
+ }
+
+ if (npage != page)
+ npage += sprintf(npage, "\n");
+
+ return npage - page;
+}
+
+static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter,
char *page)
+{
+ return rcf_cmds_show(filter, page, READ);
+}
+
+static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter *filter,
+ char *page)
+{
+ return rcf_cmds_show(filter, page, WRITE);
+}
+
+static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count, int rw)
+{
+ ssize_t ret = 0;
+ unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
+ int cmd, status, len;
+ substring_t ss;
+
+ memset(&okbits, 0, sizeof(okbits));
+
+ for (len = strlen(page); len > 0; len -= 3) {
+ if (len < 2)
+ break;
+ ss.from = (char *) page + ret;
+ ss.to = (char *) page + ret + 2;
+ ret += 3;
+ status = match_hex(&ss, &cmd);
+ /* either of these cases means invalid input, so do nothing. */
+ if (status || cmd >= BLK_SCSI_MAX_CMDS)
+ return -EINVAL;
+
+ __set_bit(cmd, okbits);
+ }
+
+ if (rw == READ)
+ target_okbits = filter->read_ok;
+ else
+ target_okbits = filter->write_ok;
+
+ memmove(target_okbits, okbits, sizeof(okbits));
+ return count;
+}
+
+static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, READ);
+}
+
+static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, WRITE);
+}
+
+struct rcf_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
+ ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
+};
+
+static struct rcf_sysfs_entry rcf_readcmds_entry = {
+ .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_readcmds_show,
+ .store = rcf_readcmds_store,
+};
+
+static struct rcf_sysfs_entry rcf_writecmds_entry = {
+ .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_writecmds_show,
+ .store = rcf_writecmds_store,
+};
+
+static struct attribute *default_attrs[] = {
+ &rcf_readcmds_entry.attr,
+ &rcf_writecmds_entry.attr,
+ NULL,
+};
+
+#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
+
+static ssize_t
+rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ if (entry->show)
+ return entry->show(filter, page);
+
+ return 0;
+}
+
+static ssize_t
+rcf_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (!entry->store)
+ return -EINVAL;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ return entry->store(filter, page, length);
+}
+
+static struct sysfs_ops rcf_sysfs_ops = {
+ .show = rcf_attr_show,
+ .store = rcf_attr_store,
+};
+
+static struct kobj_type rcf_ktype = {
+ .sysfs_ops = &rcf_sysfs_ops,
+ .default_attrs = default_attrs,
+};
+
+static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
+{
+ /* Basic read-only commands */
+ __set_bit(TEST_UNIT_READY, filter->read_ok);
+ __set_bit(REQUEST_SENSE, filter->read_ok);
+ __set_bit(READ_6, filter->read_ok);
+ __set_bit(READ_10, filter->read_ok);
+ __set_bit(READ_12, filter->read_ok);
+ __set_bit(READ_16, filter->read_ok);
+ __set_bit(READ_BUFFER, filter->read_ok);
+ __set_bit(READ_DEFECT_DATA, filter->read_ok);
+ __set_bit(READ_LONG, filter->read_ok);
+ __set_bit(INQUIRY, filter->read_ok);
+ __set_bit(MODE_SENSE, filter->read_ok);
+ __set_bit(MODE_SENSE_10, filter->read_ok);
+ __set_bit(LOG_SENSE, filter->read_ok);
+ __set_bit(START_STOP, filter->read_ok);
+ __set_bit(GPCMD_VERIFY_10, filter->read_ok);
+ __set_bit(VERIFY_16, filter->read_ok);
+ __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
+
+ /* Audio CD commands */
+ __set_bit(GPCMD_PLAY_CD, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
+ __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
+
+ /* CD/DVD data reading */
+ __set_bit(GPCMD_READ_CD, filter->read_ok);
+ __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
+ __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
+ __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
+ __set_bit(GPCMD_READ_HEADER, filter->read_ok);
+ __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
+ __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
+ __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
+ __set_bit(GPCMD_SCAN, filter->read_ok);
+ __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
+ __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
+ __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
+ __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
+ __set_bit(GPCMD_SEEK, filter->read_ok);
+ __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
+
+ /* Basic writing commands */
+ __set_bit(WRITE_6, filter->write_ok);
+ __set_bit(WRITE_10, filter->write_ok);
+ __set_bit(WRITE_VERIFY, filter->write_ok);
+ __set_bit(WRITE_12, filter->write_ok);
+ __set_bit(WRITE_VERIFY_12, filter->write_ok);
+ __set_bit(WRITE_16, filter->write_ok);
+ __set_bit(WRITE_LONG, filter->write_ok);
+ __set_bit(WRITE_LONG_2, filter->write_ok);
+ __set_bit(ERASE, filter->write_ok);
+ __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
+ __set_bit(MODE_SELECT, filter->write_ok);
+ __set_bit(LOG_SELECT, filter->write_ok);
+ __set_bit(GPCMD_BLANK, filter->write_ok);
+ __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
+ __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
+ __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
+ __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
+ __set_bit(GPCMD_SEND_KEY, filter->write_ok);
+ __set_bit(GPCMD_SEND_OPC, filter->write_ok);
+ __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
+ __set_bit(GPCMD_SET_SPEED, filter->write_ok);
+ __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
+ __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
+ __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+}
+
+int blk_register_filter(struct gendisk *disk)
+{
+ int ret;
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+ struct kobject *parent = kobject_get(disk->holder_dir);
+
+ if (!parent)
+ return -ENODEV;
+
+ ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
+ "%s", "filter");
+
+ if (ret < 0)
+ return ret;
+
+ rcf_set_defaults(filter);
+ return 0;
+}
+
+void blk_unregister_filter(struct gendisk *disk)
+{
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+
+ kobject_put(&filter->kobj);
+ kobject_put(disk->holder_dir);
+}
+
diff --git a/block/genhd.c b/block/genhd.c
index b922d48..6f45845 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
disk->minors, NULL, exact_match, exact_lock, disk);
register_disk(disk);
blk_register_queue(disk);
+ blk_register_filter(disk);
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
@@ -200,6 +201,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
void unlink_gendisk(struct gendisk *disk)
{
+ blk_unregister_filter(disk);
sysfs_remove_link(&disk->dev.kobj, "bdi");
bdi_unregister(&disk->queue->backing_dev_info);
blk_unregister_queue(disk);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 78199c0..c5b9bcf 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,120 +105,12 @@ static int sg_emulated_host(struct
request_queue *q, int __user *p)
return put_user(1, p);
}
-#define CMD_READ_SAFE 0x01
-#define CMD_WRITE_SAFE 0x02
-#define CMD_WARNED 0x04
-#define safe_for_read(cmd) [cmd] = CMD_READ_SAFE
-#define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE
-
-int blk_verify_command(unsigned char *cmd, int has_write_perm)
-{
- static unsigned char cmd_type[256] = {
-
- /* Basic read-only commands */
- safe_for_read(TEST_UNIT_READY),
- safe_for_read(REQUEST_SENSE),
- safe_for_read(READ_6),
- safe_for_read(READ_10),
- safe_for_read(READ_12),
- safe_for_read(READ_16),
- safe_for_read(READ_BUFFER),
- safe_for_read(READ_DEFECT_DATA),
- safe_for_read(READ_LONG),
- safe_for_read(INQUIRY),
- safe_for_read(MODE_SENSE),
- safe_for_read(MODE_SENSE_10),
- safe_for_read(LOG_SENSE),
- safe_for_read(START_STOP),
- safe_for_read(GPCMD_VERIFY_10),
- safe_for_read(VERIFY_16),
-
- /* Audio CD commands */
- safe_for_read(GPCMD_PLAY_CD),
- safe_for_read(GPCMD_PLAY_AUDIO_10),
- safe_for_read(GPCMD_PLAY_AUDIO_MSF),
- safe_for_read(GPCMD_PLAY_AUDIO_TI),
- safe_for_read(GPCMD_PAUSE_RESUME),
-
- /* CD/DVD data reading */
- safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
- safe_for_read(GPCMD_READ_CD),
- safe_for_read(GPCMD_READ_CD_MSF),
- safe_for_read(GPCMD_READ_DISC_INFO),
- safe_for_read(GPCMD_READ_CDVD_CAPACITY),
- safe_for_read(GPCMD_READ_DVD_STRUCTURE),
- safe_for_read(GPCMD_READ_HEADER),
- safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
- safe_for_read(GPCMD_READ_SUBCHANNEL),
- safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
- safe_for_read(GPCMD_REPORT_KEY),
- safe_for_read(GPCMD_SCAN),
- safe_for_read(GPCMD_GET_CONFIGURATION),
- safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
- safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
- safe_for_read(GPCMD_GET_PERFORMANCE),
- safe_for_read(GPCMD_SEEK),
- safe_for_read(GPCMD_STOP_PLAY_SCAN),
-
- /* Basic writing commands */
- safe_for_write(WRITE_6),
- safe_for_write(WRITE_10),
- safe_for_write(WRITE_VERIFY),
- safe_for_write(WRITE_12),
- safe_for_write(WRITE_VERIFY_12),
- safe_for_write(WRITE_16),
- safe_for_write(WRITE_LONG),
- safe_for_write(WRITE_LONG_2),
- safe_for_write(ERASE),
- safe_for_write(GPCMD_MODE_SELECT_10),
- safe_for_write(MODE_SELECT),
- safe_for_write(LOG_SELECT),
- safe_for_write(GPCMD_BLANK),
- safe_for_write(GPCMD_CLOSE_TRACK),
- safe_for_write(GPCMD_FLUSH_CACHE),
- safe_for_write(GPCMD_FORMAT_UNIT),
- safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
- safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
- safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
- safe_for_write(GPCMD_SEND_EVENT),
- safe_for_write(GPCMD_SEND_KEY),
- safe_for_write(GPCMD_SEND_OPC),
- safe_for_write(GPCMD_SEND_CUE_SHEET),
- safe_for_write(GPCMD_SET_SPEED),
- safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
- safe_for_write(GPCMD_LOAD_UNLOAD),
- safe_for_write(GPCMD_SET_STREAMING),
- };
- unsigned char type = cmd_type[cmd[0]];
-
- /* Anybody who can open the device can do a read-safe command */
- if (type & CMD_READ_SAFE)
- return 0;
-
- /* Write-safe commands just require a writable open.. */
- if ((type & CMD_WRITE_SAFE) && has_write_perm)
- return 0;
-
- /* And root can do any command.. */
- if (capable(CAP_SYS_RAWIO))
- return 0;
-
- if (!type) {
- cmd_type[cmd[0]] = CMD_WARNED;
- printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
- }
-
- /* Otherwise fail it with an "Operation not permitted" */
- return -EPERM;
-}
-EXPORT_SYMBOL_GPL(blk_verify_command);
-
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_hdr *hdr, int has_write_perm)
+ struct sg_io_hdr *hdr, struct file *file)
{
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_verify_command(file, rq->cmd))
return -EPERM;
/*
@@ -287,7 +179,7 @@ static int sg_io(struct file *file, struct request_queue *q,
struct gendisk *bd_disk, struct sg_io_hdr *hdr)
{
unsigned long start_time;
- int writing = 0, ret = 0, has_write_perm = 0;
+ int writing = 0, ret = 0;
struct request *rq;
char sense[SCSI_SENSE_BUFFERSIZE];
struct bio *bio;
@@ -316,10 +208,7 @@ static int sg_io(struct file *file, struct
request_queue *q,
if (!rq)
return -ENOMEM;
- if (file)
- has_write_perm = file->f_mode & FMODE_WRITE;
-
- if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) {
+ if (blk_fill_sghdr_rq(q, rq, hdr, file)) {
blk_put_request(rq);
return -EFAULT;
}
@@ -451,7 +340,7 @@ int sg_scsi_ioctl(struct file *file, struct
request_queue *q,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error;
- err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE);
+ err = blk_verify_command(file, rq->cmd);
if (err)
goto error;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d2a1b71..b4e539b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -676,7 +676,6 @@ extern int blk_execute_rq(struct request_queue *,
struct gendisk *,
struct request *, int);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
-extern int blk_verify_command(unsigned char *, int);
extern void blk_unplug(struct request_queue *q);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -802,6 +801,15 @@ static inline struct request
*blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, sector_t *);
+/*
+* command filter functions
+*/
+extern int blk_verify_command(struct file *file, unsigned char *cmd);
+extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode);
+extern int blk_register_filter(struct gendisk *disk);
+extern void blk_unregister_filter(struct gendisk *disk);
+
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
#define SAFE_MAX_SECTORS 255
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ae7aec3..ab49a77 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -110,6 +110,14 @@ struct hd_struct {
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
#define GENHD_FL_FAIL 64
+#define BLK_SCSI_MAX_CMDS (256)
+#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+
+struct blk_scsi_cmd_filter {
+ unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+ unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+ struct kobject kobj;
+};
struct gendisk {
int major; /* major number of driver */
@@ -120,6 +128,7 @@ struct gendisk {
struct hd_struct **part; /* [indexed by minor] */
struct block_device_operations *fops;
struct request_queue *queue;
+ struct blk_scsi_cmd_filter cmd_filter;
void *private_data;
sector_t capacity;
^ permalink raw reply related [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC] allow userspace to modify scsi command filter on per device basis
2008-06-13 20:22 ` Adel Gadllah
@ 2008-06-13 20:23 ` Adel Gadllah
2008-06-14 6:51 ` [PATCH/RFC v2] " Adel Gadllah
0 siblings, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-06-13 20:23 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-scsi, pjones, Jens Axboe
2008/6/13 Adel Gadllah <adel.gadllah@gmail.com>:
> 2008/6/13 Matthew Wilcox <matthew@wil.cx>:
>> On Fri, Jun 13, 2008 at 09:33:27PM +0200, Adel Gadllah wrote:
>>> - if (blk_verify_command(rq->cmd, has_write_perm))
>>> + if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd, bd->f_mode))
>>
>> Could you wrap to 80 columns?
>
> yeah fixed all checkpatch.pl errors/warnings in the attached patch.
>
>>> +static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
>>> + const char *page, size_t count, int rw)
>>> +{
>>> + ssize_t ret = 0;
>>> + unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
>>> + int cmd, status, len;
>>> + substring_t ss;
>>> +
>>> + memset(&okbits, 0, sizeof (okbits));
>>> +
>>> + for (len = strlen(page); len > 0; len -= 3) {
>>> + if (len < 2)
>>> + break;
>>> + ss.from = (char *) page + ret;
>>> + ss.to = (char *) page + ret + 2;
>>> + ret+=3;
>>> + status = match_hex(&ss, &cmd);
>>> + /* either of these cases means invalid input, so do nothing. */
>>> + if (status || cmd >= BLK_SCSI_MAX_CMDS)
>>> + return -EINVAL;
>>> +
>>> + set_bit(cmd, okbits);
>>
>> set_bit is atomic. locked ops can be quite painful on some processors.
>> Since okbits is local, the atomicity isn't necessary and you can simply
>> use __set_bit.
>
> ok, fixed.
>
>>> +static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
>>> +{
>>> + /* Basic read-only commands */
>>> + set_bit(TEST_UNIT_READY, filter->read_ok);
>>
>> The set_bit vs __set_bit comment also applies here.
>
> fixed.
>
>>> +int blk_register_filter(struct gendisk *disk)
>>> +{
>>> + int ret;
>>> + struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
>>> + struct kobject *parent = kobject_get(disk->holder_dir);
>>> +
>>> + if(!parent) {
>>> + return -EBUSY;
>>> + }
>>
>> Normal style would be to write
>>
>> if (!parent)
>> return -EBUSY;
>
> fixed while adressing the checkpatch.pl stuff.
>
>> (though I don't understand why no parent means we're busy)
>
> changed to -ENODEV;
>
>>> +
>>> + ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent, "%s", "filter");
>>> +
>>> + if (ret < 0)
>>> + return ret;
>>> +
>>> + rcf_set_defaults(filter);
>>
>> Surely we should set the bits before we make the object visible?
>
> When is the kobject
^^ made visisble
^ permalink raw reply [flat|nested] 65+ messages in thread
* [PATCH/RFC v2] allow userspace to modify scsi command filter on per device basis
2008-06-13 20:23 ` Adel Gadllah
@ 2008-06-14 6:51 ` Adel Gadllah
2008-06-16 2:55 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-06-14 6:51 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-scsi, pjones, Jens Axboe
Attached a new version of the patch.
Moved the sysfs entry up one level (to not be in the "holders"
directory) and renamed it t cmd_filter (reflects its purpose better
than "filter").
-----------------------------
From: Peter Jones <pjones@redhat.com>
This patch exports the per-gendisk command filter to user space through
sysfs, so it can be changed by the system administrator.
Signed-off-by: Adel Gadllah <adel.gadllah@gmail.com>
Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d..717733c 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,7 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o
+ blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
+ cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/bsg.c b/block/bsg.c
index f0b7cd3..439940c 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -44,11 +44,12 @@ struct bsg_device {
char name[BUS_ID_SIZE];
int max_queue;
unsigned long flags;
+ struct blk_scsi_cmd_filter *cmd_filter;
+ mode_t *f_mode;
};
enum {
BSG_F_BLOCK = 1,
- BSG_F_WRITE_PERM = 2,
};
#define BSG_DEFAULT_CMDS 64
@@ -172,7 +173,7 @@ unlock:
}
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_v4 *hdr, int has_write_perm)
+ struct sg_io_v4 *hdr, struct bsg_device *bd)
{
if (hdr->request_len > BLK_MAX_CDB) {
rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -185,7 +186,8 @@ static int blk_fill_sgv4_hdr_rq(struct
request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
+ bd->f_mode))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -263,8 +265,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
return ERR_PTR(-ENOMEM);
- ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
- &bd->flags));
+ ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
if (ret)
goto out;
@@ -566,12 +567,23 @@ static inline void bsg_set_block(struct
bsg_device *bd, struct file *file)
set_bit(BSG_F_BLOCK, &bd->flags);
}
-static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file)
+static void bsg_set_cmd_filter(struct bsg_device *bd,
+ struct file *file)
{
- if (file->f_mode & FMODE_WRITE)
- set_bit(BSG_F_WRITE_PERM, &bd->flags);
- else
- clear_bit(BSG_F_WRITE_PERM, &bd->flags);
+ struct inode *inode;
+ struct gendisk *disk;
+
+ if (!file)
+ return;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return;
+
+ disk = inode->i_bdev->bd_disk;
+
+ bd->cmd_filter = &disk->cmd_filter;
+ bd->f_mode = &file->f_mode;
}
/*
@@ -595,6 +607,8 @@ bsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
dprintk("%s: read %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
+
bytes_read = 0;
ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
*ppos = bytes_read;
@@ -668,7 +682,7 @@ bsg_write(struct file *file, const char __user
*buf, size_t count, loff_t *ppos)
dprintk("%s: write %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
- bsg_set_write_perm(bd, file);
+ bsg_set_cmd_filter(bd, file);
bytes_written = 0;
ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -771,7 +785,9 @@ static struct bsg_device *bsg_add_device(struct
inode *inode,
}
bd->queue = rq;
+
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
atomic_set(&bd->ref_count, 1);
mutex_lock(&bsg_mutex);
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
new file mode 100644
index 0000000..2121453
--- /dev/null
+++ b/block/cmd-filter.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright 2004 Peter M. Jones <pjones@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/genhd.h>
+#include <linux/spinlock.h>
+#include <linux/parser.h>
+#include <linux/capability.h>
+#include <linux/bitops.h>
+
+#include <scsi/scsi.h>
+#include <linux/cdrom.h>
+
+int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode)
+{
+ /* root can do any command. */
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
+ /* if there's no filter set, assume we're filtering everything out */
+ if (!filter)
+ return -EPERM;
+
+ /* Anybody who can open the device can do a read-safe command */
+ if (test_bit(cmd[0], filter->read_ok))
+ return 0;
+
+ /* Write-safe commands require a writable open */
+ if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
+ return 0;
+
+ return -EPERM;
+}
+EXPORT_SYMBOL(blk_cmd_filter_verify_command);
+
+int blk_verify_command(struct file *file, unsigned char *cmd)
+{
+ struct gendisk *disk;
+ struct inode *inode;
+
+ if (!file)
+ return -EINVAL;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return -EINVAL;
+
+ disk = inode->i_bdev->bd_disk;
+
+ return blk_cmd_filter_verify_command(&disk->cmd_filter,
+ cmd, &file->f_mode);
+}
+EXPORT_SYMBOL(blk_verify_command);
+
+/* and now, the sysfs stuff */
+static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
+ int rw)
+{
+ char *npage = page;
+ unsigned long *okbits;
+ int i;
+
+ if (rw == READ)
+ okbits = filter->read_ok;
+ else
+ okbits = filter->write_ok;
+
+ for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
+ if (test_bit(i, okbits)) {
+ sprintf(npage, "%02x", i);
+ npage += 2;
+ if (i < BLK_SCSI_MAX_CMDS - 1)
+ sprintf(npage++, " ");
+ }
+ }
+
+ if (npage != page)
+ npage += sprintf(npage, "\n");
+
+ return npage - page;
+}
+
+static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter,
char *page)
+{
+ return rcf_cmds_show(filter, page, READ);
+}
+
+static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter *filter,
+ char *page)
+{
+ return rcf_cmds_show(filter, page, WRITE);
+}
+
+static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count, int rw)
+{
+ ssize_t ret = 0;
+ unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
+ int cmd, status, len;
+ substring_t ss;
+
+ memset(&okbits, 0, sizeof(okbits));
+
+ for (len = strlen(page); len > 0; len -= 3) {
+ if (len < 2)
+ break;
+ ss.from = (char *) page + ret;
+ ss.to = (char *) page + ret + 2;
+ ret += 3;
+ status = match_hex(&ss, &cmd);
+ /* either of these cases means invalid input, so do nothing. */
+ if (status || cmd >= BLK_SCSI_MAX_CMDS)
+ return -EINVAL;
+
+ __set_bit(cmd, okbits);
+ }
+
+ if (rw == READ)
+ target_okbits = filter->read_ok;
+ else
+ target_okbits = filter->write_ok;
+
+ memmove(target_okbits, okbits, sizeof(okbits));
+ return count;
+}
+
+static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, READ);
+}
+
+static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, WRITE);
+}
+
+struct rcf_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
+ ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
+};
+
+static struct rcf_sysfs_entry rcf_readcmds_entry = {
+ .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_readcmds_show,
+ .store = rcf_readcmds_store,
+};
+
+static struct rcf_sysfs_entry rcf_writecmds_entry = {
+ .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_writecmds_show,
+ .store = rcf_writecmds_store,
+};
+
+static struct attribute *default_attrs[] = {
+ &rcf_readcmds_entry.attr,
+ &rcf_writecmds_entry.attr,
+ NULL,
+};
+
+#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
+
+static ssize_t
+rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ if (entry->show)
+ return entry->show(filter, page);
+
+ return 0;
+}
+
+static ssize_t
+rcf_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (!entry->store)
+ return -EINVAL;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ return entry->store(filter, page, length);
+}
+
+static struct sysfs_ops rcf_sysfs_ops = {
+ .show = rcf_attr_show,
+ .store = rcf_attr_store,
+};
+
+static struct kobj_type rcf_ktype = {
+ .sysfs_ops = &rcf_sysfs_ops,
+ .default_attrs = default_attrs,
+};
+
+static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
+{
+ /* Basic read-only commands */
+ __set_bit(TEST_UNIT_READY, filter->read_ok);
+ __set_bit(REQUEST_SENSE, filter->read_ok);
+ __set_bit(READ_6, filter->read_ok);
+ __set_bit(READ_10, filter->read_ok);
+ __set_bit(READ_12, filter->read_ok);
+ __set_bit(READ_16, filter->read_ok);
+ __set_bit(READ_BUFFER, filter->read_ok);
+ __set_bit(READ_DEFECT_DATA, filter->read_ok);
+ __set_bit(READ_LONG, filter->read_ok);
+ __set_bit(INQUIRY, filter->read_ok);
+ __set_bit(MODE_SENSE, filter->read_ok);
+ __set_bit(MODE_SENSE_10, filter->read_ok);
+ __set_bit(LOG_SENSE, filter->read_ok);
+ __set_bit(START_STOP, filter->read_ok);
+ __set_bit(GPCMD_VERIFY_10, filter->read_ok);
+ __set_bit(VERIFY_16, filter->read_ok);
+ __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
+
+ /* Audio CD commands */
+ __set_bit(GPCMD_PLAY_CD, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
+ __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
+
+ /* CD/DVD data reading */
+ __set_bit(GPCMD_READ_CD, filter->read_ok);
+ __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
+ __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
+ __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
+ __set_bit(GPCMD_READ_HEADER, filter->read_ok);
+ __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
+ __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
+ __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
+ __set_bit(GPCMD_SCAN, filter->read_ok);
+ __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
+ __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
+ __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
+ __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
+ __set_bit(GPCMD_SEEK, filter->read_ok);
+ __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
+
+ /* Basic writing commands */
+ __set_bit(WRITE_6, filter->write_ok);
+ __set_bit(WRITE_10, filter->write_ok);
+ __set_bit(WRITE_VERIFY, filter->write_ok);
+ __set_bit(WRITE_12, filter->write_ok);
+ __set_bit(WRITE_VERIFY_12, filter->write_ok);
+ __set_bit(WRITE_16, filter->write_ok);
+ __set_bit(WRITE_LONG, filter->write_ok);
+ __set_bit(WRITE_LONG_2, filter->write_ok);
+ __set_bit(ERASE, filter->write_ok);
+ __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
+ __set_bit(MODE_SELECT, filter->write_ok);
+ __set_bit(LOG_SELECT, filter->write_ok);
+ __set_bit(GPCMD_BLANK, filter->write_ok);
+ __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
+ __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
+ __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
+ __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
+ __set_bit(GPCMD_SEND_KEY, filter->write_ok);
+ __set_bit(GPCMD_SEND_OPC, filter->write_ok);
+ __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
+ __set_bit(GPCMD_SET_SPEED, filter->write_ok);
+ __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
+ __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
+ __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+}
+
+int blk_register_filter(struct gendisk *disk)
+{
+ int ret;
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+ struct kobject *parent = kobject_get(disk->holder_dir->parent);
+
+ if (!parent)
+ return -ENODEV;
+
+ ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
+ "%s", "cmd_filter");
+
+ if (ret < 0)
+ return ret;
+
+ rcf_set_defaults(filter);
+ return 0;
+}
+
+void blk_unregister_filter(struct gendisk *disk)
+{
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+
+ kobject_put(&filter->kobj);
+ kobject_put(disk->holder_dir->parent);
+}
+
diff --git a/block/genhd.c b/block/genhd.c
index b922d48..6f45845 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
disk->minors, NULL, exact_match, exact_lock, disk);
register_disk(disk);
blk_register_queue(disk);
+ blk_register_filter(disk);
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
@@ -200,6 +201,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
void unlink_gendisk(struct gendisk *disk)
{
+ blk_unregister_filter(disk);
sysfs_remove_link(&disk->dev.kobj, "bdi");
bdi_unregister(&disk->queue->backing_dev_info);
blk_unregister_queue(disk);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 78199c0..c5b9bcf 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,120 +105,12 @@ static int sg_emulated_host(struct
request_queue *q, int __user *p)
return put_user(1, p);
}
-#define CMD_READ_SAFE 0x01
-#define CMD_WRITE_SAFE 0x02
-#define CMD_WARNED 0x04
-#define safe_for_read(cmd) [cmd] = CMD_READ_SAFE
-#define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE
-
-int blk_verify_command(unsigned char *cmd, int has_write_perm)
-{
- static unsigned char cmd_type[256] = {
-
- /* Basic read-only commands */
- safe_for_read(TEST_UNIT_READY),
- safe_for_read(REQUEST_SENSE),
- safe_for_read(READ_6),
- safe_for_read(READ_10),
- safe_for_read(READ_12),
- safe_for_read(READ_16),
- safe_for_read(READ_BUFFER),
- safe_for_read(READ_DEFECT_DATA),
- safe_for_read(READ_LONG),
- safe_for_read(INQUIRY),
- safe_for_read(MODE_SENSE),
- safe_for_read(MODE_SENSE_10),
- safe_for_read(LOG_SENSE),
- safe_for_read(START_STOP),
- safe_for_read(GPCMD_VERIFY_10),
- safe_for_read(VERIFY_16),
-
- /* Audio CD commands */
- safe_for_read(GPCMD_PLAY_CD),
- safe_for_read(GPCMD_PLAY_AUDIO_10),
- safe_for_read(GPCMD_PLAY_AUDIO_MSF),
- safe_for_read(GPCMD_PLAY_AUDIO_TI),
- safe_for_read(GPCMD_PAUSE_RESUME),
-
- /* CD/DVD data reading */
- safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
- safe_for_read(GPCMD_READ_CD),
- safe_for_read(GPCMD_READ_CD_MSF),
- safe_for_read(GPCMD_READ_DISC_INFO),
- safe_for_read(GPCMD_READ_CDVD_CAPACITY),
- safe_for_read(GPCMD_READ_DVD_STRUCTURE),
- safe_for_read(GPCMD_READ_HEADER),
- safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
- safe_for_read(GPCMD_READ_SUBCHANNEL),
- safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
- safe_for_read(GPCMD_REPORT_KEY),
- safe_for_read(GPCMD_SCAN),
- safe_for_read(GPCMD_GET_CONFIGURATION),
- safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
- safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
- safe_for_read(GPCMD_GET_PERFORMANCE),
- safe_for_read(GPCMD_SEEK),
- safe_for_read(GPCMD_STOP_PLAY_SCAN),
-
- /* Basic writing commands */
- safe_for_write(WRITE_6),
- safe_for_write(WRITE_10),
- safe_for_write(WRITE_VERIFY),
- safe_for_write(WRITE_12),
- safe_for_write(WRITE_VERIFY_12),
- safe_for_write(WRITE_16),
- safe_for_write(WRITE_LONG),
- safe_for_write(WRITE_LONG_2),
- safe_for_write(ERASE),
- safe_for_write(GPCMD_MODE_SELECT_10),
- safe_for_write(MODE_SELECT),
- safe_for_write(LOG_SELECT),
- safe_for_write(GPCMD_BLANK),
- safe_for_write(GPCMD_CLOSE_TRACK),
- safe_for_write(GPCMD_FLUSH_CACHE),
- safe_for_write(GPCMD_FORMAT_UNIT),
- safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
- safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
- safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
- safe_for_write(GPCMD_SEND_EVENT),
- safe_for_write(GPCMD_SEND_KEY),
- safe_for_write(GPCMD_SEND_OPC),
- safe_for_write(GPCMD_SEND_CUE_SHEET),
- safe_for_write(GPCMD_SET_SPEED),
- safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
- safe_for_write(GPCMD_LOAD_UNLOAD),
- safe_for_write(GPCMD_SET_STREAMING),
- };
- unsigned char type = cmd_type[cmd[0]];
-
- /* Anybody who can open the device can do a read-safe command */
- if (type & CMD_READ_SAFE)
- return 0;
-
- /* Write-safe commands just require a writable open.. */
- if ((type & CMD_WRITE_SAFE) && has_write_perm)
- return 0;
-
- /* And root can do any command.. */
- if (capable(CAP_SYS_RAWIO))
- return 0;
-
- if (!type) {
- cmd_type[cmd[0]] = CMD_WARNED;
- printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
- }
-
- /* Otherwise fail it with an "Operation not permitted" */
- return -EPERM;
-}
-EXPORT_SYMBOL_GPL(blk_verify_command);
-
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_hdr *hdr, int has_write_perm)
+ struct sg_io_hdr *hdr, struct file *file)
{
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_verify_command(file, rq->cmd))
return -EPERM;
/*
@@ -287,7 +179,7 @@ static int sg_io(struct file *file, struct request_queue *q,
struct gendisk *bd_disk, struct sg_io_hdr *hdr)
{
unsigned long start_time;
- int writing = 0, ret = 0, has_write_perm = 0;
+ int writing = 0, ret = 0;
struct request *rq;
char sense[SCSI_SENSE_BUFFERSIZE];
struct bio *bio;
@@ -316,10 +208,7 @@ static int sg_io(struct file *file, struct
request_queue *q,
if (!rq)
return -ENOMEM;
- if (file)
- has_write_perm = file->f_mode & FMODE_WRITE;
-
- if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) {
+ if (blk_fill_sghdr_rq(q, rq, hdr, file)) {
blk_put_request(rq);
return -EFAULT;
}
@@ -451,7 +340,7 @@ int sg_scsi_ioctl(struct file *file, struct
request_queue *q,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error;
- err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE);
+ err = blk_verify_command(file, rq->cmd);
if (err)
goto error;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d2a1b71..b4e539b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -676,7 +676,6 @@ extern int blk_execute_rq(struct request_queue *,
struct gendisk *,
struct request *, int);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
-extern int blk_verify_command(unsigned char *, int);
extern void blk_unplug(struct request_queue *q);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -802,6 +801,15 @@ static inline struct request
*blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, sector_t *);
+/*
+* command filter functions
+*/
+extern int blk_verify_command(struct file *file, unsigned char *cmd);
+extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode);
+extern int blk_register_filter(struct gendisk *disk);
+extern void blk_unregister_filter(struct gendisk *disk);
+
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
#define SAFE_MAX_SECTORS 255
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ae7aec3..ab49a77 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -110,6 +110,14 @@ struct hd_struct {
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
#define GENHD_FL_FAIL 64
+#define BLK_SCSI_MAX_CMDS (256)
+#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+
+struct blk_scsi_cmd_filter {
+ unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+ unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+ struct kobject kobj;
+};
struct gendisk {
int major; /* major number of driver */
@@ -120,6 +128,7 @@ struct gendisk {
struct hd_struct **part; /* [indexed by minor] */
struct block_device_operations *fops;
struct request_queue *queue;
+ struct blk_scsi_cmd_filter cmd_filter;
void *private_data;
sector_t capacity;
^ permalink raw reply related [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC] allow userspace to modify scsi command filter on per device basis
2008-06-13 19:54 ` Matthew Wilcox
2008-06-13 20:22 ` Adel Gadllah
@ 2008-06-14 20:26 ` Jens Axboe
1 sibling, 0 replies; 65+ messages in thread
From: Jens Axboe @ 2008-06-14 20:26 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: Adel Gadllah, linux-scsi, pjones
On Fri, Jun 13 2008, Matthew Wilcox wrote:
> On Fri, Jun 13, 2008 at 09:33:27PM +0200, Adel Gadllah wrote:
> > - if (blk_verify_command(rq->cmd, has_write_perm))
> > + if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd, bd->f_mode))
>
> Could you wrap to 80 columns?
>
> > +static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
> > + const char *page, size_t count, int rw)
> > +{
> > + ssize_t ret = 0;
> > + unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
> > + int cmd, status, len;
> > + substring_t ss;
> > +
> > + memset(&okbits, 0, sizeof (okbits));
> > +
> > + for (len = strlen(page); len > 0; len -= 3) {
> > + if (len < 2)
> > + break;
> > + ss.from = (char *) page + ret;
> > + ss.to = (char *) page + ret + 2;
> > + ret+=3;
> > + status = match_hex(&ss, &cmd);
> > + /* either of these cases means invalid input, so do nothing. */
> > + if (status || cmd >= BLK_SCSI_MAX_CMDS)
> > + return -EINVAL;
> > +
> > + set_bit(cmd, okbits);
>
> set_bit is atomic. locked ops can be quite painful on some processors.
> Since okbits is local, the atomicity isn't necessary and you can simply
> use __set_bit.
Which that is strictly true, this is only invoked when someone sets a
new filter for the device. So probably once in the life time of that
kernel boot, hardly performance critical...
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v2] allow userspace to modify scsi command filter on per device basis
2008-06-14 6:51 ` [PATCH/RFC v2] " Adel Gadllah
@ 2008-06-16 2:55 ` FUJITA Tomonori
2008-06-16 5:49 ` Adel Gadllah
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-06-16 2:55 UTC (permalink / raw)
To: adel.gadllah; +Cc: matthew, linux-scsi, pjones, jens.axboe
On Sat, 14 Jun 2008 08:51:16 +0200
"Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> Attached a new version of the patch.
> Moved the sysfs entry up one level (to not be in the "holders"
> directory) and renamed it t cmd_filter (reflects its purpose better
> than "filter").
>
> -----------------------------
>
> From: Peter Jones <pjones@redhat.com>
>
> This patch exports the per-gendisk command filter to user space through
> sysfs, so it can be changed by the system administrator.
>
> Signed-off-by: Adel Gadllah <adel.gadllah@gmail.com>
> Signed-off-by: Peter Jones <pjones@redhat.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
I think that the scsi command filter infrastructure should work with
all the interfaces, bsg, SG_IO, and sg. This doesn't work with sg,
right?
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v2] allow userspace to modify scsi command filter on per device basis
2008-06-16 2:55 ` FUJITA Tomonori
@ 2008-06-16 5:49 ` Adel Gadllah
2008-06-16 6:13 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-06-16 5:49 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: matthew, linux-scsi, pjones, jens.axboe
2008/6/16 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
> On Sat, 14 Jun 2008 08:51:16 +0200
> "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
>
>> Attached a new version of the patch.
>> Moved the sysfs entry up one level (to not be in the "holders"
>> directory) and renamed it t cmd_filter (reflects its purpose better
>> than "filter").
>>
>> -----------------------------
>>
>> From: Peter Jones <pjones@redhat.com>
>>
>> This patch exports the per-gendisk command filter to user space through
>> sysfs, so it can be changed by the system administrator.
>>
>> Signed-off-by: Adel Gadllah <adel.gadllah@gmail.com>
>> Signed-off-by: Peter Jones <pjones@redhat.com>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>
> I think that the scsi command filter infrastructure should work with
> all the interfaces, bsg, SG_IO, and sg. This doesn't work with sg,
> right?
>
sg never used any command filtering. (I have converted all users of
the old command filter to the new one).
It allowed any scsi command without checking but normally /dev/sg* is
only accessable by root anyway.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v2] allow userspace to modify scsi command filter on per device basis
2008-06-16 5:49 ` Adel Gadllah
@ 2008-06-16 6:13 ` FUJITA Tomonori
2008-06-16 9:22 ` [PATCH/RFC v3] " Adel Gadllah
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-06-16 6:13 UTC (permalink / raw)
To: adel.gadllah; +Cc: fujita.tomonori, matthew, linux-scsi, pjones, jens.axboe
On Mon, 16 Jun 2008 07:49:49 +0200
"Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> 2008/6/16 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
> > On Sat, 14 Jun 2008 08:51:16 +0200
> > "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> >
> >> Attached a new version of the patch.
> >> Moved the sysfs entry up one level (to not be in the "holders"
> >> directory) and renamed it t cmd_filter (reflects its purpose better
> >> than "filter").
> >>
> >> -----------------------------
> >>
> >> From: Peter Jones <pjones@redhat.com>
> >>
> >> This patch exports the per-gendisk command filter to user space through
> >> sysfs, so it can be changed by the system administrator.
> >>
> >> Signed-off-by: Adel Gadllah <adel.gadllah@gmail.com>
> >> Signed-off-by: Peter Jones <pjones@redhat.com>
> >> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> >
> > I think that the scsi command filter infrastructure should work with
> > all the interfaces, bsg, SG_IO, and sg. This doesn't work with sg,
> > right?
> >
>
> sg never used any command filtering. (I have converted all users of
> the old command filter to the new one).
No, sg has own command filtering mechanism, see sg_allow_access().
When we discussed the per-gendisk command filter before, I think that
we agreed that we had better to have one command filtering mechanism.
> It allowed any scsi command without checking but normally /dev/sg* is
> only accessable by root anyway.
^ permalink raw reply [flat|nested] 65+ messages in thread
* [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-16 6:13 ` FUJITA Tomonori
@ 2008-06-16 9:22 ` Adel Gadllah
2008-06-17 20:14 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-06-16 9:22 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: matthew, linux-scsi, pjones, jens.axboe, dgilbert
2008/6/16 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
> No, sg has own command filtering mechanism, see sg_allow_access().
>
> When we discussed the per-gendisk command filter before, I think that
> we agreed that we had better to have one command filtering mechanism.
The attached patch converts sg to use the cmd_filter too.
The sg driver seems to verify commands for read access only. I have
not changed this behaviour to avoid breaking things (userspace),
but if we want to change this I can submit another patch.
--------------------------------
From: Peter Jones <pjones@redhat.com>
This patch exports the per-gendisk command filter to user space through
sysfs, so it can be changed by the system administrator.
Signed-off-by: Adel Gadllah <adel.gadllah@gmail.com>
Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d..717733c 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,7 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o
+ blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
+ cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/bsg.c b/block/bsg.c
index f0b7cd3..439940c 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -44,11 +44,12 @@ struct bsg_device {
char name[BUS_ID_SIZE];
int max_queue;
unsigned long flags;
+ struct blk_scsi_cmd_filter *cmd_filter;
+ mode_t *f_mode;
};
enum {
BSG_F_BLOCK = 1,
- BSG_F_WRITE_PERM = 2,
};
#define BSG_DEFAULT_CMDS 64
@@ -172,7 +173,7 @@ unlock:
}
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_v4 *hdr, int has_write_perm)
+ struct sg_io_v4 *hdr, struct bsg_device *bd)
{
if (hdr->request_len > BLK_MAX_CDB) {
rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -185,7 +186,8 @@ static int blk_fill_sgv4_hdr_rq(struct
request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
+ bd->f_mode))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -263,8 +265,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
return ERR_PTR(-ENOMEM);
- ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
- &bd->flags));
+ ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
if (ret)
goto out;
@@ -566,12 +567,23 @@ static inline void bsg_set_block(struct
bsg_device *bd, struct file *file)
set_bit(BSG_F_BLOCK, &bd->flags);
}
-static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file)
+static void bsg_set_cmd_filter(struct bsg_device *bd,
+ struct file *file)
{
- if (file->f_mode & FMODE_WRITE)
- set_bit(BSG_F_WRITE_PERM, &bd->flags);
- else
- clear_bit(BSG_F_WRITE_PERM, &bd->flags);
+ struct inode *inode;
+ struct gendisk *disk;
+
+ if (!file)
+ return;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return;
+
+ disk = inode->i_bdev->bd_disk;
+
+ bd->cmd_filter = &disk->cmd_filter;
+ bd->f_mode = &file->f_mode;
}
/*
@@ -595,6 +607,8 @@ bsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
dprintk("%s: read %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
+
bytes_read = 0;
ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
*ppos = bytes_read;
@@ -668,7 +682,7 @@ bsg_write(struct file *file, const char __user
*buf, size_t count, loff_t *ppos)
dprintk("%s: write %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
- bsg_set_write_perm(bd, file);
+ bsg_set_cmd_filter(bd, file);
bytes_written = 0;
ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -771,7 +785,9 @@ static struct bsg_device *bsg_add_device(struct
inode *inode,
}
bd->queue = rq;
+
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
atomic_set(&bd->ref_count, 1);
mutex_lock(&bsg_mutex);
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
new file mode 100644
index 0000000..2121453
--- /dev/null
+++ b/block/cmd-filter.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright 2004 Peter M. Jones <pjones@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/genhd.h>
+#include <linux/spinlock.h>
+#include <linux/parser.h>
+#include <linux/capability.h>
+#include <linux/bitops.h>
+
+#include <scsi/scsi.h>
+#include <linux/cdrom.h>
+
+int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode)
+{
+ /* root can do any command. */
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
+ /* if there's no filter set, assume we're filtering everything out */
+ if (!filter)
+ return -EPERM;
+
+ /* Anybody who can open the device can do a read-safe command */
+ if (test_bit(cmd[0], filter->read_ok))
+ return 0;
+
+ /* Write-safe commands require a writable open */
+ if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
+ return 0;
+
+ return -EPERM;
+}
+EXPORT_SYMBOL(blk_cmd_filter_verify_command);
+
+int blk_verify_command(struct file *file, unsigned char *cmd)
+{
+ struct gendisk *disk;
+ struct inode *inode;
+
+ if (!file)
+ return -EINVAL;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return -EINVAL;
+
+ disk = inode->i_bdev->bd_disk;
+
+ return blk_cmd_filter_verify_command(&disk->cmd_filter,
+ cmd, &file->f_mode);
+}
+EXPORT_SYMBOL(blk_verify_command);
+
+/* and now, the sysfs stuff */
+static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
+ int rw)
+{
+ char *npage = page;
+ unsigned long *okbits;
+ int i;
+
+ if (rw == READ)
+ okbits = filter->read_ok;
+ else
+ okbits = filter->write_ok;
+
+ for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
+ if (test_bit(i, okbits)) {
+ sprintf(npage, "%02x", i);
+ npage += 2;
+ if (i < BLK_SCSI_MAX_CMDS - 1)
+ sprintf(npage++, " ");
+ }
+ }
+
+ if (npage != page)
+ npage += sprintf(npage, "\n");
+
+ return npage - page;
+}
+
+static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter,
char *page)
+{
+ return rcf_cmds_show(filter, page, READ);
+}
+
+static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter *filter,
+ char *page)
+{
+ return rcf_cmds_show(filter, page, WRITE);
+}
+
+static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count, int rw)
+{
+ ssize_t ret = 0;
+ unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
+ int cmd, status, len;
+ substring_t ss;
+
+ memset(&okbits, 0, sizeof(okbits));
+
+ for (len = strlen(page); len > 0; len -= 3) {
+ if (len < 2)
+ break;
+ ss.from = (char *) page + ret;
+ ss.to = (char *) page + ret + 2;
+ ret += 3;
+ status = match_hex(&ss, &cmd);
+ /* either of these cases means invalid input, so do nothing. */
+ if (status || cmd >= BLK_SCSI_MAX_CMDS)
+ return -EINVAL;
+
+ __set_bit(cmd, okbits);
+ }
+
+ if (rw == READ)
+ target_okbits = filter->read_ok;
+ else
+ target_okbits = filter->write_ok;
+
+ memmove(target_okbits, okbits, sizeof(okbits));
+ return count;
+}
+
+static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, READ);
+}
+
+static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, WRITE);
+}
+
+struct rcf_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
+ ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
+};
+
+static struct rcf_sysfs_entry rcf_readcmds_entry = {
+ .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_readcmds_show,
+ .store = rcf_readcmds_store,
+};
+
+static struct rcf_sysfs_entry rcf_writecmds_entry = {
+ .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_writecmds_show,
+ .store = rcf_writecmds_store,
+};
+
+static struct attribute *default_attrs[] = {
+ &rcf_readcmds_entry.attr,
+ &rcf_writecmds_entry.attr,
+ NULL,
+};
+
+#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
+
+static ssize_t
+rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ if (entry->show)
+ return entry->show(filter, page);
+
+ return 0;
+}
+
+static ssize_t
+rcf_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (!entry->store)
+ return -EINVAL;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ return entry->store(filter, page, length);
+}
+
+static struct sysfs_ops rcf_sysfs_ops = {
+ .show = rcf_attr_show,
+ .store = rcf_attr_store,
+};
+
+static struct kobj_type rcf_ktype = {
+ .sysfs_ops = &rcf_sysfs_ops,
+ .default_attrs = default_attrs,
+};
+
+static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
+{
+ /* Basic read-only commands */
+ __set_bit(TEST_UNIT_READY, filter->read_ok);
+ __set_bit(REQUEST_SENSE, filter->read_ok);
+ __set_bit(READ_6, filter->read_ok);
+ __set_bit(READ_10, filter->read_ok);
+ __set_bit(READ_12, filter->read_ok);
+ __set_bit(READ_16, filter->read_ok);
+ __set_bit(READ_BUFFER, filter->read_ok);
+ __set_bit(READ_DEFECT_DATA, filter->read_ok);
+ __set_bit(READ_LONG, filter->read_ok);
+ __set_bit(INQUIRY, filter->read_ok);
+ __set_bit(MODE_SENSE, filter->read_ok);
+ __set_bit(MODE_SENSE_10, filter->read_ok);
+ __set_bit(LOG_SENSE, filter->read_ok);
+ __set_bit(START_STOP, filter->read_ok);
+ __set_bit(GPCMD_VERIFY_10, filter->read_ok);
+ __set_bit(VERIFY_16, filter->read_ok);
+ __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
+
+ /* Audio CD commands */
+ __set_bit(GPCMD_PLAY_CD, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
+ __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
+
+ /* CD/DVD data reading */
+ __set_bit(GPCMD_READ_CD, filter->read_ok);
+ __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
+ __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
+ __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
+ __set_bit(GPCMD_READ_HEADER, filter->read_ok);
+ __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
+ __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
+ __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
+ __set_bit(GPCMD_SCAN, filter->read_ok);
+ __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
+ __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
+ __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
+ __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
+ __set_bit(GPCMD_SEEK, filter->read_ok);
+ __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
+
+ /* Basic writing commands */
+ __set_bit(WRITE_6, filter->write_ok);
+ __set_bit(WRITE_10, filter->write_ok);
+ __set_bit(WRITE_VERIFY, filter->write_ok);
+ __set_bit(WRITE_12, filter->write_ok);
+ __set_bit(WRITE_VERIFY_12, filter->write_ok);
+ __set_bit(WRITE_16, filter->write_ok);
+ __set_bit(WRITE_LONG, filter->write_ok);
+ __set_bit(WRITE_LONG_2, filter->write_ok);
+ __set_bit(ERASE, filter->write_ok);
+ __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
+ __set_bit(MODE_SELECT, filter->write_ok);
+ __set_bit(LOG_SELECT, filter->write_ok);
+ __set_bit(GPCMD_BLANK, filter->write_ok);
+ __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
+ __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
+ __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
+ __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
+ __set_bit(GPCMD_SEND_KEY, filter->write_ok);
+ __set_bit(GPCMD_SEND_OPC, filter->write_ok);
+ __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
+ __set_bit(GPCMD_SET_SPEED, filter->write_ok);
+ __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
+ __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
+ __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+}
+
+int blk_register_filter(struct gendisk *disk)
+{
+ int ret;
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+ struct kobject *parent = kobject_get(disk->holder_dir->parent);
+
+ if (!parent)
+ return -ENODEV;
+
+ ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
+ "%s", "cmd_filter");
+
+ if (ret < 0)
+ return ret;
+
+ rcf_set_defaults(filter);
+ return 0;
+}
+
+void blk_unregister_filter(struct gendisk *disk)
+{
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+
+ kobject_put(&filter->kobj);
+ kobject_put(disk->holder_dir->parent);
+}
+
diff --git a/block/genhd.c b/block/genhd.c
index b922d48..6f45845 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
disk->minors, NULL, exact_match, exact_lock, disk);
register_disk(disk);
blk_register_queue(disk);
+ blk_register_filter(disk);
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
@@ -200,6 +201,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
void unlink_gendisk(struct gendisk *disk)
{
+ blk_unregister_filter(disk);
sysfs_remove_link(&disk->dev.kobj, "bdi");
bdi_unregister(&disk->queue->backing_dev_info);
blk_unregister_queue(disk);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 78199c0..c5b9bcf 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,120 +105,12 @@ static int sg_emulated_host(struct
request_queue *q, int __user *p)
return put_user(1, p);
}
-#define CMD_READ_SAFE 0x01
-#define CMD_WRITE_SAFE 0x02
-#define CMD_WARNED 0x04
-#define safe_for_read(cmd) [cmd] = CMD_READ_SAFE
-#define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE
-
-int blk_verify_command(unsigned char *cmd, int has_write_perm)
-{
- static unsigned char cmd_type[256] = {
-
- /* Basic read-only commands */
- safe_for_read(TEST_UNIT_READY),
- safe_for_read(REQUEST_SENSE),
- safe_for_read(READ_6),
- safe_for_read(READ_10),
- safe_for_read(READ_12),
- safe_for_read(READ_16),
- safe_for_read(READ_BUFFER),
- safe_for_read(READ_DEFECT_DATA),
- safe_for_read(READ_LONG),
- safe_for_read(INQUIRY),
- safe_for_read(MODE_SENSE),
- safe_for_read(MODE_SENSE_10),
- safe_for_read(LOG_SENSE),
- safe_for_read(START_STOP),
- safe_for_read(GPCMD_VERIFY_10),
- safe_for_read(VERIFY_16),
-
- /* Audio CD commands */
- safe_for_read(GPCMD_PLAY_CD),
- safe_for_read(GPCMD_PLAY_AUDIO_10),
- safe_for_read(GPCMD_PLAY_AUDIO_MSF),
- safe_for_read(GPCMD_PLAY_AUDIO_TI),
- safe_for_read(GPCMD_PAUSE_RESUME),
-
- /* CD/DVD data reading */
- safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
- safe_for_read(GPCMD_READ_CD),
- safe_for_read(GPCMD_READ_CD_MSF),
- safe_for_read(GPCMD_READ_DISC_INFO),
- safe_for_read(GPCMD_READ_CDVD_CAPACITY),
- safe_for_read(GPCMD_READ_DVD_STRUCTURE),
- safe_for_read(GPCMD_READ_HEADER),
- safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
- safe_for_read(GPCMD_READ_SUBCHANNEL),
- safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
- safe_for_read(GPCMD_REPORT_KEY),
- safe_for_read(GPCMD_SCAN),
- safe_for_read(GPCMD_GET_CONFIGURATION),
- safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
- safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
- safe_for_read(GPCMD_GET_PERFORMANCE),
- safe_for_read(GPCMD_SEEK),
- safe_for_read(GPCMD_STOP_PLAY_SCAN),
-
- /* Basic writing commands */
- safe_for_write(WRITE_6),
- safe_for_write(WRITE_10),
- safe_for_write(WRITE_VERIFY),
- safe_for_write(WRITE_12),
- safe_for_write(WRITE_VERIFY_12),
- safe_for_write(WRITE_16),
- safe_for_write(WRITE_LONG),
- safe_for_write(WRITE_LONG_2),
- safe_for_write(ERASE),
- safe_for_write(GPCMD_MODE_SELECT_10),
- safe_for_write(MODE_SELECT),
- safe_for_write(LOG_SELECT),
- safe_for_write(GPCMD_BLANK),
- safe_for_write(GPCMD_CLOSE_TRACK),
- safe_for_write(GPCMD_FLUSH_CACHE),
- safe_for_write(GPCMD_FORMAT_UNIT),
- safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
- safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
- safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
- safe_for_write(GPCMD_SEND_EVENT),
- safe_for_write(GPCMD_SEND_KEY),
- safe_for_write(GPCMD_SEND_OPC),
- safe_for_write(GPCMD_SEND_CUE_SHEET),
- safe_for_write(GPCMD_SET_SPEED),
- safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
- safe_for_write(GPCMD_LOAD_UNLOAD),
- safe_for_write(GPCMD_SET_STREAMING),
- };
- unsigned char type = cmd_type[cmd[0]];
-
- /* Anybody who can open the device can do a read-safe command */
- if (type & CMD_READ_SAFE)
- return 0;
-
- /* Write-safe commands just require a writable open.. */
- if ((type & CMD_WRITE_SAFE) && has_write_perm)
- return 0;
-
- /* And root can do any command.. */
- if (capable(CAP_SYS_RAWIO))
- return 0;
-
- if (!type) {
- cmd_type[cmd[0]] = CMD_WARNED;
- printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
- }
-
- /* Otherwise fail it with an "Operation not permitted" */
- return -EPERM;
-}
-EXPORT_SYMBOL_GPL(blk_verify_command);
-
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_hdr *hdr, int has_write_perm)
+ struct sg_io_hdr *hdr, struct file *file)
{
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_verify_command(file, rq->cmd))
return -EPERM;
/*
@@ -287,7 +179,7 @@ static int sg_io(struct file *file, struct request_queue *q,
struct gendisk *bd_disk, struct sg_io_hdr *hdr)
{
unsigned long start_time;
- int writing = 0, ret = 0, has_write_perm = 0;
+ int writing = 0, ret = 0;
struct request *rq;
char sense[SCSI_SENSE_BUFFERSIZE];
struct bio *bio;
@@ -316,10 +208,7 @@ static int sg_io(struct file *file, struct
request_queue *q,
if (!rq)
return -ENOMEM;
- if (file)
- has_write_perm = file->f_mode & FMODE_WRITE;
-
- if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) {
+ if (blk_fill_sghdr_rq(q, rq, hdr, file)) {
blk_put_request(rq);
return -EFAULT;
}
@@ -451,7 +340,7 @@ int sg_scsi_ioctl(struct file *file, struct
request_queue *q,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error;
- err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE);
+ err = blk_verify_command(file, rq->cmd);
if (err)
goto error;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index ea0edd1..f7abcca 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -182,8 +182,9 @@ static int sg_build_sgat(Sg_scatter_hold * schp,
const Sg_fd * sfp,
int tablesize);
static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count,
Sg_request * srp);
-static ssize_t sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count,
- int blocking, int read_only, Sg_request ** o_srp);
+static ssize_t sg_new_write(Sg_fd *sfp, struct file *file,
+ const char __user *buf, size_t count, int blocking,
+ int read_only, Sg_request **o_srp);
static int sg_common_write(Sg_fd * sfp, Sg_request * srp,
unsigned char *cmnd, int timeout, int blocking);
static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind,
@@ -204,7 +205,6 @@ static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
static Sg_request *sg_add_request(Sg_fd * sfp);
static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
static int sg_res_in_use(Sg_fd * sfp);
-static int sg_allow_access(unsigned char opcode, char dev_type);
static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len);
static Sg_device *sg_get_dev(int dev);
#ifdef CONFIG_SCSI_PROC_FS
@@ -544,7 +544,7 @@ sg_write(struct file *filp, const char __user
*buf, size_t count, loff_t * ppos)
return -EFAULT;
blocking = !(filp->f_flags & O_NONBLOCK);
if (old_hdr.reply_len < 0)
- return sg_new_write(sfp, buf, count, blocking, 0, NULL);
+ return sg_new_write(sfp, filp, buf, count, blocking, 0, NULL);
if (count < (SZ_SG_HEADER + 6))
return -EIO; /* The minimum scsi command length is 6 bytes. */
@@ -621,8 +621,9 @@ sg_write(struct file *filp, const char __user
*buf, size_t count, loff_t * ppos)
}
static ssize_t
-sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count,
- int blocking, int read_only, Sg_request ** o_srp)
+sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
+ size_t count, int blocking, int read_only,
+ Sg_request **o_srp)
{
int k;
Sg_request *srp;
@@ -678,8 +679,7 @@ sg_new_write(Sg_fd * sfp, const char __user *buf,
size_t count,
sg_remove_request(sfp, srp);
return -EFAULT;
}
- if (read_only &&
- (!sg_allow_access(cmnd[0], sfp->parentdp->device->type))) {
+ if (read_only && (!blk_verify_command(file, cmnd))) {
sg_remove_request(sfp, srp);
return -EPERM;
}
@@ -799,7 +799,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
if (!access_ok(VERIFY_WRITE, p, SZ_SG_IO_HDR))
return -EFAULT;
result =
- sg_new_write(sfp, p, SZ_SG_IO_HDR,
+ sg_new_write(sfp, filp, p, SZ_SG_IO_HDR,
blocking, read_only, &srp);
if (result < 0)
return result;
@@ -1048,7 +1048,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
if (copy_from_user(&opcode, siocp->data, 1))
return -EFAULT;
- if (!sg_allow_access(opcode, sdp->device->type))
+ if (!blk_verify_command(filp, &opcode))
return -EPERM;
}
return sg_scsi_ioctl(filp, sdp->device->request_queue, NULL, p);
@@ -2506,26 +2506,6 @@ sg_page_free(struct page *page, int size)
#define MAINTENANCE_IN_CMD 0xa3
#endif
-static unsigned char allow_ops[] = { TEST_UNIT_READY, REQUEST_SENSE,
- INQUIRY, READ_CAPACITY, READ_BUFFER, READ_6, READ_10, READ_12,
- READ_16, MODE_SENSE, MODE_SENSE_10, LOG_SENSE, REPORT_LUNS,
- SERVICE_ACTION_IN, RECEIVE_DIAGNOSTIC, READ_LONG, MAINTENANCE_IN_CMD
-};
-
-static int
-sg_allow_access(unsigned char opcode, char dev_type)
-{
- int k;
-
- if (TYPE_SCANNER == dev_type) /* TYPE_ROM maybe burner */
- return 1;
- for (k = 0; k < sizeof (allow_ops); ++k) {
- if (opcode == allow_ops[k])
- return 1;
- }
- return 0;
-}
-
#ifdef CONFIG_SCSI_PROC_FS
static int
sg_idr_max_id(int id, void *p, void *data)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d2a1b71..b4e539b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -676,7 +676,6 @@ extern int blk_execute_rq(struct request_queue *,
struct gendisk *,
struct request *, int);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
-extern int blk_verify_command(unsigned char *, int);
extern void blk_unplug(struct request_queue *q);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -802,6 +801,15 @@ static inline struct request
*blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, sector_t *);
+/*
+* command filter functions
+*/
+extern int blk_verify_command(struct file *file, unsigned char *cmd);
+extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode);
+extern int blk_register_filter(struct gendisk *disk);
+extern void blk_unregister_filter(struct gendisk *disk);
+
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
#define SAFE_MAX_SECTORS 255
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ae7aec3..ab49a77 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -110,6 +110,14 @@ struct hd_struct {
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
#define GENHD_FL_FAIL 64
+#define BLK_SCSI_MAX_CMDS (256)
+#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+
+struct blk_scsi_cmd_filter {
+ unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+ unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+ struct kobject kobj;
+};
struct gendisk {
int major; /* major number of driver */
@@ -120,6 +128,7 @@ struct gendisk {
struct hd_struct **part; /* [indexed by minor] */
struct block_device_operations *fops;
struct request_queue *queue;
+ struct blk_scsi_cmd_filter cmd_filter;
void *private_data;
sector_t capacity;
^ permalink raw reply related [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-16 9:22 ` [PATCH/RFC v3] " Adel Gadllah
@ 2008-06-17 20:14 ` FUJITA Tomonori
2008-06-17 21:45 ` Peter Jones
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-06-17 20:14 UTC (permalink / raw)
To: adel.gadllah
Cc: fujita.tomonori, matthew, linux-scsi, pjones, jens.axboe,
dgilbert
From: "Adel Gadllah" <adel.gadllah@gmail.com>
Subject: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
Date: Mon, 16 Jun 2008 11:22:56 +0200
> 2008/6/16 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
> > No, sg has own command filtering mechanism, see sg_allow_access().
> >
> > When we discussed the per-gendisk command filter before, I think that
> > we agreed that we had better to have one command filtering mechanism.
>
> The attached patch converts sg to use the cmd_filter too.
> The sg driver seems to verify commands for read access only. I have
sg driver lets you perform any command if you have the write
permission.
> not changed this behaviour to avoid breaking things (userspace),
> but if we want to change this I can submit another patch.
Well, this changes sg behaviour since sg's allow_ops filter has a
access permission different from blk_verify_command filter's.
I guess that the first thing you need to do is that figuring out a
proper access permission for each command, which sg maintainer, etc
can agree. It's pretty hard and that's the reason why this patch has
not been merged for years, I think.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-17 20:14 ` FUJITA Tomonori
@ 2008-06-17 21:45 ` Peter Jones
2008-06-17 22:40 ` FUJITA Tomonori
` (2 more replies)
0 siblings, 3 replies; 65+ messages in thread
From: Peter Jones @ 2008-06-17 21:45 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: adel.gadllah, matthew, linux-scsi, jens.axboe, dgilbert
FUJITA Tomonori wrote:
> Well, this changes sg behaviour since sg's allow_ops filter has a
> access permission different from blk_verify_command filter's.
>
> I guess that the first thing you need to do is that figuring out a
> proper access permission for each command, which sg maintainer, etc
> can agree. It's pretty hard and that's the reason why this patch has
> not been merged for years, I think.
I don't think this logic is sound.
The patch makes it so distros (and individuals, if they're so inclined)
can configure the filter correctly for whatever hardware is present,
regardless of the kernel's ideas of which commands are correct. It
leaves intact the defaults from the current list used by SG_IO and bsg
(and maybe some other interfaces?), which most programs have been using
for quite some time.
If anything, sg is overdue with converting to using the same command
filter as other direct-scsi-command mechanisms. sg_allow_access() is
really not something we should be keeping.
I don't think this is a reason not to merge the patch; in fact, quite
the opposite. This is another case where we've got a specific filter in
one code path that doesn't match any of the others. Fixing it is
something that needs to be done. Making it configurable from the
userland at the same time effectively aleviates the pain that could
result from doing so.
--
Peter
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-17 21:45 ` Peter Jones
@ 2008-06-17 22:40 ` FUJITA Tomonori
2008-06-17 22:49 ` FUJITA Tomonori
2008-06-17 23:01 ` Douglas Gilbert
2 siblings, 0 replies; 65+ messages in thread
From: FUJITA Tomonori @ 2008-06-17 22:40 UTC (permalink / raw)
To: pjones
Cc: fujita.tomonori, adel.gadllah, matthew, linux-scsi, jens.axboe,
dgilbert
On Tue, 17 Jun 2008 17:45:24 -0400
Peter Jones <pjones@redhat.com> wrote:
> FUJITA Tomonori wrote:
>
> > Well, this changes sg behaviour since sg's allow_ops filter has a
> > access permission different from blk_verify_command filter's.
> >
> > I guess that the first thing you need to do is that figuring out a
> > proper access permission for each command, which sg maintainer, etc
> > can agree. It's pretty hard and that's the reason why this patch has
> > not been merged for years, I think.
>
> I don't think this logic is sound.
>
> The patch makes it so distros (and individuals, if they're so inclined)
> can configure the filter correctly for whatever hardware is present,
> regardless of the kernel's ideas of which commands are correct. It
> leaves intact the defaults from the current list used by SG_IO and bsg
> (and maybe some other interfaces?), which most programs have been using
> for quite some time.
I know that. I've tried to merge this patch in the past.
> If anything, sg is overdue with converting to using the same command
> filter as other direct-scsi-command mechanisms. sg_allow_access() is
> really not something we should be keeping.
>
> I don't think this is a reason not to merge the patch; in fact, quite
> the opposite. This is another case where we've got a specific filter in
> one code path that doesn't match any of the others. Fixing it is
> something that needs to be done. Making it configurable from the
> userland at the same time effectively aleviates the pain that could
> result from doing so.
Even if you can configure the access permissions from the userland,
the kernel needs to configure the default access permissions. Seemed
it's hard for everyone to agree on what are proper default permissions
(I can't recall when this topic was discussed, at LSF'07 or
somewhere).
Well, filtering SCSI commands is not a good idea, so removing the
filtering mechanism would be a better option but...
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-17 21:45 ` Peter Jones
2008-06-17 22:40 ` FUJITA Tomonori
@ 2008-06-17 22:49 ` FUJITA Tomonori
2008-06-17 23:01 ` Douglas Gilbert
2 siblings, 0 replies; 65+ messages in thread
From: FUJITA Tomonori @ 2008-06-17 22:49 UTC (permalink / raw)
To: pjones
Cc: fujita.tomonori, adel.gadllah, matthew, linux-scsi, jens.axboe,
dgilbert
On Tue, 17 Jun 2008 17:45:24 -0400
Peter Jones <pjones@redhat.com> wrote:
> FUJITA Tomonori wrote:
>
> > Well, this changes sg behaviour since sg's allow_ops filter has a
> > access permission different from blk_verify_command filter's.
> >
> > I guess that the first thing you need to do is that figuring out a
> > proper access permission for each command, which sg maintainer, etc
> > can agree. It's pretty hard and that's the reason why this patch has
> > not been merged for years, I think.
>
> I don't think this logic is sound.
>
> The patch makes it so distros (and individuals, if they're so inclined)
> can configure the filter correctly for whatever hardware is present,
> regardless of the kernel's ideas of which commands are correct. It
> leaves intact the defaults from the current list used by SG_IO and bsg
> (and maybe some other interfaces?), which most programs have been using
> for quite some time.
>
> If anything, sg is overdue with converting to using the same command
> filter as other direct-scsi-command mechanisms. sg_allow_access() is
> really not something we should be keeping.
Seems that there is some confusion in the interfaces.
We have theree interfaces; scsi_ioctl, bsg, and sg, which let us to
execute SCSI commands from the userland.
All the interfaces support SG_IO (and they also support their own
method). But only scsi_ioctl and bsg use blk_verify_command and sg has
the own permission table.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-17 21:45 ` Peter Jones
2008-06-17 22:40 ` FUJITA Tomonori
2008-06-17 22:49 ` FUJITA Tomonori
@ 2008-06-17 23:01 ` Douglas Gilbert
2008-06-18 1:13 ` Pete Wyckoff
` (3 more replies)
2 siblings, 4 replies; 65+ messages in thread
From: Douglas Gilbert @ 2008-06-17 23:01 UTC (permalink / raw)
To: Peter Jones
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi, jens.axboe
Peter Jones wrote:
> FUJITA Tomonori wrote:
>
>> Well, this changes sg behaviour since sg's allow_ops filter has a
>> access permission different from blk_verify_command filter's.
> >
>> I guess that the first thing you need to do is that figuring out a
>> proper access permission for each command, which sg maintainer, etc
>> can agree. It's pretty hard and that's the reason why this patch has
>> not been merged for years, I think.
>
> I don't think this logic is sound.
That depends on your viewpoint.
IMO all command filtering should be dropped **. We now have
ATA commands tunnelled through SCSI commands (e.g. via SAT)
and will soon have encrypted SCSI commands. Are per device
command filters being proposed? If not, why should we have
the same SCSI command filter for a USB BD drive and a SCSI
enclosure services (SES) device controlling a FC array, just
because they are on the same system?
Why do linux kernel developers have such a hangup about
command filtering? If the user has sufficient permissions
on the pass-through device, let them send commands, simple.
Let udev probe the device, and set its permissions according
to udev's policies. Let the target device do command filtering!
Would any sensible user accept Linux if the kernel developers
decided what could and could not be written to a file?
As far as I can see Microsoft only filters one SCSI command
in their SCSI pass-though, that is the EXTENDED COPY command.
That might give security folks a warm feeling inside but
not someone who needs to use that command via that OS.
Faced with that limitation I would ask the SCSI device
supplier to define a vendor specific SCSI command that did
the same as EXTENDED COPY.
We have situations where the device is smart enough to
decide what SCSI commands should be allowed. For example
a RAID presents its logical volume as a /dev/sd* device
and exposes its physical disks as /dev/sg* devices. In that
situation I think that it is sensible for RAID controller
to disallow WRITE (FORMAT, etc) commands that will corrupt
the state of the volume. Meanwhile smartmontools can be used
to monitor the health of the physical drives via /dev/sg*
(or bsg) devices.
> The patch makes it so distros (and individuals, if they're so inclined)
> can configure the filter correctly for whatever hardware is present,
> regardless of the kernel's ideas of which commands are correct. It
> leaves intact the defaults from the current list used by SG_IO and bsg
> (and maybe some other interfaces?), which most programs have been using
> for quite some time.
>
> If anything, sg is overdue with converting to using the same command
> filter as other direct-scsi-command mechanisms. sg_allow_access() is
> really not something we should be keeping.
>
> I don't think this is a reason not to merge the patch; in fact, quite
> the opposite. This is another case where we've got a specific filter in
> one code path that doesn't match any of the others. Fixing it is
> something that needs to be done. Making it configurable from the
> userland at the same time effectively aleviates the pain that could
> result from doing so.
Sounds like I'm wasting my time.
** So I think sg's command filtering goes too far and the
block layer's filtering just compounds the silliness (and
tilts it in the direction of older MMC commands).
Doug Gilbert
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-17 23:01 ` Douglas Gilbert
@ 2008-06-18 1:13 ` Pete Wyckoff
2008-06-18 7:33 ` Adel Gadllah
` (2 subsequent siblings)
3 siblings, 0 replies; 65+ messages in thread
From: Pete Wyckoff @ 2008-06-18 1:13 UTC (permalink / raw)
To: Douglas Gilbert
Cc: Peter Jones, FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
jens.axboe
dgilbert@interlog.com wrote on Wed, 18 Jun 2008 01:01 +0200:
> IMO all command filtering should be dropped
To add fuel to the fire: I carry a patch to hack around the SCSI
filtering invoked by bsg to be able to use an object-based storage
device (OSD). These devices have a fairly comprehensive policy
framework that filters commands based on the object being accessed,
and how it is accessed. User-space applications acquire credentials
from a server somewhere on the network and craft SCSI commands that
present these credentials to the device, which also likely sits
remotely and is accessed via iSCSI or similar.
OSD commands all have the same opcode 0x7f (variable length CDB),
and the "service action" (read, write, create, flush, ...) is deeper
in the CDB, along with the credentials. Filtering on the single
byte 0x7f isn't useful, and the local kernel really has no role in
mediating device access. Linux provides SCSI initiator services
including discovery, transport, etc. but has no role in
authenticating how applications use OSDs.
-- Pete
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-17 23:01 ` Douglas Gilbert
2008-06-18 1:13 ` Pete Wyckoff
@ 2008-06-18 7:33 ` Adel Gadllah
2008-06-18 14:55 ` James Smart
2008-06-18 14:56 ` Peter Jones
3 siblings, 0 replies; 65+ messages in thread
From: Adel Gadllah @ 2008-06-18 7:33 UTC (permalink / raw)
To: dgilbert
Cc: Peter Jones, FUJITA Tomonori, matthew, linux-scsi, jens.axboe,
torvalds
2008/6/18 Douglas Gilbert <dgilbert@interlog.com>:
> Peter Jones wrote:
>>
>> FUJITA Tomonori wrote:
>>
>>> Well, this changes sg behaviour since sg's allow_ops filter has a
>>> access permission different from blk_verify_command filter's.
>>
>> >
>>>
>>> I guess that the first thing you need to do is that figuring out a
>>> proper access permission for each command, which sg maintainer, etc
>>> can agree. It's pretty hard and that's the reason why this patch has
>>> not been merged for years, I think.
>>
>> I don't think this logic is sound.
>
> That depends on your viewpoint.
>
> IMO all command filtering should be dropped **.
Here is the last discussion to this topic.
http://thread.gmane.org/gmane.linux.scsi/26229/focus=26229
Seems like people wanted to remove the filter back there but Linus was
against it.
So we have to options let people keep running there apps as root or
with sbit or make the filter configurable.
I would prefer the later....
We now have
> ATA commands tunnelled through SCSI commands (e.g. via SAT)
> and will soon have encrypted SCSI commands. Are per device
> command filters being proposed? If not, why should we have
> the same SCSI command filter for a USB BD drive and a SCSI
> enclosure services (SES) device controlling a FC array, just
> because they are on the same system?
The patch does the filtering per device. So each device can have its
own filtering logic.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-17 23:01 ` Douglas Gilbert
2008-06-18 1:13 ` Pete Wyckoff
2008-06-18 7:33 ` Adel Gadllah
@ 2008-06-18 14:55 ` James Smart
2008-06-18 14:56 ` Peter Jones
3 siblings, 0 replies; 65+ messages in thread
From: James Smart @ 2008-06-18 14:55 UTC (permalink / raw)
To: dgilbert
Cc: Peter Jones, FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
jens.axboe
Douglas Gilbert wrote:
> IMO all command filtering should be dropped **.
>
> ... If the user has sufficient permissions
> on the pass-through device, let them send commands, simple.
> Let udev probe the device, and set its permissions according
> to udev's policies. Let the target device do command filtering!
I second Doug's opinion.
-- james s
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-17 23:01 ` Douglas Gilbert
` (2 preceding siblings ...)
2008-06-18 14:55 ` James Smart
@ 2008-06-18 14:56 ` Peter Jones
2008-06-26 10:10 ` Adel Gadllah
3 siblings, 1 reply; 65+ messages in thread
From: Peter Jones @ 2008-06-18 14:56 UTC (permalink / raw)
To: dgilbert; +Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi, jens.axboe
Douglas Gilbert wrote:
> Peter Jones wrote:
>> FUJITA Tomonori wrote:
>>
>>> Well, this changes sg behaviour since sg's allow_ops filter has a
>>> access permission different from blk_verify_command filter's.
>> >
>>> I guess that the first thing you need to do is that figuring out a
>>> proper access permission for each command, which sg maintainer, etc
>>> can agree. It's pretty hard and that's the reason why this patch has
>>> not been merged for years, I think.
>>
>> I don't think this logic is sound.
>
> That depends on your viewpoint.
My viewpoint is this:
1) Whether you agree with his reasons or not, Linus made it pretty clear
that he's against removing the command filter (see
http://marc.info/?l=linux-scsi&m=115419945212450&w=2 )
2) Having different code paths use different filtering code just adds
more confusion.
3) If we're going to have filtering, it should be configurable on a
per-device basis from userland.
Which of these do you disagree with?
[...]
> Are per device command filters being proposed?
Yes, that's what the patch implements. And it allows the userland to
configure them according to the needs of the hardware.
--
Peter
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-18 14:56 ` Peter Jones
@ 2008-06-26 10:10 ` Adel Gadllah
2008-06-26 10:13 ` Jens Axboe
2008-06-26 14:36 ` FUJITA Tomonori
0 siblings, 2 replies; 65+ messages in thread
From: Adel Gadllah @ 2008-06-26 10:10 UTC (permalink / raw)
To: Peter Jones; +Cc: dgilbert, FUJITA Tomonori, matthew, linux-scsi, jens.axboe
2008/6/18 Peter Jones <pjones@redhat.com>:
> Douglas Gilbert wrote:
>>
>> Peter Jones wrote:
>>>
>>> FUJITA Tomonori wrote:
>>>
>>>> Well, this changes sg behaviour since sg's allow_ops filter has a
>>>> access permission different from blk_verify_command filter's.
>>>
>>> >
>>>>
>>>> I guess that the first thing you need to do is that figuring out a
>>>> proper access permission for each command, which sg maintainer, etc
>>>> can agree. It's pretty hard and that's the reason why this patch has
>>>> not been merged for years, I think.
>>>
>>> I don't think this logic is sound.
>>
>> That depends on your viewpoint.
>
> My viewpoint is this:
>
> 1) Whether you agree with his reasons or not, Linus made it pretty clear
> that he's against removing the command filter (see
> http://marc.info/?l=linux-scsi&m=115419945212450&w=2 )
> 2) Having different code paths use different filtering code just adds more
> confusion.
> 3) If we're going to have filtering, it should be configurable on a
> per-device basis from userland.
>
> Which of these do you disagree with?
>
> [...]
>>
>> Are per device command filters being proposed?
>
> Yes, that's what the patch implements. And it allows the userland to
> configure them according to the needs of the hardware.
Jens can we add merge this for .27 or does anyone still has objections?
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-26 10:10 ` Adel Gadllah
@ 2008-06-26 10:13 ` Jens Axboe
2008-06-26 14:36 ` FUJITA Tomonori
1 sibling, 0 replies; 65+ messages in thread
From: Jens Axboe @ 2008-06-26 10:13 UTC (permalink / raw)
To: Adel Gadllah; +Cc: Peter Jones, dgilbert, FUJITA Tomonori, matthew, linux-scsi
On Thu, Jun 26 2008, Adel Gadllah wrote:
> 2008/6/18 Peter Jones <pjones@redhat.com>:
> > Douglas Gilbert wrote:
> >>
> >> Peter Jones wrote:
> >>>
> >>> FUJITA Tomonori wrote:
> >>>
> >>>> Well, this changes sg behaviour since sg's allow_ops filter has a
> >>>> access permission different from blk_verify_command filter's.
> >>>
> >>> >
> >>>>
> >>>> I guess that the first thing you need to do is that figuring out a
> >>>> proper access permission for each command, which sg maintainer, etc
> >>>> can agree. It's pretty hard and that's the reason why this patch has
> >>>> not been merged for years, I think.
> >>>
> >>> I don't think this logic is sound.
> >>
> >> That depends on your viewpoint.
> >
> > My viewpoint is this:
> >
> > 1) Whether you agree with his reasons or not, Linus made it pretty clear
> > that he's against removing the command filter (see
> > http://marc.info/?l=linux-scsi&m=115419945212450&w=2 )
> > 2) Having different code paths use different filtering code just adds more
> > confusion.
> > 3) If we're going to have filtering, it should be configurable on a
> > per-device basis from userland.
> >
> > Which of these do you disagree with?
> >
> > [...]
> >>
> >> Are per device command filters being proposed?
> >
> > Yes, that's what the patch implements. And it allows the userland to
> > configure them according to the needs of the hardware.
>
> Jens can we add merge this for .27 or does anyone still has objections?
Sure, I think it's a good idea. Personally I'd rather get rid of the
filter, but that isn't an option. So the next best thing is to make it
really per-device and exposed and tweakable. Thanks for picking up the
abandoned patch and updating it, I'll add it for 2.6.27.
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-26 10:10 ` Adel Gadllah
2008-06-26 10:13 ` Jens Axboe
@ 2008-06-26 14:36 ` FUJITA Tomonori
2008-06-26 15:05 ` Adel Gadllah
2008-07-24 1:11 ` Dan Williams
1 sibling, 2 replies; 65+ messages in thread
From: FUJITA Tomonori @ 2008-06-26 14:36 UTC (permalink / raw)
To: adel.gadllah
Cc: pjones, dgilbert, fujita.tomonori, matthew, linux-scsi,
jens.axboe
On Thu, 26 Jun 2008 12:10:25 +0200
"Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> 2008/6/18 Peter Jones <pjones@redhat.com>:
> > Douglas Gilbert wrote:
> >>
> >> Peter Jones wrote:
> >>>
> >>> FUJITA Tomonori wrote:
> >>>
> >>>> Well, this changes sg behaviour since sg's allow_ops filter has a
> >>>> access permission different from blk_verify_command filter's.
> >>>
> >>> >
> >>>>
> >>>> I guess that the first thing you need to do is that figuring out a
> >>>> proper access permission for each command, which sg maintainer, etc
> >>>> can agree. It's pretty hard and that's the reason why this patch has
> >>>> not been merged for years, I think.
> >>>
> >>> I don't think this logic is sound.
> >>
> >> That depends on your viewpoint.
> >
> > My viewpoint is this:
> >
> > 1) Whether you agree with his reasons or not, Linus made it pretty clear
> > that he's against removing the command filter (see
> > http://marc.info/?l=linux-scsi&m=115419945212450&w=2 )
> > 2) Having different code paths use different filtering code just adds more
> > confusion.
> > 3) If we're going to have filtering, it should be configurable on a
> > per-device basis from userland.
> >
> > Which of these do you disagree with?
> >
> > [...]
> >>
> >> Are per device command filters being proposed?
> >
> > Yes, that's what the patch implements. And it allows the userland to
> > configure them according to the needs of the hardware.
>
> Jens can we add merge this for .27 or does anyone still has objections?
I think that this patch makes sg's permission stricter. So this could
break the existing user-space applications.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-26 14:36 ` FUJITA Tomonori
@ 2008-06-26 15:05 ` Adel Gadllah
2008-06-26 15:08 ` FUJITA Tomonori
2008-07-24 1:11 ` Dan Williams
1 sibling, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-06-26 15:05 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: pjones, dgilbert, matthew, linux-scsi, jens.axboe
2008/6/26 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
> On Thu, 26 Jun 2008 12:10:25 +0200
> "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
>
>> 2008/6/18 Peter Jones <pjones@redhat.com>:
>> > Douglas Gilbert wrote:
>> >>
>> >> Peter Jones wrote:
>> >>>
>> >>> FUJITA Tomonori wrote:
>> >>>
>> >>>> Well, this changes sg behaviour since sg's allow_ops filter has a
>> >>>> access permission different from blk_verify_command filter's.
>> >>>
>> >>> >
>> >>>>
>> >>>> I guess that the first thing you need to do is that figuring out a
>> >>>> proper access permission for each command, which sg maintainer, etc
>> >>>> can agree. It's pretty hard and that's the reason why this patch has
>> >>>> not been merged for years, I think.
>> >>>
>> >>> I don't think this logic is sound.
>> >>
>> >> That depends on your viewpoint.
>> >
>> > My viewpoint is this:
>> >
>> > 1) Whether you agree with his reasons or not, Linus made it pretty clear
>> > that he's against removing the command filter (see
>> > http://marc.info/?l=linux-scsi&m=115419945212450&w=2 )
>> > 2) Having different code paths use different filtering code just adds more
>> > confusion.
>> > 3) If we're going to have filtering, it should be configurable on a
>> > per-device basis from userland.
>> >
>> > Which of these do you disagree with?
>> >
>> > [...]
>> >>
>> >> Are per device command filters being proposed?
>> >
>> > Yes, that's what the patch implements. And it allows the userland to
>> > configure them according to the needs of the hardware.
>>
>> Jens can we add merge this for .27 or does anyone still has objections?
>
> I think that this patch makes sg's permission stricter. So this could
> break the existing user-space applications.
>
any particular app in mind?
for write access it still allows all commands (because there are some
userspace apps tha rely on this).
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-26 15:05 ` Adel Gadllah
@ 2008-06-26 15:08 ` FUJITA Tomonori
2008-06-26 15:26 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-06-26 15:08 UTC (permalink / raw)
To: adel.gadllah
Cc: fujita.tomonori, pjones, dgilbert, matthew, linux-scsi,
jens.axboe
On Thu, 26 Jun 2008 17:05:50 +0200
"Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> 2008/6/26 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
> > On Thu, 26 Jun 2008 12:10:25 +0200
> > "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> >
> >> 2008/6/18 Peter Jones <pjones@redhat.com>:
> >> > Douglas Gilbert wrote:
> >> >>
> >> >> Peter Jones wrote:
> >> >>>
> >> >>> FUJITA Tomonori wrote:
> >> >>>
> >> >>>> Well, this changes sg behaviour since sg's allow_ops filter has a
> >> >>>> access permission different from blk_verify_command filter's.
> >> >>>
> >> >>> >
> >> >>>>
> >> >>>> I guess that the first thing you need to do is that figuring out a
> >> >>>> proper access permission for each command, which sg maintainer, etc
> >> >>>> can agree. It's pretty hard and that's the reason why this patch has
> >> >>>> not been merged for years, I think.
> >> >>>
> >> >>> I don't think this logic is sound.
> >> >>
> >> >> That depends on your viewpoint.
> >> >
> >> > My viewpoint is this:
> >> >
> >> > 1) Whether you agree with his reasons or not, Linus made it pretty clear
> >> > that he's against removing the command filter (see
> >> > http://marc.info/?l=linux-scsi&m=115419945212450&w=2 )
> >> > 2) Having different code paths use different filtering code just adds more
> >> > confusion.
> >> > 3) If we're going to have filtering, it should be configurable on a
> >> > per-device basis from userland.
> >> >
> >> > Which of these do you disagree with?
> >> >
> >> > [...]
> >> >>
> >> >> Are per device command filters being proposed?
> >> >
> >> > Yes, that's what the patch implements. And it allows the userland to
> >> > configure them according to the needs of the hardware.
> >>
> >> Jens can we add merge this for .27 or does anyone still has objections?
> >
> > I think that this patch makes sg's permission stricter. So this could
> > break the existing user-space applications.
> >
>
> any particular app in mind?
No, but there would be some.
> for write access it still allows all commands (because there are some
> userspace apps tha rely on this).
Yeah, I know. But for read access, some commands will be blocked.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-26 15:08 ` FUJITA Tomonori
@ 2008-06-26 15:26 ` FUJITA Tomonori
0 siblings, 0 replies; 65+ messages in thread
From: FUJITA Tomonori @ 2008-06-26 15:26 UTC (permalink / raw)
To: adel.gadllah; +Cc: pjones, dgilbert, matthew, linux-scsi, jens.axboe
On Fri, 27 Jun 2008 00:08:46 +0900
FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> wrote:
> On Thu, 26 Jun 2008 17:05:50 +0200
> "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
>
> > 2008/6/26 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
> > > On Thu, 26 Jun 2008 12:10:25 +0200
> > > "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> > >
> > >> 2008/6/18 Peter Jones <pjones@redhat.com>:
> > >> > Douglas Gilbert wrote:
> > >> >>
> > >> >> Peter Jones wrote:
> > >> >>>
> > >> >>> FUJITA Tomonori wrote:
> > >> >>>
> > >> >>>> Well, this changes sg behaviour since sg's allow_ops filter has a
> > >> >>>> access permission different from blk_verify_command filter's.
> > >> >>>
> > >> >>> >
> > >> >>>>
> > >> >>>> I guess that the first thing you need to do is that figuring out a
> > >> >>>> proper access permission for each command, which sg maintainer, etc
> > >> >>>> can agree. It's pretty hard and that's the reason why this patch has
> > >> >>>> not been merged for years, I think.
> > >> >>>
> > >> >>> I don't think this logic is sound.
> > >> >>
> > >> >> That depends on your viewpoint.
> > >> >
> > >> > My viewpoint is this:
> > >> >
> > >> > 1) Whether you agree with his reasons or not, Linus made it pretty clear
> > >> > that he's against removing the command filter (see
> > >> > http://marc.info/?l=linux-scsi&m=115419945212450&w=2 )
> > >> > 2) Having different code paths use different filtering code just adds more
> > >> > confusion.
> > >> > 3) If we're going to have filtering, it should be configurable on a
> > >> > per-device basis from userland.
> > >> >
> > >> > Which of these do you disagree with?
> > >> >
> > >> > [...]
> > >> >>
> > >> >> Are per device command filters being proposed?
> > >> >
> > >> > Yes, that's what the patch implements. And it allows the userland to
> > >> > configure them according to the needs of the hardware.
> > >>
> > >> Jens can we add merge this for .27 or does anyone still has objections?
> > >
> > > I think that this patch makes sg's permission stricter. So this could
> > > break the existing user-space applications.
> > >
> >
> > any particular app in mind?
>
> No, but there would be some.
>
>
> > for write access it still allows all commands (because there are some
> > userspace apps tha rely on this).
>
> Yeah, I know. But for read access, some commands will be blocked.
I think that it's not a good idea to say "this patch could break
something but we have no idea about them. So we can merge this."
It's better to loosen scsi_ioctl's permissions to match with sg's
permission.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-06-26 14:36 ` FUJITA Tomonori
2008-06-26 15:05 ` Adel Gadllah
@ 2008-07-24 1:11 ` Dan Williams
2008-07-24 3:31 ` FUJITA Tomonori
1 sibling, 1 reply; 65+ messages in thread
From: Dan Williams @ 2008-07-24 1:11 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: adel.gadllah, pjones, dgilbert, matthew, linux-scsi, jens.axboe
On Thu, Jun 26, 2008 at 7:36 AM, FUJITA Tomonori
<fujita.tomonori@lab.ntt.co.jp> wrote:
>> Jens can we add merge this for .27 or does anyone still has objections?
>
> I think that this patch makes sg's permission stricter. So this could
> break the existing user-space applications.
Yes, it seems to have broken a simple routine I had to retrieve the
disk serial number from /dev/sg0. Works fine if I run the ioctl on
/dev/sda.
Can we consider this a regression or am I doing something wrong? The
following returns 2 on post-2.6.26 mainline and 0 otherwise.
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <scsi/scsi.h>
#include <scsi/sg.h>
int scsi_get_serial(int fd, void *buf, size_t buf_len)
{
unsigned char inq_cmd[] = {INQUIRY, 1, 0x80, 0, buf_len, 0};
unsigned char sense[32];
struct sg_io_hdr io_hdr;
memset(&io_hdr, 0, sizeof(io_hdr));
io_hdr.interface_id = 'S';
io_hdr.cmdp = inq_cmd;
io_hdr.cmd_len = sizeof(inq_cmd);
io_hdr.dxferp = buf;
io_hdr.dxfer_len = buf_len;
io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
io_hdr.sbp = sense;
io_hdr.mx_sb_len = sizeof(sense);
io_hdr.timeout = 5000;
return ioctl(fd, SG_IO, &io_hdr);
}
int main(void)
{
char buf[255];
int fd;
int ret;
fd = open("/dev/sda", O_RDONLY);
ret = scsi_get_serial(fd, buf, sizeof(buf));
if (ret != 0)
return 1;
fd = open("/dev/sg0", O_RDONLY);
ret = scsi_get_serial(fd, buf, sizeof(buf));
if (ret != 0)
return 2;
return 0;
}
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH/RFC v3] allow userspace to modify scsi command filter on per device basis
2008-07-24 1:11 ` Dan Williams
@ 2008-07-24 3:31 ` FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 0/3] cmd_filter fixes FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-24 3:31 UTC (permalink / raw)
To: dan.j.williams
Cc: fujita.tomonori, adel.gadllah, pjones, dgilbert, matthew,
linux-scsi, jens.axboe, James.Bottomley, viro
On Wed, 23 Jul 2008 18:11:58 -0700
"Dan Williams" <dan.j.williams@intel.com> wrote:
> On Thu, Jun 26, 2008 at 7:36 AM, FUJITA Tomonori
> <fujita.tomonori@lab.ntt.co.jp> wrote:
> >> Jens can we add merge this for .27 or does anyone still has objections?
> >
> > I think that this patch makes sg's permission stricter. So this could
> > break the existing user-space applications.
>
> Yes, it seems to have broken a simple routine I had to retrieve the
> disk serial number from /dev/sg0. Works fine if I run the ioctl on
> /dev/sda.
>
> Can we consider this a regression or am I doing something wrong? The
> following returns 2 on post-2.6.26 mainline and 0 otherwise.
This is a regression. Obviously, the filter patch was tested with only
the block SG_IO partly. It breaks sg and bsg. Al Viro pointed out:
http://marc.info/?l=linux-kernel&m=121662171123285&w=2
I think that it breaks even the block SG_IO with tape drives.
I'll send fixes tonight.
^ permalink raw reply [flat|nested] 65+ messages in thread
* [PATCH 0/3] cmd_filter fixes
2008-07-24 3:31 ` FUJITA Tomonori
@ 2008-07-26 9:03 ` FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue FUJITA Tomonori
2008-07-27 19:59 ` [PATCH 0/3] cmd_filter fixes Adel Gadllah
0 siblings, 2 replies; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-26 9:03 UTC (permalink / raw)
To: linux-scsi
Cc: jens.axboe, James.Bottomley, dan.j.williams, adel.gadllah, pjones,
viro, dougg, matthew, fujita.tomonori
cmd_filter works only for the block layer SG_IO with SCSI block
devices. It breaks scsi/sg.c, bsg, and the block layer SG_IO with SCSI
character devices (such as st). We might hit a kernel crash with them.
The problem is that cmd_filter code accesses to gendisk (having struct
blk_scsi_cmd_filter) via inode->i_bdev->bd_disk. It works for only
SCSI block device files. With character device files, inode->i_bdev
leads you to struct cdev. inode->i_bdev->bd_disk->blk_scsi_cmd_filter
isn't safe. For example, I got the following kernel crash with bsg:
Pid: 1480, comm: bsg-test Not tainted (2.6.26-06879-gfb2e405 #1)
EIP: 0060:[<c01b02fd>] EFLAGS: 00010202 CPU: 0
EIP is at blk_cmd_filter_verify_command+0x1e/0x41
EAX: 00000000 EBX: 00000237 ECX: 00000011 EDX: 00000003
ESI: df9692ae EDI: de93485c EBP: de8ea3a8 ESP: de937e90
DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
Process bsg-test (pid: 1480, ti=de936000 task=df8b7230 task.ti=de936000)
Stack: de937ec4 df969228 fffffff2 c01b0866 df025030 00000000 de937ec4 c01b1009
bf9a5b10 de8ea3a8 c01b110e 00000246 df1985f4 00000051 00000000 00000000
00000006 bf9a5bd2 00000000 00000000 00000000 00000000 00000000 00000000
Call Trace:
[<c01b0866>] bsg_map_hdr+0xe7/0x225
[<c01b1009>] bsg_ioctl+0x0/0x17c
[<c01b110e>] bsg_ioctl+0x105/0x17c
[<c01b1009>] bsg_ioctl+0x0/0x17c
[<c015a11a>] vfs_ioctl+0x16/0x48
[<c015a332>] do_vfs_ioctl+0x1e6/0x1f9
[<c01b770c>] trace_hardirqs_on_thunk+0xc/0x10
[<c015a371>] sys_ioctl+0x2c/0x43
[<c01028b1>] sysenter_do_call+0x12/0x35
=======================
Code: 10 89 fa ff d3 89 c2 89 d0 5b 5e 5f c3 57 56 53 89 c3 89 d6 89 cf b8 11 00 00 00 e8 fd 94 f6 ff 85 c0 75 1f 85 db 74 1f 0f b6 16 <0f> a3 13 19 c0 85 c0 75 0f 0f a3 53 20 19 c0 85 c0 74 09 f6 07
EIP: [<c01b02fd>] blk_cmd_filter_verify_command+0x1e/0x41 SS:ESP 0068:de937e90
---[ end trace 0cceddb2f202a402 ]---
SCSI ULDs don't expose gendisk; they keep it private. bsg needs to be
independent on any protocols. We shouldn't change ULDs to expose their
gendisk.
This patchset moves struct blk_scsi_cmd_filter from gendisk to
request_queue, a common object, which eveyone can access to.
The user interface doesn't change; users can change the filters via
/sys/block/. gendisk has a pointer to request_queue so the cmd_filter
code accesses to struct blk_scsi_cmd_filter.
^ permalink raw reply [flat|nested] 65+ messages in thread
* [PATCH 1/3] move cmd_filter from gendisk to request_queue
2008-07-26 9:03 ` [PATCH 0/3] cmd_filter fixes FUJITA Tomonori
@ 2008-07-26 9:03 ` FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 2/3] sg: restore command permission for TYPE_SCANNER FUJITA Tomonori
` (2 more replies)
2008-07-27 19:59 ` [PATCH 0/3] cmd_filter fixes Adel Gadllah
1 sibling, 3 replies; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-26 9:03 UTC (permalink / raw)
To: linux-scsi
Cc: jens.axboe, James.Bottomley, dan.j.williams, adel.gadllah, pjones,
viro, dougg, matthew, FUJITA Tomonori
cmd_filter works only for the block layer SG_IO with SCSI block
devices. It breaks scsi/sg.c, bsg, and the block layer SG_IO with SCSI
character devices (such as st). We hit a kernel crash with them.
The problem is that cmd_filter code accesses to gendisk (having struct
blk_scsi_cmd_filter) via inode->i_bdev->bd_disk. It works for only
SCSI block device files. With character device files, inode->i_bdev
leads you to struct cdev. inode->i_bdev->bd_disk->blk_scsi_cmd_filter
isn't safe.
SCSI ULDs don't expose gendisk; they keep it private. bsg needs to be
independent on any protocols. We shouldn't change ULDs to expose their
gendisk.
This patch moves struct blk_scsi_cmd_filter from gendisk to
request_queue, a common object, which eveyone can access to.
The user interface doesn't change; users can change the filters via
/sys/block/. gendisk has a pointer to request_queue so the cmd_filter
code accesses to struct blk_scsi_cmd_filter.
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
block/bsg.c | 45 +++++-------------
block/cmd-filter.c | 118 ++--------------------------------------------
block/scsi_ioctl.c | 94 ++++++++++++++++++++++++++++++++++++-
drivers/scsi/scsi_lib.c | 2 +
drivers/scsi/sg.c | 11 ++++-
include/linux/blkdev.h | 16 +++++-
include/linux/genhd.h | 10 ----
7 files changed, 133 insertions(+), 163 deletions(-)
diff --git a/block/bsg.c b/block/bsg.c
index 5a68b09..6c5c6f3 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -45,8 +45,6 @@ struct bsg_device {
char name[BUS_ID_SIZE];
int max_queue;
unsigned long flags;
- struct blk_scsi_cmd_filter *cmd_filter;
- mode_t *f_mode;
};
enum {
@@ -87,6 +85,7 @@ struct bsg_command {
int err;
struct sg_io_v4 hdr;
char sense[SCSI_SENSE_BUFFERSIZE];
+ int has_write_perm;
};
static void bsg_free_command(struct bsg_command *bc)
@@ -174,7 +173,8 @@ unlock:
}
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_v4 *hdr, struct bsg_device *bd)
+ struct sg_io_v4 *hdr, struct bsg_device *bd,
+ int has_write_perm)
{
if (hdr->request_len > BLK_MAX_CDB) {
rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -187,8 +187,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
- bd->f_mode))
+ if (blk_verify_command(&q->cmd_filter, rq->cmd, has_write_perm))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -244,7 +243,7 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw)
* map sg_io_v4 to a request.
*/
static struct request *
-bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
+bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm)
{
struct request_queue *q = bd->queue;
struct request *rq, *next_rq = NULL;
@@ -266,7 +265,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
return ERR_PTR(-ENOMEM);
- ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
+ ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
if (ret)
goto out;
@@ -568,25 +567,6 @@ static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
set_bit(BSG_F_BLOCK, &bd->flags);
}
-static void bsg_set_cmd_filter(struct bsg_device *bd,
- struct file *file)
-{
- struct inode *inode;
- struct gendisk *disk;
-
- if (!file)
- return;
-
- inode = file->f_dentry->d_inode;
- if (!inode)
- return;
-
- disk = inode->i_bdev->bd_disk;
-
- bd->cmd_filter = &disk->cmd_filter;
- bd->f_mode = &file->f_mode;
-}
-
/*
* Check if the error is a "real" error that we should return.
*/
@@ -608,7 +588,6 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
dprintk("%s: read %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
- bsg_set_cmd_filter(bd, file);
bytes_read = 0;
ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
@@ -621,7 +600,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
}
static int __bsg_write(struct bsg_device *bd, const char __user *buf,
- size_t count, ssize_t *bytes_written)
+ size_t count, ssize_t *bytes_written, int has_write_perm)
{
struct bsg_command *bc;
struct request *rq;
@@ -652,7 +631,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
/*
* get a request, fill in the blanks, and add to request queue
*/
- rq = bsg_map_hdr(bd, &bc->hdr);
+ rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
rq = NULL;
@@ -683,10 +662,11 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
dprintk("%s: write %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
- bsg_set_cmd_filter(bd, file);
bytes_written = 0;
- ret = __bsg_write(bd, buf, count, &bytes_written);
+ ret = __bsg_write(bd, buf, count, &bytes_written,
+ file->f_mode & FMODE_WRITE);
+
*ppos = bytes_written;
/*
@@ -792,7 +772,6 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
bd->queue = rq;
bsg_set_block(bd, file);
- bsg_set_cmd_filter(bd, file);
atomic_set(&bd->ref_count, 1);
mutex_lock(&bsg_mutex);
@@ -943,7 +922,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (copy_from_user(&hdr, uarg, sizeof(hdr)))
return -EFAULT;
- rq = bsg_map_hdr(bd, &hdr);
+ rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE);
if (IS_ERR(rq))
return PTR_ERR(rq);
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
index eec4404..c705c33 100644
--- a/block/cmd-filter.c
+++ b/block/cmd-filter.c
@@ -27,8 +27,8 @@
#include <scsi/scsi.h>
#include <linux/cdrom.h>
-int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
- unsigned char *cmd, mode_t *f_mode)
+int blk_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, int has_write_perm)
{
/* root can do any command. */
if (capable(CAP_SYS_RAWIO))
@@ -43,30 +43,11 @@ int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
return 0;
/* Write-safe commands require a writable open */
- if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
+ if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
return 0;
return -EPERM;
}
-EXPORT_SYMBOL(blk_cmd_filter_verify_command);
-
-int blk_verify_command(struct file *file, unsigned char *cmd)
-{
- struct gendisk *disk;
- struct inode *inode;
-
- if (!file)
- return -EINVAL;
-
- inode = file->f_dentry->d_inode;
- if (!inode)
- return -EINVAL;
-
- disk = inode->i_bdev->bd_disk;
-
- return blk_cmd_filter_verify_command(&disk->cmd_filter,
- cmd, &file->f_mode);
-}
EXPORT_SYMBOL(blk_verify_command);
/* and now, the sysfs stuff */
@@ -219,114 +200,27 @@ static struct kobj_type rcf_ktype = {
.default_attrs = default_attrs,
};
-#ifndef MAINTENANCE_IN_CMD
-#define MAINTENANCE_IN_CMD 0xa3
-#endif
-
-static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
-{
- /* Basic read-only commands */
- __set_bit(TEST_UNIT_READY, filter->read_ok);
- __set_bit(REQUEST_SENSE, filter->read_ok);
- __set_bit(READ_6, filter->read_ok);
- __set_bit(READ_10, filter->read_ok);
- __set_bit(READ_12, filter->read_ok);
- __set_bit(READ_16, filter->read_ok);
- __set_bit(READ_BUFFER, filter->read_ok);
- __set_bit(READ_DEFECT_DATA, filter->read_ok);
- __set_bit(READ_CAPACITY, filter->read_ok);
- __set_bit(READ_LONG, filter->read_ok);
- __set_bit(INQUIRY, filter->read_ok);
- __set_bit(MODE_SENSE, filter->read_ok);
- __set_bit(MODE_SENSE_10, filter->read_ok);
- __set_bit(LOG_SENSE, filter->read_ok);
- __set_bit(START_STOP, filter->read_ok);
- __set_bit(GPCMD_VERIFY_10, filter->read_ok);
- __set_bit(VERIFY_16, filter->read_ok);
- __set_bit(REPORT_LUNS, filter->read_ok);
- __set_bit(SERVICE_ACTION_IN, filter->read_ok);
- __set_bit(RECEIVE_DIAGNOSTIC, filter->read_ok);
- __set_bit(MAINTENANCE_IN_CMD, filter->read_ok);
- __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
-
- /* Audio CD commands */
- __set_bit(GPCMD_PLAY_CD, filter->read_ok);
- __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
- __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
- __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
- __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
-
- /* CD/DVD data reading */
- __set_bit(GPCMD_READ_CD, filter->read_ok);
- __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
- __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
- __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
- __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
- __set_bit(GPCMD_READ_HEADER, filter->read_ok);
- __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
- __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
- __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
- __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
- __set_bit(GPCMD_SCAN, filter->read_ok);
- __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
- __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
- __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
- __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
- __set_bit(GPCMD_SEEK, filter->read_ok);
- __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
-
- /* Basic writing commands */
- __set_bit(WRITE_6, filter->write_ok);
- __set_bit(WRITE_10, filter->write_ok);
- __set_bit(WRITE_VERIFY, filter->write_ok);
- __set_bit(WRITE_12, filter->write_ok);
- __set_bit(WRITE_VERIFY_12, filter->write_ok);
- __set_bit(WRITE_16, filter->write_ok);
- __set_bit(WRITE_LONG, filter->write_ok);
- __set_bit(WRITE_LONG_2, filter->write_ok);
- __set_bit(ERASE, filter->write_ok);
- __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
- __set_bit(MODE_SELECT, filter->write_ok);
- __set_bit(LOG_SELECT, filter->write_ok);
- __set_bit(GPCMD_BLANK, filter->write_ok);
- __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
- __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
- __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
- __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
- __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
- __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
- __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
- __set_bit(GPCMD_SEND_KEY, filter->write_ok);
- __set_bit(GPCMD_SEND_OPC, filter->write_ok);
- __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
- __set_bit(GPCMD_SET_SPEED, filter->write_ok);
- __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
- __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
- __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
-}
-
int blk_register_filter(struct gendisk *disk)
{
int ret;
- struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+ struct blk_scsi_cmd_filter *filter = &disk->queue->cmd_filter;
struct kobject *parent = kobject_get(disk->holder_dir->parent);
if (!parent)
return -ENODEV;
ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
- "%s", "cmd_filter");
+ "%s", "cmd_filter");
if (ret < 0)
return ret;
- rcf_set_defaults(filter);
return 0;
}
void blk_unregister_filter(struct gendisk *disk)
{
- struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+ struct blk_scsi_cmd_filter *filter = &disk->queue->cmd_filter;
kobject_put(&filter->kobj);
kobject_put(disk->holder_dir->parent);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index c5b9bcf..1f7d56e 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,12 +105,96 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
return put_user(1, p);
}
+void blk_set_cmd_filter_defaults(struct blk_scsi_cmd_filter *filter)
+{
+ /* Basic read-only commands */
+ __set_bit(TEST_UNIT_READY, filter->read_ok);
+ __set_bit(REQUEST_SENSE, filter->read_ok);
+ __set_bit(READ_6, filter->read_ok);
+ __set_bit(READ_10, filter->read_ok);
+ __set_bit(READ_12, filter->read_ok);
+ __set_bit(READ_16, filter->read_ok);
+ __set_bit(READ_BUFFER, filter->read_ok);
+ __set_bit(READ_DEFECT_DATA, filter->read_ok);
+ __set_bit(READ_CAPACITY, filter->read_ok);
+ __set_bit(READ_LONG, filter->read_ok);
+ __set_bit(INQUIRY, filter->read_ok);
+ __set_bit(MODE_SENSE, filter->read_ok);
+ __set_bit(MODE_SENSE_10, filter->read_ok);
+ __set_bit(LOG_SENSE, filter->read_ok);
+ __set_bit(START_STOP, filter->read_ok);
+ __set_bit(GPCMD_VERIFY_10, filter->read_ok);
+ __set_bit(VERIFY_16, filter->read_ok);
+ __set_bit(REPORT_LUNS, filter->read_ok);
+ __set_bit(SERVICE_ACTION_IN, filter->read_ok);
+ __set_bit(RECEIVE_DIAGNOSTIC, filter->read_ok);
+ __set_bit(MAINTENANCE_IN, filter->read_ok);
+ __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
+
+ /* Audio CD commands */
+ __set_bit(GPCMD_PLAY_CD, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
+ __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
+
+ /* CD/DVD data reading */
+ __set_bit(GPCMD_READ_CD, filter->read_ok);
+ __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
+ __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
+ __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
+ __set_bit(GPCMD_READ_HEADER, filter->read_ok);
+ __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
+ __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
+ __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
+ __set_bit(GPCMD_SCAN, filter->read_ok);
+ __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
+ __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
+ __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
+ __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
+ __set_bit(GPCMD_SEEK, filter->read_ok);
+ __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
+
+ /* Basic writing commands */
+ __set_bit(WRITE_6, filter->write_ok);
+ __set_bit(WRITE_10, filter->write_ok);
+ __set_bit(WRITE_VERIFY, filter->write_ok);
+ __set_bit(WRITE_12, filter->write_ok);
+ __set_bit(WRITE_VERIFY_12, filter->write_ok);
+ __set_bit(WRITE_16, filter->write_ok);
+ __set_bit(WRITE_LONG, filter->write_ok);
+ __set_bit(WRITE_LONG_2, filter->write_ok);
+ __set_bit(ERASE, filter->write_ok);
+ __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
+ __set_bit(MODE_SELECT, filter->write_ok);
+ __set_bit(LOG_SELECT, filter->write_ok);
+ __set_bit(GPCMD_BLANK, filter->write_ok);
+ __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
+ __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
+ __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
+ __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
+ __set_bit(GPCMD_SEND_KEY, filter->write_ok);
+ __set_bit(GPCMD_SEND_OPC, filter->write_ok);
+ __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
+ __set_bit(GPCMD_SET_SPEED, filter->write_ok);
+ __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
+ __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
+ __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+}
+EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
+
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
struct sg_io_hdr *hdr, struct file *file)
{
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
- if (blk_verify_command(file, rq->cmd))
+ if (blk_verify_command(&q->cmd_filter, rq->cmd,
+ file->f_mode & FMODE_WRITE))
return -EPERM;
/*
@@ -298,7 +382,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
struct gendisk *disk, struct scsi_ioctl_command __user *sic)
{
struct request *rq;
- int err;
+ int err, write_perm = 0;
unsigned int in_len, out_len, bytes, opcode, cmdlen;
char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE];
@@ -340,7 +424,11 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error;
- err = blk_verify_command(file, rq->cmd);
+ /* scsi_ioctl passes NULL */
+ if (file && (file->f_mode & FMODE_WRITE))
+ write_perm = 1;
+
+ err = blk_verify_command(&q->cmd_filter, rq->cmd, write_perm);
if (err)
goto error;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 88d1b5f..51cdae8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1617,6 +1617,8 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
*/
blk_queue_dma_alignment(q, 0x03);
+ blk_set_cmd_filter_defaults(&q->cmd_filter);
+
return q;
}
EXPORT_SYMBOL(__scsi_alloc_queue);
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index d3b8ebb..207c357 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -641,6 +641,7 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
unsigned char cmnd[MAX_COMMAND_SIZE];
int timeout;
unsigned long ul_timeout;
+ struct request_queue *q;
if (count < SZ_SG_IO_HDR)
return -EINVAL;
@@ -689,7 +690,9 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
sg_remove_request(sfp, srp);
return -EFAULT;
}
- if (read_only && !blk_verify_command(file, cmnd)) {
+ q = sfp->parentdp->device->request_queue;
+ if (read_only && blk_verify_command(&q->cmd_filter, cmnd,
+ file->f_mode & FMODE_WRITE)) {
sg_remove_request(sfp, srp);
return -EPERM;
}
@@ -793,6 +796,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp)))
return -ENXIO;
+
SCSI_LOG_TIMEOUT(3, printk("sg_ioctl: %s, cmd=0x%x\n",
sdp->disk->disk_name, (int) cmd_in));
read_only = (O_RDWR != (filp->f_flags & O_ACCMODE));
@@ -1057,11 +1061,14 @@ sg_ioctl(struct inode *inode, struct file *filp,
return -ENODEV;
if (read_only) {
unsigned char opcode = WRITE_6;
+ struct request_queue *q = sdp->device->request_queue;
Scsi_Ioctl_Command __user *siocp = p;
if (copy_from_user(&opcode, siocp->data, 1))
return -EFAULT;
- if (!blk_verify_command(filp, &opcode))
+ if (blk_verify_command(&q->cmd_filter,
+ &opcode,
+ filp->f_mode & FMODE_WRITE))
return -EPERM;
}
return sg_scsi_ioctl(filp, sdp->device->request_queue, NULL, p);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 88d6808..211160f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -280,6 +280,15 @@ struct blk_queue_tag {
atomic_t refcnt; /* map can be shared */
};
+#define BLK_SCSI_MAX_CMDS (256)
+#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+
+struct blk_scsi_cmd_filter {
+ unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+ unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+ struct kobject kobj;
+};
+
struct request_queue
{
/*
@@ -398,6 +407,7 @@ struct request_queue
#if defined(CONFIG_BLK_DEV_BSG)
struct bsg_class_device bsg_dev;
#endif
+ struct blk_scsi_cmd_filter cmd_filter;
};
#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
@@ -832,11 +842,11 @@ extern int blkdev_issue_flush(struct block_device *, sector_t *);
/*
* command filter functions
*/
-extern int blk_verify_command(struct file *file, unsigned char *cmd);
-extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
- unsigned char *cmd, mode_t *f_mode);
+extern int blk_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, int has_write_perm);
extern int blk_register_filter(struct gendisk *disk);
extern void blk_unregister_filter(struct gendisk *disk);
+extern void blk_set_cmd_filter_defaults(struct blk_scsi_cmd_filter *filter);
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e878741..0978e1e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -110,15 +110,6 @@ struct hd_struct {
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
#define GENHD_FL_FAIL 64
-#define BLK_SCSI_MAX_CMDS (256)
-#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-
-struct blk_scsi_cmd_filter {
- unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
- unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
- struct kobject kobj;
-};
-
struct gendisk {
int major; /* major number of driver */
int first_minor;
@@ -128,7 +119,6 @@ struct gendisk {
struct hd_struct **part; /* [indexed by minor] */
struct block_device_operations *fops;
struct request_queue *queue;
- struct blk_scsi_cmd_filter cmd_filter;
void *private_data;
sector_t capacity;
--
1.5.4.2
^ permalink raw reply related [flat|nested] 65+ messages in thread
* [PATCH 2/3] sg: restore command permission for TYPE_SCANNER
2008-07-26 9:03 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue FUJITA Tomonori
@ 2008-07-26 9:03 ` FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 3/3] rename blk_scsi_cmd_filter to blk_cmd_filter FUJITA Tomonori
2008-07-30 20:10 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue Peter Jones
2008-08-16 5:47 ` FUJITA Tomonori
2 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-26 9:03 UTC (permalink / raw)
To: linux-scsi
Cc: jens.axboe, James.Bottomley, dan.j.williams, adel.gadllah, pjones,
viro, dougg, matthew, FUJITA Tomonori
sg allowed any command for TYPE_SCANNER. The cmd_filter patchset
doesn't. We can't change sg's permission since it might break the
existing software.
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
drivers/scsi/sg.c | 22 ++++++++++++++--------
1 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 207c357..2973773 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -217,6 +217,18 @@ static int sg_last_dev(void);
#define SZ_SG_IOVEC sizeof(sg_iovec_t)
#define SZ_SG_REQ_INFO sizeof(sg_req_info_t)
+static int sg_allow_access(struct file *filp, unsigned char *cmd)
+{
+ struct sg_fd *sfp = (struct sg_fd *)filp->private_data;
+ struct request_queue *q = sfp->parentdp->device->request_queue;
+
+ if (sfp->parentdp->device->type == TYPE_SCANNER)
+ return 0;
+
+ return blk_verify_command(&q->cmd_filter,
+ cmd, filp->f_mode & FMODE_WRITE);
+}
+
static int
sg_open(struct inode *inode, struct file *filp)
{
@@ -641,7 +653,6 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
unsigned char cmnd[MAX_COMMAND_SIZE];
int timeout;
unsigned long ul_timeout;
- struct request_queue *q;
if (count < SZ_SG_IO_HDR)
return -EINVAL;
@@ -690,9 +701,7 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
sg_remove_request(sfp, srp);
return -EFAULT;
}
- q = sfp->parentdp->device->request_queue;
- if (read_only && blk_verify_command(&q->cmd_filter, cmnd,
- file->f_mode & FMODE_WRITE)) {
+ if (read_only && sg_allow_access(file, cmnd)) {
sg_remove_request(sfp, srp);
return -EPERM;
}
@@ -1061,14 +1070,11 @@ sg_ioctl(struct inode *inode, struct file *filp,
return -ENODEV;
if (read_only) {
unsigned char opcode = WRITE_6;
- struct request_queue *q = sdp->device->request_queue;
Scsi_Ioctl_Command __user *siocp = p;
if (copy_from_user(&opcode, siocp->data, 1))
return -EFAULT;
- if (blk_verify_command(&q->cmd_filter,
- &opcode,
- filp->f_mode & FMODE_WRITE))
+ if (sg_allow_access(filp, &opcode))
return -EPERM;
}
return sg_scsi_ioctl(filp, sdp->device->request_queue, NULL, p);
--
1.5.4.2
^ permalink raw reply related [flat|nested] 65+ messages in thread
* [PATCH 3/3] rename blk_scsi_cmd_filter to blk_cmd_filter
2008-07-26 9:03 ` [PATCH 2/3] sg: restore command permission for TYPE_SCANNER FUJITA Tomonori
@ 2008-07-26 9:03 ` FUJITA Tomonori
0 siblings, 0 replies; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-26 9:03 UTC (permalink / raw)
To: linux-scsi
Cc: jens.axboe, James.Bottomley, dan.j.williams, adel.gadllah, pjones,
viro, dougg, matthew, FUJITA Tomonori
Technically, the cmd_filter would be applied to other protocols though
it's unlikely to happen. Putting SCSI stuff to request_queue is kinda
layer violation. So let's rename it.
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
block/cmd-filter.c | 30 +++++++++++++++---------------
block/scsi_ioctl.c | 2 +-
include/linux/blkdev.h | 8 ++++----
3 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
index c705c33..0e3a123 100644
--- a/block/cmd-filter.c
+++ b/block/cmd-filter.c
@@ -27,7 +27,7 @@
#include <scsi/scsi.h>
#include <linux/cdrom.h>
-int blk_verify_command(struct blk_scsi_cmd_filter *filter,
+int blk_verify_command(struct blk_cmd_filter *filter,
unsigned char *cmd, int has_write_perm)
{
/* root can do any command. */
@@ -51,7 +51,7 @@ int blk_verify_command(struct blk_scsi_cmd_filter *filter,
EXPORT_SYMBOL(blk_verify_command);
/* and now, the sysfs stuff */
-static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
+static ssize_t rcf_cmds_show(struct blk_cmd_filter *filter, char *page,
int rw)
{
char *npage = page;
@@ -78,18 +78,18 @@ static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
return npage - page;
}
-static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter, char *page)
+static ssize_t rcf_readcmds_show(struct blk_cmd_filter *filter, char *page)
{
return rcf_cmds_show(filter, page, READ);
}
-static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter *filter,
+static ssize_t rcf_writecmds_show(struct blk_cmd_filter *filter,
char *page)
{
return rcf_cmds_show(filter, page, WRITE);
}
-static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
+static ssize_t rcf_cmds_store(struct blk_cmd_filter *filter,
const char *page, size_t count, int rw)
{
ssize_t ret = 0;
@@ -122,13 +122,13 @@ static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
return count;
}
-static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
+static ssize_t rcf_readcmds_store(struct blk_cmd_filter *filter,
const char *page, size_t count)
{
return rcf_cmds_store(filter, page, count, READ);
}
-static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
+static ssize_t rcf_writecmds_store(struct blk_cmd_filter *filter,
const char *page, size_t count)
{
return rcf_cmds_store(filter, page, count, WRITE);
@@ -136,8 +136,8 @@ static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
struct rcf_sysfs_entry {
struct attribute attr;
- ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
- ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
+ ssize_t (*show)(struct blk_cmd_filter *, char *);
+ ssize_t (*store)(struct blk_cmd_filter *, const char *, size_t);
};
static struct rcf_sysfs_entry rcf_readcmds_entry = {
@@ -164,9 +164,9 @@ static ssize_t
rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct rcf_sysfs_entry *entry = to_rcf(attr);
- struct blk_scsi_cmd_filter *filter;
+ struct blk_cmd_filter *filter;
- filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ filter = container_of(kobj, struct blk_cmd_filter, kobj);
if (entry->show)
return entry->show(filter, page);
@@ -178,7 +178,7 @@ rcf_attr_store(struct kobject *kobj, struct attribute *attr,
const char *page, size_t length)
{
struct rcf_sysfs_entry *entry = to_rcf(attr);
- struct blk_scsi_cmd_filter *filter;
+ struct blk_cmd_filter *filter;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -186,7 +186,7 @@ rcf_attr_store(struct kobject *kobj, struct attribute *attr,
if (!entry->store)
return -EINVAL;
- filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ filter = container_of(kobj, struct blk_cmd_filter, kobj);
return entry->store(filter, page, length);
}
@@ -203,7 +203,7 @@ static struct kobj_type rcf_ktype = {
int blk_register_filter(struct gendisk *disk)
{
int ret;
- struct blk_scsi_cmd_filter *filter = &disk->queue->cmd_filter;
+ struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
struct kobject *parent = kobject_get(disk->holder_dir->parent);
if (!parent)
@@ -220,7 +220,7 @@ int blk_register_filter(struct gendisk *disk)
void blk_unregister_filter(struct gendisk *disk)
{
- struct blk_scsi_cmd_filter *filter = &disk->queue->cmd_filter;
+ struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
kobject_put(&filter->kobj);
kobject_put(disk->holder_dir->parent);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 1f7d56e..6ae41a5 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,7 +105,7 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
return put_user(1, p);
}
-void blk_set_cmd_filter_defaults(struct blk_scsi_cmd_filter *filter)
+void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
{
/* Basic read-only commands */
__set_bit(TEST_UNIT_READY, filter->read_ok);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 211160f..e23f2ad 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -283,7 +283,7 @@ struct blk_queue_tag {
#define BLK_SCSI_MAX_CMDS (256)
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-struct blk_scsi_cmd_filter {
+struct blk_cmd_filter {
unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
struct kobject kobj;
@@ -407,7 +407,7 @@ struct request_queue
#if defined(CONFIG_BLK_DEV_BSG)
struct bsg_class_device bsg_dev;
#endif
- struct blk_scsi_cmd_filter cmd_filter;
+ struct blk_cmd_filter cmd_filter;
};
#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
@@ -842,11 +842,11 @@ extern int blkdev_issue_flush(struct block_device *, sector_t *);
/*
* command filter functions
*/
-extern int blk_verify_command(struct blk_scsi_cmd_filter *filter,
+extern int blk_verify_command(struct blk_cmd_filter *filter,
unsigned char *cmd, int has_write_perm);
extern int blk_register_filter(struct gendisk *disk);
extern void blk_unregister_filter(struct gendisk *disk);
-extern void blk_set_cmd_filter_defaults(struct blk_scsi_cmd_filter *filter);
+extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
--
1.5.4.2
^ permalink raw reply related [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-26 9:03 ` [PATCH 0/3] cmd_filter fixes FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue FUJITA Tomonori
@ 2008-07-27 19:59 ` Adel Gadllah
2008-07-27 20:02 ` Adel Gadllah
1 sibling, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-07-27 19:59 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: linux-scsi, jens.axboe, James.Bottomley, dan.j.williams, pjones,
viro, dougg, matthew
2008/7/26 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
> cmd_filter works only for the block layer SG_IO with SCSI block
> devices. It breaks scsi/sg.c, bsg, and the block layer SG_IO with SCSI
> character devices (such as st). We might hit a kernel crash with them.
>
> The problem is that cmd_filter code accesses to gendisk (having struct
> blk_scsi_cmd_filter) via inode->i_bdev->bd_disk. It works for only
> SCSI block device files. With character device files, inode->i_bdev
> leads you to struct cdev. inode->i_bdev->bd_disk->blk_scsi_cmd_filter
> isn't safe. For example, I got the following kernel crash with bsg:
>
> Pid: 1480, comm: bsg-test Not tainted (2.6.26-06879-gfb2e405 #1)
> EIP: 0060:[<c01b02fd>] EFLAGS: 00010202 CPU: 0
> EIP is at blk_cmd_filter_verify_command+0x1e/0x41
> EAX: 00000000 EBX: 00000237 ECX: 00000011 EDX: 00000003
> ESI: df9692ae EDI: de93485c EBP: de8ea3a8 ESP: de937e90
> DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
> Process bsg-test (pid: 1480, ti=de936000 task=df8b7230 task.ti=de936000)
> Stack: de937ec4 df969228 fffffff2 c01b0866 df025030 00000000 de937ec4 c01b1009
> bf9a5b10 de8ea3a8 c01b110e 00000246 df1985f4 00000051 00000000 00000000
> 00000006 bf9a5bd2 00000000 00000000 00000000 00000000 00000000 00000000
> Call Trace:
> [<c01b0866>] bsg_map_hdr+0xe7/0x225
> [<c01b1009>] bsg_ioctl+0x0/0x17c
> [<c01b110e>] bsg_ioctl+0x105/0x17c
> [<c01b1009>] bsg_ioctl+0x0/0x17c
> [<c015a11a>] vfs_ioctl+0x16/0x48
> [<c015a332>] do_vfs_ioctl+0x1e6/0x1f9
> [<c01b770c>] trace_hardirqs_on_thunk+0xc/0x10
> [<c015a371>] sys_ioctl+0x2c/0x43
> [<c01028b1>] sysenter_do_call+0x12/0x35
> =======================
> Code: 10 89 fa ff d3 89 c2 89 d0 5b 5e 5f c3 57 56 53 89 c3 89 d6 89 cf b8 11 00 00 00 e8 fd 94 f6 ff 85 c0 75 1f 85 db 74 1f 0f b6 16 <0f> a3 13 19 c0 85 c0 75 0f 0f a3 53 20 19 c0 85 c0 74 09 f6 07
> EIP: [<c01b02fd>] blk_cmd_filter_verify_command+0x1e/0x41 SS:ESP 0068:de937e90
> ---[ end trace 0cceddb2f202a402 ]---
>
>
> SCSI ULDs don't expose gendisk; they keep it private. bsg needs to be
> independent on any protocols. We shouldn't change ULDs to expose their
> gendisk.
>
> This patchset moves struct blk_scsi_cmd_filter from gendisk to
> request_queue, a common object, which eveyone can access to.
>
> The user interface doesn't change; users can change the filters via
> /sys/block/. gendisk has a pointer to request_queue so the cmd_filter
> code accesses to struct blk_scsi_cmd_filter.
Thanks for the fixes, wanted to test them but they don't apply cleanly
here (tried against Jens and Linus tree).
Anyway the look fine to me from just reading it ... have you tested if
changing the filter actually works?
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-27 19:59 ` [PATCH 0/3] cmd_filter fixes Adel Gadllah
@ 2008-07-27 20:02 ` Adel Gadllah
2008-07-28 2:18 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-07-27 20:02 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: linux-scsi, jens.axboe, James.Bottomley, dan.j.williams, pjones,
viro, dougg, matthew
2008/7/27 Adel Gadllah <adel.gadllah@gmail.com>:
> 2008/7/26 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
>> cmd_filter works only for the block layer SG_IO with SCSI block
>> devices. It breaks scsi/sg.c, bsg, and the block layer SG_IO with SCSI
>> character devices (such as st). We might hit a kernel crash with them.
>>
>> The problem is that cmd_filter code accesses to gendisk (having struct
>> blk_scsi_cmd_filter) via inode->i_bdev->bd_disk. It works for only
>> SCSI block device files. With character device files, inode->i_bdev
>> leads you to struct cdev. inode->i_bdev->bd_disk->blk_scsi_cmd_filter
>> isn't safe. For example, I got the following kernel crash with bsg:
>>
>> Pid: 1480, comm: bsg-test Not tainted (2.6.26-06879-gfb2e405 #1)
>> EIP: 0060:[<c01b02fd>] EFLAGS: 00010202 CPU: 0
>> EIP is at blk_cmd_filter_verify_command+0x1e/0x41
>> EAX: 00000000 EBX: 00000237 ECX: 00000011 EDX: 00000003
>> ESI: df9692ae EDI: de93485c EBP: de8ea3a8 ESP: de937e90
>> DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
>> Process bsg-test (pid: 1480, ti=de936000 task=df8b7230 task.ti=de936000)
>> Stack: de937ec4 df969228 fffffff2 c01b0866 df025030 00000000 de937ec4 c01b1009
>> bf9a5b10 de8ea3a8 c01b110e 00000246 df1985f4 00000051 00000000 00000000
>> 00000006 bf9a5bd2 00000000 00000000 00000000 00000000 00000000 00000000
>> Call Trace:
>> [<c01b0866>] bsg_map_hdr+0xe7/0x225
>> [<c01b1009>] bsg_ioctl+0x0/0x17c
>> [<c01b110e>] bsg_ioctl+0x105/0x17c
>> [<c01b1009>] bsg_ioctl+0x0/0x17c
>> [<c015a11a>] vfs_ioctl+0x16/0x48
>> [<c015a332>] do_vfs_ioctl+0x1e6/0x1f9
>> [<c01b770c>] trace_hardirqs_on_thunk+0xc/0x10
>> [<c015a371>] sys_ioctl+0x2c/0x43
>> [<c01028b1>] sysenter_do_call+0x12/0x35
>> =======================
>> Code: 10 89 fa ff d3 89 c2 89 d0 5b 5e 5f c3 57 56 53 89 c3 89 d6 89 cf b8 11 00 00 00 e8 fd 94 f6 ff 85 c0 75 1f 85 db 74 1f 0f b6 16 <0f> a3 13 19 c0 85 c0 75 0f 0f a3 53 20 19 c0 85 c0 74 09 f6 07
>> EIP: [<c01b02fd>] blk_cmd_filter_verify_command+0x1e/0x41 SS:ESP 0068:de937e90
>> ---[ end trace 0cceddb2f202a402 ]---
>>
>>
>> SCSI ULDs don't expose gendisk; they keep it private. bsg needs to be
>> independent on any protocols. We shouldn't change ULDs to expose their
>> gendisk.
>>
>> This patchset moves struct blk_scsi_cmd_filter from gendisk to
>> request_queue, a common object, which eveyone can access to.
>>
>> The user interface doesn't change; users can change the filters via
>> /sys/block/. gendisk has a pointer to request_queue so the cmd_filter
>> code accesses to struct blk_scsi_cmd_filter.
>
> Thanks for the fixes, wanted to test them but they don't apply cleanly
> here (tried against Jens and Linus tree).
> Anyway the look fine to me from just reading it ... have you tested if
> changing the filter actually works?
>
Should be "Anyway the look fine to me from just reading them ... have
you tested if
changing the filter (via sysfs) actually works?"
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-27 20:02 ` Adel Gadllah
@ 2008-07-28 2:18 ` FUJITA Tomonori
2008-07-30 19:59 ` Adel Gadllah
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-28 2:18 UTC (permalink / raw)
To: adel.gadllah
Cc: fujita.tomonori, linux-scsi, jens.axboe, James.Bottomley,
dan.j.williams, pjones, viro, dougg, matthew
On Sun, 27 Jul 2008 22:02:15 +0200
"Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> 2008/7/27 Adel Gadllah <adel.gadllah@gmail.com>:
> > 2008/7/26 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
> >> cmd_filter works only for the block layer SG_IO with SCSI block
> >> devices. It breaks scsi/sg.c, bsg, and the block layer SG_IO with SCSI
> >> character devices (such as st). We might hit a kernel crash with them.
> >>
> >> The problem is that cmd_filter code accesses to gendisk (having struct
> >> blk_scsi_cmd_filter) via inode->i_bdev->bd_disk. It works for only
> >> SCSI block device files. With character device files, inode->i_bdev
> >> leads you to struct cdev. inode->i_bdev->bd_disk->blk_scsi_cmd_filter
> >> isn't safe. For example, I got the following kernel crash with bsg:
> >>
> >> Pid: 1480, comm: bsg-test Not tainted (2.6.26-06879-gfb2e405 #1)
> >> EIP: 0060:[<c01b02fd>] EFLAGS: 00010202 CPU: 0
> >> EIP is at blk_cmd_filter_verify_command+0x1e/0x41
> >> EAX: 00000000 EBX: 00000237 ECX: 00000011 EDX: 00000003
> >> ESI: df9692ae EDI: de93485c EBP: de8ea3a8 ESP: de937e90
> >> DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
> >> Process bsg-test (pid: 1480, ti=de936000 task=df8b7230 task.ti=de936000)
> >> Stack: de937ec4 df969228 fffffff2 c01b0866 df025030 00000000 de937ec4 c01b1009
> >> bf9a5b10 de8ea3a8 c01b110e 00000246 df1985f4 00000051 00000000 00000000
> >> 00000006 bf9a5bd2 00000000 00000000 00000000 00000000 00000000 00000000
> >> Call Trace:
> >> [<c01b0866>] bsg_map_hdr+0xe7/0x225
> >> [<c01b1009>] bsg_ioctl+0x0/0x17c
> >> [<c01b110e>] bsg_ioctl+0x105/0x17c
> >> [<c01b1009>] bsg_ioctl+0x0/0x17c
> >> [<c015a11a>] vfs_ioctl+0x16/0x48
> >> [<c015a332>] do_vfs_ioctl+0x1e6/0x1f9
> >> [<c01b770c>] trace_hardirqs_on_thunk+0xc/0x10
> >> [<c015a371>] sys_ioctl+0x2c/0x43
> >> [<c01028b1>] sysenter_do_call+0x12/0x35
> >> =======================
> >> Code: 10 89 fa ff d3 89 c2 89 d0 5b 5e 5f c3 57 56 53 89 c3 89 d6 89 cf b8 11 00 00 00 e8 fd 94 f6 ff 85 c0 75 1f 85 db 74 1f 0f b6 16 <0f> a3 13 19 c0 85 c0 75 0f 0f a3 53 20 19 c0 85 c0 74 09 f6 07
> >> EIP: [<c01b02fd>] blk_cmd_filter_verify_command+0x1e/0x41 SS:ESP 0068:de937e90
> >> ---[ end trace 0cceddb2f202a402 ]---
> >>
> >>
> >> SCSI ULDs don't expose gendisk; they keep it private. bsg needs to be
> >> independent on any protocols. We shouldn't change ULDs to expose their
> >> gendisk.
> >>
> >> This patchset moves struct blk_scsi_cmd_filter from gendisk to
> >> request_queue, a common object, which eveyone can access to.
> >>
> >> The user interface doesn't change; users can change the filters via
> >> /sys/block/. gendisk has a pointer to request_queue so the cmd_filter
> >> code accesses to struct blk_scsi_cmd_filter.
> >
> > Thanks for the fixes, wanted to test them but they don't apply cleanly
> > here (tried against Jens and Linus tree).
Hmm, I can cleanly apply them against Linus's latest tree:
commit c9272c4f9fbe2087beb3392f526dc5b19efaa56b
Merge: fb4284b... 744d18d...
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun Jul 27 16:47:55 2008 -0700
Merge branch 'hotfixes' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6
For your convenience, I also put them to a git tree:
git://git.kernel.org/pub/scm/linux/kernel/git/tomo/linux-2.6-misc.git cmdfilter
> > Anyway the look fine to me from just reading it ... have you tested if
> > changing the filter actually works?
> >
> Should be "Anyway the look fine to me from just reading them ... have
> you tested if
> changing the filter (via sysfs) actually works?"
Yes though not much.
BTW, I think that we need a document about how to control command
filters via sysfs (and IMO, there might be a better interface to
change the filter).
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-28 2:18 ` FUJITA Tomonori
@ 2008-07-30 19:59 ` Adel Gadllah
2008-07-31 4:55 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-07-30 19:59 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: linux-scsi, jens.axboe, James.Bottomley, dan.j.williams, pjones,
viro, dougg, matthew
2008/7/28 FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>:
>> > Thanks for the fixes, wanted to test them but they don't apply cleanly
>> > here (tried against Jens and Linus tree).
>
> Hmm, I can cleanly apply them against Linus's latest tree:
>
> commit c9272c4f9fbe2087beb3392f526dc5b19efaa56b
> Merge: fb4284b... 744d18d...
> Author: Linus Torvalds <torvalds@linux-foundation.org>
> Date: Sun Jul 27 16:47:55 2008 -0700
>
> Merge branch 'hotfixes' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6
>
> For your convenience, I also put them to a git tree:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tomo/linux-2.6-misc.git cmdfilter
>
Odd, got them to build now from quick testing there seem to work fine.
>> > Anyway the look fine to me from just reading it ... have you tested if
>> > changing the filter actually works?
>> >
>> Should be "Anyway the look fine to me from just reading them ... have
>> you tested if
>> changing the filter (via sysfs) actually works?"
>
> Yes though not much.
OK, thanks. Will do more detailed testing this weekend.
> BTW, I think that we need a document about how to control command
> filters via sysfs (and IMO, there might be a better interface to
> change the filter).
Yeah agreed, will add some docs this weekend.
As for "better interface" ... I am open for suggestions ;)
This was the one used in Peter's original patch and it looks straight forward.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 1/3] move cmd_filter from gendisk to request_queue
2008-07-26 9:03 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 2/3] sg: restore command permission for TYPE_SCANNER FUJITA Tomonori
@ 2008-07-30 20:10 ` Peter Jones
2008-07-31 5:13 ` FUJITA Tomonori
2008-08-16 5:47 ` FUJITA Tomonori
2 siblings, 1 reply; 65+ messages in thread
From: Peter Jones @ 2008-07-30 20:10 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: linux-scsi, jens.axboe, James.Bottomley, dan.j.williams,
adel.gadllah, viro, dougg, matthew
FUJITA Tomonori wrote:
> diff --git a/block/bsg.c b/block/bsg.c
> index 5a68b09..6c5c6f3 100644
> --- a/block/bsg.c
> +++ b/block/bsg.c
...
> @@ -87,6 +85,7 @@ struct bsg_command {
> int err;
> struct sg_io_v4 hdr;
> char sense[SCSI_SENSE_BUFFERSIZE];
> + int has_write_perm;
> };
>
> static void bsg_free_command(struct bsg_command *bc)
Why is this added? It never seems to be used.
--
Peter
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-30 19:59 ` Adel Gadllah
@ 2008-07-31 4:55 ` FUJITA Tomonori
2008-07-31 7:18 ` Matthew Wilcox
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-31 4:55 UTC (permalink / raw)
To: adel.gadllah
Cc: fujita.tomonori, linux-scsi, jens.axboe, James.Bottomley,
dan.j.williams, pjones, viro, dougg, matthew
On Wed, 30 Jul 2008 21:59:41 +0200
"Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> >> > Anyway the look fine to me from just reading it ... have you tested if
> >> > changing the filter actually works?
> >> >
> >> Should be "Anyway the look fine to me from just reading them ... have
> >> you tested if
> >> changing the filter (via sysfs) actually works?"
> >
> > Yes though not much.
>
> OK, thanks. Will do more detailed testing this weekend.
Thanks a lot.
> > BTW, I think that we need a document about how to control command
> > filters via sysfs (and IMO, there might be a better interface to
> > change the filter).
>
> Yeah agreed, will add some docs this weekend.
Thanks again.
> As for "better interface" ... I am open for suggestions ;)
> This was the one used in Peter's original patch and it looks straight forward.
I propose two changes at least.
First, The current code can handle only hexadecimal notation but it
doesn't present hexadecimal notation properly. I think that it would
be better to make it clear that we use hexadecimal notation. I prefer
the following:
fujita@clover:/sys/block/cciss!c0d0/cmd_filter$ cat read_table
0x00 0x03 0x08 0x12 0x1a 0x1b 0x1c
rather than the current one:
fujita@clover:/sys/block/cciss!c0d0/cmd_filter$ cat
00 03 08 12 1a 1b 1c
It's the same about changing the filter, I like to do:
clover:/sys/block/cciss!c0d0/cmd_filter# echo "0x00 0x03 0x08 0x12" > read_table
instead of
clover:/sys/block/cciss!c0d0/cmd_filter# echo "00 03 08 12" > read_table
Secondly, getting the different results from the following commands is
confusing for me:
clover:/sys/block/cciss!c0d0/cmd_filter# echo "03 08 12 1a 1b 1c" > read_table
clover:/sys/block/cciss!c0d0/cmd_filter# cat read_table
03 08 12 1a 1b 1c
clover:/sys/block/cciss!c0d0/cmd_filter# echo "3 08 12 1a 1b 1c" > read_table
clover:/sys/block/cciss!c0d0/cmd_filter# cat read_table
02 03 08 0a 0b 0c
I can send patches about them but before that, I like to discuss how
to fix the oops.
Someone might think that it would be better if we can change the
filter without specifying all the accepted commands. For example, we
could do like:
clover:/sys/block/cciss!c0d0/cmd_filter# cat read_table
03 08 12 1a 1b 1c
clover:/sys/block/cciss!c0d0/cmd_filter# echo 02 > read_table_on
02 03 08 12 1a 1b 1c
But I don't have strong opinions about it.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 1/3] move cmd_filter from gendisk to request_queue
2008-07-30 20:10 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue Peter Jones
@ 2008-07-31 5:13 ` FUJITA Tomonori
0 siblings, 0 replies; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-31 5:13 UTC (permalink / raw)
To: pjones
Cc: fujita.tomonori, linux-scsi, jens.axboe, James.Bottomley,
dan.j.williams, adel.gadllah, viro, dougg, matthew
On Wed, 30 Jul 2008 16:10:54 -0400
Peter Jones <pjones@redhat.com> wrote:
> FUJITA Tomonori wrote:
>
> > diff --git a/block/bsg.c b/block/bsg.c
> > index 5a68b09..6c5c6f3 100644
> > --- a/block/bsg.c
> > +++ b/block/bsg.c
> ...
> > @@ -87,6 +85,7 @@ struct bsg_command {
> > int err;
> > struct sg_io_v4 hdr;
> > char sense[SCSI_SENSE_BUFFERSIZE];
> > + int has_write_perm;
> > };
> >
> > static void bsg_free_command(struct bsg_command *bc)
>
> Why is this added? It never seems to be used.
Oops, I just forgot to remove it. Thanks.
I've updated a fixed version of the patchset:
git://git.kernel.org/pub/scm/linux/kernel/git/tomo/linux-2.6-misc.git cmdfilter
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-31 4:55 ` FUJITA Tomonori
@ 2008-07-31 7:18 ` Matthew Wilcox
2008-07-31 7:24 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: Matthew Wilcox @ 2008-07-31 7:18 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: adel.gadllah, linux-scsi, jens.axboe, James.Bottomley,
dan.j.williams, pjones, viro, dougg
On Thu, Jul 31, 2008 at 01:55:31PM +0900, FUJITA Tomonori wrote:
> Someone might think that it would be better if we can change the
> filter without specifying all the accepted commands. For example, we
> could do like:
>
> clover:/sys/block/cciss!c0d0/cmd_filter# cat read_table
> 03 08 12 1a 1b 1c
>
> clover:/sys/block/cciss!c0d0/cmd_filter# echo 02 > read_table_on
> 02 03 08 12 1a 1b 1c
>
> But I don't have strong opinions about it.
We need to be able to delete as well as add.
We could have a scheme in which:
echo -- "-03" >read_table
cat read_table
08 12 1a 1b 1c
echo -- "+02 +07 +08" >read_table
cat read_table
02 07 08 12 1a 1b 1c
The + could be implicit.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-31 7:18 ` Matthew Wilcox
@ 2008-07-31 7:24 ` FUJITA Tomonori
2008-07-31 13:04 ` Matthew Wilcox
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-31 7:24 UTC (permalink / raw)
To: matthew
Cc: fujita.tomonori, adel.gadllah, linux-scsi, jens.axboe,
James.Bottomley, dan.j.williams, pjones, viro, dougg
On Thu, 31 Jul 2008 01:18:52 -0600
Matthew Wilcox <matthew@wil.cx> wrote:
> On Thu, Jul 31, 2008 at 01:55:31PM +0900, FUJITA Tomonori wrote:
> > Someone might think that it would be better if we can change the
> > filter without specifying all the accepted commands. For example, we
> > could do like:
> >
> > clover:/sys/block/cciss!c0d0/cmd_filter# cat read_table
> > 03 08 12 1a 1b 1c
> >
> > clover:/sys/block/cciss!c0d0/cmd_filter# echo 02 > read_table_on
> > 02 03 08 12 1a 1b 1c
> >
> > But I don't have strong opinions about it.
>
> We need to be able to delete as well as add.
Yeah, I thought about adding a new file such as read_table_off.
> We could have a scheme in which:
>
> echo -- "-03" >read_table
> cat read_table
> 08 12 1a 1b 1c
>
> echo -- "+02 +07 +08" >read_table
> cat read_table
> 02 07 08 12 1a 1b 1c
>
> The + could be implicit.
Looks another potential option though I prefer "-0x03" than "-03" as I
said in the previous mail.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-31 7:24 ` FUJITA Tomonori
@ 2008-07-31 13:04 ` Matthew Wilcox
2008-07-31 15:18 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: Matthew Wilcox @ 2008-07-31 13:04 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: adel.gadllah, linux-scsi, jens.axboe, James.Bottomley,
dan.j.williams, pjones, viro, dougg
On Thu, Jul 31, 2008 at 04:24:54PM +0900, FUJITA Tomonori wrote:
> > We need to be able to delete as well as add.
>
> Yeah, I thought about adding a new file such as read_table_off.
Three files seems excessive. Plus the +/- notation lets you do:
echo -- +0x02 +0x03 -0x08
atomically. I don't know if that's an important feature to have.
> Looks another potential option though I prefer "-0x03" than "-03" as I
> said in the previous mail.
Makes sense.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-31 13:04 ` Matthew Wilcox
@ 2008-07-31 15:18 ` FUJITA Tomonori
2008-08-07 18:47 ` Adel Gadllah
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-07-31 15:18 UTC (permalink / raw)
To: matthew
Cc: fujita.tomonori, adel.gadllah, linux-scsi, jens.axboe,
James.Bottomley, dan.j.williams, pjones, viro, dougg
On Thu, 31 Jul 2008 07:04:22 -0600
Matthew Wilcox <matthew@wil.cx> wrote:
> On Thu, Jul 31, 2008 at 04:24:54PM +0900, FUJITA Tomonori wrote:
> > > We need to be able to delete as well as add.
> >
> > Yeah, I thought about adding a new file such as read_table_off.
>
> Three files seems excessive. Plus the +/- notation lets you do:
>
> echo -- +0x02 +0x03 -0x08
I like it. This interface can handle both multiple changes and a
single change nicely.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-07-31 15:18 ` FUJITA Tomonori
@ 2008-08-07 18:47 ` Adel Gadllah
2008-08-08 0:20 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-08-07 18:47 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: matthew, linux-scsi, jens.axboe, James.Bottomley, dan.j.williams,
pjones, viro, dougg
Can we push this patches to Linus?
They fix the regression and do not seem to introduce a new one
(couldn't find any while testing it).
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-08-07 18:47 ` Adel Gadllah
@ 2008-08-08 0:20 ` FUJITA Tomonori
2008-08-08 5:54 ` Jens Axboe
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-08-08 0:20 UTC (permalink / raw)
To: jens.axboe, adel.gadllah
Cc: fujita.tomonori, matthew, linux-scsi, James.Bottomley,
dan.j.williams, pjones, viro, dougg
On Thu, 7 Aug 2008 20:47:45 +0200
"Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> Can we push this patches to Linus?
> They fix the regression and do not seem to introduce a new one
> (couldn't find any while testing it).
Jens? Any comments on the patchset?
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-08-08 0:20 ` FUJITA Tomonori
@ 2008-08-08 5:54 ` Jens Axboe
2008-08-08 6:11 ` FUJITA Tomonori
2008-08-21 9:26 ` [PATCH 0/3] cmd_filter fixes Adel Gadllah
0 siblings, 2 replies; 65+ messages in thread
From: Jens Axboe @ 2008-08-08 5:54 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: adel.gadllah, matthew, linux-scsi, James.Bottomley,
dan.j.williams, pjones, viro, dougg
On Fri, Aug 08 2008, FUJITA Tomonori wrote:
> On Thu, 7 Aug 2008 20:47:45 +0200
> "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
>
> > Can we push this patches to Linus?
> > They fix the regression and do not seem to introduce a new one
> > (couldn't find any while testing it).
>
> Jens? Any comments on the patchset?
Looks fine, I'll queue it up.
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-08-08 5:54 ` Jens Axboe
@ 2008-08-08 6:11 ` FUJITA Tomonori
2008-08-08 6:15 ` Jens Axboe
2008-08-21 9:26 ` [PATCH 0/3] cmd_filter fixes Adel Gadllah
1 sibling, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-08-08 6:11 UTC (permalink / raw)
To: jens.axboe
Cc: fujita.tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg
On Fri, 8 Aug 2008 07:54:07 +0200
Jens Axboe <jens.axboe@oracle.com> wrote:
> On Fri, Aug 08 2008, FUJITA Tomonori wrote:
> > On Thu, 7 Aug 2008 20:47:45 +0200
> > "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> >
> > > Can we push this patches to Linus?
> > > They fix the regression and do not seem to introduce a new one
> > > (couldn't find any while testing it).
> >
> > Jens? Any comments on the patchset?
>
> Looks fine, I'll queue it up.
Cool, thanks.
We also discussed the possible interface changes:
http://marc.info/?t=121706320300003&r=1&w=2
Adel already has a patch for these changes:
http://marc.info/?l=linux-scsi&m=121768334007545&w=2
BTW, we also wait for your verdict on:
http://marc.info/?t=121611935500002&r=1&w=2
Thanks,
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-08-08 6:11 ` FUJITA Tomonori
@ 2008-08-08 6:15 ` Jens Axboe
2008-08-08 6:29 ` FUJITA Tomonori
0 siblings, 1 reply; 65+ messages in thread
From: Jens Axboe @ 2008-08-08 6:15 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: adel.gadllah, matthew, linux-scsi, James.Bottomley,
dan.j.williams, pjones, viro, dougg
On Fri, Aug 08 2008, FUJITA Tomonori wrote:
> On Fri, 8 Aug 2008 07:54:07 +0200
> Jens Axboe <jens.axboe@oracle.com> wrote:
>
> > On Fri, Aug 08 2008, FUJITA Tomonori wrote:
> > > On Thu, 7 Aug 2008 20:47:45 +0200
> > > "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> > >
> > > > Can we push this patches to Linus?
> > > > They fix the regression and do not seem to introduce a new one
> > > > (couldn't find any while testing it).
> > >
> > > Jens? Any comments on the patchset?
> >
> > Looks fine, I'll queue it up.
>
> Cool, thanks.
>
> We also discussed the possible interface changes:
>
> http://marc.info/?t=121706320300003&r=1&w=2
>
> Adel already has a patch for these changes:
>
> http://marc.info/?l=linux-scsi&m=121768334007545&w=2
Yep, looks good to me.
> BTW, we also wait for your verdict on:
>
> http://marc.info/?t=121611935500002&r=1&w=2
I've always hated the iommu virtual merging complexity. My plan is to
rip it out.
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-08-08 6:15 ` Jens Axboe
@ 2008-08-08 6:29 ` FUJITA Tomonori
2008-08-08 6:35 ` Jens Axboe
0 siblings, 1 reply; 65+ messages in thread
From: FUJITA Tomonori @ 2008-08-08 6:29 UTC (permalink / raw)
To: jens.axboe
Cc: fujita.tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg
On Fri, 8 Aug 2008 08:15:37 +0200
Jens Axboe <jens.axboe@oracle.com> wrote:
> > BTW, we also wait for your verdict on:
> >
> > http://marc.info/?t=121611935500002&r=1&w=2
>
> I've always hated the iommu virtual merging complexity. My plan is to
> rip it out.
No complaint from me. I'm just happy to see the verdict at length.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-08-08 6:29 ` FUJITA Tomonori
@ 2008-08-08 6:35 ` Jens Axboe
2008-08-08 16:53 ` [PATCH 1/2] drop vmerge accounting Mikulas Patocka
0 siblings, 1 reply; 65+ messages in thread
From: Jens Axboe @ 2008-08-08 6:35 UTC (permalink / raw)
To: FUJITA Tomonori
Cc: adel.gadllah, matthew, linux-scsi, James.Bottomley,
dan.j.williams, pjones, viro, dougg, mpatocka
On Fri, Aug 08 2008, FUJITA Tomonori wrote:
> On Fri, 8 Aug 2008 08:15:37 +0200
> Jens Axboe <jens.axboe@oracle.com> wrote:
>
> > > BTW, we also wait for your verdict on:
> > >
> > > http://marc.info/?t=121611935500002&r=1&w=2
> >
> > I've always hated the iommu virtual merging complexity. My plan is to
> > rip it out.
>
> No complaint from me. I'm just happy to see the verdict at length.
Mikulas, you had a patch for this. Can you strip out the arch bits and
just send me the block bits? The arch bits should go in via the arch
maintainers.
The iommu code may still do virtual merging, it would be silly not to do
that if possible. Now that later kernels expose the necessary parameters
at that level as well, it's perfectly feasible.
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* [PATCH 1/2] drop vmerge accounting
2008-08-08 6:35 ` Jens Axboe
@ 2008-08-08 16:53 ` Mikulas Patocka
2008-08-08 17:07 ` [PATCH 2/2] " Mikulas Patocka
0 siblings, 1 reply; 65+ messages in thread
From: Mikulas Patocka @ 2008-08-08 16:53 UTC (permalink / raw)
To: Jens Axboe
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg
On Fri, 8 Aug 2008, Jens Axboe wrote:
> On Fri, Aug 08 2008, FUJITA Tomonori wrote:
> > On Fri, 8 Aug 2008 08:15:37 +0200
> > Jens Axboe <jens.axboe@oracle.com> wrote:
> >
> > > > BTW, we also wait for your verdict on:
> > > >
> > > > http://marc.info/?t=121611935500002&r=1&w=2
> > >
> > > I've always hated the iommu virtual merging complexity. My plan is to
> > > rip it out.
> >
> > No complaint from me. I'm just happy to see the verdict at length.
>
> Mikulas, you had a patch for this. Can you strip out the arch bits and
> just send me the block bits? The arch bits should go in via the arch
> maintainers.
>
> The iommu code may still do virtual merging, it would be silly not to do
> that if possible. Now that later kernels expose the necessary parameters
> at that level as well, it's perfectly feasible.
Here I'm sending the first one, it removes virtual merge accounting from
blk-merge.c (it acts as if all architectures undefined
BIO_VMERGE_BOUNDARY). I also created second patch that removes
bi_hw_segments field from struct bio and struct request. The next task
would be to remove nr_hw_segments from the request_queue, but it will
require changing the drivers.
Mikulas
---
Remove virtual merge accounting.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
block/blk-merge.c | 76 ++++------------------------------------------------
fs/bio.c | 6 +---
include/linux/bio.h | 15 ----------
3 files changed, 8 insertions(+), 89 deletions(-)
Index: linux-2.6.26-devel/block/blk-merge.c
===================================================================
--- linux-2.6.26-devel.orig/block/blk-merge.c 2008-07-15 23:29:39.000000000 +0200
+++ linux-2.6.26-devel/block/blk-merge.c 2008-08-08 18:46:57.000000000 +0200
@@ -66,7 +66,7 @@ void blk_recalc_rq_segments(struct reque
*/
high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
if (high || highprv)
- goto new_hw_segment;
+ goto new_segment;
if (cluster) {
if (seg_size + bv->bv_len > q->max_segment_size)
goto new_segment;
@@ -74,8 +74,6 @@ void blk_recalc_rq_segments(struct reque
goto new_segment;
if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
goto new_segment;
- if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
- goto new_hw_segment;
seg_size += bv->bv_len;
hw_seg_size += bv->bv_len;
@@ -83,17 +81,11 @@ void blk_recalc_rq_segments(struct reque
continue;
}
new_segment:
- if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
- !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
- hw_seg_size += bv->bv_len;
- else {
-new_hw_segment:
- if (nr_hw_segs == 1 &&
- hw_seg_size > rq->bio->bi_hw_front_size)
- rq->bio->bi_hw_front_size = hw_seg_size;
- hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
- nr_hw_segs++;
- }
+ if (nr_hw_segs == 1 &&
+ hw_seg_size > rq->bio->bi_hw_front_size)
+ rq->bio->bi_hw_front_size = hw_seg_size;
+ hw_seg_size = bv->bv_len;
+ nr_hw_segs++;
nr_phys_segs++;
bvprv = bv;
@@ -146,22 +138,6 @@ static int blk_phys_contig_segment(struc
return 0;
}
-static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
- struct bio *nxt)
-{
- if (!bio_flagged(bio, BIO_SEG_VALID))
- blk_recount_segments(q, bio);
- if (!bio_flagged(nxt, BIO_SEG_VALID))
- blk_recount_segments(q, nxt);
- if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
- BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
- return 0;
- if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
- return 0;
-
- return 1;
-}
-
/*
* map a request to scatterlist, return number of sg entries setup. Caller
* must make sure sg can hold rq->nr_phys_segments entries
@@ -299,7 +275,6 @@ int ll_back_merge_fn(struct request_queu
struct bio *bio)
{
unsigned short max_sectors;
- int len;
if (unlikely(blk_pc_request(req)))
max_sectors = q->max_hw_sectors;
@@ -316,19 +291,6 @@ int ll_back_merge_fn(struct request_queu
blk_recount_segments(q, req->biotail);
if (!bio_flagged(bio, BIO_SEG_VALID))
blk_recount_segments(q, bio);
- len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
- if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
- && !BIOVEC_VIRT_OVERSIZE(len)) {
- int mergeable = ll_new_mergeable(q, req, bio);
-
- if (mergeable) {
- if (req->nr_hw_segments == 1)
- req->bio->bi_hw_front_size = len;
- if (bio->bi_hw_segments == 1)
- bio->bi_hw_back_size = len;
- }
- return mergeable;
- }
return ll_new_hw_segment(q, req, bio);
}
@@ -337,7 +299,6 @@ int ll_front_merge_fn(struct request_que
struct bio *bio)
{
unsigned short max_sectors;
- int len;
if (unlikely(blk_pc_request(req)))
max_sectors = q->max_hw_sectors;
@@ -351,23 +312,10 @@ int ll_front_merge_fn(struct request_que
q->last_merge = NULL;
return 0;
}
- len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
if (!bio_flagged(bio, BIO_SEG_VALID))
blk_recount_segments(q, bio);
if (!bio_flagged(req->bio, BIO_SEG_VALID))
blk_recount_segments(q, req->bio);
- if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
- !BIOVEC_VIRT_OVERSIZE(len)) {
- int mergeable = ll_new_mergeable(q, req, bio);
-
- if (mergeable) {
- if (bio->bi_hw_segments == 1)
- bio->bi_hw_front_size = len;
- if (req->nr_hw_segments == 1)
- req->biotail->bi_hw_back_size = len;
- }
- return mergeable;
- }
return ll_new_hw_segment(q, req, bio);
}
@@ -399,18 +347,6 @@ static int ll_merge_requests_fn(struct r
return 0;
total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
- if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
- int len = req->biotail->bi_hw_back_size +
- next->bio->bi_hw_front_size;
- /*
- * propagate the combined length to the end of the requests
- */
- if (req->nr_hw_segments == 1)
- req->bio->bi_hw_front_size = len;
- if (next->nr_hw_segments == 1)
- next->biotail->bi_hw_back_size = len;
- total_hw_segments--;
- }
if (total_hw_segments > q->max_hw_segments)
return 0;
Index: linux-2.6.26-devel/include/linux/bio.h
===================================================================
--- linux-2.6.26-devel.orig/include/linux/bio.h 2008-07-15 23:29:39.000000000 +0200
+++ linux-2.6.26-devel/include/linux/bio.h 2008-08-08 18:46:29.000000000 +0200
@@ -26,21 +26,8 @@
#ifdef CONFIG_BLOCK
-/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
#include <asm/io.h>
-#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY)
-#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1))
-#define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE)
-#else
-#define BIOVEC_VIRT_START_SIZE(x) 0
-#define BIOVEC_VIRT_OVERSIZE(x) 0
-#endif
-
-#ifndef BIO_VMERGE_BOUNDARY
-#define BIO_VMERGE_BOUNDARY 0
-#endif
-
#define BIO_DEBUG
#ifdef BIO_DEBUG
@@ -235,8 +222,6 @@ static inline void *bio_data(struct bio
((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
#endif
-#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \
- ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
Index: linux-2.6.26-devel/fs/bio.c
===================================================================
--- linux-2.6.26-devel.orig/fs/bio.c 2008-07-15 23:29:39.000000000 +0200
+++ linux-2.6.26-devel/fs/bio.c 2008-08-08 18:46:30.000000000 +0200
@@ -352,8 +352,7 @@ static int __bio_add_page(struct request
*/
while (bio->bi_phys_segments >= q->max_phys_segments
- || bio->bi_hw_segments >= q->max_hw_segments
- || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
+ || bio->bi_hw_segments >= q->max_hw_segments) {
if (retried_segments)
return 0;
@@ -390,8 +389,7 @@ static int __bio_add_page(struct request
}
/* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
- BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
+ if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
bio->bi_flags &= ~(1 << BIO_SEG_VALID);
bio->bi_vcnt++;
^ permalink raw reply [flat|nested] 65+ messages in thread
* [PATCH 2/2] drop vmerge accounting
2008-08-08 16:53 ` [PATCH 1/2] drop vmerge accounting Mikulas Patocka
@ 2008-08-08 17:07 ` Mikulas Patocka
2008-08-15 9:48 ` Jens Axboe
0 siblings, 1 reply; 65+ messages in thread
From: Mikulas Patocka @ 2008-08-08 17:07 UTC (permalink / raw)
To: Jens Axboe
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg
On Fri, 8 Aug 2008, Mikulas Patocka wrote:
> On Fri, 8 Aug 2008, Jens Axboe wrote:
>
> > On Fri, Aug 08 2008, FUJITA Tomonori wrote:
> > > On Fri, 8 Aug 2008 08:15:37 +0200
> > > Jens Axboe <jens.axboe@oracle.com> wrote:
> > >
> > > > > BTW, we also wait for your verdict on:
> > > > >
> > > > > http://marc.info/?t=121611935500002&r=1&w=2
> > > >
> > > > I've always hated the iommu virtual merging complexity. My plan is to
> > > > rip it out.
> > >
> > > No complaint from me. I'm just happy to see the verdict at length.
> >
> > Mikulas, you had a patch for this. Can you strip out the arch bits and
> > just send me the block bits? The arch bits should go in via the arch
> > maintainers.
> >
> > The iommu code may still do virtual merging, it would be silly not to do
> > that if possible. Now that later kernels expose the necessary parameters
> > at that level as well, it's perfectly feasible.
>
> Here I'm sending the first one, it removes virtual merge accounting from
> blk-merge.c (it acts as if all architectures undefined
> BIO_VMERGE_BOUNDARY). I also created second patch that removes
> bi_hw_segments field from struct bio and struct request. The next task
> would be to remove nr_hw_segments from the request_queue, but it will
> require changing the drivers.
>
> Mikulas
(note: raid5 uses nr_hw_segments field for some other purpose, so this
patch will break it. It should be fixed by raid5 maintainers)
Remove hw_segments field from struct bio and struct request. Without virtual
merge accounting they have no purpose.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
block/blk-core.c | 1 -
block/blk-merge.c | 31 ++++---------------------------
drivers/md/raid1.c | 3 ---
drivers/md/raid10.c | 3 ---
fs/bio.c | 12 +-----------
include/linux/bio.h | 16 +---------------
include/linux/blkdev.h | 7 -------
7 files changed, 6 insertions(+), 67 deletions(-)
Index: linux-2.6.26-devel/block/blk-core.c
===================================================================
--- linux-2.6.26-devel.orig/block/blk-core.c 2008-08-08 18:46:30.000000000 +0200
+++ linux-2.6.26-devel/block/blk-core.c 2008-08-08 18:47:59.000000000 +0200
@@ -2003,7 +2003,6 @@ void blk_rq_bio_prep(struct request_queu
rq->cmd_flags |= (bio->bi_rw & 3);
rq->nr_phys_segments = bio_phys_segments(q, bio);
- rq->nr_hw_segments = bio_hw_segments(q, bio);
rq->current_nr_sectors = bio_cur_sectors(bio);
rq->hard_cur_sectors = rq->current_nr_sectors;
rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
Index: linux-2.6.26-devel/block/blk-merge.c
===================================================================
--- linux-2.6.26-devel.orig/block/blk-merge.c 2008-08-08 18:46:57.000000000 +0200
+++ linux-2.6.26-devel/block/blk-merge.c 2008-08-08 18:47:59.000000000 +0200
@@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct reques
void blk_recalc_rq_segments(struct request *rq)
{
int nr_phys_segs;
- int nr_hw_segs;
unsigned int phys_size;
- unsigned int hw_size;
struct bio_vec *bv, *bvprv = NULL;
int seg_size;
- int hw_seg_size;
int cluster;
struct req_iterator iter;
int high, highprv = 1;
@@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct reque
return;
cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
- hw_seg_size = seg_size = 0;
- phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+ seg_size = 0;
+ phys_size = nr_phys_segs = 0;
rq_for_each_segment(bv, rq, iter) {
/*
* the trick here is making sure that a high page is never
@@ -76,30 +73,17 @@ void blk_recalc_rq_segments(struct reque
goto new_segment;
seg_size += bv->bv_len;
- hw_seg_size += bv->bv_len;
bvprv = bv;
continue;
}
new_segment:
- if (nr_hw_segs == 1 &&
- hw_seg_size > rq->bio->bi_hw_front_size)
- rq->bio->bi_hw_front_size = hw_seg_size;
- hw_seg_size = bv->bv_len;
- nr_hw_segs++;
-
nr_phys_segs++;
bvprv = bv;
seg_size = bv->bv_len;
highprv = high;
}
- if (nr_hw_segs == 1 &&
- hw_seg_size > rq->bio->bi_hw_front_size)
- rq->bio->bi_hw_front_size = hw_seg_size;
- if (hw_seg_size > rq->biotail->bi_hw_back_size)
- rq->biotail->bi_hw_back_size = hw_seg_size;
rq->nr_phys_segments = nr_phys_segs;
- rq->nr_hw_segments = nr_hw_segs;
}
void blk_recount_segments(struct request_queue *q, struct bio *bio)
@@ -112,7 +96,6 @@ void blk_recount_segments(struct request
blk_recalc_rq_segments(&rq);
bio->bi_next = nxt;
bio->bi_phys_segments = rq.nr_phys_segments;
- bio->bi_hw_segments = rq.nr_hw_segments;
bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);
@@ -251,10 +234,9 @@ static inline int ll_new_hw_segment(stru
struct request *req,
struct bio *bio)
{
- int nr_hw_segs = bio_hw_segments(q, bio);
int nr_phys_segs = bio_phys_segments(q, bio);
- if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
+ if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
|| req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
@@ -266,7 +248,6 @@ static inline int ll_new_hw_segment(stru
* This will form the start of a new hw segment. Bump both
* counters.
*/
- req->nr_hw_segments += nr_hw_segs;
req->nr_phys_segments += nr_phys_segs;
return 1;
}
@@ -324,7 +305,6 @@ static int ll_merge_requests_fn(struct r
struct request *next)
{
int total_phys_segments;
- int total_hw_segments;
/*
* First check if the either of the requests are re-queued
@@ -346,14 +326,11 @@ static int ll_merge_requests_fn(struct r
if (total_phys_segments > q->max_phys_segments)
return 0;
- total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
-
- if (total_hw_segments > q->max_hw_segments)
+ if (total_phys_segments > q->max_hw_segments)
return 0;
/* Merge is OK... */
req->nr_phys_segments = total_phys_segments;
- req->nr_hw_segments = total_hw_segments;
return 1;
}
Index: linux-2.6.26-devel/include/linux/bio.h
===================================================================
--- linux-2.6.26-devel.orig/include/linux/bio.h 2008-08-08 18:46:29.000000000 +0200
+++ linux-2.6.26-devel/include/linux/bio.h 2008-08-08 18:47:59.000000000 +0200
@@ -79,21 +79,8 @@ struct bio {
*/
unsigned short bi_phys_segments;
- /* Number of segments after physical and DMA remapping
- * hardware coalescing is performed.
- */
- unsigned short bi_hw_segments;
-
unsigned int bi_size; /* residual I/O count */
- /*
- * To keep track of the max hw size, we account for the
- * sizes of the first and last virtually mergeable segments
- * in this bio
- */
- unsigned int bi_hw_front_size;
- unsigned int bi_hw_back_size;
-
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
struct bio_vec *bi_io_vec; /* the actual vec list */
@@ -112,7 +99,7 @@ struct bio {
#define BIO_UPTODATE 0 /* ok after I/O completion */
#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */
#define BIO_EOF 2 /* out-out-bounds error */
-#define BIO_SEG_VALID 3 /* nr_hw_seg valid */
+#define BIO_SEG_VALID 3 /* bi_phys_segments valid */
#define BIO_CLONED 4 /* doesn't own data */
#define BIO_BOUNCED 5 /* bio is a bounce bio */
#define BIO_USER_MAPPED 6 /* contains user pages */
@@ -292,7 +279,6 @@ extern void bio_free(struct bio *, struc
extern void bio_endio(struct bio *, int);
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
-extern int bio_hw_segments(struct request_queue *, struct bio *);
extern void __bio_clone(struct bio *, struct bio *);
extern struct bio *bio_clone(struct bio *, gfp_t);
Index: linux-2.6.26-devel/include/linux/blkdev.h
===================================================================
--- linux-2.6.26-devel.orig/include/linux/blkdev.h 2008-08-08 18:46:29.000000000 +0200
+++ linux-2.6.26-devel/include/linux/blkdev.h 2008-08-08 18:47:59.000000000 +0200
@@ -195,13 +195,6 @@ struct request {
*/
unsigned short nr_phys_segments;
- /* Number of scatter-gather addr+len pairs after
- * physical and DMA remapping hardware coalescing is performed.
- * This is the number of scatter-gather entries the driver
- * will actually have to deal with after DMA mapping is done.
- */
- unsigned short nr_hw_segments;
-
unsigned short ioprio;
void *special;
Index: linux-2.6.26-devel/fs/bio.c
===================================================================
--- linux-2.6.26-devel.orig/fs/bio.c 2008-08-08 18:46:30.000000000 +0200
+++ linux-2.6.26-devel/fs/bio.c 2008-08-08 18:48:00.000000000 +0200
@@ -229,14 +229,6 @@ inline int bio_phys_segments(struct requ
return bio->bi_phys_segments;
}
-inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
-{
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
- blk_recount_segments(q, bio);
-
- return bio->bi_hw_segments;
-}
-
/**
* __bio_clone - clone a bio
* @bio: destination bio
@@ -352,7 +344,7 @@ static int __bio_add_page(struct request
*/
while (bio->bi_phys_segments >= q->max_phys_segments
- || bio->bi_hw_segments >= q->max_hw_segments) {
+ || bio->bi_phys_segments >= q->max_hw_segments) {
if (retried_segments)
return 0;
@@ -394,7 +386,6 @@ static int __bio_add_page(struct request
bio->bi_vcnt++;
bio->bi_phys_segments++;
- bio->bi_hw_segments++;
done:
bio->bi_size += len;
return len;
@@ -1387,7 +1378,6 @@ EXPORT_SYMBOL(bio_init);
EXPORT_SYMBOL(__bio_clone);
EXPORT_SYMBOL(bio_clone);
EXPORT_SYMBOL(bio_phys_segments);
-EXPORT_SYMBOL(bio_hw_segments);
EXPORT_SYMBOL(bio_add_page);
EXPORT_SYMBOL(bio_add_pc_page);
EXPORT_SYMBOL(bio_get_nr_vecs);
Index: linux-2.6.26-devel/drivers/md/raid1.c
===================================================================
--- linux-2.6.26-devel.orig/drivers/md/raid1.c 2008-08-08 19:01:55.000000000 +0200
+++ linux-2.6.26-devel/drivers/md/raid1.c 2008-08-08 19:04:10.000000000 +0200
@@ -1297,9 +1297,6 @@ static void sync_request_write(mddev_t *
sbio->bi_size = r1_bio->sectors << 9;
sbio->bi_idx = 0;
sbio->bi_phys_segments = 0;
- sbio->bi_hw_segments = 0;
- sbio->bi_hw_front_size = 0;
- sbio->bi_hw_back_size = 0;
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
sbio->bi_flags |= 1 << BIO_UPTODATE;
sbio->bi_next = NULL;
Index: linux-2.6.26-devel/drivers/md/raid10.c
===================================================================
--- linux-2.6.26-devel.orig/drivers/md/raid10.c 2008-08-08 19:01:58.000000000 +0200
+++ linux-2.6.26-devel/drivers/md/raid10.c 2008-08-08 19:04:19.000000000 +0200
@@ -1334,9 +1334,6 @@ static void sync_request_write(mddev_t *
tbio->bi_size = r10_bio->sectors << 9;
tbio->bi_idx = 0;
tbio->bi_phys_segments = 0;
- tbio->bi_hw_segments = 0;
- tbio->bi_hw_front_size = 0;
- tbio->bi_hw_back_size = 0;
tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
tbio->bi_flags |= 1 << BIO_UPTODATE;
tbio->bi_next = NULL;
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 2/2] drop vmerge accounting
2008-08-08 17:07 ` [PATCH 2/2] " Mikulas Patocka
@ 2008-08-15 9:48 ` Jens Axboe
2008-08-15 18:23 ` [PATCH 3/4] " Mikulas Patocka
2008-08-15 18:26 ` Mikulas Patocka
0 siblings, 2 replies; 65+ messages in thread
From: Jens Axboe @ 2008-08-15 9:48 UTC (permalink / raw)
To: Mikulas Patocka
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg
On Fri, Aug 08 2008, Mikulas Patocka wrote:
> On Fri, 8 Aug 2008, Mikulas Patocka wrote:
>
> > On Fri, 8 Aug 2008, Jens Axboe wrote:
> >
> > > On Fri, Aug 08 2008, FUJITA Tomonori wrote:
> > > > On Fri, 8 Aug 2008 08:15:37 +0200
> > > > Jens Axboe <jens.axboe@oracle.com> wrote:
> > > >
> > > > > > BTW, we also wait for your verdict on:
> > > > > >
> > > > > > http://marc.info/?t=121611935500002&r=1&w=2
> > > > >
> > > > > I've always hated the iommu virtual merging complexity. My plan is to
> > > > > rip it out.
> > > >
> > > > No complaint from me. I'm just happy to see the verdict at length.
> > >
> > > Mikulas, you had a patch for this. Can you strip out the arch bits and
> > > just send me the block bits? The arch bits should go in via the arch
> > > maintainers.
> > >
> > > The iommu code may still do virtual merging, it would be silly not to do
> > > that if possible. Now that later kernels expose the necessary parameters
> > > at that level as well, it's perfectly feasible.
> >
> > Here I'm sending the first one, it removes virtual merge accounting from
> > blk-merge.c (it acts as if all architectures undefined
> > BIO_VMERGE_BOUNDARY). I also created second patch that removes
> > bi_hw_segments field from struct bio and struct request. The next task
> > would be to remove nr_hw_segments from the request_queue, but it will
> > require changing the drivers.
> >
> > Mikulas
>
> (note: raid5 uses nr_hw_segments field for some other purpose, so this
> patch will break it. It should be fixed by raid5 maintainers)
>
> Remove hw_segments field from struct bio and struct request. Without virtual
> merge accounting they have no purpose.
I have applied this and the previous, with a fixup to raid5 to overload
both active stripe and process count in bi_phys_segments.
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* [PATCH 3/4] drop vmerge accounting
2008-08-15 9:48 ` Jens Axboe
@ 2008-08-15 18:23 ` Mikulas Patocka
2008-08-22 9:10 ` Jens Axboe
2008-08-22 9:29 ` Pierre Ossman
2008-08-15 18:26 ` Mikulas Patocka
1 sibling, 2 replies; 65+ messages in thread
From: Mikulas Patocka @ 2008-08-15 18:23 UTC (permalink / raw)
To: Jens Axboe
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg, drzeus-mmc
> I have applied this and the previous, with a fixup to raid5 to overload
> both active stripe and process count in bi_phys_segments.
>
> --
> Jens Axboe
Hi
So here are two new patches. Patch 3 drops blk_queue_max_hw_segments
function and max_hw_segments entry. I checked all the drivers, most of
them set blk_queue_max_hw_segments equal to blk_queue_max_phys_segments,
for the few ones that don't (for example scsi_lib.c), I set minimum of the
two values with blk_queue_max_phys_segments.
The next patch 4 drops the hw_segments logic from MMC card subsystem.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com
---
arch/um/drivers/ubd_kern.c | 2 -
block/blk-core.c | 1
block/blk-merge.c | 6 -----
block/blk-settings.c | 38 ++++--------------------------------
block/elevator.c | 2 -
drivers/ata/sata_nv.c | 2 -
drivers/block/DAC960.c | 1
drivers/block/cciss.c | 3 --
drivers/block/cpqarray.c | 3 --
drivers/block/paride/pf.c | 1
drivers/block/ps3disk.c | 1
drivers/block/sunvdc.c | 1
drivers/block/sx8.c | 1
drivers/block/ub.c | 1
drivers/block/viodasd.c | 1
drivers/block/virtio_blk.c | 2 -
drivers/block/xen-blkfront.c | 1
drivers/cdrom/gdrom.c | 2 -
drivers/cdrom/viocd.c | 1
drivers/ide/ide-probe.c | 1
drivers/md/dm-table.c | 9 --------
drivers/md/raid5.c | 3 --
drivers/memstick/core/mspro_block.c | 1
drivers/message/i2o/i2o_block.c | 4 ---
drivers/mmc/card/queue.c | 5 +---
drivers/s390/block/dasd.c | 1
drivers/s390/char/tape_block.c | 1
drivers/scsi/scsi_lib.c | 3 --
drivers/scsi/sg.c | 5 +---
drivers/scsi/st.c | 3 --
fs/bio.c | 5 ----
include/linux/blkdev.h | 3 --
include/linux/device-mapper.h | 1
33 files changed, 20 insertions(+), 95 deletions(-)
Index: linux-2.6.27-rc3-devel/block/blk-merge.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/block/blk-merge.c 2008-08-15 19:07:26.000000000 +0200
+++ linux-2.6.27-rc3-devel/block/blk-merge.c 2008-08-15 19:07:42.000000000 +0200
@@ -236,8 +236,7 @@ static inline int ll_new_hw_segment(stru
{
int nr_phys_segs = bio_phys_segments(q, bio);
- if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
- || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
+ if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -326,9 +325,6 @@ static int ll_merge_requests_fn(struct r
if (total_phys_segments > q->max_phys_segments)
return 0;
- if (total_phys_segments > q->max_hw_segments)
- return 0;
-
/* Merge is OK... */
req->nr_phys_segments = total_phys_segments;
return 1;
Index: linux-2.6.27-rc3-devel/include/linux/blkdev.h
===================================================================
--- linux-2.6.27-rc3-devel.orig/include/linux/blkdev.h 2008-08-15 19:05:50.000000000 +0200
+++ linux-2.6.27-rc3-devel/include/linux/blkdev.h 2008-08-15 19:16:29.000000000 +0200
@@ -353,7 +353,6 @@ struct request_queue
unsigned int max_sectors;
unsigned int max_hw_sectors;
unsigned short max_phys_segments;
- unsigned short max_hw_segments;
unsigned short hardsect_size;
unsigned int max_segment_size;
@@ -764,7 +763,6 @@ extern void blk_queue_make_request(struc
extern void blk_queue_bounce_limit(struct request_queue *, u64);
extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
-extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
@@ -833,7 +831,6 @@ extern int blk_register_filter(struct ge
extern void blk_unregister_filter(struct gendisk *disk);
#define MAX_PHYS_SEGMENTS 128
-#define MAX_HW_SEGMENTS 128
#define SAFE_MAX_SECTORS 255
#define BLK_DEF_MAX_SECTORS 1024
Index: linux-2.6.27-rc3-devel/block/blk-settings.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/block/blk-settings.c 2008-08-15 19:09:45.000000000 +0200
+++ linux-2.6.27-rc3-devel/block/blk-settings.c 2008-08-15 19:09:49.000000000 +0200
@@ -89,7 +89,6 @@ void blk_queue_make_request(struct reque
*/
q->nr_requests = BLKDEV_MAX_RQ;
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
- blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
q->make_request_fn = mfn;
q->backing_dev_info.ra_pages =
(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@ -205,30 +204,6 @@ void blk_queue_max_phys_segments(struct
EXPORT_SYMBOL(blk_queue_max_phys_segments);
/**
- * blk_queue_max_hw_segments - set max hw segments for a request for this queue
- * @q: the request queue for the device
- * @max_segments: max number of segments
- *
- * Description:
- * Enables a low level driver to set an upper limit on the number of
- * hw data segments in a request. This would be the largest number of
- * address/length pairs the host adapter can actually give as once
- * to the device.
- **/
-void blk_queue_max_hw_segments(struct request_queue *q,
- unsigned short max_segments)
-{
- if (!max_segments) {
- max_segments = 1;
- printk(KERN_INFO "%s: set to minimum %d\n",
- __func__, max_segments);
- }
-
- q->max_hw_segments = max_segments;
-}
-EXPORT_SYMBOL(blk_queue_max_hw_segments);
-
-/**
* blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
* @q: the request queue for the device
* @max_size: max size of segment in bytes
@@ -283,7 +258,6 @@ void blk_queue_stack_limits(struct reque
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->max_phys_segments = min(t->max_phys_segments, b->max_phys_segments);
- t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
if (!t->queue_lock)
@@ -346,21 +320,19 @@ EXPORT_SYMBOL(blk_queue_update_dma_pad);
* does is adjust the queue so that the buf is always appended
* silently to the scatterlist.
*
- * Note: This routine adjusts max_hw_segments to make room for
- * appending the drain buffer. If you call
- * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after
- * calling this routine, you must set the limit to one fewer than your
- * device can support otherwise there won't be room for the drain
+ * Note: This routine adjusts max_phys_segments to make room for
+ * appending the drain buffer. If you call * blk_queue_max_phys_segments()
+ * after calling this routine, you must set the limit to one fewer than
+ * your device can support otherwise there won't be room for the drain
* buffer.
*/
int blk_queue_dma_drain(struct request_queue *q,
dma_drain_needed_fn *dma_drain_needed,
void *buf, unsigned int size)
{
- if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
+ if (q->max_phys_segments < 2)
return -EINVAL;
/* make room for appending the drain */
- --q->max_hw_segments;
--q->max_phys_segments;
q->dma_drain_needed = dma_drain_needed;
q->dma_drain_buffer = buf;
Index: linux-2.6.27-rc3-devel/block/elevator.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/block/elevator.c 2008-08-15 19:10:00.000000000 +0200
+++ linux-2.6.27-rc3-devel/block/elevator.c 2008-08-15 19:10:15.000000000 +0200
@@ -777,7 +777,7 @@ struct request *elv_next_request(struct
if (q->dma_drain_size && rq->data_len) {
/*
* make sure space for the drain appears we
- * know we can do this because max_hw_segments
+ * know we can do this because max_phys_segments
* has been adjusted to be one fewer than the
* device can handle
*/
Index: linux-2.6.27-rc3-devel/drivers/md/dm-table.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/md/dm-table.c 2008-08-15 19:10:24.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/md/dm-table.c 2008-08-15 19:10:56.000000000 +0200
@@ -91,9 +91,6 @@ static void combine_restrictions_low(str
lhs->max_phys_segments =
min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
- lhs->max_hw_segments =
- min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
-
lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);
lhs->max_segment_size =
@@ -504,9 +501,6 @@ void dm_set_device_limits(struct dm_targ
min_not_zero(rs->max_phys_segments,
q->max_phys_segments);
- rs->max_hw_segments =
- min_not_zero(rs->max_hw_segments, q->max_hw_segments);
-
rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);
rs->max_segment_size =
@@ -656,8 +650,6 @@ static void check_for_valid_limits(struc
rs->max_hw_sectors = SAFE_MAX_SECTORS;
if (!rs->max_phys_segments)
rs->max_phys_segments = MAX_PHYS_SEGMENTS;
- if (!rs->max_hw_segments)
- rs->max_hw_segments = MAX_HW_SEGMENTS;
if (!rs->hardsect_size)
rs->hardsect_size = 1 << SECTOR_SHIFT;
if (!rs->max_segment_size)
@@ -849,7 +841,6 @@ void dm_table_set_restrictions(struct dm
*/
blk_queue_max_sectors(q, t->limits.max_sectors);
q->max_phys_segments = t->limits.max_phys_segments;
- q->max_hw_segments = t->limits.max_hw_segments;
q->hardsect_size = t->limits.hardsect_size;
q->max_segment_size = t->limits.max_segment_size;
q->max_hw_sectors = t->limits.max_hw_sectors;
Index: linux-2.6.27-rc3-devel/drivers/message/i2o/i2o_block.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/message/i2o/i2o_block.c 2008-08-15 19:11:34.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/message/i2o/i2o_block.c 2008-08-15 19:25:40.000000000 +0200
@@ -1069,13 +1069,11 @@ static int i2o_block_probe(struct device
queue = gd->queue;
queue->queuedata = i2o_blk_dev;
- blk_queue_max_phys_segments(queue, I2O_MAX_PHYS_SEGMENTS);
+ blk_queue_max_phys_segments(queue, min(I2O_MAX_PHYS_SEGMENTS, i2o_sg_tablesize(c, body_size)));
blk_queue_max_sectors(queue, max_sectors);
- blk_queue_max_hw_segments(queue, i2o_sg_tablesize(c, body_size));
osm_debug("max sectors = %d\n", queue->max_sectors);
osm_debug("phys segments = %d\n", queue->max_phys_segments);
- osm_debug("max hw segments = %d\n", queue->max_hw_segments);
/*
* Ask for the current media data. If that isn't supported
Index: linux-2.6.27-rc3-devel/drivers/scsi/sg.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/scsi/sg.c 2008-08-15 19:12:00.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/scsi/sg.c 2008-08-15 19:12:16.000000000 +0200
@@ -292,8 +292,7 @@ sg_open(struct inode *inode, struct file
if (!sdp->headfp) { /* no existing opens on this device */
sdp->sgdebug = 0;
q = sdp->device->request_queue;
- sdp->sg_tablesize = min(q->max_hw_segments,
- q->max_phys_segments);
+ sdp->sg_tablesize = q->max_phys_segments;
}
if ((sfp = sg_add_sfp(sdp, dev)))
filp->private_data = sfp;
@@ -1394,7 +1393,7 @@ static Sg_device *sg_alloc(struct gendis
sdp->disk = disk;
sdp->device = scsidp;
init_waitqueue_head(&sdp->o_excl_wait);
- sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments);
+ sdp->sg_tablesize = q->max_phys_segments;
sdp->index = k;
error = 0;
Index: linux-2.6.27-rc3-devel/drivers/scsi/st.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/scsi/st.c 2008-08-15 19:12:24.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/scsi/st.c 2008-08-15 19:12:35.000000000 +0200
@@ -3930,8 +3930,7 @@ static int st_probe(struct device *dev)
return -ENODEV;
}
- i = min(SDp->request_queue->max_hw_segments,
- SDp->request_queue->max_phys_segments);
+ i = SDp->request_queue->max_phys_segments;
if (st_max_sg_segs < i)
i = st_max_sg_segs;
buffer = new_tape_buffer(1, (SDp->host)->unchecked_isa_dma, i);
Index: linux-2.6.27-rc3-devel/fs/bio.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/fs/bio.c 2008-08-15 19:12:44.000000000 +0200
+++ linux-2.6.27-rc3-devel/fs/bio.c 2008-08-15 19:13:08.000000000 +0200
@@ -281,8 +281,6 @@ int bio_get_nr_vecs(struct block_device
nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (nr_pages > q->max_phys_segments)
nr_pages = q->max_phys_segments;
- if (nr_pages > q->max_hw_segments)
- nr_pages = q->max_hw_segments;
return nr_pages;
}
@@ -341,8 +339,7 @@ static int __bio_add_page(struct request
* make this too complex.
*/
- while (bio->bi_phys_segments >= q->max_phys_segments
- || bio->bi_phys_segments >= q->max_hw_segments) {
+ while (bio->bi_phys_segments >= q->max_phys_segments) {
if (retried_segments)
return 0;
Index: linux-2.6.27-rc3-devel/include/linux/device-mapper.h
===================================================================
--- linux-2.6.27-rc3-devel.orig/include/linux/device-mapper.h 2008-08-15 19:13:20.000000000 +0200
+++ linux-2.6.27-rc3-devel/include/linux/device-mapper.h 2008-08-15 19:13:27.000000000 +0200
@@ -122,7 +122,6 @@ struct io_restrictions {
unsigned max_sectors;
unsigned max_segment_size;
unsigned short hardsect_size;
- unsigned short max_hw_segments;
unsigned short max_phys_segments;
unsigned char no_cluster; /* inverted so that 0 is default */
};
Index: linux-2.6.27-rc3-devel/arch/um/drivers/ubd_kern.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/arch/um/drivers/ubd_kern.c 2008-08-15 19:15:02.000000000 +0200
+++ linux-2.6.27-rc3-devel/arch/um/drivers/ubd_kern.c 2008-08-15 19:15:21.000000000 +0200
@@ -870,7 +870,7 @@ static int ubd_add(int n, char **error_o
}
ubd_dev->queue->queuedata = ubd_dev;
- blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
+ blk_queue_max_phys_segments(ubd_dev->queue, MAX_SG);
err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
if(err){
*error_out = "Failed to register device";
Index: linux-2.6.27-rc3-devel/block/blk-core.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/block/blk-core.c 2008-08-15 19:15:29.000000000 +0200
+++ linux-2.6.27-rc3-devel/block/blk-core.c 2008-08-15 19:16:17.000000000 +0200
@@ -577,7 +577,6 @@ blk_init_queue_node(request_fn_proc *rfn
blk_queue_make_request(q, __make_request);
blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
- blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
q->sg_reserved_size = INT_MAX;
Index: linux-2.6.27-rc3-devel/drivers/ata/sata_nv.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/ata/sata_nv.c 2008-08-15 19:16:51.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/ata/sata_nv.c 2008-08-15 19:18:06.000000000 +0200
@@ -702,7 +702,7 @@ static int nv_adma_slave_config(struct s
}
blk_queue_segment_boundary(sdev->request_queue, segment_boundary);
- blk_queue_max_hw_segments(sdev->request_queue, sg_tablesize);
+ blk_queue_max_phys_segments(sdev->request_queue, sg_tablesize);
ata_port_printk(ap, KERN_INFO,
"DMA mask 0x%llX, segment boundary 0x%lX, hw segs %hu\n",
(unsigned long long)*ap->host->dev->dma_mask,
Index: linux-2.6.27-rc3-devel/drivers/block/DAC960.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/DAC960.c 2008-08-15 19:18:12.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/DAC960.c 2008-08-15 19:18:47.000000000 +0200
@@ -2531,7 +2531,6 @@ static bool DAC960_RegisterBlockDevice(D
Controller->RequestQueue[n] = RequestQueue;
blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
RequestQueue->queuedata = Controller;
- blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit);
blk_queue_max_phys_segments(RequestQueue, Controller->DriverScatterGatherLimit);
blk_queue_max_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
disk->queue = RequestQueue;
Index: linux-2.6.27-rc3-devel/drivers/block/cciss.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/cciss.c 2008-08-15 19:18:55.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/cciss.c 2008-08-15 19:19:20.000000000 +0200
@@ -1370,9 +1370,6 @@ static void cciss_add_disk(ctlr_info_t *
blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
/* This is a hardware imposed limit. */
- blk_queue_max_hw_segments(disk->queue, MAXSGENTRIES);
-
- /* This is a limit in the driver and could be eliminated. */
blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES);
blk_queue_max_sectors(disk->queue, h->cciss_max_sectors);
Index: linux-2.6.27-rc3-devel/drivers/block/cpqarray.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/cpqarray.c 2008-08-15 19:19:25.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/cpqarray.c 2008-08-15 19:19:31.000000000 +0200
@@ -451,9 +451,6 @@ static int __init cpqarray_register_ctlr
blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask);
/* This is a hardware imposed limit. */
- blk_queue_max_hw_segments(q, SG_MAX);
-
- /* This is a driver limit and could be eliminated. */
blk_queue_max_phys_segments(q, SG_MAX);
init_timer(&hba[i]->timer);
Index: linux-2.6.27-rc3-devel/drivers/block/paride/pf.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/paride/pf.c 2008-08-15 19:19:38.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/paride/pf.c 2008-08-15 19:20:06.000000000 +0200
@@ -958,7 +958,6 @@ static int __init pf_init(void)
}
blk_queue_max_phys_segments(pf_queue, cluster);
- blk_queue_max_hw_segments(pf_queue, cluster);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
struct gendisk *disk = pf->disk;
Index: linux-2.6.27-rc3-devel/drivers/block/ps3disk.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/ps3disk.c 2008-08-15 19:20:13.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/ps3disk.c 2008-08-15 19:20:24.000000000 +0200
@@ -484,7 +484,6 @@ static int __devinit ps3disk_probe(struc
ps3disk_prepare_flush);
blk_queue_max_phys_segments(queue, -1);
- blk_queue_max_hw_segments(queue, -1);
blk_queue_max_segment_size(queue, dev->bounce_size);
gendisk = alloc_disk(PS3DISK_MINORS);
Index: linux-2.6.27-rc3-devel/drivers/block/sunvdc.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/sunvdc.c 2008-08-15 19:20:28.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/sunvdc.c 2008-08-15 19:20:33.000000000 +0200
@@ -697,7 +697,6 @@ static int probe_disk(struct vdc_port *p
port->disk = g;
- blk_queue_max_hw_segments(q, port->ring_cookies);
blk_queue_max_phys_segments(q, port->ring_cookies);
blk_queue_max_sectors(q, port->max_xfer_size);
g->major = vdc_major;
Index: linux-2.6.27-rc3-devel/drivers/block/sx8.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/sx8.c 2008-08-15 19:20:39.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/sx8.c 2008-08-15 19:20:44.000000000 +0200
@@ -1521,7 +1521,6 @@ static int carm_init_disks(struct carm_h
break;
}
disk->queue = q;
- blk_queue_max_hw_segments(q, CARM_MAX_REQ_SG);
blk_queue_max_phys_segments(q, CARM_MAX_REQ_SG);
blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
Index: linux-2.6.27-rc3-devel/drivers/block/ub.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/ub.c 2008-08-15 19:20:49.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/ub.c 2008-08-15 19:20:54.000000000 +0200
@@ -2325,7 +2325,6 @@ static int ub_probe_lun(struct ub_dev *s
disk->queue = q;
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
- blk_queue_max_hw_segments(q, UB_MAX_REQ_SG);
blk_queue_max_phys_segments(q, UB_MAX_REQ_SG);
blk_queue_segment_boundary(q, 0xffffffff); /* Dubious. */
blk_queue_max_sectors(q, UB_MAX_SECTORS);
Index: linux-2.6.27-rc3-devel/drivers/block/viodasd.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/viodasd.c 2008-08-15 19:21:00.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/viodasd.c 2008-08-15 19:21:05.000000000 +0200
@@ -482,7 +482,6 @@ retry:
}
d->disk = g;
- blk_queue_max_hw_segments(q, VIOMAXBLOCKDMA);
blk_queue_max_phys_segments(q, VIOMAXBLOCKDMA);
blk_queue_max_sectors(q, VIODASD_MAXSECTORS);
g->major = VIODASD_MAJOR;
Index: linux-2.6.27-rc3-devel/drivers/block/virtio_blk.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/virtio_blk.c 2008-08-15 19:21:09.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/virtio_blk.c 2008-08-15 19:21:23.000000000 +0200
@@ -289,7 +289,7 @@ static int virtblk_probe(struct virtio_d
offsetof(struct virtio_blk_config, seg_max),
&v);
if (!err)
- blk_queue_max_hw_segments(vblk->disk->queue, v);
+ blk_queue_max_phys_segments(vblk->disk->queue, v);
/* Host can optionally specify the block size of the device */
err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
Index: linux-2.6.27-rc3-devel/drivers/block/xen-blkfront.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/block/xen-blkfront.c 2008-08-15 19:21:28.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/block/xen-blkfront.c 2008-08-15 19:21:35.000000000 +0200
@@ -354,7 +354,6 @@ static int xlvbd_init_blk_queue(struct g
/* Ensure a merged request will fit in a single I/O ring slot. */
blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
- blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
Index: linux-2.6.27-rc3-devel/drivers/cdrom/gdrom.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/cdrom/gdrom.c 2008-08-15 19:21:47.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/cdrom/gdrom.c 2008-08-15 19:22:03.000000000 +0200
@@ -737,7 +737,7 @@ static int __devinit probe_gdrom_setupqu
{
blk_queue_hardsect_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
/* using DMA so memory will need to be contiguous */
- blk_queue_max_hw_segments(gd.gdrom_rq, 1);
+ blk_queue_max_phys_segments(gd.gdrom_rq, 1);
/* set a large max size to get most from DMA */
blk_queue_max_segment_size(gd.gdrom_rq, 0x40000);
gd.disk->queue = gd.gdrom_rq;
Index: linux-2.6.27-rc3-devel/drivers/cdrom/viocd.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/cdrom/viocd.c 2008-08-15 19:22:08.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/cdrom/viocd.c 2008-08-15 19:22:15.000000000 +0200
@@ -625,7 +625,6 @@ static int viocd_probe(struct vio_dev *v
gendisk->first_minor = deviceno;
strncpy(gendisk->disk_name, c->name,
sizeof(gendisk->disk_name));
- blk_queue_max_hw_segments(q, 1);
blk_queue_max_phys_segments(q, 1);
blk_queue_max_sectors(q, 4096 / 512);
gendisk->queue = q;
Index: linux-2.6.27-rc3-devel/drivers/ide/ide-probe.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/ide/ide-probe.c 2008-08-15 19:22:20.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/ide/ide-probe.c 2008-08-15 19:22:27.000000000 +0200
@@ -927,7 +927,6 @@ static int ide_init_queue(ide_drive_t *d
max_sg_entries >>= 1;
#endif /* CONFIG_PCI */
- blk_queue_max_hw_segments(q, max_sg_entries);
blk_queue_max_phys_segments(q, max_sg_entries);
/* assign drive queue */
Index: linux-2.6.27-rc3-devel/drivers/memstick/core/mspro_block.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/memstick/core/mspro_block.c 2008-08-15 19:22:36.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/memstick/core/mspro_block.c 2008-08-15 19:22:42.000000000 +0200
@@ -1215,7 +1215,6 @@ static int mspro_block_init_disk(struct
blk_queue_bounce_limit(msb->queue, limit);
blk_queue_max_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
blk_queue_max_phys_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
- blk_queue_max_hw_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
blk_queue_max_segment_size(msb->queue,
MSPRO_BLOCK_MAX_PAGES * msb->page_size);
Index: linux-2.6.27-rc3-devel/drivers/mmc/card/queue.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/card/queue.c 2008-08-15 19:22:48.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/card/queue.c 2008-08-15 19:27:48.000000000 +0200
@@ -151,7 +151,6 @@ int mmc_init_queue(struct mmc_queue *mq,
blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
blk_queue_max_sectors(mq->queue, bouncesz / 512);
blk_queue_max_phys_segments(mq->queue, bouncesz / 512);
- blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
blk_queue_max_segment_size(mq->queue, bouncesz);
mq->sg = kmalloc(sizeof(struct scatterlist),
@@ -176,8 +175,8 @@ int mmc_init_queue(struct mmc_queue *mq,
if (!mq->bounce_buf) {
blk_queue_bounce_limit(mq->queue, limit);
blk_queue_max_sectors(mq->queue, host->max_req_size / 512);
- blk_queue_max_phys_segments(mq->queue, host->max_phys_segs);
- blk_queue_max_hw_segments(mq->queue, host->max_hw_segs);
+ /* TODO: drop host->max_hw_segs */
+ blk_queue_max_phys_segments(mq->queue, min(host->max_phys_segs, host->max_hw_segs));
blk_queue_max_segment_size(mq->queue, host->max_seg_size);
mq->sg = kmalloc(sizeof(struct scatterlist) *
Index: linux-2.6.27-rc3-devel/drivers/s390/block/dasd.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/s390/block/dasd.c 2008-08-15 19:23:16.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/s390/block/dasd.c 2008-08-15 19:23:22.000000000 +0200
@@ -1976,7 +1976,6 @@ static void dasd_setup_queue(struct dasd
max = block->base->discipline->max_blocks << block->s2b_shift;
blk_queue_max_sectors(block->request_queue, max);
blk_queue_max_phys_segments(block->request_queue, -1L);
- blk_queue_max_hw_segments(block->request_queue, -1L);
blk_queue_max_segment_size(block->request_queue, -1L);
blk_queue_segment_boundary(block->request_queue, -1L);
blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN, NULL);
Index: linux-2.6.27-rc3-devel/drivers/s390/char/tape_block.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/s390/char/tape_block.c 2008-08-15 19:23:27.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/s390/char/tape_block.c 2008-08-15 19:23:30.000000000 +0200
@@ -233,7 +233,6 @@ tapeblock_setup_device(struct tape_devic
blk_queue_hardsect_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
blk_queue_max_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC);
blk_queue_max_phys_segments(blkdat->request_queue, -1L);
- blk_queue_max_hw_segments(blkdat->request_queue, -1L);
blk_queue_max_segment_size(blkdat->request_queue, -1L);
blk_queue_segment_boundary(blkdat->request_queue, -1L);
Index: linux-2.6.27-rc3-devel/drivers/scsi/scsi_lib.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/scsi/scsi_lib.c 2008-08-15 19:23:35.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/scsi/scsi_lib.c 2008-08-15 19:39:19.000000000 +0200
@@ -1639,8 +1639,7 @@ struct request_queue *__scsi_alloc_queue
/*
* this limit is imposed by hardware restrictions
*/
- blk_queue_max_hw_segments(q, shost->sg_tablesize);
- blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
+ blk_queue_max_phys_segments(q, min(shost->sg_tablesize, (unsigned short)SCSI_MAX_SG_CHAIN_SEGMENTS));
blk_queue_max_sectors(q, shost->max_sectors);
blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
Index: linux-2.6.27-rc3-devel/drivers/md/raid5.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/md/raid5.c 2008-08-15 19:11:08.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/md/raid5.c 2008-08-15 19:32:50.000000000 +0200
@@ -3193,8 +3193,7 @@ static int bio_fits_rdev(struct bio *bi)
if ((bi->bi_size>>9) > q->max_sectors)
return 0;
blk_recount_segments(q, bi);
- if (bi->bi_phys_segments > q->max_phys_segments ||
- bi->bi_hw_segments > q->max_hw_segments)
+ if (bi->bi_phys_segments > q->max_phys_segments)
return 0;
if (q->merge_bvec_fn)
^ permalink raw reply [flat|nested] 65+ messages in thread
* [PATCH 4/4] drop vmerge accounting
2008-08-15 9:48 ` Jens Axboe
2008-08-15 18:23 ` [PATCH 3/4] " Mikulas Patocka
@ 2008-08-15 18:26 ` Mikulas Patocka
1 sibling, 0 replies; 65+ messages in thread
From: Mikulas Patocka @ 2008-08-15 18:26 UTC (permalink / raw)
To: Jens Axboe
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg
Remove max_hw_segs from MMC subsystems, because it is already being
removed from the block layer. Most controller drivers set max_hw_segs
equal to max_phys_segs. The only one that doesn't is sdhci.
I don't have any of the cards, I at least tried to compile the code.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/mmc/card/queue.c | 5 ++---
drivers/mmc/core/host.c | 1 -
drivers/mmc/host/atmel-mci.c | 1 -
drivers/mmc/host/imxmmc.c | 1 -
drivers/mmc/host/mmci.c | 1 -
drivers/mmc/host/omap.c | 1 -
drivers/mmc/host/s3cmci.c | 1 -
drivers/mmc/host/sdhci.c | 7 +++----
drivers/mmc/host/tifm_sd.c | 3 +--
drivers/mmc/host/wbsd.c | 1 -
include/linux/mmc/host.h | 1 -
11 files changed, 6 insertions(+), 17 deletions(-)
Index: linux-2.6.27-rc3-devel/drivers/mmc/card/queue.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/card/queue.c 2008-08-15 19:48:52.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/card/queue.c 2008-08-15 19:49:10.000000000 +0200
@@ -133,7 +133,7 @@ int mmc_init_queue(struct mmc_queue *mq,
blk_queue_prep_rq(mq->queue, mmc_prep_request);
#ifdef CONFIG_MMC_BLOCK_BOUNCE
- if (host->max_hw_segs == 1) {
+ if (host->max_phys_segs == 1) {
unsigned int bouncesz;
bouncesz = MMC_QUEUE_BOUNCESZ;
@@ -175,8 +175,7 @@ int mmc_init_queue(struct mmc_queue *mq,
if (!mq->bounce_buf) {
blk_queue_bounce_limit(mq->queue, limit);
blk_queue_max_sectors(mq->queue, host->max_req_size / 512);
- /* TODO: drop host->max_hw_segs */
- blk_queue_max_phys_segments(mq->queue, min(host->max_phys_segs, host->max_hw_segs));
+ blk_queue_max_phys_segments(mq->queue, host->max_phys_segs);
blk_queue_max_segment_size(mq->queue, host->max_seg_size);
mq->sg = kmalloc(sizeof(struct scatterlist) *
Index: linux-2.6.27-rc3-devel/drivers/mmc/core/host.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/core/host.c 2008-08-15 19:49:17.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/core/host.c 2008-08-15 19:49:19.000000000 +0200
@@ -89,7 +89,6 @@ struct mmc_host *mmc_alloc_host(int extr
* By default, hosts do not support SGIO or large requests.
* They have to set these according to their abilities.
*/
- host->max_hw_segs = 1;
host->max_phys_segs = 1;
host->max_seg_size = PAGE_CACHE_SIZE;
Index: linux-2.6.27-rc3-devel/drivers/mmc/host/atmel-mci.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/host/atmel-mci.c 2008-08-15 19:51:36.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/host/atmel-mci.c 2008-08-15 19:51:40.000000000 +0200
@@ -1037,7 +1037,6 @@ static int __init atmci_probe(struct pla
mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
mmc->caps |= MMC_CAP_4_BIT_DATA;
- mmc->max_hw_segs = 64;
mmc->max_phys_segs = 64;
mmc->max_req_size = 32768 * 512;
mmc->max_blk_size = 32768;
Index: linux-2.6.27-rc3-devel/drivers/mmc/host/imxmmc.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/host/imxmmc.c 2008-08-15 19:49:33.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/host/imxmmc.c 2008-08-15 19:49:35.000000000 +0200
@@ -958,7 +958,6 @@ static int imxmci_probe(struct platform_
mmc->caps = MMC_CAP_4_BIT_DATA;
/* MMC core transfer sizes tunable parameters */
- mmc->max_hw_segs = 64;
mmc->max_phys_segs = 64;
mmc->max_seg_size = 64*512; /* default PAGE_CACHE_SIZE */
mmc->max_req_size = 64*512; /* default PAGE_CACHE_SIZE */
Index: linux-2.6.27-rc3-devel/drivers/mmc/host/mmci.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/host/mmci.c 2008-08-15 19:49:43.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/host/mmci.c 2008-08-15 19:50:30.000000000 +0200
@@ -539,7 +539,6 @@ static int mmci_probe(struct amba_device
/*
* We can do SGIO
*/
- mmc->max_hw_segs = 16;
mmc->max_phys_segs = NR_SG;
/*
Index: linux-2.6.27-rc3-devel/drivers/mmc/host/omap.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/host/omap.c 2008-08-15 19:50:37.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/host/omap.c 2008-08-15 19:50:42.000000000 +0200
@@ -1336,7 +1336,6 @@ static int __init mmc_omap_new_slot(stru
* normally used (except e.g. for reading SD registers).
*/
mmc->max_phys_segs = 32;
- mmc->max_hw_segs = 32;
mmc->max_blk_size = 2048; /* BLEN is 11 bits (+1) */
mmc->max_blk_count = 2048; /* NBLK is 11 bits (+1) */
mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
Index: linux-2.6.27-rc3-devel/drivers/mmc/host/s3cmci.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/host/s3cmci.c 2008-08-15 19:49:25.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/host/s3cmci.c 2008-08-15 19:49:29.000000000 +0200
@@ -1288,7 +1288,6 @@ static int __devinit s3cmci_probe(struct
mmc->max_seg_size = mmc->max_req_size;
mmc->max_phys_segs = 128;
- mmc->max_hw_segs = 128;
dbg(host, dbg_debug,
"probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n",
Index: linux-2.6.27-rc3-devel/drivers/mmc/host/sdhci.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/host/sdhci.c 2008-08-15 19:50:46.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/host/sdhci.c 2008-08-15 19:51:05.000000000 +0200
@@ -1628,12 +1628,11 @@ int sdhci_add_host(struct sdhci_host *ho
* can do scatter/gather or not.
*/
if (host->flags & SDHCI_USE_ADMA)
- mmc->max_hw_segs = 128;
+ mmc->max_phys_segs = 128;
else if (host->flags & SDHCI_USE_DMA)
- mmc->max_hw_segs = 1;
+ mmc->max_phys_segs = 1;
else /* PIO */
- mmc->max_hw_segs = 128;
- mmc->max_phys_segs = 128;
+ mmc->max_phys_segs = 128;
/*
* Maximum number of sectors in one transfer. Limited by DMA boundary
Index: linux-2.6.27-rc3-devel/drivers/mmc/host/tifm_sd.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/host/tifm_sd.c 2008-08-15 19:51:11.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/host/tifm_sd.c 2008-08-15 19:51:22.000000000 +0200
@@ -978,11 +978,10 @@ static int tifm_sd_probe(struct tifm_dev
mmc->f_max = 24000000;
mmc->max_blk_count = 2048;
- mmc->max_hw_segs = mmc->max_blk_count;
+ mmc->max_phys_segs = mmc->max_blk_count;
mmc->max_blk_size = min(TIFM_MMCSD_MAX_BLOCK_SIZE, PAGE_SIZE);
mmc->max_seg_size = mmc->max_blk_count * mmc->max_blk_size;
mmc->max_req_size = mmc->max_seg_size;
- mmc->max_phys_segs = mmc->max_hw_segs;
sock->card_event = tifm_sd_card_event;
sock->data_event = tifm_sd_data_event;
Index: linux-2.6.27-rc3-devel/drivers/mmc/host/wbsd.c
===================================================================
--- linux-2.6.27-rc3-devel.orig/drivers/mmc/host/wbsd.c 2008-08-15 19:51:27.000000000 +0200
+++ linux-2.6.27-rc3-devel/drivers/mmc/host/wbsd.c 2008-08-15 19:51:31.000000000 +0200
@@ -1234,7 +1234,6 @@ static int __devinit wbsd_alloc_mmc(stru
* Maximum number of segments. Worst case is one sector per segment
* so this will be 64kB/512.
*/
- mmc->max_hw_segs = 128;
mmc->max_phys_segs = 128;
/*
Index: linux-2.6.27-rc3-devel/include/linux/mmc/host.h
===================================================================
--- linux-2.6.27-rc3-devel.orig/include/linux/mmc/host.h 2008-08-15 19:48:18.000000000 +0200
+++ linux-2.6.27-rc3-devel/include/linux/mmc/host.h 2008-08-15 19:48:27.000000000 +0200
@@ -119,7 +119,6 @@ struct mmc_host {
/* host specific block data */
unsigned int max_seg_size; /* see blk_queue_max_segment_size */
- unsigned short max_hw_segs; /* see blk_queue_max_hw_segments */
unsigned short max_phys_segs; /* see blk_queue_max_phys_segments */
unsigned short unused;
unsigned int max_req_size; /* maximum number of bytes in one req */
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 1/3] move cmd_filter from gendisk to request_queue
2008-07-26 9:03 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 2/3] sg: restore command permission for TYPE_SCANNER FUJITA Tomonori
2008-07-30 20:10 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue Peter Jones
@ 2008-08-16 5:47 ` FUJITA Tomonori
2 siblings, 0 replies; 65+ messages in thread
From: FUJITA Tomonori @ 2008-08-16 5:47 UTC (permalink / raw)
To: jens.axboe
Cc: linux-scsi, fujita.tomonori, James.Bottomley, dan.j.williams,
adel.gadllah, pjones, viro, dougg, matthew
On Sat, 26 Jul 2008 18:03:23 +0900
FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> wrote:
> cmd_filter works only for the block layer SG_IO with SCSI block
> devices. It breaks scsi/sg.c, bsg, and the block layer SG_IO with SCSI
> character devices (such as st). We hit a kernel crash with them.
>
> The problem is that cmd_filter code accesses to gendisk (having struct
> blk_scsi_cmd_filter) via inode->i_bdev->bd_disk. It works for only
> SCSI block device files. With character device files, inode->i_bdev
> leads you to struct cdev. inode->i_bdev->bd_disk->blk_scsi_cmd_filter
> isn't safe.
>
> SCSI ULDs don't expose gendisk; they keep it private. bsg needs to be
> independent on any protocols. We shouldn't change ULDs to expose their
> gendisk.
>
> This patch moves struct blk_scsi_cmd_filter from gendisk to
> request_queue, a common object, which eveyone can access to.
>
> The user interface doesn't change; users can change the filters via
> /sys/block/. gendisk has a pointer to request_queue so the cmd_filter
> code accesses to struct blk_scsi_cmd_filter.
>
> Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
> ---
> block/bsg.c | 45 +++++-------------
> block/cmd-filter.c | 118 ++--------------------------------------------
> block/scsi_ioctl.c | 94 ++++++++++++++++++++++++++++++++++++-
> drivers/scsi/scsi_lib.c | 2 +
> drivers/scsi/sg.c | 11 ++++-
> include/linux/blkdev.h | 16 +++++-
> include/linux/genhd.h | 10 ----
> 7 files changed, 133 insertions(+), 163 deletions(-)
(snip)
> diff --git a/block/bsg.c b/block/bsg.c
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 88d1b5f..51cdae8 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1617,6 +1617,8 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
> */
> blk_queue_dma_alignment(q, 0x03);
>
> + blk_set_cmd_filter_defaults(&q->cmd_filter);
> +
I guess that I was SCSI biased. Some block drivers (such as cciss)
also support some of SCSI commands and use blk_verify_command. They
also need to call blk_set_cmd_filter_defaults.
I add blk_set_cmd_filter_defaults to blk_init_queue_node (and removed
one in __scsi_alloc_queue). Adding it to blk_alloc_queue_node should
be fine but blk_init_queue_node looks a reasonable place.
I put this patchset with Adel's sysfs interface fix to:
git://git.kernel.org/pub/scm/linux/kernel/git/tomo/linux-2.6-misc.git cmdfilter
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-08-08 5:54 ` Jens Axboe
2008-08-08 6:11 ` FUJITA Tomonori
@ 2008-08-21 9:26 ` Adel Gadllah
2008-08-22 9:10 ` Jens Axboe
1 sibling, 1 reply; 65+ messages in thread
From: Adel Gadllah @ 2008-08-21 9:26 UTC (permalink / raw)
To: Jens Axboe
Cc: FUJITA Tomonori, matthew, linux-scsi, James.Bottomley,
dan.j.williams, pjones, viro, dougg
2008/8/8 Jens Axboe <jens.axboe@oracle.com>:
> On Fri, Aug 08 2008, FUJITA Tomonori wrote:
>> On Thu, 7 Aug 2008 20:47:45 +0200
>> "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
>>
>> > Can we push this patches to Linus?
>> > They fix the regression and do not seem to introduce a new one
>> > (couldn't find any while testing it).
>>
>> Jens? Any comments on the patchset?
>
> Looks fine, I'll queue it up.
ping?
seems still not merged in rc4.....
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 0/3] cmd_filter fixes
2008-08-21 9:26 ` [PATCH 0/3] cmd_filter fixes Adel Gadllah
@ 2008-08-22 9:10 ` Jens Axboe
0 siblings, 0 replies; 65+ messages in thread
From: Jens Axboe @ 2008-08-22 9:10 UTC (permalink / raw)
To: Adel Gadllah
Cc: FUJITA Tomonori, matthew, linux-scsi, James.Bottomley,
dan.j.williams, pjones, viro, dougg
On Thu, Aug 21 2008, Adel Gadllah wrote:
> 2008/8/8 Jens Axboe <jens.axboe@oracle.com>:
> > On Fri, Aug 08 2008, FUJITA Tomonori wrote:
> >> On Thu, 7 Aug 2008 20:47:45 +0200
> >> "Adel Gadllah" <adel.gadllah@gmail.com> wrote:
> >>
> >> > Can we push this patches to Linus?
> >> > They fix the regression and do not seem to introduce a new one
> >> > (couldn't find any while testing it).
> >>
> >> Jens? Any comments on the patchset?
> >
> > Looks fine, I'll queue it up.
>
> ping?
> seems still not merged in rc4.....
It's queued up, I'll send it upstream soon.
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 3/4] drop vmerge accounting
2008-08-15 18:23 ` [PATCH 3/4] " Mikulas Patocka
@ 2008-08-22 9:10 ` Jens Axboe
2008-08-22 9:17 ` Jens Axboe
2008-08-22 9:29 ` Pierre Ossman
1 sibling, 1 reply; 65+ messages in thread
From: Jens Axboe @ 2008-08-22 9:10 UTC (permalink / raw)
To: Mikulas Patocka
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg, drzeus-mmc
On Fri, Aug 15 2008, Mikulas Patocka wrote:
> > I have applied this and the previous, with a fixup to raid5 to overload
> > both active stripe and process count in bi_phys_segments.
> >
> > --
> > Jens Axboe
>
> Hi
>
> So here are two new patches. Patch 3 drops blk_queue_max_hw_segments
> function and max_hw_segments entry. I checked all the drivers, most of
> them set blk_queue_max_hw_segments equal to blk_queue_max_phys_segments,
> for the few ones that don't (for example scsi_lib.c), I set minimum of the
> two values with blk_queue_max_phys_segments.
>
> The next patch 4 drops the hw_segments logic from MMC card subsystem.
Thanks, I've queued both up for review and inclusion for 2.6.28.
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 3/4] drop vmerge accounting
2008-08-22 9:10 ` Jens Axboe
@ 2008-08-22 9:17 ` Jens Axboe
2008-08-22 16:58 ` Mikulas Patocka
0 siblings, 1 reply; 65+ messages in thread
From: Jens Axboe @ 2008-08-22 9:17 UTC (permalink / raw)
To: Mikulas Patocka
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg, drzeus-mmc
On Fri, Aug 22 2008, Jens Axboe wrote:
> On Fri, Aug 15 2008, Mikulas Patocka wrote:
> > > I have applied this and the previous, with a fixup to raid5 to overload
> > > both active stripe and process count in bi_phys_segments.
> > >
> > > --
> > > Jens Axboe
> >
> > Hi
> >
> > So here are two new patches. Patch 3 drops blk_queue_max_hw_segments
> > function and max_hw_segments entry. I checked all the drivers, most of
> > them set blk_queue_max_hw_segments equal to blk_queue_max_phys_segments,
> > for the few ones that don't (for example scsi_lib.c), I set minimum of the
> > two values with blk_queue_max_phys_segments.
> >
> > The next patch 4 drops the hw_segments logic from MMC card subsystem.
>
> Thanks, I've queued both up for review and inclusion for 2.6.28.
Spoke too soon, they don't really apply to the for-2.6.28 branch of the
block git tree that has your two previous patches. Can you please
recheck and resend against that?
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 3/4] drop vmerge accounting
2008-08-15 18:23 ` [PATCH 3/4] " Mikulas Patocka
2008-08-22 9:10 ` Jens Axboe
@ 2008-08-22 9:29 ` Pierre Ossman
2008-08-22 9:33 ` Jens Axboe
1 sibling, 1 reply; 65+ messages in thread
From: Pierre Ossman @ 2008-08-22 9:29 UTC (permalink / raw)
To: Mikulas Patocka
Cc: Jens Axboe, FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg
On Fri, 15 Aug 2008 14:23:16 -0400 (EDT)
Mikulas Patocka <mpatocka@redhat.com> wrote:
>
> The next patch 4 drops the hw_segments logic from MMC card subsystem.
>
What happened to this one? It seems I didn't get a cc for that one.
Fortunately I could have a look at marc.info and find it.
Both patches seem fine to me, so you can add:
Acked-by: Pierre Ossman <drzeus@drzeus.cx>
--
-- Pierre Ossman
Linux kernel, MMC maintainer http://www.kernel.org
rdesktop, core developer http://www.rdesktop.org
WARNING: This correspondence is being monitored by the
Swedish government. Make sure your server uses encryption
for SMTP traffic and consider using PGP for end-to-end
encryption.
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 3/4] drop vmerge accounting
2008-08-22 9:29 ` Pierre Ossman
@ 2008-08-22 9:33 ` Jens Axboe
2008-08-22 21:34 ` Mikulas Patocka
2008-08-22 21:35 ` [PATCH 4/4] " Mikulas Patocka
0 siblings, 2 replies; 65+ messages in thread
From: Jens Axboe @ 2008-08-22 9:33 UTC (permalink / raw)
To: Pierre Ossman
Cc: Mikulas Patocka, FUJITA Tomonori, adel.gadllah, matthew,
linux-scsi, James.Bottomley, dan.j.williams, pjones, viro, dougg
On Fri, Aug 22 2008, Pierre Ossman wrote:
> On Fri, 15 Aug 2008 14:23:16 -0400 (EDT)
> Mikulas Patocka <mpatocka@redhat.com> wrote:
>
> >
> > The next patch 4 drops the hw_segments logic from MMC card subsystem.
> >
>
> What happened to this one? It seems I didn't get a cc for that one.
> Fortunately I could have a look at marc.info and find it.
>
> Both patches seem fine to me, so you can add:
>
> Acked-by: Pierre Ossman <drzeus@drzeus.cx>
Thanks, it'll go into for-2.6.28 once Mikulas resends it.
--
Jens Axboe
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 3/4] drop vmerge accounting
2008-08-22 9:17 ` Jens Axboe
@ 2008-08-22 16:58 ` Mikulas Patocka
2008-08-22 17:05 ` Mikulas Patocka
0 siblings, 1 reply; 65+ messages in thread
From: Mikulas Patocka @ 2008-08-22 16:58 UTC (permalink / raw)
To: Jens Axboe
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg, drzeus-mmc
On Fri, 22 Aug 2008, Jens Axboe wrote:
> On Fri, Aug 22 2008, Jens Axboe wrote:
> > On Fri, Aug 15 2008, Mikulas Patocka wrote:
> > > > I have applied this and the previous, with a fixup to raid5 to overload
> > > > both active stripe and process count in bi_phys_segments.
> > > >
> > > > --
> > > > Jens Axboe
> > >
> > > Hi
> > >
> > > So here are two new patches. Patch 3 drops blk_queue_max_hw_segments
> > > function and max_hw_segments entry. I checked all the drivers, most of
> > > them set blk_queue_max_hw_segments equal to blk_queue_max_phys_segments,
> > > for the few ones that don't (for example scsi_lib.c), I set minimum of the
> > > two values with blk_queue_max_phys_segments.
> > >
> > > The next patch 4 drops the hw_segments logic from MMC card subsystem.
> >
> > Thanks, I've queued both up for review and inclusion for 2.6.28.
>
> Spoke too soon, they don't really apply to the for-2.6.28 branch of the
> block git tree that has your two previous patches. Can you please
> recheck and resend against that?
>
> --
> Jens Axboe
I've just cloned
git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
and found out that none of my four patches are there. So no wonder that
patches 3 and 4 won't apply if the patches 1 and 2 aren't there.
What do you want the patches against? git/axboe/linux-2.6-block.git? Or
something different?
Mikulas
^ permalink raw reply [flat|nested] 65+ messages in thread
* Re: [PATCH 3/4] drop vmerge accounting
2008-08-22 16:58 ` Mikulas Patocka
@ 2008-08-22 17:05 ` Mikulas Patocka
0 siblings, 0 replies; 65+ messages in thread
From: Mikulas Patocka @ 2008-08-22 17:05 UTC (permalink / raw)
To: Jens Axboe
Cc: FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg, drzeus-mmc
On Fri, 22 Aug 2008, Mikulas Patocka wrote:
>
>
> On Fri, 22 Aug 2008, Jens Axboe wrote:
>
> > On Fri, Aug 22 2008, Jens Axboe wrote:
> > > On Fri, Aug 15 2008, Mikulas Patocka wrote:
> > > > > I have applied this and the previous, with a fixup to raid5 to overload
> > > > > both active stripe and process count in bi_phys_segments.
> > > > >
> > > > > --
> > > > > Jens Axboe
> > > >
> > > > Hi
> > > >
> > > > So here are two new patches. Patch 3 drops blk_queue_max_hw_segments
> > > > function and max_hw_segments entry. I checked all the drivers, most of
> > > > them set blk_queue_max_hw_segments equal to blk_queue_max_phys_segments,
> > > > for the few ones that don't (for example scsi_lib.c), I set minimum of the
> > > > two values with blk_queue_max_phys_segments.
> > > >
> > > > The next patch 4 drops the hw_segments logic from MMC card subsystem.
> > >
> > > Thanks, I've queued both up for review and inclusion for 2.6.28.
> >
> > Spoke too soon, they don't really apply to the for-2.6.28 branch of the
> > block git tree that has your two previous patches. Can you please
> > recheck and resend against that?
> >
> > --
> > Jens Axboe
>
> I've just cloned
> git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
> and found out that none of my four patches are there. So no wonder that
> patches 3 and 4 won't apply if the patches 1 and 2 aren't there.
>
> What do you want the patches against? git/axboe/linux-2.6-block.git? Or
> something different?
>
> Mikulas
Oh, I forgot to switch to 28 branch, ignore this.
Mikulas
^ permalink raw reply [flat|nested] 65+ messages in thread
* [PATCH 3/4] drop vmerge accounting
2008-08-22 9:33 ` Jens Axboe
@ 2008-08-22 21:34 ` Mikulas Patocka
2008-08-22 21:35 ` [PATCH 4/4] " Mikulas Patocka
1 sibling, 0 replies; 65+ messages in thread
From: Mikulas Patocka @ 2008-08-22 21:34 UTC (permalink / raw)
To: Jens Axboe
Cc: Pierre Ossman, FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg
On Fri, 22 Aug 2008, Jens Axboe wrote:
> On Fri, Aug 22 2008, Pierre Ossman wrote:
> > On Fri, 15 Aug 2008 14:23:16 -0400 (EDT)
> > Mikulas Patocka <mpatocka@redhat.com> wrote:
> >
> > >
> > > The next patch 4 drops the hw_segments logic from MMC card subsystem.
> > >
> >
> > What happened to this one? It seems I didn't get a cc for that one.
> > Fortunately I could have a look at marc.info and find it.
> >
> > Both patches seem fine to me, so you can add:
> >
> > Acked-by: Pierre Ossman <drzeus@drzeus.cx>
>
> Thanks, it'll go into for-2.6.28 once Mikulas resends it.
>
> --
> Jens Axboe
Here is the updated patch. It compiled and boots fine:
Remove blk_queue_max_hw_segments function and max_hw_segments variable.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index b58fb89..d011883 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -870,7 +870,7 @@ static int ubd_add(int n, char **error_out)
}
ubd_dev->queue->queuedata = ubd_dev;
- blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
+ blk_queue_max_phys_segments(ubd_dev->queue, MAX_SG);
err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
if(err){
*error_out = "Failed to register device";
diff --git a/block/blk-core.c b/block/blk-core.c
index 52824c0..4532783 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -577,7 +577,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
blk_queue_make_request(q, __make_request);
blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
- blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
q->sg_reserved_size = INT_MAX;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index d81d914..3dff8d3 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -240,8 +240,7 @@ static inline int ll_new_hw_segment(struct request_queue *q,
{
int nr_phys_segs = bio_phys_segments(q, bio);
- if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
- || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
+ if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -330,9 +329,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
if (total_phys_segments > q->max_phys_segments)
return 0;
- if (total_phys_segments > q->max_hw_segments)
- return 0;
-
/* Merge is OK... */
req->nr_phys_segments = total_phys_segments;
return 1;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d70692b..7e05103 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -106,7 +106,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
*/
q->nr_requests = BLKDEV_MAX_RQ;
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
- blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
q->make_request_fn = mfn;
q->backing_dev_info.ra_pages =
(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@ -222,30 +221,6 @@ void blk_queue_max_phys_segments(struct request_queue *q,
EXPORT_SYMBOL(blk_queue_max_phys_segments);
/**
- * blk_queue_max_hw_segments - set max hw segments for a request for this queue
- * @q: the request queue for the device
- * @max_segments: max number of segments
- *
- * Description:
- * Enables a low level driver to set an upper limit on the number of
- * hw data segments in a request. This would be the largest number of
- * address/length pairs the host adapter can actually give at once
- * to the device.
- **/
-void blk_queue_max_hw_segments(struct request_queue *q,
- unsigned short max_segments)
-{
- if (!max_segments) {
- max_segments = 1;
- printk(KERN_INFO "%s: set to minimum %d\n",
- __func__, max_segments);
- }
-
- q->max_hw_segments = max_segments;
-}
-EXPORT_SYMBOL(blk_queue_max_hw_segments);
-
-/**
* blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
* @q: the request queue for the device
* @max_size: max size of segment in bytes
@@ -300,7 +275,6 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->max_phys_segments = min(t->max_phys_segments, b->max_phys_segments);
- t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
if (!t->queue_lock)
@@ -363,21 +337,19 @@ EXPORT_SYMBOL(blk_queue_update_dma_pad);
* does is adjust the queue so that the buf is always appended
* silently to the scatterlist.
*
- * Note: This routine adjusts max_hw_segments to make room for
- * appending the drain buffer. If you call
- * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after
- * calling this routine, you must set the limit to one fewer than your
- * device can support otherwise there won't be room for the drain
+ * Note: This routine adjusts max_phys_segments to make room for
+ * appending the drain buffer. If you call * blk_queue_max_phys_segments()
+ * after calling this routine, you must set the limit to one fewer than
+ * your device can support otherwise there won't be room for the drain
* buffer.
*/
int blk_queue_dma_drain(struct request_queue *q,
dma_drain_needed_fn *dma_drain_needed,
void *buf, unsigned int size)
{
- if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
+ if (q->max_phys_segments < 2)
return -EINVAL;
/* make room for appending the drain */
- --q->max_hw_segments;
--q->max_phys_segments;
q->dma_drain_needed = dma_drain_needed;
q->dma_drain_buffer = buf;
diff --git a/block/elevator.c b/block/elevator.c
index 269615e..1c464d6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -785,7 +785,7 @@ struct request *elv_next_request(struct request_queue *q)
if (q->dma_drain_size && rq->data_len) {
/*
* make sure space for the drain appears we
- * know we can do this because max_hw_segments
+ * know we can do this because max_phys_segments
* has been adjusted to be one fewer than the
* device can handle
*/
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 858f706..236c980 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -702,7 +702,7 @@ static int nv_adma_slave_config(struct scsi_device *sdev)
}
blk_queue_segment_boundary(sdev->request_queue, segment_boundary);
- blk_queue_max_hw_segments(sdev->request_queue, sg_tablesize);
+ blk_queue_max_phys_segments(sdev->request_queue, sg_tablesize);
ata_port_printk(ap, KERN_INFO,
"DMA mask 0x%llX, segment boundary 0x%lX, hw segs %hu\n",
(unsigned long long)*ap->host->dev->dma_mask,
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index a002a38..e9bee96 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -2531,7 +2531,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
Controller->RequestQueue[n] = RequestQueue;
blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
RequestQueue->queuedata = Controller;
- blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit);
blk_queue_max_phys_segments(RequestQueue, Controller->DriverScatterGatherLimit);
blk_queue_max_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
disk->queue = RequestQueue;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index b73116e..8ed72f6 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1370,9 +1370,6 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
/* This is a hardware imposed limit. */
- blk_queue_max_hw_segments(disk->queue, MAXSGENTRIES);
-
- /* This is a limit in the driver and could be eliminated. */
blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES);
blk_queue_max_sectors(disk->queue, h->cciss_max_sectors);
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 09c1434..50b3845 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -451,9 +451,6 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask);
/* This is a hardware imposed limit. */
- blk_queue_max_hw_segments(q, SG_MAX);
-
- /* This is a driver limit and could be eliminated. */
blk_queue_max_phys_segments(q, SG_MAX);
init_timer(&hba[i]->timer);
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index e7fe6ca..42b9ba3 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -958,7 +958,6 @@ static int __init pf_init(void)
}
blk_queue_max_phys_segments(pf_queue, cluster);
- blk_queue_max_hw_segments(pf_queue, cluster);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
struct gendisk *disk = pf->disk;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 4b0d6c7..d25d405 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -487,7 +487,6 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
ps3disk_prepare_flush);
blk_queue_max_phys_segments(queue, -1);
- blk_queue_max_hw_segments(queue, -1);
blk_queue_max_segment_size(queue, dev->bounce_size);
gendisk = alloc_disk(PS3DISK_MINORS);
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index a8de037..39ebd7f 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -697,7 +697,6 @@ static int probe_disk(struct vdc_port *port)
port->disk = g;
- blk_queue_max_hw_segments(q, port->ring_cookies);
blk_queue_max_phys_segments(q, port->ring_cookies);
blk_queue_max_sectors(q, port->max_xfer_size);
g->major = vdc_major;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index a18e1ca..df21bd3 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1521,7 +1521,6 @@ static int carm_init_disks(struct carm_host *host)
break;
}
disk->queue = q;
- blk_queue_max_hw_segments(q, CARM_MAX_REQ_SG);
blk_queue_max_phys_segments(q, CARM_MAX_REQ_SG);
blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 3a281ef..3e3a520 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -2325,7 +2325,6 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum)
disk->queue = q;
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
- blk_queue_max_hw_segments(q, UB_MAX_REQ_SG);
blk_queue_max_phys_segments(q, UB_MAX_REQ_SG);
blk_queue_segment_boundary(q, 0xffffffff); /* Dubious. */
blk_queue_max_sectors(q, UB_MAX_SECTORS);
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index f1c8feb..10e207f 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -482,7 +482,6 @@ retry:
}
d->disk = g;
- blk_queue_max_hw_segments(q, VIOMAXBLOCKDMA);
blk_queue_max_phys_segments(q, VIOMAXBLOCKDMA);
blk_queue_max_sectors(q, VIODASD_MAXSECTORS);
g->major = VIODASD_MAJOR;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 879506a..effcef1 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -289,7 +289,7 @@ static int virtblk_probe(struct virtio_device *vdev)
offsetof(struct virtio_blk_config, seg_max),
&v);
if (!err)
- blk_queue_max_hw_segments(vblk->disk->queue, v);
+ blk_queue_max_phys_segments(vblk->disk->queue, v);
/* Host can optionally specify the block size of the device */
err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3ca643c..53b6012 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -354,7 +354,6 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
/* Ensure a merged request will fit in a single I/O ring slot. */
blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
- blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1231d95..1209ed8 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -744,7 +744,7 @@ static int __devinit probe_gdrom_setupqueue(void)
{
blk_queue_hardsect_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
/* using DMA so memory will need to be contiguous */
- blk_queue_max_hw_segments(gd.gdrom_rq, 1);
+ blk_queue_max_phys_segments(gd.gdrom_rq, 1);
/* set a large max size to get most from DMA */
blk_queue_max_segment_size(gd.gdrom_rq, 0x40000);
gd.disk->queue = gd.gdrom_rq;
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 031e0e1..71458b8 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -632,7 +632,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
gendisk->first_minor = deviceno;
strncpy(gendisk->disk_name, c->name,
sizeof(gendisk->disk_name));
- blk_queue_max_hw_segments(q, 1);
blk_queue_max_phys_segments(q, 1);
blk_queue_max_sectors(q, 4096 / 512);
gendisk->queue = q;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 994e410..0788154 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -927,7 +927,6 @@ static int ide_init_queue(ide_drive_t *drive)
max_sg_entries >>= 1;
#endif /* CONFIG_PCI */
- blk_queue_max_hw_segments(q, max_sg_entries);
blk_queue_max_phys_segments(q, max_sg_entries);
/* assign drive queue */
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 61f4414..0212984 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -91,9 +91,6 @@ static void combine_restrictions_low(struct io_restrictions *lhs,
lhs->max_phys_segments =
min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
- lhs->max_hw_segments =
- min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
-
lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);
lhs->max_segment_size =
@@ -504,9 +501,6 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
min_not_zero(rs->max_phys_segments,
q->max_phys_segments);
- rs->max_hw_segments =
- min_not_zero(rs->max_hw_segments, q->max_hw_segments);
-
rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);
rs->max_segment_size =
@@ -656,8 +650,6 @@ static void check_for_valid_limits(struct io_restrictions *rs)
rs->max_hw_sectors = SAFE_MAX_SECTORS;
if (!rs->max_phys_segments)
rs->max_phys_segments = MAX_PHYS_SEGMENTS;
- if (!rs->max_hw_segments)
- rs->max_hw_segments = MAX_HW_SEGMENTS;
if (!rs->hardsect_size)
rs->hardsect_size = 1 << SECTOR_SHIFT;
if (!rs->max_segment_size)
@@ -849,7 +841,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
*/
blk_queue_max_sectors(q, t->limits.max_sectors);
q->max_phys_segments = t->limits.max_phys_segments;
- q->max_hw_segments = t->limits.max_hw_segments;
q->hardsect_size = t->limits.hardsect_size;
q->max_segment_size = t->limits.max_segment_size;
q->max_hw_sectors = t->limits.max_hw_sectors;
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 44b1817..7db17a0 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1215,7 +1215,6 @@ static int mspro_block_init_disk(struct memstick_dev *card)
blk_queue_bounce_limit(msb->queue, limit);
blk_queue_max_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
blk_queue_max_phys_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
- blk_queue_max_hw_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
blk_queue_max_segment_size(msb->queue,
MSPRO_BLOCK_MAX_PAGES * msb->page_size);
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 81483de..7d56306 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -1069,13 +1069,11 @@ static int i2o_block_probe(struct device *dev)
queue = gd->queue;
queue->queuedata = i2o_blk_dev;
- blk_queue_max_phys_segments(queue, I2O_MAX_PHYS_SEGMENTS);
+ blk_queue_max_phys_segments(queue, min(I2O_MAX_PHYS_SEGMENTS, i2o_sg_tablesize(c, body_size)));
blk_queue_max_sectors(queue, max_sectors);
- blk_queue_max_hw_segments(queue, i2o_sg_tablesize(c, body_size));
osm_debug("max sectors = %d\n", queue->max_sectors);
osm_debug("phys segments = %d\n", queue->max_phys_segments);
- osm_debug("max hw segments = %d\n", queue->max_hw_segments);
/*
* Ask for the current media data. If that isn't supported
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 3dee97e..a6529ce 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -151,7 +151,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
blk_queue_max_sectors(mq->queue, bouncesz / 512);
blk_queue_max_phys_segments(mq->queue, bouncesz / 512);
- blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
blk_queue_max_segment_size(mq->queue, bouncesz);
mq->sg = kmalloc(sizeof(struct scatterlist),
@@ -176,8 +175,8 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
if (!mq->bounce_buf) {
blk_queue_bounce_limit(mq->queue, limit);
blk_queue_max_sectors(mq->queue, host->max_req_size / 512);
- blk_queue_max_phys_segments(mq->queue, host->max_phys_segs);
- blk_queue_max_hw_segments(mq->queue, host->max_hw_segs);
+ /* TODO: drop host->max_hw_segs */
+ blk_queue_max_phys_segments(mq->queue, min(host->max_phys_segs, host->max_hw_segs));
blk_queue_max_segment_size(mq->queue, host->max_seg_size);
mq->sg = kmalloc(sizeof(struct scatterlist) *
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 1b6c52e..39a2216 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1976,7 +1976,6 @@ static void dasd_setup_queue(struct dasd_block *block)
max = block->base->discipline->max_blocks << block->s2b_shift;
blk_queue_max_sectors(block->request_queue, max);
blk_queue_max_phys_segments(block->request_queue, -1L);
- blk_queue_max_hw_segments(block->request_queue, -1L);
blk_queue_max_segment_size(block->request_queue, -1L);
blk_queue_segment_boundary(block->request_queue, -1L);
blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN, NULL);
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 95da72b..89f81eb 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -233,7 +233,6 @@ tapeblock_setup_device(struct tape_device * device)
blk_queue_hardsect_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
blk_queue_max_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC);
blk_queue_max_phys_segments(blkdat->request_queue, -1L);
- blk_queue_max_hw_segments(blkdat->request_queue, -1L);
blk_queue_max_segment_size(blkdat->request_queue, -1L);
blk_queue_segment_boundary(blkdat->request_queue, -1L);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ff5d56b..6f0648f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1639,8 +1639,7 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
/*
* this limit is imposed by hardware restrictions
*/
- blk_queue_max_hw_segments(q, shost->sg_tablesize);
- blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
+ blk_queue_max_phys_segments(q, min(shost->sg_tablesize, (unsigned short)SCSI_MAX_SG_CHAIN_SEGMENTS));
blk_queue_max_sectors(q, shost->max_sectors);
blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 3d36270..b324177 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -292,8 +292,7 @@ sg_open(struct inode *inode, struct file *filp)
if (!sdp->headfp) { /* no existing opens on this device */
sdp->sgdebug = 0;
q = sdp->device->request_queue;
- sdp->sg_tablesize = min(q->max_hw_segments,
- q->max_phys_segments);
+ sdp->sg_tablesize = q->max_phys_segments;
}
if ((sfp = sg_add_sfp(sdp, dev)))
filp->private_data = sfp;
@@ -1394,7 +1393,7 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
sdp->disk = disk;
sdp->device = scsidp;
init_waitqueue_head(&sdp->o_excl_wait);
- sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments);
+ sdp->sg_tablesize = q->max_phys_segments;
sdp->index = k;
error = 0;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index c2bb53e..a54a716 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3930,8 +3930,7 @@ static int st_probe(struct device *dev)
return -ENODEV;
}
- i = min(SDp->request_queue->max_hw_segments,
- SDp->request_queue->max_phys_segments);
+ i = SDp->request_queue->max_phys_segments;
if (st_max_sg_segs < i)
i = st_max_sg_segs;
buffer = new_tape_buffer(1, (SDp->host)->unchecked_isa_dma, i);
diff --git a/fs/bio.c b/fs/bio.c
index d394090..a40ae6f 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -281,8 +281,6 @@ int bio_get_nr_vecs(struct block_device *bdev)
nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (nr_pages > q->max_phys_segments)
nr_pages = q->max_phys_segments;
- if (nr_pages > q->max_hw_segments)
- nr_pages = q->max_hw_segments;
return nr_pages;
}
@@ -341,8 +339,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
* make this too complex.
*/
- while (bio->bi_phys_segments >= q->max_phys_segments
- || bio->bi_phys_segments >= q->max_hw_segments) {
+ while (bio->bi_phys_segments >= q->max_phys_segments) {
if (retried_segments)
return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c0930fa..f904911 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -359,7 +359,6 @@ struct request_queue
unsigned int max_sectors;
unsigned int max_hw_sectors;
unsigned short max_phys_segments;
- unsigned short max_hw_segments;
unsigned short hardsect_size;
unsigned int max_segment_size;
@@ -772,7 +771,6 @@ extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
-extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
@@ -852,7 +850,6 @@ extern int blk_register_filter(struct gendisk *disk);
extern void blk_unregister_filter(struct gendisk *disk);
#define MAX_PHYS_SEGMENTS 128
-#define MAX_HW_SEGMENTS 128
#define SAFE_MAX_SECTORS 255
#define BLK_DEF_MAX_SECTORS 1024
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index a90222e..9d9698e 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -122,7 +122,6 @@ struct io_restrictions {
unsigned max_sectors;
unsigned max_segment_size;
unsigned short hardsect_size;
- unsigned short max_hw_segments;
unsigned short max_phys_segments;
unsigned char no_cluster; /* inverted so that 0 is default */
};
^ permalink raw reply related [flat|nested] 65+ messages in thread
* [PATCH 4/4] drop vmerge accounting
2008-08-22 9:33 ` Jens Axboe
2008-08-22 21:34 ` Mikulas Patocka
@ 2008-08-22 21:35 ` Mikulas Patocka
1 sibling, 0 replies; 65+ messages in thread
From: Mikulas Patocka @ 2008-08-22 21:35 UTC (permalink / raw)
To: Jens Axboe
Cc: Pierre Ossman, FUJITA Tomonori, adel.gadllah, matthew, linux-scsi,
James.Bottomley, dan.j.williams, pjones, viro, dougg
Remove max_hw_segs variable from MMC layer.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index a6529ce..0733a1b 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -133,7 +133,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
blk_queue_prep_rq(mq->queue, mmc_prep_request);
#ifdef CONFIG_MMC_BLOCK_BOUNCE
- if (host->max_hw_segs == 1) {
+ if (host->max_phys_segs == 1) {
unsigned int bouncesz;
bouncesz = MMC_QUEUE_BOUNCESZ;
@@ -175,8 +175,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
if (!mq->bounce_buf) {
blk_queue_bounce_limit(mq->queue, limit);
blk_queue_max_sectors(mq->queue, host->max_req_size / 512);
- /* TODO: drop host->max_hw_segs */
- blk_queue_max_phys_segments(mq->queue, min(host->max_phys_segs, host->max_hw_segs));
+ blk_queue_max_phys_segments(mq->queue, host->max_phys_segs);
blk_queue_max_segment_size(mq->queue, host->max_seg_size);
mq->sg = kmalloc(sizeof(struct scatterlist) *
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 6da80fd..27a02fd 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -89,7 +89,6 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
* By default, hosts do not support SGIO or large requests.
* They have to set these according to their abilities.
*/
- host->max_hw_segs = 1;
host->max_phys_segs = 1;
host->max_seg_size = PAGE_CACHE_SIZE;
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 0bd06f5..1d2293e 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -1037,7 +1037,6 @@ static int __init atmci_probe(struct platform_device *pdev)
mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
mmc->caps |= MMC_CAP_4_BIT_DATA;
- mmc->max_hw_segs = 64;
mmc->max_phys_segs = 64;
mmc->max_req_size = 32768 * 512;
mmc->max_blk_size = 32768;
diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c
index 2f0fcdb..f97c90a 100644
--- a/drivers/mmc/host/imxmmc.c
+++ b/drivers/mmc/host/imxmmc.c
@@ -958,7 +958,6 @@ static int imxmci_probe(struct platform_device *pdev)
mmc->caps = MMC_CAP_4_BIT_DATA;
/* MMC core transfer sizes tunable parameters */
- mmc->max_hw_segs = 64;
mmc->max_phys_segs = 64;
mmc->max_seg_size = 64*512; /* default PAGE_CACHE_SIZE */
mmc->max_req_size = 64*512; /* default PAGE_CACHE_SIZE */
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 696cf36..a2384f6 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -539,7 +539,6 @@ static int mmci_probe(struct amba_device *dev, void *id)
/*
* We can do SGIO
*/
- mmc->max_hw_segs = 16;
mmc->max_phys_segs = NR_SG;
/*
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index c160288..066ad03 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1336,7 +1336,6 @@ static int __init mmc_omap_new_slot(struct mmc_omap_host *host, int id)
* normally used (except e.g. for reading SD registers).
*/
mmc->max_phys_segs = 32;
- mmc->max_hw_segs = 32;
mmc->max_blk_size = 2048; /* BLEN is 11 bits (+1) */
mmc->max_blk_count = 2048; /* NBLK is 11 bits (+1) */
mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index ae16d84..4b91999 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -1291,7 +1291,6 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440)
mmc->max_seg_size = mmc->max_req_size;
mmc->max_phys_segs = 128;
- mmc->max_hw_segs = 128;
dbg(host, dbg_debug,
"probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n",
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index e3a8133..8de2024 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1628,12 +1628,11 @@ int sdhci_add_host(struct sdhci_host *host)
* can do scatter/gather or not.
*/
if (host->flags & SDHCI_USE_ADMA)
- mmc->max_hw_segs = 128;
+ mmc->max_phys_segs = 128;
else if (host->flags & SDHCI_USE_DMA)
- mmc->max_hw_segs = 1;
+ mmc->max_phys_segs = 1;
else /* PIO */
- mmc->max_hw_segs = 128;
- mmc->max_phys_segs = 128;
+ mmc->max_phys_segs = 128;
/*
* Maximum number of sectors in one transfer. Limited by DMA boundary
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index 1384484..5700c1a 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -978,11 +978,10 @@ static int tifm_sd_probe(struct tifm_dev *sock)
mmc->f_max = 24000000;
mmc->max_blk_count = 2048;
- mmc->max_hw_segs = mmc->max_blk_count;
+ mmc->max_phys_segs = mmc->max_blk_count;
mmc->max_blk_size = min(TIFM_MMCSD_MAX_BLOCK_SIZE, PAGE_SIZE);
mmc->max_seg_size = mmc->max_blk_count * mmc->max_blk_size;
mmc->max_req_size = mmc->max_seg_size;
- mmc->max_phys_segs = mmc->max_hw_segs;
sock->card_event = tifm_sd_card_event;
sock->data_event = tifm_sd_data_event;
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index adda379..034ccc4 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -1234,7 +1234,6 @@ static int __devinit wbsd_alloc_mmc(struct device *dev)
* Maximum number of segments. Worst case is one sector per segment
* so this will be 64kB/512.
*/
- mmc->max_hw_segs = 128;
mmc->max_phys_segs = 128;
/*
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 9c288c9..e11b957 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -119,7 +119,6 @@ struct mmc_host {
/* host specific block data */
unsigned int max_seg_size; /* see blk_queue_max_segment_size */
- unsigned short max_hw_segs; /* see blk_queue_max_hw_segments */
unsigned short max_phys_segs; /* see blk_queue_max_phys_segments */
unsigned short unused;
unsigned int max_req_size; /* maximum number of bytes in one req */
^ permalink raw reply related [flat|nested] 65+ messages in thread
end of thread, other threads:[~2008-08-22 21:35 UTC | newest]
Thread overview: 65+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-13 19:33 [PATCH/RFC] allow userspace to modify scsi command filter on per device basis Adel Gadllah
2008-06-13 19:54 ` Matthew Wilcox
2008-06-13 20:22 ` Adel Gadllah
2008-06-13 20:23 ` Adel Gadllah
2008-06-14 6:51 ` [PATCH/RFC v2] " Adel Gadllah
2008-06-16 2:55 ` FUJITA Tomonori
2008-06-16 5:49 ` Adel Gadllah
2008-06-16 6:13 ` FUJITA Tomonori
2008-06-16 9:22 ` [PATCH/RFC v3] " Adel Gadllah
2008-06-17 20:14 ` FUJITA Tomonori
2008-06-17 21:45 ` Peter Jones
2008-06-17 22:40 ` FUJITA Tomonori
2008-06-17 22:49 ` FUJITA Tomonori
2008-06-17 23:01 ` Douglas Gilbert
2008-06-18 1:13 ` Pete Wyckoff
2008-06-18 7:33 ` Adel Gadllah
2008-06-18 14:55 ` James Smart
2008-06-18 14:56 ` Peter Jones
2008-06-26 10:10 ` Adel Gadllah
2008-06-26 10:13 ` Jens Axboe
2008-06-26 14:36 ` FUJITA Tomonori
2008-06-26 15:05 ` Adel Gadllah
2008-06-26 15:08 ` FUJITA Tomonori
2008-06-26 15:26 ` FUJITA Tomonori
2008-07-24 1:11 ` Dan Williams
2008-07-24 3:31 ` FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 0/3] cmd_filter fixes FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 2/3] sg: restore command permission for TYPE_SCANNER FUJITA Tomonori
2008-07-26 9:03 ` [PATCH 3/3] rename blk_scsi_cmd_filter to blk_cmd_filter FUJITA Tomonori
2008-07-30 20:10 ` [PATCH 1/3] move cmd_filter from gendisk to request_queue Peter Jones
2008-07-31 5:13 ` FUJITA Tomonori
2008-08-16 5:47 ` FUJITA Tomonori
2008-07-27 19:59 ` [PATCH 0/3] cmd_filter fixes Adel Gadllah
2008-07-27 20:02 ` Adel Gadllah
2008-07-28 2:18 ` FUJITA Tomonori
2008-07-30 19:59 ` Adel Gadllah
2008-07-31 4:55 ` FUJITA Tomonori
2008-07-31 7:18 ` Matthew Wilcox
2008-07-31 7:24 ` FUJITA Tomonori
2008-07-31 13:04 ` Matthew Wilcox
2008-07-31 15:18 ` FUJITA Tomonori
2008-08-07 18:47 ` Adel Gadllah
2008-08-08 0:20 ` FUJITA Tomonori
2008-08-08 5:54 ` Jens Axboe
2008-08-08 6:11 ` FUJITA Tomonori
2008-08-08 6:15 ` Jens Axboe
2008-08-08 6:29 ` FUJITA Tomonori
2008-08-08 6:35 ` Jens Axboe
2008-08-08 16:53 ` [PATCH 1/2] drop vmerge accounting Mikulas Patocka
2008-08-08 17:07 ` [PATCH 2/2] " Mikulas Patocka
2008-08-15 9:48 ` Jens Axboe
2008-08-15 18:23 ` [PATCH 3/4] " Mikulas Patocka
2008-08-22 9:10 ` Jens Axboe
2008-08-22 9:17 ` Jens Axboe
2008-08-22 16:58 ` Mikulas Patocka
2008-08-22 17:05 ` Mikulas Patocka
2008-08-22 9:29 ` Pierre Ossman
2008-08-22 9:33 ` Jens Axboe
2008-08-22 21:34 ` Mikulas Patocka
2008-08-22 21:35 ` [PATCH 4/4] " Mikulas Patocka
2008-08-15 18:26 ` Mikulas Patocka
2008-08-21 9:26 ` [PATCH 0/3] cmd_filter fixes Adel Gadllah
2008-08-22 9:10 ` Jens Axboe
2008-06-14 20:26 ` [PATCH/RFC] allow userspace to modify scsi command filter on per device basis Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).