diff -urN linux-2.5.73-p6.2/drivers/block/nbd.c linux-2.5.73-p7/drivers/block/nbd.c --- linux-2.5.73-p6.2/drivers/block/nbd.c 2003-06-30 11:58:50.000000000 -0600 +++ linux-2.5.73-p7/drivers/block/nbd.c 2003-07-01 12:31:53.258728598 -0600 @@ -41,6 +41,8 @@ * require them to be updated. Actually, the linux 2.5 block layer redisign * already required existing user tools to be updated to properly set the * correct device size. + * 03-06-30 Added compatibility with previous ioctl user interface while + * maintaining better resource locking. * * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall * why not: would need verify_area and friends, would share yet another @@ -112,6 +114,8 @@ static int requests_out; #endif +static int backcompat = 1; + #ifndef NDEBUG static const char *ioctl_cmd_to_ascii(int cmd) { @@ -120,6 +124,7 @@ case NBD_SET_BLKSIZE: return "set-blksize"; case NBD_SET_SIZE: return "set-size"; case NBD_DO_IT: return "do-it"; + case NBD_RUN_SOCK: return "run-sock"; case NBD_CLEAR_SOCK: return "clear-sock"; case NBD_CLEAR_QUE: return "clear-que"; case NBD_PRINT_DEBUG: return "print-debug"; @@ -400,32 +405,6 @@ return req; } -static int nbd_clear_que(struct nbd_device *lo) -{ - struct request *req; - - down(&lo->tx_lock); - if (lo->sock) { - up(&lo->tx_lock); - return -EBUSY; - } - do { - req = NULL; - spin_lock(&lo->queue_lock); - if (!list_empty(&lo->queue_head)) { - req = list_entry(lo->queue_head.next, struct request, queuelist); - list_del_init(&req->queuelist); - } - spin_unlock(&lo->queue_lock); - if (req) { - req->errors++; - request_end(req); - } - } while (req); - up(&lo->tx_lock); - return 0; -} - /* * We always wait for result of write, for now. It would be nice to make it optional * in future @@ -533,16 +512,53 @@ return 0; } -static int nbd_do_it(struct nbd_device *lo, unsigned int fd) +static int nbd_set_sock(struct nbd_device *lo, int fd) +{ + struct file *file; + + dprintk(DBG_IOCTL, "%s: %s[%d] called set-sock %d\n", + lo->disk->disk_name, current->comm, current->pid, fd); + + /* give nbd-client way to check busy before NBD_DO_IT */ + if (lo->sock) + return -EBUSY; + + /* implement some checks for nbd-client */ + file = fget(fd); + if (!file) + return backcompat? -EINVAL: -EBADF; + fput(file); + + lo->file = fd; + return 0; +} + +static int nbd_clear_sock(struct nbd_device *lo) +{ + dprintk(DBG_IOCTL, "%s: %s[%d] called clear-sock\n", + lo->disk->disk_name, current->comm, current->pid); + + /* implement some checks for nbd-client */ + if (lo->file == -1) + return backcompat? 0: -EINVAL; + + lo->file = -1; + return 0; +} + +static int nbd_run_sock(struct nbd_device *lo, int fd) { struct file *file; struct inode *inode; struct socket *sock; struct request *req; + dprintk(DBG_IOCTL, "%s: %s[%d] called run-sock %d\n", + lo->disk->disk_name, current->comm, current->pid, fd); + file = fget(fd); if (!file) - return -EBADF; + return backcompat? -EINVAL: -EBADF; inode = file->f_dentry->d_inode; if (!inode->i_sock) { fput(file); @@ -563,28 +579,31 @@ lo->sock = sock; up(&lo->tx_lock); + lo->harderror = 0; /* no need for rx lock since here is the rx... */ - dprintk(DBG_IOCTL, "%s: nbd_do_it: commencing read loop\n", - lo->disk->disk_name); + dprintk(DBG_IOCTL, "%s: commencing read loop\n", lo->disk->disk_name); while ((req = nbd_read_stat(lo)) != NULL) request_end(req); - dprintk(DBG_IOCTL, "%s: nbd_do_it: read loop finished\n", - lo->disk->disk_name); + dprintk(DBG_IOCTL, "%s: read loop finished\n", lo->disk->disk_name); down(&lo->tx_lock); lo->sock = NULL; up(&lo->tx_lock); + /* In case shutdown occurred from disconnect, wake up disconnector */ + wake_up(&lo->waiters); + fput(file); return lo->harderror; } static int nbd_disconnect(struct nbd_device *lo) { - int error; + int sent; struct request sreq; - printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name); + dprintk(DBG_IOCTL, "%s: %s[%d] called disconnect\n", + lo->disk->disk_name, current->comm, current->pid); sreq.flags = REQ_SPECIAL; nbd_cmd(&sreq) = NBD_CMD_DISC; @@ -599,18 +618,30 @@ down(&lo->tx_lock); if (lo->sock) { - error = 0; nbd_send_req(lo, &sreq); + sent = 1; } else { - error = -ENOTCONN; + sent = 0; } up(&lo->tx_lock); - return error; + + if (sent) { + /* Block till finished so no more user calls run till then */ + dprintk(DBG_IOCTL, "%s: waiting till disconnect finished\n", + lo->disk->disk_name); + wait_event_interruptible(lo->waiters, lo->sock == NULL); + dprintk(DBG_IOCTL, "%s: disconnect all finished now\n", + lo->disk->disk_name); + } + return sent? 0: backcompat? -EINVAL: -ENOTCONN; } static int nbd_set_blksize(struct nbd_device *lo, unsigned long arg) { + dprintk(DBG_IOCTL, "%s: %s[%d] called set-blksize %lu\n", + lo->disk->disk_name, current->comm, current->pid, arg); + if ((arg & (arg-1)) || (arg < 512) || (arg > PAGE_SIZE)) return -EINVAL; down(&lo->tx_lock); @@ -627,6 +658,9 @@ static int nbd_set_size(struct nbd_device *lo, unsigned long arg) { + dprintk(DBG_IOCTL, "%s: %s[%d] called set-size %lu\n", + lo->disk->disk_name, current->comm, current->pid, arg); + down(&lo->tx_lock); if (lo->sock) { up(&lo->tx_lock); @@ -640,6 +674,9 @@ static int nbd_set_size_blocks(struct nbd_device *lo, unsigned long arg) { + dprintk(DBG_IOCTL, "%s: %s[%d] called set-size-blocks %lu\n", + lo->disk->disk_name, current->comm, current->pid, arg); + down(&lo->tx_lock); if (lo->sock) { up(&lo->tx_lock); @@ -651,28 +688,67 @@ return 0; } +static int nbd_clear_que(struct nbd_device *lo) +{ + struct request *req; + + dprintk(DBG_IOCTL, "%s: %s[%d] called clear-que\n", + lo->disk->disk_name, current->comm, current->pid); + + down(&lo->tx_lock); + if (lo->sock) { + /* + * Don't allow queue to be cleared while device is running! + * Device must be stopped (disconnected) first. Otherwise + * clearing is meaningless & can lock up processes: it's a + * race with users who may queue up more requests after the + * clearing is done that may then never be freed till the + * system reboots or clear que is run again which just + * opens the race yet again. + */ + up(&lo->tx_lock); + /* The original nbd-client expects 0 (always) */ + return backcompat? 0: -EBUSY; + } + do { + req = NULL; + spin_lock(&lo->queue_lock); + if (!list_empty(&lo->queue_head)) { + req = list_entry(lo->queue_head.next, struct request, queuelist); + list_del_init(&req->queuelist); + } + spin_unlock(&lo->queue_lock); + if (req) { + req->errors++; + request_end(req); + } + } while (req); + up(&lo->tx_lock); + return 0; +} + static int nbd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct nbd_device *lo = inode->i_bdev->bd_disk->private_data; + struct nbd_device *lo; + /* Anyone capable of this syscall can do *bad* things */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + lo = inode->i_bdev->bd_disk->private_data; #ifdef PARANOIA BUG_ON(lo->magic != LO_MAGIC); #endif - /* Anyone capable of this syscall can do *real bad* things */ - dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", - lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; switch (cmd) { - case NBD_CLEAR_SOCK: /* deprecated! */ - case NBD_SET_SOCK: /* deprecated! */ - printk(KERN_WARNING "%s: %s[%d] called deprecated ioctl %s\n", - lo->disk->disk_name, - current->comm, current->pid, - ioctl_cmd_to_ascii(cmd)); - return -EINVAL; + case NBD_CLEAR_SOCK: + return nbd_clear_sock(lo); + case NBD_SET_SOCK: + return nbd_set_sock(lo, arg); + case NBD_DO_IT: + return nbd_run_sock(lo, lo->file); + case NBD_RUN_SOCK: + return nbd_run_sock(lo, arg); case NBD_DISCONNECT: return nbd_disconnect(lo); case NBD_SET_BLKSIZE: @@ -681,8 +757,6 @@ return nbd_set_size(lo, arg); case NBD_SET_SIZE_BLOCKS: return nbd_set_size_blocks(lo, arg); - case NBD_DO_IT: - return nbd_do_it(lo, arg); case NBD_CLEAR_QUE: return nbd_clear_que(lo); case NBD_PRINT_DEBUG: @@ -697,6 +771,9 @@ #endif return 0; } + dprintk(DBG_IOCTL, "%s: %s[%d] unhandled-cmd=0x%x(%s) arg=%lu\n", + lo->disk->disk_name, current->comm, current->pid, + cmd, ioctl_cmd_to_ascii(cmd), arg); return -EINVAL; } @@ -751,6 +828,7 @@ printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR); dprintk(DBG_INIT, "nbd: debugflags=0x%x\n", debugflags); + dprintk(DBG_INIT, "nbd: backcompat=%d\n", backcompat); devfs_mk_dir("nbd"); for (i = 0; i < MAX_NBD; i++) { @@ -765,6 +843,10 @@ init_MUTEX(&nbd_dev[i].tx_lock); nbd_dev[i].blksize = 1024; nbd_dev[i].bytesize = ((u64)0x7ffffc00) << 10; /* 2TB */ + nbd_dev[i].file = -1; + nbd_dev[i].sock = NULL; + nbd_dev[i].harderror = 0; + init_waitqueue_head(&nbd_dev[i].waiters); disk->major = NBD_MAJOR; disk->first_minor = i; disk->fops = &nbd_fops; @@ -814,3 +896,6 @@ MODULE_PARM(debugflags, "i"); MODULE_PARM_DESC(debugflags, "flags for controlling debug output"); #endif + +MODULE_PARM(backcompat, "i"); +MODULE_PARM_DESC(backcompat, "boolean for better backward compatibility"); diff -urN linux-2.5.73-p6.2/include/linux/nbd.h linux-2.5.73-p7/include/linux/nbd.h --- linux-2.5.73-p6.2/include/linux/nbd.h 2003-06-25 09:20:19.000000000 -0600 +++ linux-2.5.73-p7/include/linux/nbd.h 2003-07-01 11:48:06.868415543 -0600 @@ -8,9 +8,25 @@ * 2003/06/24 Louis D. Langholtz * Removed unneeded blksize_bits field from nbd_device struct. * Cleanup PARANOIA usage & code. - * Removed unneeded file field from nbd_device struct & moved - * kernel specific stuff within __KERNEL__. The non-kernel stuff - * should probably be moved to its own file (someday). + * Moved kernel specific stuff within __KERNEL__ and turned file + * field into holder for descriptor instead of pointer to struct + * file. The non-kernel stuff should probably be moved to its + * own file (someday). The change to file field is so NBD_DO_IT() + * ioctl handling can just use new NBD_RUN_SOCK(descriptor) + * instead. With this new scheme, if NBD_SET_SOCK is called by + * one process, and then NBD_DO_IT gets called by another + * process (essentially willy-nilly calling of the NBD ioctls) + * this change fails *gracefully* with an error. On the other + * hand, with the previous implementation, if user processes call + * the NBD ioctls willy-nilly, failure meant oops if you were + * lucky, data corruption if not. Consequently, we can say a + * user who can run the nbd ioctls can do bad things rather + * than *really* bad things. So bad things are like root shutting + * down the nbd device on users who weren't forewarned for which + * we have precendence in just about every other driver etc. + * Eventually NBD_SET_SOCK, NBD_CLEAR_SOCK, and NBD_DO_IT may + * go away in favor of just the one NBD_RUN_SOCK ioctl and then + * the file field can go away from the nbd_device struct too. */ #ifndef LINUX_NBD_H @@ -25,6 +41,7 @@ #define NBD_PRINT_DEBUG _IO( 0xab, 6 ) #define NBD_SET_SIZE_BLOCKS _IO( 0xab, 7 ) #define NBD_DISCONNECT _IO( 0xab, 8 ) +#define NBD_RUN_SOCK _IO( 0xab, 9 ) /* These are send over network in request/reply magic field */ #define NBD_REQUEST_MAGIC 0x25609513 @@ -73,7 +90,8 @@ int harderror; /* Code of hard error */ #define NBD_READ_ONLY 0x0001 #define NBD_WRITE_NOCHK 0x0002 - struct socket * sock; + struct socket * sock; /* If == NULL, device is not ready, yet */ + int file; /* previously struct file *, descriptor now (see above) */ #ifdef PARANOIA int magic; /* FIXME: not if debugging is off */ #endif @@ -83,6 +101,7 @@ struct gendisk *disk; int blksize; u64 bytesize; + wait_queue_head_t waiters; }; #endif /* __KERNEL__ */