public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Lou Langholtz <ldl@aros.net>
To: viro@parcelfarce.linux.theplanet.co.uk
Cc: linux-kernel@vger.kernel.org, Jens Axboe <axboe@suse.de>,
	Andrew Morton <akpm@digeo.com>,
	torvalds@osdl.org
Subject: Re: [PATCH] nbd driver for 2.5+: fix for 2.5 block layer (improved)
Date: Sun, 22 Jun 2003 19:27:29 -0600	[thread overview]
Message-ID: <3EF65781.50708@aros.net> (raw)
In-Reply-To: <20030622214412.GM6754@parcelfarce.linux.theplanet.co.uk>

[-- Attachment #1: Type: text/plain, Size: 735 bytes --]

viro@parcelfarce.linux.theplanet.co.uk wrote:

>On Sun, Jun 22, 2003 at 01:28:12PM -0600, Lou Langholtz wrote:
>
>  
>
>>4. that the allocation of request_queue is dynamic and separate from 
>>other allocated objects [Al]
>>    
>>
>
>*Ugh*.  Not on ->open(), please...  _If_ you really want that sort of
>on-demand allocation - make it happen via blk_register_region() and
>allocate both gendisk and queue at once.
>
>However, I would suggest to make that a separate patch and for now allocate
>queues at the same place where you allocate gendisks, without any on-demand
>stuff.
>  
>
Okay, here's the patch, now updated to drop the on-demand stuff and 
to allocate queues wherever alloc_disk is used. How is this patch now?

[-- Attachment #2: nbd-patch1.3 --]
[-- Type: text/plain, Size: 5737 bytes --]

diff -urN linux-2.5.72/drivers/block/nbd.c linux-2.5.72-p1.5/drivers/block/nbd.c
--- linux-2.5.72/drivers/block/nbd.c	2003-06-16 22:19:44.000000000 -0600
+++ linux-2.5.72-p1.5/drivers/block/nbd.c	2003-06-22 19:13:08.199124846 -0600
@@ -28,6 +28,9 @@
  *   the transmit lock. <steve@chygwyn.com>
  * 02-10-11 Allow hung xmit to be aborted via SIGKILL & various fixes.
  *   <Paul.Clements@SteelEye.com> <James.Bottomley@SteelEye.com>
+ * 03-06-22 Make nbd work with new linux 2.5 block layer design. This fixes
+ *   memory corruption from module removal and possible memory corruption
+ *   from sending/receiving disk data. <ldl@aros.net>
  *
  * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
  * why not: would need verify_area and friends, would share yet another 
@@ -63,6 +66,16 @@
 
 static struct nbd_device nbd_dev[MAX_NBD];
 
+/*
+ * Use just one lock (or at most 1 per NIC). Two arguments for this:
+ * 1. Each NIC is essentially a synchronization point for all servers
+ *    accessed through that NIC so there's no need to have more locks
+ *    than NICs anyway.
+ * 2. More locks lead to more "Dirty cache line bouncing" which will slow
+ *    down each lock to the point where they're actually slower than just
+ *    a single lock.
+ * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this!
+ */
 static spinlock_t nbd_lock = SPIN_LOCK_UNLOCKED;
 
 #define DEBUG( s )
@@ -168,6 +181,17 @@
 	return result;
 }
 
+static inline int sock_send_bvec(struct socket *sock, struct bio_vec *bvec,
+		int flags)
+{
+	int result;
+	void *kaddr = kmap(bvec->bv_page);
+	result = nbd_xmit(1, sock, kaddr + bvec->bv_offset, bvec->bv_len,
+			flags);
+	kunmap(bvec->bv_page);
+	return result;
+}
+
 #define FAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); goto error_out; }
 
 void nbd_send_req(struct nbd_device *lo, struct request *req)
@@ -209,7 +233,7 @@
 				if ((i < (bio->bi_vcnt - 1)) || bio->bi_next)
 					flags = MSG_MORE;
 				DEBUG("data, ");
-				result = nbd_xmit(1, sock, page_address(bvec->bv_page) + bvec->bv_offset, bvec->bv_len, flags);
+				result = sock_send_bvec(sock, bvec, flags);
 				if (result <= 0)
 					FAIL("Send data failed.");
 			}
@@ -244,6 +268,16 @@
 	return NULL;
 }
 
+static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec)
+{
+	int result;
+	void *kaddr = kmap(bvec->bv_page);
+	result = nbd_xmit(0, sock, kaddr + bvec->bv_offset, bvec->bv_len,
+			MSG_WAITALL);
+	kunmap(bvec->bv_page);
+	return result;
+}
+
 #define HARDFAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); lo->harderror = result; return NULL; }
 struct request *nbd_read_stat(struct nbd_device *lo)
 		/* NULL returned = something went wrong, inform userspace       */ 
@@ -251,10 +285,11 @@
 	int result;
 	struct nbd_reply reply;
 	struct request *req;
+	struct socket *sock = lo->sock;
 
 	DEBUG("reading control, ");
 	reply.magic = 0;
-	result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply), MSG_WAITALL);
+	result = nbd_xmit(0, sock, (char *) &reply, sizeof(reply), MSG_WAITALL);
 	if (result <= 0)
 		HARDFAIL("Recv control failed.");
 	req = nbd_find_request(lo, reply.handle);
@@ -268,14 +303,17 @@
 		FAIL("Other side returned error.");
 
 	if (nbd_cmd(req) == NBD_CMD_READ) {
-		struct bio *bio = req->bio;
+		int i;
+		struct bio *bio;
 		DEBUG("data, ");
-		do {
-			result = nbd_xmit(0, lo->sock, bio_data(bio), bio->bi_size, MSG_WAITALL);
-			if (result <= 0)
-				HARDFAIL("Recv data failed.");
-			bio = bio->bi_next;
-		} while(bio);
+		rq_for_each_bio(bio, req) {
+			struct bio_vec *bvec;
+			bio_for_each_segment(bvec, bio, i) {
+				result = sock_recv_bvec(sock, bvec);
+				if (result <= 0)
+					HARDFAIL("Recv data failed.");
+			}
+		}
 	}
 	DEBUG("done.\n");
 	return req;
@@ -538,8 +576,6 @@
  *  (Just smiley confuses emacs :-)
  */
 
-static struct request_queue nbd_queue;
-
 static int __init nbd_init(void)
 {
 	int err = -ENOMEM;
@@ -555,6 +591,17 @@
 		if (!disk)
 			goto out;
 		nbd_dev[i].disk = disk;
+		/*
+		 * The new linux 2.5 block layer implementation requires
+		 * every gendisk to have its very own request_queue struct.
+		 * These structs are big so we dynamically allocate them.
+		 */
+		disk->queue = kmalloc(sizeof(struct request_queue), GFP_KERNEL);
+		if (!disk->queue) {
+			put_disk(disk);
+			goto out;
+		}
+		blk_init_queue(disk->queue, do_nbd_request, &nbd_lock);
 	}
 
 	if (register_blkdev(NBD_MAJOR, "nbd")) {
@@ -564,7 +611,6 @@
 #ifdef MODULE
 	printk("nbd: registered device at major %d\n", NBD_MAJOR);
 #endif
-	blk_init_queue(&nbd_queue, do_nbd_request, &nbd_lock);
 	devfs_mk_dir("nbd");
 	for (i = 0; i < MAX_NBD; i++) {
 		struct gendisk *disk = nbd_dev[i].disk;
@@ -582,7 +628,6 @@
 		disk->first_minor = i;
 		disk->fops = &nbd_fops;
 		disk->private_data = &nbd_dev[i];
-		disk->queue = &nbd_queue;
 		sprintf(disk->disk_name, "nbd%d", i);
 		sprintf(disk->devfs_name, "nbd/%d", i);
 		set_capacity(disk, 0x3ffffe);
@@ -591,8 +636,10 @@
 
 	return 0;
 out:
-	while (i--)
+	while (i--) {
+		kfree(nbd_dev[i].disk->queue);
 		put_disk(nbd_dev[i].disk);
+	}
 	return err;
 }
 
@@ -600,12 +647,22 @@
 {
 	int i;
 	for (i = 0; i < MAX_NBD; i++) {
-		del_gendisk(nbd_dev[i].disk);
-		put_disk(nbd_dev[i].disk);
+		struct gendisk *disk = nbd_dev[i].disk;
+		if (disk) {
+			if (disk->queue) {
+				blk_cleanup_queue(disk->queue);
+				kfree(disk->queue);
+				disk->queue = NULL;
+			}
+			del_gendisk(disk);
+			put_disk(disk);
+		}
 	}
 	devfs_remove("nbd");
-	blk_cleanup_queue(&nbd_queue);
 	unregister_blkdev(NBD_MAJOR, "nbd");
+#ifdef MODULE
+	printk("nbd: unregistered device at major %d\n", NBD_MAJOR);
+#endif
 }
 
 module_init(nbd_init);

      reply	other threads:[~2003-06-23  1:13 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-06-22 19:28 [PATCH] nbd driver for 2.5+: fix for 2.5 block layer (improved) Lou Langholtz
2003-06-22 21:44 ` viro
2003-06-23  1:27   ` Lou Langholtz [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3EF65781.50708@aros.net \
    --to=ldl@aros.net \
    --cc=akpm@digeo.com \
    --cc=axboe@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@osdl.org \
    --cc=viro@parcelfarce.linux.theplanet.co.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox