public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 8/11] 2.6.18-mm3 pktcdvd: bio write congestion control
@ 2006-10-03 15:26 Thomas Maier
  2006-10-03 15:29 ` Jens Axboe
  0 siblings, 1 reply; 3+ messages in thread
From: Thomas Maier @ 2006-10-03 15:26 UTC (permalink / raw)
  To: linux-kernel@vger.kernel.org; +Cc: petero2@telia.com, akpm@osdl.org

[-- Attachment #1: Type: text/plain, Size: 297 bytes --]

Hello,

this patch adds the ability to control the size of the drivers
bio write queue.
See the Documentation/* files in the patch for further infos.

http://people.freenet.de/BalaGi/download/pktcdvd-8-wqueue-congestion_2.6.18.patch

Signed-off-by: Thomas Maier<balagi@justmail.de>

-Thomas Maier

[-- Attachment #2: pktcdvd-8-wqueue-congestion_2.6.18.patch --]
[-- Type: application/octet-stream, Size: 11816 bytes --]

diff -urpN 7-procfs-optional/Documentation/ABI/testing/sysfs-class-pktcdvd 8-wqueue-congestion/Documentation/ABI/testing/sysfs-class-pktcdvd
--- 7-procfs-optional/Documentation/ABI/testing/sysfs-class-pktcdvd	2006-10-03 13:17:15.000000000 +0200
+++ 8-wqueue-congestion/Documentation/ABI/testing/sysfs-class-pktcdvd	2006-10-03 15:46:49.000000000 +0200
@@ -38,6 +38,23 @@ these files in the sysfs:
     reset                 (0200) Write any value to it to reset
                                  pktcdvd device statistic values, like
                                  bytes read/written.
+    
+/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/
+    size                  (0444) Contains the size of the bio write
+                                 queue.
+
+    congestion_off        (0644) If bio write queue size is below
+                                 this mark, accept new bio requests
+                                 from the block layer.
+
+    congestion_on         (0644) If bio write queue size is higher
+                                 as this mark, do no longer accept
+                                 bio write requests from the block
+                                 layer and wait till the pktcdvd
+                                 device has processed enough bio's
+                                 so that bio write queue size is
+                                 below congestion off mark.
+
  
 
 Example:
diff -urpN 7-procfs-optional/Documentation/cdrom/packet-writing.txt 8-wqueue-congestion/Documentation/cdrom/packet-writing.txt
--- 7-procfs-optional/Documentation/cdrom/packet-writing.txt	2006-10-03 13:17:15.000000000 +0200
+++ 8-wqueue-congestion/Documentation/cdrom/packet-writing.txt	2006-10-03 15:52:00.000000000 +0200
@@ -125,8 +125,26 @@ To read pktcdvd device infos in human re
 For a description of the debugfs interface look into the file:
 
   Documentation/ABI/testing/debugfs-pktcdvd
-	                 
-  
+
+
+Bio write queue congestion marks
+--------------------------------
+The pktcdvd driver allows to adjust the behaviour of the
+internal bio write queue.
+This can be done with the two write_congestion_[on|off] marks.
+The driver does only accept up to write_congestion_on bio
+write request from the i/o block layer, and waits till the
+requests are processed by the mapped block device and
+the queue size is below the write_congestion_off mark.
+In previous versions of pktcdvd, the driver accepted all
+incoming bio write request. This led sometimes to kernel
+out of memory oops (maybe some bugs in the linux kernel ;)
+CAUTION: use this options only if you know what you do!
+The default settings for the congestion marks should be ok.
+The maximum size of the bio write queue can influence
+driver's cpu load and memory consumption.
+
+
 Links
 -----
 
diff -urpN 7-procfs-optional/drivers/block/pktcdvd.c 8-wqueue-congestion/drivers/block/pktcdvd.c
--- 7-procfs-optional/drivers/block/pktcdvd.c	2006-10-03 13:54:31.000000000 +0200
+++ 8-wqueue-congestion/drivers/block/pktcdvd.c	2006-10-03 13:54:43.000000000 +0200
@@ -88,6 +88,8 @@
 
 static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
 static int pktdev_major = 0; /* default: dynamic major number */
+static int write_congestion_on  = PKT_WRITE_CONGESTION_ON;
+static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
 static struct mutex ctl_mutex;	/* Serialize open/close/setup/teardown */
 static mempool_t *psd_pool;
 
@@ -133,6 +135,23 @@ static struct pktcdvd_device *pkt_find_d
 	return NULL;
 }
 
+static void init_write_congestion_marks(int* lo, int* hi)
+{
+	if (*hi > 0) {
+		*hi = max(*hi, PKT_WRITE_CONGESTION_MIN);
+		*hi = min(*hi, PKT_WRITE_CONGESTION_MAX);
+		if (*lo <= 0)
+			*lo = *hi - PKT_WRITE_CONGESTION_THRESHOLD;
+		else {
+			*lo = min(*lo, *hi - PKT_WRITE_CONGESTION_THRESHOLD);
+			*lo = max(*lo, PKT_WRITE_CONGESTION_MIN/4);
+		}
+	} else {
+		*hi = -1;
+		*lo = -1;
+	}
+}
+
 static void pkt_count_states(struct pktcdvd_device *pd, int *states)
 {
 	struct packet_data *pkt;
@@ -207,6 +226,10 @@ static int pkt_print_info(struct pktcdvd
 	pkt_count_states(pd, states);
 	PRINT("\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
 	      states[0], states[1], states[2], states[3], states[4], states[5]);
+		   
+	PRINT("\twrite congestion marks:\toff=%d on=%d\n",
+			pd->write_congestion_off,
+			pd->write_congestion_on);
 #undef PRINT
 	buf[blen-1] = 0;
 	return n;
@@ -262,6 +285,9 @@ static void pkt_kobj_release(struct kobj
 
 /**********************************************************
   /sys/class/pktcdvd/pktcdvd[0-7]/
+                     write_queue/size
+                     write_queue/congestion_off
+                     write_queue/congestion_on
                      stat/reset
                      stat/packets_started
                      stat/packets_finished
@@ -270,6 +296,17 @@ static void pkt_kobj_release(struct kobj
                      stat/kb_read_gather
  **********************************************************/
 
+DEF_ATTR(kobj_pkt_attr_wq1, "size", 0444);
+DEF_ATTR(kobj_pkt_attr_wq2, "congestion_off", 0644);
+DEF_ATTR(kobj_pkt_attr_wq3, "congestion_on",  0644);
+
+static struct attribute *kobj_pkt_attrs_wqueue[] = {
+	&kobj_pkt_attr_wq1,
+	&kobj_pkt_attr_wq2,
+	&kobj_pkt_attr_wq3,
+	NULL
+};
+
 DEF_ATTR(kobj_pkt_attr_st1, "reset", 0200);
 DEF_ATTR(kobj_pkt_attr_st2, "packets_started", 0444);
 DEF_ATTR(kobj_pkt_attr_st3, "packets_finished", 0444);
@@ -302,6 +339,7 @@ static ssize_t kobj_pkt_show(struct kobj
 {
 	struct pktcdvd_device *pd;
 	int n = 0;
+	int v;
 
 	data[0] = 0;
 	pd = to_pktcdvdkobj(kobj)->pd;
@@ -320,6 +358,24 @@ static ssize_t kobj_pkt_show(struct kobj
 
 	} else if (strcmp(attr->name, "kb_read_gather") == 0) {
 		n = sprintf(data, "%lu\n", pd->stats.secs_rg >> 1);
+	
+	} else if (strcmp(attr->name, "size") == 0) {
+		spin_lock(&pd->lock);
+		v = pd->bio_queue_size;
+		spin_unlock(&pd->lock);
+		n = sprintf(data, "%d\n", v);
+		
+	} else if (strcmp(attr->name, "congestion_off") == 0) {
+		spin_lock(&pd->lock);
+		v = pd->write_congestion_off;
+		spin_unlock(&pd->lock);
+		n = sprintf(data, "%d\n", v);
+
+	} else if (strcmp(attr->name, "congestion_on") == 0) {
+		spin_lock(&pd->lock);
+		v = pd->write_congestion_on;
+		spin_unlock(&pd->lock);
+		n = sprintf(data, "%d\n", v);
 	}
 	return n;
 }
@@ -328,6 +384,7 @@ static ssize_t kobj_pkt_store(struct kob
 			struct attribute *attr,
 			const char *data, size_t len)
 {
+	int val;
 	struct pktcdvd_device *pd;
 	DECLARE_BUF_AS_STRING(dbuf, data, len); /* ensure sscanf scans a string */
 
@@ -338,8 +395,24 @@ static ssize_t kobj_pkt_store(struct kob
 		pd->stats.pkt_ended = 0;
 		pd->stats.secs_w = 0;
 		pd->stats.secs_rg = 0;
-		pd->stats.secs_r = 0;		
+		pd->stats.secs_r = 0;
+
+	} else if (strcmp(attr->name, "congestion_off") == 0
+		   && sscanf(dbuf, "%d", &val) == 1) {
+		spin_lock(&pd->lock);
+		pd->write_congestion_off = val;
+		init_write_congestion_marks(&pd->write_congestion_off,
+					&pd->write_congestion_on);
+		spin_unlock(&pd->lock);
+	} else if (strcmp(attr->name, "congestion_on") == 0
+		   && sscanf(dbuf, "%d", &val) == 1) {
+		spin_lock(&pd->lock);
+		pd->write_congestion_on = val;
+		init_write_congestion_marks(&pd->write_congestion_off,
+					&pd->write_congestion_on);
+		spin_unlock(&pd->lock);
 	}
+
 	return len;
 }
 
@@ -352,6 +425,12 @@ static struct kobj_type kobj_pkt_type_st
 	.sysfs_ops = &kobj_pkt_ops,
 	.default_attrs = kobj_pkt_attrs_stat
 };
+static struct kobj_type kobj_pkt_type_wqueue = {
+	.release = pkt_kobj_release,
+	.sysfs_ops = &kobj_pkt_ops,
+	.default_attrs = kobj_pkt_attrs_wqueue
+};
+
 
 static void pkt_sysfs_dev_new(struct pktcdvd_device *pd)
 {
@@ -366,12 +445,16 @@ static void pkt_sysfs_dev_new(struct pkt
 		pd->kobj_stat = pkt_kobj_create(pd, "stat",
 					&pd->clsdev->kobj,
 					&kobj_pkt_type_stat);
+		pd->kobj_wqueue = pkt_kobj_create(pd, "write_queue",
+					&pd->clsdev->kobj,
+					&kobj_pkt_type_wqueue);
 	}
 }
 
 static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd)
 {
 	pkt_kobj_remove(pd->kobj_stat);
+	pkt_kobj_remove(pd->kobj_wqueue);
 	if (class_pktcdvd)
 		class_device_destroy(class_pktcdvd, pd->pkt_dev);
 }
@@ -1460,6 +1543,7 @@ static int pkt_handle_queue(struct pktcd
 	sector_t zone = 0; /* Suppress gcc warning */
 	struct pkt_rb_node *node, *first_node;
 	struct rb_node *n;
+	int wakeup;
 
 	VPRINTK("handle_queue\n");
 
@@ -1532,7 +1616,14 @@ try_next_bio:
 		pkt->write_size += bio->bi_size / CD_FRAMESIZE;
 		spin_unlock(&pkt->lock);
 	}
+	/* check write congestion marks, and if bio_queue_size is
+	   below, wake up any waiters */
+	wakeup = (pd->write_congestion_on > 0
+	 		&& pd->bio_queue_size <= pd->write_congestion_off
+			&& waitqueue_active(&pd->write_congestion_wqueue));
 	spin_unlock(&pd->lock);
+	if (wakeup)
+		wake_up(&pd->write_congestion_wqueue);
 
 	pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
 	pkt_set_state(pkt, PACKET_WAITING_STATE);
@@ -2724,6 +2815,32 @@ static int pkt_make_request(request_queu
 	spin_unlock(&pd->cdrw.active_list_lock);
 
 	/*
+	 * Test if there is enough room left in the bio work queue
+	 * (queue size >= congestion on mark).
+	 * If not, wait till the work queue size is below the congestion off mark.
+	 * This is similar to the get_request_wait() call made in the block
+	 * layer function __make_request() used for normal block i/o request
+	 * handling.
+	 */
+	spin_lock(&pd->lock);
+	if (pd->write_congestion_on > 0
+	     && pd->bio_queue_size >= pd->write_congestion_on) {
+		DEFINE_WAIT(wait);
+		/* wait till number of bio requests is low enough */
+		do {
+			spin_unlock(&pd->lock);
+			prepare_to_wait_exclusive(&pd->write_congestion_wqueue,
+					&wait, TASK_UNINTERRUPTIBLE);
+			io_schedule();
+			/* if we are here, bio_queue_size should be below
+			   congestion_off, but be sure and do a test */
+			spin_lock(&pd->lock);
+		} while(pd->bio_queue_size > pd->write_congestion_off);
+		finish_wait(&pd->write_congestion_wqueue, &wait);
+	}
+	spin_unlock(&pd->lock);
+
+	/*
 	 * No matching packet found. Store the bio in the work queue.
 	 */
 	node = mempool_alloc(pd->rb_pool, GFP_NOIO);
@@ -2951,6 +3068,10 @@ static int pkt_setup_dev(dev_t dev, dev_
 	init_waitqueue_head(&pd->wqueue);
 	pd->bio_queue = RB_ROOT;
 
+	init_waitqueue_head(&pd->write_congestion_wqueue);
+	pd->write_congestion_on  = write_congestion_on;
+	pd->write_congestion_off = write_congestion_off;
+
 	disk = alloc_disk(1);
 	if (!disk)
 		goto out_mem;
@@ -3057,6 +3178,9 @@ static int __init pkt_init(void)
 {
 	int ret;
 
+	init_write_congestion_marks(&write_congestion_off,
+				&write_congestion_on);
+
 	mutex_init(&ctl_mutex);
 
 	psd_pool = mempool_create_kmalloc_pool(PSD_POOL_SIZE,
diff -urpN 7-procfs-optional/include/linux/pktcdvd.h 8-wqueue-congestion/include/linux/pktcdvd.h
--- 7-procfs-optional/include/linux/pktcdvd.h	2006-10-03 13:09:18.000000000 +0200
+++ 8-wqueue-congestion/include/linux/pktcdvd.h	2006-10-03 13:37:01.000000000 +0200
@@ -123,6 +123,14 @@ struct pkt_ctrl_command {
 #endif
 
 
+/* default bio write queue congestion marks */
+#define PKT_WRITE_CONGESTION_ON 5000
+#define PKT_WRITE_CONGESTION_OFF 4500
+#define PKT_WRITE_CONGESTION_MAX (1024*1024)
+#define PKT_WRITE_CONGESTION_MIN 100
+#define PKT_WRITE_CONGESTION_THRESHOLD 25
+
+
 struct packet_settings
 {
 	__u32			size;		/* packet size in (512 byte) sectors */
@@ -291,7 +299,11 @@ struct pktcdvd_device
 
 	struct packet_iosched   iosched;
 	struct gendisk		*disk;
-	
+		
+	wait_queue_head_t	write_congestion_wqueue;
+	int			write_congestion_off;
+	int			write_congestion_on;
+
 	struct class_device	*clsdev;	/* sysfs pktcdvd[0-7] class dev */ 
 	struct pktcdvd_kobj	*kobj_stat;	/* sysfs pktcdvd[0-7]/stat/     */
 	struct pktcdvd_kobj	*kobj_wqueue;	/* sysfs pktcdvd[0-7]/write_queue/ */

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2006-10-09 10:03 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-03 15:26 [PATCH 8/11] 2.6.18-mm3 pktcdvd: bio write congestion control Thomas Maier
2006-10-03 15:29 ` Jens Axboe
2006-10-09 10:05   ` Thomas Maier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox