From: "Thomas Maier" <balagi@justmail.de>
To: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Cc: "petero2@telia.com" <petero2@telia.com>, "akpm@osdl.org" <akpm@osdl.org>
Subject: [PATCH 8/11] 2.6.18-mm3 pktcdvd: bio write congestion control
Date: Tue, 03 Oct 2006 17:26:27 +0200 [thread overview]
Message-ID: <op.tguqidxpiudtyh@master> (raw)
[-- Attachment #1: Type: text/plain, Size: 297 bytes --]
Hello,
this patch adds the ability to control the size of the driver's
bio write queue.
See the Documentation/* files in the patch for further information.
http://people.freenet.de/BalaGi/download/pktcdvd-8-wqueue-congestion_2.6.18.patch
Signed-off-by: Thomas Maier <balagi@justmail.de>
-Thomas Maier
[-- Attachment #2: pktcdvd-8-wqueue-congestion_2.6.18.patch --]
[-- Type: application/octet-stream, Size: 11816 bytes --]
diff -urpN 7-procfs-optional/Documentation/ABI/testing/sysfs-class-pktcdvd 8-wqueue-congestion/Documentation/ABI/testing/sysfs-class-pktcdvd
--- 7-procfs-optional/Documentation/ABI/testing/sysfs-class-pktcdvd 2006-10-03 13:17:15.000000000 +0200
+++ 8-wqueue-congestion/Documentation/ABI/testing/sysfs-class-pktcdvd 2006-10-03 15:46:49.000000000 +0200
@@ -38,6 +38,23 @@ these files in the sysfs:
reset (0200) Write any value to it to reset
pktcdvd device statistic values, like
bytes read/written.
+
+/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/
+ size (0444) Contains the size of the bio write
+ queue.
+
+ congestion_off (0644) If bio write queue size is below
+ this mark, accept new bio requests
+ from the block layer.
+
+ congestion_on (0644) If the bio write queue size is higher
+ than this mark, no longer accept
+ bio write requests from the block
+ layer and wait until the pktcdvd
+ device has processed enough bios
+ that the bio write queue size is
+ below the congestion_off mark.
+
Example:
diff -urpN 7-procfs-optional/Documentation/cdrom/packet-writing.txt 8-wqueue-congestion/Documentation/cdrom/packet-writing.txt
--- 7-procfs-optional/Documentation/cdrom/packet-writing.txt 2006-10-03 13:17:15.000000000 +0200
+++ 8-wqueue-congestion/Documentation/cdrom/packet-writing.txt 2006-10-03 15:52:00.000000000 +0200
@@ -125,8 +125,26 @@ To read pktcdvd device infos in human re
For a description of the debugfs interface look into the file:
Documentation/ABI/testing/debugfs-pktcdvd
-
-
+
+
+Bio write queue congestion marks
+--------------------------------
+The pktcdvd driver allows you to adjust the behaviour of the
+internal bio write queue.
+This can be done with the two write_congestion_[on|off] marks.
+The driver accepts at most write_congestion_on bio write
+requests from the I/O block layer, and then waits until the
+requests are processed by the mapped block device and
+the queue size is below the write_congestion_off mark.
+In previous versions of pktcdvd, the driver accepted all
+incoming bio write requests. This sometimes led to kernel
+out-of-memory oopses (maybe due to some bugs in the Linux kernel ;).
+CAUTION: use these options only if you know what you are doing!
+The default settings for the congestion marks should be ok.
+The maximum size of the bio write queue can influence the
+driver's CPU load and memory consumption.
+
+
Links
-----
diff -urpN 7-procfs-optional/drivers/block/pktcdvd.c 8-wqueue-congestion/drivers/block/pktcdvd.c
--- 7-procfs-optional/drivers/block/pktcdvd.c 2006-10-03 13:54:31.000000000 +0200
+++ 8-wqueue-congestion/drivers/block/pktcdvd.c 2006-10-03 13:54:43.000000000 +0200
@@ -88,6 +88,8 @@
static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
static int pktdev_major = 0; /* default: dynamic major number */
+static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
+static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
static mempool_t *psd_pool;
@@ -133,6 +135,23 @@ static struct pktcdvd_device *pkt_find_d
return NULL;
}
+static void init_write_congestion_marks(int* lo, int* hi) /* sanitize the off mark (*lo) and on mark (*hi) in place */
+{
+ if (*hi > 0) { /* a positive on mark means throttling is in use */
+ *hi = max(*hi, PKT_WRITE_CONGESTION_MIN); /* clamp the on mark to [MIN, MAX] */
+ *hi = min(*hi, PKT_WRITE_CONGESTION_MAX);
+ if (*lo <= 0) /* no usable off mark given: derive it from the on mark */
+ *lo = *hi - PKT_WRITE_CONGESTION_THRESHOLD;
+ else {
+ *lo = min(*lo, *hi - PKT_WRITE_CONGESTION_THRESHOLD); /* keep the off mark at least THRESHOLD below the on mark */
+ *lo = max(*lo, PKT_WRITE_CONGESTION_MIN/4); /* and never below MIN/4 */
+ }
+ } else { /* on mark <= 0: set both marks to -1; users of the marks test "on > 0" before throttling */
+ *hi = -1;
+ *lo = -1;
+ }
+}
+
static void pkt_count_states(struct pktcdvd_device *pd, int *states)
{
struct packet_data *pkt;
@@ -207,6 +226,10 @@ static int pkt_print_info(struct pktcdvd
pkt_count_states(pd, states);
PRINT("\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
states[0], states[1], states[2], states[3], states[4], states[5]);
+
+ PRINT("\twrite congestion marks:\toff=%d on=%d\n",
+ pd->write_congestion_off,
+ pd->write_congestion_on);
#undef PRINT
buf[blen-1] = 0;
return n;
@@ -262,6 +285,9 @@ static void pkt_kobj_release(struct kobj
/**********************************************************
/sys/class/pktcdvd/pktcdvd[0-7]/
+ write_queue/size
+ write_queue/congestion_off
+ write_queue/congestion_on
stat/reset
stat/packets_started
stat/packets_finished
@@ -270,6 +296,17 @@ static void pkt_kobj_release(struct kobj
stat/kb_read_gather
**********************************************************/
+DEF_ATTR(kobj_pkt_attr_wq1, "size", 0444); /* write_queue/size: read-only for everyone */
+DEF_ATTR(kobj_pkt_attr_wq2, "congestion_off", 0644); /* write_queue/congestion_off: writable by owner, readable by all */
+DEF_ATTR(kobj_pkt_attr_wq3, "congestion_on", 0644); /* write_queue/congestion_on: writable by owner, readable by all */
+
+static struct attribute *kobj_pkt_attrs_wqueue[] = { /* attribute list used as default_attrs of kobj_pkt_type_wqueue */
+ &kobj_pkt_attr_wq1,
+ &kobj_pkt_attr_wq2,
+ &kobj_pkt_attr_wq3,
+ NULL /* list terminator */
+};
+
DEF_ATTR(kobj_pkt_attr_st1, "reset", 0200);
DEF_ATTR(kobj_pkt_attr_st2, "packets_started", 0444);
DEF_ATTR(kobj_pkt_attr_st3, "packets_finished", 0444);
@@ -302,6 +339,7 @@ static ssize_t kobj_pkt_show(struct kobj
{
struct pktcdvd_device *pd;
int n = 0;
+ int v;
data[0] = 0;
pd = to_pktcdvdkobj(kobj)->pd;
@@ -320,6 +358,24 @@ static ssize_t kobj_pkt_show(struct kobj
} else if (strcmp(attr->name, "kb_read_gather") == 0) {
n = sprintf(data, "%lu\n", pd->stats.secs_rg >> 1);
+
+ } else if (strcmp(attr->name, "size") == 0) {
+ spin_lock(&pd->lock);
+ v = pd->bio_queue_size;
+ spin_unlock(&pd->lock);
+ n = sprintf(data, "%d\n", v);
+
+ } else if (strcmp(attr->name, "congestion_off") == 0) {
+ spin_lock(&pd->lock);
+ v = pd->write_congestion_off;
+ spin_unlock(&pd->lock);
+ n = sprintf(data, "%d\n", v);
+
+ } else if (strcmp(attr->name, "congestion_on") == 0) {
+ spin_lock(&pd->lock);
+ v = pd->write_congestion_on;
+ spin_unlock(&pd->lock);
+ n = sprintf(data, "%d\n", v);
}
return n;
}
@@ -328,6 +384,7 @@ static ssize_t kobj_pkt_store(struct kob
struct attribute *attr,
const char *data, size_t len)
{
+ int val;
struct pktcdvd_device *pd;
DECLARE_BUF_AS_STRING(dbuf, data, len); /* ensure sscanf scans a string */
@@ -338,8 +395,24 @@ static ssize_t kobj_pkt_store(struct kob
pd->stats.pkt_ended = 0;
pd->stats.secs_w = 0;
pd->stats.secs_rg = 0;
- pd->stats.secs_r = 0;
+ pd->stats.secs_r = 0;
+
+ } else if (strcmp(attr->name, "congestion_off") == 0
+ && sscanf(dbuf, "%d", &val) == 1) {
+ spin_lock(&pd->lock);
+ pd->write_congestion_off = val;
+ init_write_congestion_marks(&pd->write_congestion_off,
+ &pd->write_congestion_on);
+ spin_unlock(&pd->lock);
+ } else if (strcmp(attr->name, "congestion_on") == 0
+ && sscanf(dbuf, "%d", &val) == 1) {
+ spin_lock(&pd->lock);
+ pd->write_congestion_on = val;
+ init_write_congestion_marks(&pd->write_congestion_off,
+ &pd->write_congestion_on);
+ spin_unlock(&pd->lock);
}
+
return len;
}
@@ -352,6 +425,12 @@ static struct kobj_type kobj_pkt_type_st
.sysfs_ops = &kobj_pkt_ops,
.default_attrs = kobj_pkt_attrs_stat
};
+static struct kobj_type kobj_pkt_type_wqueue = { /* kobject type passed to pkt_kobj_create() for the "write_queue" kobject */
+ .release = pkt_kobj_release,
+ .sysfs_ops = &kobj_pkt_ops, /* same show/store ops as kobj_pkt_type_stat; they dispatch on the attribute name */
+ .default_attrs = kobj_pkt_attrs_wqueue
+};
+
static void pkt_sysfs_dev_new(struct pktcdvd_device *pd)
{
@@ -366,12 +445,16 @@ static void pkt_sysfs_dev_new(struct pkt
pd->kobj_stat = pkt_kobj_create(pd, "stat",
&pd->clsdev->kobj,
&kobj_pkt_type_stat);
+ pd->kobj_wqueue = pkt_kobj_create(pd, "write_queue",
+ &pd->clsdev->kobj,
+ &kobj_pkt_type_wqueue);
}
}
static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd) /* remove the per-device sysfs kobjects, then the class device */
{
pkt_kobj_remove(pd->kobj_stat);
+ pkt_kobj_remove(pd->kobj_wqueue); /* counterpart of the "write_queue" kobject created in pkt_sysfs_dev_new() */
if (class_pktcdvd)
class_device_destroy(class_pktcdvd, pd->pkt_dev);
}
@@ -1460,6 +1543,7 @@ static int pkt_handle_queue(struct pktcd
sector_t zone = 0; /* Suppress gcc warning */
struct pkt_rb_node *node, *first_node;
struct rb_node *n;
+ int wakeup;
VPRINTK("handle_queue\n");
@@ -1532,7 +1616,14 @@ try_next_bio:
pkt->write_size += bio->bi_size / CD_FRAMESIZE;
spin_unlock(&pkt->lock);
}
+ /* check write congestion marks, and if bio_queue_size is
+ below, wake up any waiters */
+ wakeup = (pd->write_congestion_on > 0
+ && pd->bio_queue_size <= pd->write_congestion_off
+ && waitqueue_active(&pd->write_congestion_wqueue));
spin_unlock(&pd->lock);
+ if (wakeup)
+ wake_up(&pd->write_congestion_wqueue);
pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
pkt_set_state(pkt, PACKET_WAITING_STATE);
@@ -2724,6 +2815,32 @@ static int pkt_make_request(request_queu
spin_unlock(&pd->cdrw.active_list_lock);
/*
+ * Throttle writers: if the bio work queue has reached the congestion
+ * on mark (queue size >= congestion on mark), wait until the work
+ * queue size has dropped to the congestion off mark or below.
+ * This is similar to the get_request_wait() call made in the block
+ * layer function __make_request() used for normal block i/o request
+ * handling.
+ */
+ spin_lock(&pd->lock);
+ if (pd->write_congestion_on > 0
+ && pd->bio_queue_size >= pd->write_congestion_on) { /* an on mark <= 0 skips throttling entirely */
+ DEFINE_WAIT(wait);
+ /* wait till number of bio requests is low enough */
+ do {
+ spin_unlock(&pd->lock); /* pd->lock is not held while sleeping */
+ prepare_to_wait_exclusive(&pd->write_congestion_wqueue,
+ &wait, TASK_UNINTERRUPTIBLE);
+ io_schedule(); /* NOTE(review): the queue size is not re-checked after prepare_to_wait_exclusive(); a wake-up decided (via waitqueue_active()) before this task was queued looks like it would be missed -- confirm */
+ /* if we are here, bio_queue_size should be below
+ congestion_off, but be sure and do a test */
+ spin_lock(&pd->lock);
+ } while(pd->bio_queue_size > pd->write_congestion_off); /* loop ends once size <= off mark */
+ finish_wait(&pd->write_congestion_wqueue, &wait);
+ }
+ spin_unlock(&pd->lock);
+
+ /*
* No matching packet found. Store the bio in the work queue.
*/
node = mempool_alloc(pd->rb_pool, GFP_NOIO);
@@ -2951,6 +3068,10 @@ static int pkt_setup_dev(dev_t dev, dev_
init_waitqueue_head(&pd->wqueue);
pd->bio_queue = RB_ROOT;
+ init_waitqueue_head(&pd->write_congestion_wqueue);
+ pd->write_congestion_on = write_congestion_on;
+ pd->write_congestion_off = write_congestion_off;
+
disk = alloc_disk(1);
if (!disk)
goto out_mem;
@@ -3057,6 +3178,9 @@ static int __init pkt_init(void)
{
int ret;
+ init_write_congestion_marks(&write_congestion_off,
+ &write_congestion_on);
+
mutex_init(&ctl_mutex);
psd_pool = mempool_create_kmalloc_pool(PSD_POOL_SIZE,
diff -urpN 7-procfs-optional/include/linux/pktcdvd.h 8-wqueue-congestion/include/linux/pktcdvd.h
--- 7-procfs-optional/include/linux/pktcdvd.h 2006-10-03 13:09:18.000000000 +0200
+++ 8-wqueue-congestion/include/linux/pktcdvd.h 2006-10-03 13:37:01.000000000 +0200
@@ -123,6 +123,14 @@ struct pkt_ctrl_command {
#endif
+/* default bio write queue congestion marks and the limits applied to them */
+#define PKT_WRITE_CONGESTION_ON 5000 /* default on mark (bio write queue size) */
+#define PKT_WRITE_CONGESTION_OFF 4500 /* default off mark (bio write queue size) */
+#define PKT_WRITE_CONGESTION_MAX (1024*1024) /* upper clamp for the on mark */
+#define PKT_WRITE_CONGESTION_MIN 100 /* lower clamp for the on mark; MIN/4 is the floor for the off mark */
+#define PKT_WRITE_CONGESTION_THRESHOLD 25 /* minimum distance kept between on mark and off mark */
+
+
struct packet_settings
{
__u32 size; /* packet size in (512 byte) sectors */
@@ -291,7 +299,11 @@ struct pktcdvd_device
struct packet_iosched iosched;
struct gendisk *disk;
-
+
+ wait_queue_head_t write_congestion_wqueue;
+ int write_congestion_off;
+ int write_congestion_on;
+
struct class_device *clsdev; /* sysfs pktcdvd[0-7] class dev */
struct pktcdvd_kobj *kobj_stat; /* sysfs pktcdvd[0-7]/stat/ */
struct pktcdvd_kobj *kobj_wqueue; /* sysfs pktcdvd[0-7]/write_queue/ */
next reply other threads:[~2006-10-03 15:25 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-10-03 15:26 Thomas Maier [this message]
2006-10-03 15:29 ` [PATCH 8/11] 2.6.18-mm3 pktcdvd: bio write congestion control Jens Axboe
2006-10-09 10:05 ` Thomas Maier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=op.tguqidxpiudtyh@master \
--to=balagi@justmail.de \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=petero2@telia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.