* [PATCHSET] 9p: clean up a bit and use single poller for trans_fd
@ 2008-08-26 8:50 Tejun Heo
2008-08-26 8:50 ` [PATCH 1/6] 9p: implement proper trans module refcounting and unregistration Tejun Heo
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: Tejun Heo @ 2008-08-26 8:50 UTC (permalink / raw)
To: ericvh, rminnich, v9fs-developer, linux-kernel
This patchset cleans up 9p and 9p-trans_fd a bit and convert trans_fd
to use single poller instead of poll of pollers and contains the
following six patches.
0001-9p-implement-proper-trans-module-refcounting-and-un.patch
0002-9p-trans_fd-fix-trans_fd-p9_conn_destroy.patch
0003-9p-trans_fd-clean-up-p9_conn_create.patch
0004-9p-trans_fd-don-t-do-fs-segment-mangling-in-p9_fd_p.patch
0005-9p-trans_fd-fix-and-clean-up-module-init-exit-paths.patch
0006-9p-trans_fd-use-single-poller.patch
0001 fixes trans module registration and unregistration. 0002-0005
fix a few bugs in and clean up trans_fd. 0006 converts trans_fd to
use single poller instead of pool of pollers.
Although this patchset fixes a few problem cases but there still are
other synchronization issues in trans_fd. Most notably, a request
which is being flushed can be freed before r/w works are done with
them. What's necessary is probably flushing r/w works before actually
destroying the request from flush. Well, I guess that's for another
day.
This patchset is on top of the current linus#master (399d7f6b) and the
combined diffstat follows.
include/net/9p/9p.h | 1
include/net/9p/transport.h | 9 -
net/9p/client.c | 10 +
net/9p/mod.c | 92 ++++++++----
net/9p/trans_fd.c | 340 ++++++++++++++-------------------------------
net/9p/trans_virtio.c | 2
6 files changed, 198 insertions(+), 256 deletions(-)
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/6] 9p: implement proper trans module refcounting and unregistration
2008-08-26 8:50 [PATCHSET] 9p: clean up a bit and use single poller for trans_fd Tejun Heo
@ 2008-08-26 8:50 ` Tejun Heo
2008-08-26 8:50 ` [PATCH 2/6] 9p-trans_fd: fix trans_fd::p9_conn_destroy() Tejun Heo
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Tejun Heo @ 2008-08-26 8:50 UTC (permalink / raw)
To: ericvh, rminnich, v9fs-developer, linux-kernel; +Cc: Tejun Heo
9p trans modules aren't refcounted nor were they unregistered
properly. Fix it.
* Add 9p_trans_module->owner and reference the module on each trans
instance creation and put it on destruction.
* Protect v9fs_trans_list with a spinlock. This isn't strictly
necessary as the list is manipulated only during module loading /
unloading but it's a good idea to make the API safe.
* Unregister trans modules when the corresponding module is being
unloaded.
* While at it, kill unnecessary EXPORT_SYMBOL on p9_trans_fd_init().
Signed-off-by: Tejun Heo <tj@kernel.org>
---
include/net/9p/9p.h | 1 +
include/net/9p/transport.h | 9 +++-
net/9p/client.c | 10 ++++-
net/9p/mod.c | 92 ++++++++++++++++++++++++++++++++------------
net/9p/trans_fd.c | 11 +++++-
net/9p/trans_virtio.c | 2 +
6 files changed, 95 insertions(+), 30 deletions(-)
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index b3d3e27..c3626c0 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -596,4 +596,5 @@ int p9_idpool_check(int id, struct p9_idpool *p);
int p9_error_init(void);
int p9_errstr2errno(char *, int);
int p9_trans_fd_init(void);
+void p9_trans_fd_exit(void);
#endif /* NET_9P_H */
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 0db3a40..3ca7371 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -26,6 +26,8 @@
#ifndef NET_9P_TRANSPORT_H
#define NET_9P_TRANSPORT_H
+#include <linux/module.h>
+
/**
* enum p9_trans_status - different states of underlying transports
* @Connected: transport is connected and healthy
@@ -91,9 +93,12 @@ struct p9_trans_module {
int maxsize; /* max message size of transport */
int def; /* this transport should be default */
struct p9_trans * (*create)(const char *, char *, int, unsigned char);
+ struct module *owner;
};
void v9fs_register_trans(struct p9_trans_module *m);
-struct p9_trans_module *v9fs_match_trans(const substring_t *name);
-struct p9_trans_module *v9fs_default_trans(void);
+void v9fs_unregister_trans(struct p9_trans_module *m);
+struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name);
+struct p9_trans_module *v9fs_get_default_trans(void);
+void v9fs_put_trans(struct p9_trans_module *m);
#endif /* NET_9P_TRANSPORT_H */
diff --git a/net/9p/client.c b/net/9p/client.c
index 2ffe40c..10e3203 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -75,7 +75,6 @@ static int parse_opts(char *opts, struct p9_client *clnt)
int option;
int ret = 0;
- clnt->trans_mod = v9fs_default_trans();
clnt->dotu = 1;
clnt->msize = 8192;
@@ -108,7 +107,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
clnt->msize = option;
break;
case Opt_trans:
- clnt->trans_mod = v9fs_match_trans(&args[0]);
+ clnt->trans_mod = v9fs_get_trans_by_name(&args[0]);
break;
case Opt_legacy:
clnt->dotu = 0;
@@ -117,6 +116,10 @@ static int parse_opts(char *opts, struct p9_client *clnt)
continue;
}
}
+
+ if (!clnt->trans_mod)
+ clnt->trans_mod = v9fs_get_default_trans();
+
kfree(options);
return ret;
}
@@ -150,6 +153,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (!clnt)
return ERR_PTR(-ENOMEM);
+ clnt->trans_mod = NULL;
clnt->trans = NULL;
spin_lock_init(&clnt->lock);
INIT_LIST_HEAD(&clnt->fidlist);
@@ -235,6 +239,8 @@ void p9_client_destroy(struct p9_client *clnt)
clnt->trans = NULL;
}
+ v9fs_put_trans(clnt->trans_mod);
+
list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist)
p9_fid_destroy(fid);
diff --git a/net/9p/mod.c b/net/9p/mod.c
index bdee1fb..1084feb 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -31,6 +31,7 @@
#include <linux/parser.h>
#include <net/9p/transport.h>
#include <linux/list.h>
+#include <linux/spinlock.h>
#ifdef CONFIG_NET_9P_DEBUG
unsigned int p9_debug_level = 0; /* feature-rific global debug level */
@@ -44,8 +45,8 @@ MODULE_PARM_DESC(debug, "9P debugging level");
*
*/
+static DEFINE_SPINLOCK(v9fs_trans_lock);
static LIST_HEAD(v9fs_trans_list);
-static struct p9_trans_module *v9fs_default_transport;
/**
* v9fs_register_trans - register a new transport with 9p
@@ -54,48 +55,87 @@ static struct p9_trans_module *v9fs_default_transport;
*/
void v9fs_register_trans(struct p9_trans_module *m)
{
+ spin_lock(&v9fs_trans_lock);
list_add_tail(&m->list, &v9fs_trans_list);
- if (m->def)
- v9fs_default_transport = m;
+ spin_unlock(&v9fs_trans_lock);
}
EXPORT_SYMBOL(v9fs_register_trans);
/**
- * v9fs_match_trans - match transport versus registered transports
+ * v9fs_unregister_trans - unregister a 9p transport
+ * @m: the transport to remove
+ *
+ */
+void v9fs_unregister_trans(struct p9_trans_module *m)
+{
+ spin_lock(&v9fs_trans_lock);
+ list_del_init(&m->list);
+ spin_unlock(&v9fs_trans_lock);
+}
+EXPORT_SYMBOL(v9fs_unregister_trans);
+
+/**
+ * v9fs_get_trans_by_name - get transport with the matching name
* @name: string identifying transport
*
*/
-struct p9_trans_module *v9fs_match_trans(const substring_t *name)
+struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name)
{
- struct list_head *p;
- struct p9_trans_module *t = NULL;
-
- list_for_each(p, &v9fs_trans_list) {
- t = list_entry(p, struct p9_trans_module, list);
- if (strncmp(t->name, name->from, name->to-name->from) == 0)
- return t;
- }
- return NULL;
+ struct p9_trans_module *t, *found = NULL;
+
+ spin_lock(&v9fs_trans_lock);
+
+ list_for_each_entry(t, &v9fs_trans_list, list)
+ if (strncmp(t->name, name->from, name->to-name->from) == 0 &&
+ try_module_get(t->owner)) {
+ found = t;
+ break;
+ }
+
+ spin_unlock(&v9fs_trans_lock);
+ return found;
}
-EXPORT_SYMBOL(v9fs_match_trans);
+EXPORT_SYMBOL(v9fs_get_trans_by_name);
/**
- * v9fs_default_trans - returns pointer to default transport
+ * v9fs_get_default_trans - get the default transport
*
*/
-struct p9_trans_module *v9fs_default_trans(void)
+struct p9_trans_module *v9fs_get_default_trans(void)
{
- if (v9fs_default_transport)
- return v9fs_default_transport;
- else if (!list_empty(&v9fs_trans_list))
- return list_first_entry(&v9fs_trans_list,
- struct p9_trans_module, list);
- else
- return NULL;
+ struct p9_trans_module *t, *found = NULL;
+
+ spin_lock(&v9fs_trans_lock);
+
+ list_for_each_entry(t, &v9fs_trans_list, list)
+ if (t->def && try_module_get(t->owner)) {
+ found = t;
+ break;
+ }
+
+ if (!found)
+ list_for_each_entry(t, &v9fs_trans_list, list)
+ if (try_module_get(t->owner)) {
+ found = t;
+ break;
+ }
+
+ spin_unlock(&v9fs_trans_lock);
+ return found;
}
-EXPORT_SYMBOL(v9fs_default_trans);
+EXPORT_SYMBOL(v9fs_get_default_trans);
+/**
+ * v9fs_put_trans - put trans
+ * @m: transport to put
+ *
+ */
+void v9fs_put_trans(struct p9_trans_module *m)
+{
+ if (m)
+ module_put(m->owner);
+}
/**
* v9fs_init - Initialize module
@@ -120,6 +160,8 @@ static int __init init_p9(void)
static void __exit exit_p9(void)
{
printk(KERN_INFO "Unloading 9P2000 support\n");
+
+ p9_trans_fd_exit();
}
module_init(init_p9)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index cdf137a..6a32ffd 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1629,6 +1629,7 @@ static struct p9_trans_module p9_tcp_trans = {
.maxsize = MAX_SOCK_BUF,
.def = 1,
.create = p9_trans_create_tcp,
+ .owner = THIS_MODULE,
};
static struct p9_trans_module p9_unix_trans = {
@@ -1636,6 +1637,7 @@ static struct p9_trans_module p9_unix_trans = {
.maxsize = MAX_SOCK_BUF,
.def = 0,
.create = p9_trans_create_unix,
+ .owner = THIS_MODULE,
};
static struct p9_trans_module p9_fd_trans = {
@@ -1643,6 +1645,7 @@ static struct p9_trans_module p9_fd_trans = {
.maxsize = MAX_SOCK_BUF,
.def = 0,
.create = p9_trans_create_fd,
+ .owner = THIS_MODULE,
};
int p9_trans_fd_init(void)
@@ -1659,4 +1662,10 @@ int p9_trans_fd_init(void)
return 0;
}
-EXPORT_SYMBOL(p9_trans_fd_init);
+
+void p9_trans_fd_exit(void)
+{
+ v9fs_unregister_trans(&p9_tcp_trans);
+ v9fs_unregister_trans(&p9_unix_trans);
+ v9fs_unregister_trans(&p9_fd_trans);
+}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 42adc05..94912e0 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -528,6 +528,7 @@ static struct p9_trans_module p9_virtio_trans = {
.create = p9_virtio_create,
.maxsize = PAGE_SIZE*16,
.def = 0,
+ .owner = THIS_MODULE,
};
/* The standard init function */
@@ -545,6 +546,7 @@ static int __init p9_virtio_init(void)
static void __exit p9_virtio_cleanup(void)
{
unregister_virtio_driver(&p9_virtio_drv);
+ v9fs_unregister_trans(&p9_virtio_trans);
}
module_init(p9_virtio_init);
--
1.5.4.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/6] 9p-trans_fd: fix trans_fd::p9_conn_destroy()
2008-08-26 8:50 [PATCHSET] 9p: clean up a bit and use single poller for trans_fd Tejun Heo
2008-08-26 8:50 ` [PATCH 1/6] 9p: implement proper trans module refcounting and unregistration Tejun Heo
@ 2008-08-26 8:50 ` Tejun Heo
2008-08-26 8:50 ` [PATCH 3/6] 9p-trans_fd: clean up p9_conn_create() Tejun Heo
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Tejun Heo @ 2008-08-26 8:50 UTC (permalink / raw)
To: ericvh, rminnich, v9fs-developer, linux-kernel; +Cc: Tejun Heo
p9_conn_destroy() first kills all current requests by calling
p9_conn_cancel(), then waits for the request list to be cleared by
waiting on p9_conn->equeue. After that, polling is stopped and the
trans is destroyed. This sequence has a few problems.
* Read and write works were never cancelled and the p9_conn can be
destroyed while the works are running as r/w works remove requests
from the list and dereference the p9_conn from them.
* The list emptiness wait using p9_conn->equeue wouldn't trigger
because p9_conn_cancel() always clears all the lists and the only
way the wait can be triggered is to have another task to issue a
request between the slim window between p9_conn_cancel() and the
wait, which isn't safe under the current implementation with or
without the wait.
This patch fixes the problem by first stopping poll, which can
schedule r/w works, first and cancle r/w works which guarantees that
r/w works are not and will not run from that point and then calling
p9_conn_cancel() and do the rest of destruction.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
net/9p/trans_fd.c | 24 +++++-------------------
1 files changed, 5 insertions(+), 19 deletions(-)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6a32ffd..ee0d151 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -151,7 +151,6 @@ struct p9_mux_poll_task {
* @trans: reference to transport instance for this connection
* @tagpool: id accounting for transactions
* @err: error state
- * @equeue: event wait_q (?)
* @req_list: accounting for requests which have been sent
* @unsent_req_list: accounting for requests that haven't been sent
* @rcall: current response &p9_fcall structure
@@ -178,7 +177,6 @@ struct p9_conn {
struct p9_trans *trans;
struct p9_idpool *tagpool;
int err;
- wait_queue_head_t equeue;
struct list_head req_list;
struct list_head unsent_req_list;
struct p9_fcall *rcall;
@@ -430,7 +428,6 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
}
m->err = 0;
- init_waitqueue_head(&m->equeue);
INIT_LIST_HEAD(&m->req_list);
INIT_LIST_HEAD(&m->unsent_req_list);
m->rcall = NULL;
@@ -483,18 +480,13 @@ static void p9_conn_destroy(struct p9_conn *m)
{
P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
m->mux_list.prev, m->mux_list.next);
- p9_conn_cancel(m, -ECONNRESET);
-
- if (!list_empty(&m->req_list)) {
- /* wait until all processes waiting on this session exit */
- P9_DPRINTK(P9_DEBUG_MUX,
- "mux %p waiting for empty request queue\n", m);
- wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
- P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m,
- list_empty(&m->req_list));
- }
p9_mux_poll_stop(m);
+ cancel_work_sync(&m->rq);
+ cancel_work_sync(&m->wq);
+
+ p9_conn_cancel(m, -ECONNRESET);
+
m->trans = NULL;
p9_idpool_destroy(m->tagpool);
kfree(m);
@@ -840,8 +832,6 @@ static void p9_read_work(struct work_struct *work)
(*req->cb) (req, req->cba);
else
kfree(req->rcall);
-
- wake_up(&m->equeue);
}
} else {
if (err >= 0 && rcall->id != P9_RFLUSH)
@@ -984,8 +974,6 @@ static void p9_mux_flush_cb(struct p9_req *freq, void *a)
(*req->cb) (req, req->cba);
else
kfree(req->rcall);
-
- wake_up(&m->equeue);
}
kfree(freq->tcall);
@@ -1191,8 +1179,6 @@ void p9_conn_cancel(struct p9_conn *m, int err)
else
kfree(req->rcall);
}
-
- wake_up(&m->equeue);
}
/**
--
1.5.4.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/6] 9p-trans_fd: clean up p9_conn_create()
2008-08-26 8:50 [PATCHSET] 9p: clean up a bit and use single poller for trans_fd Tejun Heo
2008-08-26 8:50 ` [PATCH 1/6] 9p: implement proper trans module refcounting and unregistration Tejun Heo
2008-08-26 8:50 ` [PATCH 2/6] 9p-trans_fd: fix trans_fd::p9_conn_destroy() Tejun Heo
@ 2008-08-26 8:50 ` Tejun Heo
2008-08-26 8:50 ` [PATCH 4/6] 9p-trans_fd: don't do fs segment mangling in p9_fd_poll() Tejun Heo
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Tejun Heo @ 2008-08-26 8:50 UTC (permalink / raw)
To: ericvh, rminnich, v9fs-developer, linux-kernel; +Cc: Tejun Heo
* Use kzalloc() to allocate p9_conn and remove 0/NULL initializations.
* Clean up error return paths.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
net/9p/trans_fd.c | 20 ++++----------------
1 files changed, 4 insertions(+), 16 deletions(-)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index ee0d151..6c88e89 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -407,11 +407,11 @@ static void p9_mux_poll_stop(struct p9_conn *m)
static struct p9_conn *p9_conn_create(struct p9_trans *trans)
{
int i, n;
- struct p9_conn *m, *mtmp;
+ struct p9_conn *m;
P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans,
trans->msize);
- m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL);
+ m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
if (!m)
return ERR_PTR(-ENOMEM);
@@ -422,24 +422,14 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
m->trans = trans;
m->tagpool = p9_idpool_create();
if (IS_ERR(m->tagpool)) {
- mtmp = ERR_PTR(-ENOMEM);
kfree(m);
- return mtmp;
+ return ERR_PTR(-ENOMEM);
}
- m->err = 0;
INIT_LIST_HEAD(&m->req_list);
INIT_LIST_HEAD(&m->unsent_req_list);
- m->rcall = NULL;
- m->rpos = 0;
- m->rbuf = NULL;
- m->wpos = m->wsize = 0;
- m->wbuf = NULL;
INIT_WORK(&m->rq, p9_read_work);
INIT_WORK(&m->wq, p9_write_work);
- m->wsched = 0;
- memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
- m->poll_task = NULL;
n = p9_mux_poll_start(m);
if (n) {
kfree(m);
@@ -460,10 +450,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
if (IS_ERR(m->poll_waddr[i])) {
p9_mux_poll_stop(m);
- mtmp = (void *)m->poll_waddr; /* the error code */
kfree(m);
- m = mtmp;
- break;
+ return (void *)m->poll_waddr; /* the error code */
}
}
--
1.5.4.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/6] 9p-trans_fd: don't do fs segment mangling in p9_fd_poll()
2008-08-26 8:50 [PATCHSET] 9p: clean up a bit and use single poller for trans_fd Tejun Heo
` (2 preceding siblings ...)
2008-08-26 8:50 ` [PATCH 3/6] 9p-trans_fd: clean up p9_conn_create() Tejun Heo
@ 2008-08-26 8:50 ` Tejun Heo
2008-08-26 8:50 ` [PATCH 5/6] 9p-trans_fd: fix and clean up module init/exit paths Tejun Heo
2008-08-26 8:50 ` [PATCH 6/6] 9p-trans_fd: use single poller Tejun Heo
5 siblings, 0 replies; 7+ messages in thread
From: Tejun Heo @ 2008-08-26 8:50 UTC (permalink / raw)
To: ericvh, rminnich, v9fs-developer, linux-kernel; +Cc: Tejun Heo
p9_fd_poll() is never called with user pointers and f_op->poll()
doesn't expect its arguments to be from userland. There's no need to
set kernel ds before calling f_op->poll() from p9_fd_poll(). Remove
it.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
net/9p/trans_fd.c | 14 +++-----------
1 files changed, 3 insertions(+), 11 deletions(-)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6c88e89..f6d4af1 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1344,7 +1344,6 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
{
int ret, n;
struct p9_trans_fd *ts = NULL;
- mm_segment_t oldfs;
if (trans && trans->status == Connected)
ts = trans->priv;
@@ -1358,24 +1357,17 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
if (!ts->wr->f_op || !ts->wr->f_op->poll)
return -EIO;
- oldfs = get_fs();
- set_fs(get_ds());
-
ret = ts->rd->f_op->poll(ts->rd, pt);
if (ret < 0)
- goto end;
+ return ret;
if (ts->rd != ts->wr) {
n = ts->wr->f_op->poll(ts->wr, pt);
- if (n < 0) {
- ret = n;
- goto end;
- }
+ if (n < 0)
+ return n;
ret = (ret & ~POLLOUT) | (n & ~POLLIN);
}
-end:
- set_fs(oldfs);
return ret;
}
--
1.5.4.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 5/6] 9p-trans_fd: fix and clean up module init/exit paths
2008-08-26 8:50 [PATCHSET] 9p: clean up a bit and use single poller for trans_fd Tejun Heo
` (3 preceding siblings ...)
2008-08-26 8:50 ` [PATCH 4/6] 9p-trans_fd: don't do fs segment mangling in p9_fd_poll() Tejun Heo
@ 2008-08-26 8:50 ` Tejun Heo
2008-08-26 8:50 ` [PATCH 6/6] 9p-trans_fd: use single poller Tejun Heo
5 siblings, 0 replies; 7+ messages in thread
From: Tejun Heo @ 2008-08-26 8:50 UTC (permalink / raw)
To: ericvh, rminnich, v9fs-developer, linux-kernel; +Cc: Tejun Heo
trans_fd leaked p9_mux_wq on module unload. Fix it. While at it,
collapse p9_mux_global_init() into p9_trans_fd_init(). It's easier to
follow this way and the global poll_tasks array is about to removed
anyway.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
net/9p/trans_fd.c | 31 +++++++++++--------------------
1 files changed, 11 insertions(+), 20 deletions(-)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index f6d4af1..0b4eb5f 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -238,22 +238,6 @@ static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
static void p9_conn_cancel(struct p9_conn *m, int err);
-static int p9_mux_global_init(void)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
- p9_mux_poll_tasks[i].task = NULL;
-
- p9_mux_wq = create_workqueue("v9fs");
- if (!p9_mux_wq) {
- printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
- return -ENOMEM;
- }
-
- return 0;
-}
-
static u16 p9_mux_get_tag(struct p9_conn *m)
{
int tag;
@@ -1616,10 +1600,15 @@ static struct p9_trans_module p9_fd_trans = {
int p9_trans_fd_init(void)
{
- int ret = p9_mux_global_init();
- if (ret) {
- printk(KERN_WARNING "9p: starting mux failed\n");
- return ret;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
+ p9_mux_poll_tasks[i].task = NULL;
+
+ p9_mux_wq = create_workqueue("v9fs");
+ if (!p9_mux_wq) {
+ printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
+ return -ENOMEM;
}
v9fs_register_trans(&p9_tcp_trans);
@@ -1634,4 +1623,6 @@ void p9_trans_fd_exit(void)
v9fs_unregister_trans(&p9_tcp_trans);
v9fs_unregister_trans(&p9_unix_trans);
v9fs_unregister_trans(&p9_fd_trans);
+
+ destroy_workqueue(p9_mux_wq);
}
--
1.5.4.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 6/6] 9p-trans_fd: use single poller
2008-08-26 8:50 [PATCHSET] 9p: clean up a bit and use single poller for trans_fd Tejun Heo
` (4 preceding siblings ...)
2008-08-26 8:50 ` [PATCH 5/6] 9p-trans_fd: fix and clean up module init/exit paths Tejun Heo
@ 2008-08-26 8:50 ` Tejun Heo
5 siblings, 0 replies; 7+ messages in thread
From: Tejun Heo @ 2008-08-26 8:50 UTC (permalink / raw)
To: ericvh, rminnich, v9fs-developer, linux-kernel; +Cc: Tejun Heo
trans_fd used pool of upto 100 pollers to monitor the r/w fds. The
approach makes sense in userspace back when the only available
interfaces were poll(2) and select(2). As each event monitor -
trigger - handling iteration took O(n) where `n' is the number of
watched fds, it makes sense to spread them to many pollers such that
the `n' can be divided by the number of pollers. However, this
doesn't make any sense in kernel because persistent edge triggered
event monitoring is how the whole thing is implemented in the kernel
in the first place.
This patch converts trans_fd to use single poller which watches all
the fds instead of the poll of pollers approach. All the fds are
registered for monitoring on creation and only the fds with pending
events are scanned when something happens much like how epoll is
implemented.
This change makes trans_fd fd monitoring more efficient and simpler.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
net/9p/trans_fd.c | 252 ++++++++++++++++++-----------------------------------
1 files changed, 86 insertions(+), 166 deletions(-)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 0b4eb5f..6866131 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -44,7 +44,6 @@
#define P9_PORT 564
#define MAX_SOCK_BUF (64*1024)
#define ERREQFLUSH 1
-#define SCHED_TIMEOUT 10
#define MAXPOLLWADDR 2
/**
@@ -135,17 +134,16 @@ struct p9_req {
struct list_head req_list;
};
-struct p9_mux_poll_task {
- struct task_struct *task;
- struct list_head mux_list;
- int muxnum;
+struct p9_poll_wait {
+ struct p9_conn *conn;
+ wait_queue_t wait;
+ wait_queue_head_t *wait_addr;
};
/**
* struct p9_conn - fd mux connection state information
* @lock: protects mux_list (?)
* @mux_list: list link for mux to manage multiple connections (?)
- * @poll_task: task polling on this connection
* @msize: maximum size for connection (dup)
* @extended: 9p2000.u flag (dup)
* @trans: reference to transport instance for this connection
@@ -171,7 +169,6 @@ struct p9_mux_poll_task {
struct p9_conn {
spinlock_t lock; /* protect lock structure */
struct list_head mux_list;
- struct p9_mux_poll_task *poll_task;
int msize;
unsigned char extended;
struct p9_trans *trans;
@@ -185,8 +182,8 @@ struct p9_conn {
int wpos;
int wsize;
char *wbuf;
- wait_queue_t poll_wait[MAXPOLLWADDR];
- wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
+ struct list_head poll_pending_link;
+ struct p9_poll_wait poll_wait[MAXPOLLWADDR];
poll_table pt;
struct work_struct rq;
struct work_struct wq;
@@ -220,12 +217,10 @@ static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address,
static int p9_fd_write(struct p9_trans *trans, void *v, int len);
static int p9_fd_read(struct p9_trans *trans, void *v, int len);
-static DEFINE_MUTEX(p9_mux_task_lock);
+static DEFINE_SPINLOCK(p9_poll_lock);
+static LIST_HEAD(p9_poll_pending_list);
static struct workqueue_struct *p9_mux_wq;
-
-static int p9_mux_num;
-static int p9_mux_poll_task_num;
-static struct p9_mux_poll_task p9_mux_poll_tasks[100];
+static struct task_struct *p9_poll_task;
static void p9_conn_destroy(struct p9_conn *);
static unsigned int p9_fd_poll(struct p9_trans *trans,
@@ -255,130 +250,23 @@ static void p9_mux_put_tag(struct p9_conn *m, u16 tag)
p9_idpool_put(tag, m->tagpool);
}
-/**
- * p9_mux_calc_poll_procs - calculates the number of polling procs
- * @muxnum: number of mounts
- *
- * Calculation is based on the number of mounted v9fs filesystems.
- * The current implementation returns sqrt of the number of mounts.
- */
-
-static int p9_mux_calc_poll_procs(int muxnum)
-{
- int n;
-
- if (p9_mux_poll_task_num)
- n = muxnum / p9_mux_poll_task_num +
- (muxnum % p9_mux_poll_task_num ? 1 : 0);
- else
- n = 1;
-
- if (n > ARRAY_SIZE(p9_mux_poll_tasks))
- n = ARRAY_SIZE(p9_mux_poll_tasks);
-
- return n;
-}
-
-static int p9_mux_poll_start(struct p9_conn *m)
+static void p9_mux_poll_stop(struct p9_conn *m)
{
- int i, n;
- struct p9_mux_poll_task *vpt, *vptlast;
- struct task_struct *pproc;
-
- P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num,
- p9_mux_poll_task_num);
- mutex_lock(&p9_mux_task_lock);
-
- n = p9_mux_calc_poll_procs(p9_mux_num + 1);
- if (n > p9_mux_poll_task_num) {
- for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
- if (p9_mux_poll_tasks[i].task == NULL) {
- vpt = &p9_mux_poll_tasks[i];
- P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n",
- vpt);
- pproc = kthread_create(p9_poll_proc, vpt,
- "v9fs-poll");
-
- if (!IS_ERR(pproc)) {
- vpt->task = pproc;
- INIT_LIST_HEAD(&vpt->mux_list);
- vpt->muxnum = 0;
- p9_mux_poll_task_num++;
- wake_up_process(vpt->task);
- }
- break;
- }
- }
-
- if (i >= ARRAY_SIZE(p9_mux_poll_tasks))
- P9_DPRINTK(P9_DEBUG_ERROR,
- "warning: no free poll slots\n");
- }
+ unsigned long flags;
+ int i;
- n = (p9_mux_num + 1) / p9_mux_poll_task_num +
- ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0);
-
- vptlast = NULL;
- for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
- vpt = &p9_mux_poll_tasks[i];
- if (vpt->task != NULL) {
- vptlast = vpt;
- if (vpt->muxnum < n) {
- P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
- list_add(&m->mux_list, &vpt->mux_list);
- vpt->muxnum++;
- m->poll_task = vpt;
- memset(&m->poll_waddr, 0,
- sizeof(m->poll_waddr));
- init_poll_funcptr(&m->pt, p9_pollwait);
- break;
- }
- }
- }
+ for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+ struct p9_poll_wait *pwait = &m->poll_wait[i];
- if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) {
- if (vptlast == NULL) {
- mutex_unlock(&p9_mux_task_lock);
- return -ENOMEM;
+ if (pwait->wait_addr) {
+ remove_wait_queue(pwait->wait_addr, &pwait->wait);
+ pwait->wait_addr = NULL;
}
-
- P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
- list_add(&m->mux_list, &vptlast->mux_list);
- vptlast->muxnum++;
- m->poll_task = vptlast;
- memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
- init_poll_funcptr(&m->pt, p9_pollwait);
}
- p9_mux_num++;
- mutex_unlock(&p9_mux_task_lock);
-
- return 0;
-}
-
-static void p9_mux_poll_stop(struct p9_conn *m)
-{
- int i;
- struct p9_mux_poll_task *vpt;
-
- mutex_lock(&p9_mux_task_lock);
- vpt = m->poll_task;
- list_del(&m->mux_list);
- for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
- if (m->poll_waddr[i] != NULL) {
- remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
- m->poll_waddr[i] = NULL;
- }
- }
- vpt->muxnum--;
- if (!vpt->muxnum) {
- P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt);
- kthread_stop(vpt->task);
- vpt->task = NULL;
- p9_mux_poll_task_num--;
- }
- p9_mux_num--;
- mutex_unlock(&p9_mux_task_lock);
+ spin_lock_irqsave(&p9_poll_lock, flags);
+ list_del_init(&m->poll_pending_link);
+ spin_unlock_irqrestore(&p9_poll_lock, flags);
}
/**
@@ -414,11 +302,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
INIT_LIST_HEAD(&m->unsent_req_list);
INIT_WORK(&m->rq, p9_read_work);
INIT_WORK(&m->wq, p9_write_work);
- n = p9_mux_poll_start(m);
- if (n) {
- kfree(m);
- return ERR_PTR(n);
- }
+ INIT_LIST_HEAD(&m->poll_pending_link);
+ init_poll_funcptr(&m->pt, p9_pollwait);
n = p9_fd_poll(trans, &m->pt);
if (n & POLLIN) {
@@ -431,11 +316,12 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
set_bit(Wpending, &m->wsched);
}
- for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
- if (IS_ERR(m->poll_waddr[i])) {
+ for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+ if (IS_ERR(m->poll_wait[i].wait_addr)) {
p9_mux_poll_stop(m);
kfree(m);
- return (void *)m->poll_waddr; /* the error code */
+ /* return the error code */
+ return (void *)m->poll_wait[i].wait_addr;
}
}
@@ -464,6 +350,23 @@ static void p9_conn_destroy(struct p9_conn *m)
kfree(m);
}
+static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+ struct p9_poll_wait *pwait =
+ container_of(wait, struct p9_poll_wait, wait);
+ struct p9_conn *m = pwait->conn;
+ unsigned long flags;
+ DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
+
+ spin_lock_irqsave(&p9_poll_lock, flags);
+ if (list_empty(&m->poll_pending_link))
+ list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
+ spin_unlock_irqrestore(&p9_poll_lock, flags);
+
+ /* perform the default wake up operation */
+ return default_wake_function(&dummy_wait, mode, sync, key);
+}
+
/**
* p9_pollwait - add poll task to the wait queue
* @filp: file pointer being polled
@@ -476,29 +379,32 @@ static void p9_conn_destroy(struct p9_conn *m)
static void
p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
{
+ struct p9_conn *m = container_of(p, struct p9_conn, pt);
+ struct p9_poll_wait *pwait = NULL;
int i;
- struct p9_conn *m;
- m = container_of(p, struct p9_conn, pt);
- for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
- if (m->poll_waddr[i] == NULL)
+ for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+ if (m->poll_wait[i].wait_addr == NULL) {
+ pwait = &m->poll_wait[i];
break;
+ }
+ }
- if (i >= ARRAY_SIZE(m->poll_waddr)) {
+ if (!pwait) {
P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
return;
}
- m->poll_waddr[i] = wait_address;
-
if (!wait_address) {
P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n");
- m->poll_waddr[i] = ERR_PTR(-EIO);
+ pwait->wait_addr = ERR_PTR(-EIO);
return;
}
- init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
- add_wait_queue(wait_address, &m->poll_wait[i]);
+ pwait->conn = m;
+ pwait->wait_addr = wait_address;
+ init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
+ add_wait_queue(wait_address, &pwait->wait);
}
/**
@@ -553,23 +459,34 @@ static void p9_poll_mux(struct p9_conn *m)
static int p9_poll_proc(void *a)
{
- struct p9_conn *m, *mtmp;
- struct p9_mux_poll_task *vpt;
+ unsigned long flags;
- vpt = a;
- P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt);
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
+ P9_DPRINTK(P9_DEBUG_MUX, "start %p\n", current);
+ repeat:
+ spin_lock_irqsave(&p9_poll_lock, flags);
+ while (!list_empty(&p9_poll_pending_list)) {
+ struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
+ struct p9_conn,
+ poll_pending_link);
+ list_del_init(&conn->poll_pending_link);
+ spin_unlock_irqrestore(&p9_poll_lock, flags);
- list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
- p9_poll_mux(m);
- }
+ p9_poll_mux(conn);
- P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
- schedule_timeout(SCHED_TIMEOUT * HZ);
+ spin_lock_irqsave(&p9_poll_lock, flags);
}
+ spin_unlock_irqrestore(&p9_poll_lock, flags);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (list_empty(&p9_poll_pending_list)) {
+ P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
+ schedule();
+ }
__set_current_state(TASK_RUNNING);
+
+ if (!kthread_should_stop())
+ goto repeat;
+
P9_DPRINTK(P9_DEBUG_MUX, "finish\n");
return 0;
}
@@ -1600,17 +1517,19 @@ static struct p9_trans_module p9_fd_trans = {
int p9_trans_fd_init(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
- p9_mux_poll_tasks[i].task = NULL;
-
p9_mux_wq = create_workqueue("v9fs");
if (!p9_mux_wq) {
printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
return -ENOMEM;
}
+ p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
+ if (IS_ERR(p9_poll_task)) {
+ destroy_workqueue(p9_mux_wq);
+ printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
+ return PTR_ERR(p9_poll_task);
+ }
+
v9fs_register_trans(&p9_tcp_trans);
v9fs_register_trans(&p9_unix_trans);
v9fs_register_trans(&p9_fd_trans);
@@ -1620,6 +1539,7 @@ int p9_trans_fd_init(void)
void p9_trans_fd_exit(void)
{
+ kthread_stop(p9_poll_task);
v9fs_unregister_trans(&p9_tcp_trans);
v9fs_unregister_trans(&p9_unix_trans);
v9fs_unregister_trans(&p9_fd_trans);
--
1.5.4.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2008-08-26 8:54 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-08-26 8:50 [PATCHSET] 9p: clean up a bit and use single poller for trans_fd Tejun Heo
2008-08-26 8:50 ` [PATCH 1/6] 9p: implement proper trans module refcounting and unregistration Tejun Heo
2008-08-26 8:50 ` [PATCH 2/6] 9p-trans_fd: fix trans_fd::p9_conn_destroy() Tejun Heo
2008-08-26 8:50 ` [PATCH 3/6] 9p-trans_fd: clean up p9_conn_create() Tejun Heo
2008-08-26 8:50 ` [PATCH 4/6] 9p-trans_fd: don't do fs segment mangling in p9_fd_poll() Tejun Heo
2008-08-26 8:50 ` [PATCH 5/6] 9p-trans_fd: fix and clean up module init/exit paths Tejun Heo
2008-08-26 8:50 ` [PATCH 6/6] 9p-trans_fd: use single poller Tejun Heo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox