From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Venkateswararao Jujjuri (JV)" Subject: [PATCH 5/5] [net/9p] Achieve zero copy on read path. Date: Tue, 17 Aug 2010 10:27:25 -0700 Message-ID: <1282066045-3945-6-git-send-email-jvrao@linux.vnet.ibm.com> References: <1282066045-3945-1-git-send-email-jvrao@linux.vnet.ibm.com> Cc: linux-fsdevel@vger.kernel.org, "Venkateswararao Jujjuri (JV)" , Badari Pulavarty To: v9fs-developer@lists.sourceforge.net Return-path: Received: from e32.co.us.ibm.com ([32.97.110.150]:44275 "EHLO e32.co.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757983Ab0HQRTo (ORCPT ); Tue, 17 Aug 2010 13:19:44 -0400 Received: from d03relay03.boulder.ibm.com (d03relay03.boulder.ibm.com [9.17.195.228]) by e32.co.us.ibm.com (8.14.4/8.13.1) with ESMTP id o7HHBUik008434 for ; Tue, 17 Aug 2010 11:11:30 -0600 Received: from d03av02.boulder.ibm.com (d03av02.boulder.ibm.com [9.17.195.168]) by d03relay03.boulder.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id o7HHJduY157834 for ; Tue, 17 Aug 2010 11:19:41 -0600 Received: from d03av02.boulder.ibm.com (loopback [127.0.0.1]) by d03av02.boulder.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id o7HHJb8S002511 for ; Tue, 17 Aug 2010 11:19:39 -0600 In-Reply-To: <1282066045-3945-1-git-send-email-jvrao@linux.vnet.ibm.com> Sender: linux-fsdevel-owner@vger.kernel.org List-ID: This patch avoids copy_to_user by employing get_user_pages_fast() on the udata buffer. This will eliminate an additional copy of kernel buffer into user buffer. We filter out the kernel buffers (kernel_read()) by comparing segments. 
Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Badari Pulavarty --- net/9p/client.c | 18 ++++++++++++++++-- net/9p/protocol.c | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 7ce58fb..d11c7dd 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1282,6 +1282,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, struct p9_client *clnt; struct p9_req_t *req; char *dataptr; + int page_direct = 0; P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, (long long unsigned) offset, count); @@ -1296,7 +1297,20 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); + if (clnt->trans_mod->capability && + clnt->trans_mod->capability(P9_CAP_GET_MAX_SG_PAGES) && + (udata && !segment_eq(get_fs(), KERNEL_DS))) + page_direct = 1; + + if (page_direct) { + rsize = count; + req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, + rsize, udata); + } else { + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, + rsize); + } + if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1314,7 +1328,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, memmove(data, dataptr, count); } - if (udata) { + if (udata && !page_direct) { err = copy_to_user(udata, dataptr, count); if (err) { err = -EFAULT; diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 97f313d..b82d117 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -194,6 +194,25 @@ pdu_write_u(struct p9_fcall *pdu, struct p9_client *c, const char __user *udata, return len; } + static size_t +pdu_write_ur(struct p9_fcall *pdu, struct p9_client *c, + const char __user *udata, size_t size) +{ + size_t len = size; + int max_req_sg_pages = 0; + + if (c->trans_mod->capability) + max_req_sg_pages = + 
c->trans_mod->capability(P9_CAP_GET_MAX_SG_PAGES); + if (max_req_sg_pages) { + len = pdu_fill_pages(pdu, udata, size, 1, max_req_sg_pages); + if (len < 0) + return len; + pdu->pdata_read_len = len; + } + return len; +} + /* b - int8_t w - int16_t @@ -534,8 +553,18 @@ p9pdu_vwritef(struct p9_fcall *pdu, struct p9_client *c, const char *fmt, if (!errcode && pdu_write_u(pdu, c, udata, count) < 0) errcode = -EFAULT; - } - break; + } + break; + case 'E':{ + int32_t count = va_arg(ap, int32_t); + const char __user *udata = + va_arg(ap, const void __user *); + errcode = p9pdu_writef(pdu, c, "d", count); + if (!errcode && + pdu_write_ur(pdu, c, udata, count) < 0) + errcode = -EFAULT; + } + break; case 'T':{ int16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); -- 1.6.5.2