From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1LiZa3-0007by-5R for qemu-devel@nongnu.org; Sat, 14 Mar 2009 15:31:35 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1LiZZx-0007Z7-TG for qemu-devel@nongnu.org; Sat, 14 Mar 2009 15:31:34 -0400 Received: from [199.232.76.173] (port=36291 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1LiZZx-0007Z1-Mi for qemu-devel@nongnu.org; Sat, 14 Mar 2009 15:31:29 -0400 Received: from verein.lst.de ([213.95.11.210]:58746) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_3DES_EDE_CBC_SHA1:24) (Exim 4.60) (envelope-from ) id 1LiZZx-000113-41 for qemu-devel@nongnu.org; Sat, 14 Mar 2009 15:31:29 -0400 Received: from verein.lst.de (localhost [127.0.0.1]) by verein.lst.de (8.12.3/8.12.3/Debian-7.1) with ESMTP id n2EJVRIF003934 (version=TLSv1/SSLv3 cipher=EDH-RSA-DES-CBC3-SHA bits=168 verify=NO) for ; Sat, 14 Mar 2009 20:31:27 +0100 Received: (from hch@localhost) by verein.lst.de (8.12.3/8.12.3/Debian-6.6) id n2EJVRkW003932 for qemu-devel@nongnu.org; Sat, 14 Mar 2009 20:31:27 +0100 Date: Sat, 14 Mar 2009 20:31:27 +0100 From: Christoph Hellwig Message-ID: <20090314193127.GC3799@lst.de> References: <20090314192701.GA3497@lst.de> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20090314192701.GA3497@lst.de> Subject: [Qemu-devel] [PATCH 6/6] experimental native preadv/pwritev support for Linux Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org This ties up Gerd Hoffmann's unmerged preadv/pwritev syscalls to qemu. Use with care as the syscall numbers aren't finalized yet. If any of the BSD folks are interested, it should be trivial to tie this up for the preadv/pwritev syscalls that have been around there for a while. 
Probably wants some optimization to not try preadv/pwritev again once we got the first ENOSYS. Signed-off-by: Christoph Hellwig Index: qemu/posix-aio-compat.c =================================================================== --- qemu.orig/posix-aio-compat.c 2009-03-14 18:30:35.000000000 +0100 +++ qemu/posix-aio-compat.c 2009-03-14 18:30:36.000000000 +0100 @@ -76,6 +76,45 @@ static void thread_create(pthread_t *thr if (ret) die2(ret, "pthread_create"); } +#if defined (__linux__) && defined(__i386__) + +#define __NR_preadv 333 +#define __NR_pwritev 334 + +static ssize_t +qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + uint32_t pos_high = offset >> 32; + uint32_t pos_low = offset; + + return syscall(__NR_preadv, fd, iov, (unsigned long)nr_iov, pos_high, pos_low); +} + +static ssize_t +qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + uint32_t pos_high = offset >> 32; + uint32_t pos_low = offset; + + return syscall(__NR_pwritev, fd, iov, (unsigned long)nr_iov, pos_high, pos_low); +} + +#else + +static ssize_t +qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return -ENOSYS; +} + +static ssize_t +qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return -ENOSYS; +} + +#endif + /* * Check if we need to copy the data in the aiocb into a new * properly aligned buffer. 
@@ -93,6 +132,29 @@ static int aiocb_needs_copy(struct qemu_ return 0; } +static size_t handle_aiocb_vector(struct qemu_paiocb *aiocb) +{ + size_t offset = 0; + ssize_t len; + + do { + if (aiocb->is_write) + len = qemu_pwritev(aiocb->aio_fildes, + aiocb->aio_iov, + aiocb->aio_niov, + aiocb->aio_offset + offset); + else + len = qemu_preadv(aiocb->aio_fildes, + aiocb->aio_iov, + aiocb->aio_niov, + aiocb->aio_offset + offset); + } while (len == -1 && errno == EINTR); + + if (len == -1) + return -errno; + return len; +} + static size_t handle_aiocb_linear(struct qemu_paiocb *aiocb, char *buf) { size_t offset = 0; @@ -129,12 +191,31 @@ static size_t handle_aiocb(struct qemu_p size_t nbytes; char *buf; - if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) { + if (!aiocb_needs_copy(aiocb)) { /* * If there is just a single buffer, and it is properly aligned * we can just use plain pread/pwrite without any problems. */ - return handle_aiocb_linear(aiocb, aiocb->aio_iov->iov_base); + if (aiocb->aio_niov == 1) + return handle_aiocb_linear(aiocb, aiocb->aio_iov->iov_base); + + /* + * We have more than one iovec, and all are properly aligned. + * + * Try preadv/pwritev first and fall back to linearizing the + * buffer if it's not supported. + */ + nbytes = handle_aiocb_vector(aiocb); + if (nbytes == aiocb->aio_nbytes) + return nbytes; + if (nbytes < 0 && nbytes != -ENOSYS) + return nbytes; + + /* + * XXX(hch): short read/write. no easy way to handle the reminder + * using these interfaces. For now retry using plain + * pread/pwrite? + */ } /*