From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1JrBBZ-0006n9-Ap for qemu-devel@nongnu.org; Wed, 30 Apr 2008 08:13:21 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1JrBBY-0006mw-OS for qemu-devel@nongnu.org; Wed, 30 Apr 2008 08:13:21 -0400 Received: from [199.232.76.173] (port=54357 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1JrBBY-0006mt-Gq for qemu-devel@nongnu.org; Wed, 30 Apr 2008 08:13:20 -0400 Received: from ns.suse.de ([195.135.220.2] helo=mx1.suse.de) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1JrBBX-0007vu-JQ for qemu-devel@nongnu.org; Wed, 30 Apr 2008 08:13:20 -0400 Message-ID: <48186134.7070303@suse.de> Date: Wed, 30 Apr 2008 14:08:20 +0200 From: Kevin Wolf MIME-Version: 1.0 Subject: Re: [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT) References: <4807514B.9040607@suse.de> <4815EE89.70407@suse.de> <1209459667.4328.7.camel@frecb07144> <48173591.9010609@suse.de> <1209484105.4248.27.camel@frecb07144> <48174B0B.5070904@suse.de> <1209487719.4248.43.camel@frecb07144> <48183A34.7030608@suse.de> <1209549574.4312.27.camel@frecb07144> In-Reply-To: <1209549574.4312.27.camel@frecb07144> Content-Type: multipart/mixed; boundary="------------070508020404030605030803" Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Laurent Vivier Cc: qemu-devel@nongnu.org This is a multi-part message in MIME format. --------------070508020404030605030803 Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit Laurent Vivier schrieb: > just a comment on the patch: perhaps you can call your field > "open_flags" instead of "flags", and perhaps you can merge your field > with "fd_open_flags" ? Here you are. ;-) Kevin --------------070508020404030605030803 Content-Type: text/x-patch; name="align-odirect-accesses.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="align-odirect-accesses.patch" Index: block-raw-posix.c =================================================================== --- block-raw-posix.c.orig +++ block-raw-posix.c @@ -77,10 +77,10 @@ typedef struct BDRVRawState { int fd; int type; + int open_flags; unsigned int lseek_err_cnt; #if defined(__linux__) /* linux floppy specific */ - int fd_open_flags; int64_t fd_open_time; int64_t fd_error_time; int fd_got_error; @@ -111,6 +111,7 @@ static int raw_open(BlockDriverState *bs open_flags |= O_DIRECT; #endif + s->open_flags = open_flags; s->type = FTYPE_FILE; fd = open(filename, open_flags, 0644); @@ -141,7 +142,14 @@ static int raw_open(BlockDriverState *bs #endif */ -static int raw_pread(BlockDriverState *bs, int64_t offset, +/* + * offset and count are in bytes, but must be multiples of 512 for files + * opened with O_DIRECT. buf must be aligned to 512 bytes then. + * + * This function may be called without alignment if the caller ensures + * that O_DIRECT is not in effect. + */ +static int raw_pread_aligned(BlockDriverState *bs, int64_t offset, uint8_t *buf, int count) { BDRVRawState *s = bs->opaque; @@ -194,7 +202,14 @@ label__raw_read__success: return ret; } -static int raw_pwrite(BlockDriverState *bs, int64_t offset, +/* + * offset and count are in bytes, but must be multiples of 512 for files + * opened with O_DIRECT. buf must be aligned to 512 bytes then. + * + * This function may be called without alignment if the caller ensures + * that O_DIRECT is not in effect. + */ +static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset, const uint8_t *buf, int count) { BDRVRawState *s = bs->opaque; @@ -230,6 +245,67 @@ label__raw_write__success: return ret; } + +#ifdef O_DIRECT +/* + * offset and count are in bytes and possibly not aligned. For files opened + * with O_DIRECT, necessary alignments are ensured before calling + * raw_pread_aligned to do the actual read. + */ +static int raw_pread(BlockDriverState *bs, int64_t offset, + uint8_t *buf, int count) +{ + BDRVRawState *s = bs->opaque; + + if (unlikely((s->open_flags & O_DIRECT) && + (offset % 512 || count % 512 || (uintptr_t) buf % 512))) { + + int ret; + + // Temporarily disable O_DIRECT for unaligned access + fcntl(s->fd, F_SETFL, s->open_flags & ~O_DIRECT); + ret = raw_pread_aligned(bs, offset, buf, count); + fcntl(s->fd, F_SETFL, s->open_flags); + + return ret; + + } else { + return raw_pread_aligned(bs, offset, buf, count); + } +} + +/* + * offset and count are in bytes and possibly not aligned. For files opened + * with O_DIRECT, necessary alignments are ensured before calling + * raw_pwrite_aligned to do the actual write. + */ +static int raw_pwrite(BlockDriverState *bs, int64_t offset, + const uint8_t *buf, int count) +{ + BDRVRawState *s = bs->opaque; + + if (unlikely((s->open_flags & O_DIRECT) && + (offset % 512 || count % 512 || (uintptr_t) buf % 512))) { + + int ret; + + // Temporarily disable O_DIRECT for unaligned access + fcntl(s->fd, F_SETFL, s->open_flags & ~O_DIRECT); + ret = raw_pwrite_aligned(bs, offset, buf, count); + fcntl(s->fd, F_SETFL, s->open_flags); + + return ret; + } else { + return raw_pwrite_aligned(bs, offset, buf, count); + } +} + +#else +#define raw_pread raw_pread_aligned +#define raw_pwrite raw_pwrite_aligned +#endif + + /***********************************************************/ /* Unix AIO using POSIX AIO */ @@ -402,10 +478,26 @@ static BlockDriverAIOCB *raw_aio_read(Bl BlockDriverCompletionFunc *cb, void *opaque) { RawAIOCB *acb; + BDRVRawState *s = bs->opaque; + + /* + * If O_DIRECT is used and the buffer is not aligned fall back + * to synchronous IO. + */ + if (unlikely((s->open_flags & O_DIRECT) && ((uintptr_t) buf % 512))) { + int ret; + + acb = qemu_aio_get(bs, cb, opaque); + ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors); + acb->common.cb(acb->common.opaque, ret); + qemu_aio_release(acb); + return &acb->common; + } acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); if (!acb) return NULL; + if (aio_read(&acb->aiocb) < 0) { qemu_aio_release(acb); return NULL; @@ -418,6 +510,21 @@ static BlockDriverAIOCB *raw_aio_write(B BlockDriverCompletionFunc *cb, void *opaque) { RawAIOCB *acb; + BDRVRawState *s = bs->opaque; + + /* + * If O_DIRECT is used and the buffer is not aligned fall back + * to synchronous IO. + */ + if (unlikely((s->open_flags & O_DIRECT) && ((uintptr_t) buf % 512))) { + int ret; + + acb = qemu_aio_get(bs, cb, opaque); + ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors); + acb->common.cb(acb->common.opaque, ret); + qemu_aio_release(acb); + return &acb->common; + } acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); if (!acb) @@ -679,7 +786,7 @@ static int hdev_open(BlockDriverState *b s->type = FTYPE_CD; } else if (strstart(filename, "/dev/fd", NULL)) { s->type = FTYPE_FD; - s->fd_open_flags = open_flags; + s->open_flags = open_flags; /* open will not fail even if no floppy is inserted */ open_flags |= O_NONBLOCK; } else if (strstart(filename, "/dev/sg", NULL)) { @@ -734,7 +841,7 @@ static int fd_open(BlockDriverState *bs) #endif return -EIO; } - s->fd = open(bs->filename, s->fd_open_flags); + s->fd = open(bs->filename, s->open_flags); if (s->fd < 0) { s->fd_error_time = qemu_get_clock(rt_clock); s->fd_got_error = 1; @@ -831,7 +938,7 @@ static int raw_eject(BlockDriverState *b close(s->fd); s->fd = -1; } - fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK); + fd = open(bs->filename, s->open_flags | O_NONBLOCK); if (fd >= 0) { if (ioctl(fd, FDEJECT, 0) < 0) perror("FDEJECT"); --------------070508020404030605030803--