Index: block-raw-posix.c
===================================================================
--- block-raw-posix.c.orig
+++ block-raw-posix.c
@@ -77,6 +77,7 @@
 typedef struct BDRVRawState {
     int fd;
     int type;
+    int flags;
     unsigned int lseek_err_cnt;
 #if defined(__linux__)
     /* linux floppy specific */
@@ -111,6 +112,7 @@ static int raw_open(BlockDriverState *bs
         open_flags |= O_DIRECT;
 #endif
 
+    s->flags = open_flags;
     s->type = FTYPE_FILE;
 
     fd = open(filename, open_flags, 0644);
@@ -141,7 +143,14 @@ static int raw_open(BlockDriverState *bs
 #endif
 */
 
-static int raw_pread(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT; buf must then be aligned to 512 bytes as well.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
                      uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
@@ -194,7 +203,14 @@ label__raw_read__success:
     return ret;
 }
 
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT; buf must then be aligned to 512 bytes as well.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
                       const uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
@@ -230,6 +246,67 @@ label__raw_write__success:
     return ret;
 }
 
+
+#ifdef O_DIRECT
+/*
+ * offset and count are in bytes and possibly not aligned. For files opened
+ * with O_DIRECT, an unaligned request temporarily clears O_DIRECT on the fd
+ * with fcntl() around the raw_pread_aligned call that does the actual read.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (unlikely((s->flags & O_DIRECT) &&
+            (offset % 512 || count % 512 || (uintptr_t) buf % 512))) {
+
+        int ret;
+
+        /* Drop O_DIRECT for this unaligned access; fcntl() results are unchecked */
+        fcntl(s->fd, F_SETFL, s->flags & ~O_DIRECT);
+        ret = raw_pread_aligned(bs, offset, buf, count);
+        fcntl(s->fd, F_SETFL, s->flags);
+
+        return ret;
+
+    } else {
+        return raw_pread_aligned(bs, offset, buf, count);
+    }
+}
+
+/*
+ * offset and count are in bytes and possibly not aligned. For files opened
+ * with O_DIRECT, an unaligned request temporarily clears O_DIRECT on the fd
+ * with fcntl() around the raw_pwrite_aligned call that does the actual write.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (unlikely((s->flags & O_DIRECT) &&
+            (offset % 512 || count % 512 || (uintptr_t) buf % 512))) {
+
+        int ret;
+
+        /* Drop O_DIRECT for this unaligned access; fcntl() results are unchecked */
+        fcntl(s->fd, F_SETFL, s->flags & ~O_DIRECT);
+        ret = raw_pwrite_aligned(bs, offset, buf, count);
+        fcntl(s->fd, F_SETFL, s->flags);
+
+        return ret;
+    } else {
+        return raw_pwrite_aligned(bs, offset, buf, count);
+    }
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
 /***********************************************************/
 /* Unix AIO using POSIX AIO */
 
@@ -402,10 +479,26 @@ static BlockDriverAIOCB *raw_aio_read(Bl
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     RawAIOCB *acb;
+    BDRVRawState *s = bs->opaque;
+
+    /*
+     * If O_DIRECT is used and buf is not 512-byte aligned, fall back to
+     * synchronous IO; acb's callback runs before this function returns.
+     */
+    if (unlikely((s->flags & O_DIRECT) && ((uintptr_t) buf % 512))) {
+        int ret;
+
+        acb = qemu_aio_get(bs, cb, opaque); /* NOTE(review): not NULL-checked */
+        ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
+        acb->common.cb(acb->common.opaque, ret);
+        qemu_aio_release(acb);
+        return &acb->common; /* NOTE(review): acb was released above - confirm */
+    }
 
     acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
     if (!acb)
         return NULL;
+
     if (aio_read(&acb->aiocb) < 0) {
         qemu_aio_release(acb);
         return NULL;
@@ -418,6 +511,21 @@ static BlockDriverAIOCB *raw_aio_write(B
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     RawAIOCB *acb;
+    BDRVRawState *s = bs->opaque;
+
+    /*
+     * If O_DIRECT is used and buf is not 512-byte aligned, fall back to
+     * synchronous IO; acb's callback runs before this function returns.
+     */
+    if (unlikely((s->flags & O_DIRECT) && ((uintptr_t) buf % 512))) {
+        int ret;
+
+        acb = qemu_aio_get(bs, cb, opaque); /* NOTE(review): not NULL-checked */
+        ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
+        acb->common.cb(acb->common.opaque, ret);
+        qemu_aio_release(acb);
+        return &acb->common; /* NOTE(review): acb was released above - confirm */
+    }
 
     acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
     if (!acb)