Allow the block layer to operate on images opened with O_DIRECT.

O_DIRECT requires the buffer address, the file offset and the transfer
length to be multiples of 512 bytes.  This patch:

 - block-raw-posix.c: when raw_pread()/raw_pwrite() receive a request
   whose count, offset or buffer address is not 512-byte aligned,
   temporarily clear O_DIRECT on the descriptor (raw_save_directio())
   and put the saved flags back, after an fdatasync(), on every exit
   path patched here (raw_restore_directio()).  A failing
   fcntl(F_GETFL) is treated as "nothing to save" so that -1 is never
   used as a flag mask.
 - block.c: bdrv_commit(), bdrv_pread_em() and bdrv_pwrite_em() use a
   512-byte aligned on-stack sector buffer; the "in place" part of the
   emulated pread/pwrite now goes through that buffer one sector at a
   time.
 - block-qcow2.c: the buffers changed below are allocated with
   qemu_memalign(512, ...); former qemu_mallocz() users are zeroed with
   an explicit memset().
 - vl.c: QEMUFile.buf becomes a 512-byte aligned heap buffer allocated
   in qemu_fopen_bdrv() and released in qemu_fclose(); the -drive help
   text gets its missing "\n".
 - qemu-img.c: add qemu_memalign() next to the other qemu_* allocators.

NOTE(review): only qemu_fopen_bdrv() is changed to allocate f->buf.
Confirm that the file-backed qemu_fopen() path does not use f->buf
while it is still NULL now that buf is a pointer.

NOTE(review): indentation and blank context lines were rebuilt from a
whitespace-collapsed copy of this patch; apply with "patch -l" if the
context does not match exactly.

---
 block-qcow2.c | 40 +++++++++++++++++++++++-----------------
 block-raw-posix.c | 31 +++++++++++++++++++++++++++++++
 block.c | 35 ++++++++++++++++++++++++-----------
 qemu-img.c | 16 ++++++++++++++++
 vl.c | 11 +++++++++--
 5 files changed, 103 insertions(+), 30 deletions(-)

Index: qemu/block-raw-posix.c
===================================================================
--- qemu.orig/block-raw-posix.c 2008-01-22 10:12:20.000000000 +0100
+++ qemu/block-raw-posix.c 2008-01-22 11:10:41.000000000 +0100
@@ -141,16 +141,39 @@ static int raw_open(BlockDriverState *bs
 #endif
 */
 
+static long raw_save_directio(int fd)
+{
+    long fd_arg;
+
+    fd_arg = fcntl(fd, F_GETFL);
+    if (fd_arg == -1 || (fd_arg & O_DIRECT) == 0)
+        return 0;
+    fcntl(fd, F_SETFL, fd_arg & ~O_DIRECT);
+    return fd_arg;
+}
+
+static void raw_restore_directio(int fd, long fd_arg)
+{
+    if (fd_arg) {
+        fdatasync(fd);
+        fcntl(fd, F_SETFL, fd_arg);
+    }
+}
+
 static int raw_pread(BlockDriverState *bs, int64_t offset,
                      uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
+    long fd_arg = 0;
     int ret;
 
     ret = fd_open(bs);
     if (ret < 0)
         return ret;
 
+    if ((count & 0x1FF) || (offset & 0x1FF) || ((long)buf & 0x1FF))
+        fd_arg = raw_save_directio(s->fd);
+
     if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
         ++(s->lseek_err_cnt);
         if(s->lseek_err_cnt <= 10) {
@@ -159,6 +182,7 @@ static int raw_pread(BlockDriverState *b
                               s->fd, bs->filename, offset, buf, count,
                               bs->total_sectors, errno, strerror(errno));
         }
+        raw_restore_directio(s->fd, fd_arg);
         return -1;
     }
     s->lseek_err_cnt=0;
@@ -190,6 +214,7 @@ static int raw_pread(BlockDriverState *b
     }
 
 label__raw_read__success:
+    raw_restore_directio(s->fd, fd_arg);
 
     return ret;
 }
@@ -198,12 +223,16 @@ static int raw_pwrite(BlockDriverState *
                       const uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
+    long fd_arg = 0;
     int ret;
 
     ret = fd_open(bs);
     if (ret < 0)
         return ret;
 
+    if ((count & 0x1FF) || (offset & 0x1FF) || ((long)buf & 0x1FF))
+        fd_arg = raw_save_directio(s->fd);
+
     if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
         ++(s->lseek_err_cnt);
         if(s->lseek_err_cnt) {
@@ -212,6 +241,7 @@ static int raw_pwrite(BlockDriverState *
                               s->fd, bs->filename, offset, buf, count,
                               bs->total_sectors, errno, strerror(errno));
         }
+        raw_restore_directio(s->fd, fd_arg);
         return -1;
     }
     s->lseek_err_cnt = 0;
@@ -226,6 +256,7 @@ static int raw_pwrite(BlockDriverState *
                       bs->total_sectors, ret, errno, strerror(errno));
 
 label__raw_write__success:
+    raw_restore_directio(s->fd, fd_arg);
 
     return ret;
 }
Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c 2008-01-22 10:12:20.000000000 +0100
+++ qemu/vl.c 2008-01-22 10:12:30.000000000 +0100
@@ -5593,7 +5593,7 @@ struct QEMUFile {
                            when reading */
     int buf_index;
     int buf_size; /* 0 when writing */
-    uint8_t buf[IO_BUF_SIZE];
+    uint8_t *buf;
 };
 
 QEMUFile *qemu_fopen(const char *filename, const char *mode)
@@ -5629,6 +5629,12 @@ static QEMUFile *qemu_fopen_bdrv(BlockDr
     f = qemu_mallocz(sizeof(QEMUFile));
     if (!f)
         return NULL;
+    f->buf = qemu_memalign(512, IO_BUF_SIZE);
+    if (f->buf == NULL) {
+        qemu_free(f);
+        return NULL;
+    }
+    memset(f->buf, 0, IO_BUF_SIZE);
     f->is_file = 0;
     f->bs = bs;
     f->is_writable = is_writable;
@@ -5682,6 +5688,7 @@ void qemu_fclose(QEMUFile *f)
     if (f->is_file) {
         fclose(f->outfile);
     }
+    qemu_free(f->buf);
     qemu_free(f);
 }
 
@@ -7545,7 +7552,7 @@ static void help(int exitcode)
            "-hdc/-hdd file use 'file' as IDE hard disk 2/3 image\n"
            "-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n"
            "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]\n"
-           " [,cyls=c,heads=h,secs=s[,trans=t]][snapshot=on|off]"
+           " [,cyls=c,heads=h,secs=s[,trans=t]][snapshot=on|off]\n"
            " [,cache=on|off]\n"
            " use 'file' as a drive image\n"
            "-mtdblock file use 'file' as on-board Flash memory image\n"
Index: qemu/block-qcow2.c
===================================================================
--- qemu.orig/block-qcow2.c 2008-01-22 10:12:20.000000000 +0100
+++ qemu/block-qcow2.c 2008-01-22 10:12:30.000000000 +0100
@@ -26,6 +26,7 @@
 #include <zlib.h>
 #include "aes.h"
 #include <assert.h>
+#include "osdep.h"
 
 /*
   Differences with QCOW:
@@ -246,7 +247,7 @@ static int qcow_open(BlockDriverState *b
     if (s->l1_size < s->l1_vm_state_index)
         goto fail;
     s->l1_table_offset = header.l1_table_offset;
-    s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
+    s->l1_table = qemu_memalign(512, s->l1_size * sizeof(uint64_t));
     if (!s->l1_table)
         goto fail;
     if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
@@ -256,14 +257,14 @@ static int qcow_open(BlockDriverState *b
         be64_to_cpus(&s->l1_table[i]);
     }
     /* alloc L2 cache */
-    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+    s->l2_cache = qemu_memalign(512, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
     if (!s->l2_cache)
         goto fail;
     s->cluster_cache = qemu_malloc(s->cluster_size);
     if (!s->cluster_cache)
         goto fail;
     /* one more sector for decompressed data alignment */
-    s->cluster_data = qemu_malloc(s->cluster_size + 512);
+    s->cluster_data = qemu_memalign(512, s->cluster_size + 512);
     if (!s->cluster_data)
         goto fail;
     s->cluster_cache_offset = -1;
@@ -444,9 +445,10 @@ static int grow_l1_table(BlockDriverStat
 #endif
 
     new_l1_size2 = sizeof(uint64_t) * new_l1_size;
-    new_l1_table = qemu_mallocz(new_l1_size2);
+    new_l1_table = qemu_memalign(512, new_l1_size2);
     if (!new_l1_table)
         return -ENOMEM;
+    memset(new_l1_table, 0, new_l1_size2);
     memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
 
     /* write new table (align to cluster) */
@@ -893,7 +895,6 @@ static QCowAIOCB *qcow_aio_setup(BlockDr
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     QCowAIOCB *acb;
-
     acb = qemu_aio_get(bs, cb, opaque);
     if (!acb)
         return NULL;
@@ -962,11 +963,12 @@ static void qcow_aio_write_cb(void *opaq
     }
     if (s->crypt_method) {
         if (!acb->cluster_data) {
-            acb->cluster_data = qemu_mallocz(s->cluster_size);
+            acb->cluster_data = qemu_memalign(512, s->cluster_size);
             if (!acb->cluster_data) {
                 ret = -ENOMEM;
                 goto fail;
             }
+            memset(acb->cluster_data, 0, s->cluster_size);
         }
         encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
                         acb->n, 1, &s->aes_encrypt_key);
@@ -1090,12 +1092,14 @@ static int qcow_create(const char *filen
     header.l1_size = cpu_to_be32(l1_size);
     offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
 
-    s->refcount_table = qemu_mallocz(s->cluster_size);
+    s->refcount_table = qemu_memalign(512, s->cluster_size);
     if (!s->refcount_table)
         goto fail;
-    s->refcount_block = qemu_mallocz(s->cluster_size);
+    memset(s->refcount_table, 0, s->cluster_size);
+    s->refcount_block = qemu_memalign(512, s->cluster_size);
     if (!s->refcount_block)
         goto fail;
+    memset(s->refcount_block, 0, s->cluster_size);
 
     s->refcount_table_offset = offset;
     header.refcount_table_offset = cpu_to_be64(offset);
@@ -1182,7 +1186,8 @@ static int qcow_write_compressed(BlockDr
     if (nb_sectors != s->cluster_sectors)
         return -EINVAL;
 
-    out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+    out_buf = qemu_memalign(512,
+                            s->cluster_size + (s->cluster_size / 1000) + 128);
     if (!out_buf)
         return -ENOMEM;
 
@@ -1264,7 +1269,7 @@ static int update_snapshot_refcount(Bloc
     l1_size2 = l1_size * sizeof(uint64_t);
     l1_allocated = 0;
     if (l1_table_offset != s->l1_table_offset) {
-        l1_table = qemu_malloc(l1_size2);
+        l1_table = qemu_memalign(512, l1_size2);
         if (!l1_table)
             goto fail;
         l1_allocated = 1;
@@ -1280,7 +1285,7 @@ static int update_snapshot_refcount(Bloc
     }
 
     l2_size = s->l2_size * sizeof(uint64_t);
-    l2_table = qemu_malloc(l2_size);
+    l2_table = qemu_memalign(512, l2_size);
     if (!l2_table)
         goto fail;
     l1_modified = 0;
@@ -1583,7 +1588,7 @@ static int qcow_snapshot_create(BlockDri
     sn->l1_table_offset = alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
     sn->l1_size = s->l1_size;
 
-    l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
+    l1_table = qemu_memalign(512, s->l1_size * sizeof(uint64_t));
     if (!l1_table)
         goto fail;
     for(i = 0; i < s->l1_size; i++) {
@@ -1732,11 +1737,11 @@ static int refcount_init(BlockDriverStat
     BDRVQcowState *s = bs->opaque;
     int ret, refcount_table_size2, i;
 
-    s->refcount_block_cache = qemu_malloc(s->cluster_size);
+    s->refcount_block_cache = qemu_memalign(512, s->cluster_size);
     if (!s->refcount_block_cache)
         goto fail;
     refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
-    s->refcount_table = qemu_malloc(refcount_table_size2);
+    s->refcount_table = qemu_memalign(512, refcount_table_size2);
     if (!s->refcount_table)
         goto fail;
     if (s->refcount_table_size > 0) {
@@ -1909,9 +1914,10 @@ static int grow_refcount_table(BlockDriv
            new_table_size);
 #endif
     new_table_size2 = new_table_size * sizeof(uint64_t);
-    new_table = qemu_mallocz(new_table_size2);
+    new_table = qemu_memalign(512, new_table_size2);
     if (!new_table)
         return -ENOMEM;
+    memset(new_table, 0, new_table_size2);
     memcpy(new_table, s->refcount_table,
            s->refcount_table_size * sizeof(uint64_t));
     for(i = 0; i < s->refcount_table_size; i++)
@@ -2078,7 +2084,7 @@ static int check_refcounts_l1(BlockDrive
     inc_refcounts(bs, refcount_table, refcount_table_size,
                   l1_table_offset, l1_size2);
 
-    l1_table = qemu_malloc(l1_size2);
+    l1_table = qemu_memalign(512, l1_size2);
     if (!l1_table)
         goto fail;
     if (bdrv_pread(s->hd, l1_table_offset,
@@ -2088,7 +2094,7 @@ static int check_refcounts_l1(BlockDrive
         be64_to_cpus(&l1_table[i]);
 
     l2_size = s->l2_size * sizeof(uint64_t);
-    l2_table = qemu_malloc(l2_size);
+    l2_table = qemu_memalign(512, l2_size);
     if (!l2_table)
         goto fail;
     for(i = 0; i < l1_size; i++) {
Index: qemu/qemu-img.c
===================================================================
--- qemu.orig/qemu-img.c 2008-01-22 10:12:20.000000000 +0100
+++ qemu/qemu-img.c 2008-01-22 10:12:30.000000000 +0100
@@ -55,6 +55,22 @@ void *qemu_mallocz(size_t size)
     return ptr;
 }
 
+void *qemu_memalign(size_t alignment, size_t size)
+{
+#if defined(_POSIX_C_SOURCE)
+    int ret;
+    void *ptr;
+    ret = posix_memalign(&ptr, alignment, size);
+    if (ret != 0)
+        return NULL;
+    return ptr;
+#elif defined(_BSD)
+    return valloc(size);
+#else
+    return memalign(alignment, size);
+#endif
+}
+
 char *qemu_strdup(const char *str)
 {
     char *ptr;
Index: qemu/block.c
===================================================================
--- qemu.orig/block.c 2008-01-22 10:12:20.000000000 +0100
+++ qemu/block.c 2008-01-22 11:12:18.000000000 +0100
@@ -459,7 +459,10 @@ int bdrv_commit(BlockDriverState *bs)
     BlockDriver *drv = bs->drv;
     int64_t i, total_sectors;
     int n, j;
-    unsigned char sector[512];
+    unsigned char tmp_sector[SECTOR_SIZE + 0x1FF];
+    /* align on 512 bytes boundary for O_DIRECT */
+    unsigned char *sector = (uint8_t*)
+                            (((unsigned long)tmp_sector + 0x1FF) & ~0x1FF);
 
     if (!drv)
         return -ENOMEDIUM;
@@ -569,7 +572,9 @@ int bdrv_write(BlockDriverState *bs, int
 static int bdrv_pread_em(BlockDriverState *bs, int64_t offset,
                          uint8_t *buf, int count1)
 {
-    uint8_t tmp_buf[SECTOR_SIZE];
+    uint8_t align_buf[SECTOR_SIZE + 0x1FF];
+    /* align on 512 bytes boundary for O_DIRECT */
+    uint8_t *tmp_buf = (uint8_t*)(((unsigned long)align_buf + 0x1FF) & ~0x1FF);
     int len, nb_sectors, count;
     int64_t sector_num;
 
@@ -592,11 +597,14 @@ static int bdrv_pread_em(BlockDriverStat
 
     /* read the sectors "in place" */
     nb_sectors = count >> SECTOR_BITS;
-    if (nb_sectors > 0) {
-        if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
+    while (nb_sectors > 0) {
+        if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
             return -EIO;
-        sector_num += nb_sectors;
-        len = nb_sectors << SECTOR_BITS;
+        /* alignment needed by O_DIRECT */
+        memcpy(buf, tmp_buf, SECTOR_SIZE);
+        nb_sectors--;
+        sector_num += 1;
+        len = 1 << SECTOR_BITS;
         buf += len;
         count -= len;
     }
@@ -613,7 +621,9 @@ static int bdrv_pread_em(BlockDriverStat
 static int bdrv_pwrite_em(BlockDriverState *bs, int64_t offset,
                           const uint8_t *buf, int count1)
 {
-    uint8_t tmp_buf[SECTOR_SIZE];
+    uint8_t align_buf[SECTOR_SIZE + 0x1FF];
+    /* align on 512 bytes boundary for O_DIRECT */
+    uint8_t *tmp_buf = (uint8_t*)(((unsigned long)align_buf + 0x1FF) & ~0x1FF);
     int len, nb_sectors, count;
     int64_t sector_num;
 
@@ -638,11 +648,14 @@ static int bdrv_pwrite_em(BlockDriverSta
 
     /* write the sectors "in place" */
     nb_sectors = count >> SECTOR_BITS;
-    if (nb_sectors > 0) {
-        if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
+    while (nb_sectors > 0) {
+        /* alignment needed by O_DIRECT */
+        memcpy(tmp_buf, buf, SECTOR_SIZE);
+        if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
             return -EIO;
-        sector_num += nb_sectors;
-        len = nb_sectors << SECTOR_BITS;
+        nb_sectors--;
+        sector_num += 1;
+        len = 1 << SECTOR_BITS;
         buf += len;
         count -= len;
     }