From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from list by monty-python.gnu.org with tmda-scanned (Exim 4.20) id 19XbVu-0002K4-Pz for qemu-devel@nongnu.org; Wed, 02 Jul 2003 02:54:46 -0400 Received: from mail by monty-python.gnu.org with spam-scanned (Exim 4.20) id 19XbVU-0001ea-Qq for qemu-devel@nongnu.org; Wed, 02 Jul 2003 02:54:26 -0400 Received: from dp.samba.org ([66.70.73.150] helo=lists.samba.org) by monty-python.gnu.org with esmtp (Exim 4.20) id 19XbSd-0000K0-55 for qemu-devel@nongnu.org; Wed, 02 Jul 2003 02:51:23 -0400 From: Rusty Russell Date: Wed, 02 Jul 2003 16:50:41 +1000 Sender: rusty@bach.samba.org Message-Id: <20030702065122.0DAD72C0D3@lists.samba.org> Subject: [Qemu-devel] [PATCH] Snapshot block device support Reply-To: qemu-devel@nongnu.org List-Id: List-Help: List-Post: List-Subscribe: , List-Archive: List-Unsubscribe: , To: Fabrice Bellard Cc: qemu-devel@nongnu.org Hi Fabrice, I haven't got the IDE emulation to work for me yet (is it supposed to yet?), but this allows the "-snapshot" option and "C-a s" to commit the disks. The blocks which change are committed to backing store, and a bitmap of changed blocks is kept. Diff + new test file below. Rusty. -- Anyone who quotes me in their sig is an idiot. -- Rusty Russell. tests/test_block.c: ================ #define _GNU_SOURCE /* For lseek64 */ #include "../block.c" #define NUM_SECTORS 2560 static int memisset(const void *mem, int c, size_t len) { size_t i; for (i = 0; i < len; i++) if (((unsigned char *)mem)[i] != (unsigned char)c) return 0; return 1; } static int read_test(BlockDriverState *bs, unsigned char contents[]) { int i; unsigned char sector[512]; unsigned char ten_sectors[5120]; /* Single read test. */ for (i = 0; i < NUM_SECTORS; i++) { if (bdrv_read(bs, i, sector, 1) != 0) return 0; if (!memisset(sector, contents[i], 512)) return 0; } /* Multiple read test. */ for (i = 0; i < NUM_SECTORS - 10; i++) { int j; if (bdrv_read(bs, i, ten_sectors, 10) != 0) return 0; for (j = 0; j < 10; j++) if (!memisset(ten_sectors + j*512, contents[i+j], 512)) return 0; } return 1; } int main(int argc, char *argv[]) { int fd, i; unsigned char sector[512]; unsigned char ten_sectors[5120]; unsigned char contents[NUM_SECTORS]; BlockDriverState *bs; fd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0600); for (i = 0; i < NUM_SECTORS; i++) { memset(sector, i, sizeof(sector)); if (write(fd, sector, sizeof(sector)) != sizeof(sector)) abort(); contents[i] = i; } close(fd); bs = bdrv_open(argv[1], argv[2] ? 1 : 0); /* Test data is as we expect. */ if (!read_test(bs, contents)) abort(); /* Single write test. */ memset(sector, 2, sizeof(sector)); if (bdrv_write(bs, 0, sector, 1) != 0) abort(); contents[0] = 2; if (!read_test(bs, contents)) abort(); memset(sector, 0, sizeof(sector)); if (bdrv_read(bs, 0, sector, 1) != 0) abort(); if (!memisset(sector, 2, sizeof(sector))) abort(); /* Random test */ for (i = 0; i < 10000; i++) { int j; int num_sectors = (random() % 10) + 1; int sect_start = random() % (NUM_SECTORS - num_sectors); if (random() % 2) { if (bdrv_read(bs,sect_start,ten_sectors,num_sectors)) abort(); for (j = 0; j < num_sectors; j++) if (!memisset(ten_sectors + j*512, contents[sect_start + j], 512)) abort(); } else { for (j = 0; j < num_sectors; j++) { contents[sect_start + j] = random(); memset(ten_sectors + j*512, contents[sect_start + j], 512); } if (bdrv_write(bs,sect_start,ten_sectors,num_sectors)) abort(); } } if (!read_test(bs, contents)) abort(); if (argv[2]) { /* Test that it hasn't touched initial file. */ fd = open(argv[1], O_RDONLY); if (fd < 0) abort(); for (i = 0; i < NUM_SECTORS; i++) { if (read(fd, sector, sizeof(sector)) != sizeof(sector)) abort(); if (!memisset(sector, i, sizeof(sector))) abort(); } close(fd); /* Test that commit works. */ bdrv_commit(bs); fd = open(argv[1], O_RDONLY); if (fd < 0) abort(); for (i = 0; i < NUM_SECTORS; i++) { if (read(fd, sector, sizeof(sector)) != sizeof(sector)) abort(); if (!memisset(sector, contents[i], sizeof(sector))) abort(); } close(fd); } printf("All tests on %s%s passed!\n", argv[1], argv[2] ? " (with undo)" : ""); return 0; } ================ Index: block.c =================================================================== RCS file: /cvsroot/qemu/qemu/block.c,v retrieving revision 1.2 diff -u -r1.2 block.c --- block.c 30 Jun 2003 23:17:31 -0000 1.2 +++ block.c 2 Jul 2003 06:45:08 -0000 @@ -45,9 +45,14 @@ int fd; int64_t total_sectors; int read_only; + const char *filename; + + /* If snapshot set, this is nonnull and these are used. */ + unsigned char *changes_map; + int changes_fd; }; -BlockDriverState *bdrv_open(const char *filename) +BlockDriverState *bdrv_open(const char *filename, int snapshot) { BlockDriverState *bs; int fd; @@ -56,12 +61,12 @@ bs = malloc(sizeof(BlockDriverState)); if(!bs) return NULL; + bs->read_only = 0; fd = open(filename, O_RDWR); if (fd < 0) { fd = open(filename, O_RDONLY); if (fd < 0) { - close(fd); free(bs); return NULL; } @@ -70,44 +75,176 @@ size = lseek64(fd, 0, SEEK_END); bs->total_sectors = size / 512; bs->fd = fd; + bs->filename = filename; + + if (snapshot) { + /* Lazy paging in of /dev/zero for changes bitmap. */ + int dev_zero; + char template[] = "/tmp/vl.XXXXXX"; + + dev_zero = open("/dev/zero", O_RDONLY); + if (dev_zero < 0) { + close(fd); + free(bs); + return NULL; + } + + bs->changes_map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, + dev_zero, 0); + close(dev_zero); + if (bs->changes_map == MAP_FAILED) { + close(fd); + free(bs); + return NULL; + } + + /* Now, create a (sparse) temporary file for backing blocks. */ + bs->changes_fd = mkstemp(template); + if (bs->changes_fd < 0) { + munmap(bs->changes_map, bs->total_sectors * 512); + close(fd); + free(bs); + return NULL; + } + /* Delete it. */ + unlink(template); + } else { + bs->changes_map = NULL; + } + return bs; } void bdrv_close(BlockDriverState *bs) { close(bs->fd); + if (bs->changes_map) { + munmap(bs->changes_map, bs->total_sectors * 512); + close(bs->changes_fd); + } free(bs); } +static inline void set_bit(unsigned char *bitmap, int64_t bitnum) +{ + bitmap[bitnum / 8] |= (1 << (bitnum%8)); +} + +static inline int is_bit_set(const unsigned char *bitmap, int64_t bitnum) +{ + return !!(bitmap[bitnum / 8] & (1 << (bitnum%8))); +} + +void bdrv_commit(BlockDriverState *bs) +{ + int64_t i; + unsigned char *changes_map; + + if (!bs->changes_map) { + fprintf(stderr, "Already committing to %s\n", bs->filename); + return; + } + + if (bs->read_only) { + fprintf(stderr, "Can't commit to %s: read-only\n", bs->filename); + return; + } + + changes_map = bs->changes_map; + for (i = 0; i < bs->total_sectors; i++) { + if (is_bit_set(changes_map, i)) { + unsigned char sector[512]; + if (bdrv_read(bs, i, sector, 1) != 0) { + fprintf(stderr, "Error reading sector %lli: aborting commit\n", + (long long)i); + return; + } + + /* Make bdrv_write write to real file for a moment. */ + bs->changes_map = NULL; + if (bdrv_write(bs, i, sector, 1) != 0) { + fprintf(stderr, "Error writing sector %lli: aborting commit\n", + (long long)i); + bs->changes_map = changes_map; + return; + } + bs->changes_map = changes_map; + } + } + fprintf(stderr, "Committed snapshot to %s\n", bs->filename); +} + +/* Return true if first block has been changed (ie. current version is + * in backing store). Set the number of continuous blocks for which + * that is true. */ +static int is_changed(const unsigned char *bitmap, + int64_t sector_num, int nb_sectors, + int *num_same) +{ + int changed; + + if (!bitmap || nb_sectors == 0) { + *num_same = nb_sectors; + return 0; + } + + changed = is_bit_set(bitmap, sector_num); + for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) { + if (is_bit_set(bitmap, sector_num + *num_same) != changed) + break; + } + + return changed; +} + /* return -1 if error */ int bdrv_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) { - int ret; + int ret, num_same, fd; - lseek64(bs->fd, sector_num * 512, SEEK_SET); - ret = read(bs->fd, buf, nb_sectors * 512); - if (ret != nb_sectors * 512) + fd = bs->fd; + if (is_changed(bs->changes_map, sector_num, nb_sectors, &num_same)) + fd = bs->changes_fd; + + lseek64(fd, sector_num * 512, SEEK_SET); + ret = read(fd, buf, num_same * 512); + if (ret != num_same * 512) { + fprintf(stderr, "Block: Failed to read %i sectors at %lli\n", + num_same, (long long)sector_num); return -1; - else - return 0; + } + + /* Recurse to do rest of blocks. */ + if (num_same < nb_sectors) + return bdrv_read(bs, sector_num + num_same, buf + 512 * num_same, + nb_sectors - num_same); + return 0; } /* return -1 if error */ int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { - int ret; + int ret, fd, i; - if (bs->read_only) - return -1; + fd = bs->fd; + + if (bs->changes_map) + fd = bs->changes_fd; + else if (bs->read_only) + return -1; - lseek64(bs->fd, sector_num * 512, SEEK_SET); - ret = write(bs->fd, buf, nb_sectors * 512); + lseek64(fd, sector_num * 512, SEEK_SET); + ret = write(fd, buf, nb_sectors * 512); if (ret != nb_sectors * 512) return -1; - else - return 0; + + if (bs->changes_map) + for (i = 0; i < nb_sectors; i++) + set_bit(bs->changes_map, sector_num + i); + + return 0; } void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr) Index: vl.c =================================================================== RCS file: /cvsroot/qemu/qemu/vl.c,v retrieving revision 1.10 diff -u -r1.10 vl.c --- vl.c 1 Jul 2003 16:27:45 -0000 1.10 +++ vl.c 2 Jul 2003 06:45:09 -0000 @@ -52,6 +52,7 @@ #define DEBUG_LOGFILE "/tmp/vl.log" #define DEFAULT_NETWORK_SCRIPT "/etc/vl-ifup" +#define MAX_DISKS 2 //#define DEBUG_UNUSED_IOPORT //#define DEBUG_IRQ_LATENCY @@ -63,6 +64,8 @@ #define INITRD_LOAD_ADDR 0x00400000 #define KERNEL_PARAMS_ADDR 0x00090000 +BlockDriverState *bs_table[MAX_DISKS]; + /* from plex86 (BSD license) */ struct __attribute__ ((packed)) linux_params { // For 0x00..0x3f, see 'struct screen_info' in linux/include/linux/tty.h. @@ -1265,6 +1268,7 @@ printf("\n" "C-a h print this help\n" "C-a x exit emulatior\n" + "C-a s save disk data back to file (if -snapshot)\n" "C-a b send break (magic sysrq)\n" "C-a C-a send C-a\n" ); @@ -1282,6 +1286,14 @@ case 'x': exit(0); break; + case 's': { + int i; + + for (i = 0; i < MAX_DISKS; i++) + if (bs_table[i]) + bdrv_commit(bs_table[i]); + break; + } case 'b': /* send break */ s->rbr = 0; @@ -1976,8 +1988,6 @@ /* set to 1 set disable mult support */ #define MAX_MULT_SECTORS 8 -#define MAX_DISKS 2 - struct IDEState; typedef void EndTransferFunc(struct IDEState *); @@ -2009,7 +2019,6 @@ uint8_t io_buffer[MAX_MULT_SECTORS*512 + 4]; } IDEState; -BlockDriverState *bs_table[MAX_DISKS]; IDEState ide_state[MAX_DISKS]; static void padstr(char *str, const char *src, int len) @@ -2577,6 +2586,7 @@ "-initrd file use 'file' as initial ram disk\n" "-hda file use 'file' as hard disk 0 image\n" "-hdb file use 'file' as hard disk 1 image\n" + "-snapshot write to temporary files instead of disk files\n" "-m megs set virtual RAM size to megs MB\n" "-n script set network init script [default=%s]\n" "\n" @@ -2595,12 +2605,13 @@ { "initrd", 1, NULL, 0, }, { "hda", 1, NULL, 0, }, { "hdb", 1, NULL, 0, }, + { "snapshot", 0, NULL, 0, }, { NULL, 0, NULL, 0 }, }; int main(int argc, char **argv) { - int c, ret, initrd_size, i, use_gdbstub, gdbstub_port, long_index; + int c, ret, initrd_size, i, use_gdbstub, gdbstub_port, snapshot, long_index; struct linux_params *params; struct sigaction act; struct itimerval itv; @@ -2617,6 +2628,7 @@ pstrcpy(network_script, sizeof(network_script), DEFAULT_NETWORK_SCRIPT); use_gdbstub = 0; gdbstub_port = DEFAULT_GDBSTUB_PORT; + snapshot = 0; for(;;) { c = getopt_long_only(argc, argv, "hm:dn:sp:", long_options, &long_index); if (c == -1) @@ -2633,6 +2645,9 @@ case 2: hd_filename[1] = optarg; break; + case 3: + snapshot = 1; + break; } break; case 'h': @@ -2679,7 +2694,7 @@ /* open the virtual block devices */ for(i = 0; i < MAX_DISKS; i++) { if (hd_filename[i]) { - bs_table[i] = bdrv_open(hd_filename[i]); + bs_table[i] = bdrv_open(hd_filename[i], snapshot); if (!bs_table[i]) { fprintf(stderr, "vl: could not open hard disk image '%s\n", hd_filename[i]); Index: vl.h =================================================================== RCS file: /cvsroot/qemu/qemu/vl.h,v retrieving revision 1.1 diff -u -r1.1 vl.h --- vl.h 30 Jun 2003 10:03:06 -0000 1.1 +++ vl.h 2 Jul 2003 06:45:09 -0000 @@ -27,13 +27,13 @@ /* block.c */ typedef struct BlockDriverState BlockDriverState; -BlockDriverState *bdrv_open(const char *filename); +BlockDriverState *bdrv_open(const char *filename, int snapshot); void bdrv_close(BlockDriverState *bs); int bdrv_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr); - +void bdrv_commit(BlockDriverState *bs); #endif /* VL_H */ Index: tests/Makefile =================================================================== RCS file: /cvsroot/qemu/qemu/tests/Makefile,v retrieving revision 1.21 diff -u -r1.21 Makefile --- tests/Makefile 15 Jun 2003 20:42:31 -0000 1.21 +++ tests/Makefile 2 Jul 2003 06:45:10 -0000 @@ -6,7 +6,7 @@ ifeq ($(ARCH),i386) TESTS=testclone testsig testthread sha1-i386 test-i386 runcom endif -TESTS+=sha1 test_path +TESTS+=sha1 test_path test_block QEMU=../qemu @@ -28,6 +28,12 @@ test_path: test_path.c $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< ./$@ || { rm $@; exit 1; } + +test_block: test_block.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + ./$@ test_block_data || { rm -f $@ test_block_data; exit 1; } + ./$@ test_block_data undo || { rm -f $@ test_block_data; exit 1; } + @rm -f test_block_data # i386 emulation test (test various opcodes) */ test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S \