qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Rusty Russell <rusty@rustcorp.com.au>
To: Fabrice Bellard <fabrice.bellard@free.fr>
Cc: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH] Snapshot block device support
Date: Wed, 02 Jul 2003 16:50:41 +1000	[thread overview]
Message-ID: <20030702065122.0DAD72C0D3@lists.samba.org> (raw)

Hi Fabrice,

	I haven't got the IDE emulation to work for me yet (is it
supposed to work yet?), but this patch adds a "-snapshot" option, plus
"C-a s" to commit the disks.  Blocks which change are written to a
temporary backing store, and a bitmap of changed blocks is kept.

Diff + new test file below.
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

tests/test_block.c:
================
#define _GNU_SOURCE /* For lseek64 */
#include "../block.c"

#define NUM_SECTORS 2560

/* Return 1 if each of the first len bytes at mem equals c (converted to
 * unsigned char), 0 as soon as one differs.  A zero-length range counts
 * as set. */
static int memisset(const void *mem, int c, size_t len)
{
	const unsigned char *p = mem;
	const unsigned char *end = p + len;
	const unsigned char want = (unsigned char)c;

	while (p < end) {
		if (*p++ != want)
			return 0;
	}
	return 1;
}

/* Check that the device reads back the expected data.
 *
 * contents[] holds one byte per sector (NUM_SECTORS entries): sector i is
 * expected to consist of 512 copies of contents[i].  Every sector is read
 * on its own, then every ten-sector window is read in a single call.
 *
 * Returns 1 if all reads succeed and match, 0 on the first read failure
 * or mismatch.
 */
static int read_test(BlockDriverState *bs, unsigned char contents[])
{
	int i;
	unsigned char sector[512];
	unsigned char ten_sectors[5120];

	/* Single read test. */
	for (i = 0; i < NUM_SECTORS; i++) {
		if (bdrv_read(bs, i, sector, 1) != 0)
			return 0;
		if (!memisset(sector, contents[i], 512))
			return 0;
	}

	/* Multiple read test.  The bound is inclusive so that the final
	 * window, which ends on the last sector, is exercised too. */
	for (i = 0; i + 10 <= NUM_SECTORS; i++) {
		int j;
		if (bdrv_read(bs, i, ten_sectors, 10) != 0)
			return 0;

		for (j = 0; j < 10; j++)
			if (!memisset(ten_sectors + j*512, contents[i+j], 512))
				return 0;
	}
	return 1;
}

/* Exercise the block layer against a scratch image file.
 *
 * Usage: test_block <image-file> [undo]
 *
 * argv[1] is created (or truncated) and filled with NUM_SECTORS sectors,
 * sector i holding the byte (unsigned char)i.  If a second argument is
 * given the device is opened in snapshot mode, and we additionally check
 * that the image file is untouched until bdrv_commit() and matches the
 * expected contents afterwards.
 *
 * Any test failure calls abort(); returns 0 when everything passes and
 * 1 on a usage error.
 */
int main(int argc, char *argv[])
{
	int fd, i, snapshot;
	unsigned char sector[512];
	unsigned char ten_sectors[5120];
	unsigned char contents[NUM_SECTORS];
	BlockDriverState *bs;

	/* Don't read argv[1]/argv[2] unless they are really there. */
	if (argc < 2) {
		fprintf(stderr, "Usage: %s <image-file> [undo]\n",
			argc > 0 ? argv[0] : "test_block");
		return 1;
	}
	snapshot = (argc > 2);

	fd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0600);
	if (fd < 0)
		abort();
	for (i = 0; i < NUM_SECTORS; i++) {
		memset(sector, i, sizeof(sector));
		if (write(fd, sector, sizeof(sector)) != sizeof(sector))
			abort();
		contents[i] = i;
	}
	close(fd);

	bs = bdrv_open(argv[1], snapshot ? 1 : 0);
	if (!bs)
		abort();

	/* Test data is as we expect. */
	if (!read_test(bs, contents))
		abort();

	/* Single write test. */
	memset(sector, 2, sizeof(sector));
	if (bdrv_write(bs, 0, sector, 1) != 0)
		abort();
	contents[0] = 2;
	if (!read_test(bs, contents))
		abort();

	memset(sector, 0, sizeof(sector));
	if (bdrv_read(bs, 0, sector, 1) != 0)
		abort();
	if (!memisset(sector, 2, sizeof(sector)))
		abort();

	/* Random test */
	for (i = 0; i < 10000; i++) {
		int j;
		int num_sectors = (random() % 10) + 1;
		/* +1 so a run may end exactly on the last sector. */
		int sect_start = random() % (NUM_SECTORS - num_sectors + 1);

		if (random() % 2) {
			if (bdrv_read(bs,sect_start,ten_sectors,num_sectors))
				abort();
			for (j = 0; j < num_sectors; j++)
				if (!memisset(ten_sectors + j*512,
					      contents[sect_start + j],
					      512))
					abort();
		} else {
			for (j = 0; j < num_sectors; j++) {
				contents[sect_start + j] = random();
				memset(ten_sectors + j*512,
				       contents[sect_start + j],
				       512);
			}
			if (bdrv_write(bs,sect_start,ten_sectors,num_sectors))
				abort();
		}
	}

	if (!read_test(bs, contents))
		abort();

	if (snapshot) {
		/* Test that it hasn't touched initial file. */
		fd = open(argv[1], O_RDONLY);
		if (fd < 0)
			abort();
		for (i = 0; i < NUM_SECTORS; i++) {
			if (read(fd, sector, sizeof(sector)) != sizeof(sector))
				abort();
			if (!memisset(sector, i, sizeof(sector)))
				abort();
		}
		close(fd);

		/* Test that commit works. */
		bdrv_commit(bs);
		fd = open(argv[1], O_RDONLY);
		if (fd < 0)
			abort();
		for (i = 0; i < NUM_SECTORS; i++) {
			if (read(fd, sector, sizeof(sector)) != sizeof(sector))
				abort();
			if (!memisset(sector, contents[i], sizeof(sector)))
				abort();
		}
		close(fd);
	}

	printf("All tests on %s%s passed!\n", argv[1],
	       snapshot ? " (with undo)" : "");
	return 0;
}
================
Index: block.c
===================================================================
RCS file: /cvsroot/qemu/qemu/block.c,v
retrieving revision 1.2
diff -u -r1.2 block.c
--- block.c	30 Jun 2003 23:17:31 -0000	1.2
+++ block.c	2 Jul 2003 06:45:08 -0000
@@ -45,9 +45,14 @@
     int fd;
     int64_t total_sectors;
     int read_only;
+    const char *filename;
+
+    /* If snapshot set, this is nonnull and these are used. */
+    unsigned char *changes_map;
+    int changes_fd;
 };
 
-BlockDriverState *bdrv_open(const char *filename)
+BlockDriverState *bdrv_open(const char *filename, int snapshot)
 {
     BlockDriverState *bs;
     int fd;
@@ -56,12 +61,12 @@
     bs = malloc(sizeof(BlockDriverState));
     if(!bs)
         return NULL;
+
     bs->read_only = 0;
     fd = open(filename, O_RDWR);
     if (fd < 0) {
         fd = open(filename, O_RDONLY);
         if (fd < 0) {
-            close(fd);
             free(bs);
             return NULL;
         }
@@ -70,44 +75,176 @@
     size = lseek64(fd, 0, SEEK_END);
     bs->total_sectors = size / 512;
     bs->fd = fd;
+    bs->filename = filename;
+
+    if (snapshot) {
+	/* Lazy paging in of /dev/zero for changes bitmap. */
+	int dev_zero;
+	char template[] = "/tmp/vl.XXXXXX";
+
+	dev_zero = open("/dev/zero", O_RDONLY);
+	if (dev_zero < 0) {
+	    close(fd);
+	    free(bs);
+	    return NULL;
+	}
+
+	bs->changes_map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE,
+			       dev_zero, 0);
+	close(dev_zero);
+	if (bs->changes_map == MAP_FAILED) {
+	    close(fd);
+	    free(bs);
+	    return NULL;
+	}
+
+	/* Now, create a (sparse) temporary file for backing blocks. */
+	bs->changes_fd = mkstemp(template);
+	if (bs->changes_fd < 0) {
+	    munmap(bs->changes_map, bs->total_sectors * 512);
+	    close(fd);
+	    free(bs);
+	    return NULL;
+	}
+	/* Delete it. */
+	unlink(template);
+    } else {
+	bs->changes_map = NULL;
+    }
+
     return bs;
 }
 
 void bdrv_close(BlockDriverState *bs)
 {
     close(bs->fd);
+    if (bs->changes_map) {
+	munmap(bs->changes_map, bs->total_sectors * 512);
+	close(bs->changes_fd);
+    }
     free(bs);
 }
 
+static inline void set_bit(unsigned char *bitmap, int64_t bitnum)
+{
+    bitmap[bitnum / 8] |= (1 << (bitnum%8));
+}
+
+static inline int is_bit_set(const unsigned char *bitmap, int64_t bitnum)
+{
+    return !!(bitmap[bitnum / 8] & (1 << (bitnum%8)));
+}
+
+void bdrv_commit(BlockDriverState *bs)
+{
+    int64_t i;
+    unsigned char *changes_map;
+
+    if (!bs->changes_map) {
+	fprintf(stderr, "Already committing to %s\n", bs->filename);
+	return;
+    }
+
+    if (bs->read_only) {
+	fprintf(stderr, "Can't commit to %s: read-only\n", bs->filename);
+	return;
+    }
+
+    changes_map = bs->changes_map;
+    for (i = 0; i < bs->total_sectors; i++) {
+	if (is_bit_set(changes_map, i)) {
+	    unsigned char sector[512];
+	    if (bdrv_read(bs, i, sector, 1) != 0) {
+		fprintf(stderr, "Error reading sector %lli: aborting commit\n",
+			(long long)i);
+		return;
+	    }
+
+	    /* Make bdrv_write write to real file for a moment. */
+	    bs->changes_map = NULL;
+	    if (bdrv_write(bs, i, sector, 1) != 0) {
+		fprintf(stderr, "Error writing sector %lli: aborting commit\n",
+			(long long)i);
+		bs->changes_map = changes_map;
+		return;
+	    }
+	    bs->changes_map = changes_map;
+	}
+    }
+    fprintf(stderr, "Committed snapshot to %s\n", bs->filename);
+}
+    
+/* Return true if first block has been changed (ie. current version is
+ * in backing store).  Set the number of continuous blocks for which
+ * that is true. */
+static int is_changed(const unsigned char *bitmap,
+		      int64_t sector_num, int nb_sectors,
+		      int *num_same)
+{
+    int changed;
+
+    if (!bitmap || nb_sectors == 0) {
+	*num_same = nb_sectors;
+	return 0;
+    }
+
+    changed = is_bit_set(bitmap, sector_num);
+    for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
+	if (is_bit_set(bitmap, sector_num + *num_same) != changed)
+	    break;
+    }
+
+    return changed;
+}
+    
 /* return -1 if error */
 int bdrv_read(BlockDriverState *bs, int64_t sector_num, 
               uint8_t *buf, int nb_sectors)
 {
-    int ret;
+    int ret, num_same, fd;
 
-    lseek64(bs->fd, sector_num * 512, SEEK_SET);
-    ret = read(bs->fd, buf, nb_sectors * 512);
-    if (ret != nb_sectors * 512)
+    fd = bs->fd;
+    if (is_changed(bs->changes_map, sector_num, nb_sectors, &num_same))
+	fd = bs->changes_fd;
+
+    lseek64(fd, sector_num * 512, SEEK_SET);
+    ret = read(fd, buf, num_same * 512);
+    if (ret != num_same * 512) {
+	fprintf(stderr, "Block: Failed to read %i sectors at %lli\n",
+		num_same, (long long)sector_num);
         return -1;
-    else
-        return 0;
+    }
+
+    /* Recurse to do rest of blocks. */
+    if (num_same < nb_sectors)
+	return bdrv_read(bs, sector_num + num_same, buf + 512 * num_same,
+			 nb_sectors - num_same);
+    return 0;
 }
 
 /* return -1 if error */
 int bdrv_write(BlockDriverState *bs, int64_t sector_num, 
                const uint8_t *buf, int nb_sectors)
 {
-    int ret;
+    int ret, fd, i;
 
-    if (bs->read_only)
-        return -1;
+    fd = bs->fd;
+
+    if (bs->changes_map)
+	fd = bs->changes_fd;
+    else if (bs->read_only)
+	return -1;
 
-    lseek64(bs->fd, sector_num * 512, SEEK_SET);
-    ret = write(bs->fd, buf, nb_sectors * 512);
+    lseek64(fd, sector_num * 512, SEEK_SET);
+    ret = write(fd, buf, nb_sectors * 512);
     if (ret != nb_sectors * 512)
         return -1;
-    else
-        return 0;
+
+    if (bs->changes_map)
+	for (i = 0; i < nb_sectors; i++)
+	    set_bit(bs->changes_map, sector_num + i);
+
+    return 0;
 }
 
 void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr)
Index: vl.c
===================================================================
RCS file: /cvsroot/qemu/qemu/vl.c,v
retrieving revision 1.10
diff -u -r1.10 vl.c
--- vl.c	1 Jul 2003 16:27:45 -0000	1.10
+++ vl.c	2 Jul 2003 06:45:09 -0000
@@ -52,6 +52,7 @@
 
 #define DEBUG_LOGFILE "/tmp/vl.log"
 #define DEFAULT_NETWORK_SCRIPT "/etc/vl-ifup"
+#define MAX_DISKS 2
 
 //#define DEBUG_UNUSED_IOPORT
 //#define DEBUG_IRQ_LATENCY
@@ -63,6 +64,8 @@
 #define INITRD_LOAD_ADDR   0x00400000
 #define KERNEL_PARAMS_ADDR 0x00090000
 
+BlockDriverState *bs_table[MAX_DISKS];
+
 /* from plex86 (BSD license) */
 struct  __attribute__ ((packed)) linux_params {
   // For 0x00..0x3f, see 'struct screen_info' in linux/include/linux/tty.h.
@@ -1265,6 +1268,7 @@
     printf("\n"
            "C-a h    print this help\n"
            "C-a x    exit emulatior\n"
+	   "C-a s    save disk data back to file (if -snapshot)\n"
            "C-a b    send break (magic sysrq)\n"
            "C-a C-a  send C-a\n"
            );
@@ -1282,6 +1286,14 @@
         case 'x':
             exit(0);
             break;
+	case 's': {
+	    int i;
+
+	    for (i = 0; i < MAX_DISKS; i++)
+		if (bs_table[i])
+		    bdrv_commit(bs_table[i]);
+	    break;
+	    }
         case 'b':
             /* send break */
             s->rbr = 0;
@@ -1976,8 +1988,6 @@
 /* set to 1 set disable mult support */
 #define MAX_MULT_SECTORS 8
 
-#define MAX_DISKS 2
-
 struct IDEState;
 
 typedef void EndTransferFunc(struct IDEState *);
@@ -2009,7 +2019,6 @@
     uint8_t io_buffer[MAX_MULT_SECTORS*512 + 4];
 } IDEState;
 
-BlockDriverState *bs_table[MAX_DISKS];
 IDEState ide_state[MAX_DISKS];
 
 static void padstr(char *str, const char *src, int len)
@@ -2577,6 +2586,7 @@
            "-initrd file   use 'file' as initial ram disk\n"
            "-hda file      use 'file' as hard disk 0 image\n"
            "-hdb file      use 'file' as hard disk 1 image\n"
+	   "-snapshot      write to temporary files instead of disk files\n"
            "-m megs        set virtual RAM size to megs MB\n"
            "-n script      set network init script [default=%s]\n"
            "\n"
@@ -2595,12 +2605,13 @@
     { "initrd", 1, NULL, 0, },
     { "hda", 1, NULL, 0, },
     { "hdb", 1, NULL, 0, },
+    { "snapshot", 0, NULL, 0, },
     { NULL, 0, NULL, 0 },
 };
 
 int main(int argc, char **argv)
 {
-    int c, ret, initrd_size, i, use_gdbstub, gdbstub_port, long_index;
+    int c, ret, initrd_size, i, use_gdbstub, gdbstub_port, snapshot, long_index;
     struct linux_params *params;
     struct sigaction act;
     struct itimerval itv;
@@ -2617,6 +2628,7 @@
     pstrcpy(network_script, sizeof(network_script), DEFAULT_NETWORK_SCRIPT);
     use_gdbstub = 0;
     gdbstub_port = DEFAULT_GDBSTUB_PORT;
+    snapshot = 0;
     for(;;) {
         c = getopt_long_only(argc, argv, "hm:dn:sp:", long_options, &long_index);
         if (c == -1)
@@ -2633,6 +2645,9 @@
             case 2:
                 hd_filename[1] = optarg;
                 break;
+	    case 3:
+		snapshot = 1;
+		break;
             }
             break;
         case 'h':
@@ -2679,7 +2694,7 @@
     /* open the virtual block devices */
     for(i = 0; i < MAX_DISKS; i++) {
         if (hd_filename[i]) {
-            bs_table[i] = bdrv_open(hd_filename[i]);
+            bs_table[i] = bdrv_open(hd_filename[i], snapshot);
             if (!bs_table[i]) {
                 fprintf(stderr, "vl: could not open hard disk image '%s\n",
                         hd_filename[i]);
Index: vl.h
===================================================================
RCS file: /cvsroot/qemu/qemu/vl.h,v
retrieving revision 1.1
diff -u -r1.1 vl.h
--- vl.h	30 Jun 2003 10:03:06 -0000	1.1
+++ vl.h	2 Jul 2003 06:45:09 -0000
@@ -27,13 +27,13 @@
 /* block.c */
 typedef struct BlockDriverState BlockDriverState;
 
-BlockDriverState *bdrv_open(const char *filename);
+BlockDriverState *bdrv_open(const char *filename, int snapshot);
 void bdrv_close(BlockDriverState *bs);
 int bdrv_read(BlockDriverState *bs, int64_t sector_num, 
               uint8_t *buf, int nb_sectors);
 int bdrv_write(BlockDriverState *bs, int64_t sector_num, 
                const uint8_t *buf, int nb_sectors);
 void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr);
-
+void bdrv_commit(BlockDriverState *bs);
 
 #endif /* VL_H */
Index: tests/Makefile
===================================================================
RCS file: /cvsroot/qemu/qemu/tests/Makefile,v
retrieving revision 1.21
diff -u -r1.21 Makefile
--- tests/Makefile	15 Jun 2003 20:42:31 -0000	1.21
+++ tests/Makefile	2 Jul 2003 06:45:10 -0000
@@ -6,7 +6,7 @@
 ifeq ($(ARCH),i386)
 TESTS=testclone testsig testthread sha1-i386 test-i386 runcom
 endif
-TESTS+=sha1 test_path
+TESTS+=sha1 test_path test_block
 
 QEMU=../qemu
 
@@ -28,6 +28,12 @@
 test_path: test_path.c
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
 	./$@ || { rm $@; exit 1; }
+
+test_block: test_block.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+	./$@ test_block_data || { rm -f $@ test_block_data; exit 1; }
+	./$@ test_block_data undo || { rm -f $@ test_block_data; exit 1; }
+	@rm -f test_block_data
 
 # i386 emulation test (test various opcodes) */
 test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S \

             reply	other threads:[~2003-07-02  6:54 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-07-02  6:50 Rusty Russell [this message]
2003-07-02  9:49 ` [Qemu-devel] Re: [PATCH] Snapshot block device support Fabrice Bellard
2003-07-02 10:04   ` Thomas Glanzmann
2003-07-02 23:54   ` Rusty Russell
2003-07-04 14:51     ` Fabrice Bellard
2003-07-06  2:26       ` Rusty Russell
2003-07-06 14:08         ` Fabrice Bellard
2003-07-07  8:15           ` Rusty Russell
2003-07-07 12:51             ` Fabrice Bellard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20030702065122.0DAD72C0D3@lists.samba.org \
    --to=rusty@rustcorp.com.au \
    --cc=fabrice.bellard@free.fr \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).