All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch 2/3] New blktap implementation
@ 2008-10-31  3:16 Dutch Meyer
  2008-10-31  3:50 ` Stefan de Konink
  2008-10-31  9:59 ` Kevin Wolf
  0 siblings, 2 replies; 5+ messages in thread
From: Dutch Meyer @ 2008-10-31  3:16 UTC (permalink / raw)
  To: xen-devel@lists.xensource.com; +Cc: Keir Fraser

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1695 bytes --]

Patch 2 of 3.

Signed-off-by: Jake Wires <jake.wires@citrix.com>, Dutch Meyer 
<dmeyer@cs.ubc.ca>

This is a new and rewritten version of blktap that we have developed at
Citrix.  The current version of blktap is left functionally unmodified.
The change set consists of four patches, numbered 0 through 3.

   0) A patch to deprecate the open source blktap, by moving it and issuing
a warning whenever it is used.  No functionality is modified in this
patch; it is just housekeeping.

   1) A patch to add a new blktap implementation that is feature equivalent
to (or better than) the current open source blktap.  This will eventually
replace the current blktap implementation.

   2) Fix several bugs in the qcow tools.

   3) A kernel patch to add a new unified blktap2 module that will
eventually replace blktap.

The new blktap implementation has several improvements.

   * Isolation from xenstore - Blktap devices can now be created in dom0 as
virtual block devices without coordination from xen and have few
dependencies on xenstore in normal operation.

   * Improved development environment for tapdisks, simpler request
forwarding, new request scheduler.

   * Pause scripts updated to support live qcow snapshots (see the xmsnap
script).

   * New tapdisk type: Block Mason disks allow a set of tapdisks to be
flexibly arranged into a graph structure and modified on the fly.  Several
example modules for Block Mason are included.  Block Mason disks are
constructed and modified with a declarative configuration language.
These capabilities are discussed in more depth in an upcoming paper in the
First Workshop on IO Virtualization, available HERE:
http://www.cs.ubc.ca/~dmeyer/blockmason-wiov-final.pdf

--Dutch

[-- Attachment #2: Type: TEXT/PLAIN, Size: 9870 bytes --]

diff -r 6eb1a5d782bf tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c	Thu Oct 30 19:48:08 2008 -0700
+++ b/tools/blktap/drivers/block-qcow.c	Thu Oct 30 19:50:59 2008 -0700
@@ -88,18 +88,8 @@
   int i;
   uint32_t md[4];
 
-  /* Convert L1 table to big endian */
-  for(i = 0; i < len / sizeof(uint64_t); i++) {
-    cpu_to_be64s(&((uint64_t*) ptr)[i]);
-  }
-
   /* Generate checksum */
   gcry_md_hash_buffer(GCRY_MD_MD5, md, ptr, len);
-
-  /* Convert L1 table back to native endianess */
-  for(i = 0; i < len / sizeof(uint64_t); i++) {
-    be64_to_cpus(&((uint64_t*) ptr)[i]);
-  }
 
   return md[0];
 }
@@ -117,21 +107,11 @@
   md = malloc(MD5_DIGEST_LENGTH);
   if(!md) return 0;
 
-  /* Convert L1 table to big endian */
-  for(i = 0; i < len / sizeof(uint64_t); i++) {
-    cpu_to_be64s(&((uint64_t*) ptr)[i]);
-  }
-
   /* Generate checksum */
   if (MD5((unsigned char *)ptr, len, md) != md)
     ret = 0;
   else
     memcpy(&ret, md, sizeof(uint32_t));
-
-  /* Convert L1 table back to native endianess */
-  for(i = 0; i < len / sizeof(uint64_t); i++) {
-    be64_to_cpus(&((uint64_t*) ptr)[i]);
-  }
 
   free(md);
   return ret;
@@ -491,7 +471,6 @@
 
 		/* update the L1 entry */
 		s->l1_table[l1_index] = l2_offset;
-		tmp = cpu_to_be64(l2_offset);
 		
 		/*Truncate file for L2 table 
 		 *(initialised to zero in case we crash)*/
@@ -874,22 +853,6 @@
 		(int) (s->l1_size * sizeof(uint64_t)), 
 		l1_table_size);
 
-	lseek(fd, 0, SEEK_SET);
-	l1_table_block = l1_table_size + s->l1_table_offset;
-	l1_table_block = l1_table_block + 512 - (l1_table_block % 512); 
-	ret = posix_memalign((void **)&buf2, 4096, l1_table_block);
-	if (ret != 0) goto fail;
-	if (read(fd, buf2, l1_table_block) != l1_table_block)
-		goto fail;
-	memcpy(s->l1_table, buf2 + s->l1_table_offset, l1_table_size);
-
-	for(i = 0; i < s->l1_size; i++) {
-		be64_to_cpus(&s->l1_table[i]);
-		//DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
-		if (s->l1_table[i] > final_cluster)
-			final_cluster = s->l1_table[i];
-	}
-
 	/* alloc L2 cache */
 	size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
 	ret = posix_memalign((void **)&s->l2_cache, 4096, size);
@@ -911,32 +874,39 @@
 	
 	/*Detect min_cluster_alloc*/
 	s->min_cluster_alloc = 1; /*Default*/
-	if (s->backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) {
+
+	l1_table_block = l1_table_size + s->l1_table_offset;
+	l1_table_block = (l1_table_block + 511) & ~511;
+	ret = posix_memalign((void **)&buf2, 512, l1_table_block);
+	if (ret != 0) goto fail;
+
+	lseek(fd, 0, SEEK_SET);
+	if (read(fd, buf2, l1_table_block) != l1_table_block)
+		goto fail;
+	memcpy(s->l1_table, buf2 + s->l1_table_offset, l1_table_size);
+
+	if (s->l1_table_offset % 4096 == 0) {
 		/*We test to see if the xen magic # exists*/
-		exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
+		exthdr = (QCowHeader_ext *)(buf2 + sizeof(QCowHeader));
 		be32_to_cpus(&exthdr->xmagic);
+		be32_to_cpus(&exthdr->min_cluster_alloc);
+		be32_to_cpus(&exthdr->flags);
+
 		if(exthdr->xmagic != XEN_MAGIC) 
 			goto end_xenhdr;
-    
+		s->extended = 1;
+
 		/* Try to detect old tapdisk images. They have to be fixed because 
-		 * they don't use big endian but native endianess for the L1 table */
+		 * they don't use big endian but native endianess for the L1 table. *
+		 * After this block, the l1 table will definitely be in BIG endian.*/
 		if ((exthdr->flags & EXTHDR_L1_BIG_ENDIAN) == 0) {
 
 			/* 
-			   The image is broken. Fix it. The L1 table has already been 
-			   byte-swapped, so we can write it to the image file as it is
-			   currently in memory. Then swap it back to native endianess
-			   for operation.
+			   The image is broken. Fix it. Swap
+			   everything we've changed, and also the L1
+			   table to big endian for write to image.
 			 */
-
 			DPRINTF("qcow: Converting image to big endian L1 table\n");
-
-			memcpy(buf2 + s->l1_table_offset, s->l1_table, l1_table_size);
-			lseek(fd, 0, SEEK_SET);
-			if (write(fd, buf2, l1_table_block) != l1_table_block) {
-				DPRINTF("qcow: Failed to write new L1 table\n");
-				goto fail;
-			}
 
 			for(i = 0;i < s->l1_size; i++) {
 				cpu_to_be64s(&s->l1_table[i]);
@@ -944,11 +914,22 @@
 
 			/* Write the big endian flag to the extended header */
 			exthdr->flags |= EXTHDR_L1_BIG_ENDIAN;
-
-			if (write(fd, buf, 512) != 512) {
-				DPRINTF("qcow: Failed to write extended header\n");
+			
+			cpu_to_be32s(&exthdr->xmagic);
+			cpu_to_be32s(&exthdr->min_cluster_alloc);
+			cpu_to_be32s(&exthdr->flags);
+			
+			memcpy(buf2 + s->l1_table_offset, s->l1_table, l1_table_size);
+			lseek(fd, 0, SEEK_SET);
+			if (write(fd, buf2, l1_table_block) != l1_table_block) {
+				DPRINTF("qcow: Failed to write new L1 table\n");
 				goto fail;
 			}
+			
+			//Now we swap back to how it was before.
+			be32_to_cpus(&exthdr->xmagic);
+			be32_to_cpus(&exthdr->min_cluster_alloc);
+			be32_to_cpus(&exthdr->flags);
 		}
 
 		/*Finally check the L1 table cksum*/
@@ -958,14 +939,19 @@
 		if(exthdr->cksum != cksum)
 			goto end_xenhdr;
 			
-		be32_to_cpus(&exthdr->min_cluster_alloc);
-		be32_to_cpus(&exthdr->flags);
 		s->sparse = (exthdr->flags & SPARSE_FILE);
 		s->min_cluster_alloc = exthdr->min_cluster_alloc; 
 	}
 
  end_xenhdr:
-	
+	/* convert l1 table to native endian for operation */
+	for(i = 0; i < s->l1_size; i++) {
+		be64_to_cpus(&s->l1_table[i]);
+		//DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
+		if (s->l1_table[i] > final_cluster)
+			final_cluster = s->l1_table[i];
+	}
+
 	if (init_aio_state(driver)!=0) {
 	  DPRINTF("Unable to initialise AIO state\n");
 	  free_aio_state(s);
@@ -1127,24 +1113,62 @@
 
 	return;
 }
+
+static int
+tdqcow_update_checksum(struct tdqcow_state *s)
+{
+	int i, fd, err;
+	uint32_t offset, cksum, out;
+
+	if (!s->extended)
+		return 0;
+
+	fd = open(s->name, O_WRONLY | O_LARGEFILE); /* open without O_DIRECT */
+	if (fd == -1) {
+		err = errno;
+		goto out;
+	}
+
+	offset = sizeof(QCowHeader) + offsetof(QCowHeader_ext, cksum);
+	if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
+		err = errno;
+		goto out;
+	}
+
+	/* convert to big endian for checksum */
+	for (i = 0; i < s->l1_size; i++)
+		cpu_to_be64s(&s->l1_table[i]);
+
+	cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
+
+	/* and back again... */
+	for (i = 0; i < s->l1_size; i++)
+		be64_to_cpus(&s->l1_table[i]);
+
+	DPRINTF("Writing cksum: %d", cksum);
+
+	out = cpu_to_be32(cksum);
+	if (write(fd, &out, sizeof(out)) != sizeof(out)) {
+		err = errno;
+		goto out;
+	}
+
+	err = 0;
+
+out:
+	if (err)
+		DPRINTF("failed to update checksum: %d\n", err);
+	if (fd != -1)
+		close(fd);
+	return err;
+}
  		
 int tdqcow_close(td_driver_t *driver)
 {
 	struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
-	uint32_t cksum, out;
-	int fd, offset;
 
 	/*Update the hdr cksum*/
-	if(s->min_cluster_alloc == s->l2_size) {
-		cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
-		printf("Writing cksum: %d",cksum);
-		fd = open(s->name, O_WRONLY | O_LARGEFILE); /*Open without O_DIRECT*/
-		offset = sizeof(QCowHeader) + sizeof(uint32_t);
-		lseek(fd, offset, SEEK_SET);
-		out = cpu_to_be32(cksum);
-		if (write(fd, &out, sizeof(uint32_t))) ;
-		close(fd);
-	}
+	tdqcow_update_checksum(s);
 
 	free(s->name);
 	free(s->l1_table);
@@ -1277,6 +1301,7 @@
 	} else
 		flags = SPARSE_FILE;
 
+	exthdr.flags |= EXTHDR_L1_BIG_ENDIAN;
 	exthdr.flags = cpu_to_be32(flags);
 	
 	/* write all the data */
diff -r 6eb1a5d782bf tools/blktap/drivers/img2qcow.c
--- a/tools/blktap/drivers/img2qcow.c	Thu Oct 30 19:48:08 2008 -0700
+++ b/tools/blktap/drivers/img2qcow.c	Thu Oct 30 19:50:59 2008 -0700
@@ -107,6 +107,7 @@
 
 static inline void LOCAL_FD_SET(fd_set *readfds) 
 {
+        FD_ZERO(readfds);
 	FD_SET(server.aio_queue.poll_fd, readfds);
 	maxfds = server.aio_queue.poll_fd;
 	
diff -r 6eb1a5d782bf tools/blktap/drivers/qcow.h
--- a/tools/blktap/drivers/qcow.h	Thu Oct 30 19:48:08 2008 -0700
+++ b/tools/blktap/drivers/qcow.h	Thu Oct 30 19:50:59 2008 -0700
@@ -60,6 +60,7 @@
 	char *name;                    /*Record of the filename*/
 	uint32_t backing_file_size;
 	uint64_t backing_file_offset;
+	uint8_t extended;              /*File contains extended header*/
 	int encrypted;                 /*File contents are encrypted or plain*/
 	int cluster_bits;              /*Determines length of cluster as 
 					*indicated by file hdr*/
@@ -89,8 +90,8 @@
 	uint32_t crypt_method_header;  /**/
 	AES_KEY aes_encrypt_key;       /*AES key*/
 	AES_KEY aes_decrypt_key;       /*AES key*/
+
         /* libaio state */
-
 	int                  aio_free_count;	
 	int                  max_aio_reqs;
 	struct qcow_request   *aio_requests;
diff -r 6eb1a5d782bf tools/blktap/drivers/qcow2raw.c
--- a/tools/blktap/drivers/qcow2raw.c	Thu Oct 30 19:48:08 2008 -0700
+++ b/tools/blktap/drivers/qcow2raw.c	Thu Oct 30 19:50:59 2008 -0700
@@ -112,6 +112,7 @@
 
 static inline void LOCAL_FD_SET(fd_set *readfds) 
 {
+        FD_ZERO(readfds);
 	FD_SET(qcowio_fd[0], readfds);
 	FD_SET(aio_fd[0], readfds);
 	
diff -r 6eb1a5d782bf tools/blktap/drivers/xmsnap
--- a/tools/blktap/drivers/xmsnap	Thu Oct 30 19:48:08 2008 -0700
+++ b/tools/blktap/drivers/xmsnap	Thu Oct 30 19:50:59 2008 -0700
@@ -54,11 +54,19 @@
 # Pause VM
 #
 xm pause $vmid
+if [ "$?" -ne "0" ]; then
+  exit 1
+fi
+
 
 #
 # Snap and reposition the files
 #
 mv $directory/$target $directory/$snap
+if [ "$?" -ne "0" ]; then
+  exit 1
+fi
+
 qcow-create 0 $directory/$target $directory/$snap
 
 #

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-10-31 17:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-31  3:16 [Patch 2/3] New blktap implementation Dutch Meyer
2008-10-31  3:50 ` Stefan de Konink
2008-10-31  3:52   ` Dutch Meyer
2008-10-31  9:59 ` Kevin Wolf
2008-10-31 17:54   ` Dutch Meyer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.