[Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
@ 2009-01-20 10:56 Gleb Natapov
  2009-01-20 14:01 ` Ian Jackson
  2009-01-21 19:00 ` Anthony Liguori
  0 siblings, 2 replies; 14+ messages in thread
From: Gleb Natapov @ 2009-01-20 10:56 UTC (permalink / raw)
  To: qemu-devel

This version of the patch adds new option "werror" to -drive flag.
Possible values are:

report    - report errors to a guest as IO errors
ignore    - continue as if nothing happened
stop      - stop VM on any error and retry last command on resume
enospc    - stop vm on ENOSPC error and retry last command on resume
            all other errors are reported to a guest.

Default is "report" to maintain current behaviour.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
diff --git a/hw/ide.c b/hw/ide.c
index 7dd41f7..06ac6dc 100644
--- a/hw/ide.c
+++ b/hw/ide.c
@@ -457,6 +457,8 @@ static inline int media_is_cd(IDEState *s)
 #define BM_STATUS_DMAING 0x01
 #define BM_STATUS_ERROR  0x02
 #define BM_STATUS_INT    0x04
+#define BM_STATUS_DMA_RETRY  0x08
+#define BM_STATUS_PIO_RETRY  0x10
 
 #define BM_CMD_START     0x01
 #define BM_CMD_READ      0x08
@@ -488,6 +490,8 @@ typedef struct BMDMAState {
     IDEState *ide_if;
     BlockDriverCompletionFunc *dma_cb;
     BlockDriverAIOCB *aiocb;
+    int64_t sector_num;
+    uint32_t nsector;
 } BMDMAState;
 
 typedef struct PCIIDEState {
@@ -498,6 +502,7 @@ typedef struct PCIIDEState {
 } PCIIDEState;
 
 static void ide_dma_start(IDEState *s, BlockDriverCompletionFunc *dma_cb);
+static void ide_dma_restart(IDEState *s);
 static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret);
 
 static void padstr(char *str, const char *src, int len)
@@ -865,6 +870,28 @@ static void ide_dma_error(IDEState *s)
     ide_set_irq(s);
 }
 
+static int ide_handle_write_error(IDEState *s, int error, int op)
+{
+    BlockInterfaceErrorAction action = drive_get_onerror(s->bs);
+
+    if (action == BLOCK_ERR_IGNORE)
+        return 0;
+
+    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
+            || action == BLOCK_ERR_STOP_ANY) {
+        s->bmdma->ide_if = s;
+        s->bmdma->status |= op;
+        vm_stop(0);
+    } else {
+        if (op == BM_STATUS_DMA_RETRY)
+            ide_dma_error(s);
+        else
+            ide_rw_error(s);
+    }
+
+    return 1;
+}
+
 /* return 0 if buffer completed */
 static int dma_buf_rw(BMDMAState *bm, int is_write)
 {
@@ -990,9 +1017,10 @@ static void ide_sector_write(IDEState *s)
     if (n > s->req_nb_sectors)
         n = s->req_nb_sectors;
     ret = bdrv_write(s->bs, sector_num, s->io_buffer, n);
+
     if (ret != 0) {
-	ide_rw_error(s);
-	return;
+        if (ide_handle_write_error(s, -ret, BM_STATUS_PIO_RETRY))
+            return;
     }
 
     s->nsector -= n;
@@ -1024,6 +1052,20 @@ static void ide_sector_write(IDEState *s)
     }
 }
 
+static void ide_dma_restart_cb(void *opaque, int running)
+{
+    BMDMAState *bm = opaque;
+    if (!running)
+        return;
+    if (bm->status & BM_STATUS_DMA_RETRY) {
+        bm->status &= ~BM_STATUS_DMA_RETRY;
+        ide_dma_restart(bm->ide_if);
+    } else if (bm->status & BM_STATUS_PIO_RETRY) {
+        bm->status &= ~BM_STATUS_PIO_RETRY;
+        ide_sector_write(bm->ide_if);
+    }
+}
+
 static void ide_write_dma_cb(void *opaque, int ret)
 {
     BMDMAState *bm = opaque;
@@ -1032,8 +1074,8 @@ static void ide_write_dma_cb(void *opaque, int ret)
     int64_t sector_num;
 
     if (ret < 0) {
-	ide_dma_error(s);
-	return;
+        if (ide_handle_write_error(s, -ret,  BM_STATUS_DMA_RETRY))
+            return;
     }
 
     n = s->io_buffer_size >> 9;
@@ -2849,11 +2891,25 @@ static void ide_dma_start(IDEState *s, BlockDriverCompletionFunc *dma_cb)
     bm->cur_prd_last = 0;
     bm->cur_prd_addr = 0;
     bm->cur_prd_len = 0;
+    bm->sector_num = ide_get_sector(s);
+    bm->nsector = s->nsector;
     if (bm->status & BM_STATUS_DMAING) {
         bm->dma_cb(bm, 0);
     }
 }
 
+static void ide_dma_restart(IDEState *s)
+{
+    BMDMAState *bm = s->bmdma;
+    ide_set_sector(s, bm->sector_num);
+    s->io_buffer_index = 0;
+    s->io_buffer_size = 0;
+    s->nsector = bm->nsector;
+    bm->cur_addr = bm->addr;
+    bm->dma_cb = ide_write_dma_cb;
+    ide_dma_start(s, bm->dma_cb);
+}
+
 static void ide_dma_cancel(BMDMAState *bm)
 {
     if (bm->status & BM_STATUS_DMAING) {
@@ -3043,6 +3099,7 @@ static void bmdma_map(PCIDevice *pci_dev, int region_num,
         d->ide_if[2 * i].bmdma = bm;
         d->ide_if[2 * i + 1].bmdma = bm;
         bm->pci_dev = (PCIIDEState *)pci_dev;
+        qemu_add_vm_change_state_handler(ide_dma_restart_cb, bm);
 
         register_ioport_write(addr, 1, 1, bmdma_cmd_writeb, bm);
 
@@ -3068,9 +3125,14 @@ static void pci_ide_save(QEMUFile* f, void *opaque)
 
     for(i = 0; i < 2; i++) {
         BMDMAState *bm = &d->bmdma[i];
+        uint8_t ifidx;
         qemu_put_8s(f, &bm->cmd);
         qemu_put_8s(f, &bm->status);
         qemu_put_be32s(f, &bm->addr);
+        qemu_put_sbe64s(f, &bm->sector_num);
+        qemu_put_be32s(f, &bm->nsector);
+        ifidx = bm->ide_if ? bm->ide_if - d->ide_if : 0;
+        qemu_put_8s(f, &ifidx);
         /* XXX: if a transfer is pending, we do not save it yet */
     }
 
@@ -3094,7 +3156,7 @@ static int pci_ide_load(QEMUFile* f, void *opaque, int version_id)
     PCIIDEState *d = opaque;
     int ret, i;
 
-    if (version_id != 1)
+    if (version_id != 2)
         return -EINVAL;
     ret = pci_device_load(&d->dev, f);
     if (ret < 0)
@@ -3102,9 +3164,14 @@ static int pci_ide_load(QEMUFile* f, void *opaque, int version_id)
 
     for(i = 0; i < 2; i++) {
         BMDMAState *bm = &d->bmdma[i];
+        uint8_t ifidx;
         qemu_get_8s(f, &bm->cmd);
         qemu_get_8s(f, &bm->status);
         qemu_get_be32s(f, &bm->addr);
+        qemu_get_sbe64s(f, &bm->sector_num);
+        qemu_get_be32s(f, &bm->nsector);
+        qemu_get_8s(f, &ifidx);
+        bm->ide_if = &d->ide_if[ifidx];
         /* XXX: if a transfer is pending, we do not save it yet */
     }
 
@@ -3212,7 +3279,7 @@ void pci_cmd646_ide_init(PCIBus *bus, BlockDriverState **hd_table,
     ide_init2(&d->ide_if[0], hd_table[0], hd_table[1], irq[0]);
     ide_init2(&d->ide_if[2], hd_table[2], hd_table[3], irq[1]);
 
-    register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d);
+    register_savevm("ide", 0, 2, pci_ide_save, pci_ide_load, d);
     qemu_register_reset(cmd646_reset, d);
     cmd646_reset(d);
 }
@@ -3269,7 +3336,7 @@ void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn,
     ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6);
     ide_init_ioport(&d->ide_if[2], 0x170, 0x376);
 
-    register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d);
+    register_savevm("ide", 0, 2, pci_ide_save, pci_ide_load, d);
 }
 
 /* hd_table must contain 4 block drivers */
@@ -3308,7 +3375,7 @@ void pci_piix4_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn,
     ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6);
     ide_init_ioport(&d->ide_if[2], 0x170, 0x376);
 
-    register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d);
+    register_savevm("ide", 0, 2, pci_ide_save, pci_ide_load, d);
 }
 
 /***********************************************************/
diff --git a/sysemu.h b/sysemu.h
index 56eb9b3..b390184 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -128,11 +128,17 @@ typedef enum {
     IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO
 } BlockInterfaceType;
 
+typedef enum {
+    BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC,
+    BLOCK_ERR_STOP_ANY
+} BlockInterfaceErrorAction;
+
 typedef struct DriveInfo {
     BlockDriverState *bdrv;
     BlockInterfaceType type;
     int bus;
     int unit;
+    BlockInterfaceErrorAction onerror;
     char serial[21];
 } DriveInfo;
 
@@ -146,6 +152,7 @@ extern DriveInfo drives_table[MAX_DRIVES+1];
 extern int drive_get_index(BlockInterfaceType type, int bus, int unit);
 extern int drive_get_max_bus(BlockInterfaceType type);
 extern const char *drive_get_serial(BlockDriverState *bdrv);
+extern BlockInterfaceErrorAction drive_get_onerror(BlockDriverState *bdrv);
 
 /* serial ports */
 
diff --git a/vl.c b/vl.c
index 63d954b..426033c 100644
--- a/vl.c
+++ b/vl.c
@@ -2200,6 +2200,17 @@ const char *drive_get_serial(BlockDriverState *bdrv)
     return "\0";
 }
 
+BlockInterfaceErrorAction drive_get_onerror(BlockDriverState *bdrv)
+{
+    int index;
+
+    for (index = 0; index < nb_drives; index++)
+        if (drives_table[index].bdrv == bdrv)
+            return drives_table[index].onerror;
+
+    return BLOCK_ERR_REPORT;
+}
+
 static void bdrv_format_print(void *opaque, const char *name)
 {
     fprintf(stderr, " %s", name);
@@ -2222,12 +2233,13 @@ static int drive_init(struct drive_opt *arg, int snapshot,
     int max_devs;
     int index;
     int cache;
-    int bdrv_flags;
+    int bdrv_flags, onerror;
     char *str = arg->opt;
     static const char * const params[] = { "bus", "unit", "if", "index",
                                            "cyls", "heads", "secs", "trans",
                                            "media", "snapshot", "file",
-                                           "cache", "format", "serial", NULL };
+                                           "cache", "format", "serial", "werror",
+                                           NULL };
 
     if (check_params(buf, sizeof(buf), params, str) < 0) {
          fprintf(stderr, "qemu: unknown parameter '%s' in '%s'\n",
@@ -2417,6 +2429,26 @@ static int drive_init(struct drive_opt *arg, int snapshot,
     if (!get_param_value(serial, sizeof(serial), "serial", str))
 	    memset(serial, 0,  sizeof(serial));
 
+    onerror = BLOCK_ERR_REPORT;
+    if (get_param_value(buf, sizeof(serial), "werror", str)) {
+        if (type != IF_IDE) {
+            fprintf(stderr, "werror is supported only by IDE\n");
+            return -1;
+        }
+        if (!strcmp(buf, "ignore"))
+            onerror = BLOCK_ERR_IGNORE;
+        else if (!strcmp(buf, "enospc"))
+            onerror = BLOCK_ERR_STOP_ENOSPC;
+        else if (!strcmp(buf, "stop"))
+            onerror = BLOCK_ERR_STOP_ANY;
+        else if (!strcmp(buf, "report"))
+            onerror = BLOCK_ERR_REPORT;
+        else {
+            fprintf(stderr, "qemu: '%s' invalid write error action\n", buf);
+            return -1;
+        }
+    }
+
     /* compute bus and unit according index */
 
     if (index != -1) {
@@ -2480,6 +2512,7 @@ static int drive_init(struct drive_opt *arg, int snapshot,
     drives_table[nb_drives].type = type;
     drives_table[nb_drives].bus = bus_id;
     drives_table[nb_drives].unit = unit_id;
+    drives_table[nb_drives].onerror = onerror;
     strncpy(drives_table[nb_drives].serial, serial, sizeof(serial));
     nb_drives++;
 
--
			Gleb.

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-20 10:56 [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error Gleb Natapov
@ 2009-01-20 14:01 ` Ian Jackson
  2009-01-20 14:16   ` Gleb Natapov
  2009-01-21 19:00 ` Anthony Liguori
  1 sibling, 1 reply; 14+ messages in thread
From: Ian Jackson @ 2009-01-20 14:01 UTC (permalink / raw)
  To: qemu-devel

Gleb Natapov writes ("[Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> This version of the patch adds new option "werror" to -drive flag.
> Possible values are:
> 
> report    - report errors to a guest as IO errors
> ignore    - continue as if nothing happened
> stop      - stop VM on any error and retry last command on resume
> enospc    - stop vm on ENOSPC error and retry last command on resume
>             all other errors are reported to a guest.

Great, thanks.

Are we sure that the ide layer is the right place to do this ?
Perhaps it would be better to do it in block.c.  That way it would
seamlessly affect scsi too and other kinds of weird block devices
on other platforms.

That way the ide code can remain oblivious.

What do you think ?

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-20 14:01 ` Ian Jackson
@ 2009-01-20 14:16   ` Gleb Natapov
  2009-01-20 14:57     ` Ian Jackson
  0 siblings, 1 reply; 14+ messages in thread
From: Gleb Natapov @ 2009-01-20 14:16 UTC (permalink / raw)
  To: qemu-devel

On Tue, Jan 20, 2009 at 02:01:21PM +0000, Ian Jackson wrote:
> Gleb Natapov writes ("[Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> > This version of the patch adds new option "werror" to -drive flag.
> > Possible values are:
> > 
> > report    - report errors to a guest as IO errors
> > ignore    - continue as if nothing happened
> > stop      - stop VM on any error and retry last command on resume
> > enospc    - stop vm on ENOSPC error and retry last command on resume
> >             all other errors are reported to a guest.
> 
> Great, thanks.
> 
> Are we sure that the ide layer is the right place to do this ?
> Perhaps it would be better to do it in block.c.  That way it would
> seamlessly affect scsi too and other kinds of weird block devices
> on other platforms.
> 
> That way the ide code can remain oblivious.
> 
> What do you think ?
> 
I thought about it and I see only two places where this can be done. In
ide/scsi/pv-block layer or in individual block implementations
(qcow,raw, etc). The problem doing it in generic block.c layer is that
how should it handle errors during blocking writes? It can't return
error to the caller and it can't wait inside the function. Doing this in
the ide layer allows for error handling flexibility that this patch
provides.

--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-20 14:16   ` Gleb Natapov
@ 2009-01-20 14:57     ` Ian Jackson
  2009-01-20 15:31       ` Gleb Natapov
  0 siblings, 1 reply; 14+ messages in thread
From: Ian Jackson @ 2009-01-20 14:57 UTC (permalink / raw)
  To: qemu-devel

Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> On Tue, Jan 20, 2009 at 02:01:21PM +0000, Ian Jackson wrote:
> > Are we sure that the ide layer is the right place to do this ?
> > Perhaps it would be better to do it in block.c.  That way it would
> > seamlessly affect scsi too and other kinds of weird block devices
> > on other platforms.
...
> I thought about it and I see only two places where this can be done. In
> ide/scsi/pv-block layer or in individual block implementations
> (qcow,raw, etc). The problem doing it in generic block.c layer is that
> how should it handle errors during blocking writes? It can't return
> error to the caller and it can't wait inside the function. Doing this in
> the ide layer allows for error handling flexibility that this patch
> provides.

Hrm, yes, I see.

We could abolish the blocking calls.  Or rather, we could say that a
device emulation which wants the stop-on-enospc behaviour must never
make the blocking calls.

That way there would still have to be some additional complexity in
ide.c and scsi.c but only the minimum necessary.

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-20 14:57     ` Ian Jackson
@ 2009-01-20 15:31       ` Gleb Natapov
  2009-01-20 16:50         ` Ian Jackson
  0 siblings, 1 reply; 14+ messages in thread
From: Gleb Natapov @ 2009-01-20 15:31 UTC (permalink / raw)
  To: qemu-devel

On Tue, Jan 20, 2009 at 02:57:28PM +0000, Ian Jackson wrote:
> Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> > On Tue, Jan 20, 2009 at 02:01:21PM +0000, Ian Jackson wrote:
> > > Are we sure that the ide layer is the right place to do this ?
> > > Perhaps it would be better to do it in block.c.  That way it would
> > > seamlessly affect scsi too and other kinds of weird block devices
> > > on other platforms.
> ...
> > I thought about it and I see only two places where this can be done. In
> > ide/scsi/pv-block layer or in individual block implementations
> > (qcow,raw, etc). The problem doing it in generic block.c layer is that
> > how should it handle errors during blocking writes? It can't return
> > error to the caller and it can't wait inside the function. Doing this in
> > the ide layer allows for error handling flexibility that this patch
> > provides.
> 
> Hrm, yes, I see.
> 
> We could abolish the blocking calls.  Or rather, we could say that a
> device emulation which wants the stop-on-enospc behaviour must never
> make the blocking calls.
> 
I would like to abolish the blocking calls from all file formats, but in
reality it is almost impossible. Qcow2 metadata updates is a big mess to
do asynchronously. The best thing we can do is to move it to another thread.

So are you OK with my current approach?

--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-20 15:31       ` Gleb Natapov
@ 2009-01-20 16:50         ` Ian Jackson
  2009-01-20 18:19           ` Jamie Lokier
  2009-01-20 18:23           ` Gleb Natapov
  0 siblings, 2 replies; 14+ messages in thread
From: Ian Jackson @ 2009-01-20 16:50 UTC (permalink / raw)
  To: qemu-devel

Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> I would like to abolish the blocking calls from all file formats, but in
> reality it is almost impossible. Qcow2 metadata updates is a big mess to
> do asynchronously. The best thing we can do is to move it to another thread.

Err, I wasn't suggesting it should be done in each format in that way.
It could be done once in block.c, before the specific format write
method is called.

So the formats would still write synchronously, and would pass errors
up to their parent formats, until it reenters the generic block code
where the retry would take place.  At that point the call from the
device emulation would necessariy be asynchronous.

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-20 16:50         ` Ian Jackson
@ 2009-01-20 18:19           ` Jamie Lokier
  2009-01-20 18:23           ` Gleb Natapov
  1 sibling, 0 replies; 14+ messages in thread
From: Jamie Lokier @ 2009-01-20 18:19 UTC (permalink / raw)
  To: qemu-devel

Ian Jackson wrote:
> Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> > I would like to abolish the blocking calls from all file formats, but in
> > reality it is almost impossible. Qcow2 metadata updates is a big mess to
> > do asynchronously. The best thing we can do is to move it to another thread.
> 
> Err, I wasn't suggesting it should be done in each format in that way.
> It could be done once in block.c, before the specific format write
> method is called.
> 
> So the formats would still write synchronously, and would pass errors
> up to their parent formats, until it reenters the generic block code
> where the retry would take place.  At that point the call from the
> device emulation would necessariy be asynchronous.

Most of the block I/O is moving to threads anyway - if only to
"emulate" AIO.

If raw block I/O is going to use helper threads which do blocking I/O,
it may be just as fast, and much simpler, to put the QCOW2 format
block writes (including metadata) into those helper threads, instead
of (avoiding) rewriting QCOW2 to do everything with AIO calls.

-- Jamie

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-20 16:50         ` Ian Jackson
  2009-01-20 18:19           ` Jamie Lokier
@ 2009-01-20 18:23           ` Gleb Natapov
  2009-01-21 16:37             ` Ian Jackson
  1 sibling, 1 reply; 14+ messages in thread
From: Gleb Natapov @ 2009-01-20 18:23 UTC (permalink / raw)
  To: qemu-devel

On Tue, Jan 20, 2009 at 04:50:14PM +0000, Ian Jackson wrote:
> Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> > I would like to abolish the blocking calls from all file formats, but in
> > reality it is almost impossible. Qcow2 metadata updates is a big mess to
> > do asynchronously. The best thing we can do is to move it to another thread.
> 
> Err, I wasn't suggesting it should be done in each format in that way.
> It could be done once in block.c, before the specific format write
> method is called.
>
Before? How should we know before specific format write if there is
enough space?  And block.c is called by different file formats too. So
what do you suggest to do inside bdrv_write() function when write failed?

> So the formats would still write synchronously, and would pass errors
> up to their parent formats, until it reenters the generic block code
> where the retry would take place.  At that point the call from the
> device emulation would necessariy be asynchronous.
> 
It may reenter block formats many times during one write from ide.
Look at block-qcow2.c and calls to bdrv_pwrite() there. 

--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-20 18:23           ` Gleb Natapov
@ 2009-01-21 16:37             ` Ian Jackson
  2009-01-21 17:00               ` Gleb Natapov
  0 siblings, 1 reply; 14+ messages in thread
From: Ian Jackson @ 2009-01-21 16:37 UTC (permalink / raw)
  To: qemu-devel

Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> On Tue, Jan 20, 2009 at 04:50:14PM +0000, Ian Jackson wrote:
> > Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> > > I would like to abolish the blocking calls from all file formats, but in
> > > reality it is almost impossible. Qcow2 metadata updates is a big mess to
> > > do asynchronously. The best thing we can do is to move it to another thread.
> > 
> > Err, I wasn't suggesting it should be done in each format in that way.
> > It could be done once in block.c, before the specific format write
> > method is called.
> 
> Before? How should we know before specific format write if there is
> enough space?  And block.c is called by different file formats too. So
> what do you suggest to do inside bdrv_write() function when write failed?

I meant `before' in the calling stack sense rather than a temporal
sense.  `Above' if you prefer.

bdrv_[p]write would simply fail - as indeed they do in your patch.

But ide.c doesn't call bdrv_pwrite; it calls bdrv_aio_write.
bdrv_aio_write would do what your ide.c code currently does.

> > So the formats would still write synchronously, and would pass errors
> > up to their parent formats, until it reenters the generic block code
> > where the retry would take place.  At that point the call from the
> > device emulation would necessariy be asynchronous.
> 
> It may reenter block formats many times during one write from ide.
> Look at block-qcow2.c and calls to bdrv_pwrite() there. 

Yes.  All of those calls would be allowed to fail with ENOSPC, just as
in your patch all of the block layer is untouched ane ENOSPC is
propagated upwards.

I'm suggesting that instead of catching it in ide.c we should catch it
in bdrv_aio_write.  bdrv_aio_write can request a VM stop and then
return to its caller.  Well, actually, we'd have to insert a wrapper
around the BlockDriverCompletionFunc so that drv->bdrv_aio_write
doesn't return to the caller directly; instead, it would thread
through a new function (`bdrv_aio_write_cb' perhaps).

bdrv_aio_write_cb would check for errors which would stop the VM, and
in those cases it would request a VM stop and put the request on a
queue (maintained in block.c) for retry on restart.

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-21 16:37             ` Ian Jackson
@ 2009-01-21 17:00               ` Gleb Natapov
  2009-01-21 17:25                 ` Ian Jackson
  0 siblings, 1 reply; 14+ messages in thread
From: Gleb Natapov @ 2009-01-21 17:00 UTC (permalink / raw)
  To: qemu-devel

On Wed, Jan 21, 2009 at 04:37:08PM +0000, Ian Jackson wrote:
> > Before? How should we know before specific format write if there is
> > enough space?  And block.c is called by different file formats too. So
> > what do you suggest to do inside bdrv_write() function when write failed?
> 
> I meant `before' in the calling stack sense rather than a temporal
> sense.  `Above' if you prefer.
> 
> bdrv_[p]write would simply fail - as indeed they do in your patch.
> 
> But ide.c doesn't call bdrv_pwrite; it calls bdrv_aio_write.
> bdrv_aio_write would do what your ide.c code currently does.
> 
ide.c calls bdrv_write.

> > > So the formats would still write synchronously, and would pass errors
> > > up to their parent formats, until it reenters the generic block code
> > > where the retry would take place.  At that point the call from the
> > > device emulation would necessariy be asynchronous.
> > 
> > It may reenter block formats many times during one write from ide.
> > Look at block-qcow2.c and calls to bdrv_pwrite() there. 
> 
> Yes.  All of those calls would be allowed to fail with ENOSPC, just as
> in your patch all of the block layer is untouched ane ENOSPC is
> propagated upwards.
> 
> I'm suggesting that instead of catching it in ide.c we should catch it
> in bdrv_aio_write.  bdrv_aio_write can request a VM stop and then
> return to its caller.  Well, actually, we'd have to insert a wrapper
> around the BlockDriverCompletionFunc so that drv->bdrv_aio_write
> doesn't return to the caller directly; instead, it would thread
> through a new function (`bdrv_aio_write_cb' perhaps).
> 
> bdrv_aio_write_cb would check for errors which would stop the VM, and
> in those cases it would request a VM stop and put the request on a
> queue (maintained in block.c) for retry on restart.
> 
And synchronous calls will behave differently. I am all for doing it in
one place, but I am against doing it just for part of API calls. So do
you have better solution for sync calls except don't use them?

And BTW block.c is compiled also for qemu-{img|mbd} no vmstop there.

--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-21 17:00               ` Gleb Natapov
@ 2009-01-21 17:25                 ` Ian Jackson
  2009-01-21 18:01                   ` Gleb Natapov
  0 siblings, 1 reply; 14+ messages in thread
From: Ian Jackson @ 2009-01-21 17:25 UTC (permalink / raw)
  To: qemu-devel

Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> On Wed, Jan 21, 2009 at 04:37:08PM +0000, Ian Jackson wrote:
> > But ide.c doesn't call bdrv_pwrite; it calls bdrv_aio_write.
> > bdrv_aio_write would do what your ide.c code currently does.
> 
> ide.c calls bdrv_write.

In the non-DMA case, yes.  That would have to be changed to call
bdrv_aio_write.

> And synchronous calls will behave differently. I am all for doing it in
> one place, but I am against doing it just for part of API calls. So do
> you have better solution for sync calls except don't use them?

Well, stopping the VM can't be done for synchronous calls so we
clearly have three choices
  * write out the complicated asynchronous vm_stop-on-enospc logic
    at least twice, once for ide and scsi
  * do it only for ide
  * do it only for part of the api, and use only that api in ide
    and scsi

I prefer the last of these.  Doing it for only part of the internal
API is less bad than doing it only for some of the actual emulated
hardware.  I also think doing it for only part of the internal API is
less bad than writing the same functionality twice.

Writing the same functionality twice seems to me to be one of the
cardinal sins of programming.

But it would be good to hear what other people think.  If they prefer
the other options then your patch should go in as-is.

> And BTW block.c is compiled also for qemu-{img|mbd} no vmstop there.

We can finesse this with a suitable hook or an #ifdef.

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-21 17:25                 ` Ian Jackson
@ 2009-01-21 18:01                   ` Gleb Natapov
  2009-01-22 12:39                     ` Ian Jackson
  0 siblings, 1 reply; 14+ messages in thread
From: Gleb Natapov @ 2009-01-21 18:01 UTC (permalink / raw)
  To: qemu-devel

On Wed, Jan 21, 2009 at 05:25:51PM +0000, Ian Jackson wrote:
> Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> > On Wed, Jan 21, 2009 at 04:37:08PM +0000, Ian Jackson wrote:
> > > But ide.c doesn't call bdrv_pwrite; it calls bdrv_aio_write.
> > > bdrv_aio_write would do what your ide.c code currently does.
> > 
> > ide.c calls bdrv_write.
> 
> In the non-DMA case, yes.  That would have to be changed to call
> bdrv_aio_write.
> 
> > And synchronous calls will behave differently. I am all for doing it in
> > one place, but I am against doing it just for part of API calls. So do
> > you have better solution for sync calls except don't use them?
> 
> Well, stopping the VM can't be done for synchronous calls so we
> clearly have three choices
>   * write out the complicated asynchronous vm_stop-on-enospc logic
>     at least twice, once for ide and scsi
>   * do it only for ide
>   * do it only for part of the api, and use only that api in ide
>     and scsi
> 
> I prefer the last of these.  Doing it for only part of the internal
> API is less bad than doing it only for some of the actual emulated
> hardware.  I also think doing it for only part of the internal API is
> less bad than writing the same functionality twice.
Tomorrow I'll send patch for scsi and PV block device. The patches are
absolutely trivial. The only reason I've sent ide implementation first was
that I wanted early feedback. As discussion went to this direction it
just easier for me to send all patches to demonstrate that the changes a
trivial.

> 
> Writing the same functionality twice seems to me to be one of the
> cardinal sins of programming.
Doing something in the wrong level to save three line of code and make
only part of API usable doesn't sound plausible too.

> 
> But it would be good to hear what other people think.  If they prefer
> the other options then your patch should go in as-is.
> 
> > And BTW block.c is compiled also for qemu-{img|mbd} no vmstop there.
> 
> We can finesse this with a suitable hook or an #ifdef.
> 
As far as I can see we compile block*.c only once and link them with
qemu, qemu-img and qemu-nbd. AFAIR that was not the case so someone make
an effort to fix it. Why break it one more time without a really good reason?
 
--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-20 10:56 [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error Gleb Natapov
  2009-01-20 14:01 ` Ian Jackson
@ 2009-01-21 19:00 ` Anthony Liguori
  1 sibling, 0 replies; 14+ messages in thread
From: Anthony Liguori @ 2009-01-21 19:00 UTC (permalink / raw)
  To: qemu-devel

Gleb Natapov wrote:
> This version of the patch adds new option "werror" to -drive flag.
> Possible values are:
>
> report    - report errors to a guest as IO errors
> ignore    - continue as if nothing happened
> stop      - stop VM on any error and retry last command on resume
> enospc    - stop vm on ENOSPC error and retry last command on resume
>             all other errors are reported to a guest.
>
> Default is "report" to maintain current behaviour.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>   

Applied.  Thanks.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error.
  2009-01-21 18:01                   ` Gleb Natapov
@ 2009-01-22 12:39                     ` Ian Jackson
  0 siblings, 0 replies; 14+ messages in thread
From: Ian Jackson @ 2009-01-22 12:39 UTC (permalink / raw)
  To: qemu-devel

Gleb Natapov writes ("Re: [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error."):
> Tomorrow I'll send patch for scsi and PV block device. The patches are
> absolutely trivial. The only reason I've sent ide implementation first was
> that I wanted early feedback. As discussion went to this direction it
> just easier for me to send all patches to demonstrate that the changes a
> trivial.

OK, I'll look forward to this.

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2009-01-22 12:39 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-01-20 10:56 [Qemu-devel] [PATCH v4] Stop VM on ENOSPC error Gleb Natapov
2009-01-20 14:01 ` Ian Jackson
2009-01-20 14:16   ` Gleb Natapov
2009-01-20 14:57     ` Ian Jackson
2009-01-20 15:31       ` Gleb Natapov
2009-01-20 16:50         ` Ian Jackson
2009-01-20 18:19           ` Jamie Lokier
2009-01-20 18:23           ` Gleb Natapov
2009-01-21 16:37             ` Ian Jackson
2009-01-21 17:00               ` Gleb Natapov
2009-01-21 17:25                 ` Ian Jackson
2009-01-21 18:01                   ` Gleb Natapov
2009-01-22 12:39                     ` Ian Jackson
2009-01-21 19:00 ` Anthony Liguori

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).