All of lore.kernel.org
 help / color / mirror / Atom feed
From: John Snow <jsnow@redhat.com>
To: Peter Lieven <pl@kamp.de>, qemu-devel@nongnu.org, qemu-block@nongnu.org
Cc: kwolf@redhat.com, stefanha@gmail.com, jcody@redhat.com
Subject: Re: [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async
Date: Wed, 7 Oct 2015 12:42:25 -0400	[thread overview]
Message-ID: <56154B71.1060001@redhat.com> (raw)
In-Reply-To: <56138A76.1030804@kamp.de>



On 10/06/2015 04:46 AM, Peter Lieven wrote:
> Am 05.10.2015 um 23:15 schrieb John Snow:
>>
>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>> This has two significant drawbacks. First, the main loop hangs until an
>>> I/O request is completed, and secondly, if the I/O request does not
>>> complete (e.g. due to unresponsive storage), QEMU hangs completely.
>>>
>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>> ---
>>>   hw/ide/atapi.c | 69
>>> ++++++++++++++++++++++++++++++++++++----------------------
>>>   1 file changed, 43 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>> index 747f466..9257e1c 100644
>>> --- a/hw/ide/atapi.c
>>> +++ b/hw/ide/atapi.c
>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>       memset(buf, 0, 288);
>>>   }
>>>   -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>> sector_size)
>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>   {
>>> -    int ret;
>>> +    IDEState *s = opaque;
>>>   -    switch(sector_size) {
>>> -    case 2048:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        break;
>>> -    case 2352:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        if (ret < 0)
>>> -            return ret;
>>> -        cd_data_to_raw(buf, lba);
>>> -        break;
>>> -    default:
>>> -        ret = -EIO;
>>> -        break;
>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> +
>>> +    if (ret < 0) {
>>> +        ide_atapi_io_error(s, ret);
>>> +        return;
>>> +    }
>>> +
>>> +    if (s->cd_sector_size == 2352) {
>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>       }
>>> -    return ret;
>>> +
>>> +    s->lba++;
>>> +    s->io_buffer_index = 0;
>>> +    s->status &= ~BUSY_STAT;
>>> +
>>> +    ide_atapi_cmd_reply_end(s);
>>> +}
>>> +
>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>> sector_size)
>>> +{
>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    s->iov.iov_base = buf;
>>> +    if (sector_size == 2352) {
>>> +        buf += 4;
>>> +    }
>>> +
>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>> +
>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>> +                      cd_read_sector_cb, s) == NULL) {
>>> +        return -EIO;
>>> +    }
>>> +
>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> +    s->status |= BUSY_STAT;
>>> +    return 0;
>>>   }
>>>   
>> We discussed this off-list a bit, but for upstream synchronization:
>>
>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>> s->end_transfer_func() non-blocking, which functions like ide_data_readw
>> are not prepared to cope with.
>>
>> My suggestion is to buffer an entire DRQ block of data at once
>> (byte_count_limit) to avoid the problem.
> 
> Hi John,
> 
> first of all thank you for the detailed analysis.
> 
> Is the following what you have in mind? For PIO reads > 1 sector
> it is a great improvement for the NFS backend:
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index ab45495..ec2ba89 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret)
>      }
> 
>      if (s->cd_sector_size == 2352) {
> -        cd_data_to_raw(s->io_buffer, s->lba);
> +        int nb_sectors = s->packet_transfer_size / 2352;
> +        while (nb_sectors--) {
> +            memmove(s->io_buffer + nb_sectors * 2352 + 4,
> +                    s->io_buffer + nb_sectors * 2048, 2048);
> +            cd_data_to_raw(s->io_buffer + nb_sectors * 2352,
> +                           s->lba + nb_sectors);
> +        }
>      }

Is this going to be correct in cases where the number of sectors we are
copying is less than the total request size? We might need to bookmark
how many sectors/bytes we're copying this go-around. Perhaps by looking
at lcyl/hcyl.

> 
> -    s->lba++;
> +    s->lba = -1;
>      s->io_buffer_index = 0;
>      s->status &= ~BUSY_STAT;
> 
>      ide_atapi_cmd_reply_end(s);
>  }
> 

Well, I might not name it cd_read_sector and cd_read_sector_cb anymore.
Perhaps cd_read_sectors[_cb].

> -static int cd_read_sector(IDEState *s, int lba, void *buf, int
> sector_size)
> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
> sector_size, int nb_sectors)
>  {
>      if (sector_size != 2048 && sector_size != 2352) {
>          return -EINVAL;
>      }
> 
>      s->iov.iov_base = buf;
> -    if (sector_size == 2352) {
> -        buf += 4;
> -    }
> -
> -    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +    s->iov.iov_len = nb_sectors * 2048;
>      qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> 
> -    if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4,
> -                      cd_read_sector_cb, s) == NULL) {
> +    if (ide_readv_cancelable(s, (int64_t)lba << 2,
> +                             &s->qiov, nb_sectors * 4,
> +                             cd_read_sector_cb, s) == NULL) {
>          return -EIO;
>      }
> 
>      block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +                     nb_sectors * 2048, BLOCK_ACCT_READ);
>      s->status |= BUSY_STAT;
>      return 0;
>  }
> @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret)
>  /* The whole ATAPI transfer logic is handled in this function */
>  void ide_atapi_cmd_reply_end(IDEState *s)
>  {
> -    int byte_count_limit, size, ret;
> +    int byte_count_limit, size;
>  #ifdef DEBUG_IDE_ATAPI
>      printf("reply: tx_size=%d elem_tx_size=%d index=%d\n",
>             s->packet_transfer_size,
> @@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>          printf("status=0x%x\n", s->status);
>  #endif
>      } else {
> -        /* see if a new sector must be read */
> -        if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
> -            ret = cd_read_sector(s, s->lba, s->io_buffer,
> s->cd_sector_size);
> -            if (ret < 0) {
> -                ide_atapi_io_error(s, ret);
> -            }
> -            return;
> -        }
>          if (s->elementary_transfer_size > 0) {
>              /* there are some data left to transmit in this elementary
>                 transfer */
> @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int
> size, int max_size)
>  static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>                                     int sector_size)
>  {
> +    int ret;
>      s->lba = lba;
>      s->packet_transfer_size = nb_sectors * sector_size;
> +    assert(s->packet_transfer_size <=
> +           IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4);
>      s->elementary_transfer_size = 0;
> -    s->io_buffer_index = sector_size;
>      s->cd_sector_size = sector_size;
> -
> -    ide_atapi_cmd_reply_end(s);
> +    ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size,
> +                         nb_sectors);
> +    if (ret < 0) {
> +        ide_atapi_io_error(s, ret);
> +    }
>  }
> 
>  static void ide_atapi_cmd_check_status(IDEState *s)
> 
> 
> Did you also have a look at the other patches?
> 
> Thanks,
> Peter

On my queue; hopefully Stefan can take a peek too, but I'll try to
review the IDE-specific bits. I imagine you want to wait to respin until
we've looked at all the patches, that's fine -- I'll try not to keep you
waiting for much longer.

--js

  parent reply	other threads:[~2015-10-07 16:42 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-21 12:25 [Qemu-devel] [PATCH 0/5] ide: avoid main-loop hang on CDROM/NFS failure Peter Lieven
2015-09-21 12:25 ` [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async Peter Lieven
2015-10-02 21:02   ` John Snow
2015-10-05 21:15   ` John Snow
2015-10-06  8:46     ` Peter Lieven
2015-10-06 12:08       ` Peter Lieven
2015-10-07 16:42       ` John Snow [this message]
2015-10-07 18:53         ` Peter Lieven
2015-10-08 12:06         ` Peter Lieven
2015-10-08 16:44           ` John Snow
2015-10-09  8:21             ` Kevin Wolf
2015-10-09 11:18               ` Peter Lieven
2015-10-09 16:32               ` John Snow
2015-10-14 18:19             ` Peter Lieven
2015-10-14 18:21               ` John Snow
2015-10-16 10:56                 ` Peter Lieven
2015-10-06  8:57     ` Kevin Wolf
2015-10-06  9:20       ` Peter Lieven
2015-10-06 17:07         ` John Snow
2015-10-06 17:12           ` Peter Lieven
2015-10-06 17:56             ` John Snow
2015-10-06 18:31               ` Peter Lieven
2015-10-06 18:34                 ` John Snow
2015-10-06 15:54       ` John Snow
2015-10-07  7:28         ` Kevin Wolf
2015-10-06 13:05   ` Laszlo Ersek
2015-09-21 12:25 ` [Qemu-devel] [PATCH 2/5] ide/atapi: blk_aio_readv may return NULL Peter Lieven
2015-09-21 12:25 ` [Qemu-devel] [PATCH 3/5] ide: add support for cancelable read requests Peter Lieven
2015-09-21 12:25 ` [Qemu-devel] [PATCH 4/5] ide/atapi: enable cancelable requests Peter Lieven
2015-09-21 12:25 ` [Qemu-devel] [PATCH 5/5] block/nfs: cache allocated filesize for read-only files Peter Lieven
2015-09-21 20:58 ` [Qemu-devel] [PATCH 0/5] ide: avoid main-loop hang on CDROM/NFS failure John Snow
2015-09-21 21:22   ` Peter Lieven

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56154B71.1060001@redhat.com \
    --to=jsnow@redhat.com \
    --cc=jcody@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=pl@kamp.de \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.