qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Alexander Graf <agraf@suse.de>
To: Stefan Hajnoczi <stefanha@gmail.com>
Cc: Kevin Wolf <kwolf@redhat.com>,
	Joerg Roedel <Joerg.Roedel@amd.com>,
	Paul Brook <paul@codesourcery.com>,
	QEMU-devel Developers <qemu-devel@nongnu.org>,
	Blue Swirl <blauwirbel@gmail.com>,
	Gerd Hoffmann <kraxel@redhat.com>,
	tj@kernel.org, Roland Elek <elek.roland@gmail.com>,
	Sebastian Herbszt <herbszt@gmx.de>
Subject: [Qemu-devel] Re: [PATCH 09/13] ahci: add ahci emulation
Date: Thu, 09 Dec 2010 16:48:20 +0100	[thread overview]
Message-ID: <4D00FA44.8090207@suse.de> (raw)
In-Reply-To: <AANLkTin_h2KMdq_Mm60TKaAENxneJykpu2JvX1ZCnXkS@mail.gmail.com>

Stefan Hajnoczi wrote:
> On Wed, Dec 8, 2010 at 12:13 PM, Alexander Graf <agraf@suse.de> wrote:
>   
>> +struct AHCIDevice {
>> +    IDEBus port;
>> +    int port_no;
>> +    uint32_t port_state;
>> +    uint32_t finished;
>> +    AHCIPortRegs port_regs;
>> +    struct AHCIState *hba;
>> +    uint8_t *lst;
>> +    uint8_t *res_fis;
>> +    uint8_t *cmd_fis;
>>     
>
> Are these unmapped on reset?
>   

On which reset? A port reset sends a D2H FIS to the guest, so we still
need at least the res_fis mapping. I couldn't find the exact spot where
the state of the registers after reset is defined.

>   
>> +    int cmd_fis_len;
>> +    int dma_status;
>> +    BlockDriverCompletionFunc *dma_cb;
>> +    AHCICmdHdr *cur_cmd;
>> +    NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
>>     
>
> Are the ncq_tfs[] elements cleaned up on reset (i.e. cancellation and
> free sglist)?
>
>   
>> +static void map_page(uint8_t **ptr, uint64_t addr, uint32_t wanted)
>> +{
>> +    target_phys_addr_t len = wanted;
>> +
>> +    if (*ptr) {
>> +        cpu_physical_memory_unmap(*ptr, 1, len, len);
>> +    }
>> +
>> +    *ptr = cpu_physical_memory_map(addr, &len, 1);
>> +    if (len < wanted) {
>> +        cpu_physical_memory_unmap(*ptr, 1, len, len);
>>     
>
> *ptr = NULL;
>
>   
>> +static void ncq_cb(void *opaque, int ret)
>> +{
>> +    NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
>> +    IDEState *ide_state;
>> +
>> +    if (ret < 0) {
>> +        /* XXX error */
>> +    }
>>     
>
> Missing error handling.
>   

Yes, that's what the XXX stands for :).

>   
>> +static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
>> +                                int slot, QEMUSGList *sg)
>> +{
>> +    NCQFrame *ncq_fis = (NCQFrame*)cmd_fis;
>> +    uint8_t tag = ncq_fis->tag >> 3;
>> +    NCQTransferState *ncq_tfs = &s->dev[port].ncq_tfs[tag];
>> +
>> +    if (ncq_tfs->used) {
>> +        /* error - already in use */
>> +        fprintf(stderr, "%s: tag %d already used\n", __FUNCTION__, tag);
>> +        return;
>> +    }
>> +
>> +    ncq_tfs->used = 1;
>> +    ncq_tfs->drive = &s->dev[port];
>> +    ncq_tfs->drive->cmd_fis = cmd_fis;
>> +    ncq_tfs->drive->cmd_fis_len = 0x20;
>> +    ncq_tfs->slot = slot;
>> +    ncq_tfs->lba = ((uint64_t)ncq_fis->lba5 << 40) |
>> +                   ((uint64_t)ncq_fis->lba4 << 32) |
>> +                   ((uint64_t)ncq_fis->lba3 << 24) |
>> +                   ((uint64_t)ncq_fis->lba2 << 16) |
>> +                   ((uint64_t)ncq_fis->lba1 << 8) |
>> +                   (uint64_t)ncq_fis->lba0;
>> +
>> +    /* Note: We calculate the sector count, but don't currently rely on it.
>> +     * The total size of the DMA buffer tells us the transfer size instead. */
>> +    ncq_tfs->sector_count = ((uint16_t)ncq_fis->sector_count_high << 8) |
>> +                                ncq_fis->sector_count_low;
>> +
>> +    DPRINTF(port, "NCQ transfer LBA from %ld to %ld, drive max %ld\n",
>> +            ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 2,
>> +            s->dev[port].port.ifs[0].nb_sectors - 1);
>> +
>> +    ncq_tfs->sglist = *sg;
>> +    ncq_tfs->tag = tag;
>> +
>> +    switch(ncq_fis->command) {
>> +        case READ_FPDMA_QUEUED:
>> +            DPRINTF(port, "NCQ reading %d sectors from LBA %ld, tag %d\n",
>> +                    ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
>> +            ncq_tfs->is_read = 1;
>> +
>> +            /* XXX: The specification is unclear about whether the DMA Setup
>> +             * FIS here should have the I bit set, but it suggest that it should
>> +             * not. Linux works without this interrupt, so I disabled it.
>> +             * If someone knows if it is needed, please tell me, or fix this. */
>> +
>> +            /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */
>> +            DPRINTF(port, "tag %d aio read %ld\n", ncq_tfs->tag, ncq_tfs->lba);
>> +            dma_bdrv_read(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist,
>> +                          ncq_tfs->lba, ncq_cb, ncq_tfs);
>> +            break;
>> +        case WRITE_FPDMA_QUEUED:
>> +            DPRINTF(port, "NCQ writing %d sectors to LBA %ld, tag %d\n",
>> +                    ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
>> +            ncq_tfs->is_read = 0;
>> +            /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */
>> +            DPRINTF(port, "tag %d aio write %ld\n", ncq_tfs->tag, ncq_tfs->lba);
>> +            dma_bdrv_write(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist,
>> +                           ncq_tfs->lba, ncq_cb, ncq_tfs);
>> +            break;
>> +        default:
>> +            hw_error("ahci: tried to process non-NCQ command as NCQ\n");
>>     
>
> Guest triggerable abort.
>   

Those happen. The guest can shoot itself in the foot. We have more of
these in other places. Just check virtio.c and search for abort() :).

>   
>> +            break;
>> +    }
>> +}
>> +
>> +static int handle_cmd(AHCIState *s, int port, int slot)
>> +{
>> +    IDEState *ide_state;
>> +
>> +    int sglist_alloc_hint;
>> +    QEMUSGList sglist;
>> +    int atapi_packet_len = 0;
>> +    AHCIPortRegs *pr;
>> +    uint32_t opts;
>> +    uint64_t tbl_addr;
>> +    AHCICmdHdr *cmd;
>> +    uint8_t *cmd_fis;
>> +
>> +    target_phys_addr_t cmd_len;
>> +    int i;
>> +
>> +    if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
>> +        /* Engine currently busy, try again later */
>> +        DPRINTF(port, "engine busy\n");
>> +        return -1;
>> +    }
>> +
>> +    pr = &s->dev[port].port_regs;
>> +    cmd = &((AHCICmdHdr *)s->dev[port].lst)[slot];
>> +
>> +    if (!s->dev[port].lst) {
>> +        hw_error("%s: lst not given but cmd handled", __FUNCTION__);
>>     
>
> Guest triggerable abort.
>
>   
>> +    }
>> +
>> +    opts = le32_to_cpu(cmd->opts);
>> +    tbl_addr = le64_to_cpu(cmd->tbl_addr);
>> +
>> +    sglist_alloc_hint = opts >> AHCI_CMD_HDR_PRDT_LEN;
>> +    cmd_len = 0x80 + (sglist_alloc_hint * sizeof(AHCI_SG));
>> +    cmd_fis = cpu_physical_memory_map(tbl_addr, &cmd_len, 1);
>>     
>
> NULL dereference later if cpu_physical_memory_map() fails due to
> invalid address (tbl_addr).
>
>   
>> +
>> +    /* The device we are working for */
>> +    ide_state = &s->dev[port].port.ifs[0];
>> +
>> +    if (!ide_state->bs) {
>> +        hw_error("%s: guest accessed unused port", __FUNCTION__);
>>     
>
> Guest triggerable abort.
>
>   
>> +    }
>> +
>> +    /* Get number of entries in the PRDT, init a qemu sglist accordingly */
>> +    memset(&sglist, 0, sizeof(sglist));
>> +
>> +    if (sglist_alloc_hint > 0) {
>> +        AHCI_SG *tbl = (AHCI_SG *)(&cmd_fis[0x80]);
>> +
>> +        qemu_sglist_init(&sglist, sglist_alloc_hint);
>> +        /* Parse the PRDs and create qemu sglist entries accordingly */
>> +        for (i = 0; i < sglist_alloc_hint; i++) {
>> +            /* flags_size is zero-based */
>> +            qemu_sglist_add(&sglist, le64_to_cpu(tbl[i].addr),
>> +                            le32_to_cpu(tbl[i].flags_size) + 1);
>> +        }
>> +    }
>>     
>
> Only the SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER codepath seems to
> clean up sglist.  The guest can leak host memory by setting
> sglist_alloc_hint > 0 and not using
> SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER.
>   

True, the sglist should only be created in dma_prepare (in which case the
core is responsible for cleanup) or on NCQ command issue.


Alex

  reply	other threads:[~2010-12-09 15:48 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-08 12:13 [Qemu-devel] [PATCH 00/13] AHCI emulation support v8 Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 01/13] ide: split ide command interpretation off Alexander Graf
2010-12-08 13:31   ` [Qemu-devel] " Stefan Hajnoczi
2010-12-08 12:13 ` [Qemu-devel] [PATCH 02/13] ide: fix whitespace gap in ide_exec_cmd Alexander Graf
2010-12-08 13:32   ` [Qemu-devel] " Stefan Hajnoczi
2010-12-08 14:59   ` Kevin Wolf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 03/13] ide: Split out BMDMA code from ATA core Alexander Graf
2010-12-08 14:26   ` [Qemu-devel] " Stefan Hajnoczi
2010-12-08 14:32     ` Alexander Graf
2010-12-08 14:35     ` Kevin Wolf
2010-12-08 14:40       ` Stefan Hajnoczi
2010-12-08 14:46         ` Kevin Wolf
2010-12-09 12:31   ` Kevin Wolf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 04/13] bmdma: split out irq setting Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 05/13] bmdma: move header definitions out Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 06/13] ide: add ncq identify data for ahci sata drives Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 07/13] pci: add storage class for sata Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 08/13] pci: add ich7 pci id Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 09/13] ahci: add ahci emulation Alexander Graf
2010-12-08 21:14   ` [Qemu-devel] " Stefan Hajnoczi
2010-12-09 15:48     ` Alexander Graf [this message]
2010-12-09 15:53       ` Kevin Wolf
2010-12-09 16:18         ` Alexander Graf
2010-12-10  9:35           ` Kevin Wolf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 10/13] config: move ide core and pci to pci.mak Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 11/13] config: add ahci for pci capable machines Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 12/13] ahci: set SATA Mode Select Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 13/13] ahci: set pci revision id Alexander Graf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D00FA44.8090207@suse.de \
    --to=agraf@suse.de \
    --cc=Joerg.Roedel@amd.com \
    --cc=blauwirbel@gmail.com \
    --cc=elek.roland@gmail.com \
    --cc=herbszt@gmx.de \
    --cc=kraxel@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=paul@codesourcery.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@gmail.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).