From: Alexander Graf <agraf@suse.de>
To: Stefan Hajnoczi <stefanha@gmail.com>
Cc: Kevin Wolf <kwolf@redhat.com>,
Joerg Roedel <Joerg.Roedel@amd.com>,
Paul Brook <paul@codesourcery.com>,
QEMU-devel Developers <qemu-devel@nongnu.org>,
Blue Swirl <blauwirbel@gmail.com>,
Gerd Hoffmann <kraxel@redhat.com>,
tj@kernel.org, Roland Elek <elek.roland@gmail.com>,
Sebastian Herbszt <herbszt@gmx.de>
Subject: [Qemu-devel] Re: [PATCH 09/13] ahci: add ahci emulation
Date: Thu, 09 Dec 2010 16:48:20 +0100 [thread overview]
Message-ID: <4D00FA44.8090207@suse.de> (raw)
In-Reply-To: <AANLkTin_h2KMdq_Mm60TKaAENxneJykpu2JvX1ZCnXkS@mail.gmail.com>
Stefan Hajnoczi wrote:
> On Wed, Dec 8, 2010 at 12:13 PM, Alexander Graf <agraf@suse.de> wrote:
>
>> +struct AHCIDevice {
>> + IDEBus port;
>> + int port_no;
>> + uint32_t port_state;
>> + uint32_t finished;
>> + AHCIPortRegs port_regs;
>> + struct AHCIState *hba;
>> + uint8_t *lst;
>> + uint8_t *res_fis;
>> + uint8_t *cmd_fis;
>>
>
> Are these unmapped on reset?
>
On which reset? A port reset sends a D2H FIS to the guest, so we still
need at least the res_fis mapping. I couldn't find the exact spot where
the state of the registers after reset is defined.
>
>> + int cmd_fis_len;
>> + int dma_status;
>> + BlockDriverCompletionFunc *dma_cb;
>> + AHCICmdHdr *cur_cmd;
>> + NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
>>
>
> Are the ncq_tfs[] elements cleaned up on reset (i.e. cancellation and
> free sglist)?
>
>
>> +static void map_page(uint8_t **ptr, uint64_t addr, uint32_t wanted)
>> +{
>> + target_phys_addr_t len = wanted;
>> +
>> + if (*ptr) {
>> + cpu_physical_memory_unmap(*ptr, 1, len, len);
>> + }
>> +
>> + *ptr = cpu_physical_memory_map(addr, &len, 1);
>> + if (len < wanted) {
>> + cpu_physical_memory_unmap(*ptr, 1, len, len);
>>
>
> *ptr = NULL;
>
>
>> +static void ncq_cb(void *opaque, int ret)
>> +{
>> + NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
>> + IDEState *ide_state;
>> +
>> + if (ret < 0) {
>> + /* XXX error */
>> + }
>>
>
> Missing error handling.
>
Yes, that's what the XXX stands for :).
>
>> +static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
>> + int slot, QEMUSGList *sg)
>> +{
>> + NCQFrame *ncq_fis = (NCQFrame*)cmd_fis;
>> + uint8_t tag = ncq_fis->tag >> 3;
>> + NCQTransferState *ncq_tfs = &s->dev[port].ncq_tfs[tag];
>> +
>> + if (ncq_tfs->used) {
>> + /* error - already in use */
>> + fprintf(stderr, "%s: tag %d already used\n", __FUNCTION__, tag);
>> + return;
>> + }
>> +
>> + ncq_tfs->used = 1;
>> + ncq_tfs->drive = &s->dev[port];
>> + ncq_tfs->drive->cmd_fis = cmd_fis;
>> + ncq_tfs->drive->cmd_fis_len = 0x20;
>> + ncq_tfs->slot = slot;
>> + ncq_tfs->lba = ((uint64_t)ncq_fis->lba5 << 40) |
>> + ((uint64_t)ncq_fis->lba4 << 32) |
>> + ((uint64_t)ncq_fis->lba3 << 24) |
>> + ((uint64_t)ncq_fis->lba2 << 16) |
>> + ((uint64_t)ncq_fis->lba1 << 8) |
>> + (uint64_t)ncq_fis->lba0;
>> +
>> + /* Note: We calculate the sector count, but don't currently rely on it.
>> + * The total size of the DMA buffer tells us the transfer size instead. */
>> + ncq_tfs->sector_count = ((uint16_t)ncq_fis->sector_count_high << 8) |
>> + ncq_fis->sector_count_low;
>> +
>> + DPRINTF(port, "NCQ transfer LBA from %ld to %ld, drive max %ld\n",
>> + ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 2,
>> + s->dev[port].port.ifs[0].nb_sectors - 1);
>> +
>> + ncq_tfs->sglist = *sg;
>> + ncq_tfs->tag = tag;
>> +
>> + switch(ncq_fis->command) {
>> + case READ_FPDMA_QUEUED:
>> + DPRINTF(port, "NCQ reading %d sectors from LBA %ld, tag %d\n",
>> + ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
>> + ncq_tfs->is_read = 1;
>> +
>> + /* XXX: The specification is unclear about whether the DMA Setup
>> + * FIS here should have the I bit set, but it suggest that it should
>> + * not. Linux works without this interrupt, so I disabled it.
>> + * If someone knows if it is needed, please tell me, or fix this. */
>> +
>> + /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */
>> + DPRINTF(port, "tag %d aio read %ld\n", ncq_tfs->tag, ncq_tfs->lba);
>> + dma_bdrv_read(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist,
>> + ncq_tfs->lba, ncq_cb, ncq_tfs);
>> + break;
>> + case WRITE_FPDMA_QUEUED:
>> + DPRINTF(port, "NCQ writing %d sectors to LBA %ld, tag %d\n",
>> + ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
>> + ncq_tfs->is_read = 0;
>> + /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */
>> + DPRINTF(port, "tag %d aio write %ld\n", ncq_tfs->tag, ncq_tfs->lba);
>> + dma_bdrv_write(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist,
>> + ncq_tfs->lba, ncq_cb, ncq_tfs);
>> + break;
>> + default:
>> + hw_error("ahci: tried to process non-NCQ command as NCQ\n");
>>
>
> Guest triggerable abort.
>
Those happen. The guest can shoot itself in the foot. We have more of
these in other places. Just check virtio.c and search for abort() :).
>
>> + break;
>> + }
>> +}
>> +
>> +static int handle_cmd(AHCIState *s, int port, int slot)
>> +{
>> + IDEState *ide_state;
>> +
>> + int sglist_alloc_hint;
>> + QEMUSGList sglist;
>> + int atapi_packet_len = 0;
>> + AHCIPortRegs *pr;
>> + uint32_t opts;
>> + uint64_t tbl_addr;
>> + AHCICmdHdr *cmd;
>> + uint8_t *cmd_fis;
>> +
>> + target_phys_addr_t cmd_len;
>> + int i;
>> +
>> + if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
>> + /* Engine currently busy, try again later */
>> + DPRINTF(port, "engine busy\n");
>> + return -1;
>> + }
>> +
>> + pr = &s->dev[port].port_regs;
>> + cmd = &((AHCICmdHdr *)s->dev[port].lst)[slot];
>> +
>> + if (!s->dev[port].lst) {
>> + hw_error("%s: lst not given but cmd handled", __FUNCTION__);
>>
>
> Guest triggerable abort.
>
>
>> + }
>> +
>> + opts = le32_to_cpu(cmd->opts);
>> + tbl_addr = le64_to_cpu(cmd->tbl_addr);
>> +
>> + sglist_alloc_hint = opts >> AHCI_CMD_HDR_PRDT_LEN;
>> + cmd_len = 0x80 + (sglist_alloc_hint * sizeof(AHCI_SG));
>> + cmd_fis = cpu_physical_memory_map(tbl_addr, &cmd_len, 1);
>>
>
> NULL dereference later if cpu_physical_memory_map() fails due to
> invalid address (tbl_addr).
>
>
>> +
>> + /* The device we are working for */
>> + ide_state = &s->dev[port].port.ifs[0];
>> +
>> + if (!ide_state->bs) {
>> + hw_error("%s: guest accessed unused port", __FUNCTION__);
>>
>
> Guest triggerable abort.
>
>
>> + }
>> +
>> + /* Get number of entries in the PRDT, init a qemu sglist accordingly */
>> + memset(&sglist, 0, sizeof(sglist));
>> +
>> + if (sglist_alloc_hint > 0) {
>> + AHCI_SG *tbl = (AHCI_SG *)(&cmd_fis[0x80]);
>> +
>> + qemu_sglist_init(&sglist, sglist_alloc_hint);
>> + /* Parse the PRDs and create qemu sglist entries accordingly */
>> + for (i = 0; i < sglist_alloc_hint; i++) {
>> + /* flags_size is zero-based */
>> + qemu_sglist_add(&sglist, le64_to_cpu(tbl[i].addr),
>> + le32_to_cpu(tbl[i].flags_size) + 1);
>> + }
>> + }
>>
>
> Only the SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER codepath seems to
> clean up sglist. The guest can leak host memory by setting
> sglist_alloc_hint > 0 and not using
> SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER.
>
True, the sglist should only be created in dma_prepare (in which case the
core is responsible for cleanup) or at NCQ command issue.
Alex
next prev parent reply other threads:[~2010-12-09 15:48 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-12-08 12:13 [Qemu-devel] [PATCH 00/13] AHCI emulation support v8 Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 01/13] ide: split ide command interpretation off Alexander Graf
2010-12-08 13:31 ` [Qemu-devel] " Stefan Hajnoczi
2010-12-08 12:13 ` [Qemu-devel] [PATCH 02/13] ide: fix whitespace gap in ide_exec_cmd Alexander Graf
2010-12-08 13:32 ` [Qemu-devel] " Stefan Hajnoczi
2010-12-08 14:59 ` Kevin Wolf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 03/13] ide: Split out BMDMA code from ATA core Alexander Graf
2010-12-08 14:26 ` [Qemu-devel] " Stefan Hajnoczi
2010-12-08 14:32 ` Alexander Graf
2010-12-08 14:35 ` Kevin Wolf
2010-12-08 14:40 ` Stefan Hajnoczi
2010-12-08 14:46 ` Kevin Wolf
2010-12-09 12:31 ` Kevin Wolf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 04/13] bmdma: split out irq setting Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 05/13] bmdma: move header definitions out Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 06/13] ide: add ncq identify data for ahci sata drives Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 07/13] pci: add storage class for sata Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 08/13] pci: add ich7 pci id Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 09/13] ahci: add ahci emulation Alexander Graf
2010-12-08 21:14 ` [Qemu-devel] " Stefan Hajnoczi
2010-12-09 15:48 ` Alexander Graf [this message]
2010-12-09 15:53 ` Kevin Wolf
2010-12-09 16:18 ` Alexander Graf
2010-12-10 9:35 ` Kevin Wolf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 10/13] config: move ide core and pci to pci.mak Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 11/13] config: add ahci for pci capable machines Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 12/13] ahci: set SATA Mode Select Alexander Graf
2010-12-08 12:13 ` [Qemu-devel] [PATCH 13/13] ahci: set pci revision id Alexander Graf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4D00FA44.8090207@suse.de \
--to=agraf@suse.de \
--cc=Joerg.Roedel@amd.com \
--cc=blauwirbel@gmail.com \
--cc=elek.roland@gmail.com \
--cc=herbszt@gmx.de \
--cc=kraxel@redhat.com \
--cc=kwolf@redhat.com \
--cc=paul@codesourcery.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@gmail.com \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.