From: Jason Wang <jasowang@redhat.com>
To: Li Qiang <liq3ea@163.com>,
dmitry.fleytman@gmail.com, pbonzini@redhat.com
Cc: alxndr@bu.edu, liq3ea@gmail.com, qemu-devel@nongnu.org,
ppandit@redhat.com
Subject: Re: [PATCH v3] e1000e: using bottom half to send packets
Date: Wed, 22 Jul 2020 11:32:02 +0800
Message-ID: <307795f9-70bb-b83b-6110-da2c923e4dc2@redhat.com>
In-Reply-To: <20200721151728.112395-1-liq3ea@163.com>
On 2020/7/21 11:17 PM, Li Qiang wrote:
> Alexander Bulekov reported a UAF bug related to e1000e packet sending.
>
> -->https://bugs.launchpad.net/qemu/+bug/1886362
>
> This happens because the guest triggers an e1000e packet send with the
> descriptor's data address pointing at e1000e's own MMIO region. When
> e1000e then does the DMA, it writes into its MMIO space again, which
> re-enters the device and finally causes the UAF.
>
> Paolo suggested using a bottom half whenever MMIO is doing complicated
> things, here:
> -->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html
>
> Reference here:
> 'The easiest solution is to delay processing of descriptors to a bottom
> half whenever MMIO is doing something complicated. This is also better
> for latency because it will free the vCPU thread more quickly and leave
> the work to the I/O thread.'
>
> This patch fixes this UAF.
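(For context, the shape of the fix, independent of the e1000e details: the
MMIO write handler only marks the queue as pending and schedules a QEMUBH,
and the descriptor walk then runs from the bottom half in the I/O thread
rather than nested inside the MMIO handler, so a descriptor whose buffer
address points back into the device's MMIO can no longer re-enter the
handler that is still on the stack. A minimal sketch with made-up names
(TxQueue, kick_tx), not the patch itself:

    #include "qemu/osdep.h"
    #include "qemu/main-loop.h"  /* QEMUBH, qemu_bh_new(), qemu_bh_schedule() */

    typedef struct TxQueue {     /* hypothetical per-queue state */
        QEMUBH *bh;
        uint32_t waiting;
    } TxQueue;

    static void tx_bh(void *opaque)
    {
        TxQueue *q = opaque;

        q->waiting = 0;
        /* walk the TX descriptors and do the DMA here, in the bottom
         * half, instead of directly in the MMIO dispatch path */
    }

    /* called from the MMIO write handler, e.g. on a TDT/TCTL write */
    static void kick_tx(TxQueue *q)
    {
        if (q->waiting) {
            return;              /* a bottom half is already pending */
        }
        q->waiting = 1;
        qemu_bh_schedule(q->bh); /* q->bh = qemu_bh_new(tx_bh, q) at realize */
    }

The real patch below additionally throttles the walk with tx_burst and
handles VM state changes.)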
>
> Reported-by: Alexander Bulekov <alxndr@bu.edu>
> Signed-off-by: Li Qiang <liq3ea@163.com>
> ---
> Change since v2:
> 1. Add comments for the tx bh schedule when the VM resumes
> 2. Leave the set-ICS code in 'e1000e_start_xmit'
> 3. Cancel the tx bh and reset tx_waiting in e1000e_core_reset
So based on our discussion this is probably not sufficient. It solves
the TX re-entrancy issue but not RX (e.g. RX DMA to RDT?). Or is
e1000e's RX path already reentrant?
Thanks
>
> Change since v1:
> Per Jason's review here:
> -- https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg05368.html
> 1. Cancel and schedule the tx bh when the VM is stopped or resumed
> 2. Add a tx_burst property for e1000e to throttle the bh execution
> 3. Add a tx_waiting flag to record whether the bh is pending or not
> Don't take the BQL in the tx_bh handler, as the BQL is already held
> when tx_bh runs.
>
> hw/net/e1000e.c | 6 +++
> hw/net/e1000e_core.c | 107 +++++++++++++++++++++++++++++++++++--------
> hw/net/e1000e_core.h | 8 ++++
> 3 files changed, 101 insertions(+), 20 deletions(-)
>
> diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
> index fda34518c9..24e35a78bf 100644
> --- a/hw/net/e1000e.c
> +++ b/hw/net/e1000e.c
> @@ -77,10 +77,14 @@ typedef struct E1000EState {
>
> bool disable_vnet;
>
> + int32_t tx_burst;
> +
> E1000ECore core;
>
> } E1000EState;
>
> +#define TX_BURST 256
> +
> #define E1000E_MMIO_IDX 0
> #define E1000E_FLASH_IDX 1
> #define E1000E_IO_IDX 2
> @@ -263,6 +267,7 @@ static void e1000e_core_realize(E1000EState *s)
> {
> s->core.owner = &s->parent_obj;
> s->core.owner_nic = s->nic;
> + s->core.tx_burst = s->tx_burst;
> }
>
> static void
> @@ -665,6 +670,7 @@ static Property e1000e_properties[] = {
> e1000e_prop_subsys_ven, uint16_t),
> DEFINE_PROP_SIGNED("subsys", E1000EState, subsys, 0,
> e1000e_prop_subsys, uint16_t),
> + DEFINE_PROP_INT32("x-txburst", E1000EState, tx_burst, TX_BURST),
> DEFINE_PROP_END_OF_LIST(),
> };
>
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> index bcd186cac5..2fdfc23204 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -910,18 +910,17 @@ e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx)
> }
>
> static void
> -e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
> +e1000e_start_xmit(struct e1000e_tx *q)
> {
> + E1000ECore *core = q->core;
> dma_addr_t base;
> struct e1000_tx_desc desc;
> - bool ide = false;
> - const E1000E_RingInfo *txi = txr->i;
> - uint32_t cause = E1000_ICS_TXQE;
> + const E1000E_RingInfo *txi;
> + E1000E_TxRing txr;
> + int32_t num_packets = 0;
>
> - if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
> - trace_e1000e_tx_disabled();
> - return;
> - }
> + e1000e_tx_ring_init(core, &txr, q - &core->tx[0]);
> + txi = txr.i;
>
> while (!e1000e_ring_empty(core, txi)) {
> base = e1000e_ring_head_descr(core, txi);
> @@ -931,14 +930,24 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
> trace_e1000e_tx_descr((void *)(intptr_t)desc.buffer_addr,
> desc.lower.data, desc.upper.data);
>
> - e1000e_process_tx_desc(core, txr->tx, &desc, txi->idx);
> - cause |= e1000e_txdesc_writeback(core, base, &desc, &ide, txi->idx);
> + e1000e_process_tx_desc(core, txr.tx, &desc, txi->idx);
> + q->cause |= e1000e_txdesc_writeback(core, base, &desc,
> + &q->ide, txi->idx);
>
> e1000e_ring_advance(core, txi, 1);
> + if (++num_packets >= core->tx_burst) {
> + break;
> + }
> + }
> +
> + if (num_packets >= core->tx_burst) {
> + qemu_bh_schedule(q->tx_bh);
> + q->tx_waiting = 1;
> + return;
> }
>
> - if (!ide || !e1000e_intrmgr_delay_tx_causes(core, &cause)) {
> - e1000e_set_interrupt_cause(core, cause);
> + if (!q->ide || !e1000e_intrmgr_delay_tx_causes(core, &q->cause)) {
> + e1000e_set_interrupt_cause(core, q->cause);
> }
> }
>
> @@ -2423,32 +2432,41 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
> static void
> e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
> {
> - E1000E_TxRing txr;
> core->mac[index] = val;
>
> if (core->mac[TARC0] & E1000_TARC_ENABLE) {
> - e1000e_tx_ring_init(core, &txr, 0);
> - e1000e_start_xmit(core, &txr);
> + if (core->tx[0].tx_waiting) {
> + return;
> + }
> + core->tx[0].tx_waiting = 1;
> + if (!core->vm_running) {
> + return;
> + }
> + qemu_bh_schedule(core->tx[0].tx_bh);
> }
>
> if (core->mac[TARC1] & E1000_TARC_ENABLE) {
> - e1000e_tx_ring_init(core, &txr, 1);
> - e1000e_start_xmit(core, &txr);
> + if (core->tx[1].tx_waiting) {
> + return;
> + }
> + core->tx[1].tx_waiting = 1;
> + if (!core->vm_running) {
> + return;
> + }
> + qemu_bh_schedule(core->tx[1].tx_bh);
> }
> }
>
> static void
> e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
> {
> - E1000E_TxRing txr;
> int qidx = e1000e_mq_queue_idx(TDT, index);
> uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
>
> core->mac[index] = val & 0xffff;
>
> if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
> - e1000e_tx_ring_init(core, &txr, qidx);
> - e1000e_start_xmit(core, &txr);
> + qemu_bh_schedule(core->tx[qidx].tx_bh);
> }
> }
>
> @@ -3315,11 +3333,52 @@ e1000e_vm_state_change(void *opaque, int running, RunState state)
> trace_e1000e_vm_state_running();
> e1000e_intrmgr_resume(core);
> e1000e_autoneg_resume(core);
> + core->vm_running = 1;
> +
> + for (int i = 0; i < E1000E_NUM_QUEUES; i++) {
> + /*
> + * Schedule tx bh unconditionally to make sure
> + * tx work after live migration since we don't
> + * migrate tx_waiting.
> + */
> + qemu_bh_schedule(core->tx[i].tx_bh);
> + }
> +
> } else {
> trace_e1000e_vm_state_stopped();
> +
> + for (int i = 0; i < E1000E_NUM_QUEUES; i++) {
> + qemu_bh_cancel(core->tx[i].tx_bh);
> + }
> +
> e1000e_autoneg_pause(core);
> e1000e_intrmgr_pause(core);
> + core->vm_running = 0;
> + }
> +}
> +
> +
> +static void e1000e_core_tx_bh(void *opaque)
> +{
> + struct e1000e_tx *q = opaque;
> + E1000ECore *core = q->core;
> +
> + if (!core->vm_running) {
> + assert(q->tx_waiting);
> + return;
> + }
> +
> + q->tx_waiting = 0;
> +
> + if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
> + trace_e1000e_tx_disabled();
> + return;
> }
> +
> + q->cause = E1000_ICS_TXQE;
> + q->ide = false;
> +
> + e1000e_start_xmit(q);
> }
>
> void
> @@ -3334,12 +3393,15 @@ e1000e_core_pci_realize(E1000ECore *core,
> e1000e_autoneg_timer, core);
> e1000e_intrmgr_pci_realize(core);
>
> + core->vm_running = runstate_is_running();
> core->vmstate =
> qemu_add_vm_change_state_handler(e1000e_vm_state_change, core);
>
> for (i = 0; i < E1000E_NUM_QUEUES; i++) {
> net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
> E1000E_MAX_TX_FRAGS, core->has_vnet);
> + core->tx[i].core = core;
> + core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]);
> }
>
> net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
> @@ -3367,6 +3429,9 @@ e1000e_core_pci_uninit(E1000ECore *core)
> for (i = 0; i < E1000E_NUM_QUEUES; i++) {
> net_tx_pkt_reset(core->tx[i].tx_pkt);
> net_tx_pkt_uninit(core->tx[i].tx_pkt);
> + qemu_bh_cancel(core->tx[i].tx_bh);
> + qemu_bh_delete(core->tx[i].tx_bh);
> + core->tx[i].tx_bh = NULL;
> }
>
> net_rx_pkt_uninit(core->rx_pkt);
> @@ -3480,6 +3545,8 @@ e1000e_core_reset(E1000ECore *core)
> net_tx_pkt_reset(core->tx[i].tx_pkt);
> memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
> core->tx[i].skip_cp = false;
> + qemu_bh_cancel(core->tx[i].tx_bh);
> + core->tx[i].tx_waiting = 0;
> }
> }
>
> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
> index aee32f7e48..0c16dce3a6 100644
> --- a/hw/net/e1000e_core.h
> +++ b/hw/net/e1000e_core.h
> @@ -77,10 +77,18 @@ struct E1000Core {
> unsigned char sum_needed;
> bool cptse;
> struct NetTxPkt *tx_pkt;
> + QEMUBH *tx_bh;
> + uint32_t tx_waiting;
> + uint32_t cause;
> + bool ide;
> + E1000ECore *core;
> } tx[E1000E_NUM_QUEUES];
>
> struct NetRxPkt *rx_pkt;
>
> + int32_t tx_burst;
> +
> + bool vm_running;
> bool has_vnet;
> int max_queue_num;
>
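(As an aside: with the patch applied, the throttling knob should be
adjustable like any other qdev property, e.g. something along the lines
of

    -device e1000e,netdev=net0,x-txburst=128

overriding the TX_BURST default of 256 above; untested here, just the
usual -device property syntax.)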