* [PATCH net-next v2 12/21] virtio_net: xsk: tx: support tx
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
The driver's tx NAPI is central to XSK: it is responsible for obtaining
data from the XSK tx queue and sending it out.
To start transmitting, we therefore need to trigger the tx NAPI.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio/main.c | 12 +++-
drivers/net/virtio/virtio_net.h | 3 +-
drivers/net/virtio/xsk.c | 110 ++++++++++++++++++++++++++++++++
drivers/net/virtio/xsk.h | 13 ++++
4 files changed, 136 insertions(+), 2 deletions(-)
diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index 6c608b3ce27d..ff6bc764089d 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -2074,6 +2074,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
struct virtnet_info *vi = sq->vq->vdev->priv;
unsigned int index = vq2txq(sq->vq);
struct netdev_queue *txq;
+ int busy = 0;
int opaque;
bool done;
@@ -2086,11 +2087,20 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
- free_old_xmit(sq, true);
+
+ if (sq->xsk.pool)
+ busy |= virtnet_xsk_xmit(sq, sq->xsk.pool, budget);
+ else
+ free_old_xmit(sq, true);
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);
+ if (busy) {
+ __netif_tx_unlock(txq);
+ return budget;
+ }
+
opaque = virtqueue_enable_cb_prepare(sq->vq);
done = napi_complete_done(napi, 0);
diff --git a/drivers/net/virtio/virtio_net.h b/drivers/net/virtio/virtio_net.h
index 442af4673bf8..1c21af47e13c 100644
--- a/drivers/net/virtio/virtio_net.h
+++ b/drivers/net/virtio/virtio_net.h
@@ -9,7 +9,8 @@
#include <net/xdp_sock_drv.h>
#define VIRTIO_XDP_FLAG BIT(0)
-#define VIRTIO_XMIT_DATA_MASK (VIRTIO_XDP_FLAG)
+#define VIRTIO_XSK_FLAG BIT(1)
+#define VIRTIO_XMIT_DATA_MASK (VIRTIO_XDP_FLAG | VIRTIO_XSK_FLAG)
/* RX packet size EWMA. The average packet size is used to determine the packet
* buffer size when refilling RX rings. As the entire RX ring may be refilled
diff --git a/drivers/net/virtio/xsk.c b/drivers/net/virtio/xsk.c
index 8b397787603f..caa448308232 100644
--- a/drivers/net/virtio/xsk.c
+++ b/drivers/net/virtio/xsk.c
@@ -4,9 +4,119 @@
*/
#include "virtio_net.h"
+#include "xsk.h"
static struct virtio_net_hdr_mrg_rxbuf xsk_hdr;
+static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
+{
+ sg->dma_address = addr;
+ sg->length = len;
+}
+
+static void virtnet_xsk_check_queue(struct virtnet_sq *sq)
+{
+ struct virtnet_info *vi = sq->vq->vdev->priv;
+ struct net_device *dev = vi->dev;
+ int qnum = sq - vi->sq;
+
+ /* If it is a raw buffer queue, it does not check whether the status
+ * of the queue is stopped when sending. So there is no need to check
+ * the situation of the raw buffer queue.
+ */
+ if (virtnet_is_xdp_raw_buffer_queue(vi, qnum))
+ return;
+
+ /* If this sq is not the exclusive queue of the current cpu,
+ * then it may be called by start_xmit, so check it running out
+ * of space.
+ *
+ * Stop the queue to avoid getting packets that we are
+ * then unable to transmit. Then wait the tx interrupt.
+ */
+ if (sq->vq->num_free < 2 + MAX_SKB_FRAGS)
+ netif_stop_subqueue(dev, qnum);
+}
+
+static int virtnet_xsk_xmit_one(struct virtnet_sq *sq,
+ struct xsk_buff_pool *pool,
+ struct xdp_desc *desc)
+{
+ struct virtnet_info *vi;
+ dma_addr_t addr;
+
+ vi = sq->vq->vdev->priv;
+
+ addr = xsk_buff_raw_get_dma(pool, desc->addr);
+ xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);
+
+ sg_init_table(sq->sg, 2);
+
+ sg_fill_dma(sq->sg, sq->xsk.hdr_dma_address, vi->hdr_len);
+ sg_fill_dma(sq->sg + 1, addr, desc->len);
+
+ return virtqueue_add_outbuf(sq->vq, sq->sg, 2,
+ virtnet_xsk_to_ptr(desc->len), GFP_ATOMIC);
+}
+
+static int virtnet_xsk_xmit_batch(struct virtnet_sq *sq,
+ struct xsk_buff_pool *pool,
+ unsigned int budget,
+ u64 *kicks)
+{
+ struct xdp_desc *descs = pool->tx_descs;
+ u32 nb_pkts, max_pkts, i;
+ bool kick = false;
+ int err;
+
+ /* Every xsk tx packet needs two desc(virtnet header and packet). So we
+ * use sq->vq->num_free / 2 as the limitation.
+ */
+ max_pkts = min_t(u32, budget, sq->vq->num_free / 2);
+
+ nb_pkts = xsk_tx_peek_release_desc_batch(pool, max_pkts);
+ if (!nb_pkts)
+ return 0;
+
+ for (i = 0; i < nb_pkts; i++) {
+ err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
+ if (unlikely(err))
+ break;
+
+ kick = true;
+ }
+
+ if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
+ (*kicks)++;
+
+ return i;
+}
+
+bool virtnet_xsk_xmit(struct virtnet_sq *sq, struct xsk_buff_pool *pool,
+ int budget)
+{
+ u64 bytes = 0, packets = 0, kicks = 0;
+ int sent;
+
+ virtnet_free_old_xmit(sq, true, &bytes, &packets);
+
+ sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);
+
+ virtnet_xsk_check_queue(sq);
+
+ u64_stats_update_begin(&sq->stats.syncp);
+ u64_stats_add(&sq->stats.packets, packets);
+ u64_stats_add(&sq->stats.bytes, bytes);
+ u64_stats_add(&sq->stats.kicks, kicks);
+ u64_stats_add(&sq->stats.xdp_tx, sent);
+ u64_stats_update_end(&sq->stats.syncp);
+
+ if (xsk_uses_need_wakeup(pool))
+ xsk_set_tx_need_wakeup(pool);
+
+ return sent == budget;
+}
+
static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct virtnet_rq *rq,
struct xsk_buff_pool *pool)
{
diff --git a/drivers/net/virtio/xsk.h b/drivers/net/virtio/xsk.h
index 1918285c310c..73ca8cd5308b 100644
--- a/drivers/net/virtio/xsk.h
+++ b/drivers/net/virtio/xsk.h
@@ -3,5 +3,18 @@
#ifndef __XSK_H__
#define __XSK_H__
+#define VIRTIO_XSK_FLAG_OFFSET 4
+
+static inline void *virtnet_xsk_to_ptr(u32 len)
+{
+ unsigned long p;
+
+ p = len << VIRTIO_XSK_FLAG_OFFSET;
+
+ return (void *)(p | VIRTIO_XSK_FLAG);
+}
+
int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp);
+bool virtnet_xsk_xmit(struct virtnet_sq *sq, struct xsk_buff_pool *pool,
+ int budget);
#endif
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* [PATCH net-next v2 13/21] virtio_net: xsk: tx: support wakeup
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
xsk wakeup is used by the xsk framework or the user to trigger the xsk
xmit logic.
Virtio-net cannot actively generate an interrupt, so it instead tries to
trigger the tx NAPI on the local CPU.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio/main.c | 20 ++++++--------------
drivers/net/virtio/virtio_net.h | 9 +++++++++
drivers/net/virtio/xsk.c | 23 +++++++++++++++++++++++
drivers/net/virtio/xsk.h | 1 +
4 files changed, 39 insertions(+), 14 deletions(-)
diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index ff6bc764089d..6a5e74c482f3 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -233,15 +233,6 @@ static void disable_delayed_refill(struct virtnet_info *vi)
spin_unlock_bh(&vi->refill_lock);
}
-static void virtqueue_napi_schedule(struct napi_struct *napi,
- struct virtqueue *vq)
-{
- if (napi_schedule_prep(napi)) {
- virtqueue_disable_cb(vq);
- __napi_schedule(napi);
- }
-}
-
static void virtqueue_napi_complete(struct napi_struct *napi,
struct virtqueue *vq, int processed)
{
@@ -250,7 +241,7 @@ static void virtqueue_napi_complete(struct napi_struct *napi,
opaque = virtqueue_enable_cb_prepare(vq);
if (napi_complete_done(napi, processed)) {
if (unlikely(virtqueue_poll(vq, opaque)))
- virtqueue_napi_schedule(napi, vq);
+ virtnet_vq_napi_schedule(napi, vq);
} else {
virtqueue_disable_cb(vq);
}
@@ -265,7 +256,7 @@ static void skb_xmit_done(struct virtqueue *vq)
virtqueue_disable_cb(vq);
if (napi->weight)
- virtqueue_napi_schedule(napi, vq);
+ virtnet_vq_napi_schedule(napi, vq);
else
/* We were probably waiting for more output buffers. */
netif_wake_subqueue(vi->dev, vq2txq(vq));
@@ -667,7 +658,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
netif_stop_subqueue(dev, qnum);
if (use_napi) {
if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
- virtqueue_napi_schedule(&sq->napi, sq->vq);
+ virtnet_vq_napi_schedule(&sq->napi, sq->vq);
} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
/* More just got used, free them then recheck. */
free_old_xmit(sq, false);
@@ -1834,7 +1825,7 @@ static void skb_recv_done(struct virtqueue *rvq)
struct virtnet_info *vi = rvq->vdev->priv;
struct virtnet_rq *rq = &vi->rq[vq2rxq(rvq)];
- virtqueue_napi_schedule(&rq->napi, rvq);
+ virtnet_vq_napi_schedule(&rq->napi, rvq);
}
static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
@@ -1846,7 +1837,7 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
* Call local_bh_enable after to trigger softIRQ processing.
*/
local_bh_disable();
- virtqueue_napi_schedule(napi, vq);
+ virtnet_vq_napi_schedule(napi, vq);
local_bh_enable();
}
@@ -3818,6 +3809,7 @@ static const struct net_device_ops virtnet_netdev = {
.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
.ndo_bpf = virtnet_xdp,
.ndo_xdp_xmit = virtnet_xdp_xmit,
+ .ndo_xsk_wakeup = virtnet_xsk_wakeup,
.ndo_features_check = passthru_features_check,
.ndo_get_phys_port_name = virtnet_get_phys_port_name,
.ndo_set_features = virtnet_set_features,
diff --git a/drivers/net/virtio/virtio_net.h b/drivers/net/virtio/virtio_net.h
index 1c21af47e13c..a431a2c1ee47 100644
--- a/drivers/net/virtio/virtio_net.h
+++ b/drivers/net/virtio/virtio_net.h
@@ -302,6 +302,15 @@ static inline bool virtnet_is_xdp_raw_buffer_queue(struct virtnet_info *vi, int
return false;
}
+static inline void virtnet_vq_napi_schedule(struct napi_struct *napi,
+ struct virtqueue *vq)
+{
+ if (napi_schedule_prep(napi)) {
+ virtqueue_disable_cb(vq);
+ __napi_schedule(napi);
+ }
+}
+
void virtnet_rx_pause(struct virtnet_info *vi, struct virtnet_rq *rq);
void virtnet_rx_resume(struct virtnet_info *vi, struct virtnet_rq *rq);
void virtnet_tx_pause(struct virtnet_info *vi, struct virtnet_sq *sq);
diff --git a/drivers/net/virtio/xsk.c b/drivers/net/virtio/xsk.c
index caa448308232..ea5804ddd44e 100644
--- a/drivers/net/virtio/xsk.c
+++ b/drivers/net/virtio/xsk.c
@@ -117,6 +117,29 @@ bool virtnet_xsk_xmit(struct virtnet_sq *sq, struct xsk_buff_pool *pool,
return sent == budget;
}
+int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtnet_sq *sq;
+
+ if (!netif_running(dev))
+ return -ENETDOWN;
+
+ if (qid >= vi->curr_queue_pairs)
+ return -EINVAL;
+
+ sq = &vi->sq[qid];
+
+ if (napi_if_scheduled_mark_missed(&sq->napi))
+ return 0;
+
+ local_bh_disable();
+ virtnet_vq_napi_schedule(&sq->napi, sq->vq);
+ local_bh_enable();
+
+ return 0;
+}
+
static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct virtnet_rq *rq,
struct xsk_buff_pool *pool)
{
diff --git a/drivers/net/virtio/xsk.h b/drivers/net/virtio/xsk.h
index 73ca8cd5308b..1bd19dcda649 100644
--- a/drivers/net/virtio/xsk.h
+++ b/drivers/net/virtio/xsk.h
@@ -17,4 +17,5 @@ static inline void *virtnet_xsk_to_ptr(u32 len)
int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp);
bool virtnet_xsk_xmit(struct virtnet_sq *sq, struct xsk_buff_pool *pool,
int budget);
+int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag);
#endif
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* [PATCH net-next v2 14/21] virtio_net: xsk: tx: virtnet_free_old_xmit() distinguishes xsk buffer
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
virtnet_free_old_xmit() distinguishes three types of pointer (skb, xdp
frame, xsk buffer) by the low bits of the pointer.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio/virtio_net.h | 18 ++++++++++++++++--
drivers/net/virtio/xsk.h | 5 +++++
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/drivers/net/virtio/virtio_net.h b/drivers/net/virtio/virtio_net.h
index a431a2c1ee47..a13d6d301fdb 100644
--- a/drivers/net/virtio/virtio_net.h
+++ b/drivers/net/virtio/virtio_net.h
@@ -225,6 +225,11 @@ struct virtnet_info {
struct failover *failover;
};
+static inline bool virtnet_is_skb_ptr(void *ptr)
+{
+ return !((unsigned long)ptr & VIRTIO_XMIT_DATA_MASK);
+}
+
static inline bool virtnet_is_xdp_frame(void *ptr)
{
return (unsigned long)ptr & VIRTIO_XDP_FLAG;
@@ -235,6 +240,8 @@ static inline struct xdp_frame *virtnet_ptr_to_xdp(void *ptr)
return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
}
+static inline u32 virtnet_ptr_to_xsk(void *ptr);
+
static inline void *virtnet_sq_unmap(struct virtnet_sq *sq, void *data)
{
struct virtnet_sq_dma *next, *head;
@@ -261,11 +268,12 @@ static inline void *virtnet_sq_unmap(struct virtnet_sq *sq, void *data)
static inline void virtnet_free_old_xmit(struct virtnet_sq *sq, bool in_napi,
u64 *bytes, u64 *packets)
{
+ unsigned int xsknum = 0;
unsigned int len;
void *ptr;
while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
- if (!virtnet_is_xdp_frame(ptr)) {
+ if (virtnet_is_skb_ptr(ptr)) {
struct sk_buff *skb;
if (sq->do_dma)
@@ -277,7 +285,7 @@ static inline void virtnet_free_old_xmit(struct virtnet_sq *sq, bool in_napi,
*bytes += skb->len;
napi_consume_skb(skb, in_napi);
- } else {
+ } else if (virtnet_is_xdp_frame(ptr)) {
struct xdp_frame *frame;
if (sq->do_dma)
@@ -287,9 +295,15 @@ static inline void virtnet_free_old_xmit(struct virtnet_sq *sq, bool in_napi,
*bytes += xdp_get_frame_len(frame);
xdp_return_frame(frame);
+ } else {
+ *bytes += virtnet_ptr_to_xsk(ptr);
+ ++xsknum;
}
(*packets)++;
}
+
+ if (xsknum)
+ xsk_tx_completed(sq->xsk.pool, xsknum);
}
static inline bool virtnet_is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
diff --git a/drivers/net/virtio/xsk.h b/drivers/net/virtio/xsk.h
index 1bd19dcda649..7ebc9bda7aee 100644
--- a/drivers/net/virtio/xsk.h
+++ b/drivers/net/virtio/xsk.h
@@ -14,6 +14,11 @@ static inline void *virtnet_xsk_to_ptr(u32 len)
return (void *)(p | VIRTIO_XSK_FLAG);
}
+static inline u32 virtnet_ptr_to_xsk(void *ptr)
+{
+ return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
+}
+
int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp);
bool virtnet_xsk_xmit(struct virtnet_sq *sq, struct xsk_buff_pool *pool,
int budget);
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* [PATCH net-next v2 15/21] virtio_net: xsk: tx: virtnet_sq_free_unused_buf() check xsk buffer
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
Make virtnet_sq_free_unused_buf() check for xsk buffers and report them
to the pool with xsk_tx_completed().
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/virtio/main.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index 6a5e74c482f3..6210a6e37396 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -3919,16 +3919,18 @@ void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
sq = &vi->sq[i];
- if (!virtnet_is_xdp_frame(buf)) {
+ if (virtnet_is_skb_ptr(buf)) {
if (sq->do_dma)
buf = virtnet_sq_unmap(sq, buf);
dev_kfree_skb(buf);
- } else {
+ } else if (virtnet_is_xdp_frame(buf)) {
if (sq->do_dma)
buf = virtnet_sq_unmap(sq, buf);
xdp_return_frame(virtnet_ptr_to_xdp(buf));
+ } else {
+ xsk_tx_completed(sq->xsk.pool, 1);
}
}
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* [PATCH net-next v2 16/21] virtio_net: xsk: rx: introduce add_recvbuf_xsk()
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
Implement the logic of filling rq with XSK buffers.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio/main.c | 4 ++-
drivers/net/virtio/virtio_net.h | 5 ++++
drivers/net/virtio/xsk.c | 49 ++++++++++++++++++++++++++++++++-
drivers/net/virtio/xsk.h | 2 ++
4 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index 6210a6e37396..15943a22e17d 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -1798,7 +1798,9 @@ static bool try_fill_recv(struct virtnet_info *vi, struct virtnet_rq *rq,
bool oom;
do {
- if (vi->mergeable_rx_bufs)
+ if (rq->xsk.pool)
+ err = virtnet_add_recvbuf_xsk(vi, rq, rq->xsk.pool, gfp);
+ else if (vi->mergeable_rx_bufs)
err = add_recvbuf_mergeable(vi, rq, gfp);
else if (vi->big_packets)
err = add_recvbuf_big(vi, rq, gfp);
diff --git a/drivers/net/virtio/virtio_net.h b/drivers/net/virtio/virtio_net.h
index a13d6d301fdb..1242785e311e 100644
--- a/drivers/net/virtio/virtio_net.h
+++ b/drivers/net/virtio/virtio_net.h
@@ -140,6 +140,11 @@ struct virtnet_rq {
/* xdp rxq used by xsk */
struct xdp_rxq_info xdp_rxq;
+
+ struct xdp_buff **xsk_buffs;
+ u32 nxt_idx;
+ u32 num;
+ u32 size;
} xsk;
};
diff --git a/drivers/net/virtio/xsk.c b/drivers/net/virtio/xsk.c
index ea5804ddd44e..e737c3353212 100644
--- a/drivers/net/virtio/xsk.c
+++ b/drivers/net/virtio/xsk.c
@@ -38,6 +38,41 @@ static void virtnet_xsk_check_queue(struct virtnet_sq *sq)
netif_stop_subqueue(dev, qnum);
}
+int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct virtnet_rq *rq,
+ struct xsk_buff_pool *pool, gfp_t gfp)
+{
+ struct xdp_buff **xsk_buffs;
+ dma_addr_t addr;
+ u32 len, i;
+ int err = 0;
+
+ xsk_buffs = rq->xsk.xsk_buffs;
+
+ if (rq->xsk.nxt_idx >= rq->xsk.num) {
+ rq->xsk.num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->xsk.size);
+ if (!rq->xsk.num)
+ return -ENOMEM;
+ rq->xsk.nxt_idx = 0;
+ }
+
+ i = rq->xsk.nxt_idx;
+
+ /* use the part of XDP_PACKET_HEADROOM as the virtnet hdr space */
+ addr = xsk_buff_xdp_get_dma(xsk_buffs[i]) - vi->hdr_len;
+ len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
+
+ sg_init_table(rq->sg, 1);
+ sg_fill_dma(rq->sg, addr, len);
+
+ err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp);
+ if (err)
+ return err;
+
+ rq->xsk.nxt_idx++;
+
+ return 0;
+}
+
static int virtnet_xsk_xmit_one(struct virtnet_sq *sq,
struct xsk_buff_pool *pool,
struct xdp_desc *desc)
@@ -213,7 +248,7 @@ static int virtnet_xsk_pool_enable(struct net_device *dev,
struct virtnet_sq *sq;
struct device *dma_dev;
dma_addr_t hdr_dma;
- int err;
+ int err, size;
/* In big_packets mode, xdp cannot work, so there is no need to
* initialize xsk of rq.
@@ -249,6 +284,16 @@ static int virtnet_xsk_pool_enable(struct net_device *dev,
if (!dma_dev)
return -EPERM;
+ size = virtqueue_get_vring_size(rq->vq);
+
+ rq->xsk.xsk_buffs = kcalloc(size, sizeof(*rq->xsk.xsk_buffs), GFP_KERNEL);
+ if (!rq->xsk.xsk_buffs)
+ return -ENOMEM;
+
+ rq->xsk.size = size;
+ rq->xsk.nxt_idx = 0;
+ rq->xsk.num = 0;
+
hdr_dma = dma_map_single(dma_dev, &xsk_hdr, vi->hdr_len, DMA_TO_DEVICE);
if (dma_mapping_error(dma_dev, hdr_dma))
return -ENOMEM;
@@ -307,6 +352,8 @@ static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
dma_unmap_single(dma_dev, sq->xsk.hdr_dma_address, vi->hdr_len, DMA_TO_DEVICE);
+ kfree(rq->xsk.xsk_buffs);
+
return err1 | err2;
}
diff --git a/drivers/net/virtio/xsk.h b/drivers/net/virtio/xsk.h
index 7ebc9bda7aee..bef41a3f954e 100644
--- a/drivers/net/virtio/xsk.h
+++ b/drivers/net/virtio/xsk.h
@@ -23,4 +23,6 @@ int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp);
bool virtnet_xsk_xmit(struct virtnet_sq *sq, struct xsk_buff_pool *pool,
int budget);
int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag);
+int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct virtnet_rq *rq,
+ struct xsk_buff_pool *pool, gfp_t gfp);
#endif
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* [PATCH net-next v2 17/21] virtio_net: xsk: rx: skip dma unmap when rq is bind with AF_XDP
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
When an rq is bound to AF_XDP, the buffer DMA is managed by the AF_XDP
APIs, so buffers obtained from the virtio core must skip the DMA unmap
operation.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio/main.c | 8 +++++---
drivers/net/virtio/virtio_net.h | 3 +++
drivers/net/virtio/xsk.c | 1 +
3 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index 15943a22e17d..a318b2533b94 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -430,7 +430,7 @@ static void *virtnet_rq_get_buf(struct virtnet_rq *rq, u32 *len, void **ctx)
void *buf;
buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
- if (buf && rq->do_dma)
+ if (buf && rq->do_dma_unmap)
virtnet_rq_unmap(rq, buf, *len);
return buf;
@@ -561,8 +561,10 @@ static void virtnet_set_premapped(struct virtnet_info *vi)
/* disable for big mode */
if (vi->mergeable_rx_bufs || !vi->big_packets) {
- if (!virtqueue_set_dma_premapped(vi->rq[i].vq))
+ if (!virtqueue_set_dma_premapped(vi->rq[i].vq)) {
vi->rq[i].do_dma = true;
+ vi->rq[i].do_dma_unmap = true;
+ }
}
}
}
@@ -3944,7 +3946,7 @@ void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
rq = &vi->rq[i];
- if (rq->do_dma)
+ if (rq->do_dma_unmap)
virtnet_rq_unmap(rq, buf, 0);
virtnet_rq_free_buf(vi, rq, buf);
diff --git a/drivers/net/virtio/virtio_net.h b/drivers/net/virtio/virtio_net.h
index 1242785e311e..2005d0cd22e2 100644
--- a/drivers/net/virtio/virtio_net.h
+++ b/drivers/net/virtio/virtio_net.h
@@ -135,6 +135,9 @@ struct virtnet_rq {
/* Do dma by self */
bool do_dma;
+ /* Do dma unmap after getting buf from virtio core. */
+ bool do_dma_unmap;
+
struct {
struct xsk_buff_pool *pool;
diff --git a/drivers/net/virtio/xsk.c b/drivers/net/virtio/xsk.c
index e737c3353212..b09c473c29fb 100644
--- a/drivers/net/virtio/xsk.c
+++ b/drivers/net/virtio/xsk.c
@@ -210,6 +210,7 @@ static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct virtnet_rq *
xdp_rxq_info_unreg(&rq->xsk.xdp_rxq);
rq->xsk.pool = pool;
+ rq->do_dma_unmap = !pool;
virtnet_rx_resume(vi, rq);
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* [PATCH net-next v2 18/21] virtio_net: xsk: rx: introduce receive_xsk() to recv xsk buffer
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
virtnet_xdp_handler() is re-used, but:
1. We need to copy the data to create an skb for XDP_PASS.
2. We need to call xsk_buff_free() to release the buffer.
3. The handling of the xdp_buff is different.
If we pushed this logic into the existing receive handlers (mergeable and
small), we would have to maintain code scattered across mergeable and small
(and big). So I think it is a better choice to put the xsk code into an
independent function.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio/main.c | 12 ++--
drivers/net/virtio/virtio_net.h | 4 ++
drivers/net/virtio/xsk.c | 120 ++++++++++++++++++++++++++++++++
drivers/net/virtio/xsk.h | 4 ++
4 files changed, 135 insertions(+), 5 deletions(-)
diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index a318b2533b94..095f4acb0577 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -831,10 +831,10 @@ static void put_xdp_frags(struct xdp_buff *xdp)
}
}
-static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
- struct net_device *dev,
- unsigned int *xdp_xmit,
- struct virtnet_rq_stats *stats)
+int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
+ struct net_device *dev,
+ unsigned int *xdp_xmit,
+ struct virtnet_rq_stats *stats)
{
struct xdp_frame *xdpf;
int err;
@@ -1598,7 +1598,9 @@ static void receive_buf(struct virtnet_info *vi, struct virtnet_rq *rq,
return;
}
- if (vi->mergeable_rx_bufs)
+ if (rq->xsk.pool)
+ skb = virtnet_receive_xsk(dev, vi, rq, buf, len, xdp_xmit, stats);
+ else if (vi->mergeable_rx_bufs)
skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
stats);
else if (vi->big_packets)
diff --git a/drivers/net/virtio/virtio_net.h b/drivers/net/virtio/virtio_net.h
index 2005d0cd22e2..f520fec06662 100644
--- a/drivers/net/virtio/virtio_net.h
+++ b/drivers/net/virtio/virtio_net.h
@@ -339,4 +339,8 @@ void virtnet_tx_pause(struct virtnet_info *vi, struct virtnet_sq *sq);
void virtnet_tx_resume(struct virtnet_info *vi, struct virtnet_sq *sq);
void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
+int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
+ struct net_device *dev,
+ unsigned int *xdp_xmit,
+ struct virtnet_rq_stats *stats);
#endif
diff --git a/drivers/net/virtio/xsk.c b/drivers/net/virtio/xsk.c
index b09c473c29fb..5c7eb19ab04b 100644
--- a/drivers/net/virtio/xsk.c
+++ b/drivers/net/virtio/xsk.c
@@ -14,6 +14,18 @@ static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
sg->length = len;
}
+static unsigned int virtnet_receive_buf_num(struct virtnet_info *vi, char *buf)
+{
+ struct virtio_net_hdr_mrg_rxbuf *hdr;
+
+ if (vi->mergeable_rx_bufs) {
+ hdr = (struct virtio_net_hdr_mrg_rxbuf *)buf;
+ return virtio16_to_cpu(vi->vdev, hdr->num_buffers);
+ }
+
+ return 1;
+}
+
static void virtnet_xsk_check_queue(struct virtnet_sq *sq)
{
struct virtnet_info *vi = sq->vq->vdev->priv;
@@ -38,6 +50,114 @@ static void virtnet_xsk_check_queue(struct virtnet_sq *sq)
netif_stop_subqueue(dev, qnum);
}
+static void merge_drop_follow_xdp(struct net_device *dev,
+ struct virtnet_rq *rq,
+ u32 num_buf,
+ struct virtnet_rq_stats *stats)
+{
+ struct xdp_buff *xdp;
+ u32 len;
+
+ while (num_buf-- > 1) {
+ xdp = virtqueue_get_buf(rq->vq, &len);
+ if (unlikely(!xdp)) {
+ pr_debug("%s: rx error: %d buffers missing\n",
+ dev->name, num_buf);
+ dev->stats.rx_length_errors++;
+ break;
+ }
+ u64_stats_add(&stats->bytes, len);
+ xsk_buff_free(xdp);
+ }
+}
+
+static struct sk_buff *construct_skb(struct virtnet_rq *rq,
+ struct xdp_buff *xdp)
+{
+ unsigned int metasize = xdp->data - xdp->data_meta;
+ struct sk_buff *skb;
+ unsigned int size;
+
+ size = xdp->data_end - xdp->data_hard_start;
+ skb = napi_alloc_skb(&rq->napi, size);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+
+ size = xdp->data_end - xdp->data_meta;
+ memcpy(__skb_put(skb, size), xdp->data_meta, size);
+
+ if (metasize) {
+ __skb_pull(skb, metasize);
+ skb_metadata_set(skb, metasize);
+ }
+
+ return skb;
+}
+
+struct sk_buff *virtnet_receive_xsk(struct net_device *dev, struct virtnet_info *vi,
+ struct virtnet_rq *rq, void *buf,
+ unsigned int len, unsigned int *xdp_xmit,
+ struct virtnet_rq_stats *stats)
+{
+ struct virtio_net_hdr_mrg_rxbuf *hdr;
+ struct sk_buff *skb = NULL;
+ u32 ret, headroom, num_buf;
+ struct bpf_prog *prog;
+ struct xdp_buff *xdp;
+
+ len -= vi->hdr_len;
+
+ xdp = (struct xdp_buff *)buf;
+
+ xsk_buff_set_size(xdp, len);
+
+ hdr = xdp->data - vi->hdr_len;
+
+ num_buf = virtnet_receive_buf_num(vi, (char *)hdr);
+ if (num_buf > 1)
+ goto drop;
+
+ headroom = xdp->data - xdp->data_hard_start;
+
+ xdp_prepare_buff(xdp, xdp->data_hard_start, headroom, len, true);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk.pool);
+
+ ret = XDP_PASS;
+ rcu_read_lock();
+ prog = rcu_dereference(rq->xdp_prog);
+ if (prog)
+ ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
+ rcu_read_unlock();
+
+ switch (ret) {
+ case XDP_PASS:
+ skb = construct_skb(rq, xdp);
+ xsk_buff_free(xdp);
+ break;
+
+ case XDP_TX:
+ case XDP_REDIRECT:
+ goto consumed;
+
+ default:
+ goto drop;
+ }
+
+ return skb;
+
+drop:
+ u64_stats_inc(&stats->drops);
+
+ xsk_buff_free(xdp);
+
+ if (num_buf > 1)
+ merge_drop_follow_xdp(dev, rq, num_buf, stats);
+consumed:
+ return NULL;
+}
+
int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct virtnet_rq *rq,
struct xsk_buff_pool *pool, gfp_t gfp)
{
diff --git a/drivers/net/virtio/xsk.h b/drivers/net/virtio/xsk.h
index bef41a3f954e..dbd2839a5f61 100644
--- a/drivers/net/virtio/xsk.h
+++ b/drivers/net/virtio/xsk.h
@@ -25,4 +25,8 @@ bool virtnet_xsk_xmit(struct virtnet_sq *sq, struct xsk_buff_pool *pool,
int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag);
int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct virtnet_rq *rq,
struct xsk_buff_pool *pool, gfp_t gfp);
+struct sk_buff *virtnet_receive_xsk(struct net_device *dev, struct virtnet_info *vi,
+ struct virtnet_rq *rq, void *buf,
+ unsigned int len, unsigned int *xdp_xmit,
+ struct virtnet_rq_stats *stats);
#endif
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* [PATCH net-next v2 20/21] virtio_net: update tx timeout record
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
If the send queue has sent some packets, update the tx timeout record
to prevent a tx timeout from being reported.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/virtio/xsk.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/net/virtio/xsk.c b/drivers/net/virtio/xsk.c
index 5c7eb19ab04b..96bf5d8260f8 100644
--- a/drivers/net/virtio/xsk.c
+++ b/drivers/net/virtio/xsk.c
@@ -259,6 +259,16 @@ bool virtnet_xsk_xmit(struct virtnet_sq *sq, struct xsk_buff_pool *pool,
virtnet_xsk_check_queue(sq);
+ if (packets) {
+ struct netdev_queue *txq;
+ struct virtnet_info *vi;
+
+ vi = sq->vq->vdev->priv;
+
+ txq = netdev_get_tx_queue(vi->dev, sq - vi->sq);
+ txq_trans_cond_update(txq);
+ }
+
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_add(&sq->stats.packets, packets);
u64_stats_add(&sq->stats.bytes, bytes);
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* [PATCH net-next v2 21/21] virtio_net: xdp_features add NETDEV_XDP_ACT_XSK_ZEROCOPY
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
Now that AF_XDP (xsk) is supported, add NETDEV_XDP_ACT_XSK_ZEROCOPY to
xdp_features.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio/main.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index 42c7dbf53e63..2c14f0c84e56 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -4363,7 +4363,8 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->hw_features |= NETIF_F_GRO_HW;
dev->vlan_features = dev->features;
- dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
+ dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_XSK_ZEROCOPY;
/* MTU range: 68 - 65535 */
dev->min_mtu = MIN_MTU;
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* [PATCH net-next v2 19/21] virtio_net: xsk: rx: virtnet_rq_free_unused_buf() check xsk buffer
From: Xuan Zhuo @ 2023-11-07 3:12 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Alexei Starovoitov,
Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
virtualization, bpf
In-Reply-To: <20231107031227.100015-1-xuanzhuo@linux.alibaba.com>
Since this function is also called in other circumstances (freeze), we must
check inside it whether the buffer belongs to xsk. This cannot be determined
outside this function.
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
drivers/net/virtio/main.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c
index 095f4acb0577..42c7dbf53e63 100644
--- a/drivers/net/virtio/main.c
+++ b/drivers/net/virtio/main.c
@@ -3948,6 +3948,14 @@ void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
rq = &vi->rq[i];
+ if (rq->xsk.pool) {
+ struct xdp_buff *xdp;
+
+ xdp = (struct xdp_buff *)buf;
+ xsk_buff_free(xdp);
+ return;
+ }
+
if (rq->do_dma_unmap)
virtnet_rq_unmap(rq, buf, 0);
--
2.32.0.3.g01195cf9f
^ permalink raw reply related
* Re: [PATCH] crypto: ahash - Set using_shash for cloned ahash wrapper over shash
From: Eric Biggers @ 2023-11-07 3:36 UTC (permalink / raw)
To: Dmitry Safonov
Cc: Herbert Xu, linux-kernel, David Ahern, David S. Miller,
Dmitry Safonov, Eric Dumazet, Francesco Ruggeri, Jakub Kicinski,
Paolo Abeni, Salam Noureddine, netdev, linux-crypto
In-Reply-To: <20231107023717.820093-1-dima@arista.com>
On Tue, Nov 07, 2023 at 02:37:17AM +0000, Dmitry Safonov wrote:
> The cloned child of ahash that uses shash under the hood should use
> shash helpers (like crypto_shash_setkey()).
>
> The following panic may be observed on TCP-AO selftests:
>
> > ==================================================================
> > BUG: KASAN: wild-memory-access in crypto_mod_get+0x1b/0x60
> > Write of size 4 at addr 5d5be0ff5c415e14 by task connect_ipv4/1397
> >
> > CPU: 0 PID: 1397 Comm: connect_ipv4 Tainted: G W 6.6.0+ #47
> > Call Trace:
> > <TASK>
> > dump_stack_lvl+0x46/0x70
> > kasan_report+0xc3/0xf0
> > kasan_check_range+0xec/0x190
> > crypto_mod_get+0x1b/0x60
> > crypto_spawn_alg+0x53/0x140
> > crypto_spawn_tfm2+0x13/0x60
> > hmac_init_tfm+0x25/0x60
> > crypto_ahash_setkey+0x8b/0x100
> > tcp_ao_add_cmd+0xe7a/0x1120
> > do_tcp_setsockopt+0x5ed/0x12a0
> > do_sock_setsockopt+0x82/0x100
> > __sys_setsockopt+0xe9/0x160
> > __x64_sys_setsockopt+0x60/0x70
> > do_syscall_64+0x3c/0xe0
> > entry_SYSCALL_64_after_hwframe+0x46/0x4e
> > ==================================================================
> > general protection fault, probably for non-canonical address 0x5d5be0ff5c415e14: 0000 [#1] PREEMPT SMP KASAN
> > CPU: 0 PID: 1397 Comm: connect_ipv4 Tainted: G B W 6.6.0+ #47
> > Call Trace:
> > <TASK>
> > ? die_addr+0x3c/0xa0
> > ? exc_general_protection+0x144/0x210
> > ? asm_exc_general_protection+0x22/0x30
> > ? add_taint+0x26/0x90
> > ? crypto_mod_get+0x20/0x60
> > ? crypto_mod_get+0x1b/0x60
> > ? ahash_def_finup_done1+0x58/0x80
> > crypto_spawn_alg+0x53/0x140
> > crypto_spawn_tfm2+0x13/0x60
> > hmac_init_tfm+0x25/0x60
> > crypto_ahash_setkey+0x8b/0x100
> > tcp_ao_add_cmd+0xe7a/0x1120
> > do_tcp_setsockopt+0x5ed/0x12a0
> > do_sock_setsockopt+0x82/0x100
> > __sys_setsockopt+0xe9/0x160
> > __x64_sys_setsockopt+0x60/0x70
> > do_syscall_64+0x3c/0xe0
> > entry_SYSCALL_64_after_hwframe+0x46/0x4e
> > </TASK>
> > RIP: 0010:crypto_mod_get+0x20/0x60
>
> Make sure that the child/clone has using_shash set when parent is
> an shash user.
>
> Fixes: 2f1f34c1bf7b ("crypto: ahash - optimize performance when wrapping shash")
> Cc: David Ahern <dsahern@kernel.org>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Dmitry Safonov <0x7f454c46@gmail.com>
> Cc: Eric Biggers <ebiggers@google.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Francesco Ruggeri <fruggeri05@gmail.com>
> To: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Paolo Abeni <pabeni@redhat.com>
> Cc: Salam Noureddine <noureddine@arista.com>
> Cc: netdev@vger.kernel.org
> Cc: linux-crypto@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Dmitry Safonov <dima@arista.com>
> ---
> crypto/ahash.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/crypto/ahash.c b/crypto/ahash.c
> index deee55f939dc..80c3e5354711 100644
> --- a/crypto/ahash.c
> +++ b/crypto/ahash.c
> @@ -651,6 +651,7 @@ struct crypto_ahash *crypto_clone_ahash(struct crypto_ahash *hash)
> err = PTR_ERR(shash);
> goto out_free_nhash;
> }
> + nhash->using_shash = true;
> *nctx = shash;
> return nhash;
> }
>
> base-commit: be3ca57cfb777ad820c6659d52e60bbdd36bf5ff
Thanks:
Reviewed-by: Eric Biggers <ebiggers@google.com>
Note that this bug would have been prevented if crypto_clone_*() were covered by
the crypto self-tests.
- Eric
^ permalink raw reply
* Re: [PATCH v1] net/tcp: use kfree_sensitive() instend of kfree() in tcp_md5_twsk_free_rcu()
From: Eric Dumazet @ 2023-11-07 3:40 UTC (permalink / raw)
To: Minjie Du
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
open list:NETWORKING [TCP], open list, opensource.kernel
In-Reply-To: <20231107023444.3141-1-duminjie@vivo.com>
On Tue, Nov 7, 2023 at 3:35 AM Minjie Du <duminjie@vivo.com> wrote:
>
> key might contain private information, so better use
> kfree_sensitive to free it.
> In tcp_md5_twsk_free_rcu() use kfree_sensitive().
>
> Signed-off-by: Minjie Du <duminjie@vivo.com>
> ---
> net/ipv4/tcp_minisocks.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index a9807eeb311c..a7be78096783 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -368,7 +368,7 @@ static void tcp_md5_twsk_free_rcu(struct rcu_head *head)
> struct tcp_md5sig_key *key;
>
> key = container_of(head, struct tcp_md5sig_key, rcu);
> - kfree(key);
> + kfree_sensitive(key);
> static_branch_slow_dec_deferred(&tcp_md5_needed);
> tcp_md5_release_sigpool();
> }
> --
> 2.39.0
>
1) net-next is currently closed.
2) such patch could send a wrong signal (false sense of security with MD5)
3) You forgot tcp_time_wait_init(), tcp_md5_do_del(), tcp_md5_key_copy(),
tcp_md5_do_add(), tcp_clear_md5_list().
More work is needed I am afraid, please wait until next week when
6.7-rc1 is tagged and net-next opens again.
^ permalink raw reply
* Re: [PATCH net v3] tcp: Fix -Wc23-extensions in tcp_options_write()
From: Eric Dumazet @ 2023-11-07 3:44 UTC (permalink / raw)
To: Nathan Chancellor
Cc: davem, dsahern, kuba, pabeni, ndesaulniers, trix, 0x7f454c46,
noureddine, hch, netdev, linux-kernel, llvm, patches
In-Reply-To: <20231106-tcp-ao-fix-label-in-compound-statement-warning-v3-1-b54a64602a85@kernel.org>
On Mon, Nov 6, 2023 at 10:14 PM Nathan Chancellor <nathan@kernel.org> wrote:
>
> Clang warns (or errors with CONFIG_WERROR=y) when CONFIG_TCP_AO is set:
>
> net/ipv4/tcp_output.c:663:2: error: label at end of compound statement is a C23 extension [-Werror,-Wc23-extensions]
> 663 | }
> | ^
> 1 error generated.
>
> On earlier releases (such as clang-11, the current minimum supported
> version for building the kernel) that do not support C23, this was a
> hard error unconditionally:
>
>
> Closes: https://github.com/ClangBuiltLinux/linux/issues/1953
> Fixes: 1e03d32bea8e ("net/tcp: Add TCP-AO sign to outgoing packets")
> Signed-off-by: Nathan Chancellor <nathan@kernel.org>
> ---
> Changes in v3:
> - Don't use a pointer to a pointer for ptr parameter to avoid the extra
> indirection in process_tcp_ao_options(), just return the modified ptr
> value back to the caller (Eric)
SGTM thanks.
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply
* [PATCH net] net/smc: avoid data corruption caused by decline
From: D. Wythe @ 2023-11-07 3:56 UTC (permalink / raw)
To: kgraul, wenjia, jaka, wintera; +Cc: kuba, davem, netdev, linux-s390, linux-rdma
From: "D. Wythe" <alibuda@linux.alibaba.com>
We found a data corruption issue during testing of SMC-R on Redis
applications.
The benchmark has a low probability of reporting a strange error as
shown below.
"Error: Protocol error, got "\xe2" as reply type byte"
Finally, we found that the retrieved error data was as follows:
0xE2 0xD4 0xC3 0xD9 0x04 0x00 0x2C 0x20 0xA6 0x56 0x00 0x16 0x3E 0x0C
0xCB 0x04 0x02 0x01 0x00 0x00 0x20 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xE2
It is quite obvious that this is a SMC DECLINE message, which means that
the applications received SMC protocol message.
We found that this was caused by the following situations:
client server
proposal
------------->
accept
<-------------
confirm
------------->
wait confirm
failed llc confirm
x------
(after 2s)timeout
wait rsp
wait decline
(after 1s) timeout
(after 2s) timeout
decline
-------------->
decline
<--------------
As a result, a decline message was sent in the implementation, and this
message was read from TCP by the already-fallback connection.
This patch sets the client timeout to twice the server's value.
With this simple change, the Decline messages should never cross or
collide (during Confirm link timeout).
This issue requires an immediate solution, since the protocol updates
involve a more long-term solution.
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
net/smc/af_smc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index abd2667..5b91f55 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -599,7 +599,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
int rc;
/* receive CONFIRM LINK request from server over RoCE fabric */
- qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME,
SMC_LLC_CONFIRM_LINK);
if (!qentry) {
struct smc_clc_msg_decline dclc;
--
1.8.3.1
^ permalink raw reply related
* [syzbot] Monthly dccp report (Nov 2023)
From: syzbot @ 2023-11-07 4:52 UTC (permalink / raw)
To: dccp, linux-kernel, netdev, syzkaller-bugs
Hello dccp maintainers/developers,
This is a 31-day syzbot report for the dccp subsystem.
All related reports/information can be found at:
https://syzkaller.appspot.com/upstream/s/dccp
During the period, 1 new issues were detected and 0 were fixed.
In total, 5 issues are still open and 6 have been fixed so far.
Some of the still happening issues:
Ref Crashes Repro Title
<1> 102 Yes KASAN: use-after-free Read in ccid2_hc_tx_packet_recv
https://syzkaller.appspot.com/bug?extid=554ccde221001ab5479a
<2> 50 Yes BUG: "hc->tx_t_ipi == NUM" holds (exception!) at net/dccp/ccids/ccid3.c:LINE/ccid3_update_send_interval()
https://syzkaller.appspot.com/bug?extid=94641ba6c1d768b1e35e
<3> 13 Yes BUG: stored value of X_recv is zero at net/dccp/ccids/ccid3.c:LINE/ccid3_first_li() (3)
https://syzkaller.appspot.com/bug?extid=2ad8ef335371014d4dc7
---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.
To disable reminders for individual bugs, reply with the following command:
#syz set <Ref> no-reminders
To change bug's subsystems, reply with:
#syz set <Ref> subsystems: new-subsystem
You may send multiple commands in a single email message.
^ permalink raw reply
* Re: [RFC v1 8/8] iommu: expose the function iommu_device_use_default_domain
From: Cindy Lu @ 2023-11-07 6:10 UTC (permalink / raw)
To: Jason Wang; +Cc: mst, yi.l.liu, jgg, linux-kernel, virtualization, netdev
In-Reply-To: <CACGkMEtRJ6-KRQ1qrrwC3FVBosMfYvV6Q47enoE9cE9C8MYYOg@mail.gmail.com>
On Mon, Nov 6, 2023 at 3:26 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Sat, Nov 4, 2023 at 1:18 AM Cindy Lu <lulu@redhat.com> wrote:
> >
> > Expose the function iommu_device_use_default_domain() and
> > iommu_device_unuse_default_domain(),
> > While vdpa bind the iommufd device and detach the iommu device,
> > vdpa need to call the function
> > iommu_device_unuse_default_domain() to release the owner
> >
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
>
> This is the end of the series, who is the user then?
>
> Thanks
>
hi Jason
These two functions are called in vhost_vdpa_iommufd_set_device(). vdpa needs
to release the DMA owner; otherwise, the call will fail when
iommufd calls iommu_device_claim_dma_owner() in iommufd_device_bind().
I will change this sequence, or maybe find some other way to fix
this problem.
thanks
cindy
> > ---
> > drivers/iommu/iommu.c | 2 ++
> > 1 file changed, 2 insertions(+)
> >
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index 3bfc56df4f78..987cbf8c9a87 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -3164,6 +3164,7 @@ int iommu_device_use_default_domain(struct device *dev)
> >
> > return ret;
> > }
> > +EXPORT_SYMBOL_GPL(iommu_device_use_default_domain);
> >
> > /**
> > * iommu_device_unuse_default_domain() - Device driver stops handling device
> > @@ -3187,6 +3188,7 @@ void iommu_device_unuse_default_domain(struct device *dev)
> > mutex_unlock(&group->mutex);
> > iommu_group_put(group);
> > }
> > +EXPORT_SYMBOL_GPL(iommu_device_unuse_default_domain);
> >
> > static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
> > {
> > --
> > 2.34.3
> >
>
^ permalink raw reply
* Re: [PATCH net-next V8 1/2] ptp: ptp_read should not release queue
From: Richard Cochran @ 2023-11-07 6:13 UTC (permalink / raw)
To: Edward Adam Davis
Cc: davem, habetsm.xilinx, jeremy, linux-kernel, netdev, reibax,
syzbot+df3f3ef31f60781fa911
In-Reply-To: <tencent_AD33049E711B744BDD1B3225A1BA3DBB9A08@qq.com>
On Mon, Nov 06, 2023 at 10:31:27PM +0800, Edward Adam Davis wrote:
> Firstly, queue is not the memory allocated in ptp_read;
> Secondly, other processes may block at ptp_read and wait for conditions to be
> met to perform read operations.
>
> Reported-and-tested-by: syzbot+df3f3ef31f60781fa911@syzkaller.appspotmail.com
> Fixes: 8f5de6fb2453 ("ptp: support multiple timestamp event readers")
> Signed-off-by: Edward Adam Davis <eadavis@qq.com>
(This should go to net and not net-next.)
Acked-by: Richard Cochran <richardcochran@gmail.com>
^ permalink raw reply
* Re: [PATCH net-next V8 2/2] ptp: fix corrupted list in ptp_open
From: Richard Cochran @ 2023-11-07 6:13 UTC (permalink / raw)
To: Edward Adam Davis
Cc: davem, habetsm.xilinx, jeremy, linux-kernel, netdev, reibax,
syzbot+df3f3ef31f60781fa911
In-Reply-To: <tencent_1372C3B5244E7768777606C0F36563612905@qq.com>
On Mon, Nov 06, 2023 at 10:31:28PM +0800, Edward Adam Davis wrote:
> There is no lock protection when writing ptp->tsevqs in ptp_open() and
> ptp_release(), which can cause data corruption, use spin lock to avoid this
> issue.
>
> Moreover, ptp_release() should not be used to release the queue in ptp_read(),
> and it should be deleted altogether.
>
> Reported-and-tested-by: syzbot+df3f3ef31f60781fa911@syzkaller.appspotmail.com
> Fixes: 8f5de6fb2453 ("ptp: support multiple timestamp event readers")
> Signed-off-by: Edward Adam Davis <eadavis@qq.com>
(This should go to net and not net-next.)
Acked-by: Richard Cochran <richardcochran@gmail.com>
^ permalink raw reply
* Re: [RFC v1 6/8] vdpa: change the map/unmap process to support iommufd
From: Cindy Lu @ 2023-11-07 6:14 UTC (permalink / raw)
To: Yi Liu; +Cc: jasowang, mst, jgg, linux-kernel, virtualization, netdev
In-Reply-To: <12ebf6fc-9228-47a1-88dc-a177f7f7d5db@intel.com>
On Mon, Nov 6, 2023 at 4:52 PM Yi Liu <yi.l.liu@intel.com> wrote:
>
> On 2023/11/4 01:16, Cindy Lu wrote:
> > Add the check for iommufd_ictx,If vdpa don't have the iommufd_ictx
> > then will use the Legacy iommu domain pathway
> >
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> > drivers/vhost/vdpa.c | 43 ++++++++++++++++++++++++++++++++++++++-----
> > 1 file changed, 38 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > index dfaddd833364..0e2dba59e1ce 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -1067,9 +1067,6 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
> > /* Legacy iommu domain pathway without IOMMUFD */
> > r = iommu_map(v->domain, iova, pa, size,
> > perm_to_iommu_flags(perm), GFP_KERNEL);
> > - } else {
> > - r = iommu_map(v->domain, iova, pa, size,
> > - perm_to_iommu_flags(perm), GFP_KERNEL);
> > }
> > if (r) {
> > vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
> > @@ -1095,8 +1092,10 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v,
> > if (ops->set_map) {
> > if (!v->in_batch)
> > ops->set_map(vdpa, asid, iotlb);
> > + } else if (!vdpa->iommufd_ictx) {
> > + /* Legacy iommu domain pathway without IOMMUFD */
> > + iommu_unmap(v->domain, iova, size);
> > }
> > -
> > }
> >
> > static int vhost_vdpa_va_map(struct vhost_vdpa *v,
> > @@ -1149,7 +1148,36 @@ static int vhost_vdpa_va_map(struct vhost_vdpa *v,
> >
> > return ret;
> > }
> > +#if 0
> > +int vhost_pin_pages(struct vdpa_device *device, dma_addr_t iova, int npage,
> > + int prot, struct page **pages)
> > +{
> > + if (!pages || !npage)
> > + return -EINVAL;
> > + //if (!device->config->dma_unmap)
> > + //return -EINVAL;
> > +
> > + if (0) { //device->iommufd_access) {
> > + int ret;
> > +
> > + if (iova > ULONG_MAX)
> > + return -EINVAL;
> >
> > + ret = iommufd_access_pin_pages(
> > + device->iommufd_access, iova, npage * PAGE_SIZE, pages,
> > + (prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
> > + if (ret) {
> > +
> > + return ret;
> > + }
> > +
> > + return npage;
> > + } else {
> > + return pin_user_pages(iova, npage, prot, pages);
> > + }
> > + return -EINVAL;
> > +}
> > +#endif
>
> Is above code needed or not?
This code is for the simulator; that device still has some bugs, and I will
continue working on it.
Thanks
cindy
>
> > static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
> > struct vhost_iotlb *iotlb,
> > u64 iova, u64 size, u64 uaddr, u32 perm)
> > @@ -1418,9 +1446,13 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
> > struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> >
> > if (v->domain) {
> > - iommu_detach_device(v->domain, dma_dev);
> > + if (!vdpa->iommufd_ictx) {
> > + iommu_detach_device(v->domain, dma_dev);
> > + }
> > iommu_domain_free(v->domain);
> > }
> > + if (vdpa->iommufd_ictx)
> > + vdpa_iommufd_unbind(vdpa);
> >
> > v->domain = NULL;
> > }
> > @@ -1645,6 +1677,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
> > }
> >
> > atomic_set(&v->opened, 0);
> > + atomic_set(&vdpa->iommufd_users, 0);
> > v->minor = minor;
> > v->vdpa = vdpa;
> > v->nvqs = vdpa->nvqs;
>
> --
> Regards,
> Yi Liu
>
^ permalink raw reply
* Re: [RFC v1 2/8] Kconfig: Add the new file vhost/iommufd
From: Cindy Lu @ 2023-11-07 6:15 UTC (permalink / raw)
To: Yi Liu; +Cc: jasowang, mst, jgg, linux-kernel, virtualization, netdev
In-Reply-To: <0c710537-254f-48a7-b11d-3ee27c1f10f7@intel.com>
On Mon, Nov 6, 2023 at 4:50 PM Yi Liu <yi.l.liu@intel.com> wrote:
>
> On 2023/11/4 01:16, Cindy Lu wrote:
> > Change the makefile and Kconfig, to add the
> > new file vhost/iommufd.c
>
> why not merge it with patch 1?
>
sure will change this
thanks
cindy
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> > drivers/vhost/Kconfig | 1 +
> > drivers/vhost/Makefile | 1 +
> > 2 files changed, 2 insertions(+)
> >
> > diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
> > index b455d9ab6f3d..a4becfb36d77 100644
> > --- a/drivers/vhost/Kconfig
> > +++ b/drivers/vhost/Kconfig
> > @@ -72,6 +72,7 @@ config VHOST_VDPA
> > select VHOST
> > select IRQ_BYPASS_MANAGER
> > depends on VDPA
> > + depends on IOMMUFD || !IOMMUFD
> > help
> > This kernel module can be loaded in host kernel to accelerate
> > guest virtio devices with the vDPA-based backends.
> > diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
> > index f3e1897cce85..cda7f6b7f8da 100644
> > --- a/drivers/vhost/Makefile
> > +++ b/drivers/vhost/Makefile
> > @@ -12,6 +12,7 @@ obj-$(CONFIG_VHOST_RING) += vringh.o
> >
> > obj-$(CONFIG_VHOST_VDPA) += vhost_vdpa.o
> > vhost_vdpa-y := vdpa.o
> > +vhost_vdpa-$(CONFIG_IOMMUFD) += iommufd.o
> >
> > obj-$(CONFIG_VHOST) += vhost.o
> >
>
> --
> Regards,
> Yi Liu
>
^ permalink raw reply
* Re: net: bcmasp: Use common error handling code in bcmasp_probe()
From: Markus Elfring @ 2023-11-07 6:38 UTC (permalink / raw)
To: Jakub Kicinski, Wojciech Drewek
Cc: Julia Lawall, David S. Miller, Eric Dumazet, Florian Fainelli,
Justin Chen, Paolo Abeni, bcm-kernel-feedback-list, netdev,
kernel-janitors, cocci, LKML, Simon Horman
In-Reply-To: <20231106145806.669875f4@kernel.org>
>> Add a jump target so that a bit of exception handling can be better
>> reused at the end of this function.
…
>> ---
>> drivers/net/ethernet/broadcom/asp2/bcmasp.c | 10 ++++++----
>> 1 file changed, 6 insertions(+), 4 deletions(-)
>
> The diffstat proves otherwise.
> Please don't send such patches to networking.
How does this feedback fit to a change possibility which was reviewed by
Wojciech Drewek yesterday?
Regards,
Markus
^ permalink raw reply
* Re: [RFC v1 3/8] vhost: Add 3 new uapi to support iommufd
From: Cindy Lu @ 2023-11-07 6:57 UTC (permalink / raw)
To: Jason Wang; +Cc: mst, yi.l.liu, jgg, linux-kernel, virtualization, netdev
In-Reply-To: <CACGkMEtVfHL2WPwxkYEfTKBE10uWfB2a75QQOO8rzn3=Y9FiBg@mail.gmail.com>
On Mon, Nov 6, 2023 at 3:30 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Sat, Nov 4, 2023 at 1:17 AM Cindy Lu <lulu@redhat.com> wrote:
> >
> > VHOST_VDPA_SET_IOMMU_FD: bind the device to iommufd device
> >
> > VDPA_DEVICE_ATTACH_IOMMUFD_AS: Attach a vdpa device to an iommufd
> > address space specified by IOAS id.
> >
> > VDPA_DEVICE_DETACH_IOMMUFD_AS: Detach a vdpa device
> > from the iommufd address space
> >
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
>
> [...]
>
> > diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> > index f5c48b61ab62..07e1b2c443ca 100644
> > --- a/include/uapi/linux/vhost.h
> > +++ b/include/uapi/linux/vhost.h
> > @@ -219,4 +219,70 @@
> > */
> > #define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E)
> >
> > +/* vhost_vdpa_set_iommufd
> > + * Input parameters:
> > + * @iommufd: file descriptor from /dev/iommu; pass -1 to unset
> > + * @iommufd_ioasid: IOAS identifier returned from ioctl(IOMMU_IOAS_ALLOC)
> > + * Output parameters:
> > + * @out_dev_id: device identifier
> > + */
> > +struct vhost_vdpa_set_iommufd {
> > + __s32 iommufd;
> > + __u32 iommufd_ioasid;
> > + __u32 out_dev_id;
> > +};
> > +
> > +#define VHOST_VDPA_SET_IOMMU_FD \
> > + _IOW(VHOST_VIRTIO, 0x7F, struct vhost_vdpa_set_iommufd)
> > +
> > +/*
> > + * VDPA_DEVICE_ATTACH_IOMMUFD_AS -
> > + * _IOW(VHOST_VIRTIO, 0x82, struct vdpa_device_attach_iommufd_as)
> > + *
> > + * Attach a vdpa device to an iommufd address space specified by IOAS
> > + * id.
> > + *
> > + * Available only after a device has been bound to iommufd via
> > + * VHOST_VDPA_SET_IOMMU_FD
> > + *
> > + * Undo by VDPA_DEVICE_DETACH_IOMMUFD_AS or device fd close.
> > + *
> > + * @argsz: user filled size of this data.
> > + * @flags: must be 0.
> > + * @ioas_id: Input the target id which can represent an ioas
> > + * allocated via iommufd subsystem.
> > + *
> > + * Return: 0 on success, -errno on failure.
> > + */
> > +struct vdpa_device_attach_iommufd_as {
> > + __u32 argsz;
> > + __u32 flags;
> > + __u32 ioas_id;
> > +};
>
> I think we need to map ioas to vDPA AS, so there should be an ASID
> from the view of vDPA?
>
> Thanks
>
QEMU has a structure that saves and maintains this information, so
I didn't add it in the kernel. We can add it, but maybe only as a check?
Thanks
Cindy
> > +
> > +#define VDPA_DEVICE_ATTACH_IOMMUFD_AS \
> > + _IOW(VHOST_VIRTIO, 0x82, struct vdpa_device_attach_iommufd_as)
> > +
> > +/*
> > + * VDPA_DEVICE_DETACH_IOMMUFD_AS
> > + *
> > + * Detach a vdpa device from the iommufd address space it has been
> > + * attached to. After it, device should be in a blocking DMA state.
> > + *
> > + * Available only after a device has been bound to iommufd via
> > + * VHOST_VDPA_SET_IOMMU_FD
> > + *
> > + * @argsz: user filled size of this data.
> > + * @flags: must be 0.
> > + *
> > + * Return: 0 on success, -errno on failure.
> > + */
> > +struct vdpa_device_detach_iommufd_as {
> > + __u32 argsz;
> > + __u32 flags;
> > +};
> > +
> > +#define VDPA_DEVICE_DETACH_IOMMUFD_AS \
> > + _IOW(VHOST_VIRTIO, 0x83, struct vdpa_device_detach_iommufd_as)
> > +
> > #endif
> > --
> > 2.34.3
> >
>
^ permalink raw reply
* Re: [PATCH net 1/2] r8169: add handling DASH when DASH is disabled
From: Heiner Kallweit @ 2023-11-07 7:12 UTC (permalink / raw)
To: ChunHao Lin; +Cc: netdev, nic_swsd
In-Reply-To: <20231106151124.9175-2-hau@realtek.com>
On 06.11.2023 16:11, ChunHao Lin wrote:
> For devices that support DASH, even DASH is disabled, there may still
> exist a default firmware that will influence device behavior.
> So driver needs to handle DASH for devices that support DASH, no matter
> the DASH status is.
>
AFAICS there's no functional change in patch 1 (except the dash disabled
message). It just prepares patch 2. That's worth mentioning.
> Fixes: ee7a1beb9759 ("r8169:call "rtl8168_driver_start" "rtl8168_driver_stop" only when hardware dash function is enabled")
> Signed-off-by: ChunHao Lin <hau@realtek.com>
stable should be cc'ed
> ---
> drivers/net/ethernet/realtek/r8169_main.c | 37 +++++++++++++++++------
> 1 file changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 4b8251cdb436..8cbd7c96d9e1 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -624,6 +624,7 @@ struct rtl8169_private {
>
> unsigned supports_gmii:1;
> unsigned aspm_manageable:1;
> + unsigned dash_enable:1;
better: dash_enabled
> dma_addr_t counters_phys_addr;
> struct rtl8169_counters *counters;
> struct rtl8169_tc_offsets tc_offset;
> @@ -1253,14 +1254,26 @@ static bool r8168ep_check_dash(struct rtl8169_private *tp)
> return r8168ep_ocp_read(tp, 0x128) & BIT(0);
> }
>
> -static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp)
> +static bool rtl_check_dash(struct rtl8169_private *tp)
maybe better: rtl_dash_is_enabled()
> +{
> + switch (tp->dash_type) {
> + case RTL_DASH_DP:
> + return r8168dp_check_dash(tp);
> + case RTL_DASH_EP:
> + return r8168ep_check_dash(tp);
> + default:
> + return 0;
false instead of 0
> + }
> +}
> +
> +static enum rtl_dash_type rtl_check_dash_type(struct rtl8169_private *tp)
> {
maybe better: rtl_get_dash_type()
> switch (tp->mac_version) {
> case RTL_GIGA_MAC_VER_28:
> case RTL_GIGA_MAC_VER_31:
> - return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE;
> + return RTL_DASH_DP;
> case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53:
> - return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE;
> + return RTL_DASH_EP;
> default:
> return RTL_DASH_NONE;
> }
> @@ -1453,7 +1466,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
>
> device_set_wakeup_enable(tp_to_dev(tp), wolopts);
>
> - if (tp->dash_type == RTL_DASH_NONE) {
> + if (!tp->dash_enable) {
> rtl_set_d3_pll_down(tp, !wolopts);
> tp->dev->wol_enabled = wolopts ? 1 : 0;
> }
> @@ -2512,7 +2525,7 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp)
>
> static void rtl_prepare_power_down(struct rtl8169_private *tp)
> {
> - if (tp->dash_type != RTL_DASH_NONE)
> + if (tp->dash_enable)
> return;
>
> if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
> @@ -4867,7 +4880,7 @@ static int rtl8169_runtime_idle(struct device *device)
> {
> struct rtl8169_private *tp = dev_get_drvdata(device);
>
> - if (tp->dash_type != RTL_DASH_NONE)
> + if (tp->dash_enable)
> return -EBUSY;
>
> if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev))
> @@ -4894,7 +4907,7 @@ static void rtl_shutdown(struct pci_dev *pdev)
> rtl_rar_set(tp, tp->dev->perm_addr);
>
> if (system_state == SYSTEM_POWER_OFF &&
> - tp->dash_type == RTL_DASH_NONE) {
> + !tp->dash_enable) {
> pci_wake_from_d3(pdev, tp->saved_wolopts);
> pci_set_power_state(pdev, PCI_D3hot);
> }
> @@ -5252,7 +5265,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
> tp->aspm_manageable = !rc;
>
> - tp->dash_type = rtl_check_dash(tp);
> + tp->dash_type = rtl_check_dash_type(tp);
> + tp->dash_enable = rtl_check_dash(tp);
>
> tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
>
> @@ -5323,7 +5337,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> /* configure chip for default features */
> rtl8169_set_features(dev, dev->features);
>
> - if (tp->dash_type == RTL_DASH_NONE) {
> + if (!tp->dash_enable) {
> rtl_set_d3_pll_down(tp, true);
> } else {
> rtl_set_d3_pll_down(tp, false);
> @@ -5363,7 +5377,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> "ok" : "ko");
>
> if (tp->dash_type != RTL_DASH_NONE) {
> - netdev_info(dev, "DASH enabled\n");
> + if (tp->dash_enable)
> + netdev_info(dev, "DASH enabled\n");
> + else
> + netdev_info(dev, "DASH disabled\n");
alternative:
netdev_info(dev, "DASH %s\n", tp->dash_enabled ? "enabled" : "disabled")
> rtl8168_driver_start(tp);
> }
>
^ permalink raw reply
* Re: [PATCH net 0/2] r8169: fix DASH deviceis network lost issue
From: Heiner Kallweit @ 2023-11-07 7:14 UTC (permalink / raw)
To: ChunHao Lin; +Cc: netdev, nic_swsd
In-Reply-To: <20231106151124.9175-1-hau@realtek.com>
On 06.11.2023 16:11, ChunHao Lin wrote:
> This series are used to fix network lost issue on systems that support
> DASH.
>
As I have no hw to test this: Can you add on which hw you tested this patch?
> ChunHao Lin (2):
> r8169: add handling DASH when DASH is disabled
> r8169: fix network lost after resume on DASH systems
>
> drivers/net/ethernet/realtek/r8169_main.c | 43 +++++++++++++++++------
> 1 file changed, 33 insertions(+), 10 deletions(-)
>
^ permalink raw reply
* Re: [PATCH net 2/2] r8169: fix network lost after resume on DASH systems
From: Heiner Kallweit @ 2023-11-07 7:15 UTC (permalink / raw)
To: ChunHao Lin; +Cc: netdev, nic_swsd
In-Reply-To: <20231106151124.9175-3-hau@realtek.com>
On 06.11.2023 16:11, ChunHao Lin wrote:
> Device that support DASH may be reseted or powered off during suspend.
> So driver needs to handle DASH during system suspend and resume. Or
> DASH firmware will influence device behavior and causes network lost.
>
> Fixes: b646d90053f8 ("r8169: magic.")
> Signed-off-by: ChunHao Lin <hau@realtek.com>
Also here: cc stable
With this:
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox