From: Heng Qi <hengqi@linux.alibaba.com>
To: linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
bpf@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>,
"Eric Dumazet" <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
"Alexei Starovoitov" <ast@kernel.org>,
"Daniel Borkmann" <daniel@iogearbox.net>,
"Jesper Dangaard Brouer" <hawk@kernel.org>,
"John Fastabend" <john.fastabend@gmail.com>,
"Toke Høiland-Jørgensen" <toke@redhat.com>,
"Xuan Zhuo" <xuanzhuo@linux.alibaba.com>,
"Heng Qi" <henqqi@linux.alibaba.com>
Subject: [PATCH] veth: Avoid drop packets when xdp_redirect performs
Date: Mon, 31 Oct 2022 14:19:22 +0800 [thread overview]
Message-ID: <20221031061922.124992-1-hengqi@linux.alibaba.com> (raw)
From: Heng Qi <henqqi@linux.alibaba.com>
In the current processing logic, when xdp_redirect occurs, it transmits
the xdp frame based on napi.
If napi of the peer veth is not ready, the veth will drop the packets.
This doesn't meet our expectations.
In this context, we enable napi of the peer veth automatically when the
veth loads the xdp. Then if the veth unloads the xdp, we need to
correctly judge whether to disable napi of the peer veth, because the peer
veth may have loaded xdp, or even the user has enabled GRO.
Signed-off-by: Heng Qi <henqqi@linux.alibaba.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
Previous discussion on this issue is here
https://lore.kernel.org/all/87r0yxgxba.fsf@toke.dk/ .
drivers/net/veth.c | 88 +++++++++++++++++++++++++++++++++++++++-------
1 file changed, 76 insertions(+), 12 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 466da01ba2e3..105682237a9d 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1119,10 +1119,14 @@ static void veth_disable_xdp_range(struct net_device *dev, int start, int end,
static int veth_enable_xdp(struct net_device *dev)
{
- bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
struct veth_priv *priv = netdev_priv(dev);
+ bool napi_already_on;
+ struct veth_rq *rq;
int err, i;
+ rq = &priv->rq[0];
+ napi_already_on = (dev->flags & IFF_UP) && rcu_access_pointer(rq->napi);
+
if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on);
if (err)
@@ -1323,18 +1327,28 @@ static int veth_set_channels(struct net_device *dev,
static int veth_open(struct net_device *dev)
{
- struct veth_priv *priv = netdev_priv(dev);
+ struct veth_priv *peer_priv, *priv = netdev_priv(dev);
struct net_device *peer = rtnl_dereference(priv->peer);
+ struct veth_rq *peer_rq;
int err;
if (!peer)
return -ENOTCONN;
+ peer_priv = netdev_priv(peer);
+ peer_rq = &peer_priv->rq[0];
+
if (priv->_xdp_prog) {
err = veth_enable_xdp(dev);
if (err)
return err;
- } else if (veth_gro_requested(dev)) {
+ /* refer to the logic in veth_xdp_set() */
+ if (!rtnl_dereference(peer_rq->napi)) {
+ err = veth_napi_enable(peer);
+ if (err)
+ return err;
+ }
+ } else if (veth_gro_requested(dev) || peer_priv->_xdp_prog) {
err = veth_napi_enable(dev);
if (err)
return err;
@@ -1350,17 +1364,29 @@ static int veth_open(struct net_device *dev)
static int veth_close(struct net_device *dev)
{
- struct veth_priv *priv = netdev_priv(dev);
+ struct veth_priv *peer_priv, *priv = netdev_priv(dev);
struct net_device *peer = rtnl_dereference(priv->peer);
+ struct veth_rq *peer_rq;
netif_carrier_off(dev);
- if (peer)
- netif_carrier_off(peer);
+ if (peer) {
+ peer_priv = netdev_priv(peer);
+ peer_rq = &peer_priv->rq[0];
+ }
- if (priv->_xdp_prog)
+ if (priv->_xdp_prog) {
veth_disable_xdp(dev);
- else if (veth_gro_requested(dev))
+ /* refer to the logic in veth_xdp_set */
+ if (peer && rtnl_dereference(peer_rq->napi)) {
+ if (!veth_gro_requested(peer) && !peer_priv->_xdp_prog)
+ veth_napi_del(peer);
+ }
+ } else if (veth_gro_requested(dev) || (peer && peer_priv->_xdp_prog)) {
veth_napi_del(dev);
+ }
+
+ if (peer)
+ netif_carrier_off(peer);
return 0;
}
@@ -1470,17 +1496,21 @@ static int veth_set_features(struct net_device *dev,
{
netdev_features_t changed = features ^ dev->features;
struct veth_priv *priv = netdev_priv(dev);
+ struct veth_rq *rq = &priv->rq[0];
int err;
if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog)
return 0;
if (features & NETIF_F_GRO) {
- err = veth_napi_enable(dev);
- if (err)
- return err;
+ if (!rtnl_dereference(rq->napi)) {
+ err = veth_napi_enable(dev);
+ if (err)
+ return err;
+ }
} else {
- veth_napi_del(dev);
+ if (rtnl_dereference(rq->napi))
+ veth_napi_del(dev);
}
return 0;
}
@@ -1512,14 +1542,19 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
struct veth_priv *priv = netdev_priv(dev);
+ struct veth_priv *peer_priv;
struct bpf_prog *old_prog;
+ struct veth_rq *peer_rq;
struct net_device *peer;
+ bool napi_already_off;
unsigned int max_mtu;
+ bool noreq_napi;
int err;
old_prog = priv->_xdp_prog;
priv->_xdp_prog = prog;
peer = rtnl_dereference(priv->peer);
+ peer_priv = netdev_priv(peer);
if (prog) {
if (!peer) {
@@ -1556,6 +1591,24 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
}
}
+ if (peer && (peer->flags & IFF_UP)) {
+ peer_rq = &peer_priv->rq[0];
+
+ /* If the peer hasn't enabled GRO and loaded xdp,
+ * then we enable napi automatically if its napi
+ * is not ready.
+ */
+ napi_already_off = !rtnl_dereference(peer_rq->napi);
+ if (napi_already_off) {
+ err = veth_napi_enable(peer);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to automatically enable napi of peer");
+ goto err;
+ }
+ }
+ }
+
if (!old_prog) {
peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
peer->max_mtu = max_mtu;
@@ -1570,6 +1623,17 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (peer) {
peer->hw_features |= NETIF_F_GSO_SOFTWARE;
peer->max_mtu = ETH_MAX_MTU;
+ peer_rq = &peer_priv->rq[0];
+
+ /* If the peer doesn't has its xdp and enabled
+ * GRO, then we disable napi if its napi is ready;
+ */
+ if (rtnl_dereference(peer_rq->napi)) {
+ noreq_napi = !veth_gro_requested(peer) &&
+ !peer_priv->_xdp_prog;
+ if (noreq_napi && (peer->flags & IFF_UP))
+ veth_napi_del(peer);
+ }
}
}
bpf_prog_put(old_prog);
--
2.19.1.6.gb485710b
next reply other threads:[~2022-10-31 6:19 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-31 6:19 Heng Qi [this message]
2022-11-02 12:10 ` [PATCH] veth: Avoid drop packets when xdp_redirect performs patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221031061922.124992-1-hengqi@linux.alibaba.com \
--to=hengqi@linux.alibaba.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hawk@kernel.org \
--cc=henqqi@linux.alibaba.com \
--cc=john.fastabend@gmail.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=toke@redhat.com \
--cc=xuanzhuo@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).