netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: davem@davemloft.net, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: John Fastabend <john.r.fastabend@intel.com>,
	e1000-devel@lists.sourceforge.net,
	Jason Wang <jasowang@redhat.com>,
	Neil Horman <nhorman@tuxdriver.com>,
	mst@redhat.com
Subject: [PATCH net 2/2] net: core: explicitly select a txq before doing l2 forwarding
Date: Mon,  6 Jan 2014 11:21:07 +0800	[thread overview]
Message-ID: <1388978467-2075-2-git-send-email-jasowang@redhat.com> (raw)
In-Reply-To: <1388978467-2075-1-git-send-email-jasowang@redhat.com>

Currently, the tx queue is selected implicitly in ndo_dfwd_start_xmit(). This
causes several issues:

- NETIF_F_LLTX was forcibly cleared for the macvlan device in this case, which
  leads to extra lock contention.
- dev_hard_start_xmit() was called with a NULL txq, which bypasses the net
  device watchdog.
- dev_hard_start_xmit() does not check txq everywhere, which will lead to a
  crash when TSO is disabled for the lower device.

Fix this by explicitly introducing a select queue method just for l2 forwarding
offload (ndo_dfwd_select_queue), and introducing dfwd_direct_xmit() to do the
queue selection and transmission for l2 forwarding.

With this fix, NETIF_F_LLTX can be preserved for macvlan and there is no need
to check txq against NULL in dev_hard_start_xmit().

In the future, this will also be required for macvtap l2 forwarding support,
since it provides a necessary synchronization method.

Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: e1000-devel@lists.sourceforge.net
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   15 +++++++++----
 drivers/net/macvlan.c                         |    3 +-
 include/linux/netdevice.h                     |   11 +++++++++
 net/core/dev.c                                |   28 ++++++++++++++++++++++++-
 4 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index cc06854..ee71cf7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7629,16 +7629,20 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
 	kfree(fwd_adapter);
 }
 
+static u16 ixgbe_fwd_select_queue(struct net_device *dev, struct sk_buff *skb,
+				  void *priv)
+{
+	struct ixgbe_fwd_adapter *fwd_adapter = priv;
+	return skb->queue_mapping + fwd_adapter->tx_base_queue;
+}
+
 static netdev_tx_t ixgbe_fwd_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
 				  void *priv)
 {
 	struct ixgbe_fwd_adapter *fwd_adapter = priv;
-	unsigned int queue;
-	struct ixgbe_ring *tx_ring;
-
-	queue = skb->queue_mapping + fwd_adapter->tx_base_queue;
-	tx_ring = fwd_adapter->real_adapter->tx_ring[queue];
+	struct ixgbe_ring *tx_ring =
+		fwd_adapter->real_adapter->tx_ring[skb->queue_mapping];
 
 	return __ixgbe_xmit_frame(skb, dev, tx_ring);
 }
@@ -7689,6 +7693,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_bridge_getlink	= ixgbe_ndo_bridge_getlink,
 	.ndo_dfwd_add_station	= ixgbe_fwd_add,
 	.ndo_dfwd_del_station	= ixgbe_fwd_del,
+	.ndo_dfwd_select_queue	= ixgbe_fwd_select_queue,
 	.ndo_dfwd_start_xmit	= ixgbe_fwd_xmit,
 };
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 5360f73..2cbbce3 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -299,7 +299,7 @@ netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 
 	if (vlan->fwd_priv) {
 		skb->dev = vlan->lowerdev;
-		ret = dev_hard_start_xmit(skb, skb->dev, NULL, vlan->fwd_priv);
+		ret = dfwd_direct_xmit(skb, skb->dev, vlan->fwd_priv);
 	} else {
 		ret = macvlan_queue_xmit(skb, dev);
 	}
@@ -366,7 +366,6 @@ static int macvlan_open(struct net_device *dev)
 		if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
 			vlan->fwd_priv = NULL;
 		} else {
-			dev->features &= ~NETIF_F_LLTX;
 			return 0;
 		}
 	}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d9a550b..dbfd476 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -975,6 +975,11 @@ struct netdev_phys_port_id {
  *	by 'ndo_dfwd_add_station'. 'pdev' is the net device backing
  *	the station and priv is the structure returned by the add
  *	operation.
+ * u16 (*ndo_dfwd_select_queue)(struct net_device *dev,
+ *				struct sk_buff *skb,
+ *				void *priv);
+ *	Called to decide which queue to xmit over the accelerated station when
+ *	device supports multiple transmit queues.
  * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb,
  *				      struct net_device *dev,
  *				      void *priv);
@@ -1123,6 +1128,10 @@ struct net_device_ops {
 	void			(*ndo_dfwd_del_station)(struct net_device *pdev,
 							void *priv);
 
+	u16			(*ndo_dfwd_select_queue)(struct net_device *dev,
+							 struct sk_buff *skb,
+							 void *priv);
+
 	netdev_tx_t		(*ndo_dfwd_start_xmit) (struct sk_buff *skb,
 							struct net_device *dev,
 							void *priv);
@@ -2416,6 +2425,8 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
+int dfwd_direct_xmit(struct sk_buff *skb, struct net_device *dev,
+		     void *accel_priv);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq, void *accel_priv);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 4fc1722..bc2b03f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2538,6 +2538,32 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
 				!(features & NETIF_F_SG)));
 }
 
+int dfwd_direct_xmit(struct sk_buff *skb, struct net_device *dev,
+		     void *accel_priv)
+{
+	struct netdev_queue *txq;
+	int ret = NETDEV_TX_BUSY;
+	int index;
+
+	BUG_ON(!dev->netdev_ops->ndo_dfwd_select_queue);
+	index =	dev->netdev_ops->ndo_dfwd_select_queue(dev, skb,
+						       accel_priv);
+
+	local_bh_disable();
+
+	skb_set_queue_mapping(skb, index);
+	txq = netdev_get_tx_queue(dev, index);
+
+	HARD_TX_LOCK(dev, txq, smp_processor_id());
+	if (!netif_xmit_frozen_or_stopped(txq))
+		ret = dev_hard_start_xmit(skb, dev, txq, accel_priv);
+	HARD_TX_UNLOCK(dev, txq);
+
+	local_bh_enable();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dfwd_direct_xmit);
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq, void *accel_priv)
 {
@@ -2611,7 +2637,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			rc = ops->ndo_start_xmit(skb, dev);
 
 		trace_net_dev_xmit(skb, rc, dev, skb_len);
-		if (rc == NETDEV_TX_OK && txq)
+		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
 		return rc;
 	}
-- 
1.7.1


------------------------------------------------------------------------------
Rapidly troubleshoot problems before they affect your business. Most IT 
organizations don't have a clear picture of how application performance 
affects their revenue. With AppDynamics, you get 100% visibility into your 
Java,.NET, & PHP application. Start your 15-day FREE TRIAL of AppDynamics Pro!
http://pubads.g.doubleclick.net/gampad/clk?id=84349831&iu=/4140/ostg.clktrk
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel® Ethernet, visit http://communities.intel.com/community/wired

  reply	other threads:[~2014-01-06  3:21 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-06  3:21 [PATCH net 1/2] macvlan: forbid L2 fowarding offload for macvtap Jason Wang
2014-01-06  3:21 ` Jason Wang [this message]
2014-01-06 12:04   ` [PATCH net 2/2] net: core: explicitly select a txq before doing l2 forwarding Jeff Kirsher
2014-01-06 12:42   ` Neil Horman
2014-01-06 15:06     ` John Fastabend
2014-01-06 15:29       ` Neil Horman
2014-01-07  3:42     ` Jason Wang
2014-01-07 13:17       ` Neil Horman
2014-01-08  3:21         ` Jason Wang
2014-01-08 14:40           ` Neil Horman
2014-01-09  8:28             ` Jason Wang
2014-01-09 11:53               ` Neil Horman
2014-01-07  8:22   ` John Fastabend
2014-01-07  8:37     ` John Fastabend
2014-01-06  7:35 ` [PATCH net 1/2] macvlan: forbid L2 fowarding offload for macvtap John Fastabend
2014-01-06  7:54   ` Jason Wang
2014-01-06 12:26     ` Neil Horman
2014-01-07  3:10       ` Jason Wang
2014-01-07  5:15         ` John Fastabend
2014-01-07  6:22           ` Jason Wang
2014-01-07  7:26             ` John Fastabend
2014-01-07  9:00               ` Jason Wang
2014-01-08 12:55                 ` Michael S. Tsirkin
2014-01-08 19:05                   ` John Fastabend
2014-01-09  7:17                     ` Michael S. Tsirkin
2014-01-09  8:55                       ` Jason Wang
2014-01-09 21:39                         ` Stephen Hemminger
2014-01-09 22:03                           ` Michael S. Tsirkin
2014-01-09 22:20                             ` Stephen Hemminger
2014-01-10  7:06                           ` Jason Wang
2014-01-10 16:40                             ` Vlad Yasevich
2014-01-07  5:16         ` John Fastabend
2014-01-06 20:47 ` David Miller
2014-01-07  3:17   ` Jason Wang
2014-01-07  5:57     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1388978467-2075-2-git-send-email-jasowang@redhat.com \
    --to=jasowang@redhat.com \
    --cc=davem@davemloft.net \
    --cc=e1000-devel@lists.sourceforge.net \
    --cc=john.r.fastabend@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=nhorman@tuxdriver.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).