netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tom Herbert <tom@herbertland.com>
To: <davem@davemloft.net>, <netdev@vger.kernel.org>
Cc: <kernel-team@fb.com>, <rick.jones2@hpe.com>
Subject: [PATCH net-next 4/4] xps_flows: XPS for packets that don't have a socket
Date: Wed, 31 Aug 2016 17:10:05 -0700	[thread overview]
Message-ID: <1472688605-2613704-5-git-send-email-tom@herbertland.com> (raw)
In-Reply-To: <1472688605-2613704-1-git-send-email-tom@herbertland.com>

xps_flows maintains a per device flow table that is indexed by the
skbuff hash. The table is only consulted when there is no queue saved in
a transmit socket for an skbuff.

Each entry in the flow table contains a queue index and a queue
pointer. The queue pointer is set when a queue is chosen using a
flow table entry. This pointer is set to the head pointer in the
transmit queue (which is maintained by BQL).

The new function get_xps_flows_index looks up flows in the
xps_flows table. The entry returned gives the last queue a matching flow
used. The returned queue is compared against the normal XPS queue. If
they are different, then we only switch if the tail pointer in the TX
queue has advanced past the pointer saved in the entry. In this
way out-of-order (OOO) delivery should be avoided when XPS wants to use
a different queue.

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 net/Kconfig    |  6 +++++
 net/core/dev.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 76 insertions(+), 15 deletions(-)

diff --git a/net/Kconfig b/net/Kconfig
index 7b6cd34..f77fad1 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -255,6 +255,12 @@ config XPS
 	depends on SMP
 	default y
 
+config XPS_FLOWS
+	bool
+	depends on XPS
+	depends on BQL
+	default y
+
 config HWBM
        bool
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 34b5322..fc68d19 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3210,6 +3210,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 }
 #endif /* CONFIG_NET_EGRESS */
 
+/* Must be called with RCU read_lock */
 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
@@ -3217,7 +3218,6 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	struct xps_map *map;
 	int queue_index = -1;
 
-	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_maps);
 	if (dev_maps) {
 		map = rcu_dereference(
@@ -3232,7 +3232,6 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 				queue_index = -1;
 		}
 	}
-	rcu_read_unlock();
 
 	return queue_index;
 #else
@@ -3240,26 +3239,82 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+/* Must be called with RCU read_lock */
+static int get_xps_flows_index(struct net_device *dev, struct sk_buff *skb)
 {
-	struct sock *sk = skb->sk;
-	int queue_index = sk_tx_queue_get(sk);
+#ifdef CONFIG_XPS_FLOWS
+	struct xps_dev_flow_table *flow_table;
+	struct xps_dev_flow ent;
+	int queue_index;
+	struct netdev_queue *txq;
+	u32 hash;
 
-	if (queue_index < 0 || skb->ooo_okay ||
-	    queue_index >= dev->real_num_tx_queues) {
-		int new_index = get_xps_queue(dev, skb);
-		if (new_index < 0)
-			new_index = skb_tx_hash(dev, skb);
+	flow_table = rcu_dereference(dev->xps_flow_table);
+	if (!flow_table)
+		return -1;
 
-		if (queue_index != new_index && sk &&
-		    sk_fullsock(sk) &&
-		    rcu_access_pointer(sk->sk_dst_cache))
-			sk_tx_queue_set(sk, new_index);
+	queue_index = get_xps_queue(dev, skb);
+	if (queue_index < 0)
+		return -1;
 
-		queue_index = new_index;
+	hash = skb_get_hash(skb);
+	if (!hash)
+		return -1;
+
+	ent.v64 = flow_table->flows[hash & flow_table->mask].v64;
+
+	if (queue_index != ent.queue_index &&
+	    ent.queue_index >= 0 &&
+	    ent.queue_index < dev->real_num_tx_queues) {
+		txq = netdev_get_tx_queue(dev, ent.queue_index);
+		if ((int)(txq->dql.num_completed_ops - ent.queue_ptr) < 0)  {
+			/* The current queue's tail has not advanced beyond the
+			 * last packet that was enqueued using the table entry.
+			 * We can't change queues without risking OOO. Stick
+			 * with the queue listed in the flow table.
+			 */
+			queue_index = ent.queue_index;
+		}
 	}
 
+	/* Save the updated entry */
+	txq = netdev_get_tx_queue(dev, queue_index);
+	ent.queue_index = queue_index;
+	ent.queue_ptr = txq->dql.num_enqueue_ops;
+	flow_table->flows[hash & flow_table->mask].v64 = ent.v64;
+
 	return queue_index;
+#else
+	return get_xps_queue(dev, skb);
+#endif
+}
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	int queue_index = sk_tx_queue_get(sk);
+	int new_index;
+
+	if (queue_index < 0) {
+		/* Socket did not provide a queue index, try xps_flows */
+		new_index = get_xps_flows_index(dev, skb);
+	} else if (skb->ooo_okay || queue_index >= dev->real_num_tx_queues) {
+		/* Queue index in socket, see if we can find a better one */
+		new_index = get_xps_queue(dev, skb);
+	} else {
+		/* Valid queue in socket and can't send OOO. Just return it */
+		return queue_index;
+	}
+
+	/* No queue index from flow steering, fallback to hash */
+	if (new_index < 0)
+		new_index = skb_tx_hash(dev, skb);
+
+	if (queue_index != new_index && sk && sk_fullsock(sk) &&
+	    rcu_access_pointer(sk->sk_dst_cache))
+		sk_tx_queue_set(sk, new_index);
+
+	return new_index;
 }
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-- 
2.8.0.rc2

  parent reply	other threads:[~2016-09-01  0:10 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-01  0:10 [PATCH net-next 0/4] xps_flows: XPS flow steering when there is no socket Tom Herbert
2016-09-01  0:10 ` [PATCH net-next 1/4] net: Set SW hash in skb_set_hash_from_sk Tom Herbert
2016-09-01  0:10 ` [PATCH net-next 2/4] dql: Add counters for number of queuing and completion operations Tom Herbert
2016-09-01  0:10 ` [PATCH net-next 3/4] net: Add xps_dev_flow_table_cnt Tom Herbert
2016-09-01  0:10 ` Tom Herbert [this message]
2016-09-01 15:36   ` [PATCH net-next 4/4] xps_flows: XPS for packets that don't have a socket Alexander Duyck
2016-09-01 15:56     ` Tom Herbert
2016-09-01 23:18       ` Alexander Duyck
2016-09-01  0:37 ` [PATCH net-next 0/4] xps_flows: XPS flow steering when there is no socket Eric Dumazet
2016-09-01 16:14   ` Tom Herbert
2016-09-01 19:25 ` Florian Fainelli
2016-09-01 19:32   ` Tom Herbert
2016-09-01 19:46     ` Florian Fainelli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1472688605-2613704-5-git-send-email-tom@herbertland.com \
    --to=tom@herbertland.com \
    --cc=davem@davemloft.net \
    --cc=kernel-team@fb.com \
    --cc=netdev@vger.kernel.org \
    --cc=rick.jones2@hpe.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).