From: Ben Hutchings <bhutchings@solarflare.com>
To: netdev@vger.kernel.org
Cc: linux-net-drivers@solarflare.com
Subject: [RFC PATCH] net: Set sock's TX queue to match the RX queue
Date: Tue, 20 Apr 2010 23:10:13 +0100 [thread overview]
Message-ID: <1271801414.2083.34.camel@achroite.uk.solarflarecom.com> (raw)
We generally expect TX work for a connected socket to be done on the
same CPU (or rather hardware thread) as RX work, so the RX and TX
queue selection should match. Therefore, when a sock with a cached
destination receives an skb from a multi-RX-queue net device, set the
sock's TX queue index to the RX queue index of the skb, modulo the
number of TX queues on the cached output device.
This changes the representation of TX queue mapping to record how the
current queue selection (if any) was made. We can make the initial
queue selection based on the current simple hash, then replace it once
we know which RX queue is selected. For TCP and other connected
protocols, this should all be settled during the initial handshake
so the change of TX queue will not cause packet reordering.
I haven't given this a whole lot of testing yet. I would be interested
to hear how this affects performance on different systems and whether
it's actually worth doing.
Ben.
---
include/net/sock.h | 37 +++++++++++++++++++++++++++++--------
net/core/dev.c | 3 ++-
net/core/sock.c | 18 +++++++++++++++++-
3 files changed, 48 insertions(+), 10 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 56df440..3cd2b17 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -108,7 +108,8 @@ struct net;
* @skc_node: main hash linkage for various protocol lookup tables
* @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
* @skc_refcnt: reference count
- * @skc_tx_queue_mapping: tx queue number for this connection
+ * @skc_tx_queue_source: source for @skc_tx_queue_index
+ * @skc_tx_queue_index: tx queue index for this connection
* @skc_hash: hash value used with various protocol lookup tables
* @skc_u16hashes: two u16 hash values used by UDP lookup tables
* @skc_family: network address family
@@ -132,7 +133,8 @@ struct sock_common {
struct hlist_nulls_node skc_nulls_node;
};
atomic_t skc_refcnt;
- int skc_tx_queue_mapping;
+ u16 skc_tx_queue_source;
+ u16 skc_tx_queue_index;
union {
unsigned int skc_hash;
@@ -152,6 +154,12 @@ struct sock_common {
#endif
};
+enum sock_tx_queue_source {
+ SK_TX_QUEUE_SOURCE_NONE,
+ SK_TX_QUEUE_SOURCE_HASH,
+ SK_TX_QUEUE_SOURCE_RX_QUEUE
+};
+
/**
* struct sock - network layer representation of sockets
* @__sk_common: shared layout with inet_timewait_sock
@@ -226,7 +234,8 @@ struct sock {
#define sk_node __sk_common.skc_node
#define sk_nulls_node __sk_common.skc_nulls_node
#define sk_refcnt __sk_common.skc_refcnt
-#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping
+#define sk_tx_queue_source __sk_common.skc_tx_queue_source
+#define sk_tx_queue_index __sk_common.skc_tx_queue_index
#define sk_copy_start __sk_common.skc_hash
#define sk_hash __sk_common.skc_hash
@@ -1134,24 +1143,29 @@ static inline void sock_put(struct sock *sk)
extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
const int nested);
-static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
+static inline void
+sk_tx_queue_set(struct sock *sk, u16 index, enum sock_tx_queue_source source)
{
- sk->sk_tx_queue_mapping = tx_queue;
+ sk->sk_tx_queue_source = source;
+ sk->sk_tx_queue_index = index;
}
+extern void
+__sk_tx_queue_set_from_rx_queue(struct sock *sk, const struct sk_buff *skb);
+
static inline void sk_tx_queue_clear(struct sock *sk)
{
- sk->sk_tx_queue_mapping = -1;
+ sk->sk_tx_queue_source = SK_TX_QUEUE_SOURCE_NONE;
}
static inline int sk_tx_queue_get(const struct sock *sk)
{
- return sk->sk_tx_queue_mapping;
+ return sk->sk_tx_queue_index;
}
static inline bool sk_tx_queue_recorded(const struct sock *sk)
{
- return (sk && sk->sk_tx_queue_mapping >= 0);
+ return (sk && sk->sk_tx_queue_source != SK_TX_QUEUE_SOURCE_NONE);
}
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
@@ -1423,6 +1437,13 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
skb->destructor = sock_rfree;
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_charge(sk, skb->truesize);
+ /*
+ * If we're receiving through a multiqueue device, set the
+ * TX queue to match the RX queue.
+ */
+ if (sk->sk_dst_cache && skb_rx_queue_recorded(skb) &&
+ unlikely(sk->sk_tx_queue_source != SK_TX_QUEUE_SOURCE_RX_QUEUE))
+ __sk_tx_queue_set_from_rx_queue(sk, skb);
}
extern void sk_reset_timer(struct sock *sk, struct timer_list* timer,
diff --git a/net/core/dev.c b/net/core/dev.c
index b31d5d6..576bb28 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2016,7 +2016,8 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
queue_index = skb_tx_hash(dev, skb);
if (sk && rcu_dereference_check(sk->sk_dst_cache, 1))
- sk_tx_queue_set(sk, queue_index);
+ sk_tx_queue_set(sk, queue_index,
+ SK_TX_QUEUE_SOURCE_HASH);
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 7effa1e..66719a7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -362,6 +362,21 @@ void sk_reset_txq(struct sock *sk)
}
EXPORT_SYMBOL(sk_reset_txq);
+void __sk_tx_queue_set_from_rx_queue(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = __sk_dst_get(sk);
+ u16 queue_index = skb_get_rx_queue(skb);
+ struct net_device *output_dev;
+
+ if (!dst)
+ return;
+ output_dev = dst->dev;
+ if (unlikely(queue_index >= output_dev->real_num_tx_queues))
+ queue_index %= output_dev->real_num_tx_queues;
+ sk_tx_queue_set(sk, queue_index, SK_TX_QUEUE_SOURCE_RX_QUEUE);
+}
+EXPORT_SYMBOL_GPL(__sk_tx_queue_set_from_rx_queue);
+
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = __sk_dst_get(sk);
@@ -966,7 +981,8 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
#endif
BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
- sizeof(osk->sk_tx_queue_mapping));
+ sizeof(osk->sk_tx_queue_source) +
+ sizeof(osk->sk_tx_queue_index));
memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
#ifdef CONFIG_SECURITY_NETWORK
--
1.6.2.5
--
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
reply other threads:[~2010-04-20 22:10 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1271801414.2083.34.camel@achroite.uk.solarflarecom.com \
--to=bhutchings@solarflare.com \
--cc=linux-net-drivers@solarflare.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox