* [RFC PATCH net-next 0/3] sock: Fix sock queue mapping to include device
@ 2020-07-24 20:14 Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 1/3] sock: Definition and general functions for dev_and_queue structure Tom Herbert
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Tom Herbert @ 2020-07-24 20:14 UTC (permalink / raw)
To: netdev, amritha.nambiar; +Cc: Tom Herbert
The transmit queue selected for a packet is saved in the associated sock
for the packet and is subsequently used to avoid recalculating the queue
on subsequent sends. The problem is that the corresponding device is not
also recorded so that when the queue mapping is referenced it may
correspond to a different device than the sending one, resulting in an
incorrect queue being used for transmit. A similar problem exists in
recording the receive queue in the sock without the corresponding
receive device.
This patch set fixes the issue by recording both the device (via
ifindex) and the queue in the sock mapping. The pair is set and
retrieved atomically. The caller getting the mapping pair checks
that both the recorded queue and the device are valid in the
context (for instance, in transmit the returned ifindex is checked
against that of the transmitting device to ensure they refer to the
same device before applying the recorded queue).
This patch set contains:
- Definition of dev_and_queue structure to hold the ifindex
and queue number
- Generic functions to get, set, and clear dev_and_queue
structure
- Change sk_tx_queue_{get,set,clear} to
sk_tx_dev_and_queue_{get,set,clear}
- Modify callers of above to use new interface
- Change sk_rx_queue_{get,set,clear} to
sk_rx_dev_and_queue_{get,set,clear}
- Modify callers of above to use new interface
Tom Herbert (3):
sock: Definition and general functions for dev_and_queue structure
sock: Use dev_and_queue structure for TX queue mapping in sock
sock: Use dev_and_queue structure for RX queue mapping in sock
.../mellanox/mlx5/core/en_accel/ktls_rx.c | 10 +-
drivers/net/hyperv/netvsc_drv.c | 9 +-
include/net/busy_poll.h | 2 +-
include/net/request_sock.h | 2 +-
include/net/sock.h | 126 +++++++++++++-----
net/core/dev.c | 15 ++-
net/core/filter.c | 7 +-
net/core/sock.c | 10 +-
net/ipv4/tcp_input.c | 2 +-
9 files changed, 124 insertions(+), 59 deletions(-)
--
2.25.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [RFC PATCH net-next 1/3] sock: Definition and general functions for dev_and_queue structure
2020-07-24 20:14 [RFC PATCH net-next 0/3] sock: Fix sock queue mapping to include device Tom Herbert
@ 2020-07-24 20:14 ` Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 2/3] sock: Use dev_and_queue structure for TX queue mapping in sock Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 3/3] sock: Use dev_and_queue structure for RX " Tom Herbert
2 siblings, 0 replies; 5+ messages in thread
From: Tom Herbert @ 2020-07-24 20:14 UTC (permalink / raw)
To: netdev, amritha.nambiar; +Cc: Tom Herbert
Add struct dev_and_queue which holds an ifindex and queue pair. Add
generic functions to set, get, and clear the pair in a structure.
---
include/net/sock.h | 56 ++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 54 insertions(+), 2 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 62e18fc8ac9f..b4919e603648 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -106,6 +106,16 @@ typedef struct {
#endif
} socket_lock_t;
+struct dev_and_queue {
+ union {
+ struct {
+ int ifindex;
+ u16 queue;
+ };
+ u64 val64;
+ };
+};
+
struct sock;
struct proto;
struct net;
@@ -1788,6 +1798,50 @@ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
return __sk_receive_skb(sk, skb, nested, 1, true);
}
+#define NO_QUEUE_MAPPING USHRT_MAX
+
+static inline void __dev_and_queue_get(const struct dev_and_queue *idandq,
+ int *ifindex, int *queue)
+{
+ struct dev_and_queue dandq;
+
+ dandq.val64 = idandq->val64;
+
+ if (dandq.ifindex >= 0 && dandq.queue != NO_QUEUE_MAPPING) {
+ *ifindex = dandq.ifindex;
+ *queue = dandq.queue;
+ return;
+ }
+
+ *ifindex = -1;
+ *queue = -1;
+}
+
+static inline void __dev_and_queue_set(struct dev_and_queue *odandq,
+ struct net_device *dev, int queue)
+{
+ struct dev_and_queue dandq;
+
+ /* queue_mapping accept only upto a 16-bit value */
+ if (WARN_ON_ONCE((unsigned short)queue >= USHRT_MAX))
+ return;
+
+ dandq.ifindex = dev->ifindex;
+ dandq.queue = queue;
+
+ odandq->val64 = dandq.val64;
+}
+
+static inline void __dev_and_queue_clear(struct dev_and_queue *odandq)
+{
+ struct dev_and_queue dandq;
+
+ dandq.ifindex = -1;
+ dandq.queue = NO_QUEUE_MAPPING;
+
+ odandq->val64 = dandq.val64;
+}
+
static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
{
/* sk_tx_queue_mapping accept only upto a 16-bit value */
@@ -1796,8 +1850,6 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
sk->sk_tx_queue_mapping = tx_queue;
}
-#define NO_QUEUE_MAPPING USHRT_MAX
-
static inline void sk_tx_queue_clear(struct sock *sk)
{
sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING;
--
2.25.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [RFC PATCH net-next 2/3] sock: Use dev_and_queue structure for TX queue mapping in sock
2020-07-24 20:14 [RFC PATCH net-next 0/3] sock: Fix sock queue mapping to include device Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 1/3] sock: Definition and general functions for dev_and_queue structure Tom Herbert
@ 2020-07-24 20:14 ` Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 3/3] sock: Use dev_and_queue structure for RX " Tom Herbert
2 siblings, 0 replies; 5+ messages in thread
From: Tom Herbert @ 2020-07-24 20:14 UTC (permalink / raw)
To: netdev, amritha.nambiar; +Cc: Tom Herbert
Replace sk_tx_queue_mapping with sk_tx_dev_and_queue_mapping and
change the associated functions to set, get, and clear the mapping.
---
drivers/net/hyperv/netvsc_drv.c | 9 +++++---
include/net/request_sock.h | 2 +-
include/net/sock.h | 38 +++++++++++++++++----------------
net/core/dev.c | 8 ++++---
net/core/sock.c | 8 +++----
5 files changed, 36 insertions(+), 29 deletions(-)
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index e0327b88732c..016b1ab20767 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -307,7 +307,7 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
/* If queue index changed record the new value */
if (q_idx != old_idx &&
sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, q_idx);
+ sk_tx_dev_and_queue_set(sk, ndev, q_idx);
return q_idx;
}
@@ -325,9 +325,12 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
*/
static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
{
- int q_idx = sk_tx_queue_get(skb->sk);
+ int ifindex, q_idx;
- if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) {
+ sk_tx_dev_and_queue_get(skb->sk, &ifindex, &q_idx);
+
+ if (ifindex != ndev->ifindex || q_idx < 0 || skb->ooo_okay ||
+ q_idx >= ndev->real_num_tx_queues) {
/* If forwarding a packet, we use the recorded queue when
* available for better cache locality.
*/
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index cf8b33213bbc..a6c0636eeb58 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -95,7 +95,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
req->rsk_ops = ops;
req_to_sk(req)->sk_prot = sk_listener->sk_prot;
sk_node_init(&req_to_sk(req)->sk_node);
- sk_tx_queue_clear(req_to_sk(req));
+ sk_tx_dev_and_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
req->num_timeout = 0;
req->num_retrans = 0;
diff --git a/include/net/sock.h b/include/net/sock.h
index b4919e603648..f311425513ff 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -149,7 +149,7 @@ typedef __u64 __bitwise __addrpair;
* @skc_cookie: socket's cookie value
* @skc_node: main hash linkage for various protocol lookup tables
* @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
- * @skc_tx_queue_mapping: tx queue number for this connection
+ * @skc_tx_dev_and_queue_mapping: tx ifindex/queue for this connection
* @skc_rx_queue_mapping: rx queue number for this connection
* @skc_flags: place holder for sk_flags
* %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
@@ -234,7 +234,7 @@ struct sock_common {
struct hlist_node skc_node;
struct hlist_nulls_node skc_nulls_node;
};
- unsigned short skc_tx_queue_mapping;
+ struct dev_and_queue skc_tx_dev_and_queue_mapping;
#ifdef CONFIG_XPS
unsigned short skc_rx_queue_mapping;
#endif
@@ -362,7 +362,7 @@ struct sock {
#define sk_node __sk_common.skc_node
#define sk_nulls_node __sk_common.skc_nulls_node
#define sk_refcnt __sk_common.skc_refcnt
-#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping
+#define sk_tx_dev_and_queue_mapping __sk_common.skc_tx_dev_and_queue_mapping
#ifdef CONFIG_XPS
#define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping
#endif
@@ -1842,25 +1842,27 @@ static inline void __dev_and_queue_clear(struct dev_and_queue *odandq)
odandq->val64 = dandq.val64;
}
-static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
+static inline void sk_tx_dev_and_queue_set(struct sock *sk,
+ struct net_device *dev, int tx_queue)
{
- /* sk_tx_queue_mapping accept only upto a 16-bit value */
- if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX))
- return;
- sk->sk_tx_queue_mapping = tx_queue;
+ __dev_and_queue_set(&sk->sk_tx_dev_and_queue_mapping, dev, tx_queue);
}
-static inline void sk_tx_queue_clear(struct sock *sk)
+static inline void sk_tx_dev_and_queue_clear(struct sock *sk)
{
- sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING;
+ __dev_and_queue_clear(&sk->sk_tx_dev_and_queue_mapping);
}
-static inline int sk_tx_queue_get(const struct sock *sk)
+static inline void sk_tx_dev_and_queue_get(const struct sock *sk, int *ifindex,
+ int *tx_queue)
{
- if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING)
- return sk->sk_tx_queue_mapping;
-
- return -1;
+ if (sk) {
+ __dev_and_queue_get(&sk->sk_tx_dev_and_queue_mapping,
+ ifindex, tx_queue);
+ } else {
+ *ifindex = -1;
+ *tx_queue = -1;
+ }
}
static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
@@ -1989,7 +1991,7 @@ static inline void dst_negative_advice(struct sock *sk)
if (ndst != dst) {
rcu_assign_pointer(sk->sk_dst_cache, ndst);
- sk_tx_queue_clear(sk);
+ sk_tx_dev_and_queue_clear(sk);
sk->sk_dst_pending_confirm = 0;
}
}
@@ -2000,7 +2002,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
struct dst_entry *old_dst;
- sk_tx_queue_clear(sk);
+ sk_tx_dev_and_queue_clear(sk);
sk->sk_dst_pending_confirm = 0;
old_dst = rcu_dereference_protected(sk->sk_dst_cache,
lockdep_sock_is_held(sk));
@@ -2013,7 +2015,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
struct dst_entry *old_dst;
- sk_tx_queue_clear(sk);
+ sk_tx_dev_and_queue_clear(sk);
sk->sk_dst_pending_confirm = 0;
old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
dst_release(old_dst);
diff --git a/net/core/dev.c b/net/core/dev.c
index a986b07ea845..669dea31b467 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3982,11 +3982,13 @@ u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
struct sock *sk = skb->sk;
- int queue_index = sk_tx_queue_get(sk);
+ int queue_index, ifindex;
+
+ sk_tx_dev_and_queue_get(sk, &ifindex, &queue_index);
sb_dev = sb_dev ? : dev;
- if (queue_index < 0 || skb->ooo_okay ||
+ if (ifindex != dev->ifindex || queue_index < 0 || skb->ooo_okay ||
queue_index >= dev->real_num_tx_queues) {
int new_index = get_xps_queue(dev, sb_dev, skb);
@@ -3996,7 +3998,7 @@ u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
if (queue_index != new_index && sk &&
sk_fullsock(sk) &&
rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, new_index);
+ sk_tx_dev_and_queue_set(sk, dev, new_index);
queue_index = new_index;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 6da54eac2b34..92129b017074 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -542,7 +542,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
struct dst_entry *dst = __sk_dst_get(sk);
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- sk_tx_queue_clear(sk);
+ sk_tx_dev_and_queue_clear(sk);
sk->sk_dst_pending_confirm = 0;
RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
dst_release(dst);
@@ -1680,7 +1680,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
if (!try_module_get(prot->owner))
goto out_free_sec;
- sk_tx_queue_clear(sk);
+ sk_tx_dev_and_queue_clear(sk);
}
return sk;
@@ -1749,7 +1749,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
- sk_tx_queue_clear(sk);
+ sk_tx_dev_and_queue_clear(sk);
}
return sk;
@@ -1973,7 +1973,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
sk_refcnt_debug_inc(newsk);
sk_set_socket(newsk, NULL);
- sk_tx_queue_clear(newsk);
+ sk_tx_dev_and_queue_clear(newsk);
RCU_INIT_POINTER(newsk->sk_wq, NULL);
if (newsk->sk_prot->sockets_allocated)
--
2.25.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [RFC PATCH net-next 3/3] sock: Use dev_and_queue structure for RX queue mapping in sock
2020-07-24 20:14 [RFC PATCH net-next 0/3] sock: Fix sock queue mapping to include device Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 1/3] sock: Definition and general functions for dev_and_queue structure Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 2/3] sock: Use dev_and_queue structure for TX queue mapping in sock Tom Herbert
@ 2020-07-24 20:14 ` Tom Herbert
2 siblings, 0 replies; 5+ messages in thread
From: Tom Herbert @ 2020-07-24 20:14 UTC (permalink / raw)
To: netdev, amritha.nambiar; +Cc: Tom Herbert
Replace sk_rx_queue_mapping with sk_rx_dev_and_queue_mapping and
change the associated functions to set, get, and clear the mapping.
---
.../mellanox/mlx5/core/en_accel/ktls_rx.c | 10 +++--
include/net/busy_poll.h | 2 +-
include/net/sock.h | 38 +++++++++----------
net/core/dev.c | 7 +++-
net/core/filter.c | 7 ++--
net/core/sock.c | 2 +-
net/ipv4/tcp_input.c | 2 +-
7 files changed, 37 insertions(+), 31 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index acf6d80a6bb7..ef8b38c0ee56 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -547,11 +547,13 @@ void mlx5e_ktls_handle_ctx_completion(struct mlx5e_icosq_wqe_info *wi)
queue_work(rule->priv->tls->rx_wq, &rule->work);
}
-static int mlx5e_ktls_sk_get_rxq(struct sock *sk)
+static int mlx5e_ktls_sk_get_rxq(struct sock *sk, struct net_device *dev)
{
- int rxq = sk_rx_queue_get(sk);
+ int ifindex, rxq;
- if (unlikely(rxq == -1))
+ sk_rx_dev_and_queue_get(sk, &ifindex, &rxq);
+
+ if (unlikely(ifindex != dev->ifindex || rxq == -1))
rxq = 0;
return rxq;
@@ -583,7 +585,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
priv_rx->crypto_info =
*(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
- rxq = mlx5e_ktls_sk_get_rxq(sk);
+ rxq = mlx5e_ktls_sk_get_rxq(sk, netdev);
priv_rx->rxq = rxq;
priv_rx->sk = sk;
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index b001fa91c14e..f04283c54bcd 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -128,7 +128,7 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
#ifdef CONFIG_NET_RX_BUSY_POLL
WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
- sk_rx_queue_set(sk, skb);
+ sk_rx_dev_and_queue_set(sk, skb);
}
/* variant used for unconnected sockets */
diff --git a/include/net/sock.h b/include/net/sock.h
index f311425513ff..fe8b669237f3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -150,7 +150,7 @@ typedef __u64 __bitwise __addrpair;
* @skc_node: main hash linkage for various protocol lookup tables
* @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
* @skc_tx_dev_and_queue_mapping: tx ifindex/queue for this connection
- * @skc_rx_queue_mapping: rx queue number for this connection
+ * @skc_rx_dev_and_queue_mapping: rx ifindex/queue for this connection
* @skc_flags: place holder for sk_flags
* %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -236,7 +236,7 @@ struct sock_common {
};
struct dev_and_queue skc_tx_dev_and_queue_mapping;
#ifdef CONFIG_XPS
- unsigned short skc_rx_queue_mapping;
+ struct dev_and_queue skc_rx_dev_and_queue_mapping;
#endif
union {
int skc_incoming_cpu;
@@ -364,7 +364,7 @@ struct sock {
#define sk_refcnt __sk_common.skc_refcnt
#define sk_tx_dev_and_queue_mapping __sk_common.skc_tx_dev_and_queue_mapping
#ifdef CONFIG_XPS
-#define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping
+#define sk_rx_dev_and_queue_mapping __sk_common.skc_rx_dev_and_queue_mapping
#endif
#define sk_dontcopy_begin __sk_common.skc_dontcopy_begin
@@ -1865,34 +1865,34 @@ static inline void sk_tx_dev_and_queue_get(const struct sock *sk, int *ifindex,
}
}
-static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+static inline void sk_rx_dev_and_queue_set(struct sock *sk,
+ const struct sk_buff *skb)
{
#ifdef CONFIG_XPS
- if (skb_rx_queue_recorded(skb)) {
- u16 rx_queue = skb_get_rx_queue(skb);
-
- if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING))
- return;
-
- sk->sk_rx_queue_mapping = rx_queue;
- }
+ if (skb->dev && skb_rx_queue_recorded(skb))
+ __dev_and_queue_set(&sk->sk_rx_dev_and_queue_mapping, skb->dev,
+ skb_get_rx_queue(skb));
#endif
}
-static inline void sk_rx_queue_clear(struct sock *sk)
+static inline void sk_rx_dev_and_queue_clear(struct sock *sk)
{
#ifdef CONFIG_XPS
- sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING;
+ __dev_and_queue_clear(&sk->sk_rx_dev_and_queue_mapping);
#endif
}
#ifdef CONFIG_XPS
-static inline int sk_rx_queue_get(const struct sock *sk)
+static inline void sk_rx_dev_and_queue_get(const struct sock *sk,
+ int *ifindex, int *rx_queue)
{
- if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING)
- return sk->sk_rx_queue_mapping;
-
- return -1;
+ if (sk) {
+ __dev_and_queue_get(&sk->sk_rx_dev_and_queue_mapping,
+ ifindex, rx_queue);
+ } else {
+ *ifindex = -1;
+ *rx_queue = -1;
+ }
}
#endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 669dea31b467..10a704c79ea7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3939,9 +3939,12 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
if (dev_maps) {
- int tci = sk_rx_queue_get(sk);
+ int tci, ifindex;
- if (tci >= 0 && tci < dev->num_rx_queues)
+ sk_rx_dev_and_queue_get(sk, &ifindex, &tci);
+
+ if (dev->ifindex == ifindex &&
+ tci >= 0 && tci < dev->num_rx_queues)
queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
tci);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 3fa16b8c0d61..eef2b778ade6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8044,11 +8044,12 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct bpf_sock, rx_queue_mapping):
#ifdef CONFIG_XPS
*insn++ = BPF_LDX_MEM(
- BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
+ BPF_FIELD_SIZEOF(struct sock,
+ sk_rx_dev_and_queue_mapping.queue),
si->dst_reg, si->src_reg,
- bpf_target_off(struct sock, sk_rx_queue_mapping,
+ bpf_target_off(struct sock, sk_rx_dev_and_queue_mapping.queue,
sizeof_field(struct sock,
- sk_rx_queue_mapping),
+ sk_rx_dev_and_queue_mapping.queue),
target_size));
*insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
1);
diff --git a/net/core/sock.c b/net/core/sock.c
index 92129b017074..8b44512c8b3a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3000,7 +3000,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
- sk_rx_queue_clear(sk);
+ sk_rx_dev_and_queue_clear(sk);
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 82906deb7874..5c7e1a7a7ed9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6746,7 +6746,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->snt_isn = isn;
tcp_rsk(req)->txhash = net_tx_rndhash();
tcp_openreq_init_rwin(req, sk, dst);
- sk_rx_queue_set(req_to_sk(req), skb);
+ sk_rx_dev_and_queue_set(req_to_sk(req), skb);
if (!want_cookie) {
tcp_reqsk_record_syn(sk, req, skb);
fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
--
2.25.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [RFC PATCH net-next 1/3] sock: Definition and general functions for dev_and_queue structure
2020-10-21 19:47 [RFC PATCH net-next 0/3] sock: Fix sock queue mapping to include device Harshitha Ramamurthy
@ 2020-10-21 19:47 ` Harshitha Ramamurthy
0 siblings, 0 replies; 5+ messages in thread
From: Harshitha Ramamurthy @ 2020-10-21 19:47 UTC (permalink / raw)
To: netdev, davem, kuba
Cc: tom, carolyn.wyborny, jacob.e.keller, amritha.nambiar,
Harshitha Ramamurthy
From: Tom Herbert <tom@herbertland.com>
Add struct dev_and_queue which holds an ifindex and queue pair. Add
a generic function to get the queue for the held ifindex, as well as
functions to set and clear the pair in a structure.
Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
---
include/net/sock.h | 52 ++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 50 insertions(+), 2 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index a5c6ae78df77..9755a6cab1a1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -107,6 +107,16 @@ typedef struct {
#endif
} socket_lock_t;
+struct dev_and_queue {
+ union {
+ struct {
+ int ifindex;
+ u16 queue;
+ };
+ u64 val64;
+ };
+};
+
struct sock;
struct proto;
struct net;
@@ -1791,6 +1801,46 @@ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
return __sk_receive_skb(sk, skb, nested, 1, true);
}
+#define NO_QUEUE_MAPPING USHRT_MAX
+
+static inline int __dev_and_queue_get(const struct dev_and_queue *idandq,
+ int ifindex)
+{
+ struct dev_and_queue dandq;
+
+ dandq.val64 = idandq->val64;
+
+ if (dandq.ifindex == ifindex && dandq.queue != NO_QUEUE_MAPPING)
+ return dandq.queue;
+
+ return -1;
+}
+
+static inline void __dev_and_queue_set(struct dev_and_queue *odandq,
+ int ifindex, int queue)
+{
+ struct dev_and_queue dandq;
+
+ /* queue_mapping accept only upto a 16-bit value */
+ if (WARN_ON_ONCE((unsigned short)queue >= USHRT_MAX))
+ return;
+
+ dandq.ifindex = ifindex;
+ dandq.queue = queue;
+
+ odandq->val64 = dandq.val64;
+}
+
+static inline void __dev_and_queue_clear(struct dev_and_queue *odandq)
+{
+ struct dev_and_queue dandq;
+
+ dandq.ifindex = -1;
+ dandq.queue = NO_QUEUE_MAPPING;
+
+ odandq->val64 = dandq.val64;
+}
+
static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
{
/* sk_tx_queue_mapping accept only upto a 16-bit value */
@@ -1799,8 +1849,6 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
sk->sk_tx_queue_mapping = tx_queue;
}
-#define NO_QUEUE_MAPPING USHRT_MAX
-
static inline void sk_tx_queue_clear(struct sock *sk)
{
sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING;
--
2.26.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2020-10-21 19:51 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-07-24 20:14 [RFC PATCH net-next 0/3] sock: Fix sock queue mapping to include device Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 1/3] sock: Definition and general functions for dev_and_queue structure Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 2/3] sock: Use dev_and_queue structure for TX queue mapping in sock Tom Herbert
2020-07-24 20:14 ` [RFC PATCH net-next 3/3] sock: Use dev_and_queue structure for RX " Tom Herbert
-- strict thread matches above, loose matches on Subject: below --
2020-10-21 19:47 [RFC PATCH net-next 0/3] sock: Fix sock queue mapping to include device Harshitha Ramamurthy
2020-10-21 19:47 ` [RFC PATCH net-next 1/3] sock: Definition and general functions for dev_and_queue structure Harshitha Ramamurthy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).