* [PATCH net-next] net: Add sysctl to toggle early demux for tcp and udp
@ 2017-03-09 21:09 Subash Abhinov Kasiviswanathan
2017-03-09 23:50 ` Stephen Hemminger
0 siblings, 1 reply; 2+ messages in thread
From: Subash Abhinov Kasiviswanathan @ 2017-03-09 21:09 UTC (permalink / raw)
To: netdev, eric.dumazet; +Cc: Subash Abhinov Kasiviswanathan
Certain systems process significant unconnected UDP workloads.
It would be preferable to disable UDP early demux for those systems
and enable it for TCP only.
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Suggested-by: Eric Dumazet <edumazet@google.com>
---
include/net/netns/ipv4.h | 2 ++
include/net/protocol.h | 3 ++-
net/ipv4/af_inet.c | 9 ++++++---
net/ipv4/ip_input.c | 2 +-
net/ipv4/sysctl_net_ipv4.c | 14 ++++++++++++++
net/ipv6/ip6_input.c | 2 +-
net/ipv6/tcp_ipv6.c | 3 ++-
7 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 0378e88..1e74da23 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -86,6 +86,8 @@ struct netns_ipv4 {
/* Shall we try to damage output packets if routing dev changes? */
int sysctl_ip_dynaddr;
int sysctl_ip_early_demux;
+ int sysctl_tcp_early_demux;
+ int sysctl_udp_early_demux;
int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index bf36ca3..f8ede39 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -40,6 +40,7 @@
/* This is used to register protocols. */
struct net_protocol {
void (*early_demux)(struct sk_buff *skb);
+ int *early_demux_enabled;
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb, u32 info);
unsigned int no_policy:1,
@@ -54,7 +55,7 @@ struct net_protocol {
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
void (*early_demux)(struct sk_buff *skb);
-
+ int *early_demux_enabled;
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f750698..5a1d30e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1579,7 +1579,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
};
#endif
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
@@ -1588,7 +1588,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
.icmp_strict_tag_validation = 1,
};
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
.handler = udp_rcv,
.err_handler = udp_err,
@@ -1699,7 +1699,10 @@ static __net_init int inet_init_net(struct net *net)
*/
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
- net->ipv4.sysctl_ip_early_demux = 1;
+ net->ipv4.sysctl_udp_early_demux = 1;
+ net->ipv4.sysctl_tcp_early_demux = 1;
+ tcp_protocol.early_demux_enabled = &net->ipv4.sysctl_tcp_early_demux;
+ udp_protocol.early_demux_enabled = &net->ipv4.sysctl_udp_early_demux;
return 0;
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d6feabb..187feae 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -329,7 +329,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
int protocol = iph->protocol;
ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && ipprot->early_demux) {
+ if (ipprot && ipprot->early_demux && *ipprot->early_demux_enabled) {
ipprot->early_demux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index b2fa498..b212af9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -737,6 +737,20 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
.proc_handler = proc_dointvec
},
{
+ .procname = "udp_early_demux",
+ .data = &init_net.ipv4.sysctl_udp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_early_demux",
+ .data = &init_net.ipv4.sysctl_tcp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index aacfb4b..b34f737 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -60,7 +60,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && ipprot->early_demux)
+ if (ipprot && ipprot->early_demux && *ipprot->early_demux_enabled)
ipprot->early_demux(skb);
}
if (!skb_valid_dst(skb))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4c60c6f..fb73a41 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1926,7 +1926,7 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
-static const struct inet6_protocol tcpv6_protocol = {
+static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
@@ -1944,6 +1944,7 @@ struct proto tcpv6_prot = {
static int __net_init tcpv6_net_init(struct net *net)
{
+ tcpv6_protocol.early_demux_enabled = &net->ipv4.sysctl_tcp_early_demux;
return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
SOCK_RAW, IPPROTO_TCP, net);
}
--
1.9.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH net-next] net: Add sysctl to toggle early demux for tcp and udp
2017-03-09 21:09 [PATCH net-next] net: Add sysctl to toggle early demux for tcp and udp Subash Abhinov Kasiviswanathan
@ 2017-03-09 23:50 ` Stephen Hemminger
0 siblings, 0 replies; 2+ messages in thread
From: Stephen Hemminger @ 2017-03-09 23:50 UTC (permalink / raw)
To: Subash Abhinov Kasiviswanathan; +Cc: netdev, eric.dumazet
On Thu, 9 Mar 2017 14:09:18 -0700
Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> wrote:
> Certain system process significant unconnected UDP workload.
> It would be preferrable to disable UDP early demux for those systems
> and enable it for TCP only.
>
> Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> ---
This makes sense.
> diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
> index d6feabb..187feae 100644
> --- a/net/ipv4/ip_input.c
> +++ b/net/ipv4/ip_input.c
> @@ -329,7 +329,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
> int protocol = iph->protocol;
>
> ipprot = rcu_dereference(inet_protos[protocol]);
> - if (ipprot && ipprot->early_demux) {
> + if (ipprot && ipprot->early_demux && *ipprot->early_demux_enabled) {
> ipprot->early_demux(skb);
> /* must reload iph, skb->head might have changed */
> iph = ip_hdr(skb);
Another possible option would be to change the function pointer for early_demux instead of having
an additional conditional test (and cache line read). The downside of doing it that way
is that the code to turn the sysctl on/off gets more complicated than the simple standard proc_dointvec.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-03-09 23:50 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-09 21:09 [PATCH net-next] net: Add sysctl to toggle early demux for tcp and udp Subash Abhinov Kasiviswanathan
2017-03-09 23:50 ` Stephen Hemminger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).