From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
To: netdev@vger.kernel.org, eric.dumazet@gmail.com
Cc: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>,
Stephen Hemminger <stephen@networkplumber.org>
Subject: [PATCH net-next v2] net: Add sysctl to toggle early demux for tcp and udp
Date: Thu, 9 Mar 2017 20:31:00 -0700 [thread overview]
Message-ID: <1489116660-4244-1-git-send-email-subashab@codeaurora.org> (raw)
Certain system process significant unconnected UDP workload.
It would be preferrable to disable UDP early demux for those systems
and enable it for TCP only.
v1->v2: Change function pointer instead of adding conditional as
suggested by Stephen.
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Suggested-by: Eric Dumazet <edumazet@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
---
include/net/netns/ipv4.h | 2 ++
include/net/tcp.h | 2 ++
include/net/udp.h | 2 ++
net/ipv4/af_inet.c | 22 ++++++++++++++++++++--
net/ipv4/sysctl_net_ipv4.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
net/ipv6/tcp_ipv6.c | 10 +++++++++-
6 files changed, 82 insertions(+), 3 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 0378e88..1e74da23 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -86,6 +86,8 @@ struct netns_ipv4 {
/* Shall we try to damage output packets if routing dev changes? */
int sysctl_ip_dynaddr;
int sysctl_ip_early_demux;
+ int sysctl_tcp_early_demux;
+ int sysctl_udp_early_demux;
int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6061963..3b6446d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1953,4 +1953,6 @@ static inline void tcp_listendrop(const struct sock *sk)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
}
+void tcp_v4_early_demux_configure(int enable);
+void tcp_v6_early_demux_configure(int enable);
#endif /* _TCP_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index 1661791..7de31d5 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -373,4 +373,6 @@ struct udp_iter_state {
#if IS_ENABLED(CONFIG_IPV6)
void udpv6_encap_enable(void);
#endif
+
+void udp_v4_early_demux_configure(int enable);
#endif /* _UDP_H */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f750698..3e11d74 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1579,7 +1579,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
};
#endif
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
@@ -1588,7 +1588,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
.icmp_strict_tag_validation = 1,
};
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
.handler = udp_rcv,
.err_handler = udp_err,
@@ -1596,6 +1596,22 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
.netns_ok = 1,
};
+void tcp_v4_early_demux_configure(int enable)
+{
+ if (enable)
+ tcp_protocol.early_demux = tcp_v4_early_demux;
+ else
+ tcp_protocol.early_demux = NULL;
+}
+
+void udp_v4_early_demux_configure(int enable)
+{
+ if (enable)
+ udp_protocol.early_demux = udp_v4_early_demux;
+ else
+ udp_protocol.early_demux = NULL;
+}
+
static const struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
.err_handler = icmp_err,
@@ -1700,6 +1716,8 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
+ net->ipv4.sysctl_udp_early_demux = 1;
+ net->ipv4.sysctl_tcp_early_demux = 1;
return 0;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index b2fa498..c61383b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -253,6 +253,39 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
return ret;
}
+static int proc_tcp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_tcp_early_demux;
+
+ tcp_v4_early_demux_configure(enabled);
+ tcp_v6_early_demux_configure(enabled);
+ }
+
+ return ret;
+}
+
+static int proc_udp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_udp_early_demux;
+
+ udp_v4_early_demux_configure(enabled);
+ }
+
+ return ret;
+}
+
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
@@ -737,6 +770,20 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
.proc_handler = proc_dointvec
},
{
+ .procname = "udp_early_demux",
+ .data = &init_net.ipv4.sysctl_udp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_udp_early_demux
+ },
+ {
+ .procname = "tcp_early_demux",
+ .data = &init_net.ipv4.sysctl_tcp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tcp_early_demux
+ },
+ {
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4c60c6f..0dd761c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1926,13 +1926,21 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
-static const struct inet6_protocol tcpv6_protocol = {
+static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
+void tcp_v6_early_demux_configure(int enable)
+{
+ if (enable)
+ tcpv6_protocol.early_demux = tcp_v6_early_demux;
+ else
+ tcpv6_protocol.early_demux = NULL;
+}
+
static struct inet_protosw tcpv6_protosw = {
.type = SOCK_STREAM,
.protocol = IPPROTO_TCP,
--
1.9.1
next reply other threads:[~2017-03-10 3:33 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-10 3:31 Subash Abhinov Kasiviswanathan [this message]
2017-03-10 3:42 ` [PATCH net-next v2] net: Add sysctl to toggle early demux for tcp and udp Tom Herbert
2017-03-10 5:26 ` Subash Abhinov Kasiviswanathan
2017-03-10 16:33 ` Tom Herbert
2017-03-11 0:22 ` Eric Dumazet
2017-03-11 0:49 ` Tom Herbert
2017-03-18 17:32 ` Subash Abhinov Kasiviswanathan
2017-03-18 17:44 ` Tom Herbert
2017-03-19 2:07 ` Subash Abhinov Kasiviswanathan
2017-03-19 19:18 ` Eric Dumazet
2017-03-21 22:49 ` Tom Herbert
2017-03-10 4:25 ` Eric Dumazet
2017-03-10 7:34 ` Subash Abhinov Kasiviswanathan
2017-03-10 12:42 ` kbuild test robot
2017-03-10 12:44 ` kbuild test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1489116660-4244-1-git-send-email-subashab@codeaurora.org \
--to=subashab@codeaurora.org \
--cc=eric.dumazet@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=stephen@networkplumber.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).