* [PATCH net-next v3 0/2] RDS: TCP: tunable socket buffer parameters
@ 2016-03-15 18:53 Sowmini Varadhan
2016-03-15 18:53 ` [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket Sowmini Varadhan
2016-03-15 18:53 ` [PATCH net-next v3 2/2] RDS: TCP: Remove unused constant Sowmini Varadhan
0 siblings, 2 replies; 10+ messages in thread
From: Sowmini Varadhan @ 2016-03-15 18:53 UTC (permalink / raw)
To: netdev; +Cc: sowmini.varadhan, davem, santosh.shilimkar, eric.dumazet
Patch 1 uses sysctl to create tunable socket buffer size parameters.
Patch 2 removes an unuused constant.
Changes since v2: review comments from Santosh Shilimkar, Eric Dumazet
Sowmini Varadhan (2):
RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
RDS: TCP: Remove unused constant
net/rds/tcp.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 133 insertions(+), 10 deletions(-)
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
2016-03-15 18:53 [PATCH net-next v3 0/2] RDS: TCP: tunable socket buffer parameters Sowmini Varadhan
@ 2016-03-15 18:53 ` Sowmini Varadhan
2016-03-15 19:21 ` santosh shilimkar
2016-03-16 10:29 ` Hannes Frederic Sowa
2016-03-15 18:53 ` [PATCH net-next v3 2/2] RDS: TCP: Remove unused constant Sowmini Varadhan
1 sibling, 2 replies; 10+ messages in thread
From: Sowmini Varadhan @ 2016-03-15 18:53 UTC (permalink / raw)
To: netdev; +Cc: sowmini.varadhan, davem, santosh.shilimkar, eric.dumazet
Add per-net sysctl tunables to set the size of sndbuf and
rcvbuf on the kernel tcp socket.
The tunables are added at /proc/sys/net/rds/tcp/rds_tcp_sndbuf
and /proc/sys/net/rds/tcp/rds_tcp_rcvbuf.
Since these values must be set before accept() or connect(),
and there may be an arbitrary number of existing rds-tcp
sockets when the tunable is modified. To make sure that all
connections in the netns pick up the same value for the tunable,
we reset existing rds-tcp connections in the netns, so that
they can reconnect with the new parameters.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
v2; use sysctl instead of module param. Tunabes are now per netns,
and can be dynamically modified without restarting all namespaces.
v3: review comments from Santosh Shilimkar, Eric Dumazet
net/rds/tcp.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 134 insertions(+), 9 deletions(-)
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index ad60299..b720b25 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -54,6 +54,28 @@ static struct kmem_cache *rds_tcp_conn_slab;
#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024)
+static int sndbuf_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *fpos);
+static int rcvbuf_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *fpos);
+static struct ctl_table rds_tcp_sysctl_table[] = {
+ {
+ .procname = "rds_tcp_sndbuf",
+ /* data is per-net pointer */
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sndbuf_handler,
+ },
+ {
+ .procname = "rds_tcp_rcvbuf",
+ /* data is per-net pointer */
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = rcvbuf_handler,
+ },
+ { }
+};
+
/* doing it this way avoids calling tcp_sk() */
void rds_tcp_nonagle(struct socket *sock)
{
@@ -66,15 +88,6 @@ void rds_tcp_nonagle(struct socket *sock)
set_fs(oldfs);
}
-/* All module specific customizations to the RDS-TCP socket should be done in
- * rds_tcp_tune() and applied after socket creation. In general these
- * customizations should be tunable via module_param()
- */
-void rds_tcp_tune(struct socket *sock)
-{
- rds_tcp_nonagle(sock);
-}
-
u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
{
return tcp_sk(tc->t_sock->sk)->snd_nxt;
@@ -272,8 +285,33 @@ static int rds_tcp_netid;
struct rds_tcp_net {
struct socket *rds_tcp_listen_sock;
struct work_struct rds_tcp_accept_w;
+ struct ctl_table_header *rds_tcp_sysctl;
+ int sndbuf_size;
+ int rcvbuf_size;
};
+/* All module specific customizations to the RDS-TCP socket should be done in
+ * rds_tcp_tune() and applied after socket creation.
+ */
+void rds_tcp_tune(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+ rds_tcp_nonagle(sock);
+ lock_sock(sk);
+ if (rtn->sndbuf_size > 0) {
+ sk->sk_sndbuf = rtn->sndbuf_size;
+ sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ }
+ if (rtn->rcvbuf_size > 0) {
+ sk->sk_sndbuf = rtn->rcvbuf_size;
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ }
+ release_sock(sk);
+}
+
static void rds_tcp_accept_worker(struct work_struct *work)
{
struct rds_tcp_net *rtn = container_of(work,
@@ -296,9 +334,17 @@ static __net_init int rds_tcp_init_net(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ /* 0 value for {snd, rcv}buf_size implies we let the stack
+ * pick the default, and permit auto-tuning of buffer size.
+ */
+ rtn->sndbuf_size = 0;
+ rtn->rcvbuf_size = 0;
+ rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp",
+ rds_tcp_sysctl_table);
rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
if (!rtn->rds_tcp_listen_sock) {
pr_warn("could not set up listen sock\n");
+ unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
return -EAFNOSUPPORT;
}
INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
@@ -309,6 +355,7 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
/* If rds_tcp_exit_net() is called as a result of netns deletion,
* the rds_tcp_kill_sock() device notifier would already have cleaned
* up the listen socket, thus there is no work to do in this function.
@@ -383,6 +430,84 @@ static struct notifier_block rds_tcp_dev_notifier = {
.priority = -10, /* must be called after other network notifiers */
};
+static int user_atoi(char __user *ubuf, size_t len)
+{
+ char buf[16];
+ unsigned long ret;
+ int err;
+
+ if (len > 15)
+ return -EINVAL;
+
+ if (copy_from_user(buf, ubuf, len))
+ return -EFAULT;
+
+ buf[len] = 0;
+ err = kstrtoul(buf, 0, &ret);
+ if (err != 0)
+ return -ERANGE;
+ return ret;
+}
+
+/* when sysctl is used to modify some kernel socket parameters,this
+ * function resets the RDS connections in that netns so that we can
+ * restart with new parameters. The assumption is that such reset
+ * events are few and far-between.
+ */
+static void rds_tcp_sysctl_reset(struct net *net)
+{
+ struct rds_tcp_connection *tc, *_tc;
+
+ spin_lock_irq(&rds_tcp_conn_lock);
+ list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
+ struct net *c_net = read_pnet(&tc->conn->c_net);
+
+ if (net != c_net || !tc->t_sock)
+ continue;
+
+ rds_conn_drop(tc->conn); /* reconnect with new parameters */
+ }
+ spin_unlock_irq(&rds_tcp_conn_lock);
+}
+
+static int sndbuf_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *fpos)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct ctl_table tmp;
+
+ tmp = *ctl;
+ tmp.data = &rtn->sndbuf_size;
+ if (!write)
+ return proc_dointvec(&tmp, write, buffer, lenp, fpos);
+
+ rtn->sndbuf_size = max_t(u32, user_atoi(buffer, *lenp) * 2,
+ SOCK_MIN_SNDBUF);
+ rds_tcp_sysctl_reset(net);
+
+ return 0;
+}
+
+static int rcvbuf_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *fpos)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct ctl_table tmp;
+
+ tmp = *ctl;
+ tmp.data = &rtn->rcvbuf_size;
+ if (!write)
+ return proc_dointvec(&tmp, write, buffer, lenp, fpos);
+
+ rtn->rcvbuf_size = max_t(u32, user_atoi(buffer, *lenp) * 2,
+ SOCK_MIN_RCVBUF);
+ rds_tcp_sysctl_reset(net);
+
+ return 0;
+}
+
static void rds_tcp_exit(void)
{
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
--
1.7.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH net-next v3 2/2] RDS: TCP: Remove unused constant
2016-03-15 18:53 [PATCH net-next v3 0/2] RDS: TCP: tunable socket buffer parameters Sowmini Varadhan
2016-03-15 18:53 ` [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket Sowmini Varadhan
@ 2016-03-15 18:53 ` Sowmini Varadhan
2016-03-15 19:22 ` santosh shilimkar
1 sibling, 1 reply; 10+ messages in thread
From: Sowmini Varadhan @ 2016-03-15 18:53 UTC (permalink / raw)
To: netdev; +Cc: sowmini.varadhan, davem, santosh.shilimkar, eric.dumazet
RDS_TCP_DEFAULT_BUFSIZE has been unused since commit 1edd6a14d24f
("RDS-TCP: Do not bloat sndbuf/rcvbuf in rds_tcp_tune").
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
v3: review comments from Santosh Shilimkar
net/rds/tcp.c | 2 --
1 files changed, 0 insertions(+), 2 deletions(-)
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index b720b25..8b953c2 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -52,8 +52,6 @@ static LIST_HEAD(rds_tcp_conn_list);
static struct kmem_cache *rds_tcp_conn_slab;
-#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024)
-
static int sndbuf_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos);
static int rcvbuf_handler(struct ctl_table *ctl, int write,
--
1.7.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
2016-03-15 18:53 ` [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket Sowmini Varadhan
@ 2016-03-15 19:21 ` santosh shilimkar
2016-03-16 10:29 ` Hannes Frederic Sowa
1 sibling, 0 replies; 10+ messages in thread
From: santosh shilimkar @ 2016-03-15 19:21 UTC (permalink / raw)
To: Sowmini Varadhan, netdev; +Cc: davem, eric.dumazet
On 3/15/2016 11:53 AM, Sowmini Varadhan wrote:
> Add per-net sysctl tunables to set the size of sndbuf and
> rcvbuf on the kernel tcp socket.
>
> The tunables are added at /proc/sys/net/rds/tcp/rds_tcp_sndbuf
> and /proc/sys/net/rds/tcp/rds_tcp_rcvbuf.
>
> Since these values must be set before accept() or connect(),
> and there may be an arbitrary number of existing rds-tcp
> sockets when the tunable is modified. To make sure that all
> connections in the netns pick up the same value for the tunable,
> we reset existing rds-tcp connections in the netns, so that
> they can reconnect with the new parameters.
>
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> ---
> v2; use sysctl instead of module param. Tunabes are now per netns,
> and can be dynamically modified without restarting all namespaces.
> v3: review comments from Santosh Shilimkar, Eric Dumazet
>
> net/rds/tcp.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 files changed, 134 insertions(+), 9 deletions(-)
>
This version looks fine to me. Thanks !!
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next v3 2/2] RDS: TCP: Remove unused constant
2016-03-15 18:53 ` [PATCH net-next v3 2/2] RDS: TCP: Remove unused constant Sowmini Varadhan
@ 2016-03-15 19:22 ` santosh shilimkar
0 siblings, 0 replies; 10+ messages in thread
From: santosh shilimkar @ 2016-03-15 19:22 UTC (permalink / raw)
To: Sowmini Varadhan, netdev; +Cc: davem, eric.dumazet
On 3/15/2016 11:53 AM, Sowmini Varadhan wrote:
> RDS_TCP_DEFAULT_BUFSIZE has been unused since commit 1edd6a14d24f
> ("RDS-TCP: Do not bloat sndbuf/rcvbuf in rds_tcp_tune").
>
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> ---
> v3: review comments from Santosh Shilimkar
>
> net/rds/tcp.c | 2 --
> 1 files changed, 0 insertions(+), 2 deletions(-)
>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
2016-03-15 18:53 ` [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket Sowmini Varadhan
2016-03-15 19:21 ` santosh shilimkar
@ 2016-03-16 10:29 ` Hannes Frederic Sowa
2016-03-16 11:06 ` Sowmini Varadhan
2016-03-16 11:10 ` Sowmini Varadhan
1 sibling, 2 replies; 10+ messages in thread
From: Hannes Frederic Sowa @ 2016-03-16 10:29 UTC (permalink / raw)
To: Sowmini Varadhan, netdev; +Cc: davem, santosh.shilimkar, eric.dumazet
Hello,
some feedback.
On 15.03.2016 19:53, Sowmini Varadhan wrote:
> Add per-net sysctl tunables to set the size of sndbuf and
> rcvbuf on the kernel tcp socket.
>
> The tunables are added at /proc/sys/net/rds/tcp/rds_tcp_sndbuf
> and /proc/sys/net/rds/tcp/rds_tcp_rcvbuf.
>
> Since these values must be set before accept() or connect(),
> and there may be an arbitrary number of existing rds-tcp
> sockets when the tunable is modified. To make sure that all
> connections in the netns pick up the same value for the tunable,
> we reset existing rds-tcp connections in the netns, so that
> they can reconnect with the new parameters.
>
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> ---
> v2; use sysctl instead of module param. Tunabes are now per netns,
> and can be dynamically modified without restarting all namespaces.
> v3: review comments from Santosh Shilimkar, Eric Dumazet
>
> net/rds/tcp.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 files changed, 134 insertions(+), 9 deletions(-)
>
> diff --git a/net/rds/tcp.c b/net/rds/tcp.c
> index ad60299..b720b25 100644
> --- a/net/rds/tcp.c
> +++ b/net/rds/tcp.c
> @@ -54,6 +54,28 @@ static struct kmem_cache *rds_tcp_conn_slab;
>
> #define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024)
>
> +static int sndbuf_handler(struct ctl_table *ctl, int write,
> + void __user *buffer, size_t *lenp, loff_t *fpos);
> +static int rcvbuf_handler(struct ctl_table *ctl, int write,
> + void __user *buffer, size_t *lenp, loff_t *fpos);
> +static struct ctl_table rds_tcp_sysctl_table[] = {
> + {
> + .procname = "rds_tcp_sndbuf",
> + /* data is per-net pointer */
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = sndbuf_handler,
> + },
> + {
> + .procname = "rds_tcp_rcvbuf",
> + /* data is per-net pointer */
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = rcvbuf_handler,
> + },
> + { }
> +};
Normally we kmemdup a table per netns and update its data pointer, so we
can reuse the proc_doint_minmax functions.
> /* doing it this way avoids calling tcp_sk() */
> void rds_tcp_nonagle(struct socket *sock)
> {
> @@ -66,15 +88,6 @@ void rds_tcp_nonagle(struct socket *sock)
> set_fs(oldfs);
> }
>
> -/* All module specific customizations to the RDS-TCP socket should be done in
> - * rds_tcp_tune() and applied after socket creation. In general these
> - * customizations should be tunable via module_param()
> - */
> -void rds_tcp_tune(struct socket *sock)
> -{
> - rds_tcp_nonagle(sock);
> -}
> -
> u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
> {
> return tcp_sk(tc->t_sock->sk)->snd_nxt;
> @@ -272,8 +285,33 @@ static int rds_tcp_netid;
> struct rds_tcp_net {
> struct socket *rds_tcp_listen_sock;
> struct work_struct rds_tcp_accept_w;
> + struct ctl_table_header *rds_tcp_sysctl;
> + int sndbuf_size;
> + int rcvbuf_size;
> };
>
> +/* All module specific customizations to the RDS-TCP socket should be done in
> + * rds_tcp_tune() and applied after socket creation.
> + */
> +void rds_tcp_tune(struct socket *sock)
> +{
> + struct sock *sk = sock->sk;
> + struct net *net = sock_net(sk);
> + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
> +
> + rds_tcp_nonagle(sock);
> + lock_sock(sk);
> + if (rtn->sndbuf_size > 0) {
> + sk->sk_sndbuf = rtn->sndbuf_size;
> + sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
> + }
> + if (rtn->rcvbuf_size > 0) {
> + sk->sk_sndbuf = rtn->rcvbuf_size;
> + sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
> + }
> + release_sock(sk);
> +}
> +
> static void rds_tcp_accept_worker(struct work_struct *work)
> {
> struct rds_tcp_net *rtn = container_of(work,
> @@ -296,9 +334,17 @@ static __net_init int rds_tcp_init_net(struct net *net)
> {
> struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
>
> + /* 0 value for {snd, rcv}buf_size implies we let the stack
> + * pick the default, and permit auto-tuning of buffer size.
> + */
> + rtn->sndbuf_size = 0;
> + rtn->rcvbuf_size = 0;
> + rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp",
> + rds_tcp_sysctl_table);
You should add proper error handling here?
> rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
> if (!rtn->rds_tcp_listen_sock) {
> pr_warn("could not set up listen sock\n");
> + unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
> return -EAFNOSUPPORT;
> }
> INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
> @@ -309,6 +355,7 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
> {
> struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
>
> + unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
> /* If rds_tcp_exit_net() is called as a result of netns deletion,
> * the rds_tcp_kill_sock() device notifier would already have cleaned
> * up the listen socket, thus there is no work to do in this function.
> @@ -383,6 +430,84 @@ static struct notifier_block rds_tcp_dev_notifier = {
> .priority = -10, /* must be called after other network notifiers */
> };
>
> +static int user_atoi(char __user *ubuf, size_t len)
> +{
> + char buf[16];
> + unsigned long ret;
> + int err;
> +
> + if (len > 15)
> + return -EINVAL;
> +
> + if (copy_from_user(buf, ubuf, len))
> + return -EFAULT;
> +
> + buf[len] = 0;
> + err = kstrtoul(buf, 0, &ret);
> + if (err != 0)
> + return -ERANGE;
> + return ret;
> +}
> +
> +/* when sysctl is used to modify some kernel socket parameters,this
> + * function resets the RDS connections in that netns so that we can
> + * restart with new parameters. The assumption is that such reset
> + * events are few and far-between.
> + */
> +static void rds_tcp_sysctl_reset(struct net *net)
> +{
> + struct rds_tcp_connection *tc, *_tc;
> +
> + spin_lock_irq(&rds_tcp_conn_lock);
> + list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
> + struct net *c_net = read_pnet(&tc->conn->c_net);
> +
> + if (net != c_net || !tc->t_sock)
> + continue;
> +
> + rds_conn_drop(tc->conn); /* reconnect with new parameters */
> + }
> + spin_unlock_irq(&rds_tcp_conn_lock);
> +}
> +
> +static int sndbuf_handler(struct ctl_table *ctl, int write,
> + void __user *buffer, size_t *lenp, loff_t *fpos)
> +{
> + struct net *net = current->nsproxy->net_ns;
> + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
> + struct ctl_table tmp;
> +
> + tmp = *ctl;
> + tmp.data = &rtn->sndbuf_size;
> + if (!write)
> + return proc_dointvec(&tmp, write, buffer, lenp, fpos);
> +
> + rtn->sndbuf_size = max_t(u32, user_atoi(buffer, *lenp) * 2,
> + SOCK_MIN_SNDBUF);
user_atoi does return negative error values (as you implemented it), I
think you should check before, otherwise the signed to unsigned
conversion can cause huge memory allocations.
Why actually implement user_atoi?
> + rds_tcp_sysctl_reset(net);
> +
> + return 0;
> +}
> +
> +static int rcvbuf_handler(struct ctl_table *ctl, int write,
> + void __user *buffer, size_t *lenp, loff_t *fpos)
> +{
> + struct net *net = current->nsproxy->net_ns;
> + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
> + struct ctl_table tmp;
> +
> + tmp = *ctl;
> + tmp.data = &rtn->rcvbuf_size;
> + if (!write)
> + return proc_dointvec(&tmp, write, buffer, lenp, fpos);
> +
> + rtn->rcvbuf_size = max_t(u32, user_atoi(buffer, *lenp) * 2,
> + SOCK_MIN_RCVBUF);
> + rds_tcp_sysctl_reset(net);
> +
> + return 0;
> +}
> +
> static void rds_tcp_exit(void)
> {
> rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
2016-03-16 10:29 ` Hannes Frederic Sowa
@ 2016-03-16 11:06 ` Sowmini Varadhan
2016-03-16 11:10 ` Sowmini Varadhan
1 sibling, 0 replies; 10+ messages in thread
From: Sowmini Varadhan @ 2016-03-16 11:06 UTC (permalink / raw)
To: Hannes Frederic Sowa; +Cc: netdev, davem, santosh.shilimkar, eric.dumazet
On (03/16/16 11:29), Hannes Frederic Sowa wrote:
>
> Normally we kmemdup a table per netns and update its data pointer,
> so we can reuse the proc_doint_minmax functions.
I actually thought of doing a separate kzalloc per netns,
but it seemed like it would be a lot of memory bloat, when really,
all I want is just the per-netns data.
I could go and duplicate the whole table, but is there a significant
benefit to that, or some bad bug with this approach?
One dubious (yes, I know, "solution looking for problem" category)
benefit is that this hack allows me to have both global, and per netns,
tunables... plus the reduced memory footprint (latter is probably
more interesting than former?)
> >+ rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp",
> >+ rds_tcp_sysctl_table);
>
> You should add proper error handling here?
Agree, I will send this out in the next rev.
> >+ rtn->sndbuf_size = max_t(u32, user_atoi(buffer, *lenp) * 2,
> >+ SOCK_MIN_SNDBUF);
>
> user_atoi does return negative error values (as you implemented it),
> I think you should check before, otherwise the signed to unsigned
> conversion can cause huge memory allocations.
yes, good catch, that will have to be split into two lines. Shall
be fixing this in v4.
> Why actually implement user_atoi?
errhm. cut/paste from other drivers that do this. I needed to
do the copy_from_user() + kstrtoul, and user_atoi had some
additional checks that seemed useful.
--Sowmini
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
2016-03-16 10:29 ` Hannes Frederic Sowa
2016-03-16 11:06 ` Sowmini Varadhan
@ 2016-03-16 11:10 ` Sowmini Varadhan
2016-03-16 11:26 ` Hannes Frederic Sowa
1 sibling, 1 reply; 10+ messages in thread
From: Sowmini Varadhan @ 2016-03-16 11:10 UTC (permalink / raw)
To: Hannes Frederic Sowa; +Cc: netdev, davem, santosh.shilimkar, eric.dumazet
On (03/16/16 11:29), Hannes Frederic Sowa wrote:
> Normally we kmemdup a table per netns and update its data pointer,
> so we can reuse the proc_doint_minmax functions.
I remembered one more thing.. in this particular case, I need to
have my one ->proc_handler, because I need to rds_tcp_sysctl_reset()
existing connections to make them use the new tunable.
--Sowmini
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
2016-03-16 11:10 ` Sowmini Varadhan
@ 2016-03-16 11:26 ` Hannes Frederic Sowa
2016-03-16 11:32 ` Sowmini Varadhan
0 siblings, 1 reply; 10+ messages in thread
From: Hannes Frederic Sowa @ 2016-03-16 11:26 UTC (permalink / raw)
To: Sowmini Varadhan; +Cc: netdev, davem, santosh.shilimkar, eric.dumazet
On 16.03.2016 12:10, Sowmini Varadhan wrote:
> On (03/16/16 11:29), Hannes Frederic Sowa wrote:
>> Normally we kmemdup a table per netns and update its data pointer,
>> so we can reuse the proc_doint_minmax functions.
>
> I remembered one more thing.. in this particular case, I need to
> have my one ->proc_handler, because I need to rds_tcp_sysctl_reset()
> existing connections to make them use the new tunable.
My hope was actually that by using the ->data pointer in netns you don't
need to provide the two functions, just simply use something like the
following for both cases.
static int rds_skbuf_handler(...) {
int err;
err = proc_dointvec(...);
if (err)
return err;
if (write)
rds_tcp_sysctl_reset(...);
return err;
}
If you use proc_dointvec_min(max) you can already sanitize the input
values even more.
Do I understand it correctly that all connections of a namespace will be
dropped if you modify those sysctls?
Thanks,
Hannes
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
2016-03-16 11:26 ` Hannes Frederic Sowa
@ 2016-03-16 11:32 ` Sowmini Varadhan
0 siblings, 0 replies; 10+ messages in thread
From: Sowmini Varadhan @ 2016-03-16 11:32 UTC (permalink / raw)
To: Hannes Frederic Sowa; +Cc: netdev, davem, santosh.shilimkar, eric.dumazet
On (03/16/16 12:26), Hannes Frederic Sowa wrote:
>
> My hope was actually that by using the ->data pointer in netns you
> don't need to provide the two functions, just simply use something
> like the following for both cases.
Ok, it would probably be less hacky to do it as you suggest.
> Do I understand it correctly that all connections of a namespace
> will be dropped if you modify those sysctls?
yes. it's unfortunate, but afaict there's no other way to make them
use the new vars.
But as I said in the comments, an admin who goes around creating
this churn is probably doing this very rarely, and for a good
reason, and is fully aware of the cost. So there is some degree
of human control possible.
--Sowmini
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2016-03-16 11:32 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-03-15 18:53 [PATCH net-next v3 0/2] RDS: TCP: tunable socket buffer parameters Sowmini Varadhan
2016-03-15 18:53 ` [PATCH net-next v3 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket Sowmini Varadhan
2016-03-15 19:21 ` santosh shilimkar
2016-03-16 10:29 ` Hannes Frederic Sowa
2016-03-16 11:06 ` Sowmini Varadhan
2016-03-16 11:10 ` Sowmini Varadhan
2016-03-16 11:26 ` Hannes Frederic Sowa
2016-03-16 11:32 ` Sowmini Varadhan
2016-03-15 18:53 ` [PATCH net-next v3 2/2] RDS: TCP: Remove unused constant Sowmini Varadhan
2016-03-15 19:22 ` santosh shilimkar
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).