public inbox for linux-s390@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH net v3 2/2] net/smc: Use correct buffer sizes when switching between TCP and SMC
  2023-08-04 17:06 [PATCH net v3 0/2] net/smc: Fix effective buffer size Gerd Bayer
@ 2023-08-04 17:06 ` Gerd Bayer
  0 siblings, 0 replies; 2+ messages in thread
From: Gerd Bayer @ 2023-08-04 17:06 UTC (permalink / raw)
  To: Wenjia Zhang, Jan Karcher, Tony Lu, Paolo Abeni
  Cc: Karsten Graul, D . Wythe, Wen Gu, David S . Miller, Eric Dumazet,
	Jakub Kicinski, linux-s390, netdev, linux-kernel

Tuning of the effective buffer size through setsockopts was working for
SMC traffic only but not for TCP fall-back connections even before
commit 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and
make them tunable"). That change made it apparent that TCP fall-back
connections would use net.smc.[rw]mem as buffer size instead of
net.ipv4_tcp_[rw]mem.

Amend the code that copies attributes between the (TCP) clcsock and the
SMC socket and adjust buffer sizes appropriately:
- Copy over sk_userlocks so that both sockets agree on whether tuning
  via setsockopt is active.
- When falling back to TCP use sk_sndbuf or sk_rcvbuf as specified with
  setsockopt. Otherwise, use the sysctl value for TCP/IPv4.
- Likewise, use either values from setsockopt or from sysctl for SMC
  (duplicated) on successful SMC connect.

In smc_tcp_listen_work() drop the explicit copy of buffer sizes as that
is taken care of by the attribute copy.

Fixes: 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
---
 net/smc/af_smc.c | 73 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 22 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5b878e523abf..f5834af5fad5 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -436,13 +436,60 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
 	return rc;
 }
 
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
+
+#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
+			     (1UL << SOCK_KEEPOPEN) | \
+			     (1UL << SOCK_LINGER) | \
+			     (1UL << SOCK_BROADCAST) | \
+			     (1UL << SOCK_TIMESTAMP) | \
+			     (1UL << SOCK_DBG) | \
+			     (1UL << SOCK_RCVTSTAMP) | \
+			     (1UL << SOCK_RCVTSTAMPNS) | \
+			     (1UL << SOCK_LOCALROUTE) | \
+			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
+			     (1UL << SOCK_RXQ_OVFL) | \
+			     (1UL << SOCK_WIFI_STATUS) | \
+			     (1UL << SOCK_NOFCS) | \
+			     (1UL << SOCK_FILTER_LOCKED) | \
+			     (1UL << SOCK_TSTAMP_NEW))
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+				     unsigned long mask)
+{
+	struct net *nnet = sock_net(nsk);
+
+	nsk->sk_userlocks = osk->sk_userlocks;
+	if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+		nsk->sk_sndbuf = osk->sk_sndbuf;
+	} else {
+		if (mask == SK_FLAGS_SMC_TO_CLC)
+			WRITE_ONCE(nsk->sk_sndbuf,
+				   READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+		else
+			WRITE_ONCE(nsk->sk_sndbuf,
+				   2 * READ_ONCE(nnet->smc.sysctl_wmem));
+	}
+	if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+		nsk->sk_rcvbuf = osk->sk_rcvbuf;
+	} else {
+		if (mask == SK_FLAGS_SMC_TO_CLC)
+			WRITE_ONCE(nsk->sk_rcvbuf,
+				   READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+		else
+			WRITE_ONCE(nsk->sk_rcvbuf,
+				   2 * READ_ONCE(nnet->smc.sysctl_rmem));
+	}
+}
+
 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
 				   unsigned long mask)
 {
 	/* options we don't get control via setsockopt for */
 	nsk->sk_type = osk->sk_type;
-	nsk->sk_sndbuf = osk->sk_sndbuf;
-	nsk->sk_rcvbuf = osk->sk_rcvbuf;
 	nsk->sk_sndtimeo = osk->sk_sndtimeo;
 	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
 	nsk->sk_mark = READ_ONCE(osk->sk_mark);
@@ -453,26 +500,10 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
 
 	nsk->sk_flags &= ~mask;
 	nsk->sk_flags |= osk->sk_flags & mask;
+
+	smc_adjust_sock_bufsizes(nsk, osk, mask);
 }
 
-#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
-			     (1UL << SOCK_KEEPOPEN) | \
-			     (1UL << SOCK_LINGER) | \
-			     (1UL << SOCK_BROADCAST) | \
-			     (1UL << SOCK_TIMESTAMP) | \
-			     (1UL << SOCK_DBG) | \
-			     (1UL << SOCK_RCVTSTAMP) | \
-			     (1UL << SOCK_RCVTSTAMPNS) | \
-			     (1UL << SOCK_LOCALROUTE) | \
-			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
-			     (1UL << SOCK_RXQ_OVFL) | \
-			     (1UL << SOCK_WIFI_STATUS) | \
-			     (1UL << SOCK_NOFCS) | \
-			     (1UL << SOCK_FILTER_LOCKED) | \
-			     (1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
 {
 	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -2479,8 +2510,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
 		sock_hold(lsk); /* sock_put in smc_listen_work */
 		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
 		smc_copy_sock_settings_to_smc(new_smc);
-		new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
-		new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
 		sock_hold(&new_smc->sk); /* sock_put in passive closing */
 		if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
 			sock_put(&new_smc->sk);
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH net v3 2/2] net/smc: Use correct buffer sizes when switching between TCP and SMC
@ 2024-05-30  9:20 Wen Gu
  0 siblings, 0 replies; 2+ messages in thread
From: Wen Gu @ 2024-05-30  9:20 UTC (permalink / raw)
  To: Gerd Bayer, Wenjia Zhang, Jan Karcher
  Cc: linux-s390@vger.kernel.org, netdev@vger.kernel.org, LKML

[-- Attachment #1: Type: text/plain, Size: 5283 bytes --]

> Tuning of the effective buffer size through setsockopts was working for
> SMC traffic only but not for TCP fall-back connections even before
> commit 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and
> make them tunable"). That change made it apparent that TCP fall-back
> connections would use net.smc.[rw]mem as buffer size instead of
> net.ipv4_tcp_[rw]mem.
> 
> Amend the code that copies attributes between the (TCP) clcsock and the
> SMC socket and adjust buffer sizes appropriately:
> - Copy over sk_userlocks so that both sockets agree on whether tuning
>   via setsockopt is active.
> - When falling back to TCP use sk_sndbuf or sk_rcvbuf as specified with
>   setsockopt. Otherwise, use the sysctl value for TCP/IPv4.
> - Likewise, use either values from setsockopt or from sysctl for SMC
>   (duplicated) on successful SMC connect.
> 
> In smc_tcp_listen_work() drop the explicit copy of buffer sizes as that
> is taken care of by the attribute copy.

[...]
> +/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
> +static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
> +				     unsigned long mask)
> +{
> +	struct net *nnet = sock_net(nsk);
> +
> +	nsk->sk_userlocks = osk->sk_userlocks;
> +	if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
> +		nsk->sk_sndbuf = osk->sk_sndbuf;
> +	} else {
> +		if (mask == SK_FLAGS_SMC_TO_CLC)
> +			WRITE_ONCE(nsk->sk_sndbuf,
> +				   READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));

Hi Gerd,

I noticed that during TCP connection establishment, tcp_sndbuf_expand()
will tune sk->sk_sndbuf, that causes clcsock's sk_sndbuf to no longer
be sysctl_tcp_wmem[1]. But here we set it back to sysctl_tcp_wmem[1].

So I did some tests to see if the values of sk_sndbuf and sk_rcvbuf are
as expected in SMC and fallback cases (see the attached server.c and
client.c for the reproducer and here are the sysctl values in my environment)

net.ipv4.tcp_wmem = 4096        4096    16777216
net.ipv4.tcp_rmem = 4096        4096    16777216
net.smc.wmem = 65536
net.smc.rmem = 65536


1. No additional sk_{snd|rcv}buf settings

1.1 TCP

     ./server
     ./client -i <serv_ip>

     results:
     - server: sndbuf_size 87040, rcvbuf_size 4096
     - client: sndbuf_size 87040, rcvbuf_size 4096

1.2 SMC

     smc_run ./server
     smc_run ./client -i <serv_ip>

     results:
     - server: sndbuf_size 131072, rcvbuf_size 131072
     - client: sndbuf_size 131072, rcvbuf_size 131072

1.3 SMC, but server fallback

     smc_run ./server
     ./client -i <serv_ip>

     results:
     - server: sndbuf_size 87040, rcvbuf_size 4096
     - client: sndbuf_size 87040, rcvbuf_size 4096

1.4 SMC, but client fallback

     ./server
     smc_run ./client -i <serv_ip>

     results:
     - server: sndbuf_size 87040, rcvbuf_size 4096
     - client: sndbuf_size 4096, rcvbuf_size 4096    <--- I think clcsock's sk_sndbuf should
                                                          be the same as 1.1 after fallback?


2. Set server listen sock's and client sock's sk_{snd|rcv}buf
    as 16KB by setsockopt() before connection establishment.

2.1 TCP

     ./server -s 16384
     ./client -i <serv_ip> -s 16384

     results:
     - server: sndbuf_size 32768, rcvbuf_size 32768
     - client: sndbuf_size 32768, rcvbuf_size 32768

2.2 SMC

     smc_run ./server -s 16384
     smc_run ./client -i <serv_ip> -s 16384

     results:
     - server: sndbuf_size 32768, rcvbuf_size 32768
     - client: sndbuf_size 32768, rcvbuf_size 32768

2.3 SMC, but server fallback

     smc_run ./server -s 16384
     ./client -i <serv_ip> -s 16384

     results:
     - server: sndbuf_size 32768, rcvbuf_size 32768
     - client: sndbuf_size 32768, rcvbuf_size 32768

2.4 SMC, but client fallback

     ./server -s 16384
     smc_run ./client -i <serv_ip> -s 16384

     results:
     - server: sndbuf_size 32768, rcvbuf_size 32768
     - client: sndbuf_size 32768, rcvbuf_size 32768


In the above 8 sets of tests, 1.4 does not seem to meet expectations.
It is because we reset clcsock's sk_sndbuf to sysctl_tcp_wmem[1] in
smc_copy_sock_settings_to_clc(). I think it should be like 1.1 TCP values
after fallback. What do you think?

If so, we may need to avoid setting sysctl value to clcsock's sk_sndbuf
in smc_adjust_sock_bufsizes(). Furthermore, maybe all the setting-sysctl-value
can be omitted, since smc sock's and clcsock's sk_{snd|rcv}buf have been
set to sysctl value during their sock initialization (smc_sock_alloc() and
tcp_init_sock()).


And another question is why 1.3 is as expected? The direct cause is that
server does not call smc_copy_sock_settings_to_clc() when fallback, like the
client smc_connect_fallback() does. But I didn't figure out what is the
reason for the different behavior? Do you have any information? Thanks a lot!


Best regards,
Wen Gu

> +		else
> +			WRITE_ONCE(nsk->sk_sndbuf,
> +				   2 * READ_ONCE(nnet->smc.sysctl_wmem));
> +	}
> +	if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
> +		nsk->sk_rcvbuf = osk->sk_rcvbuf;
> +	} else {
> +		if (mask == SK_FLAGS_SMC_TO_CLC)
> +			WRITE_ONCE(nsk->sk_rcvbuf,
> +				   READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
> +		else
> +			WRITE_ONCE(nsk->sk_rcvbuf,
> +				   2 * READ_ONCE(nnet->smc.sysctl_rmem));
> +	}
> +}
> +

[...]

[-- Attachment #2: client.c --]
[-- Type: text/plain, Size: 3094 bytes --]

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <errno.h>
#include <netinet/tcp.h>

#ifndef AF_SMC
#define AF_SMC          43
#endif
#define NET_PROTOCAL    AF_INET
#define SERV_IP         "11.213.5.33"
#define SERV_PORT       10012

char *ip;

int net_clnt(int buf_size, int port)
{
        int sndbuf_size, rcvbuf_size;
        struct sockaddr_in s_addr;
        char msg[128] = { 0 };
        int optlen = 4;
        int sock;
        int rc;

        if (!port)
                port = SERV_PORT;

        sock = socket(NET_PROTOCAL, SOCK_STREAM, 0);

        if (buf_size) {
                sndbuf_size = rcvbuf_size = buf_size;
                /* set sndbuf and rcvbuf */
                if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
                                &sndbuf_size, sizeof(int))) {
                        printf("set sndbuf failed\n");
                        return 0;
                }
                if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
                                &rcvbuf_size, sizeof(int))) {
                        printf("set rcvbuf failed\n");
                        return 0;
                }
        }

        memset(&s_addr, 0, sizeof(s_addr));
        s_addr.sin_family = NET_PROTOCAL;
        if (ip)
                s_addr.sin_addr.s_addr = inet_addr(ip);
        else
                s_addr.sin_addr.s_addr = inet_addr(SERV_IP);
        s_addr.sin_port = htons(port);
        if (connect(sock, (struct sockaddr*)&s_addr, sizeof(s_addr))){
                printf("connect fail\n");
                return 0;
        }

        sndbuf_size = 0; rcvbuf_size = 0;
        getsockopt(sock, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, &optlen);
        getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, &optlen);
        printf("client: sndbuf_size %d, rcvbuf_size %d\n", sndbuf_size, rcvbuf_size);

        recv(sock, msg, sizeof(msg), 0);
        printf("get msg: %s\n", msg);
        send(sock, "Response", sizeof("Response"), MSG_NOSIGNAL);

        close(sock);
}

int main(int argc, char **argv){
        bool wrong_param = false;
        int buf_size = 0, port = 0;
        int c;
        while(!wrong_param &&
              (-1 != (c = getopt(argc, argv, "p:s:i:")))) {
                switch (c) {
                        case 's':
                                buf_size = atoi(optarg);
                                break;
                        case 'i':
                                ip = strdup(optarg);
                                break;
                        case 'p':
                                port = atoi(optarg);
                                break;
                        case '?':
                                printf("usage: ./client -s <bufsize> -i <ip> -p <port>\n");
                                wrong_param = true;
                                break;
                }
        }
        if (!wrong_param)
                net_clnt(buf_size, port);
        return 0;
}

[-- Attachment #3: server.c --]
[-- Type: text/plain, Size: 3551 bytes --]

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <errno.h>
#include <stdbool.h>
#include <netinet/tcp.h>

#ifndef AF_SMC
#define AF_SMC          43
#endif
#define NET_PROTOCAL    AF_INET
#define SERV_IP         "0.0.0.0"
#define SERV_PORT       10012

int net_serv(int buf_size, int port)
{
        int sndbuf_size, rcvbuf_size;
        struct sockaddr_in s_addr;
        struct sockaddr_in c_addr;
        char msg[128] = "Request";
        int l_sock, s_sock;
        int optlen = 4;

        if (!port)
                port = SERV_PORT;

        l_sock = socket(NET_PROTOCAL, SOCK_STREAM, 0);

        if (buf_size) {
                sndbuf_size = rcvbuf_size = buf_size;
                /* set sndbuf and rcvbuf */
                if (setsockopt(l_sock, SOL_SOCKET, SO_SNDBUF,
                                &sndbuf_size, sizeof(int))) {
                        printf("set sndbuf failed\n");
                        return 0;
                }
                if (setsockopt(l_sock, SOL_SOCKET, SO_RCVBUF,
                                &rcvbuf_size, sizeof(int))) {
                        printf("set rcvbuf failed\n", rcvbuf_size);
                        return 0;
                }
        }

        memset(&s_addr, 0, sizeof(struct sockaddr_in));
        s_addr.sin_family = NET_PROTOCAL;
        s_addr.sin_addr.s_addr = inet_addr(SERV_IP);
        s_addr.sin_port = htons(port);
        if (bind(l_sock, (struct sockaddr*)&s_addr, sizeof(s_addr))) {
                printf("bind listen socket error %d\n", errno);
                return 0;
        }
        if (listen(l_sock, 20)) {
                printf("listen error\n");
                return 0;
        }

        socklen_t c_addr_len = sizeof(c_addr);
        s_sock = accept(l_sock, (struct sockaddr*)&c_addr,
                            &c_addr_len);
        if (s_sock < 0) {
                printf("accept fail\n");
                return 0;
        } else {
                char ip[16] = { 0 };
                inet_ntop(NET_PROTOCAL, &(c_addr.sin_addr), ip, INET_ADDRSTRLEN);
                printf("accept connection: ip %s port %d\n",
                        ip, c_addr.sin_port);
        }
        getsockopt(s_sock, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, &optlen);
        getsockopt(s_sock, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, &optlen);
        printf("server: sndbuf_size %d, rcvbuf_size %d\n", sndbuf_size, rcvbuf_size);

        send(s_sock, "Request", sizeof("Request"), MSG_NOSIGNAL);
        recv(s_sock, msg, sizeof(msg), 0);
        printf("get msg: %s\n", msg);

        close(s_sock);
        close(l_sock);
        return 0;
}

int main(int argc, char **argv)
{
        bool wrong_param = false;
        int buf_size = 0, port = 0;
        int c;
        while(!wrong_param &&
              (-1 != (c = getopt(argc, argv, "p:s:")))) {
                switch (c) {
                        case 's':
                                buf_size = atoi(optarg);
                                break;
                        case 'p':
                                port = atoi(optarg);
                                break;
                        case '?':
                                printf("usage: ./server -s <bufsize> -p <port>\n");
                                wrong_param = true;
                                break;
                }
        }
        if (!wrong_param)
                net_serv(buf_size, port);
        return 0;
}


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-05-30  9:20 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-05-30  9:20 [PATCH net v3 2/2] net/smc: Use correct buffer sizes when switching between TCP and SMC Wen Gu
  -- strict thread matches above, loose matches on Subject: below --
2023-08-04 17:06 [PATCH net v3 0/2] net/smc: Fix effective buffer size Gerd Bayer
2023-08-04 17:06 ` [PATCH net v3 2/2] net/smc: Use correct buffer sizes when switching between TCP and SMC Gerd Bayer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox