* [PATCH net-next v2 10/11] selftests: udp gso with corking
From: Willem de Bruijn @ 2018-04-26 17:42 UTC (permalink / raw)
To: netdev; +Cc: davem, alexander.duyck, Willem de Bruijn
In-Reply-To: <20180426174225.246388-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemb@google.com>
Corked sockets take a different path to construct a udp datagram than
the lockless fast path. Test this alternate path.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
tools/testing/selftests/net/udpgso.c | 42 ++++++++++++++++++++-------
tools/testing/selftests/net/udpgso.sh | 6 ++++
2 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index a47dca025346..6b1998b9f7df 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -49,6 +49,7 @@ static bool cfg_do_ipv4;
static bool cfg_do_ipv6;
static bool cfg_do_connected;
static bool cfg_do_connectionless;
+static bool cfg_do_msgmore;
static bool cfg_do_setsockopt;
static int cfg_specific_test_id = -1;
@@ -369,6 +370,23 @@ static void set_route_mtu(int mtu, bool is_ipv4)
fprintf(stderr, "route mtu (test): %u\n", mtu);
}
+static bool __send_one(int fd, struct msghdr *msg, int flags)
+{
+ int ret;
+
+ ret = sendmsg(fd, msg, flags);
+ if (ret == -1 && (errno == EMSGSIZE || errno == ENOMEM))
+ return false;
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != msg->msg_iov->iov_len)
+ error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+ if (msg->msg_flags)
+ error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
+
+ return true;
+}
+
static bool send_one(int fd, int len, int gso_len,
struct sockaddr *addr, socklen_t alen)
{
@@ -376,7 +394,6 @@ static bool send_one(int fd, int len, int gso_len,
struct msghdr msg = {0};
struct iovec iov = {0};
struct cmsghdr *cm;
- int ret;
iov.iov_base = buf;
iov.iov_len = len;
@@ -398,15 +415,17 @@ static bool send_one(int fd, int len, int gso_len,
*((uint16_t *) CMSG_DATA(cm)) = gso_len;
}
- ret = sendmsg(fd, &msg, 0);
- if (ret == -1 && (errno == EMSGSIZE || errno == ENOMEM))
- return false;
- if (ret == -1)
- error(1, errno, "sendmsg");
- if (ret != len)
- error(1, 0, "sendto: %d != %u", ret, len);
+ /* If MSG_MORE, send 1 byte followed by remainder */
+ if (cfg_do_msgmore && len > 1) {
+ iov.iov_len = 1;
+ if (!__send_one(fd, &msg, MSG_MORE))
+ error(1, 0, "send 1B failed");
- return true;
+ iov.iov_base++;
+ iov.iov_len = len - 1;
+ }
+
+ return __send_one(fd, &msg, 0);
}
static int recv_one(int fd, int flags)
@@ -558,7 +577,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "46cCst:")) != -1) {
+ while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
switch (c) {
case '4':
cfg_do_ipv4 = true;
@@ -572,6 +591,9 @@ static void parse_opts(int argc, char **argv)
case 'C':
cfg_do_connectionless = true;
break;
+ case 'm':
+ cfg_do_msgmore = true;
+ break;
case 's':
cfg_do_setsockopt = true;
break;
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
index 7cdf0e7c1dde..fec24f584fe9 100755
--- a/tools/testing/selftests/net/udpgso.sh
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -21,3 +21,9 @@ echo "ipv4 connected"
# blocked on 2nd loopback address
# echo "ipv6 connected"
# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
--
2.17.0.484.g0c8726318c-goog
^ permalink raw reply related
* [PATCH net-next v2 09/11] selftests: udp gso with connected sockets
From: Willem de Bruijn @ 2018-04-26 17:42 UTC (permalink / raw)
To: netdev; +Cc: davem, alexander.duyck, Willem de Bruijn
In-Reply-To: <20180426174225.246388-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemb@google.com>
Connected sockets use path mtu instead of device mtu.
Test this path by inserting a route mtu that is lower than the device
mtu. Verify that the path mtu for the connection matches this lower
number, then run the same test as in the connectionless case.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
tools/testing/selftests/net/udpgso.c | 117 +++++++++++++++++++++++++-
tools/testing/selftests/net/udpgso.sh | 7 ++
2 files changed, 122 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 68a230dfee73..a47dca025346 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -47,6 +47,7 @@
static bool cfg_do_ipv4;
static bool cfg_do_ipv6;
+static bool cfg_do_connected;
static bool cfg_do_connectionless;
static bool cfg_do_setsockopt;
static int cfg_specific_test_id = -1;
@@ -273,6 +274,101 @@ static void set_pmtu_discover(int fd, bool is_ipv4)
error(1, errno, "setsockopt path mtu");
}
+static unsigned int get_path_mtu(int fd, bool is_ipv4)
+{
+ socklen_t vallen;
+ unsigned int mtu;
+ int ret;
+
+ vallen = sizeof(mtu);
+ if (is_ipv4)
+ ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
+ else
+ ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);
+
+ if (ret)
+ error(1, errno, "getsockopt mtu");
+
+
+ fprintf(stderr, "path mtu (read): %u\n", mtu);
+ return mtu;
+}
+
+/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
+static void set_route_mtu(int mtu, bool is_ipv4)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ struct rtmsg *rt;
+ char data[NLMSG_ALIGN(sizeof(*nh)) +
+ NLMSG_ALIGN(sizeof(*rt)) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
+ NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
+ int fd, ret, alen, off = 0;
+
+ alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd == -1)
+ error(1, errno, "socket netlink");
+
+ memset(data, 0, sizeof(data));
+
+ nh = (void *)data;
+ nh->nlmsg_type = RTM_NEWROUTE;
+ nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+ off += NLMSG_ALIGN(sizeof(*nh));
+
+ rt = (void *)(data + off);
+ rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
+ rt->rtm_table = RT_TABLE_MAIN;
+ rt->rtm_dst_len = alen << 3;
+ rt->rtm_protocol = RTPROT_BOOT;
+ rt->rtm_scope = RT_SCOPE_UNIVERSE;
+ rt->rtm_type = RTN_UNICAST;
+ off += NLMSG_ALIGN(sizeof(*rt));
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_DST;
+ rta->rta_len = RTA_LENGTH(alen);
+ if (is_ipv4)
+ memcpy(RTA_DATA(rta), &addr4, alen);
+ else
+ memcpy(RTA_DATA(rta), &addr6, alen);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_OIF;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+ *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* MTU is a subtype in a metrics type */
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_METRICS;
+ rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* now fill MTU subtype. Note that it fits within above rta_len */
+ rta = (void *)(((char *) rta) + RTA_LENGTH(0));
+ rta->rta_type = RTAX_MTU;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+ *((int *)(RTA_DATA(rta))) = mtu;
+
+ nh->nlmsg_len = off;
+
+ ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
+ if (ret != off)
+ error(1, errno, "send netlink: %uB != %uB\n", ret, off);
+
+ if (close(fd))
+ error(1, errno, "close netlink");
+
+ fprintf(stderr, "route mtu (test): %u\n", mtu);
+}
+
static bool send_one(int fd, int len, int gso_len,
struct sockaddr *addr, socklen_t alen)
{
@@ -391,7 +487,7 @@ static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
static void run_test(struct sockaddr *addr, socklen_t alen)
{
struct timeval tv = { .tv_usec = 100 * 1000 };
- int fdr, fdt;
+ int fdr, fdt, val;
fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
if (fdr == -1)
@@ -416,6 +512,20 @@ static void run_test(struct sockaddr *addr, socklen_t alen)
run_all(fdt, fdr, addr, alen);
}
+ if (cfg_do_connected) {
+ set_device_mtu(fdt, CONST_MTU_TEST + 100);
+ set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
+
+ if (connect(fdt, addr, alen))
+ error(1, errno, "connect");
+
+ val = get_path_mtu(fdt, addr->sa_family == AF_INET);
+ if (val != CONST_MTU_TEST)
+ error(1, 0, "bad path mtu %u\n", val);
+
+ run_all(fdt, fdr, addr, 0 /* use connected addr */);
+ }
+
if (close(fdt))
error(1, errno, "close t");
if (close(fdr))
@@ -448,7 +558,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "46Cst:")) != -1) {
+ while ((c = getopt(argc, argv, "46cCst:")) != -1) {
switch (c) {
case '4':
cfg_do_ipv4 = true;
@@ -456,6 +566,9 @@ static void parse_opts(int argc, char **argv)
case '6':
cfg_do_ipv6 = true;
break;
+ case 'c':
+ cfg_do_connected = true;
+ break;
case 'C':
cfg_do_connectionless = true;
break;
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
index 7977b97e060c..7cdf0e7c1dde 100755
--- a/tools/testing/selftests/net/udpgso.sh
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -14,3 +14,10 @@ echo "ipv6 cmsg"
echo "ipv6 setsockopt"
./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
--
2.17.0.484.g0c8726318c-goog
^ permalink raw reply related
* [PATCH net-next v2 11/11] selftests: udp gso benchmark
From: Willem de Bruijn @ 2018-04-26 17:42 UTC (permalink / raw)
To: netdev; +Cc: davem, alexander.duyck, Willem de Bruijn
In-Reply-To: <20180426174225.246388-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemb@google.com>
Send udp data between a source and sink, optionally with udp gso.
The two processes are expected to be run on separate hosts.
A script is included that runs them together over loopback in a
single namespace for functionality testing.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
tools/testing/selftests/net/.gitignore | 2 +
tools/testing/selftests/net/Makefile | 3 +-
tools/testing/selftests/net/udpgso_bench.sh | 74 +++
tools/testing/selftests/net/udpgso_bench_rx.c | 265 +++++++++++
tools/testing/selftests/net/udpgso_bench_tx.c | 420 ++++++++++++++++++
5 files changed, 763 insertions(+), 1 deletion(-)
create mode 100755 tools/testing/selftests/net/udpgso_bench.sh
create mode 100644 tools/testing/selftests/net/udpgso_bench_rx.c
create mode 100644 tools/testing/selftests/net/udpgso_bench_tx.c
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 67a26240407c..f0e6c35a93ae 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -9,3 +9,5 @@ reuseport_dualstack
reuseaddr_conflict
tcp_mmap
udpgso
+udpgso_bench_rx
+udpgso_bench_tx
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index db3303348315..df9102ec7b7a 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -6,12 +6,13 @@ CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh udpgso.sh
+TEST_PROGS += udpgso_bench.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_FILES += tcp_mmap
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
-TEST_GEN_PROGS += udpgso
+TEST_GEN_PROGS += udpgso udpgso_bench_tx udpgso_bench_rx
include ../lib.mk
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 000000000000..792fa4d0285e
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+ local -r jobs="$(jobs -p)"
+
+ if [[ "${jobs}" != "" ]]; then
+ kill -1 ${jobs} 2>/dev/null
+ fi
+}
+trap wake_children EXIT
+
+run_one() {
+ local -r args=$@
+
+ ./udpgso_bench_rx &
+ ./udpgso_bench_rx -t &
+
+ ./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+ local -r args=$@
+
+ ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+ local -r args=$@
+
+ echo "udp"
+ run_in_netns ${args}
+
+ echo "udp gso"
+ run_in_netns ${args} -S
+
+ echo "udp gso zerocopy"
+ run_in_netns ${args} -S -z
+}
+
+run_tcp() {
+ local -r args=$@
+
+ echo "tcp"
+ run_in_netns ${args} -t
+
+ echo "tcp zerocopy"
+ run_in_netns ${args} -t -z
+}
+
+run_all() {
+ local -r core_args="-l 4"
+ local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+ local -r ipv6_args="${core_args} -6 -D ::1"
+
+ echo "ipv4"
+ run_tcp "${ipv4_args}"
+ run_udp "${ipv4_args}"
+
+ echo "ipv6"
+ run_tcp "${ipv4_args}"
+ run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+ run_all
+elif [[ $1 == "__subprocess" ]]; then
+ shift
+ run_one $@
+else
+ run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 000000000000..727cf67a3f75
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int cfg_port = 8000;
+static bool cfg_tcp;
+static bool cfg_verify;
+
+static bool interrupted;
+static unsigned long packets, bytes;
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ do {
+ ret = poll(&pfd, 1, 10);
+ if (ret == -1)
+ error(1, errno, "poll");
+ if (ret == 0)
+ continue;
+ if (pfd.revents != POLLIN)
+ error(1, errno, "poll: 0x%x expected 0x%x\n",
+ pfd.revents, POLLIN);
+ } while (!ret && !interrupted);
+}
+
+static int do_socket(bool do_tcp)
+{
+ struct sockaddr_in6 addr = {0};
+ int fd, val;
+
+ fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ val = 1 << 21;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+ error(1, errno, "setsockopt reuseport");
+
+ addr.sin6_family = PF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = in6addr_any;
+ if (bind(fd, (void *) &addr, sizeof(addr)))
+ error(1, errno, "bind");
+
+ if (do_tcp) {
+ int accept_fd = fd;
+
+ if (listen(accept_fd, 1))
+ error(1, errno, "listen");
+
+ do_poll(accept_fd);
+
+ fd = accept(accept_fd, NULL, NULL);
+ if (fd == -1)
+ error(1, errno, "accept");
+ if (close(accept_fd))
+ error(1, errno, "close accept fd");
+ }
+
+ return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+ int ret;
+
+ while (true) {
+ /* MSG_TRUNC flushes up to len bytes */
+ ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "flush");
+ if (ret == 0) {
+ /* client detached */
+ exit(0);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+
+}
+
+static char sanitized_char(char val)
+{
+ return (val >= 'a' && val <= 'z') ? val : '.';
+}
+
+static void do_verify_udp(const char *data, int len)
+{
+ char cur = data[0];
+ int i;
+
+ /* verify contents */
+ if (cur < 'a' || cur > 'z')
+ error(1, 0, "data initial byte out of range");
+
+ for (i = 1; i < len; i++) {
+ if (cur == 'z')
+ cur = 'a';
+ else
+ cur++;
+
+ if (data[i] != cur)
+ error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+ i, len,
+ sanitized_char(data[i]), data[i],
+ sanitized_char(cur), cur);
+ }
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_udp(int fd)
+{
+ static char rbuf[ETH_DATA_LEN];
+ int ret, len, budget = 256;
+
+ len = cfg_verify ? sizeof(rbuf) : 0;
+ while (budget--) {
+ /* MSG_TRUNC will make return value full datagram length */
+ ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (len) {
+ if (ret == 0)
+ error(1, errno, "recv: 0 byte datagram\n");
+
+ do_verify_udp(rbuf, ret);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "ptv")) != -1) {
+ switch (c) {
+ case 'p':
+ cfg_port = htons(strtoul(optarg, NULL, 0));
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'v':
+ cfg_verify = true;
+ break;
+ }
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_tcp && cfg_verify)
+ error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+static void do_recv(void)
+{
+ unsigned long tnow, treport;
+ int fd;
+
+ fd = do_socket(cfg_tcp);
+
+ treport = gettimeofday_ms() + 1000;
+ do {
+ do_poll(fd);
+
+ if (cfg_tcp)
+ do_flush_tcp(fd);
+ else
+ do_flush_udp(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ if (packets)
+ fprintf(stderr,
+ "%s rx: %6lu MB/s %8lu calls/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ bytes >> 20, packets);
+ bytes = packets = 0;
+ treport = tnow + 1000;
+ }
+
+ } while (!interrupted);
+
+ if (close(fd))
+ error(1, errno, "close");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ signal(SIGINT, sigint_handler);
+
+ do_recv();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644
index 000000000000..e821564053cf
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY 60
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+#define NUM_PKT 100
+
+static bool cfg_cache_trash;
+static int cfg_cpu = -1;
+static int cfg_connected = true;
+static int cfg_family = PF_UNSPEC;
+static uint16_t cfg_mss;
+static int cfg_payload_len = (1472 * 42);
+static int cfg_port = 8000;
+static int cfg_runtime_ms = -1;
+static bool cfg_segment;
+static bool cfg_sendmmsg;
+static bool cfg_tcp;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static bool interrupted;
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int set_cpu(int cpu)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ error(1, 0, "setaffinity %d", cpu);
+
+ return 0;
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static void flush_zerocopy(int fd)
+{
+ struct msghdr msg = {0}; /* flush */
+ int ret;
+
+ while (1) {
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "errqueue");
+ if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+ error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+ msg.msg_flags = 0;
+ }
+}
+
+static int send_tcp(int fd, char *data)
+{
+ int ret, done = 0, count = 0;
+
+ while (done < cfg_payload_len) {
+ ret = send(fd, data + done, cfg_payload_len - done,
+ cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "write");
+
+ done += ret;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp(int fd, char *data)
+{
+ int ret, total_len, len, count = 0;
+
+ total_len = cfg_payload_len;
+
+ while (total_len) {
+ len = total_len < cfg_mss ? total_len : cfg_mss;
+
+ ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+ cfg_connected ? NULL : (void *)&cfg_dst_addr,
+ cfg_connected ? 0 : cfg_alen);
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, errno, "write: %uB != %uB\n", ret, len);
+
+ total_len -= len;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp_sendmmsg(int fd, char *data)
+{
+ const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
+ struct mmsghdr mmsgs[max_nr_msg];
+ struct iovec iov[max_nr_msg];
+ unsigned int off = 0, left;
+ int i = 0, ret;
+
+ memset(mmsgs, 0, sizeof(mmsgs));
+
+ left = cfg_payload_len;
+ while (left) {
+ if (i == max_nr_msg)
+ error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+ iov[i].iov_base = data + off;
+ iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+ mmsgs[i].msg_hdr.msg_iov = iov + i;
+ mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+ off += iov[i].iov_len;
+ left -= iov[i].iov_len;
+ i++;
+ }
+
+ ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmmsg");
+
+ return ret;
+}
+
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+ uint16_t *valp;
+
+ cm->cmsg_level = SOL_UDP;
+ cm->cmsg_type = UDP_SEGMENT;
+ cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+ valp = (void *)CMSG_DATA(cm);
+ *valp = cfg_mss;
+}
+
+static int send_udp_segment(int fd, char *data)
+{
+ char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = cfg_payload_len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+ send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+ msg.msg_name = (void *)&cfg_dst_addr;
+ msg.msg_namelen = cfg_alen;
+
+ ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != iov.iov_len)
+ error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+
+ return 1;
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+ filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int max_len, hdrlen;
+ int c;
+
+ while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'c':
+ cfg_cache_trash = true;
+ break;
+ case 'C':
+ cfg_cpu = strtol(optarg, NULL, 0);
+ break;
+ case 'D':
+ setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+ break;
+ case 'l':
+ cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'm':
+ cfg_sendmmsg = true;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'S':
+ cfg_segment = true;
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'u':
+ cfg_connected = false;
+ break;
+ case 'z':
+ cfg_zerocopy = true;
+ break;
+ }
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_family == PF_UNSPEC)
+ error(1, 0, "must pass one of -4 or -6");
+ if (cfg_tcp && !cfg_connected)
+ error(1, 0, "connectionless tcp makes no sense");
+ if (cfg_segment && cfg_sendmmsg)
+ error(1, 0, "cannot combine segment offload and sendmmsg");
+
+ if (cfg_family == PF_INET)
+ hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+ else
+ hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+ cfg_mss = ETH_DATA_LEN - hdrlen;
+ max_len = ETH_MAX_MTU - hdrlen;
+
+ if (cfg_payload_len > max_len)
+ error(1, 0, "payload length %u exceeds max %u",
+ cfg_payload_len, max_len);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ if (setsockopt(fd, level, name, &val, sizeof(val)))
+ error(1, errno, "setsockopt path mtu");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long num_msgs, num_sends;
+ unsigned long tnow, treport, tstop;
+ int fd, i, val;
+
+ parse_opts(argc, argv);
+
+ if (cfg_cpu > 0)
+ set_cpu(cfg_cpu);
+
+ for (i = 0; i < sizeof(buf[0]); i++)
+ buf[0][i] = 'a' + (i % 26);
+ for (i = 1; i < NUM_PKT; i++)
+ memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+ signal(SIGINT, sigint_handler);
+
+ fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ if (cfg_zerocopy) {
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+ error(1, errno, "setsockopt zerocopy");
+ }
+
+ if (cfg_connected &&
+ connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+
+ if (cfg_segment)
+ set_pmtu_discover(fd, cfg_family == PF_INET);
+
+ num_msgs = num_sends = 0;
+ tnow = gettimeofday_ms();
+ tstop = tnow + cfg_runtime_ms;
+ treport = tnow + 1000;
+
+ i = 0;
+ do {
+ if (cfg_tcp)
+ num_sends += send_tcp(fd, buf[i]);
+ else if (cfg_segment)
+ num_sends += send_udp_segment(fd, buf[i]);
+ else if (cfg_sendmmsg)
+ num_sends += send_udp_sendmmsg(fd, buf[i]);
+ else
+ num_sends += send_udp(fd, buf[i]);
+ num_msgs++;
+
+ if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+ flush_zerocopy(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ fprintf(stderr,
+ "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ (num_msgs * cfg_payload_len) >> 20,
+ num_sends, num_msgs);
+ num_msgs = num_sends = 0;
+ treport = tnow + 1000;
+ }
+
+ /* cold cache when writing buffer */
+ if (cfg_cache_trash)
+ i = ++i < NUM_PKT ? i : 0;
+
+ } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ return 0;
+}
--
2.17.0.484.g0c8726318c-goog
^ permalink raw reply related
* Re: [PATCH net-next 02/10] udp: add gso
From: Willem de Bruijn @ 2018-04-26 17:48 UTC (permalink / raw)
To: Alexander Duyck
Cc: Netdev, David Miller, Dimitris Michailidis, Willem de Bruijn
In-Reply-To: <CAKgT0UfP7ztrtV7smQviAXZyaiPAwCSARuKnbKnc3SP_R1ogsQ@mail.gmail.com>
Sent a v2 with all but the below suggestion incorporated.
>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>> index ff49e352deea..c647cfe114e0 100644
>> --- a/net/core/skbuff.c
>> +++ b/net/core/skbuff.c
>> @@ -4940,6 +4940,8 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
>> thlen = tcp_hdrlen(skb);
>> } else if (unlikely(skb_is_gso_sctp(skb))) {
>> thlen = sizeof(struct sctphdr);
>> + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
>> + thlen = sizeof(struct udphdr);
>> }
>> /* UFO sets gso_size to the size of the fragmentation
>> * payload, i.e. the size of the L4 (UDP) header is already
>
> It might make more sense to look at converting this over to a switch
> statement based off of shinfo(skb)->gso_type & GSO_TRANSPORT_MASK,
> where the transport mask consists of the 4 bits that are supported.
I decided to skip this. The types SKB_GSO_{TCPV4, TCPV6, SCTP, UDP_L4}
are far apart in the enum namespace and the tests have to use & instead
of direct comparison. I did not see an obvious way to have the compiler
convert this into a jump table.
Doing so is also a bit out of scope of the feature, so even if feasible
without too much gymnastics I suggest doing so in a separate patch.
^ permalink raw reply
* Re: [PATCH net-next 02/10] udp: add gso
From: Willem de Bruijn @ 2018-04-26 17:49 UTC (permalink / raw)
To: Alexander Duyck
Cc: Netdev, David Miller, Dimitris Michailidis, Willem de Bruijn
In-Reply-To: <CAKgT0UccfvfwLemZ5XcWoZPupQi6U2rfjbGjfjFfvJptMcNYeQ@mail.gmail.com>
>>> That way for things like GSO_PARTIAL we can update after segmentation
>>> since there are only going to be 2 segments most likely instead of
>>> multiple MSS sized segments.
>>
>> I don't quite follow. Which two segments?
>
> When we do GSO partial we end up with 2 segments. One really big one
> that is a multiple of MSS and the remainder assuming the frame is odd
> sized. The idea is we can just replicate all of the headers from the
> outer IP header to the inner transport header in hardware so we do all
> the updates based on that assumption and then we do the standard
> segmentation update on the tail skb.
Thanks for the explanation. That is a very cool feature. I clearly hadn't
read the GSO_PARTIAL code closely enough yet.
^ permalink raw reply
* [PATCH net-next 1/1] inet_diag: fetch cong algo info when socket is destroyed
From: Jamal Hadi Salim @ 2018-04-26 17:58 UTC (permalink / raw)
To: davem
Cc: kraig, netdev, eric.dumazet, kernel, Jamal Hadi Salim,
Jamal Hadi Salim
From: Jamal Hadi Salim <hadi@mojatatu.com>
When a user dumps an existing established tcp socket state
via inet diag, it is possible to retrieve the congestion control
details.
When an the sock is destroyed, the generated event has all the
details available in the dump sans congestion control info.
This patch fixes it.
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
net/core/sock_diag.c | 3 +++
net/ipv4/inet_diag.c | 48 ++++++++++++++++++++++++++++++++++++++----------
2 files changed, 41 insertions(+), 10 deletions(-)
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index c37b5be7c5e4..0bf64dd70aee 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -7,6 +7,7 @@
#include <net/net_namespace.h>
#include <linux/module.h>
#include <net/sock.h>
+#include <net/tcp.h>
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/workqueue.h>
@@ -112,6 +113,8 @@ static size_t sock_diag_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
+ nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
+ + nla_total_size(TCP_CA_NAME_MAX) /* INET_DIAG_CONG */
+ + nla_total_size(sizeof(union tcp_cc_info))
+ nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4e5bc4b2f14e..9722f31cc9c5 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -159,6 +159,35 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
+static int inet_csk_cong_fill(struct sock *sk, struct sk_buff *skb, int ext)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct tcp_congestion_ops *ca_ops;
+ union tcp_cc_info info;
+ int attr, err = 0;
+ size_t sz = 0;
+
+ rcu_read_lock();
+ ca_ops = READ_ONCE(icsk->icsk_ca_ops);
+ if (ca_ops) {
+ if (ca_ops->get_info)
+ sz = ca_ops->get_info(sk, ext, &attr, &info);
+ if (ext & (1 << (INET_DIAG_CONG - 1))) {
+ err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
+ if (err < 0) {
+ rcu_read_unlock();
+ return err;
+ }
+ }
+ }
+ rcu_read_unlock();
+
+ if (sz)
+ err = nla_put(skb, attr, sz, &info);
+
+ return err;
+}
+
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct sk_buff *skb, const struct inet_diag_req_v2 *req,
struct user_namespace *user_ns,
@@ -274,16 +303,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
if (sk->sk_state < TCP_TIME_WAIT) {
- union tcp_cc_info info;
- size_t sz = 0;
- int attr;
-
- rcu_read_lock();
- ca_ops = READ_ONCE(icsk->icsk_ca_ops);
- if (ca_ops && ca_ops->get_info)
- sz = ca_ops->get_info(sk, ext, &attr, &info);
- rcu_read_unlock();
- if (sz && nla_put(skb, attr, sz, &info) < 0)
+ if (inet_csk_cong_fill(sk, skb, ext))
goto errout;
}
@@ -1215,6 +1235,14 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
if (attr)
info = nla_data(attr);
+#define EXT_MASK (1 << (INET_DIAG_VEGASINFO - 1) | 1 << (INET_DIAG_CONG - 1))
+ err = inet_csk_cong_fill(sk, skb, EXT_MASK);
+ if (err) {
+ inet_diag_unlock_handler(handler);
+ nlmsg_cancel(skb, nlh);
+ return err;
+ }
+
handler->idiag_get_info(sk, r, info);
inet_diag_unlock_handler(handler);
--
2.11.0
^ permalink raw reply related
* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Michael S. Tsirkin @ 2018-04-26 18:49 UTC (permalink / raw)
To: Mikulas Patocka
Cc: James Bottomley, Michal Hocko, David Rientjes, dm-devel,
eric.dumazet, netdev, jasowang, Randy Dunlap, linux-kernel,
Matthew Wilcox, linux-mm, edumazet, Andrew Morton, virtualization,
David Miller, Vlastimil Babka
In-Reply-To: <alpine.LRH.2.02.1804261202350.24656@file01.intranet.prod.int.rdu2.redhat.com>
On Thu, Apr 26, 2018 at 12:07:25PM -0400, Mikulas Patocka wrote:
> > IIUC debug kernels mainly exist so people who experience e.g. memory
> > corruption can try and debug the failure. In this case, CONFIG_DEBUG_SG
> > will *already* catch a failure early. Nothing special needs to be done.
>
> The patch helps people debug such memory coprruptions (such as using DMA
> API on the result of kvmalloc).
That's my point. I don't think your patch helps debug any memory
corruptions. With CONFIG_DEBUG_SG using DMA API already causes a
BUG_ON, that's before any memory can get corrupted.
--
MST
^ permalink raw reply
* [PULL] virtio: fixups
From: Michael S. Tsirkin @ 2018-04-26 18:50 UTC (permalink / raw)
To: Linus Torvalds; +Cc: kvm, mst, netdev, linux-kernel, stable, virtualization
The following changes since commit 6d08b06e67cd117f6992c46611dfb4ce267cd71e:
Linux 4.17-rc2 (2018-04-22 19:20:09 -0700)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus
for you to fetch changes up to 5c60300d68da32ca77f7f978039dc72bfc78b06b:
virtio_console: reset on out of memory (2018-04-25 20:41:29 +0300)
----------------------------------------------------------------
virtio: fixups
Latest header update will break QEMU (if it's rebuilt with the new
header) - and it seems that the code there is so fragile that any change
in this header will break it. Add a better interface so users do not
need to change their code every time that header changes.
Fix virtio console for spec compliance.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
----------------------------------------------------------------
Michael S. Tsirkin (7):
virtio_balloon: add array of stat names
virtio_console: don't tie bufs to a vq
virtio: add ability to iterate over vqs
virtio_console: free buffers after reset
virtio_console: drop custom control queue cleanup
virtio_console: move removal code
virtio_console: reset on out of memory
drivers/char/virtio_console.c | 157 ++++++++++++++++--------------------
include/linux/virtio.h | 3 +
include/uapi/linux/virtio_balloon.h | 15 ++++
3 files changed, 89 insertions(+), 86 deletions(-)
^ permalink raw reply
* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Mikulas Patocka @ 2018-04-26 18:54 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: eric.dumazet, netdev, Randy Dunlap, linux-kernel, Matthew Wilcox,
Michal Hocko, James Bottomley, linux-mm, dm-devel,
Vlastimil Babka, David Rientjes, Andrew Morton, virtualization,
David Miller, edumazet
In-Reply-To: <20180426214011-mutt-send-email-mst@kernel.org>
On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:
> On Thu, Apr 26, 2018 at 12:07:25PM -0400, Mikulas Patocka wrote:
> > > IIUC debug kernels mainly exist so people who experience e.g. memory
> > > corruption can try and debug the failure. In this case, CONFIG_DEBUG_SG
> > > will *already* catch a failure early. Nothing special needs to be done.
> >
> > The patch helps people debug such memory coprruptions (such as using DMA
> > API on the result of kvmalloc).
>
> That's my point. I don't think your patch helps debug any memory
> corruptions. With CONFIG_DEBUG_SG using DMA API already causes a
> BUG_ON, that's before any memory can get corrupted.
The patch turns a hard-to-reproduce bug into an easy-to-reproduce bug.
Obviously we don't want this in production kernels, but in the debug
kernels it should be done.
Mikulas
^ permalink raw reply
* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Mikulas Patocka @ 2018-04-26 18:58 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: James Bottomley, Michal Hocko, David Rientjes, dm-devel,
eric.dumazet, netdev, jasowang, Randy Dunlap, linux-kernel,
Matthew Wilcox, linux-mm, edumazet, Andrew Morton, virtualization,
David Miller, Vlastimil Babka
In-Reply-To: <20180426184845-mutt-send-email-mst@kernel.org>
On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:
> How do you make sure QA tests a specific corner case? Add it to
> the test plan :)
BTW. how many "lines of code" of corporate bureaucracy would that take? :-)
> I don't speak for Red Hat, etc.
>
> --
> MST
Mikulas
^ permalink raw reply
* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: John Stoffel @ 2018-04-26 18:58 UTC (permalink / raw)
To: James Bottomley
Cc: Mikulas Patocka, Michal, eric.dumazet, mst, netdev, jasowang,
Randy Dunlap, linux-kernel, Matthew Wilcox, Hocko, linux-mm,
dm-devel, Vlastimil Babka, Andrew, David Rientjes, Morton,
virtualization, David Miller, edumazet
In-Reply-To: <1524697697.4100.23.camel@HansenPartnership.com>
>>>>> "James" == James Bottomley <James.Bottomley@HansenPartnership.com> writes:
James> On Wed, 2018-04-25 at 19:00 -0400, Mikulas Patocka wrote:
>>
>> On Wed, 25 Apr 2018, James Bottomley wrote:
>>
>> > > > Do we really need the new config option? This could just be
>> > > > manually tunable via fault injection IIUC.
>> > >
>> > > We do, because we want to enable it in RHEL and Fedora debugging
>> > > kernels, so that it will be tested by the users.
>> > >
>> > > The users won't use some extra magic kernel options or debugfs
>> files.
>> >
>> > If it can be enabled via a tunable, then the distro can turn it on
>> > without the user having to do anything. If you want to present the
>> > user with a different boot option, you can (just have the tunable
>> set
>> > on the command line), but being tunable driven means that you don't
>> > have to choose that option, you could automatically enable it under
>> a
>> > range of circumstances. I think most sane distributions would want
>> > that flexibility.
>> >
>> > Kconfig proliferation, conversely, is a bit of a nightmare from
>> both
>> > the user and the tester's point of view, so we're trying to avoid
>> it
>> > unless absolutely necessary.
>> >
>> > James
>>
>> BTW. even developers who compile their own kernel should have this
>> enabled by a CONFIG option - because if the developer sees the option
>> when browsing through menuconfig, he may enable it. If he doesn't see
>> the option, he won't even know that such an option exists.
James> I may be an atypical developer but I'd rather have a root canal
James> than browse through menuconfig options. The way to get people
James> to learn about new debugging options is to blog about it (or
James> write an lwn.net article) which google will find the next time
James> I ask it how I debug XXX. Google (probably as a service to
James> humanity) rarely turns up Kconfig options in response to a
James> query.
I agree with James here. Looking at the SLAB vs SLUB Kconfig entries
tells me *nothing* about why I should pick one or the other, as an
example.
John
^ permalink raw reply
* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Michael S. Tsirkin @ 2018-04-26 19:05 UTC (permalink / raw)
To: Mikulas Patocka
Cc: James Bottomley, Michal Hocko, David Rientjes, dm-devel,
eric.dumazet, netdev, jasowang, Randy Dunlap, linux-kernel,
Matthew Wilcox, linux-mm, edumazet, Andrew Morton, virtualization,
David Miller, Vlastimil Babka
In-Reply-To: <alpine.LRH.2.02.1804261454380.23716@file01.intranet.prod.int.rdu2.redhat.com>
On Thu, Apr 26, 2018 at 02:58:08PM -0400, Mikulas Patocka wrote:
>
>
> On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:
>
> > How do you make sure QA tests a specific corner case? Add it to
> > the test plan :)
>
> BTW. how many "lines of code" of corporate bureaucracy would that take? :-)
It's pretty easy at least here at Red Hat.
> > I don't speak for Red Hat, etc.
> >
> > --
> > MST
>
> Mikulas
^ permalink raw reply
* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Michael S. Tsirkin @ 2018-04-26 19:14 UTC (permalink / raw)
To: Mikulas Patocka
Cc: James Bottomley, Michal Hocko, David Rientjes, dm-devel,
eric.dumazet, netdev, jasowang, Randy Dunlap, linux-kernel,
Matthew Wilcox, linux-mm, edumazet, Andrew Morton, virtualization,
David Miller, Vlastimil Babka
In-Reply-To: <alpine.LRH.2.02.1804261451120.23716@file01.intranet.prod.int.rdu2.redhat.com>
On Thu, Apr 26, 2018 at 02:54:26PM -0400, Mikulas Patocka wrote:
>
>
> On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:
>
> > On Thu, Apr 26, 2018 at 12:07:25PM -0400, Mikulas Patocka wrote:
> > > > IIUC debug kernels mainly exist so people who experience e.g. memory
> > > > corruption can try and debug the failure. In this case, CONFIG_DEBUG_SG
> > > > will *already* catch a failure early. Nothing special needs to be done.
> > >
> > > The patch helps people debug such memory coprruptions (such as using DMA
> > > API on the result of kvmalloc).
> >
> > That's my point. I don't think your patch helps debug any memory
> > corruptions. With CONFIG_DEBUG_SG using DMA API already causes a
> > BUG_ON, that's before any memory can get corrupted.
>
> The patch turns a hard-to-reproduce bug into an easy-to-reproduce bug.
It's still not a memory corruption. It's a BUG_ON the source of which -
should it trigger - can be typically found using grep.
> Obviously we don't want this in production kernels, but in the debug
> kernels it should be done.
>
> Mikulas
I'm not so sure. debug kernels should make debugging easier,
definitely.
Unfortunately they are already slower so some races don't trigger.
If they also start crashing more because we are injecting
memory allocation errors, people are even less likely to
be able to use them.
Just add a comment near the BUG_ON within DMA API telling people how
they can inject this error some more if the bug does not
reproduce, and leave it at that.
--
MST
^ permalink raw reply
* Re: [PATCH net-next v2] Add Common Applications Kept Enhanced (cake) qdisc
From: kbuild test robot @ 2018-04-26 19:16 UTC (permalink / raw)
To: Toke Høiland-Jørgensen
Cc: kbuild-all, netdev, cake, Toke Høiland-Jørgensen,
Dave Taht
In-Reply-To: <20180424114407.5939-1-toke@toke.dk>
[-- Attachment #1: Type: text/plain, Size: 3367 bytes --]
Hi Toke,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on net-next/master]
url: https://github.com/0day-ci/linux/commits/Toke-H-iland-J-rgensen/Add-Common-Applications-Kept-Enhanced-cake-qdisc/20180426-064653
config: parisc-allmodconfig (attached as .config)
compiler: hppa-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=parisc
All warnings (new ones prefixed by >>):
vim +2589 net//sched/sch_cake.c
2525
2526 static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
2527 {
2528 struct cake_sched_data *q = qdisc_priv(sch);
2529 struct tc_cake_xstats *st;
2530 size_t size = (sizeof(*st) +
2531 sizeof(struct tc_cake_tin_stats) * q->tin_cnt);
2532 int i;
2533
2534 st = cake_zalloc(size);
2535
2536 if (!st)
2537 return -1;
2538
2539 st->version = 0x102; /* old userspace code discards versions > 0xFF */
2540 st->tin_stats_size = sizeof(struct tc_cake_tin_stats);
2541 st->tin_cnt = q->tin_cnt;
2542
2543 st->avg_trnoff = (q->avg_trnoff + 0x8000) >> 16;
2544 st->max_netlen = q->max_netlen;
2545 st->max_adjlen = q->max_adjlen;
2546 st->min_netlen = q->min_netlen;
2547 st->min_adjlen = q->min_adjlen;
2548
2549 for (i = 0; i < q->tin_cnt; i++) {
2550 struct cake_tin_data *b = &q->tins[q->tin_order[i]];
2551 struct tc_cake_tin_stats *tstat = &st->tin_stats[i];
2552
2553 tstat->threshold_rate = b->tin_rate_bps;
2554 tstat->target_us = cobalt_time_to_us(b->cparams.target);
2555 tstat->interval_us = cobalt_time_to_us(b->cparams.interval);
2556
2557 /* TODO FIXME: add missing aspects of these composite stats */
2558 tstat->sent.packets = b->packets;
2559 tstat->sent.bytes = b->bytes;
2560 tstat->dropped.packets = b->tin_dropped;
2561 tstat->ecn_marked.packets = b->tin_ecn_mark;
2562 tstat->backlog.bytes = b->tin_backlog;
2563 tstat->ack_drops.packets = b->ack_drops;
2564
2565 tstat->peak_delay_us = cobalt_time_to_us(b->peak_delay);
2566 tstat->avge_delay_us = cobalt_time_to_us(b->avge_delay);
2567 tstat->base_delay_us = cobalt_time_to_us(b->base_delay);
2568
2569 tstat->way_indirect_hits = b->way_hits;
2570 tstat->way_misses = b->way_misses;
2571 tstat->way_collisions = b->way_collisions;
2572
2573 tstat->sparse_flows = b->sparse_flow_count +
2574 b->decaying_flow_count;
2575 tstat->bulk_flows = b->bulk_flow_count;
2576 tstat->unresponse_flows = b->unresponsive_flow_count;
2577 tstat->spare = 0;
2578 tstat->max_skblen = b->max_skblen;
2579
2580 tstat->flow_quantum = b->flow_quantum;
2581 }
2582 st->capacity_estimate = q->avg_peak_bandwidth;
2583 st->memory_limit = q->buffer_limit;
2584 st->memory_used = q->buffer_max_used;
2585
2586 i = gnet_stats_copy_app(d, st, size);
2587 cake_free(st);
2588 return i;
> 2589 }
2590
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 52468 bytes --]
^ permalink raw reply
* Re: [PATCH net-next v2 00/11] udp gso
From: David Miller @ 2018-04-26 19:23 UTC (permalink / raw)
To: willemdebruijn.kernel; +Cc: netdev, alexander.duyck, willemb
In-Reply-To: <20180426174225.246388-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Date: Thu, 26 Apr 2018 13:42:14 -0400
> Segmentation offload reduces cycles/byte for large packets by
> amortizing the cost of protocol stack traversal.
>
> This patchset implements GSO for UDP. A process can concatenate and
> submit multiple datagrams to the same destination in one send call
> by setting socket option SOL_UDP/UDP_SEGMENT with the segment size,
> or passing an analogous cmsg at send time.
Looks great.
Build testing revealed that with ipv6=m we have to export
__udp_gso_segment (patch #2) and udp_cmsg_send (patch #6).
I added the exports while applying this series.
Nice work, thanks Willem!
^ permalink raw reply
* Re: [PATCH stable v4.4+] r8152: add Linksys USB3GIGV1 id
From: Grant Grundler @ 2018-04-26 19:34 UTC (permalink / raw)
To: Krzysztof Kozlowski
Cc: Grant Grundler, Oliver Neukum, David S. Miller, linux-usb, netdev,
LKML
In-Reply-To: <CAJKOXPcD2O8KhyrNj58fMRishdscVf9VoybWfMjezuZdETuibQ@mail.gmail.com>
On Thu, Apr 26, 2018 at 12:56 AM, Krzysztof Kozlowski <krzk@kernel.org> wrote:
> On Thu, Apr 26, 2018 at 2:40 AM, Grant Grundler <grundler@chromium.org> wrote:
>> On Wed, Apr 25, 2018 at 2:54 AM, Krzysztof Kozlowski <krzk@kernel.org>
>> wrote:
>>>
>>> commit 90841047a01b452cc8c3f9b990698b264143334a upstream
>>>
>>> This linksys dongle by default comes up in cdc_ether mode.
>>> This patch allows r8152 to claim the device:
>>> Bus 002 Device 002: ID 13b1:0041 Linksys
>>>
>>> Signed-off-by: Grant Grundler <grundler@chromium.org>
>>> Reviewed-by: Douglas Anderson <dianders@chromium.org>
>>> Signed-off-by: David S. Miller <davem@davemloft.net>
>>> [krzk: Rebase on v4.4]'
>>
>>
>> thanks krzk!
>>
>> FTR, to support RTL8153B (HW ID 0x6010), the follow patch series to bring
>> r8152 v1.09.9 driver from 4.14 kernel.org to 3 (of 5) older Chrome OS
>> kernels:
>>
>> 3.14:
>> https://chromium-review.googlesource.com/q/topic:%22update_r8152-3.14%22+(status:open%20OR%20status:merged)
>> 3.18:
>> https://chromium-review.googlesource.com/q/topic:%2522update-r8152-3.18%2522+(status:open+OR+status:merged)
>> 4.4:
>> https://chromium-review.googlesource.com/q/topic:%2522update_r8152-4.4%2522+(status:open+OR+status:merged)
>>
>> caveat: These series are not suitable directly for kernel.org submission
>> (extraneous stuff in the commit messages, order is different). Using the
>> original SHA1 (in each commit message), this can all be fixed up by
>> hand/simple scripts.
>
> Hi Grant,
>
> These are regular feature/patch backports so they do not fit into
> stable process. Only new quirks and IDs are accepted for stable.
Hi Krzysztof!
Sorry, I wasn't advocating for -stable inclusion. I shared in case
someone has unusually high USB ethernet requirements similar to Chrome
OS test lab which nearly all dongles I've tested can't provide.
Chrome OS test lab needs a USB ethernet dongle that reliably
negotiates a link (e.g. 10k iterations in a row). RTL8153 in general
are good (> 99.99% gets GigE link speed) but RTL8153B is the first
dongle that meets Chrome OS test lab requirements. The patch series
above is required to support RTL8153B.
cheers,
grant
^ permalink raw reply
* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Mikulas Patocka @ 2018-04-26 19:36 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: eric.dumazet, netdev, Randy Dunlap, linux-kernel, Matthew Wilcox,
Michal Hocko, James Bottomley, linux-mm, dm-devel,
Vlastimil Babka, David Rientjes, Andrew Morton, virtualization,
David Miller, edumazet
In-Reply-To: <20180426220523-mutt-send-email-mst@kernel.org>
On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:
> On Thu, Apr 26, 2018 at 02:54:26PM -0400, Mikulas Patocka wrote:
> >
> >
> > On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:
> >
> > > On Thu, Apr 26, 2018 at 12:07:25PM -0400, Mikulas Patocka wrote:
> > > > > IIUC debug kernels mainly exist so people who experience e.g. memory
> > > > > corruption can try and debug the failure. In this case, CONFIG_DEBUG_SG
> > > > > will *already* catch a failure early. Nothing special needs to be done.
> > > >
> > > > The patch helps people debug such memory coprruptions (such as using DMA
> > > > API on the result of kvmalloc).
> > >
> > > That's my point. I don't think your patch helps debug any memory
> > > corruptions. With CONFIG_DEBUG_SG using DMA API already causes a
> > > BUG_ON, that's before any memory can get corrupted.
> >
> > The patch turns a hard-to-reproduce bug into an easy-to-reproduce bug.
>
> It's still not a memory corruption. It's a BUG_ON the source of which -
> should it trigger - can be typically found using grep.
>
> > Obviously we don't want this in production kernels, but in the debug
> > kernels it should be done.
> >
> > Mikulas
>
> I'm not so sure. debug kernels should make debugging easier,
> definitely.
>
> Unfortunately they are already slower so some races don't trigger.
>
> If they also start crashing more because we are injecting
> memory allocation errors, people are even less likely to
> be able to use them.
I've actually already pushed this patch to RHEL-7 (just before 7.5 was
released) and it found out some powerpc issues. See the commit
ea376cc55bc3 in the RHEL-7 git. It was reverted just before RHEL-7.5 was
released with the intention that it will be reinstated just after RHEL-7.5
release, so that these issues could be found and eliminated in the
7.5->7.6 development cycle. Jeff Moyer asked me to put it upstream because
they want to follow upstream and they don't like RHEL-specific patches.
There's clear incentive to put this patch to RHEL-7, that's why I'm
posting it here.
> Just add a comment near the BUG_ON within DMA API telling people how
> they can inject this error some more if the bug does not
> reproduce, and leave it at that.
But the problem is that the powerpc bug only triggers with this patch. It
doesn't trigger without it. So, we have a potential random-crashing bug in
the codebase (and perhaps more others) and we want to eliminate them -
that's why we need the patch.
People on this list argue "this should be a kernel parameter". But the
testers won't enable the kernel parameter, the crashes won't happen
without the kernel parameter and the bugs will stay unreported and
uncorrected. That's why it needs to be the default.
Mikulas
^ permalink raw reply
* [PATCH] DT: net: can: rcar_canfd: document R8A77970 bindings
From: Sergei Shtylyov @ 2018-04-26 19:41 UTC (permalink / raw)
To: Marc Kleine-Budde, Rob Herring, linux-can, netdev, devicetree
Cc: Wolfgang Grandegger, Mark Rutland, linux-renesas-soc
Document the R-Car V3M (R8A77970) SoC support in the R-Car CAN-FD bindings.
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
---
The patch is against the 'linux-can-next.git' repo but I wouldn't object if
it's merged to the 'linux-can.git' repo instead. :-)
Documentation/devicetree/bindings/net/can/rcar_canfd.txt | 1 +
1 file changed, 1 insertion(+)
Index: linux-can-next/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
===================================================================
--- linux-can-next.orig/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
+++ linux-can-next/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
@@ -6,6 +6,7 @@ Required properties:
- "renesas,rcar-gen3-canfd" for R-Car Gen3 compatible controller.
- "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller.
- "renesas,r8a7796-canfd" for R8A7796 (R-Car M3) compatible controller.
+ - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller.
When compatible with the generic version, nodes must list the
SoC-specific version corresponding to the platform first, followed by the
^ permalink raw reply
* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Michael S. Tsirkin @ 2018-04-26 19:45 UTC (permalink / raw)
To: Mikulas Patocka
Cc: eric.dumazet, netdev, Randy Dunlap, linux-kernel, Matthew Wilcox,
Michal Hocko, James Bottomley, linux-mm, dm-devel,
Vlastimil Babka, David Rientjes, Andrew Morton, virtualization,
David Miller, edumazet
In-Reply-To: <alpine.LRH.2.02.1804261516250.26980@file01.intranet.prod.int.rdu2.redhat.com>
On Thu, Apr 26, 2018 at 03:36:14PM -0400, Mikulas Patocka wrote:
> People on this list argue "this should be a kernel parameter".
How about making it a writeable attribute, so it's easy to turn on/off
after boot. Then you can keep it deterministic, userspace can play with
the attribute at random if it wants to.
--
MST
^ permalink raw reply
* Re: [PATCH net-next v2 00/11] udp gso
From: Willem de Bruijn @ 2018-04-26 19:46 UTC (permalink / raw)
To: David Miller; +Cc: Network Development, Alexander Duyck, Willem de Bruijn
In-Reply-To: <20180426.152341.192100471142622918.davem@davemloft.net>
On Thu, Apr 26, 2018 at 3:23 PM, David Miller <davem@davemloft.net> wrote:
> From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
> Date: Thu, 26 Apr 2018 13:42:14 -0400
>
>> Segmentation offload reduces cycles/byte for large packets by
>> amortizing the cost of protocol stack traversal.
>>
>> This patchset implements GSO for UDP. A process can concatenate and
>> submit multiple datagrams to the same destination in one send call
>> by setting socket option SOL_UDP/UDP_SEGMENT with the segment size,
>> or passing an analogous cmsg at send time.
>
> Looks great.
>
> Build testing revealed that with ipv6=m we have to export
> __udp_gso_segment (patch #2) and udp_cmsg_send (patch #6).
Oops :/
Thanks for fixing up this breakage, David.
^ permalink raw reply
* [net 1/7] net/mlx5e: Allow offloading ipv4 header re-write for icmp
From: Saeed Mahameed @ 2018-04-26 19:58 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev, Jianbo Liu, Saeed Mahameed
In-Reply-To: <20180426195842.29665-1-saeedm@mellanox.com>
From: Jianbo Liu <jianbol@mellanox.com>
For ICMPv4, the checksum is calculated from the ICMP headers and data.
Since the ICMPv4 checksum doesn't cover the IP header, we can allow to
do L3 header re-write for this protocol.
Fixes: bdd66ac0aeed ('net/mlx5e: Disallow TC offloading of unsupported match/action combinations')
Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 4197001f9801..3c534fc43400 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1864,7 +1864,8 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
}
ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
- if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
+ if (modify_ip_header && ip_proto != IPPROTO_TCP &&
+ ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
pr_info("can't offload re-write of ip proto %d\n", ip_proto);
return false;
}
--
2.14.3
^ permalink raw reply related
* [pull request][net 0/7] Mellanox, mlx5 fixes 2018-04-26
From: Saeed Mahameed @ 2018-04-26 19:58 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev, Saeed Mahameed
Hi Dave,
This pull request includes fixes for mlx5 core and netdev driver.
Please pull and let me know if there's any problems.
For -stable v4.12
net/mlx5e: TX, Use correct counter in dma_map error flow
For -stable v4.13
net/mlx5: Avoid cleaning flow steering table twice during error flow
For -stable v4.14
net/mlx5e: Allow offloading ipv4 header re-write for icmp
For -stable v4.15
net/mlx5e: DCBNL fix min inline header size for dscp
For -stable v4.16
net/mlx5: Fix mlx5_get_vector_affinity function
Thanks,
Saeed.
---
The following changes since commit 25eb0ea7174c6e84f21fa59dccbddd0318b17b12:
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf (2018-04-25 22:55:33 -0400)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-fixes-2018-04-25
for you to fetch changes up to 202854e9f4df99df1f79962a9e8f94a7de602f7b:
net/mlx5: Properly deal with flow counters when deleting rules (2018-04-26 12:43:21 -0700)
----------------------------------------------------------------
mlx5-fixes-2018-04-25
----------------------------------------------------------------
Chris Mi (1):
net/mlx5: Properly deal with flow counters when deleting rules
Huy Nguyen (1):
net/mlx5e: DCBNL fix min inline header size for dscp
Israel Rukshin (1):
net/mlx5: Fix mlx5_get_vector_affinity function
Jianbo Liu (1):
net/mlx5e: Allow offloading ipv4 header re-write for icmp
Shahar Klein (1):
net/mlx5e: Fix traffic between VF and representor
Talat Batheesh (1):
net/mlx5: Avoid cleaning flow steering table twice during error flow
Tariq Toukan (1):
net/mlx5e: TX, Use correct counter in dma_map error flow
drivers/infiniband/hw/mlx5/main.c | 2 +-
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 ++++---
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 +++--
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++-
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 20 ++++++++---------
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 26 +++++++++++++---------
include/linux/mlx5/driver.h | 12 +++-------
7 files changed, 40 insertions(+), 36 deletions(-)
^ permalink raw reply
* [PATCH net-next 03/13] sctp: remove an if() that is always true
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>
As noticed by Xin Long, the if() here is always true as PMTU can never
be 0.
Reported-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
net/sctp/associola.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b3aa95222bd52113295cb246c503c903bdd5c353..c5ed09cfa8423b17546e3d45f6d06db03af66384 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1397,10 +1397,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
pmtu = t->pathmtu;
}
- if (pmtu) {
- asoc->pathmtu = pmtu;
- asoc->frag_point = sctp_frag_point(asoc, pmtu);
- }
+ asoc->pathmtu = pmtu;
+ asoc->frag_point = sctp_frag_point(asoc, pmtu);
pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc,
asoc->pathmtu, asoc->frag_point);
--
2.14.3
^ permalink raw reply related
* [PATCH net-next 00/13] sctp: refactor MTU handling
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
Currently MTU handling is spread over SCTP stack. There are multiple
places doing same/similar calculations and updating them is error prone
as one spot can easily be left out.
This patchset converges it into a more concise and consistent code. In
general, it moves MTU handling from functions with bigger objectives,
such as sctp_assoc_add_peer(), to specific functions.
It's also a preparation for the next patchset, which removes the
duplication between sctp_make_op_error_space and
sctp_make_op_error_fixed and relies on sctp_mtu_payload introduced here.
More details on each patch.
Marcelo Ricardo Leitner (13):
sctp: remove old and unused SCTP_MIN_PMTU
sctp: move transport pathmtu calc away of sctp_assoc_add_peer
sctp: remove an if() that is always true
sctp: introduce sctp_assoc_set_pmtu
sctp: introduce sctp_mtu_payload
sctp: introduce sctp_assoc_update_frag_point
sctp: remove sctp_assoc_pending_pmtu
sctp: introduce sctp_dst_mtu
sctp: remove sctp_transport_pmtu_check
sctp: re-use sctp_transport_pmtu in sctp_transport_route
sctp: honor PMTU_DISABLED when handling icmp
sctp: consider idata chunks when setting SCTP_MAXSEG
sctp: allow unsetting sockopt MAXSEG
include/net/sctp/constants.h | 5 ++--
include/net/sctp/sctp.h | 52 ++++++++++++++------------------------
include/net/sctp/structs.h | 2 ++
net/sctp/associola.c | 60 +++++++++++++++++++++++---------------------
net/sctp/chunk.c | 12 +--------
net/sctp/output.c | 28 ++++++++-------------
net/sctp/socket.c | 43 ++++++++++++++-----------------
net/sctp/transport.c | 37 ++++++++++++++-------------
8 files changed, 105 insertions(+), 134 deletions(-)
^ permalink raw reply
* [PATCH net-next v2 0/7] Microsemi Ocelot Ethernet switch support
From: Alexandre Belloni @ 2018-04-26 19:59 UTC (permalink / raw)
To: David S . Miller
Cc: Allan Nielsen, razvan.stefanescu, po.liu, Thomas Petazzoni,
Andrew Lunn, Florian Fainelli, netdev, devicetree, linux-kernel,
linux-mips, Alexandre Belloni, James Hogan
Hi,
This series adds initial support for the Microsemi Ethernet switch
present on Ocelot SoCs.
This only has bridging (and STP) support for now and it uses the
switchdev framework.
Coming features are VLAN filtering, link aggregation, IGMP snooping.
The switch can also be connected to an external CPU using PCIe.
Also, support for integration on other SoCs will be submitted.
The ocelot dts changes are here for reference and should probably go
through the MIPS tree once the bindings are accepted.
Changes in v2:
- Dropped Microsemi Ocelot PHY support
* MIIM driver:
- Documented interrupts bindings
- Moved the driver to drivers/net/phy/
- Removed unused mutex
- Removed MDIO bus scanning
* Switchdev driver:
- Changed compatible to mscc,vsc7514-switch
- Removed unused header inclusion
- Factorized MAC table selection in ocelot_mact_select()
- Disable the port in ocelot_port_stop()
- Fixed the smatch endianness warnings
- int to unsinged int where necessary
- Removed VID handling for the FDB it has been reworked anyway and will be
submitted with VLAN support
- Fixed up unused cases in ocelot_port_attr_set()
- Added a loop to register all the IO register spaces
- the ports are now in an ethernet-ports node
I've tried switching to NAPI but this is not working well, mainly because the
only way to disable interrupts is to actually mask them in the interrupt
controller (it is not possible to tell the switch to stop generating
interrupts).
Cc: James Hogan <jhogan@kernel.org>
Alexandre Belloni (7):
dt-bindings: net: add DT bindings for Microsemi MIIM
net: mscc: Add MDIO driver
dt-bindings: net: add DT bindings for Microsemi Ocelot Switch
net: mscc: Add initial Ocelot switch support
MIPS: mscc: Add switch to ocelot
MIPS: mscc: connect phys to ports on ocelot_pcb123
MAINTAINERS: Add entry for Microsemi Ethernet switches
.../devicetree/bindings/net/mscc-miim.txt | 26 +
.../devicetree/bindings/net/mscc-ocelot.txt | 82 +
MAINTAINERS | 6 +
arch/mips/boot/dts/mscc/ocelot.dtsi | 88 ++
arch/mips/boot/dts/mscc/ocelot_pcb123.dts | 20 +
drivers/net/ethernet/Kconfig | 1 +
drivers/net/ethernet/Makefile | 1 +
drivers/net/ethernet/mscc/Kconfig | 29 +
drivers/net/ethernet/mscc/Makefile | 5 +
drivers/net/ethernet/mscc/ocelot.c | 1316 +++++++++++++++++
drivers/net/ethernet/mscc/ocelot.h | 554 +++++++
drivers/net/ethernet/mscc/ocelot_ana.h | 625 ++++++++
drivers/net/ethernet/mscc/ocelot_board.c | 313 ++++
drivers/net/ethernet/mscc/ocelot_dev.h | 275 ++++
drivers/net/ethernet/mscc/ocelot_dev_gmii.h | 154 ++
drivers/net/ethernet/mscc/ocelot_hsio.h | 785 ++++++++++
drivers/net/ethernet/mscc/ocelot_io.c | 116 ++
drivers/net/ethernet/mscc/ocelot_qs.h | 78 +
drivers/net/ethernet/mscc/ocelot_qsys.h | 270 ++++
drivers/net/ethernet/mscc/ocelot_regs.c | 399 +++++
drivers/net/ethernet/mscc/ocelot_rew.h | 81 +
drivers/net/ethernet/mscc/ocelot_sys.h | 140 ++
drivers/net/phy/Kconfig | 7 +
drivers/net/phy/Makefile | 1 +
drivers/net/phy/mdio-mscc-miim.c | 197 +++
25 files changed, 5569 insertions(+)
create mode 100644 Documentation/devicetree/bindings/net/mscc-miim.txt
create mode 100644 Documentation/devicetree/bindings/net/mscc-ocelot.txt
create mode 100644 drivers/net/ethernet/mscc/Kconfig
create mode 100644 drivers/net/ethernet/mscc/Makefile
create mode 100644 drivers/net/ethernet/mscc/ocelot.c
create mode 100644 drivers/net/ethernet/mscc/ocelot.h
create mode 100644 drivers/net/ethernet/mscc/ocelot_ana.h
create mode 100644 drivers/net/ethernet/mscc/ocelot_board.c
create mode 100644 drivers/net/ethernet/mscc/ocelot_dev.h
create mode 100644 drivers/net/ethernet/mscc/ocelot_dev_gmii.h
create mode 100644 drivers/net/ethernet/mscc/ocelot_hsio.h
create mode 100644 drivers/net/ethernet/mscc/ocelot_io.c
create mode 100644 drivers/net/ethernet/mscc/ocelot_qs.h
create mode 100644 drivers/net/ethernet/mscc/ocelot_qsys.h
create mode 100644 drivers/net/ethernet/mscc/ocelot_regs.c
create mode 100644 drivers/net/ethernet/mscc/ocelot_rew.h
create mode 100644 drivers/net/ethernet/mscc/ocelot_sys.h
create mode 100644 drivers/net/phy/mdio-mscc-miim.c
--
2.17.0
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox