* [PATCH net-next 1/2] sock_diag: create udp_info for udp socket stats
2015-10-05 19:55 [PATCH net-next 0/2] sock_diag: introducing udp_info Craig Gallek
@ 2015-10-05 19:55 ` Craig Gallek
2015-10-05 19:55 ` [PATCH net-next 2/2] sock_diag: initial udp_info metrics Craig Gallek
1 sibling, 0 replies; 3+ messages in thread
From: Craig Gallek @ 2015-10-05 19:55 UTC (permalink / raw)
To: Eric Dumazet, Willem de Bruijn, Marcelo Ricardo Leitner
Cc: kraigatgoog, David Miller, netdev
From: Craig Gallek <kraig@google.com>
struct udp_info is intended to be the UDP equivalent of tcp_info for the
INET_DIAG_INFO netlink attribute of TCPDIAG_GETSOCK and
SOCK_DIAG_BY_FAMILY.
Unfortunately, since clients may assume that all instances of the
INET_DIAG_INFO response attribute are serialized versions of tcp_info,
we need to define a new response attribute (INET_DIAG_UDP_INFO) for
returning the new udp_info struct.
This patch prepares the netlink code to return INET_DIAG_UDP_INFO and
udp_info (currently of size zero).
Signed-off-by: Craig Gallek <kraig@google.com>
---
include/linux/inet_diag.h | 1 +
include/uapi/linux/inet_diag.h | 3 ++-
include/uapi/linux/udp.h | 3 +++
net/dccp/diag.c | 1 +
net/ipv4/inet_diag.c | 25 ++++++++++++-------------
net/ipv4/tcp_diag.c | 1 +
net/ipv4/udp_diag.c | 6 ++++--
7 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0e707f0..bcf1189 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -25,6 +25,7 @@ struct inet_diag_handler {
void *info);
__u16 idiag_type;
__u16 idiag_info_size;
+ __u8 idiag_info_type;
};
struct inet_connection_sock;
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 68a1f71..5ad029a 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -113,9 +113,10 @@ enum {
INET_DIAG_DCTCPINFO,
INET_DIAG_PROTOCOL, /* response attribute only */
INET_DIAG_SKV6ONLY,
+ INET_DIAG_UDP_INFO,
};
-#define INET_DIAG_MAX INET_DIAG_SKV6ONLY
+#define INET_DIAG_MAX INET_DIAG_UDP_INFO
/* INET_DIAG_MEM */
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index 16574ea..6ba37dc 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -37,5 +37,8 @@ struct udphdr {
#define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-06 */
#define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */
+struct udp_info {
+};
+
#endif /* _UAPI_LINUX_UDP_H */
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 2d84303..3c1bf5c 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -67,6 +67,7 @@ static const struct inet_diag_handler dccp_diag_handler = {
.idiag_get_info = dccp_diag_get_info,
.idiag_type = IPPROTO_DCCP,
.idiag_info_size = sizeof(struct tcp_info),
+ .idiag_info_type = INET_DIAG_INFO,
};
static int __init dccp_diag_init(void)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ab9f8a6..581b335 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -177,8 +177,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
goto errout;
+ if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
+ attr = nla_reserve(skb, handler->idiag_info_type,
+ handler->idiag_info_size);
+ if (!attr)
+ goto errout;
+
+ info = nla_data(attr);
+ }
+ handler->idiag_get_info(sk, r, info);
+
if (!icsk) {
- handler->idiag_get_info(sk, r, NULL);
goto out;
}
@@ -204,15 +213,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
}
#undef EXPIRES_IN_MS
- if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
- attr = nla_reserve(skb, INET_DIAG_INFO,
- handler->idiag_info_size);
- if (!attr)
- goto errout;
-
- info = nla_data(attr);
- }
-
if (ext & (1 << (INET_DIAG_CONG - 1))) {
int err = 0;
@@ -225,8 +225,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
- handler->idiag_get_info(sk, r, info);
-
if (sk->sk_state < TCP_TIME_WAIT) {
union tcp_cc_info info;
size_t sz = 0;
@@ -1036,7 +1034,8 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
}
attr = handler->idiag_info_size
- ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size)
+ ? nla_reserve(skb, handler->idiag_info_type,
+ handler->idiag_info_size)
: NULL;
if (attr)
info = nla_data(attr);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 479f349..66e044b 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -52,6 +52,7 @@ static const struct inet_diag_handler tcp_diag_handler = {
.idiag_get_info = tcp_diag_get_info,
.idiag_type = IPPROTO_TCP,
.idiag_info_size = sizeof(struct tcp_info),
+ .idiag_info_type = INET_DIAG_INFO,
};
static int __init tcp_diag_init(void)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 6116604..db48698 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -170,7 +170,8 @@ static const struct inet_diag_handler udp_diag_handler = {
.dump_one = udp_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
.idiag_type = IPPROTO_UDP,
- .idiag_info_size = 0,
+ .idiag_info_size = sizeof(struct udp_info),
+ .idiag_info_type = INET_DIAG_UDP_INFO,
};
static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
@@ -191,7 +192,8 @@ static const struct inet_diag_handler udplite_diag_handler = {
.dump_one = udplite_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
.idiag_type = IPPROTO_UDPLITE,
- .idiag_info_size = 0,
+ .idiag_info_size = sizeof(struct udp_info),
+ .idiag_info_type = INET_DIAG_UDP_INFO,
};
static int __init udp_diag_init(void)
--
2.6.0.rc2.230.g3dd15c0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH net-next 2/2] sock_diag: initial udp_info metrics
2015-10-05 19:55 [PATCH net-next 0/2] sock_diag: introducing udp_info Craig Gallek
2015-10-05 19:55 ` [PATCH net-next 1/2] sock_diag: create udp_info for udp socket stats Craig Gallek
@ 2015-10-05 19:55 ` Craig Gallek
1 sibling, 0 replies; 3+ messages in thread
From: Craig Gallek @ 2015-10-05 19:55 UTC (permalink / raw)
To: Eric Dumazet, Willem de Bruijn, Marcelo Ricardo Leitner
Cc: kraigatgoog, David Miller, netdev
From: Craig Gallek <kraig@google.com>
Define per-UDP socket metrics for counting datagrams in and out and
bytes in and out. These four metrics are also exposed through the
INET_DIAG_UDP_INFO netlink attribute of the SOCK_DIAG_BY_FAMILY interface.
Performance test configuration to maximize cache misses across CPU sockets:
2x 12-core Xeon
Single 10GbE link, 8 RX/8 TX queues
Receive test: Single process with 10 threads each reading a single byte
from a UDP socket (pinned to cores 6-10 and 18-22). RX queues pinned to
cores 2-5 and 14-17. RX queues/soft interrupts were saturated with a
remote trafgen process. Userspace threads used ~90% of each core. This
configuration allowed a receive rate of ~440K datagrams per second.
There was no noticeable change in throughput after this patch. The
dominating factor both before and after is the taking of the socket lock
in udp_recvmsg in order to free an skb (skb_free_datagram_locked).
Send test: A single process with 8 threads sending one byte messages
through a single UDP socket (pinned to cores 6-9 and 18-21). TX queues
pinned to the same cores with XPS. Transmit complete interrupts pinned
to cores 2-5 and 14-17. This configuration allowed a send rate
of ~2 million datagrams per second. This benchmark did not show a noticeable
change in datagram throughput. udp_sendmsg appears to already incur
this cacheline miss because of the IS_UDPLITE check, and the dominating
bottleneck of the function is the route lookup.
Tested:
lpaa15:~# nc -4 -l -u -p 8888 | lpaa16:~# nc -4 -u lpaa15 8888
a <- a
bb -> bb
ccc <- ccc
^C
lpaa15:~# nc -6 -l -u -p 8888 | lpaa16:~# nc -6 -u lpaa15 8888
a <- a
bb -> bb
ccc <- ccc
^C
While also running:
lpaa15:~# /tmp/ss -Ei dst lpaa16
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
udp ESTAB 0 -1 10.246.7.143:8888 10.246.7.144:39130
bytes_in: 6 bytes_out: 3 dgrams_in: 2 dgrams_out: 1
udp6 ESTAB 0 -1 fd1d:c486:7f89:1709::1:8888 fd1d:c486:7f89:1709::2:48675
bytes_in: 6 bytes_out: 3 dgrams_in: 2 dgrams_out: 1
Signed-off-by: Craig Gallek <kraig@google.com>
---
include/linux/udp.h | 7 +++++++
include/net/udp.h | 2 ++
include/uapi/linux/udp.h | 4 ++++
net/ipv4/udp.c | 26 ++++++++++++++++++++++++--
net/ipv4/udp_diag.c | 12 ++++++++++++
net/ipv6/udp.c | 14 +++++++++++++-
6 files changed, 62 insertions(+), 3 deletions(-)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 87c0949..7969675 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -19,6 +19,7 @@
#include <net/inet_sock.h>
#include <linux/skbuff.h>
+#include <linux/spinlock_types.h>
#include <net/netns/hash.h>
#include <uapi/linux/udp.h>
@@ -55,6 +56,12 @@ struct udp_sock {
* when the socket is uncorked.
*/
__u16 len; /* total length of pending frames */
+
+ spinlock_t stats_lock; /* lock for statistics counters */
+ __u64 dgrams_out; /* total datagrams sent*/
+ __u64 bytes_out; /* total bytes sent */
+ __u64 dgrams_in; /* total datagrams received */
+ __u64 bytes_in; /* total bytes received */
/*
* Fields specific to UDP-Lite.
*/
diff --git a/include/net/udp.h b/include/net/udp.h
index 6d4ed18..7e4a95b 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -185,6 +185,8 @@ static inline void udp_lib_hash(struct sock *sk)
void udp_lib_unhash(struct sock *sk);
void udp_lib_rehash(struct sock *sk, u16 new_hash);
+int udp_lib_init_sock(struct sock *sk);
+
static inline void udp_lib_close(struct sock *sk, long timeout)
{
sk_common_release(sk);
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index 6ba37dc..36cc00c 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -38,6 +38,10 @@ struct udphdr {
#define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */
struct udp_info {
+ __u64 udpi_dgrams_out;
+ __u64 udpi_bytes_out;
+ __u64 udpi_dgrams_in;
+ __u64 udpi_bytes_in;
};
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 156ba75..0909118 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -797,6 +797,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
{
struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
struct udphdr *uh;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
@@ -843,9 +844,14 @@ send:
UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
}
- } else
+ } else {
UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_OUTDATAGRAMS, is_udplite);
+ spin_lock(&up->stats_lock);
+ up->dgrams_out++;
+ up->bytes_out += len - sizeof(struct udphdr);
+ spin_unlock(&up->stats_lock);
+ }
return err;
}
@@ -1277,6 +1283,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
struct sk_buff *skb;
unsigned int ulen, copied;
@@ -1333,9 +1340,14 @@ try_again:
goto out_free;
}
- if (!peeked)
+ if (!peeked) {
UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
+ spin_lock(&up->stats_lock);
+ up->dgrams_in++;
+ up->bytes_in += skb->len - sizeof(struct udphdr);
+ spin_unlock(&up->stats_lock);
+ }
sock_recv_ts_and_drops(msg, sk, skb);
@@ -2036,6 +2048,15 @@ int udp_rcv(struct sk_buff *skb)
return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
}
+int udp_lib_init_sock(struct sock *sk)
+{
+ struct udp_sock *up = udp_sk(sk);
+
+ spin_lock_init(&up->stats_lock);
+ return 0;
+}
+EXPORT_SYMBOL(udp_lib_init_sock);
+
void udp_destroy_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
@@ -2273,6 +2294,7 @@ struct proto udp_prot = {
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
+ .init = udp_lib_init_sock,
.destroy = udp_destroy_sock,
.setsockopt = udp_setsockopt,
.getsockopt = udp_getsockopt,
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index db48698..346be40 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -161,8 +161,20 @@ static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
void *info)
{
+ struct udp_sock *up = udp_sk(sk);
+ struct udp_info *i = info;
+
r->idiag_rqueue = sk_rmem_alloc_get(sk);
r->idiag_wqueue = sk_wmem_alloc_get(sk);
+ if (!info)
+ return;
+
+ spin_lock(&up->stats_lock);
+ i->udpi_dgrams_out = up->dgrams_out;
+ i->udpi_bytes_out = up->bytes_out;
+ i->udpi_dgrams_in = up->dgrams_in;
+ i->udpi_bytes_in = up->bytes_in;
+ spin_unlock(&up->stats_lock);
}
static const struct inet_diag_handler udp_diag_handler = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0aba654..0db2ad4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -394,6 +394,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
struct sk_buff *skb;
unsigned int ulen, copied;
int peeked, off = 0;
@@ -464,6 +465,10 @@ try_again:
else
UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
+ spin_lock(&up->stats_lock);
+ up->dgrams_in++;
+ up->bytes_in += skb->len - sizeof(struct udphdr);
+ spin_unlock(&up->stats_lock);
}
sock_recv_ts_and_drops(msg, sk, skb);
@@ -1024,6 +1029,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
{
struct sock *sk = skb->sk;
+ struct udp_sock *up = udp_sk(sk);
struct udphdr *uh;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
@@ -1065,9 +1071,14 @@ send:
UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
}
- } else
+ } else {
UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_OUTDATAGRAMS, is_udplite);
+ spin_lock(&up->stats_lock);
+ up->dgrams_out++;
+ up->bytes_out += len - sizeof(struct udphdr);
+ spin_unlock(&up->stats_lock);
+ }
return err;
}
@@ -1522,6 +1533,7 @@ struct proto udpv6_prot = {
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
+ .init = udp_lib_init_sock,
.destroy = udpv6_destroy_sock,
.setsockopt = udpv6_setsockopt,
.getsockopt = udpv6_getsockopt,
--
2.6.0.rc2.230.g3dd15c0
^ permalink raw reply related [flat|nested] 3+ messages in thread