* [PATCH 1/5] udp: fix send buffer check
2007-11-15 21:44 [PATCH 0/5] UDP memory accounting and limitation (take 8) Hideo AOKI
@ 2007-11-15 21:49 ` Hideo AOKI
2007-11-15 21:49 ` [PATCH 2/5] udp: accounting unit and variable Hideo AOKI
` (3 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Hideo AOKI @ 2007-11-15 21:49 UTC (permalink / raw)
To: David Miller, netdev
Cc: Satoshi Oshima, Herbert Xu, Bill Fink, Andi Kleen,
Evgeniy Polyakov, Stephen Hemminger, yoshfuji, Yumiko Sugita
This patch introduces a sndbuf size check before memory is allocated for
the send buffer.
signed-off-by: Satoshi Oshima <satoshi.oshima.fk@hitachi.com>
signed-off-by: Hideo Aoki <haoki@redhat.com>
---
ip_output.c | 5 +++++
1 file changed, 5 insertions(+)
diff -pruN net-2.6/net/ipv4/ip_output.c net-2.6-udp-p1/net/ipv4/ip_output.c
--- net-2.6/net/ipv4/ip_output.c 2007-11-14 10:49:06.000000000 -0500
+++ net-2.6-udp-p1/net/ipv4/ip_output.c 2007-11-15 14:44:11.000000000 -0500
@@ -1004,6 +1004,11 @@ alloc_new_skb:
frag = &skb_shinfo(skb)->frags[i];
}
} else if (i < MAX_SKB_FRAGS) {
+ if (atomic_read(&sk->sk_wmem_alloc) + PAGE_SIZE
+ > 2 * sk->sk_sndbuf) {
+ err = -ENOBUFS;
+ goto error;
+ }
if (copy > PAGE_SIZE)
copy = PAGE_SIZE;
page = alloc_pages(sk->sk_allocation, 0);
--
Hitachi Computer Products (America) Inc.
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 2/5] udp: accounting unit and variable
2007-11-15 21:44 [PATCH 0/5] UDP memory accounting and limitation (take 8) Hideo AOKI
2007-11-15 21:49 ` [PATCH 1/5] udp: fix send buffer check Hideo AOKI
@ 2007-11-15 21:49 ` Hideo AOKI
2007-11-15 21:50 ` [PATCH 3/5] udp: memory accounting Hideo AOKI
` (2 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Hideo AOKI @ 2007-11-15 21:49 UTC (permalink / raw)
To: David Miller, netdev
Cc: Satoshi Oshima, Herbert Xu, Bill Fink, Andi Kleen,
Evgeniy Polyakov, Stephen Hemminger, yoshfuji, Yumiko Sugita
This patch introduces a global variable for UDP memory accounting.
The accounting unit is a page.
signed-off-by: Satoshi Oshima <satoshi.oshima.fk@hitachi.com>
signed-off-by: Hideo Aoki <haoki@redhat.com>
---
include/net/sock.h | 8 ++++++++
include/net/udp.h | 2 ++
net/ipv4/proc.c | 3 ++-
net/ipv4/udp.c | 2 ++
4 files changed, 14 insertions(+), 1 deletion(-)
diff -pruN net-2.6-udp-p1/include/net/sock.h net-2.6-udp-p2/include/net/sock.h
--- net-2.6-udp-p1/include/net/sock.h 2007-11-15 12:42:04.000000000 -0500
+++ net-2.6-udp-p2/include/net/sock.h 2007-11-15 14:44:13.000000000 -0500
@@ -778,6 +778,14 @@ static inline int sk_stream_wmem_schedul
sk_stream_mem_schedule(sk, size, 0);
}
+#define SK_DATAGRAM_MEM_QUANTUM ((unsigned int)PAGE_SIZE)
+
+static inline int sk_datagram_pages(int amt)
+{
+ /* Cast to unsigned as an optimization, since amt is always positive. */
+ return DIV_ROUND_UP((unsigned int)amt, SK_DATAGRAM_MEM_QUANTUM);
+}
+
/* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming
diff -pruN net-2.6-udp-p1/include/net/udp.h net-2.6-udp-p2/include/net/udp.h
--- net-2.6-udp-p1/include/net/udp.h 2007-11-14 10:49:05.000000000 -0500
+++ net-2.6-udp-p2/include/net/udp.h 2007-11-15 14:44:13.000000000 -0500
@@ -65,6 +65,8 @@ extern rwlock_t udp_hash_lock;
extern struct proto udp_prot;
+extern atomic_t udp_memory_allocated;
+
struct sk_buff;
/*
diff -pruN net-2.6-udp-p1/net/ipv4/proc.c net-2.6-udp-p2/net/ipv4/proc.c
--- net-2.6-udp-p1/net/ipv4/proc.c 2007-11-14 10:49:07.000000000 -0500
+++ net-2.6-udp-p2/net/ipv4/proc.c 2007-11-15 14:44:13.000000000 -0500
@@ -56,7 +56,8 @@ static int sockstat_seq_show(struct seq_
sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated));
- seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot));
+ seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse(&udp_prot),
+ atomic_read(&udp_memory_allocated));
seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot));
seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n",
diff -pruN net-2.6-udp-p1/net/ipv4/udp.c net-2.6-udp-p2/net/ipv4/udp.c
--- net-2.6-udp-p1/net/ipv4/udp.c 2007-11-14 10:49:07.000000000 -0500
+++ net-2.6-udp-p2/net/ipv4/udp.c 2007-11-15 14:44:13.000000000 -0500
@@ -114,6 +114,8 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_sta
struct hlist_head udp_hash[UDP_HTABLE_SIZE];
DEFINE_RWLOCK(udp_hash_lock);
+atomic_t udp_memory_allocated;
+
static inline int __udp_lib_lport_inuse(__u16 num,
const struct hlist_head udptable[])
{
--
Hitachi Computer Products (America) Inc.
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 3/5] udp: memory accounting
2007-11-15 21:44 [PATCH 0/5] UDP memory accounting and limitation (take 8) Hideo AOKI
2007-11-15 21:49 ` [PATCH 1/5] udp: fix send buffer check Hideo AOKI
2007-11-15 21:49 ` [PATCH 2/5] udp: accounting unit and variable Hideo AOKI
@ 2007-11-15 21:50 ` Hideo AOKI
2007-11-15 21:50 ` [PATCH 4/5] udp: memory limitation by using udp_mem Hideo AOKI
2007-11-15 21:50 ` [PATCH 5/5] udp: add udp_rmem_min and udp_wmem_min Hideo AOKI
4 siblings, 0 replies; 11+ messages in thread
From: Hideo AOKI @ 2007-11-15 21:50 UTC (permalink / raw)
To: David Miller, netdev
Cc: Satoshi Oshima, Herbert Xu, Bill Fink, Andi Kleen,
Evgeniy Polyakov, Stephen Hemminger, yoshfuji, Yumiko Sugita
This patch adds UDP memory usage accounting in IPv4.
signed-off-by: Satoshi Oshima <satoshi.oshima.fk@hitachi.com>
signed-off-by: Hideo Aoki <haoki@redhat.com>
---
af_inet.c | 30 +++++++++++++++++++++++++++++-
ip_output.c | 25 ++++++++++++++++++++++---
udp.c | 10 ++++++++++
3 files changed, 61 insertions(+), 4 deletions(-)
diff -pruN net-2.6-udp-p2/net/ipv4/af_inet.c net-2.6-udp-p3/net/ipv4/af_inet.c
--- net-2.6-udp-p2/net/ipv4/af_inet.c 2007-11-14 10:49:06.000000000 -0500
+++ net-2.6-udp-p3/net/ipv4/af_inet.c 2007-11-15 14:44:18.000000000 -0500
@@ -126,13 +126,41 @@ extern void ip_mc_drop_socket(struct soc
static struct list_head inetsw[SOCK_MAX];
static DEFINE_SPINLOCK(inetsw_lock);
+/**
+ * __skb_queue_purge_and_sub_memory_allocated
+ * - empty a list and subtract memory allocation counter
+ * @sk: sk
+ * @list: list to empty
+ * Delete all buffers on an &sk_buff list and subtract the
+ * truesize of the sk_buff for memory accounting. Each buffer
+ * is removed from the list and one reference dropped. This
+ * function does not take the list lock and the caller must
+ * hold the relevant locks to use it.
+ */
+static inline void __skb_queue_purge_and_sub_memory_allocated(struct sock *sk,
+ struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+ int purged_skb_size = 0;
+ while ((skb = __skb_dequeue(list)) != NULL) {
+ purged_skb_size += sk_datagram_pages(skb->truesize);
+ kfree_skb(skb);
+ }
+ atomic_sub(purged_skb_size, sk->sk_prot->memory_allocated);
+}
+
/* New destruction routine */
void inet_sock_destruct(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
- __skb_queue_purge(&sk->sk_receive_queue);
+ if (sk->sk_prot->memory_allocated && sk->sk_type != SOCK_STREAM)
+ __skb_queue_purge_and_sub_memory_allocated(sk,
+ &sk->sk_receive_queue);
+ else
+ __skb_queue_purge(&sk->sk_receive_queue);
+
__skb_queue_purge(&sk->sk_error_queue);
if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
diff -pruN net-2.6-udp-p2/net/ipv4/ip_output.c net-2.6-udp-p3/net/ipv4/ip_output.c
--- net-2.6-udp-p2/net/ipv4/ip_output.c 2007-11-15 14:44:11.000000000 -0500
+++ net-2.6-udp-p3/net/ipv4/ip_output.c 2007-11-15 14:44:18.000000000 -0500
@@ -743,6 +743,8 @@ static inline int ip_ufo_append_data(str
/* specify the length of each IP datagram fragment*/
skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ atomic_add(sk_datagram_pages(skb->truesize),
+ sk->sk_prot->memory_allocated);
__skb_queue_tail(&sk->sk_write_queue, skb);
return 0;
@@ -924,6 +926,9 @@ alloc_new_skb:
}
if (skb == NULL)
goto error;
+ if (sk->sk_prot->memory_allocated)
+ atomic_add(sk_datagram_pages(skb->truesize),
+ sk->sk_prot->memory_allocated);
/*
* Fill in the control structures
@@ -1023,6 +1028,8 @@ alloc_new_skb:
frag = &skb_shinfo(skb)->frags[i];
skb->truesize += PAGE_SIZE;
atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+ if (sk->sk_prot->memory_allocated)
+ atomic_inc(sk->sk_prot->memory_allocated);
} else {
err = -EMSGSIZE;
goto error;
@@ -1123,7 +1130,9 @@ ssize_t ip_append_page(struct sock *sk,
if (unlikely(!skb)) {
err = -ENOBUFS;
goto error;
- }
+ } else if (sk->sk_prot->memory_allocated)
+ atomic_add(sk_datagram_pages(skb->truesize),
+ sk->sk_prot->memory_allocated);
/*
* Fill in the control structures
@@ -1213,13 +1222,14 @@ int ip_push_pending_frames(struct sock *
struct iphdr *iph;
__be16 df = 0;
__u8 ttl;
- int err = 0;
+ int err = 0, send_page_size;
if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
goto out;
tail_skb = &(skb_shinfo(skb)->frag_list);
/* move skb->data to ip header from ext header */
+ send_page_size = sk_datagram_pages(skb->truesize);
if (skb->data < skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
@@ -1229,6 +1239,7 @@ int ip_push_pending_frames(struct sock *
skb->len += tmp_skb->len;
skb->data_len += tmp_skb->len;
skb->truesize += tmp_skb->truesize;
+ send_page_size += sk_datagram_pages(tmp_skb->truesize);
__sock_put(tmp_skb->sk);
tmp_skb->destructor = NULL;
tmp_skb->sk = NULL;
@@ -1284,6 +1295,8 @@ int ip_push_pending_frames(struct sock *
/* Netfilter gets whole the not fragmented skb. */
err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
skb->dst->dev, dst_output);
+ if (sk->sk_prot->memory_allocated)
+ atomic_sub(send_page_size, sk->sk_prot->memory_allocated);
if (err) {
if (err > 0)
err = inet->recverr ? net_xmit_errno(err) : 0;
@@ -1306,9 +1319,15 @@ error:
void ip_flush_pending_frames(struct sock *sk)
{
struct sk_buff *skb;
+ int num_flush_mem = 0;
- while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
+ while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
+ num_flush_mem += sk_datagram_pages(skb->truesize);
kfree_skb(skb);
+ }
+
+ if (sk->sk_prot->memory_allocated)
+ atomic_sub(num_flush_mem, sk->sk_prot->memory_allocated);
ip_cork_release(inet_sk(sk));
}
diff -pruN net-2.6-udp-p2/net/ipv4/udp.c net-2.6-udp-p3/net/ipv4/udp.c
--- net-2.6-udp-p2/net/ipv4/udp.c 2007-11-15 14:44:13.000000000 -0500
+++ net-2.6-udp-p3/net/ipv4/udp.c 2007-11-15 14:44:18.000000000 -0500
@@ -829,6 +829,7 @@ int udp_recvmsg(struct kiocb *iocb, stru
unsigned int ulen, copied;
int err;
int is_udplite = IS_UDPLITE(sk);
+ int truesize;
/*
* Check any passed addresses
@@ -893,14 +894,19 @@ try_again:
err = ulen;
out_free:
+ truesize = skb->truesize;
skb_free_datagram(sk, skb);
+ atomic_sub(sk_datagram_pages(truesize), sk->sk_prot->memory_allocated);
+
out:
return err;
csum_copy_err:
UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+ truesize = skb->truesize;
skb_kill_datagram(sk, skb, flags);
+ atomic_sub(sk_datagram_pages(truesize), sk->sk_prot->memory_allocated);
if (noblock)
return -EAGAIN;
@@ -1025,6 +1031,9 @@ int udp_queue_rcv_skb(struct sock * sk,
goto drop;
}
+ atomic_add(sk_datagram_pages(skb->truesize),
+ sk->sk_prot->memory_allocated);
+
UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
return 0;
@@ -1451,6 +1460,7 @@ struct proto udp_prot = {
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
.obj_size = sizeof(struct udp_sock),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
--
Hitachi Computer Products (America) Inc.
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 4/5] udp: memory limitation by using udp_mem
2007-11-15 21:44 [PATCH 0/5] UDP memory accounting and limitation (take 8) Hideo AOKI
` (2 preceding siblings ...)
2007-11-15 21:50 ` [PATCH 3/5] udp: memory accounting Hideo AOKI
@ 2007-11-15 21:50 ` Hideo AOKI
2007-11-15 23:23 ` David Miller
2007-11-15 21:50 ` [PATCH 5/5] udp: add udp_rmem_min and udp_wmem_min Hideo AOKI
4 siblings, 1 reply; 11+ messages in thread
From: Hideo AOKI @ 2007-11-15 21:50 UTC (permalink / raw)
To: David Miller, netdev
Cc: Satoshi Oshima, Herbert Xu, Bill Fink, Andi Kleen,
Evgeniy Polyakov, Stephen Hemminger, yoshfuji, Yumiko Sugita
This patch introduces memory limitation for UDP.
signed-off-by: Satoshi Oshima <satoshi.oshima.fk@hitachi.com>
signed-off-by: Hideo Aoki <haoki@redhat.com>
---
Documentation/networking/ip-sysctl.txt | 6 ++++
include/net/udp.h | 3 ++
net/ipv4/af_inet.c | 3 ++
net/ipv4/ip_output.c | 47 ++++++++++++++++++++++++++++++---
net/ipv4/sysctl_net_ipv4.c | 11 +++++++
net/ipv4/udp.c | 24 ++++++++++++++++
6 files changed, 91 insertions(+), 3 deletions(-)
diff -pruN net-2.6-udp-p3/Documentation/networking/ip-sysctl.txt net-2.6-udp-p4/Documentation/networking/ip-sysctl.txt
--- net-2.6-udp-p3/Documentation/networking/ip-sysctl.txt 2007-11-14 10:48:49.000000000 -0500
+++ net-2.6-udp-p4/Documentation/networking/ip-sysctl.txt 2007-11-15 14:44:21.000000000 -0500
@@ -446,6 +446,12 @@ tcp_dma_copybreak - INTEGER
and CONFIG_NET_DMA is enabled.
Default: 4096
+UDP variables:
+
+udp_mem - INTEGER
+ Number of pages allowed for queueing by all UDP sockets.
+ Default is calculated at boot time from amount of available memory.
+
CIPSOv4 Variables:
cipso_cache_enable - BOOLEAN
diff -pruN net-2.6-udp-p3/include/net/udp.h net-2.6-udp-p4/include/net/udp.h
--- net-2.6-udp-p3/include/net/udp.h 2007-11-15 14:44:13.000000000 -0500
+++ net-2.6-udp-p4/include/net/udp.h 2007-11-15 14:44:21.000000000 -0500
@@ -66,6 +66,7 @@ extern rwlock_t udp_hash_lock;
extern struct proto udp_prot;
extern atomic_t udp_memory_allocated;
+extern int sysctl_udp_mem;
struct sk_buff;
@@ -175,4 +176,6 @@ extern void udp_proc_unregister(struct u
extern int udp4_proc_init(void);
extern void udp4_proc_exit(void);
#endif
+
+extern void udp_init(void);
#endif /* _UDP_H */
diff -pruN net-2.6-udp-p3/net/ipv4/af_inet.c net-2.6-udp-p4/net/ipv4/af_inet.c
--- net-2.6-udp-p3/net/ipv4/af_inet.c 2007-11-15 14:44:18.000000000 -0500
+++ net-2.6-udp-p4/net/ipv4/af_inet.c 2007-11-15 14:44:21.000000000 -0500
@@ -1446,6 +1446,9 @@ static int __init inet_init(void)
/* Setup TCP slab cache for open requests. */
tcp_init();
+ /* Setup UDP memory threshold */
+ udp_init();
+
/* Add UDP-Lite (RFC 3828) */
udplite4_register();
diff -pruN net-2.6-udp-p3/net/ipv4/ip_output.c net-2.6-udp-p4/net/ipv4/ip_output.c
--- net-2.6-udp-p3/net/ipv4/ip_output.c 2007-11-15 14:44:18.000000000 -0500
+++ net-2.6-udp-p4/net/ipv4/ip_output.c 2007-11-15 14:44:21.000000000 -0500
@@ -75,6 +75,7 @@
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
+#include <net/udp.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
@@ -699,6 +700,20 @@ csum_page(struct page *page, int offset,
return csum;
}
+static inline int __ip_check_max_skb_pages(struct sock *sk, int size)
+{
+ switch(sk->sk_protocol) {
+ case IPPROTO_UDP:
+ if (atomic_read(sk->sk_prot->memory_allocated) + size
+ > sk->sk_prot->sysctl_mem[0])
+ return -ENOBUFS;
+ /* Fall through */
+ default:
+ break;
+ }
+ return 0;
+}
+
static inline int ip_ufo_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
@@ -707,16 +722,20 @@ static inline int ip_ufo_append_data(str
{
struct sk_buff *skb;
int err;
+ int size = 0;
/* There is support for UDP fragmentation offload by network
* device, so create one single skb packet containing complete
* udp datagram
*/
if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
- skb = sock_alloc_send_skb(sk,
- hh_len + fragheaderlen + transhdrlen + 20,
- (flags & MSG_DONTWAIT), &err);
+ size = hh_len + fragheaderlen + transhdrlen + 20;
+ err = __ip_check_max_skb_pages(sk, sk_datagram_pages(size));
+ if (err)
+ return err;
+ skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT),
+ &err);
if (skb == NULL)
return err;
@@ -737,6 +756,10 @@ static inline int ip_ufo_append_data(str
sk->sk_sndmsg_off = 0;
}
+ err = __ip_check_max_skb_pages(sk, sk_datagram_pages(size + length -
+ transhdrlen));
+ if (err)
+ goto fail;
err = skb_append_datato_frags(sk,skb, getfrag, from,
(length - transhdrlen));
if (!err) {
@@ -752,6 +775,7 @@ static inline int ip_ufo_append_data(str
/* There is not enough support do UFO ,
* so follow normal path
*/
+fail:
kfree_skb(skb);
return err;
}
@@ -910,6 +934,12 @@ alloc_new_skb:
if (datalen == length + fraggap)
alloclen += rt->u.dst.trailer_len;
+ err = __ip_check_max_skb_pages(sk,
+ sk_datagram_pages(SKB_DATA_ALIGN(alloclen + hh_len + 15)
+ + sizeof(struct sk_buff)));
+ if (err)
+ goto error;
+
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len + 15,
@@ -1009,6 +1039,11 @@ alloc_new_skb:
frag = &skb_shinfo(skb)->frags[i];
}
} else if (i < MAX_SKB_FRAGS) {
+ err = __ip_check_max_skb_pages(sk,
+ sk_datagram_pages(PAGE_SIZE));
+ if (err)
+ goto error;
+
if (atomic_read(&sk->sk_wmem_alloc) + PAGE_SIZE
> 2 * sk->sk_sndbuf) {
err = -ENOBUFS;
@@ -1126,6 +1161,12 @@ ssize_t ip_append_page(struct sock *sk,
fraggap = skb_prev->len - maxfraglen;
alloclen = fragheaderlen + hh_len + fraggap + 15;
+
+ err = __ip_check_max_skb_pages(sk,
+ sk_datagram_pages(alloclen + sizeof(struct sk_buff)));
+ if (err)
+ goto error;
+
skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
if (unlikely(!skb)) {
err = -ENOBUFS;
diff -pruN net-2.6-udp-p3/net/ipv4/sysctl_net_ipv4.c net-2.6-udp-p4/net/ipv4/sysctl_net_ipv4.c
--- net-2.6-udp-p3/net/ipv4/sysctl_net_ipv4.c 2007-11-14 10:49:07.000000000 -0500
+++ net-2.6-udp-p4/net/ipv4/sysctl_net_ipv4.c 2007-11-15 14:44:21.000000000 -0500
@@ -18,6 +18,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp.h>
+#include <net/udp.h>
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
@@ -885,6 +886,16 @@ ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "udp_mem",
+ .data = &sysctl_udp_mem,
+ .maxlen = sizeof(sysctl_udp_mem),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero
+ },
{ .ctl_name = 0 }
};
diff -pruN net-2.6-udp-p3/net/ipv4/udp.c net-2.6-udp-p4/net/ipv4/udp.c
--- net-2.6-udp-p3/net/ipv4/udp.c 2007-11-15 14:44:18.000000000 -0500
+++ net-2.6-udp-p4/net/ipv4/udp.c 2007-11-15 14:44:21.000000000 -0500
@@ -82,6 +82,7 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
+#include <linux/bootmem.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
@@ -115,6 +116,7 @@ struct hlist_head udp_hash[UDP_HTABLE_SI
DEFINE_RWLOCK(udp_hash_lock);
atomic_t udp_memory_allocated;
+int sysctl_udp_mem __read_mostly;
static inline int __udp_lib_lport_inuse(__u16 num,
const struct hlist_head udptable[])
@@ -1024,6 +1026,13 @@ int udp_queue_rcv_skb(struct sock * sk,
goto drop;
}
+ if ((atomic_read(sk->sk_prot->memory_allocated)
+ + sk_datagram_pages(skb->truesize))
+ > sk->sk_prot->sysctl_mem[0]) {
+ UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
+ goto drop;
+ }
+
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
@@ -1461,6 +1470,7 @@ struct proto udp_prot = {
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
.memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = &sysctl_udp_mem,
.obj_size = sizeof(struct udp_sock),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
@@ -1656,6 +1666,20 @@ void udp4_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
+void __init udp_init(void)
+{
+ unsigned long limit;
+
+ /* Set the pressure threshold up by the same strategy of TCP. It is a
+ * fraction of global memory that is up to 1/2 at 256 MB, decreasing
+ * toward zero with the amount of memory, with a floor of 128 pages.
+ */
+ limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+ limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+ limit = max(limit, 128UL);
+ sysctl_udp_mem = limit / 2 * 3;
+}
+
EXPORT_SYMBOL(udp_disconnect);
EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(udp_hash_lock);
--
Hitachi Computer Products (America) Inc.
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 4/5] udp: memory limitation by using udp_mem
2007-11-15 21:50 ` [PATCH 4/5] udp: memory limitation by using udp_mem Hideo AOKI
@ 2007-11-15 23:23 ` David Miller
2007-11-16 2:12 ` Herbert Xu
2007-11-21 23:25 ` Hideo AOKI
0 siblings, 2 replies; 11+ messages in thread
From: David Miller @ 2007-11-15 23:23 UTC (permalink / raw)
To: haoki
Cc: netdev, satoshi.oshima.fk, herbert, billfink, andi, johnpol,
shemminger, yoshfuji, yumiko.sugita.yf
From: Hideo AOKI <haoki@redhat.com>
Date: Thu, 15 Nov 2007 16:50:14 -0500
> +static inline int __ip_check_max_skb_pages(struct sock *sk, int size)
> +{
> + switch(sk->sk_protocol) {
> + case IPPROTO_UDP:
> + if (atomic_read(sk->sk_prot->memory_allocated) + size
> + > sk->sk_prot->sysctl_mem[0])
> + return -ENOBUFS;
> + /* Fall through */
> + default:
> + break;
> + }
> + return 0;
> +}
> +
This check misses UDPLITE, and this whole patch set is designed in a
way that makes errors like this easy.
These special case checks are all over the place.
We don't have tests all over the place to see if a socket is TCP or
DCCP or SCTP in order to implement memory accounting there, because we
did it for connection oriented protocols cleanly, separating things
via callbacks etc.
I would like to see the datagram memory accounting work similarly.
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 4/5] udp: memory limitation by using udp_mem
2007-11-15 23:23 ` David Miller
@ 2007-11-16 2:12 ` Herbert Xu
2007-11-17 2:52 ` Hideo AOKI
2007-11-21 23:25 ` Hideo AOKI
1 sibling, 1 reply; 11+ messages in thread
From: Herbert Xu @ 2007-11-16 2:12 UTC (permalink / raw)
To: David Miller
Cc: haoki, netdev, satoshi.oshima.fk, billfink, andi, johnpol,
shemminger, yoshfuji, yumiko.sugita.yf
On Thu, Nov 15, 2007 at 03:23:53PM -0800, David Miller wrote:
>
> We don't have tests all over the place to see if a socket is TCP or
> DCCP or SCTP in order to implement memory accounting there, because we
> did it for connection oriented protocols cleanly, separating things
> via callbacks etc.
>
> I would like to see the datagram memory accounting work similarly.
I agree. In fact if we adopt some of the conventions used by
stream protocols such as the use of sk_forward_alloc, we should
be able to share code with TCP accounting too.
As it is every packet updates a global counter, using sk_forward_alloc
would mean that for most packets you only update a per-socket counter
which then would feed into the global counter at points such as socket
creation and destruction.
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 4/5] udp: memory limitation by using udp_mem
2007-11-16 2:12 ` Herbert Xu
@ 2007-11-17 2:52 ` Hideo AOKI
2007-11-17 4:01 ` David Miller
0 siblings, 1 reply; 11+ messages in thread
From: Hideo AOKI @ 2007-11-17 2:52 UTC (permalink / raw)
To: Herbert Xu, David Miller
Cc: netdev, satoshi.oshima.fk, billfink, andi, johnpol, shemminger,
yoshfuji, yumiko.sugita.yf
Herbert Xu wrote:
> On Thu, Nov 15, 2007 at 03:23:53PM -0800, David Miller wrote:
>> We don't have tests all over the place to see if a socket is TCP or
>> DCCP or SCTP in order to implement memory accounting there, because we
>> did it for connection oriented protocols cleanly, separating things
>> via callbacks etc.
>>
>> I would like to see the datagram memory accounting work similarly.
>
> I agree. In fact if we adopt some of the conventions used by
> stream protocols such as the use of sk_forward_alloc, we should
> be able to share code with TCP accounting too.
>
> As it is every packet updates a global counter, using sk_forward_alloc
> would mean that for most packets you only update a per-socket counter
> which then would feed into the global counter at points such as socket
> creation and destruction.
>
> Cheers,
Hello,
I appreciate your comments.
I understand that the memory accounting code should avoid protocol-specific checks.
I'll improve this part in the next patch set.
Many thanks,
Hideo
--
Hitachi Computer Products (America) Inc.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 4/5] udp: memory limitation by using udp_mem
2007-11-17 2:52 ` Hideo AOKI
@ 2007-11-17 4:01 ` David Miller
0 siblings, 0 replies; 11+ messages in thread
From: David Miller @ 2007-11-17 4:01 UTC (permalink / raw)
To: haoki
Cc: herbert, netdev, satoshi.oshima.fk, billfink, andi, johnpol,
shemminger, yoshfuji, yumiko.sugita.yf
From: Hideo AOKI <haoki@redhat.com>
Date: Fri, 16 Nov 2007 21:52:16 -0500
> I understood that memory accounting code should avoid special
> protocols checks. Then, I'll improve this part in next patch set.
Thank you for continuing this work, it is very much appreciated.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 4/5] udp: memory limitation by using udp_mem
2007-11-15 23:23 ` David Miller
2007-11-16 2:12 ` Herbert Xu
@ 2007-11-21 23:25 ` Hideo AOKI
1 sibling, 0 replies; 11+ messages in thread
From: Hideo AOKI @ 2007-11-21 23:25 UTC (permalink / raw)
To: netdev, herbert
Cc: David Miller, satoshi.oshima.fk, billfink, andi, johnpol,
shemminger, yoshfuji, yumiko.sugita.yf, haoki
David Miller wrote:
> From: Hideo AOKI <haoki@redhat.com>
> Date: Thu, 15 Nov 2007 16:50:14 -0500
>
>> +static inline int __ip_check_max_skb_pages(struct sock *sk, int size)
>> +{
>> + switch(sk->sk_protocol) {
>> + case IPPROTO_UDP:
>> + if (atomic_read(sk->sk_prot->memory_allocated) + size
>> + > sk->sk_prot->sysctl_mem[0])
>> + return -ENOBUFS;
>> + /* Fall through */
>> + default:
>> + break;
>> + }
>> + return 0;
>> +}
>> +
<snip>
>
> These special case checks are all over the place.
>
> We don't have tests all over the place to see if a socket is TCP or
> DCCP or SCTP in order to implement memory accounting there, because we
> did it for connection oriented protocols cleanly, separating things
> via callbacks etc.
>
> I would like to see the datagram memory accounting work similarly.
Hello,
I'm still thinking about this and focusing on enhancing the above function.
However, it is difficult because the socket buffer allocation for outgoing
UDP packets happens in the IP layer, in ip_append_data(). Moreover, that function
is called from several protocols, including TCP. This makes setting up a
callback hard without changing the function interface or a core data structure.
Then, I would like to know if the following implementation could be
acceptable.
- Adding sk_datagram_{rw}mem_schedule() as a memory schedule function
for datagram protocols.
- Adding sk_wmem_schedule().
In this function, sk_stream_wmem_schedule() is called if the calling
socket uses a stream protocol, and sk_datagram_wmem_schedule()
is called if the socket is a datagram socket, like this:
int sk_wmem_schedule(struct sock *sk, int size)
{
...
switch (sk->sk_type) {
case SOCK_STREAM:
return sk_stream_wmem_schedule(sk, size);
case SOCK_DGRAM:
return sk_datagram_wmem_schedule(sk, size);
default:
return 1;
}
}
- In ip_append_data(), sk_wmem_schedule() is called to execute
memory accounting.
Please let me know if you have any comments about this.
Best regards,
Hideo
--
Hitachi Computer Products (America) Inc.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 5/5] udp: add udp_rmem_min and udp_wmem_min
2007-11-15 21:44 [PATCH 0/5] UDP memory accounting and limitation (take 8) Hideo AOKI
` (3 preceding siblings ...)
2007-11-15 21:50 ` [PATCH 4/5] udp: memory limitation by using udp_mem Hideo AOKI
@ 2007-11-15 21:50 ` Hideo AOKI
4 siblings, 0 replies; 11+ messages in thread
From: Hideo AOKI @ 2007-11-15 21:50 UTC (permalink / raw)
To: David Miller, netdev
Cc: Satoshi Oshima, Herbert Xu, Bill Fink, Andi Kleen,
Evgeniy Polyakov, Stephen Hemminger, yoshfuji, Yumiko Sugita
This patch adds /proc/sys/net/ipv4/udp_rmem_min and
/proc/sys/net/ipv4/udp_wmem_min. A UDP packet is dropped when the
number of pages used for socket buffers is beyond the limit and the socket
already consumes its minimum buffer.
Cc: Satoshi Oshima <satoshi.oshima.fk@hitachi.com>
signed-off-by: Hideo Aoki <haoki@redhat.com>
---
Documentation/networking/ip-sysctl.txt | 12 ++++++++++++
include/net/udp.h | 4 ++++
net/ipv4/ip_output.c | 4 +++-
net/ipv4/sysctl_net_ipv4.c | 20 ++++++++++++++++++++
net/ipv4/udp.c | 13 +++++++++++--
5 files changed, 50 insertions(+), 3 deletions(-)
diff -pruN net-2.6-udp-p4/Documentation/networking/ip-sysctl.txt net-2.6-udp-p5/Documentation/networking/ip-sysctl.txt
--- net-2.6-udp-p4/Documentation/networking/ip-sysctl.txt 2007-11-15 14:44:21.000000000 -0500
+++ net-2.6-udp-p5/Documentation/networking/ip-sysctl.txt 2007-11-15 14:44:23.000000000 -0500
@@ -452,6 +452,18 @@ udp_mem - INTEGER
Number of pages allowed for queueing by all UDP sockets.
Default is calculated at boot time from amount of available memory.
+udp_rmem_min - INTEGER
+ Minimal size of receive buffer used by UDP sockets. Each UDP socket
+ is able to use the size for receiving data, even if total pages of UDP
+ sockets exceed udp_mem. The unit is byte.
+ Default: 4096
+
+udp_wmem_min - INTEGER
+ Minimal size of send buffer used by UDP sockets. Each UDP socket is
+ able to use the size for sending data, even if total pages of UDP
+ sockets exceed udp_mem. The unit is byte.
+ Default: 4096
+
CIPSOv4 Variables:
cipso_cache_enable - BOOLEAN
diff -pruN net-2.6-udp-p4/include/net/udp.h net-2.6-udp-p5/include/net/udp.h
--- net-2.6-udp-p4/include/net/udp.h 2007-11-15 14:44:21.000000000 -0500
+++ net-2.6-udp-p5/include/net/udp.h 2007-11-15 14:44:23.000000000 -0500
@@ -66,7 +66,11 @@ extern rwlock_t udp_hash_lock;
extern struct proto udp_prot;
extern atomic_t udp_memory_allocated;
+
+/* sysctl variables for udp */
extern int sysctl_udp_mem;
+extern int sysctl_udp_rmem_min;
+extern int sysctl_udp_wmem_min;
struct sk_buff;
diff -pruN net-2.6-udp-p4/net/ipv4/ip_output.c net-2.6-udp-p5/net/ipv4/ip_output.c
--- net-2.6-udp-p4/net/ipv4/ip_output.c 2007-11-15 14:44:21.000000000 -0500
+++ net-2.6-udp-p5/net/ipv4/ip_output.c 2007-11-15 14:44:23.000000000 -0500
@@ -705,7 +705,9 @@ static inline int __ip_check_max_skb_pag
switch(sk->sk_protocol) {
case IPPROTO_UDP:
if (atomic_read(sk->sk_prot->memory_allocated) + size
- > sk->sk_prot->sysctl_mem[0])
+ > sk->sk_prot->sysctl_mem[0] &&
+ atomic_read(&sk->sk_wmem_alloc) + size
+ > sk->sk_prot->sysctl_wmem[0])
return -ENOBUFS;
/* Fall through */
default:
diff -pruN net-2.6-udp-p4/net/ipv4/sysctl_net_ipv4.c net-2.6-udp-p5/net/ipv4/sysctl_net_ipv4.c
--- net-2.6-udp-p4/net/ipv4/sysctl_net_ipv4.c 2007-11-15 14:44:21.000000000 -0500
+++ net-2.6-udp-p5/net/ipv4/sysctl_net_ipv4.c 2007-11-15 14:44:23.000000000 -0500
@@ -896,6 +896,26 @@ ctl_table ipv4_table[] = {
.strategy = &sysctl_intvec,
.extra1 = &zero
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "udp_rmem_min",
+ .data = &sysctl_udp_rmem_min,
+ .maxlen = sizeof(sysctl_udp_rmem_min),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "udp_wmem_min",
+ .data = &sysctl_udp_wmem_min,
+ .maxlen = sizeof(sysctl_udp_wmem_min),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero
+ },
{ .ctl_name = 0 }
};
diff -pruN net-2.6-udp-p4/net/ipv4/udp.c net-2.6-udp-p5/net/ipv4/udp.c
--- net-2.6-udp-p4/net/ipv4/udp.c 2007-11-15 14:44:21.000000000 -0500
+++ net-2.6-udp-p5/net/ipv4/udp.c 2007-11-15 14:44:23.000000000 -0500
@@ -117,6 +117,8 @@ DEFINE_RWLOCK(udp_hash_lock);
atomic_t udp_memory_allocated;
int sysctl_udp_mem __read_mostly;
+int sysctl_udp_rmem_min __read_mostly;
+int sysctl_udp_wmem_min __read_mostly;
static inline int __udp_lib_lport_inuse(__u16 num,
const struct hlist_head udptable[])
@@ -1027,8 +1029,10 @@ int udp_queue_rcv_skb(struct sock * sk,
}
if ((atomic_read(sk->sk_prot->memory_allocated)
- + sk_datagram_pages(skb->truesize))
- > sk->sk_prot->sysctl_mem[0]) {
+ + sk_datagram_pages(skb->truesize))
+ > sk->sk_prot->sysctl_mem[0] &&
+ atomic_read(&sk->sk_rmem_alloc) + skb->truesize
+ > sk->sk_prot->sysctl_rmem[0]) {
UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
goto drop;
}
@@ -1471,6 +1475,8 @@ struct proto udp_prot = {
.get_port = udp_v4_get_port,
.memory_allocated = &udp_memory_allocated,
.sysctl_mem = &sysctl_udp_mem,
+ .sysctl_wmem = &sysctl_udp_wmem_min,
+ .sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp_sock),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
@@ -1678,6 +1684,9 @@ void __init udp_init(void)
limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
limit = max(limit, 128UL);
sysctl_udp_mem = limit / 2 * 3;
+
+ sysctl_udp_rmem_min = SK_DATAGRAM_MEM_QUANTUM;
+ sysctl_udp_wmem_min = SK_DATAGRAM_MEM_QUANTUM;
}
EXPORT_SYMBOL(udp_disconnect);
--
Hitachi Computer Products (America) Inc.
^ permalink raw reply [flat|nested] 11+ messages in thread