All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hideo AOKI <haoki@redhat.com>
To: David Miller <davem@davemloft.net>, netdev <netdev@vger.kernel.org>
Cc: Hideo AOKI <haoki@redhat.com>,
	Satoshi Oshima <satoshi.oshima.fk@hitachi.com>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	Bill Fink <billfink@mindspring.com>,
	Andi Kleen <andi@firstfloor.org>,
	Evgeniy Polyakov <johnpol@2ka.mipt.ru>,
	Stephen Hemminger <shemminger@linux-foundation.org>,
	yoshfuji@linux-ipv6.org,
	Yumiko Sugita <yumiko.sugita.yf@hitachi.com>
Subject: [PATCH 4/5] memory limitation by using udp_mem
Date: Tue, 13 Nov 2007 21:48:42 -0500	[thread overview]
Message-ID: <473A620A.5040300@redhat.com> (raw)
In-Reply-To: <473A5FD6.5010209@redhat.com>

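This patch introduces a global memory limit for UDP. It adds the udp_mem
sysctl, the number of pages that all UDP sockets together may use for
queueing, with a default calculated at boot time from the amount of
available memory. The send paths in net/ipv4/ip_output.c return -ENOBUFS
when an allocation would exceed the limit, and udp_queue_rcv_skb() drops
the incoming datagram and counts it as a receive buffer error.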

Signed-off-by: Satoshi Oshima <satoshi.oshima.fk@hitachi.com>
Signed-off-by: Hideo Aoki <haoki@redhat.com>
---

  Documentation/networking/ip-sysctl.txt |    6 ++++
  include/net/udp.h                      |    3 ++
  net/ipv4/af_inet.c                     |    3 ++
  net/ipv4/ip_output.c                   |   47 ++++++++++++++++++++++++++++++---
  net/ipv4/sysctl_net_ipv4.c             |   11 +++++++
  net/ipv4/udp.c                         |   24 ++++++++++++++++
  6 files changed, 91 insertions(+), 3 deletions(-)

diff -pruN net-2.6-udp-p3/Documentation/networking/ip-sysctl.txt net-2.6-udp-p4/Documentation/networking/ip-sysctl.txt
--- net-2.6-udp-p3/Documentation/networking/ip-sysctl.txt	2007-11-13 08:19:30.000000000 -0500
+++ net-2.6-udp-p4/Documentation/networking/ip-sysctl.txt	2007-11-13 16:12:26.000000000 -0500
@@ -446,6 +446,12 @@ tcp_dma_copybreak - INTEGER
  	and CONFIG_NET_DMA is enabled.
  	Default: 4096

+UDP variables:
+
+udp_mem - INTEGER
+	Number of pages allowed for queueing by all UDP sockets.
+	Default is calculated at boot time from the amount of available memory.
+
  CIPSOv4 Variables:

  cipso_cache_enable - BOOLEAN
diff -pruN net-2.6-udp-p3/include/net/udp.h net-2.6-udp-p4/include/net/udp.h
--- net-2.6-udp-p3/include/net/udp.h	2007-11-13 16:10:05.000000000 -0500
+++ net-2.6-udp-p4/include/net/udp.h	2007-11-13 16:12:26.000000000 -0500
@@ -66,6 +66,7 @@ extern rwlock_t udp_hash_lock;
  extern struct proto udp_prot;

  extern atomic_t udp_memory_allocated;
+extern int sysctl_udp_mem;

  struct sk_buff;

@@ -175,4 +176,6 @@ extern void udp_proc_unregister(struct u
  extern int  udp4_proc_init(void);
  extern void udp4_proc_exit(void);
  #endif
+
+extern void udp_init(void);
  #endif	/* _UDP_H */
diff -pruN net-2.6-udp-p3/net/ipv4/af_inet.c net-2.6-udp-p4/net/ipv4/af_inet.c
--- net-2.6-udp-p3/net/ipv4/af_inet.c	2007-11-13 16:12:24.000000000 -0500
+++ net-2.6-udp-p4/net/ipv4/af_inet.c	2007-11-13 16:12:26.000000000 -0500
@@ -1446,6 +1446,9 @@ static int __init inet_init(void)
  	/* Setup TCP slab cache for open requests. */
  	tcp_init();

+	/* Setup UDP memory threshold */
+	udp_init();
+
  	/* Add UDP-Lite (RFC 3828) */
  	udplite4_register();

diff -pruN net-2.6-udp-p3/net/ipv4/ip_output.c net-2.6-udp-p4/net/ipv4/ip_output.c
--- net-2.6-udp-p3/net/ipv4/ip_output.c	2007-11-13 16:12:24.000000000 -0500
+++ net-2.6-udp-p4/net/ipv4/ip_output.c	2007-11-13 16:12:26.000000000 -0500
@@ -75,6 +75,7 @@
  #include <net/icmp.h>
  #include <net/checksum.h>
  #include <net/inetpeer.h>
+#include <net/udp.h>
  #include <linux/igmp.h>
  #include <linux/netfilter_ipv4.h>
  #include <linux/netfilter_bridge.h>
@@ -699,6 +700,20 @@ csum_page(struct page *page, int offset,
  	return csum;
  }

+static inline int __ip_check_max_skb_pages(struct sock *sk, int size)
+{
+	switch(sk->sk_protocol) {
+	case IPPROTO_UDP:
+		if (atomic_read(sk->sk_prot->memory_allocated) + size
+		    > sk->sk_prot->sysctl_mem[0])
+			return -ENOBUFS;
+		/* Fall through */	
+	default:
+		break;
+	}
+	return 0;
+}
+
  static inline int ip_ufo_append_data(struct sock *sk,
  			int getfrag(void *from, char *to, int offset, int len,
  			       int odd, struct sk_buff *skb),
@@ -707,16 +722,20 @@ static inline int ip_ufo_append_data(str
  {
  	struct sk_buff *skb;
  	int err;
+	int size = 0;

  	/* There is support for UDP fragmentation offload by network
  	 * device, so create one single skb packet containing complete
  	 * udp datagram
  	 */
  	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
-		skb = sock_alloc_send_skb(sk,
-			hh_len + fragheaderlen + transhdrlen + 20,
-			(flags & MSG_DONTWAIT), &err);
+		size = hh_len + fragheaderlen + transhdrlen + 20;
+		err = __ip_check_max_skb_pages(sk, sk_datagram_pages(size));
+		if (err)
+			return err;

+		skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT),
+					  &err);
  		if (skb == NULL)
  			return err;

@@ -737,6 +756,10 @@ static inline int ip_ufo_append_data(str
  		sk->sk_sndmsg_off = 0;
  	}

+	err = __ip_check_max_skb_pages(sk, sk_datagram_pages(size + length -
+							     transhdrlen));
+	if (err)
+		goto fail;
  	err = skb_append_datato_frags(sk,skb, getfrag, from,
  			       (length - transhdrlen));
  	if (!err) {
@@ -752,6 +775,7 @@ static inline int ip_ufo_append_data(str
  	/* There is not enough support do UFO ,
  	 * so follow normal path
  	 */
+fail:
  	kfree_skb(skb);
  	return err;
  }
@@ -910,6 +934,12 @@ alloc_new_skb:
  			if (datalen == length + fraggap)
  				alloclen += rt->u.dst.trailer_len;

+			err = __ip_check_max_skb_pages(sk,
+				sk_datagram_pages(SKB_DATA_ALIGN(alloclen + hh_len + 15)
+				+ sizeof(struct sk_buff)));
+			if (err)
+				goto error;
+
  			if (transhdrlen) {
  				skb = sock_alloc_send_skb(sk,
  						alloclen + hh_len + 15,
@@ -1009,6 +1039,11 @@ alloc_new_skb:
  					frag = &skb_shinfo(skb)->frags[i];
  				}
  			} else if (i < MAX_SKB_FRAGS) {
+				err = __ip_check_max_skb_pages(sk,
+					sk_datagram_pages(PAGE_SIZE));
+				if (err)
+					goto error;
+
  				if (atomic_read(&sk->sk_wmem_alloc) + PAGE_SIZE
  				    > 2 * sk->sk_sndbuf) {
  					err = -ENOBUFS;
@@ -1126,6 +1161,12 @@ ssize_t	ip_append_page(struct sock *sk,
  			fraggap = skb_prev->len - maxfraglen;

  			alloclen = fragheaderlen + hh_len + fraggap + 15;
+
+			err = __ip_check_max_skb_pages(sk,
+				sk_datagram_pages(alloclen + sizeof(struct sk_buff)));
+			if (err)
+				goto error;
+
  			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
  			if (unlikely(!skb)) {
  				err = -ENOBUFS;
diff -pruN net-2.6-udp-p3/net/ipv4/sysctl_net_ipv4.c net-2.6-udp-p4/net/ipv4/sysctl_net_ipv4.c
--- net-2.6-udp-p3/net/ipv4/sysctl_net_ipv4.c	2007-11-13 08:19:57.000000000 -0500
+++ net-2.6-udp-p4/net/ipv4/sysctl_net_ipv4.c	2007-11-13 16:12:26.000000000 -0500
@@ -18,6 +18,7 @@
  #include <net/ip.h>
  #include <net/route.h>
  #include <net/tcp.h>
+#include <net/udp.h>
  #include <net/cipso_ipv4.h>
  #include <net/inet_frag.h>

@@ -885,6 +886,16 @@ ctl_table ipv4_table[] = {
  		.mode		= 0644,
  		.proc_handler	= &proc_dointvec,
  	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "udp_mem",
+		.data		= &sysctl_udp_mem,
+		.maxlen		= sizeof(sysctl_udp_mem),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero
+	},
  	{ .ctl_name = 0 }
  };

diff -pruN net-2.6-udp-p3/net/ipv4/udp.c net-2.6-udp-p4/net/ipv4/udp.c
--- net-2.6-udp-p3/net/ipv4/udp.c	2007-11-13 16:12:24.000000000 -0500
+++ net-2.6-udp-p4/net/ipv4/udp.c	2007-11-13 16:12:26.000000000 -0500
@@ -82,6 +82,7 @@
  #include <asm/system.h>
  #include <asm/uaccess.h>
  #include <asm/ioctls.h>
+#include <linux/bootmem.h>
  #include <linux/types.h>
  #include <linux/fcntl.h>
  #include <linux/module.h>
@@ -115,6 +116,7 @@ struct hlist_head udp_hash[UDP_HTABLE_SI
  DEFINE_RWLOCK(udp_hash_lock);

  atomic_t udp_memory_allocated;
+int sysctl_udp_mem __read_mostly;

  static inline int __udp_lib_lport_inuse(__u16 num,
  					const struct hlist_head udptable[])
@@ -1023,6 +1025,13 @@ int udp_queue_rcv_skb(struct sock * sk,
  			goto drop;
  	}

+	if ((atomic_read(sk->sk_prot->memory_allocated)
+		       + sk_datagram_pages(skb->truesize))
+		> sk->sk_prot->sysctl_mem[0]) {
+		UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
+		goto drop;
+	}
+
  	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
  		/* Note that an ENOMEM error is charged twice */
  		if (rc == -ENOMEM)
@@ -1460,6 +1469,7 @@ struct proto udp_prot = {
  	.unhash		   = udp_lib_unhash,
  	.get_port	   = udp_v4_get_port,
  	.memory_allocated  = &udp_memory_allocated,
+	.sysctl_mem	   = &sysctl_udp_mem,
  	.obj_size	   = sizeof(struct udp_sock),
  #ifdef CONFIG_COMPAT
  	.compat_setsockopt = compat_udp_setsockopt,
@@ -1655,6 +1665,20 @@ void udp4_proc_exit(void)
  }
  #endif /* CONFIG_PROC_FS */

+void __init udp_init(void)
+{
+	unsigned long limit;
+
+	/* Set up the pressure threshold using the same strategy as TCP. It is a
+	 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
+	 * toward zero with the amount of memory, with a floor of 128 pages.
+	 */
+	limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	limit = max(limit, 128UL);
+	sysctl_udp_mem = limit / 2 * 3;
+}
+
  EXPORT_SYMBOL(udp_disconnect);
  EXPORT_SYMBOL(udp_hash);
  EXPORT_SYMBOL(udp_hash_lock);
--
Hideo Aoki
Hitachi Computer Products (America) Inc.

  parent reply	other threads:[~2007-11-14  2:50 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-14  2:39 [PATCH 0/5] UDP memory accounting and limitation (take 7) Hideo AOKI
2007-11-14  2:47 ` [PATCH 1/5] fix send buffer check Hideo AOKI
2007-11-14  2:48 ` [PATCH 2/5] accounting unit and variable Hideo AOKI
2007-11-14  2:48 ` [PATCH 3/5] memory accounting Hideo AOKI
2007-11-14  2:48 ` Hideo AOKI [this message]
2007-11-14  2:49 ` [PATCH 5/5] add udp_rmem_min and udp_wmem_min Hideo AOKI
2007-11-14  4:32 ` [PATCH 0/5] UDP memory accounting and limitation (take 7) David Miller
  -- strict thread matches above, loose matches on Subject: below --
2007-10-29 21:18 [PATCH 0/5] UDP memory accounting and limitation (take 6) Hideo AOKI
2007-10-29 21:23 ` [PATCH 4/5] memory limitation by using udp_mem Hideo AOKI

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=473A620A.5040300@redhat.com \
    --to=haoki@redhat.com \
    --cc=andi@firstfloor.org \
    --cc=billfink@mindspring.com \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=johnpol@2ka.mipt.ru \
    --cc=netdev@vger.kernel.org \
    --cc=satoshi.oshima.fk@hitachi.com \
    --cc=shemminger@linux-foundation.org \
    --cc=yoshfuji@linux-ipv6.org \
    --cc=yumiko.sugita.yf@hitachi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.