All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org
Cc: Daniel Phillips <phillips@google.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 2/9] deadlock prevention core
Date: Tue, 08 Aug 2006 21:33:45 +0200	[thread overview]
Message-ID: <20060808193345.1396.16773.sendpatchset@lappy> (raw)
In-Reply-To: <20060808193325.1396.58813.sendpatchset@lappy>


The core of the VM deadlock avoidance framework.

>From the 'user' side of things it provides a function to mark a 'struct sock'
as SOCK_MEMALLOC, meaning this socket may dip into the memalloc reserves on
the receive side.

>From the net_device side of things, the extra 'struct net_device *' argument
to {,__}netdev_alloc_skb() is used to attribute/account the memalloc usage.
Converted drivers will make use of this new API and will set NETIF_F_MEMALLOC
to indicate the driver fully supports this feature.

When a SOCK_MEMALLOC socket is marked, the device is checked for this feature
and tries to increase the memalloc pool; if both succeed, the device is marked
with IFF_MEMALLOC, indicating to {,__}netdev_alloc_skb() that it is OK to dip
into the memalloc pool.

Memalloc sk_buff allocations are not done from the SLAB but are done using 
alloc_pages(). sk_buff::memalloc records this exception so that kfree_skbmem()
can do the right thing.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Daniel Phillips <phillips@google.com>

---
 include/linux/gfp.h       |    3 -
 include/linux/if.h        |    1 
 include/linux/mmzone.h    |    1 
 include/linux/netdevice.h |   48 ++++++++++++++-----
 include/linux/skbuff.h    |    3 -
 include/net/sock.h        |    8 +++
 mm/page_alloc.c           |   29 ++++++++++-
 net/core/dev.c            |    1 
 net/core/skbuff.c         |  114 +++++++++++++++++++++++++++++++++++++++++++---
 net/core/sock.c           |   54 +++++++++++++++++++++
 net/ethernet/eth.c        |    1 
 net/ipv4/af_inet.c        |   14 +++++
 net/ipv4/icmp.c           |    5 ++
 net/ipv4/tcp_ipv4.c       |    6 ++
 net/ipv4/udp.c            |   11 ++++
 15 files changed, 274 insertions(+), 25 deletions(-)

Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h
+++ linux-2.6/include/linux/gfp.h
@@ -46,6 +46,7 @@ struct vm_area_struct;
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_MEMALLOC  ((__force gfp_t)0x40000u) /* Use emergency reserves */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +55,7 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_MEMALLOC)
 
 /* This equals 0, but use constants in case they ever change */
 #define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
Index: linux-2.6/include/linux/mmzone.h
===================================================================
--- linux-2.6.orig/include/linux/mmzone.h
+++ linux-2.6/include/linux/mmzone.h
@@ -420,6 +420,7 @@ int percpu_pagelist_fraction_sysctl_hand
 					void __user *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
+int adjust_memalloc_reserve(int bytes);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
Index: linux-2.6/include/linux/netdevice.h
===================================================================
--- linux-2.6.orig/include/linux/netdevice.h
+++ linux-2.6/include/linux/netdevice.h
@@ -298,18 +298,22 @@ struct net_device
 
 	/* Net device features */
 	unsigned long		features;
-#define NETIF_F_SG		1	/* Scatter/gather IO. */
-#define NETIF_F_IP_CSUM		2	/* Can checksum only TCP/UDP over IPv4. */
-#define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
-#define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
-#define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
-#define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
-#define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
-#define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
-#define NETIF_F_GSO		2048	/* Enable software GSO. */
-#define NETIF_F_LLTX		4096	/* LockLess TX */
+#define NETIF_F_SG		0x0001	/* Scatter/gather IO. */
+#define NETIF_F_IP_CSUM		0x0002	/* Can checksum only TCP/UDP over IPv4. */
+#define NETIF_F_NO_CSUM		0x0004	/* Does not require checksum. F.e. loopack. */
+#define NETIF_F_HW_CSUM		0x0008	/* Can checksum all the packets. */
+
+#define NETIF_F_HIGHDMA		0x0010	/* Can DMA to high memory. */
+#define NETIF_F_FRAGLIST	0x0020	/* Scatter/gather IO. */
+#define NETIF_F_HW_VLAN_TX	0x0040	/* Transmit VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_RX	0x0080	/* Receive VLAN hw acceleration */
+
+#define NETIF_F_HW_VLAN_FILTER	0x0100	/* Receive filtering on VLAN */
+#define NETIF_F_VLAN_CHALLENGED	0x0200	/* Device cannot handle VLAN packets */
+#define NETIF_F_GSO		0x0400	/* Enable software GSO. */
+#define NETIF_F_LLTX		0x0800	/* LockLess TX */
+
+#define NETIF_F_MEMALLOC	0x1000	/* Supports {SOCK,__GFP}_MEMALLOC */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
@@ -409,6 +413,12 @@ struct net_device
 	struct Qdisc		*qdisc_sleeping;
 	struct list_head	qdisc_list;
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
+	int			rx_reserve;
+	atomic_t		rx_reserve_used;
+
+	atomic_t		memalloc_socks;
+	unsigned long		memalloc_reserve;
+	spinlock_t		memalloc_lock;
 
 	/* Partially transmitted GSO packet. */
 	struct sk_buff		*gso_skb;
@@ -686,6 +696,20 @@ static inline void dev_kfree_skb_irq(str
  */
 extern void dev_kfree_skb_any(struct sk_buff *skb);
 
+/*
+ * Support for critical network IO under low memory conditions
+ */
+static inline int dev_reserve_used(struct net_device *dev)
+{
+	return atomic_read(&dev->rx_reserve_used);
+}
+
+static inline void dev_unreserve_skb(struct net_device *dev)
+{
+	if (atomic_dec_return(&dev->rx_reserve_used) < 0)
+		atomic_inc(&dev->rx_reserve_used);
+}
+
 #define HAVE_NETIF_RX 1
 extern int		netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
Index: linux-2.6/include/linux/skbuff.h
===================================================================
--- linux-2.6.orig/include/linux/skbuff.h
+++ linux-2.6/include/linux/skbuff.h
@@ -282,7 +282,8 @@ struct sk_buff {
 				nfctinfo:3;
 	__u8			pkt_type:3,
 				fclone:2,
-				ipvs_property:1;
+				ipvs_property:1,
+				memalloc:1;
 	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
Index: linux-2.6/include/net/sock.h
===================================================================
--- linux-2.6.orig/include/net/sock.h
+++ linux-2.6/include/net/sock.h
@@ -391,6 +391,7 @@ enum sock_flags {
 	SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
 	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
 	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
+	SOCK_MEMALLOC, /* protocol can use memalloc reserve */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -413,6 +414,13 @@ static inline int sock_flag(struct sock 
 	return test_bit(flag, &sk->sk_flags);
 }
 
+static inline int sk_is_memalloc(struct sock *sk)
+{
+	return sock_flag(sk, SOCK_MEMALLOC);
+}
+
+extern int sk_set_memalloc(struct sock *sk);
+
 static inline void sk_acceptq_removed(struct sock *sk)
 {
 	sk->sk_ack_backlog--;
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -82,6 +82,7 @@ EXPORT_SYMBOL(zone_table);
 
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
+int var_free_kbytes;
 
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
@@ -970,8 +971,8 @@ restart:
 
 	/* This allocation should allow future memory freeing. */
 
-	if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
-			&& !in_interrupt()) {
+	if ((((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+			&& !in_interrupt()) || (gfp_mask & __GFP_MEMALLOC)) {
 		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 nofail_alloc:
 			/* go through the zonelist yet again, ignoring mins */
@@ -2196,7 +2197,8 @@ static void setup_per_zone_lowmem_reserv
  */
 void setup_per_zone_pages_min(void)
 {
-	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+	unsigned pages_min = (min_free_kbytes + var_free_kbytes)
+		>> (PAGE_SHIFT - 10);
 	unsigned long lowmem_pages = 0;
 	struct zone *zone;
 	unsigned long flags;
@@ -2248,6 +2250,27 @@ void setup_per_zone_pages_min(void)
 	calculate_totalreserve_pages();
 }
 
+int adjust_memalloc_reserve(int pages)
+{
+	int kbytes = var_free_kbytes + (pages << (PAGE_SHIFT - 10));
+	if (kbytes < 0)
+		return -EINVAL;
+	var_free_kbytes = kbytes;
+	setup_per_zone_pages_min();
+	if (pages > 0) {
+		int i;
+		pg_data_t *pgdat;
+		for_each_online_pgdat(pgdat) {
+			for (i = 0; i < pgdat->nr_zones; ++i)
+				wakeup_kswapd(&pgdat->node_zones[i], 0);
+		}
+	}
+	printk(KERN_DEBUG "RX reserve: %d\n", var_free_kbytes);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(adjust_memalloc_reserve);
+
 /*
  * Initialise min_free_kbytes.
  *
Index: linux-2.6/net/core/skbuff.c
===================================================================
--- linux-2.6.orig/net/core/skbuff.c
+++ linux-2.6/net/core/skbuff.c
@@ -43,6 +43,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/pagemap.h>
 #include <linux/interrupt.h>
 #include <linux/in.h>
 #include <linux/inet.h>
@@ -125,6 +126,8 @@ EXPORT_SYMBOL(skb_truesize_bug);
  *
  */
 
+#define ceiling_log2(x)	fls((x) - 1)
+
 /**
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
@@ -147,6 +150,49 @@ struct sk_buff *__alloc_skb(unsigned int
 	struct sk_buff *skb;
 	u8 *data;
 
+	size = SKB_DATA_ALIGN(size);
+
+	if (gfp_mask & __GFP_MEMALLOC) {
+		/*
+		 * We have to do higher order allocations for icky jumbo
+		 * frame drivers :-(
+		 * They really should be migrated to scater/gather DMA
+		 * and use skb fragments.
+		 */
+		unsigned int data_offset =
+			sizeof(struct sk_buff) + sizeof(unsigned int);
+		unsigned long length = size + data_offset +
+			sizeof(struct skb_shared_info);
+		unsigned int pages;
+		unsigned int order;
+		struct page *page;
+		void *kaddr;
+
+		/*
+		 * force fclone alloc in order to fudge a lacking in skb_clone().
+		 */
+		fclone = 1;
+		if (fclone) {
+			data_offset += sizeof(struct sk_buff) + sizeof(atomic_t);
+			length += sizeof(struct sk_buff) + sizeof(atomic_t);
+		}
+		pages = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		order = ceiling_log2(pages);
+
+		skb = NULL;
+		if (!(page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order)))
+			goto out;
+
+		kaddr = pfn_to_kaddr(page_to_pfn(page));
+		skb = (struct sk_buff *)kaddr;
+
+		*((unsigned int *)(kaddr + data_offset -
+					sizeof(unsigned int))) = order;
+		data = (u8 *)(kaddr + data_offset);
+
+		goto allocated;
+	}
+
 	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
 
 	/* Get the HEAD */
@@ -155,12 +201,13 @@ struct sk_buff *__alloc_skb(unsigned int
 		goto out;
 
 	/* Get the DATA. Size must match skb_add_mtu(). */
-	size = SKB_DATA_ALIGN(size);
 	data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (!data)
 		goto nodata;
 
+allocated:
 	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->memalloc = !!(gfp_mask & __GFP_MEMALLOC);
 	skb->truesize = size + sizeof(struct sk_buff);
 	atomic_set(&skb->users, 1);
 	skb->head = data;
@@ -185,6 +232,7 @@ struct sk_buff *__alloc_skb(unsigned int
 		atomic_set(fclone_ref, 1);
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
+		child->memalloc = skb->memalloc;
 	}
 out:
 	return skb;
@@ -250,7 +298,7 @@ nodata:
 }
 
 /**
- *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ *	___netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *	@dev: network device to receive on
  *	@length: length to allocate
  *	@gfp_mask: get_free_pages mask, passed to alloc_skb
@@ -262,7 +310,7 @@ nodata:
  *
  *	%NULL is returned if there is no free memory.
  */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+static struct sk_buff *___netdev_alloc_skb(struct net_device *dev,
 		unsigned int length, gfp_t gfp_mask)
 {
 	struct sk_buff *skb;
@@ -273,6 +321,34 @@ struct sk_buff *__netdev_alloc_skb(struc
 	return skb;
 }
 
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+		unsigned length, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	if (dev && (dev->flags & IFF_MEMALLOC)) {
+		WARN_ON(gfp_mask & (__GFP_NOMEMALLOC | __GFP_MEMALLOC));
+		gfp_mask &= ~(__GFP_NOMEMALLOC | __GFP_MEMALLOC);
+
+		if ((skb = ___netdev_alloc_skb(dev, length,
+					       gfp_mask | __GFP_NOMEMALLOC)))
+			goto done;
+		if (dev_reserve_used(dev) >= dev->rx_reserve)
+			goto out;
+		if (!(skb = ___netdev_alloc_skb(dev, length,
+						gfp_mask | __GFP_MEMALLOC)))
+			goto out;
+		atomic_inc(&dev->rx_reserve_used);
+	} else
+		if (!(skb = ___netdev_alloc_skb(dev, length, gfp_mask)))
+			goto out;
+
+done:
+	skb->dev = dev;
+out:
+	return skb;
+}
+
 static void skb_drop_list(struct sk_buff **listp)
 {
 	struct sk_buff *list = *listp;
@@ -313,10 +389,19 @@ static void skb_release_data(struct sk_b
 		if (skb_shinfo(skb)->frag_list)
 			skb_drop_fraglist(skb);
 
-		kfree(skb->head);
+		if (!skb->memalloc)
+			kfree(skb->head);
 	}
 }
 
+static void free_skb_pages(struct kmem_cache *cache, void *objp)
+{
+	struct sk_buff *skb = (struct sk_buff *)objp;
+	unsigned int order =
+		*(unsigned int *)(skb->head - sizeof(unsigned int));
+	free_pages((unsigned long)skb, order);
+}
+
 /*
  *	Free an skbuff by memory without cleaning the state.
  */
@@ -324,17 +409,21 @@ void kfree_skbmem(struct sk_buff *skb)
 {
 	struct sk_buff *other;
 	atomic_t *fclone_ref;
+	void (*free_skb)(struct kmem_cache *, void *);
 
 	skb_release_data(skb);
+
+	free_skb = skb->memalloc ? free_skb_pages : kmem_cache_free;
+
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
-		kmem_cache_free(skbuff_head_cache, skb);
+		free_skb(skbuff_head_cache, skb);
 		break;
 
 	case SKB_FCLONE_ORIG:
 		fclone_ref = (atomic_t *) (skb + 2);
 		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, skb);
+			free_skb(skbuff_fclone_cache, skb);
 		break;
 
 	case SKB_FCLONE_CLONE:
@@ -347,7 +436,7 @@ void kfree_skbmem(struct sk_buff *skb)
 		skb->fclone = SKB_FCLONE_UNAVAILABLE;
 
 		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, other);
+			free_skb(skbuff_fclone_cache, other);
 		break;
 	};
 }
@@ -363,6 +452,8 @@ void kfree_skbmem(struct sk_buff *skb)
 
 void __kfree_skb(struct sk_buff *skb)
 {
+	struct net_device *dev = skb->dev;
+
 	dst_release(skb->dst);
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
@@ -389,6 +480,8 @@ void __kfree_skb(struct sk_buff *skb)
 #endif
 
 	kfree_skbmem(skb);
+	if (dev && (dev->flags & IFF_MEMALLOC))
+		dev_unreserve_skb(dev);
 }
 
 /**
@@ -434,10 +527,15 @@ struct sk_buff *skb_clone(struct sk_buff
 		n->fclone = SKB_FCLONE_CLONE;
 		atomic_inc(fclone_ref);
 	} else {
+		/*
+		 * should we special-case skb->memalloc cloning?
+		 * for now fudge it by forcing fast-clone alloc.
+		 */
 		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 		if (!n)
 			return NULL;
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
+		n->memalloc = 0;
 	}
 
 #define C(x) n->x = skb->x
@@ -686,6 +784,8 @@ int pskb_expand_head(struct sk_buff *skb
 	if (skb_shared(skb))
 		BUG();
 
+	BUG_ON(skb->memalloc);
+
 	size = SKB_DATA_ALIGN(size);
 
 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
Index: linux-2.6/net/ethernet/eth.c
===================================================================
--- linux-2.6.orig/net/ethernet/eth.c
+++ linux-2.6/net/ethernet/eth.c
@@ -275,6 +275,7 @@ void ether_setup(struct net_device *dev)
 	dev->mtu		= ETH_DATA_LEN;
 	dev->addr_len		= ETH_ALEN;
 	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */	
+	dev->rx_reserve		= 384;
 	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
 	
 	memset(dev->broadcast,0xFF, ETH_ALEN);
Index: linux-2.6/net/ipv4/icmp.c
===================================================================
--- linux-2.6.orig/net/ipv4/icmp.c
+++ linux-2.6/net/ipv4/icmp.c
@@ -938,6 +938,11 @@ int icmp_rcv(struct sk_buff *skb)
 			goto error;
 	}
 
+	if (unlikely(dev_reserve_used(skb->dev))) {
+		dev_unreserve_skb(skb->dev);
+		goto drop;
+	}
+
 	if (!pskb_pull(skb, sizeof(struct icmphdr)))
 		goto error;
 
Index: linux-2.6/net/ipv4/tcp_ipv4.c
===================================================================
--- linux-2.6.orig/net/ipv4/tcp_ipv4.c
+++ linux-2.6/net/ipv4/tcp_ipv4.c
@@ -1093,6 +1093,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!sk)
 		goto no_tcp_socket;
 
+	if (unlikely(dev_reserve_used(skb->dev))) {
+		dev_unreserve_skb(skb->dev);
+		if (!sk_is_memalloc(sk))
+			goto discard_and_relse;
+	}
+
 process:
 	if (sk->sk_state == TCP_TIME_WAIT)
 		goto do_time_wait;
Index: linux-2.6/net/ipv4/udp.c
===================================================================
--- linux-2.6.orig/net/ipv4/udp.c
+++ linux-2.6/net/ipv4/udp.c
@@ -1136,7 +1136,15 @@ int udp_rcv(struct sk_buff *skb)
 	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
 
 	if (sk != NULL) {
-		int ret = udp_queue_rcv_skb(sk, skb);
+		int ret;
+
+		if (unlikely(dev_reserve_used(skb->dev))) {
+			dev_unreserve_skb(skb->dev);
+			if (!sk_is_memalloc(sk))
+				goto drop_noncritical;
+		}
+
+		ret = udp_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
 		/* a return value > 0 means to resubmit the input, but
@@ -1147,6 +1155,7 @@ int udp_rcv(struct sk_buff *skb)
 		return 0;
 	}
 
+drop_noncritical:
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto drop;
 	nf_reset(skb);
Index: linux-2.6/include/linux/if.h
===================================================================
--- linux-2.6.orig/include/linux/if.h
+++ linux-2.6/include/linux/if.h
@@ -49,6 +49,7 @@
 
 #define IFF_LOWER_UP	0x10000		/* driver signals L1 up		*/
 #define IFF_DORMANT	0x20000		/* driver signals dormant	*/
+#define IFF_MEMALLOC	0x40000		/* driver has a reserve allocated */
 
 #define IFF_VOLATILE	(IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|\
 		IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
Index: linux-2.6/net/core/dev.c
===================================================================
--- linux-2.6.orig/net/core/dev.c
+++ linux-2.6/net/core/dev.c
@@ -2900,6 +2900,7 @@ int register_netdevice(struct net_device
 #ifdef CONFIG_NET_CLS_ACT
 	spin_lock_init(&dev->ingress_lock);
 #endif
+	spin_lock_init(&dev->memalloc_lock);
 
 	ret = alloc_divert_blk(dev);
 	if (ret)
Index: linux-2.6/net/ipv4/af_inet.c
===================================================================
--- linux-2.6.orig/net/ipv4/af_inet.c
+++ linux-2.6/net/ipv4/af_inet.c
@@ -131,6 +131,20 @@ static DEFINE_SPINLOCK(inetsw_lock);
 void inet_sock_destruct(struct sock *sk)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct net_device *dev = ip_dev_find(inet->rcv_saddr);
+
+	if (dev && (dev->flags & IFF_MEMALLOC) &&
+			sk_is_memalloc(sk) &&
+			(atomic_read(&dev->memalloc_socks) == 1)) {
+		spin_lock(&dev->memalloc_lock);
+		if (atomic_dec_and_test(&dev->memalloc_socks)) {
+			dev->flags &= ~IFF_MEMALLOC;
+			WARN_ON(dev_reserve_used(dev));
+			atomic_set(&dev->rx_reserve_used, 0);
+			adjust_memalloc_reserve(-dev->memalloc_reserve);
+		}
+		spin_unlock(&dev->memalloc_lock);
+	}
 
 	__skb_queue_purge(&sk->sk_receive_queue);
 	__skb_queue_purge(&sk->sk_error_queue);
Index: linux-2.6/net/core/sock.c
===================================================================
--- linux-2.6.orig/net/core/sock.c
+++ linux-2.6/net/core/sock.c
@@ -111,6 +111,7 @@
 #include <linux/poll.h>
 #include <linux/tcp.h>
 #include <linux/init.h>
+#include <linux/inetdevice.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -195,6 +196,59 @@ __u32 sysctl_rmem_default = SK_RMEM_MAX;
 /* Maximal space eaten by iovec or ancilliary data plus some space */
 int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
 
+#define ceiling_log2(x) fls((x) - 1)
+
+static inline unsigned int skb_pages(unsigned int mtu)
+{
+	unsigned int pages = (mtu + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned int order = ceiling_log2(pages);
+	pages = 1 << order;
+	if (pages > 1) ++pages;
+
+	return pages;
+}
+
+int sk_set_memalloc(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct net_device *dev = ip_dev_find(inet->rcv_saddr);
+	int err = 0;
+
+	if (!dev)
+		return -ENODEV;
+
+	if (!(dev->features & NETIF_F_MEMALLOC)) {
+		err = -EPERM;
+		goto out;
+	}
+
+	if (atomic_read(&dev->memalloc_socks) == 0) {
+		spin_lock(&dev->memalloc_lock);
+		if (atomic_read(&dev->memalloc_socks) == 0) {
+			dev->memalloc_reserve =
+				dev->rx_reserve * skb_pages(dev->mtu);
+			err = adjust_memalloc_reserve(dev->memalloc_reserve);
+			if (err) {
+				spin_unlock(&dev->memalloc_lock);
+				printk(KERN_WARNING
+			"%s: Unable to allocate RX reserve, error: %d\n",
+					dev->name, err);
+				goto out;
+			}
+			sock_set_flag(sk, SOCK_MEMALLOC);
+			dev->flags |= IFF_MEMALLOC;
+		}
+		atomic_inc(&dev->memalloc_socks);
+		spin_unlock(&dev->memalloc_lock);
+	} else
+		atomic_inc(&dev->memalloc_socks);
+
+out:
+	dev_put(dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(sk_set_memalloc);
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;

WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org
Cc: Daniel Phillips <phillips@google.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 2/9] deadlock prevention core
Date: Tue, 08 Aug 2006 21:33:45 +0200	[thread overview]
Message-ID: <20060808193345.1396.16773.sendpatchset@lappy> (raw)
In-Reply-To: <20060808193325.1396.58813.sendpatchset@lappy>

The core of the VM deadlock avoidance framework.

>From the 'user' side of things it provides a function to mark a 'struct sock'
as SOCK_MEMALLOC, meaning this socket may dip into the memalloc reserves on
the receive side.

>From the net_device side of things, the extra 'struct net_device *' argument
to {,__}netdev_alloc_skb() is used to attribute/account the memalloc usage.
Converted drivers will make use of this new API and will set NETIF_F_MEMALLOC
to indicate the driver fully supports this feature.

When a SOCK_MEMALLOC socket is marked, the device is checked for this feature
and tries to increase the memalloc pool; if both succeed, the device is marked
with IFF_MEMALLOC, indicating to {,__}netdev_alloc_skb() that it is OK to dip
into the memalloc pool.

Memalloc sk_buff allocations are not done from the SLAB but are done using 
alloc_pages(). sk_buff::memalloc records this exception so that kfree_skbmem()
can do the right thing.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Daniel Phillips <phillips@google.com>

---
 include/linux/gfp.h       |    3 -
 include/linux/if.h        |    1 
 include/linux/mmzone.h    |    1 
 include/linux/netdevice.h |   48 ++++++++++++++-----
 include/linux/skbuff.h    |    3 -
 include/net/sock.h        |    8 +++
 mm/page_alloc.c           |   29 ++++++++++-
 net/core/dev.c            |    1 
 net/core/skbuff.c         |  114 +++++++++++++++++++++++++++++++++++++++++++---
 net/core/sock.c           |   54 +++++++++++++++++++++
 net/ethernet/eth.c        |    1 
 net/ipv4/af_inet.c        |   14 +++++
 net/ipv4/icmp.c           |    5 ++
 net/ipv4/tcp_ipv4.c       |    6 ++
 net/ipv4/udp.c            |   11 ++++
 15 files changed, 274 insertions(+), 25 deletions(-)

Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h
+++ linux-2.6/include/linux/gfp.h
@@ -46,6 +46,7 @@ struct vm_area_struct;
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_MEMALLOC  ((__force gfp_t)0x40000u) /* Use emergency reserves */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +55,7 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_MEMALLOC)
 
 /* This equals 0, but use constants in case they ever change */
 #define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
Index: linux-2.6/include/linux/mmzone.h
===================================================================
--- linux-2.6.orig/include/linux/mmzone.h
+++ linux-2.6/include/linux/mmzone.h
@@ -420,6 +420,7 @@ int percpu_pagelist_fraction_sysctl_hand
 					void __user *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
+int adjust_memalloc_reserve(int bytes);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
Index: linux-2.6/include/linux/netdevice.h
===================================================================
--- linux-2.6.orig/include/linux/netdevice.h
+++ linux-2.6/include/linux/netdevice.h
@@ -298,18 +298,22 @@ struct net_device
 
 	/* Net device features */
 	unsigned long		features;
-#define NETIF_F_SG		1	/* Scatter/gather IO. */
-#define NETIF_F_IP_CSUM		2	/* Can checksum only TCP/UDP over IPv4. */
-#define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
-#define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
-#define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
-#define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
-#define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
-#define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
-#define NETIF_F_GSO		2048	/* Enable software GSO. */
-#define NETIF_F_LLTX		4096	/* LockLess TX */
+#define NETIF_F_SG		0x0001	/* Scatter/gather IO. */
+#define NETIF_F_IP_CSUM		0x0002	/* Can checksum only TCP/UDP over IPv4. */
+#define NETIF_F_NO_CSUM		0x0004	/* Does not require checksum. F.e. loopack. */
+#define NETIF_F_HW_CSUM		0x0008	/* Can checksum all the packets. */
+
+#define NETIF_F_HIGHDMA		0x0010	/* Can DMA to high memory. */
+#define NETIF_F_FRAGLIST	0x0020	/* Scatter/gather IO. */
+#define NETIF_F_HW_VLAN_TX	0x0040	/* Transmit VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_RX	0x0080	/* Receive VLAN hw acceleration */
+
+#define NETIF_F_HW_VLAN_FILTER	0x0100	/* Receive filtering on VLAN */
+#define NETIF_F_VLAN_CHALLENGED	0x0200	/* Device cannot handle VLAN packets */
+#define NETIF_F_GSO		0x0400	/* Enable software GSO. */
+#define NETIF_F_LLTX		0x0800	/* LockLess TX */
+
+#define NETIF_F_MEMALLOC	0x1000	/* Supports {SOCK,__GFP}_MEMALLOC */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
@@ -409,6 +413,12 @@ struct net_device
 	struct Qdisc		*qdisc_sleeping;
 	struct list_head	qdisc_list;
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
+	int			rx_reserve;
+	atomic_t		rx_reserve_used;
+
+	atomic_t		memalloc_socks;
+	unsigned long		memalloc_reserve;
+	spinlock_t		memalloc_lock;
 
 	/* Partially transmitted GSO packet. */
 	struct sk_buff		*gso_skb;
@@ -686,6 +696,20 @@ static inline void dev_kfree_skb_irq(str
  */
 extern void dev_kfree_skb_any(struct sk_buff *skb);
 
+/*
+ * Support for critical network IO under low memory conditions
+ */
+static inline int dev_reserve_used(struct net_device *dev)
+{
+	return atomic_read(&dev->rx_reserve_used);
+}
+
+static inline void dev_unreserve_skb(struct net_device *dev)
+{
+	if (atomic_dec_return(&dev->rx_reserve_used) < 0)
+		atomic_inc(&dev->rx_reserve_used);
+}
+
 #define HAVE_NETIF_RX 1
 extern int		netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
Index: linux-2.6/include/linux/skbuff.h
===================================================================
--- linux-2.6.orig/include/linux/skbuff.h
+++ linux-2.6/include/linux/skbuff.h
@@ -282,7 +282,8 @@ struct sk_buff {
 				nfctinfo:3;
 	__u8			pkt_type:3,
 				fclone:2,
-				ipvs_property:1;
+				ipvs_property:1,
+				memalloc:1;
 	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
Index: linux-2.6/include/net/sock.h
===================================================================
--- linux-2.6.orig/include/net/sock.h
+++ linux-2.6/include/net/sock.h
@@ -391,6 +391,7 @@ enum sock_flags {
 	SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
 	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
 	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
+	SOCK_MEMALLOC, /* protocol can use memalloc reserve */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -413,6 +414,13 @@ static inline int sock_flag(struct sock 
 	return test_bit(flag, &sk->sk_flags);
 }
 
+static inline int sk_is_memalloc(struct sock *sk)
+{
+	return sock_flag(sk, SOCK_MEMALLOC);
+}
+
+extern int sk_set_memalloc(struct sock *sk);
+
 static inline void sk_acceptq_removed(struct sock *sk)
 {
 	sk->sk_ack_backlog--;
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -82,6 +82,7 @@ EXPORT_SYMBOL(zone_table);
 
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
+int var_free_kbytes;
 
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
@@ -970,8 +971,8 @@ restart:
 
 	/* This allocation should allow future memory freeing. */
 
-	if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
-			&& !in_interrupt()) {
+	if ((((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+			&& !in_interrupt()) || (gfp_mask & __GFP_MEMALLOC)) {
 		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 nofail_alloc:
 			/* go through the zonelist yet again, ignoring mins */
@@ -2196,7 +2197,8 @@ static void setup_per_zone_lowmem_reserv
  */
 void setup_per_zone_pages_min(void)
 {
-	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+	unsigned pages_min = (min_free_kbytes + var_free_kbytes)
+		>> (PAGE_SHIFT - 10);
 	unsigned long lowmem_pages = 0;
 	struct zone *zone;
 	unsigned long flags;
@@ -2248,6 +2250,27 @@ void setup_per_zone_pages_min(void)
 	calculate_totalreserve_pages();
 }
 
+int adjust_memalloc_reserve(int pages)
+{
+	int kbytes = var_free_kbytes + (pages << (PAGE_SHIFT - 10));
+	if (kbytes < 0)
+		return -EINVAL;
+	var_free_kbytes = kbytes;
+	setup_per_zone_pages_min();
+	if (pages > 0) {
+		int i;
+		pg_data_t *pgdat;
+		for_each_online_pgdat(pgdat) {
+			for (i = 0; i < pgdat->nr_zones; ++i)
+				wakeup_kswapd(&pgdat->node_zones[i], 0);
+		}
+	}
+	printk(KERN_DEBUG "RX reserve: %d\n", var_free_kbytes);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(adjust_memalloc_reserve);
+
 /*
  * Initialise min_free_kbytes.
  *
Index: linux-2.6/net/core/skbuff.c
===================================================================
--- linux-2.6.orig/net/core/skbuff.c
+++ linux-2.6/net/core/skbuff.c
@@ -43,6 +43,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/pagemap.h>
 #include <linux/interrupt.h>
 #include <linux/in.h>
 #include <linux/inet.h>
@@ -125,6 +126,8 @@ EXPORT_SYMBOL(skb_truesize_bug);
  *
  */
 
+#define ceiling_log2(x)	fls((x) - 1)
+
 /**
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
@@ -147,6 +150,49 @@ struct sk_buff *__alloc_skb(unsigned int
 	struct sk_buff *skb;
 	u8 *data;
 
+	size = SKB_DATA_ALIGN(size);
+
+	if (gfp_mask & __GFP_MEMALLOC) {
+		/*
+		 * We have to do higher order allocations for icky jumbo
+		 * frame drivers :-(
+		 * They really should be migrated to scater/gather DMA
+		 * and use skb fragments.
+		 */
+		unsigned int data_offset =
+			sizeof(struct sk_buff) + sizeof(unsigned int);
+		unsigned long length = size + data_offset +
+			sizeof(struct skb_shared_info);
+		unsigned int pages;
+		unsigned int order;
+		struct page *page;
+		void *kaddr;
+
+		/*
+		 * force fclone alloc in order to fudge a lacking in skb_clone().
+		 */
+		fclone = 1;
+		if (fclone) {
+			data_offset += sizeof(struct sk_buff) + sizeof(atomic_t);
+			length += sizeof(struct sk_buff) + sizeof(atomic_t);
+		}
+		pages = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		order = ceiling_log2(pages);
+
+		skb = NULL;
+		if (!(page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order)))
+			goto out;
+
+		kaddr = pfn_to_kaddr(page_to_pfn(page));
+		skb = (struct sk_buff *)kaddr;
+
+		*((unsigned int *)(kaddr + data_offset -
+					sizeof(unsigned int))) = order;
+		data = (u8 *)(kaddr + data_offset);
+
+		goto allocated;
+	}
+
 	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
 
 	/* Get the HEAD */
@@ -155,12 +201,13 @@ struct sk_buff *__alloc_skb(unsigned int
 		goto out;
 
 	/* Get the DATA. Size must match skb_add_mtu(). */
-	size = SKB_DATA_ALIGN(size);
 	data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (!data)
 		goto nodata;
 
+allocated:
 	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->memalloc = !!(gfp_mask & __GFP_MEMALLOC);
 	skb->truesize = size + sizeof(struct sk_buff);
 	atomic_set(&skb->users, 1);
 	skb->head = data;
@@ -185,6 +232,7 @@ struct sk_buff *__alloc_skb(unsigned int
 		atomic_set(fclone_ref, 1);
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
+		child->memalloc = skb->memalloc;
 	}
 out:
 	return skb;
@@ -250,7 +298,7 @@ nodata:
 }
 
 /**
- *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ *	___netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *	@dev: network device to receive on
  *	@length: length to allocate
  *	@gfp_mask: get_free_pages mask, passed to alloc_skb
@@ -262,7 +310,7 @@ nodata:
  *
  *	%NULL is returned if there is no free memory.
  */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+static struct sk_buff *___netdev_alloc_skb(struct net_device *dev,
 		unsigned int length, gfp_t gfp_mask)
 {
 	struct sk_buff *skb;
@@ -273,6 +321,34 @@ struct sk_buff *__netdev_alloc_skb(struc
 	return skb;
 }
 
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+		unsigned length, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	if (dev && (dev->flags & IFF_MEMALLOC)) {
+		WARN_ON(gfp_mask & (__GFP_NOMEMALLOC | __GFP_MEMALLOC));
+		gfp_mask &= ~(__GFP_NOMEMALLOC | __GFP_MEMALLOC);
+
+		if ((skb = ___netdev_alloc_skb(dev, length,
+					       gfp_mask | __GFP_NOMEMALLOC)))
+			goto done;
+		if (dev_reserve_used(dev) >= dev->rx_reserve)
+			goto out;
+		if (!(skb = ___netdev_alloc_skb(dev, length,
+						gfp_mask | __GFP_MEMALLOC)))
+			goto out;
+		atomic_inc(&dev->rx_reserve_used);
+	} else
+		if (!(skb = ___netdev_alloc_skb(dev, length, gfp_mask)))
+			goto out;
+
+done:
+	skb->dev = dev;
+out:
+	return skb;
+}
+
 static void skb_drop_list(struct sk_buff **listp)
 {
 	struct sk_buff *list = *listp;
@@ -313,10 +389,19 @@ static void skb_release_data(struct sk_b
 		if (skb_shinfo(skb)->frag_list)
 			skb_drop_fraglist(skb);
 
-		kfree(skb->head);
+		if (!skb->memalloc)
+			kfree(skb->head);
 	}
 }
 
+static void free_skb_pages(struct kmem_cache *cache, void *objp)
+{
+	struct sk_buff *skb = (struct sk_buff *)objp;
+	unsigned int order =
+		*(unsigned int *)(skb->head - sizeof(unsigned int));
+	free_pages((unsigned long)skb, order);
+}
+
 /*
  *	Free an skbuff by memory without cleaning the state.
  */
@@ -324,17 +409,21 @@ void kfree_skbmem(struct sk_buff *skb)
 {
 	struct sk_buff *other;
 	atomic_t *fclone_ref;
+	void (*free_skb)(struct kmem_cache *, void *);
 
 	skb_release_data(skb);
+
+	free_skb = skb->memalloc ? free_skb_pages : kmem_cache_free;
+
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
-		kmem_cache_free(skbuff_head_cache, skb);
+		free_skb(skbuff_head_cache, skb);
 		break;
 
 	case SKB_FCLONE_ORIG:
 		fclone_ref = (atomic_t *) (skb + 2);
 		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, skb);
+			free_skb(skbuff_fclone_cache, skb);
 		break;
 
 	case SKB_FCLONE_CLONE:
@@ -347,7 +436,7 @@ void kfree_skbmem(struct sk_buff *skb)
 		skb->fclone = SKB_FCLONE_UNAVAILABLE;
 
 		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, other);
+			free_skb(skbuff_fclone_cache, other);
 		break;
 	};
 }
@@ -363,6 +452,8 @@ void kfree_skbmem(struct sk_buff *skb)
 
 void __kfree_skb(struct sk_buff *skb)
 {
+	struct net_device *dev = skb->dev;
+
 	dst_release(skb->dst);
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
@@ -389,6 +480,8 @@ void __kfree_skb(struct sk_buff *skb)
 #endif
 
 	kfree_skbmem(skb);
+	if (dev && (dev->flags & IFF_MEMALLOC))
+		dev_unreserve_skb(dev);
 }
 
 /**
@@ -434,10 +527,15 @@ struct sk_buff *skb_clone(struct sk_buff
 		n->fclone = SKB_FCLONE_CLONE;
 		atomic_inc(fclone_ref);
 	} else {
+		/*
+		 * should we special-case skb->memalloc cloning?
+		 * for now fudge it by forcing fast-clone alloc.
+		 */
 		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 		if (!n)
 			return NULL;
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
+		n->memalloc = 0;
 	}
 
 #define C(x) n->x = skb->x
@@ -686,6 +784,8 @@ int pskb_expand_head(struct sk_buff *skb
 	if (skb_shared(skb))
 		BUG();
 
+	BUG_ON(skb->memalloc);
+
 	size = SKB_DATA_ALIGN(size);
 
 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
Index: linux-2.6/net/ethernet/eth.c
===================================================================
--- linux-2.6.orig/net/ethernet/eth.c
+++ linux-2.6/net/ethernet/eth.c
@@ -275,6 +275,7 @@ void ether_setup(struct net_device *dev)
 	dev->mtu		= ETH_DATA_LEN;
 	dev->addr_len		= ETH_ALEN;
 	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */	
+	dev->rx_reserve		= 384;
 	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
 	
 	memset(dev->broadcast,0xFF, ETH_ALEN);
Index: linux-2.6/net/ipv4/icmp.c
===================================================================
--- linux-2.6.orig/net/ipv4/icmp.c
+++ linux-2.6/net/ipv4/icmp.c
@@ -938,6 +938,11 @@ int icmp_rcv(struct sk_buff *skb)
 			goto error;
 	}
 
+	if (unlikely(dev_reserve_used(skb->dev))) {
+		dev_unreserve_skb(skb->dev);
+		goto drop;
+	}
+
 	if (!pskb_pull(skb, sizeof(struct icmphdr)))
 		goto error;
 
Index: linux-2.6/net/ipv4/tcp_ipv4.c
===================================================================
--- linux-2.6.orig/net/ipv4/tcp_ipv4.c
+++ linux-2.6/net/ipv4/tcp_ipv4.c
@@ -1093,6 +1093,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!sk)
 		goto no_tcp_socket;
 
+	if (unlikely(dev_reserve_used(skb->dev))) {
+		dev_unreserve_skb(skb->dev);
+		if (!sk_is_memalloc(sk))
+			goto discard_and_relse;
+	}
+
 process:
 	if (sk->sk_state == TCP_TIME_WAIT)
 		goto do_time_wait;
Index: linux-2.6/net/ipv4/udp.c
===================================================================
--- linux-2.6.orig/net/ipv4/udp.c
+++ linux-2.6/net/ipv4/udp.c
@@ -1136,7 +1136,15 @@ int udp_rcv(struct sk_buff *skb)
 	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
 
 	if (sk != NULL) {
-		int ret = udp_queue_rcv_skb(sk, skb);
+		int ret;
+
+		if (unlikely(dev_reserve_used(skb->dev))) {
+			dev_unreserve_skb(skb->dev);
+			if (!sk_is_memalloc(sk))
+				goto drop_noncritical;
+		}
+
+		ret = udp_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
 		/* a return value > 0 means to resubmit the input, but
@@ -1147,6 +1155,7 @@ int udp_rcv(struct sk_buff *skb)
 		return 0;
 	}
 
+drop_noncritical:
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto drop;
 	nf_reset(skb);
Index: linux-2.6/include/linux/if.h
===================================================================
--- linux-2.6.orig/include/linux/if.h
+++ linux-2.6/include/linux/if.h
@@ -49,6 +49,7 @@
 
 #define IFF_LOWER_UP	0x10000		/* driver signals L1 up		*/
 #define IFF_DORMANT	0x20000		/* driver signals dormant	*/
+#define IFF_MEMALLOC	0x40000		/* driver has a reserve allocated */
 
 #define IFF_VOLATILE	(IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|\
 		IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
Index: linux-2.6/net/core/dev.c
===================================================================
--- linux-2.6.orig/net/core/dev.c
+++ linux-2.6/net/core/dev.c
@@ -2900,6 +2900,7 @@ int register_netdevice(struct net_device
 #ifdef CONFIG_NET_CLS_ACT
 	spin_lock_init(&dev->ingress_lock);
 #endif
+	spin_lock_init(&dev->memalloc_lock);
 
 	ret = alloc_divert_blk(dev);
 	if (ret)
Index: linux-2.6/net/ipv4/af_inet.c
===================================================================
--- linux-2.6.orig/net/ipv4/af_inet.c
+++ linux-2.6/net/ipv4/af_inet.c
@@ -131,6 +131,20 @@ static DEFINE_SPINLOCK(inetsw_lock);
 void inet_sock_destruct(struct sock *sk)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct net_device *dev = ip_dev_find(inet->rcv_saddr);
+
+	if (dev && (dev->flags & IFF_MEMALLOC) &&
+			sk_is_memalloc(sk) &&
+			(atomic_read(&dev->memalloc_socks) == 1)) {
+		spin_lock(&dev->memalloc_lock);
+		if (atomic_dec_and_test(&dev->memalloc_socks)) {
+			dev->flags &= ~IFF_MEMALLOC;
+			WARN_ON(dev_reserve_used(dev));
+			atomic_set(&dev->rx_reserve_used, 0);
+			adjust_memalloc_reserve(-dev->memalloc_reserve);
+		}
+		spin_unlock(&dev->memalloc_lock);
+	}
 
 	__skb_queue_purge(&sk->sk_receive_queue);
 	__skb_queue_purge(&sk->sk_error_queue);
Index: linux-2.6/net/core/sock.c
===================================================================
--- linux-2.6.orig/net/core/sock.c
+++ linux-2.6/net/core/sock.c
@@ -111,6 +111,7 @@
 #include <linux/poll.h>
 #include <linux/tcp.h>
 #include <linux/init.h>
+#include <linux/inetdevice.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -195,6 +196,59 @@ __u32 sysctl_rmem_default = SK_RMEM_MAX;
 /* Maximal space eaten by iovec or ancilliary data plus some space */
 int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
 
+#define ceiling_log2(x) fls((x) - 1)
+
+static inline unsigned int skb_pages(unsigned int mtu)
+{
+	unsigned int pages = (mtu + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned int order = ceiling_log2(pages);
+	pages = 1 << order;
+	if (pages > 1) ++pages;
+
+	return pages;
+}
+
+int sk_set_memalloc(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct net_device *dev = ip_dev_find(inet->rcv_saddr);
+	int err = 0;
+
+	if (!dev)
+		return -ENODEV;
+
+	if (!(dev->features & NETIF_F_MEMALLOC)) {
+		err = -EPERM;
+		goto out;
+	}
+
+	if (atomic_read(&dev->memalloc_socks) == 0) {
+		spin_lock(&dev->memalloc_lock);
+		if (atomic_read(&dev->memalloc_socks) == 0) {
+			dev->memalloc_reserve =
+				dev->rx_reserve * skb_pages(dev->mtu);
+			err = adjust_memalloc_reserve(dev->memalloc_reserve);
+			if (err) {
+				spin_unlock(&dev->memalloc_lock);
+				printk(KERN_WARNING
+			"%s: Unable to allocate RX reserve, error: %d\n",
+					dev->name, err);
+				goto out;
+			}
+			sock_set_flag(sk, SOCK_MEMALLOC);
+			dev->flags |= IFF_MEMALLOC;
+		}
+		atomic_inc(&dev->memalloc_socks);
+		spin_unlock(&dev->memalloc_lock);
+	} else
+		atomic_inc(&dev->memalloc_socks);
+
+out:
+	dev_put(dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(sk_set_memalloc);
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2006-08-08 19:37 UTC|newest]

Thread overview: 280+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-08-08 19:33 [RFC][PATCH 0/9] Network receive deadlock prevention for NBD Peter Zijlstra
2006-08-08 19:33 ` Peter Zijlstra
2006-08-08 19:33 ` [RFC][PATCH 1/9] pfn_to_kaddr() for UML Peter Zijlstra
2006-08-08 19:33   ` Peter Zijlstra
2006-08-08 19:33 ` Peter Zijlstra [this message]
2006-08-08 19:33   ` [RFC][PATCH 2/9] deadlock prevention core Peter Zijlstra
2006-08-08 20:57   ` Stephen Hemminger
2006-08-08 20:57     ` Stephen Hemminger
2006-08-08 21:05     ` Peter Zijlstra
2006-08-08 21:05       ` Peter Zijlstra
2006-08-09  1:33     ` Daniel Phillips
2006-08-09  1:33       ` Daniel Phillips
2006-08-09  1:38       ` David Miller
2006-08-09  1:38         ` David Miller, Daniel Phillips
2006-08-08 21:17   ` Thomas Graf
2006-08-08 21:17     ` Thomas Graf
2006-08-09  1:34     ` Daniel Phillips
2006-08-09  1:34       ` Daniel Phillips
2006-08-09  1:39       ` David Miller
2006-08-09  1:39         ` David Miller, Daniel Phillips
2006-08-09  5:47         ` Daniel Phillips
2006-08-09  5:47           ` Daniel Phillips
2006-08-09 13:19           ` Thomas Graf
2006-08-09 13:19             ` Thomas Graf
2006-08-09 14:07             ` Peter Zijlstra
2006-08-09 14:07               ` Peter Zijlstra
2006-08-09 16:18               ` Thomas Graf
2006-08-09 16:18                 ` Thomas Graf
2006-08-09 16:19                 ` Peter Zijlstra
2006-08-09 16:19                   ` Peter Zijlstra
2006-08-10  0:01                   ` David Miller
2006-08-10  0:01                     ` David Miller, Peter Zijlstra
2006-08-09 23:58               ` David Miller
2006-08-09 23:58                 ` David Miller, Peter Zijlstra
2006-08-10  6:25                 ` Peter Zijlstra
2006-08-10  6:25                   ` Peter Zijlstra
2006-08-11  4:24                 ` Stephen Hemminger
2006-08-11  4:24                   ` Stephen Hemminger
2006-08-13 21:22                 ` Daniel Phillips
2006-08-13 21:22                   ` Daniel Phillips
2006-08-13 23:49                   ` David Miller
2006-08-13 23:49                     ` David Miller, Daniel Phillips
2006-08-14  1:15                     ` Daniel Phillips
2006-08-14  1:15                       ` Daniel Phillips
2006-08-11  2:37     ` Rik van Riel
2006-08-11  2:37       ` Rik van Riel
2006-08-13 22:05       ` Daniel Phillips
2006-08-13 22:05         ` Daniel Phillips
2006-08-13 23:55         ` David Miller
2006-08-13 23:55           ` David Miller, Daniel Phillips
2006-08-14  1:31           ` Daniel Phillips
2006-08-14  1:31             ` Daniel Phillips
2006-08-14  1:53             ` Andrew Morton
2006-08-14  1:53               ` Andrew Morton
2006-08-14  4:40               ` Peter Zijlstra
2006-08-14  4:40                 ` Peter Zijlstra
2006-08-14  4:58                 ` Andrew Morton
2006-08-14  4:58                   ` Andrew Morton
2006-08-14  5:03                   ` Peter Zijlstra
2006-08-14  5:03                     ` Peter Zijlstra
2006-08-14  5:22                     ` Andrew Morton
2006-08-14  5:22                       ` Andrew Morton
2006-08-14  6:45                       ` Peter Zijlstra
2006-08-14  6:45                         ` Peter Zijlstra
2006-08-14  7:07                         ` Andrew Morton
2006-08-14  7:07                           ` Andrew Morton
2006-08-14  8:15                           ` Peter Zijlstra
2006-08-14  8:15                             ` Peter Zijlstra
2006-08-14  8:25                             ` Evgeniy Polyakov
2006-08-14  8:25                               ` Evgeniy Polyakov
2006-08-14  8:35                               ` Peter Zijlstra
2006-08-14  8:35                                 ` Peter Zijlstra
2006-08-14  8:33                           ` David Miller
2006-08-14  8:33                             ` David Miller, Andrew Morton
2006-08-17  4:27                           ` Daniel Phillips
2006-08-17  4:27                             ` Daniel Phillips
2006-08-14  7:17                         ` Neil Brown
2006-08-14  7:17                           ` Neil Brown
2006-08-14  7:31                           ` Evgeniy Polyakov
2006-08-14  7:31                             ` Evgeniy Polyakov
2006-08-17  3:58                   ` Daniel Phillips
2006-08-17  3:58                     ` Daniel Phillips
2006-08-17  5:57                     ` Andrew Morton
2006-08-17  5:57                       ` Andrew Morton
2006-08-17 23:53                       ` Daniel Phillips
2006-08-17 23:53                         ` Daniel Phillips
2006-08-18  0:24                         ` Rik van Riel
2006-08-18  0:24                           ` Rik van Riel
2006-08-18  0:35                         ` Daniel Phillips
2006-08-18  0:35                           ` Daniel Phillips
2006-08-18  1:14                         ` Neil Brown
2006-08-18  1:14                           ` Neil Brown
2006-08-18  6:05                         ` Andrew Morton
2006-08-18  6:05                           ` Andrew Morton
2006-08-18 21:22                           ` Daniel Phillips
2006-08-18 21:22                             ` Daniel Phillips
2006-08-18 22:34                             ` Andrew Morton
2006-08-18 22:34                               ` Andrew Morton
2006-08-18 23:44                               ` Daniel Phillips
2006-08-18 23:44                                 ` Daniel Phillips
2006-08-19  2:44                                 ` Andrew Morton
2006-08-19  2:44                                   ` Andrew Morton
2006-08-19  4:14                                   ` Network receive stall avoidance (was [PATCH 2/9] deadlock prevention core) Daniel Phillips
2006-08-19  4:14                                     ` Daniel Phillips
2006-08-19  7:28                                     ` Andrew Morton
2006-08-19  7:28                                       ` Andrew Morton
2006-08-19 15:06                                   ` [RFC][PATCH 2/9] deadlock prevention core Rik van Riel
2006-08-19 15:06                                     ` Rik van Riel
2006-08-20  1:33                                     ` Andre Tomt
2006-08-20  1:33                                       ` Andre Tomt
2006-08-19 16:53                                   ` Ray Lee
2006-08-19 16:53                                     ` Ray Lee
2006-08-21 13:27                                   ` Philip R. Auld
2006-08-21 13:27                                     ` Philip R. Auld
2006-08-25 10:47                                     ` Pavel Machek
2006-08-25 10:47                                       ` Pavel Machek
2006-08-21 13:38                                 ` Jens Axboe
2006-08-21 13:38                                   ` Jens Axboe
2006-08-08 22:10   ` David Miller
2006-08-08 22:10     ` David Miller
2006-08-09  1:35     ` Daniel Phillips
2006-08-09  1:35       ` Daniel Phillips
2006-08-09  1:41       ` David Miller
2006-08-09  1:41         ` David Miller, Daniel Phillips
2006-08-09  5:44         ` Daniel Phillips
2006-08-09  5:44           ` Daniel Phillips
2006-08-09  7:00           ` Peter Zijlstra
2006-08-09  7:00             ` Peter Zijlstra
     [not found]   ` <42414.81.207.0.53.1155080443.squirrel@81.207.0.53>
2006-08-09  0:25     ` Daniel Phillips
2006-08-09  0:25       ` Daniel Phillips
2006-08-09 12:02       ` Indan Zupancic
2006-08-09 12:02         ` Indan Zupancic
2006-08-09 12:54         ` Peter Zijlstra
2006-08-09 12:54           ` Peter Zijlstra
2006-08-09 13:48           ` Indan Zupancic
2006-08-09 13:48             ` Indan Zupancic
2006-08-09 14:00             ` Peter Zijlstra
2006-08-09 14:00               ` Peter Zijlstra
2006-08-09 18:34               ` Indan Zupancic
2006-08-09 18:34                 ` Indan Zupancic
2006-08-09 19:45                 ` Peter Zijlstra
2006-08-09 19:45                   ` Peter Zijlstra
2006-08-09 20:19                   ` Peter Zijlstra
2006-08-09 20:19                     ` Peter Zijlstra
2006-08-10  1:21                   ` Indan Zupancic
2006-08-10  1:21                     ` Indan Zupancic
2006-08-09 16:05   ` -v2 " Peter Zijlstra
2006-08-09 16:05     ` Peter Zijlstra
2006-08-08 19:33 ` [RFC][PATCH 3/9] e1000 driver conversion Peter Zijlstra
2006-08-08 19:33   ` Peter Zijlstra
2006-08-08 20:50   ` Auke Kok
2006-08-08 20:50     ` Auke Kok
2006-08-08 20:59     ` Peter Zijlstra
2006-08-08 20:59       ` Peter Zijlstra
2006-08-08 22:32     ` David Miller
2006-08-08 22:32       ` David Miller, Auke Kok
2006-08-08 22:42       ` Auke Kok
2006-08-08 22:42         ` Auke Kok
2006-08-08 19:34 ` [RFC][PATCH 4/9] e100 " Peter Zijlstra
2006-08-08 19:34   ` Peter Zijlstra
2006-08-08 20:13   ` Auke Kok
2006-08-08 20:13     ` Auke Kok
2006-08-08 20:18     ` Peter Zijlstra
2006-08-08 20:18       ` Peter Zijlstra
2006-08-08 19:34 ` [RFC][PATCH 5/9] r8169 " Peter Zijlstra
2006-08-08 19:34   ` Peter Zijlstra
2006-08-08 19:34 ` [RFC][PATCH 6/9] tg3 " Peter Zijlstra
2006-08-08 19:34   ` Peter Zijlstra
2006-08-08 19:34 ` [RFC][PATCH 7/9] UML eth " Peter Zijlstra
2006-08-08 19:34   ` Peter Zijlstra
2006-08-08 19:34 ` [RFC][PATCH 8/9] 3c59x " Peter Zijlstra
2006-08-08 19:34   ` Peter Zijlstra
2006-08-08 23:07   ` Jeff Garzik
2006-08-08 23:07     ` Jeff Garzik
2006-08-09  5:51     ` Daniel Phillips
2006-08-09  5:51       ` Daniel Phillips
2006-08-09  5:55       ` David Miller
2006-08-09  5:55         ` David Miller, Daniel Phillips
2006-08-09  6:30         ` Jeff Garzik
2006-08-09  6:30           ` Jeff Garzik
2006-08-09  7:03           ` Peter Zijlstra
2006-08-09  7:03             ` Peter Zijlstra
2006-08-09  7:20             ` Jeff Garzik
2006-08-09  7:20               ` Jeff Garzik
2006-08-13 19:38         ` Daniel Phillips
2006-08-13 19:38           ` Daniel Phillips
2006-08-13 19:53           ` Jeff Garzik
2006-08-13 19:53             ` Jeff Garzik
2006-08-08 19:34 ` [RFC][PATCH 9/9] deadlock prevention for NBD Peter Zijlstra
2006-08-08 19:34   ` Peter Zijlstra
2006-08-09  5:46 ` [RFC][PATCH 0/9] Network receive " Evgeniy Polyakov
2006-08-09  5:46   ` Evgeniy Polyakov
2006-08-09  5:52   ` Daniel Phillips
2006-08-09  5:52     ` Daniel Phillips
2006-08-09  5:56     ` David Miller
2006-08-09  5:56       ` David Miller, Daniel Phillips
2006-08-09  5:53   ` David Miller
2006-08-09  5:53     ` David Miller, Evgeniy Polyakov
2006-08-09  5:55     ` Evgeniy Polyakov
2006-08-09  5:55       ` Evgeniy Polyakov
2006-08-09 12:37   ` Peter Zijlstra
2006-08-09 12:37     ` Peter Zijlstra
2006-08-09 13:07     ` Evgeniy Polyakov
2006-08-09 13:07       ` Evgeniy Polyakov
2006-08-09 13:32       ` Peter Zijlstra
2006-08-09 13:32         ` Peter Zijlstra
2006-08-09 19:29         ` Evgeniy Polyakov
2006-08-09 19:29           ` Evgeniy Polyakov
2006-08-09 23:54         ` David Miller
2006-08-09 23:54           ` David Miller, Peter Zijlstra
2006-08-10  6:06           ` Peter Zijlstra
2006-08-10  6:06             ` Peter Zijlstra
2006-08-13 20:16             ` Daniel Phillips
2006-08-13 20:16               ` Daniel Phillips
2006-08-14  5:13               ` Evgeniy Polyakov
2006-08-14  5:13                 ` Evgeniy Polyakov
2006-08-14  6:45                 ` Peter Zijlstra
2006-08-14  6:45                   ` Peter Zijlstra
2006-08-14  6:54                   ` Evgeniy Polyakov
2006-08-14  6:54                     ` Evgeniy Polyakov
2006-08-17  4:49                     ` Daniel Phillips
2006-08-17  4:49                       ` Daniel Phillips
2006-08-17  4:48                 ` Daniel Phillips
2006-08-17  4:48                   ` Daniel Phillips
2006-08-17  5:36                   ` Evgeniy Polyakov
2006-08-17  5:36                     ` Evgeniy Polyakov
2006-08-17 18:01                     ` Daniel Phillips
2006-08-17 18:01                       ` Daniel Phillips
2006-08-17 18:42                       ` Evgeniy Polyakov
2006-08-17 18:42                         ` Evgeniy Polyakov
2006-08-17 19:15                         ` Peter Zijlstra
2006-08-17 19:15                           ` Peter Zijlstra
2006-08-17 19:48                           ` Evgeniy Polyakov
2006-08-17 19:48                             ` Evgeniy Polyakov
2006-08-17 23:24                             ` Daniel Phillips
2006-08-17 23:24                               ` Daniel Phillips
2006-08-18  7:16                               ` Evgeniy Polyakov
2006-08-18  7:16                                 ` Evgeniy Polyakov
2006-08-12  3:42         ` Rik van Riel
2006-08-12  3:42           ` Rik van Riel
2006-08-12  8:47           ` Evgeniy Polyakov
2006-08-12  8:47             ` Evgeniy Polyakov
2006-08-12  9:19             ` Peter Zijlstra
2006-08-12  9:19               ` Peter Zijlstra
2006-08-12  9:37               ` Evgeniy Polyakov
2006-08-12  9:37                 ` Evgeniy Polyakov
2006-08-12 10:18                 ` Peter Zijlstra
2006-08-12 10:18                   ` Peter Zijlstra
2006-08-12 10:42                   ` Evgeniy Polyakov
2006-08-12 10:42                     ` Evgeniy Polyakov
2006-08-12 10:51                     ` Evgeniy Polyakov
2006-08-12 10:51                       ` Evgeniy Polyakov
2006-08-12 11:40                     ` Peter Zijlstra
2006-08-12 11:40                       ` Peter Zijlstra
2006-08-12 11:53                       ` Evgeniy Polyakov
2006-08-12 11:53                         ` Evgeniy Polyakov
2006-08-13  0:46                   ` David Miller
2006-08-13  0:46                     ` David Miller, Peter Zijlstra
2006-08-13  1:11                     ` Rik van Riel
2006-08-13  1:11                       ` Rik van Riel
2006-08-12 14:40                 ` Rik van Riel
2006-08-12 14:40                   ` Rik van Riel
2006-08-12 14:49                   ` Evgeniy Polyakov
2006-08-12 14:49                     ` Evgeniy Polyakov
2006-08-12 14:56                     ` Rik van Riel
2006-08-12 14:56                       ` Rik van Riel
2006-08-12 15:08                       ` Evgeniy Polyakov
2006-08-12 15:08                         ` Evgeniy Polyakov
2006-08-12 15:22                         ` Peter Zijlstra
2006-08-12 15:22                           ` Peter Zijlstra
2006-08-14  0:56                         ` Daniel Phillips
2006-08-14  0:56                           ` Daniel Phillips
2006-08-13  0:46                 ` David Miller
2006-08-13  0:46                   ` David Miller, Evgeniy Polyakov
2006-08-13  9:06                   ` Evgeniy Polyakov
2006-08-13  9:06                     ` Evgeniy Polyakov
2006-08-13  9:52                     ` Evgeniy Polyakov
2006-08-13  9:52                       ` Evgeniy Polyakov
2006-08-15 19:17 ` Pavel Machek
2006-08-15 19:17   ` Pavel Machek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060808193345.1396.16773.sendpatchset@lappy \
    --to=a.p.zijlstra@chello.nl \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=phillips@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.