Netdev List
 help / color / mirror / Atom feed
* [PATCH 4/6] cxgb4vf: convert to SKB paged frag API.
From: Ian Campbell @ 2011-10-20  9:01 UTC (permalink / raw)
  To: netdev@vger.kernel.org; +Cc: Ian Campbell, Casey Leedom, netdev
In-Reply-To: <1319101275.3385.129.camel@zakaz.uk.xensource.com>

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Casey Leedom <leedom@chelsio.com>
Cc: netdev@vger.kernel.org
---
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h |    2 +-
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c     |   92 ++++++++++-------------
 2 files changed, 41 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 594334d..611396c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -144,7 +144,7 @@ struct sge_fl {
  * An ingress packet gather list.
  */
 struct pkt_gl {
-	skb_frag_t frags[MAX_SKB_FRAGS];
+	struct page_frag frags[MAX_SKB_FRAGS];
 	void *va;			/* virtual address of first byte */
 	unsigned int nfrags;		/* # of fragments */
 	unsigned int tot_len;		/* total length of fragments */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index c2d456d..8d5d55a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -296,8 +296,8 @@ static int map_skb(struct device *dev, const struct sk_buff *skb,
 	si = skb_shinfo(skb);
 	end = &si->frags[si->nr_frags];
 	for (fp = si->frags; fp < end; fp++) {
-		*++addr = dma_map_page(dev, fp->page, fp->page_offset,
-				       skb_frag_size(fp), DMA_TO_DEVICE);
+		*++addr = skb_frag_dma_map(dev, fp, 0, skb_frag_size(fp),
+					   DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, *addr))
 			goto unwind;
 	}
@@ -1357,6 +1357,35 @@ out_free:
 }
 
 /**
+ *	copy_frags - copy fragments from gather list into skb_shared_info
+ *	@skb: destination skb
+ *	@gl: source internal packet gather list
+ *	@offset: packet start offset in first page
+ *
+ *	Copy an internal packet gather list into a Linux skb_shared_info
+ *	structure.
+ */
+static inline void copy_frags(struct sk_buff *skb,
+			      const struct pkt_gl *gl,
+			      unsigned int offset)
+{
+	int i;
+
+	/* usually there's just one frag */
+	__skb_fill_page_desc(skb, 0, gl->frags[0].page,
+			     gl->frags[0].offset + offset,
+			     gl->frags[0].size - offset);
+	skb_shinfo(skb)->nr_frags = gl->nfrags;
+	for (i = 1; i < gl->nfrags; i++)
+		__skb_fill_page_desc(skb, i, gl->frags[i].page,
+				     gl->frags[i].offset,
+				     gl->frags[i].size);
+
+	/* get a reference to the last page, we don't own it */
+	get_page(gl->frags[gl->nfrags - 1].page);
+}
+
+/**
  *	t4vf_pktgl_to_skb - build an sk_buff from a packet gather list
  *	@gl: the gather list
  *	@skb_len: size of sk_buff main body if it carries fragments
@@ -1369,7 +1398,6 @@ struct sk_buff *t4vf_pktgl_to_skb(const struct pkt_gl *gl,
 				  unsigned int skb_len, unsigned int pull_len)
 {
 	struct sk_buff *skb;
-	struct skb_shared_info *ssi;
 
 	/*
 	 * If the ingress packet is small enough, allocate an skb large enough
@@ -1396,21 +1424,10 @@ struct sk_buff *t4vf_pktgl_to_skb(const struct pkt_gl *gl,
 		__skb_put(skb, pull_len);
 		skb_copy_to_linear_data(skb, gl->va, pull_len);
 
-		ssi = skb_shinfo(skb);
-		ssi->frags[0].page = gl->frags[0].page;
-		ssi->frags[0].page_offset = gl->frags[0].page_offset + pull_len;
-		skb_frag_size_set(&ssi->frags[0], skb_frag_size(&gl->frags[0]) - pull_len);
-		if (gl->nfrags > 1)
-			memcpy(&ssi->frags[1], &gl->frags[1],
-			       (gl->nfrags-1) * sizeof(skb_frag_t));
-		ssi->nr_frags = gl->nfrags;
-
+		copy_frags(skb, gl, pull_len);
 		skb->len = gl->tot_len;
 		skb->data_len = skb->len - pull_len;
 		skb->truesize += skb->data_len;
-
-		/* Get a reference for the last page, we don't own it */
-		get_page(gl->frags[gl->nfrags - 1].page);
 	}
 
 out:
@@ -1434,35 +1451,6 @@ void t4vf_pktgl_free(const struct pkt_gl *gl)
 }
 
 /**
- *	copy_frags - copy fragments from gather list into skb_shared_info
- *	@si: destination skb shared info structure
- *	@gl: source internal packet gather list
- *	@offset: packet start offset in first page
- *
- *	Copy an internal packet gather list into a Linux skb_shared_info
- *	structure.
- */
-static inline void copy_frags(struct skb_shared_info *si,
-			      const struct pkt_gl *gl,
-			      unsigned int offset)
-{
-	unsigned int n;
-
-	/* usually there's just one frag */
-	si->frags[0].page = gl->frags[0].page;
-	si->frags[0].page_offset = gl->frags[0].page_offset + offset;
-	skb_frag_size_set(&si->frags[0], skb_frag_size(&gl->frags[0]) - offset);
-	si->nr_frags = gl->nfrags;
-
-	n = gl->nfrags - 1;
-	if (n)
-		memcpy(&si->frags[1], &gl->frags[1], n * sizeof(skb_frag_t));
-
-	/* get a reference to the last page, we don't own it */
-	get_page(gl->frags[n].page);
-}
-
-/**
  *	do_gro - perform Generic Receive Offload ingress packet processing
  *	@rxq: ingress RX Ethernet Queue
  *	@gl: gather list for ingress packet
@@ -1484,7 +1472,7 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
 		return;
 	}
 
-	copy_frags(skb_shinfo(skb), gl, PKTSHIFT);
+	copy_frags(skb, gl, PKTSHIFT);
 	skb->len = gl->tot_len - PKTSHIFT;
 	skb->data_len = skb->len;
 	skb->truesize += skb->data_len;
@@ -1667,7 +1655,7 @@ int process_responses(struct sge_rspq *rspq, int budget)
 		rmb();
 		rsp_type = RSPD_TYPE(rc->type_gen);
 		if (likely(rsp_type == RSP_TYPE_FLBUF)) {
-			skb_frag_t *fp;
+			struct page_frag *fp;
 			struct pkt_gl gl;
 			const struct rx_sw_desc *sdesc;
 			u32 bufsz, frag;
@@ -1701,9 +1689,9 @@ int process_responses(struct sge_rspq *rspq, int budget)
 				sdesc = &rxq->fl.sdesc[rxq->fl.cidx];
 				bufsz = get_buf_size(sdesc);
 				fp->page = sdesc->page;
-				fp->page_offset = rspq->offset;
-				skb_frag_size_set(fp, min(bufsz, len));
-				len -= skb_frag_size(fp);
+				fp->offset = rspq->offset;
+				fp->size = min(bufsz, len);
+				len -= fp->size;
 				if (!len)
 					break;
 				unmap_rx_buf(rspq->adapter, &rxq->fl);
@@ -1717,9 +1705,9 @@ int process_responses(struct sge_rspq *rspq, int budget)
 			 */
 			dma_sync_single_for_cpu(rspq->adapter->pdev_dev,
 						get_buf_addr(sdesc),
-						skb_frag_size(fp), DMA_FROM_DEVICE);
+						fp->size, DMA_FROM_DEVICE);
 			gl.va = (page_address(gl.frags[0].page) +
-				 gl.frags[0].page_offset);
+				 gl.frags[0].offset);
 			prefetch(gl.va);
 
 			/*
@@ -1728,7 +1716,7 @@ int process_responses(struct sge_rspq *rspq, int budget)
 			 */
 			ret = rspq->handler(rspq, rspq->cur_desc, &gl);
 			if (likely(ret == 0))
-				rspq->offset += ALIGN(skb_frag_size(fp), FL_ALIGN);
+				rspq->offset += ALIGN(fp->size, FL_ALIGN);
 			else
 				restore_rx_bufs(&gl, &rxq->fl, frag);
 		} else if (likely(rsp_type == RSP_TYPE_CPL)) {
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH 6/6] net: add opaque struct around skb frag page
From: Ian Campbell @ 2011-10-20  9:01 UTC (permalink / raw)
  To: netdev@vger.kernel.org; +Cc: Ian Campbell
In-Reply-To: <1319101275.3385.129.camel@zakaz.uk.xensource.com>

I've split this bit out of the skb frag destructor patch since it helps enforce
the use of the fragment API.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
 include/linux/skbuff.h |   10 ++++++----
 net/core/skbuff.c      |    6 +++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1ebf1ea..aec73c1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -140,7 +140,9 @@ struct sk_buff;
 typedef struct skb_frag_struct skb_frag_t;
 
 struct skb_frag_struct {
-	struct page *page;
+	struct {
+		struct page *p;
+	} page;
 #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
 	__u32 page_offset;
 	__u32 size;
@@ -1175,7 +1177,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
 {
 	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-	frag->page		  = page;
+	frag->page.p		  = page;
 	frag->page_offset	  = off;
 	skb_frag_size_set(frag, size);
 }
@@ -1699,7 +1701,7 @@ static inline void netdev_free_page(struct net_device *dev, struct page *page)
  */
 static inline struct page *skb_frag_page(const skb_frag_t *frag)
 {
-	return frag->page;
+	return frag->page.p;
 }
 
 /**
@@ -1785,7 +1787,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
  */
 static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
 {
-	frag->page = page;
+	frag->page.p = page;
 }
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e271040..ca4db40 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -668,14 +668,14 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 
 	/* skb frags release userspace buffers */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-		put_page(skb_shinfo(skb)->frags[i].page);
+		skb_frag_unref(skb, i);
 
 	uarg->callback(uarg);
 
 	/* skb frags point to kernel buffers */
 	for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
-		skb_shinfo(skb)->frags[i - 1].page_offset = 0;
-		skb_shinfo(skb)->frags[i - 1].page = head;
+		__skb_fill_page_desc(skb, i-1, head, 0,
+				     skb_shinfo(skb)->frags[i - 1].size);
 		head = (struct page *)head->private;
 	}
 
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH 3/6] cxgb4: convert to SKB paged frag API.
From: Ian Campbell @ 2011-10-20  9:01 UTC (permalink / raw)
  To: netdev@vger.kernel.org; +Cc: Ian Campbell, Dimitris Michailidis, netdev
In-Reply-To: <1319101275.3385.129.camel@zakaz.uk.xensource.com>

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Dimitris Michailidis <dm@chelsio.com>
Cc: netdev@vger.kernel.org
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h |    2 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c   |   45 ++++++++++++++-------------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 223a7f7..0fe1885 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -326,7 +326,7 @@ struct sge_fl {                     /* SGE free-buffer queue state */
 
 /* A packet gather list */
 struct pkt_gl {
-	skb_frag_t frags[MAX_SKB_FRAGS];
+	struct page_frag frags[MAX_SKB_FRAGS];
 	void *va;                         /* virtual address of first byte */
 	unsigned int nfrags;              /* # of fragments */
 	unsigned int tot_len;             /* total length of fragments */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 14f31d3..ddc1698 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -215,8 +215,8 @@ static int map_skb(struct device *dev, const struct sk_buff *skb,
 	end = &si->frags[si->nr_frags];
 
 	for (fp = si->frags; fp < end; fp++) {
-		*++addr = dma_map_page(dev, fp->page, fp->page_offset,
-				       skb_frag_size(fp), DMA_TO_DEVICE);
+		*++addr = skb_frag_dma_map(dev, fp, 0, skb_frag_size(fp),
+					   DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, *addr))
 			goto unwind;
 	}
@@ -1409,22 +1409,23 @@ int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(cxgb4_ofld_send);
 
-static inline void copy_frags(struct skb_shared_info *ssi,
+static inline void copy_frags(struct sk_buff *skb,
 			      const struct pkt_gl *gl, unsigned int offset)
 {
-	unsigned int n;
+	int i;
 
 	/* usually there's just one frag */
-	ssi->frags[0].page = gl->frags[0].page;
-	ssi->frags[0].page_offset = gl->frags[0].page_offset + offset;
-	skb_frag_size_set(&ssi->frags[0], skb_frag_size(&gl->frags[0]) - offset);
-	ssi->nr_frags = gl->nfrags;
-	n = gl->nfrags - 1;
-	if (n)
-		memcpy(&ssi->frags[1], &gl->frags[1], n * sizeof(skb_frag_t));
+	__skb_fill_page_desc(skb, 0, gl->frags[0].page,
+			     gl->frags[0].offset + offset,
+			     gl->frags[0].size - offset);
+	skb_shinfo(skb)->nr_frags = gl->nfrags;
+	for (i = 1; i < gl->nfrags; i++)
+		__skb_fill_page_desc(skb, i, gl->frags[i].page,
+				     gl->frags[i].offset,
+				     gl->frags[i].size);
 
 	/* get a reference to the last page, we don't own it */
-	get_page(gl->frags[n].page);
+	get_page(gl->frags[gl->nfrags - 1].page);
 }
 
 /**
@@ -1459,7 +1460,7 @@ struct sk_buff *cxgb4_pktgl_to_skb(const struct pkt_gl *gl,
 		__skb_put(skb, pull_len);
 		skb_copy_to_linear_data(skb, gl->va, pull_len);
 
-		copy_frags(skb_shinfo(skb), gl, pull_len);
+		copy_frags(skb, gl, pull_len);
 		skb->len = gl->tot_len;
 		skb->data_len = skb->len - pull_len;
 		skb->truesize += skb->data_len;
@@ -1478,7 +1479,7 @@ EXPORT_SYMBOL(cxgb4_pktgl_to_skb);
 static void t4_pktgl_free(const struct pkt_gl *gl)
 {
 	int n;
-	const skb_frag_t *p;
+	const struct page_frag *p;
 
 	for (p = gl->frags, n = gl->nfrags - 1; n--; p++)
 		put_page(p->page);
@@ -1522,7 +1523,7 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
 		return;
 	}
 
-	copy_frags(skb_shinfo(skb), gl, RX_PKT_PAD);
+	copy_frags(skb, gl, RX_PKT_PAD);
 	skb->len = gl->tot_len - RX_PKT_PAD;
 	skb->data_len = skb->len;
 	skb->truesize += skb->data_len;
@@ -1698,7 +1699,7 @@ static int process_responses(struct sge_rspq *q, int budget)
 		rmb();
 		rsp_type = RSPD_TYPE(rc->type_gen);
 		if (likely(rsp_type == RSP_TYPE_FLBUF)) {
-			skb_frag_t *fp;
+			struct page_frag *fp;
 			struct pkt_gl si;
 			const struct rx_sw_desc *rsd;
 			u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags;
@@ -1717,9 +1718,9 @@ static int process_responses(struct sge_rspq *q, int budget)
 				rsd = &rxq->fl.sdesc[rxq->fl.cidx];
 				bufsz = get_buf_size(rsd);
 				fp->page = rsd->page;
-				fp->page_offset = q->offset;
-				skb_frag_size_set(fp, min(bufsz, len));
-				len -= skb_frag_size(fp);
+				fp->offset = q->offset;
+				fp->size = min(bufsz, len);
+				len -= fp->size;
 				if (!len)
 					break;
 				unmap_rx_buf(q->adap, &rxq->fl);
@@ -1731,16 +1732,16 @@ static int process_responses(struct sge_rspq *q, int budget)
 			 */
 			dma_sync_single_for_cpu(q->adap->pdev_dev,
 						get_buf_addr(rsd),
-						skb_frag_size(fp), DMA_FROM_DEVICE);
+						fp->size, DMA_FROM_DEVICE);
 
 			si.va = page_address(si.frags[0].page) +
-				si.frags[0].page_offset;
+				si.frags[0].offset;
 			prefetch(si.va);
 
 			si.nfrags = frags + 1;
 			ret = q->handler(q, q->cur_desc, &si);
 			if (likely(ret == 0))
-				q->offset += ALIGN(skb_frag_size(fp), FL_ALIGN);
+				q->offset += ALIGN(fp->size, FL_ALIGN);
 			else
 				restore_rx_bufs(&si, &rxq->fl, frags);
 		} else if (likely(rsp_type == RSP_TYPE_CPL)) {
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH net-next] virtio_net: fix truesize underestimation
From: Eric Dumazet @ 2011-10-20  9:14 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Rusty Russell, Michael S. Tsirkin, virtualization,
	Krishna Kumar

We must account in skb->truesize, the size of the fragments, not the
used part of them.

Doing this work is important to avoid unexpected OOM situations.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Rusty Russell <rusty@rustcorp.com.au>
CC: "Michael S. Tsirkin" <mst@redhat.com>
CC: virtualization@lists.linux-foundation.org
CC: Krishna Kumar <krkumar2@in.ibm.com>
---
 drivers/net/virtio_net.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index abbf34f..765ab9a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -150,6 +150,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
 
 	skb->data_len += size;
 	skb->len += size;
+	skb->truesize += PAGE_SIZE;
 	skb_shinfo(skb)->nr_frags++;
 	*len -= size;
 }
@@ -287,7 +288,6 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
 	}
 
 	hdr = skb_vnet_hdr(skb);
-	skb->truesize += skb->data_len;
 
 	u64_stats_update_begin(&stats->syncp);
 	stats->rx_bytes += skb->len;

^ permalink raw reply related

* RE: [patch net-next]alx: Atheros AR8131/AR8151/AR8152/AR8161 Ethernet driver
From: Ren, Cloud @ 2011-10-20  9:23 UTC (permalink / raw)
  To: David Miller
  Cc: Rodriguez, Luis, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20111020.044541.970282389722164761.davem@davemloft.net>


>From: <cloud.ren@Atheros.com>
>Date: Thu, 20 Oct 2011 14:46:24 +0800
>
>> +#define __far
>
>So much unused crap left in these header files, get rid of this stuff.
>
>+#define ALX_HW_WARN(_fmt, _args...) \
>+		ALX_HW_PRINTA(WARNING, _fmt, ## _args)
>+
>+#define ALX_HW_INFO(_fmt, _args...) \
>+		ALX_HW_PRINTA(INFO, _fmt, ## _args)
>+
>+#define ALX_HW_DBG(_fmt, _args...) \
>+		ALX_HW_PRINTA(DEBUG, _fmt, ## _args)
>+
>
>We told you to get rid of your customized debug logging interfaces, yet all of
>this stuff is still there.
>
>+/* delay function */
>+#define US_DELAY(_hw, _n)	__US_DELAY(_n)
>+#define MS_DELAY(_hw, _n)	__MS_DELAY(_n)
>+#define __US_DELAY(_n)		udelay(_n)
>+#define __MS_DELAY(_n)		mdelay(_n)
>
>Useless wrappers for standard kernel interfaces, kill this.
>
>+#define DEBUG_INFO(_a, _b)
>+#define DEBUG_INFOS(_a, _b)
>
>Again we told you to get rid of this stuff.
>
>I suspect it's going to take many rounds of feedback before this driver is
>anywhere near ready for inclusion.
>
>Please just submit it to staging and let it cook there for a couple weeks in the
>interests of our sanity.

As you saw, should I do the two following steps?
1. I firstly try to submit code to linux-staging.git. 
2. After the driver has been accepted by linux-staging.git, I submit to net-next.git again.

^ permalink raw reply

* Re: [PATCH 0/6] skb fragment API: convert network drivers (part V, take 2)
From: David Miller @ 2011-10-20  9:23 UTC (permalink / raw)
  To: Ian.Campbell; +Cc: netdev, linux-scsi, linux-mm
In-Reply-To: <1319101275.3385.129.camel@zakaz.uk.xensource.com>

From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Thu, 20 Oct 2011 10:01:15 +0100

> The following series is the second attempt to convert a fifth (and
> hopefully final) batch of network drivers to the SKB pages fragment API
> introduced in 131ea6675c76.

Applied, thanks Ian.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH net-next] bnx2x: fix skb truesize underestimation
From: David Miller @ 2011-10-20  9:23 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, eilong
In-Reply-To: <1319101223.3781.7.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 20 Oct 2011 11:00:23 +0200

> bnx2x allocates a full page per fragment.
> 
> We must account in skb->truesize, the size of the fragment, not the used
> part of it.
>     
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> CC: Eilon Greenstein <eilong@broadcom.com>

Applied.

^ permalink raw reply

* Re: [patch net-next]alx: Atheros AR8131/AR8151/AR8152/AR8161 Ethernet driver
From: David Miller @ 2011-10-20  9:25 UTC (permalink / raw)
  To: cjren; +Cc: rodrigue, netdev, linux-kernel
In-Reply-To: <6349D7A510622448B1BA0967850A8438011CC21D@nasanexd02d.na.qualcomm.com>

From: "Ren, Cloud" <cjren@qca.qualcomm.com>
Date: Thu, 20 Oct 2011 09:23:07 +0000

> As you saw, should I do the two following steps?
> 1. I firstly try to submit code to linux-staging.git. 
> 2. After the driver has been accepted by linux-staging.git, I submit to net-next.git again.

You submit and get it into staging so that it can sit there for some
time and get reviewed and improved by others.

One doesn't submit directly to net-next right after it gets into
staging; staging is a place where your driver lives while it is still
smelly and funky and needs more work.

^ permalink raw reply

* Re: [PATCH net-next] virtio_net: fix truesize underestimation
From: David Miller @ 2011-10-20  9:23 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, rusty, mst, virtualization, krkumar2
In-Reply-To: <1319102086.3781.13.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 20 Oct 2011 11:14:46 +0200

> We must account in skb->truesize, the size of the fragments, not the
> used part of them.
> 
> Doing this work is important to avoid unexpected OOM situations.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply

* Re: PROBLEM: System call 'sendmsg' of process ospfd (quagga) causes kernel oops
From: David Miller @ 2011-10-20  9:30 UTC (permalink / raw)
  To: herbert; +Cc: eric.dumazet, evonlanthen, linux-kernel, netdev, timo.teras
In-Reply-To: <20111019080807.GA25099@gondor.apana.org.au>

From: Herbert Xu <herbert@gondor.hengli.com.au>
Date: Wed, 19 Oct 2011 10:08:07 +0200

> I think Eric's initial patch is probably the safest bet for rc10.
> We can then work on the proper fix for the next release.

There are two "initial patches"; I wonder which one you mean.

There's his very first patch, which removes the lines in IP_GRE
which change dev->needed_headroom.  I was under the impression we
were against doing that.

The other patch he posted duplicates the device attribute variable
caching in two functions.

My patch is just a tweak so that we only do this sequence in one
place, the new sock_alloc_send_skb_reserve() helper, instead of
in both the ipv4 and ipv6 RAW code.

So I'm a little confused what your suggestion for rc10 really
is :-)

^ permalink raw reply

* Re: PROBLEM: System call 'sendmsg' of process ospfd (quagga) causes kernel oops
From: Herbert Xu @ 2011-10-20  9:35 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, evonlanthen, linux-kernel, netdev, timo.teras
In-Reply-To: <20111020.053050.383972361986316046.davem@davemloft.net>

On Thu, Oct 20, 2011 at 05:30:50AM -0400, David Miller wrote:
>
> So I'm a little confused what your suggestion for rc10 really
> is :-)

I meant his first initial patch :)

While it is suboptimal in the sense that should the value of
needed_headroom increase we'll end up constantly reallocating
skbs, I believe that it is at least semantically correct.

In the meantime I'll look more closely at all the users of
needed_headroom to see if there's anything we've missed.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [GIT] Networking
From: David Miller @ 2011-10-20  9:43 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, netdev, linux-kernel


I have two fixes still being worked on and under discussion.  One for
pktgen giving too large values to ndelay(), and one for RAW ipv4/ipv6
sockets crashing when used over IP_GRE tunnels.  Probably I can have
both fixes finalized in about a day.

1) When bridge is removed via netlink, we hang, fix from Stephen Hemminger.

2) USE_PHYLIB flag test reversed in tg3 due to regression, fix from Jiri Pirko.

3) IPVS netns down/up deadlock fix from Hans Schillstrom.

4) Leaks and missing SKB pull calls in pptp and l2tp, from Eric Dumazet.

5) Several buffer overruns and missing skb size checks in x25, fixes from
   Matthew Daley.

6) bond_handle_frame() races with taking a bond down, resulting in crash,
   fix from Mitsuo Hayasaka.

7) R8169 WoL regression fix from Francois Romieu.  Energy Efficient Ethernet
   setting for rtl8111evl r8169 chip from Hayes Wang.

8) Add SMSC LAN89218 device IDs, from Phil Edworthy.

9) Bluetooth forgets to propagate LSM attributes on child sockets, fix
   from Paul Moore.

10) Transparent proxy doesn't propagate flag to TIME_WAIT sockets, resulting
    in resets.  Fix from KOVACS Krisztian.

Please pull, thanks a lot.

The following changes since commit 486cf46f3f9be5f2a966016c1a8fe01e32cde09e:

  mm: fix race between mremap and removing migration entry (2011-10-19 23:42:58 -0700)

are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git master

David S. Miller (1):
      Merge branch 'nf' of git://1984.lsi.us.es/net

Eric Dumazet (3):
      l2tp: fix a potential skb leak in l2tp_xmit_skb()
      pptp: fix skb leak in pptp_xmit()
      pptp: pptp_rcv_core() misses pskb_may_pull() call

Florian Westphal (1):
      netfilter: nf_conntrack: fix event flooding in GRE protocol tracker

Gao feng (1):
      netconsole: enable netconsole can make net_device refcnt incorrent

Gerrit Renker (1):
      udplite: fast-path computation of checksum coverage

Hans Schillstrom (1):
      IPVS netns shutdown/startup dead-lock

Jiri Pirko (1):
      tg3: negate USE_PHYLIB flag check

KOVACS Krisztian (1):
      tproxy: copy transparent flag when creating a time wait

Matthew Daley (3):
      x25: Validate incoming call user data lengths
      x25: Handle undersized/fragmented skbs
      x25: Prevent skb overreads when checking call user data

Mitsuo Hayasaka (1):
      bonding: use local function pointer of bond->recv_probe in bond_handle_frame

Paul Moore (1):
      bluetooth: Properly clone LSM attributes to newly created child connections

Phil Edworthy (1):
      smsc911x: Add support for SMSC LAN89218

Thadeu Lima de Souza Cascardo (1):
      ehea: Change maintainer to me

Yan, Zheng (1):
      fib_rules: fix unresolved_rules counting

françois romieu (1):
      r8169: fix driver shutdown WoL regression.

hayeswang (1):
      r8169: fix wrong eee setting for rlt8111evl

stephen hemminger (1):
      bridge: fix hang on removal of bridge via netlink

 MAINTAINERS                            |    2 +-
 drivers/net/bonding/bond_main.c        |    7 +-
 drivers/net/netconsole.c               |    5 +
 drivers/net/pptp.c                     |   22 ++++--
 drivers/net/r8169.c                    |   90 ++++++++++++++--------
 drivers/net/smsc911x.c                 |    2 +
 drivers/net/tg3.c                      |    2 +-
 include/net/ip_vs.h                    |    1 +
 include/net/udplite.h                  |   63 ++++++++--------
 net/bluetooth/l2cap_sock.c             |    4 +
 net/bluetooth/rfcomm/sock.c            |    3 +
 net/bluetooth/sco.c                    |    5 +-
 net/bridge/br_if.c                     |    9 +-
 net/bridge/br_netlink.c                |    1 +
 net/bridge/br_private.h                |    1 +
 net/core/fib_rules.c                   |    5 +-
 net/ipv4/tcp_minisocks.c               |    1 +
 net/l2tp/l2tp_core.c                   |    4 +-
 net/netfilter/ipvs/ip_vs_ctl.c         |  131 +++++++++++++++++++------------
 net/netfilter/ipvs/ip_vs_sync.c        |    6 ++
 net/netfilter/nf_conntrack_proto_gre.c |    4 +-
 net/x25/af_x25.c                       |   40 ++++++++--
 net/x25/x25_dev.c                      |    6 ++
 net/x25/x25_facilities.c               |   10 ++-
 net/x25/x25_in.c                       |   43 +++++++++-
 net/x25/x25_link.c                     |    3 +
 net/x25/x25_subr.c                     |   14 +++-
 security/security.c                    |    1 +
 28 files changed, 330 insertions(+), 155 deletions(-)

^ permalink raw reply

* RE: [patch net-next]alx: Atheros AR8131/AR8151/AR8152/AR8161 Ethernet driver
From: Ren, Cloud @ 2011-10-20  9:48 UTC (permalink / raw)
  To: David Miller
  Cc: Rodriguez, Luis, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20111020.052506.373437241768777548.davem@davemloft.net>


>From: "Ren, Cloud" <cjren@qca.qualcomm.com>
>Date: Thu, 20 Oct 2011 09:23:07 +0000
>
>> As you saw, should I do the two following steps?
>> 1. I firstly try to submit code to linux-staging.git.
>> 2. After the driver has been accepted by linux-staging.git, I submit to net-
>next.git again.
>
>You submit and get it into staging so that it can sit there for some time and get
>reviewed and improved by others.
>
>One doesn't submit directly to net-next right after it gets into staging; staging
>is a place where your driver lives while it is still smelly and funky and needs more
>work.

The driver will support the next generation NICs of Atheros. Meanwhile, the driver can 
also have better optimization for AR8131 and AR8151 than atl1c. For some reason, we 
don't plan to patch atl1c driver to support our new NIC, such as AR8161. So I hope the driver
can stay in net-next in the end. Of course, I will be responsible for modifying the source code
so that it meets kernel requirements.

^ permalink raw reply

* Re: Comment on nf_queue NF_STOLEN patch
From: Pablo Neira Ayuso @ 2011-10-20 10:30 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jim Sansing, Linux Network Development list,
	Netfilter Development Mailinglist, Florian Westphal
In-Reply-To: <1318997435.19139.16.camel@edumazet-laptop>

On Wed, Oct 19, 2011 at 06:10:35AM +0200, Eric Dumazet wrote:
> Le mardi 18 octobre 2011 à 17:34 -0400, Jim Sansing a écrit :
> > Eric Dumazet wrote:
> > > Le mardi 18 octobre 2011 à 15:08 -0400, Jim Sansing a écrit :
> > >   
> > >> I have been working on a kernel module that registers with netfilter,
> > >> and I noticed that a patch was added to nf_queue that changed the
> > >> handling of return code NF_STOLEN from 'do nothing' to 'free the skb'. 
> > >> I'm not sure which kernel version this went in, but the date of the
> > >> patch is Feb, 19, 2010.
> > >>
> > >> Everything I have read about netfilter states that it is up to the
> > >> netfilter hook to free the skb if NF_STOLEN is returned.  The
> > >> implications of this patch from a hook programming perspective are:
> > >>
> > >> 1) If the skb is used after the return from the hook, it must be cloned.
> > >> 2) The original skb must not be freed.
> > >>
> > >> I suggest that a comment be added to include/linux/netfilter.h that says
> > >> explicitly the skb will be freed if NF_STOLEN is returned.
> > >>     
> > >
> > > But its not true. Just read the code.
> > >
> > > If you are working on this stuff I recommend you take a look at
> > > commits :
> > >
> > > c6675233f9015d3c0460c8aab53ed9b99d915c64
> > > (netfilter: nf_queue: reject NF_STOLEN verdicts from userspace)
> > >
> > > fad54440438a7c231a6ae347738423cbabc936d9
> > > (netfilter: avoid double free in nf_reinject)
> > >
> > > 64507fdbc29c3a622180378210ecea8659b14e40
> > > (netfilter: nf_queue: fix NF_STOLEN skb leak)
> > >
> > > 3bc38712e3a6e0596ccb6f8299043a826f983701
> > > ([NETFILTER]: nf_queue: handle NF_STOP and unknown verdicts in
> > > nf_reinject)
> > >
> > >   
> > 
> > I see that fad54440438a7c231a6ae347738423cbabc936d9 (netfilter: avoid
> > double free in nf_reinject) returns the switch case for NF_STOLEN back
> > to the original state, but I just downloaded 3.0.4, and the skb is still
> > freed.  So for some versions of the kernel, the situation exists. 
> > Hopefully anyone who runs into it will find this thread.
> > 
> 
> Hopefully netfilter guys (CCed) will sort out the problem and ask for stable
> submissions, if not already done. 3.0.4 is quite old :)

Not done yet, sorry. I'll do it asap.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH 3/4] net: xen-netback: use API provided by xenbus module to map rings
From: David Vrabel @ 2011-10-20 10:45 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: xen-devel, linux-kernel, David Vrabel, netdev, David S . Miller
In-Reply-To: <1319107519-2253-1-git-send-email-david.vrabel@citrix.com>

The xenbus module provides xenbus_map_ring_valloc() and
xenbus_map_ring_vfree().  Use these to map the Tx and Rx ring pages
granted by the frontend.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
---
Dave, this is a standalone patch and can be applied independently of
the rest of the series.

 drivers/net/xen-netback/common.h  |   11 ++---
 drivers/net/xen-netback/netback.c |   80 ++++++++-----------------------------
 2 files changed, 22 insertions(+), 69 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 161f207..94b79c3 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -58,10 +58,6 @@ struct xenvif {
 	u8               fe_dev_addr[6];
 
 	/* Physical parameters of the comms window. */
-	grant_handle_t   tx_shmem_handle;
-	grant_ref_t      tx_shmem_ref;
-	grant_handle_t   rx_shmem_handle;
-	grant_ref_t      rx_shmem_ref;
 	unsigned int     irq;
 
 	/* List of frontends to notify after a batch of frames sent. */
@@ -70,8 +66,6 @@ struct xenvif {
 	/* The shared rings and indexes. */
 	struct xen_netif_tx_back_ring tx;
 	struct xen_netif_rx_back_ring rx;
-	struct vm_struct *tx_comms_area;
-	struct vm_struct *rx_comms_area;
 
 	/* Frontend feature information. */
 	u8 can_sg:1;
@@ -106,6 +100,11 @@ struct xenvif {
 	wait_queue_head_t waiting_to_free;
 };
 
+static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
+{
+	return to_xenbus_device(vif->dev->dev.parent);
+}
+
 #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
 #define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
 
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index fd00f25..3af2924 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1577,88 +1577,42 @@ static int xen_netbk_kthread(void *data)
 
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 {
-	struct gnttab_unmap_grant_ref op;
-
-	if (vif->tx.sring) {
-		gnttab_set_unmap_op(&op, (unsigned long)vif->tx_comms_area->addr,
-				    GNTMAP_host_map, vif->tx_shmem_handle);
-
-		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-			BUG();
-	}
-
-	if (vif->rx.sring) {
-		gnttab_set_unmap_op(&op, (unsigned long)vif->rx_comms_area->addr,
-				    GNTMAP_host_map, vif->rx_shmem_handle);
-
-		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-			BUG();
-	}
-	if (vif->rx_comms_area)
-		free_vm_area(vif->rx_comms_area);
-	if (vif->tx_comms_area)
-		free_vm_area(vif->tx_comms_area);
+	if (vif->tx.sring)
+		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
+					vif->tx.sring);
+	if (vif->rx.sring)
+		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
+					vif->rx.sring);
 }
 
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
 				 grant_ref_t tx_ring_ref,
 				 grant_ref_t rx_ring_ref)
 {
-	struct gnttab_map_grant_ref op;
+	void *addr;
 	struct xen_netif_tx_sring *txs;
 	struct xen_netif_rx_sring *rxs;
 
 	int err = -ENOMEM;
 
-	vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
-	if (vif->tx_comms_area == NULL)
+	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
+				     tx_ring_ref, &addr);
+	if (err)
 		goto err;
 
-	vif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
-	if (vif->rx_comms_area == NULL)
-		goto err;
-
-	gnttab_set_map_op(&op, (unsigned long)vif->tx_comms_area->addr,
-			  GNTMAP_host_map, tx_ring_ref, vif->domid);
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status) {
-		netdev_warn(vif->dev,
-			    "failed to map tx ring. err=%d status=%d\n",
-			    err, op.status);
-		err = op.status;
-		goto err;
-	}
-
-	vif->tx_shmem_ref    = tx_ring_ref;
-	vif->tx_shmem_handle = op.handle;
-
-	txs = (struct xen_netif_tx_sring *)vif->tx_comms_area->addr;
+	txs = (struct xen_netif_tx_sring *)addr;
 	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
 
-	gnttab_set_map_op(&op, (unsigned long)vif->rx_comms_area->addr,
-			  GNTMAP_host_map, rx_ring_ref, vif->domid);
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status) {
-		netdev_warn(vif->dev,
-			    "failed to map rx ring. err=%d status=%d\n",
-			    err, op.status);
-		err = op.status;
+	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
+				     rx_ring_ref, &addr);
+	if (err)
 		goto err;
-	}
-
-	vif->rx_shmem_ref     = rx_ring_ref;
-	vif->rx_shmem_handle  = op.handle;
-	vif->rx_req_cons_peek = 0;
 
-	rxs = (struct xen_netif_rx_sring *)vif->rx_comms_area->addr;
+	rxs = (struct xen_netif_rx_sring *)addr;
 	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
 
+	vif->rx_req_cons_peek = 0;
+
 	return 0;
 
 err:
-- 
1.7.2.5

^ permalink raw reply related

* Re: [RFC PATCH 0/5] SUNRPC: "RPC pipefs per network namespace" preparations
From: Stanislav Kinsbursky @ 2011-10-20 11:06 UTC (permalink / raw)
  To: Trond.Myklebust@netapp.com
  Cc: linux-nfs@vger.kernel.org, Pavel Emelianov, neilb@suse.de,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	bfields@fieldses.org, davem@davemloft.net, devel@openvz.org
In-Reply-To: <20111017120629.4541.67395.stgit@localhost6.localdomain6>

Guys, please, spend some of your expensive time to review this patch-set briefly.
This is not for commit, but just a presentation of the idea.
I really need some opinions about it, since all my further work around RPC pipefs
depends on it.
IOW, I need to know whether anyone has anything against this idea.
Trond, please respond: does this idea suit you in general or not?

17.10.2011 17:10, Stanislav Kinsbursky пишет:
> Hello to everyone.
> RPC pipefs file system have to work per network namespace context is required
> prior to any NFS modifications.
> This is a way how to do it. I'll really appreciate for any comments.
>
> There are several statements about how to make RPC pipefs working per network
> namespace context.
> Here they are:
> 1) RPC pipefs should be mounted per network namespace context.
> 2) RPC pipefs superblock should holds network namespace while active.
> 3) RPC pipefs lookup and readir should be perfomed in network namespace context
> it was mounted. IOW, user-space process, working in another network namespace
> context, should see RPC pipefs dentries from network namespace context this
> mount-point was created (like it was done for sysfs).
>
> These statement leads to some restrictions which we must follow during
> implementation. Here are they:
> 1) RPC pipefs mount can't be performed in kernel context since new super block
> will holds networks namespace reference and it's impossible to recognize, when
> and how we have to release this mount point. IOW rpc_get_mount() and
> rpc_put_mount() have to be removed.
> 2) RPC pipefs should provide some new helpers to lookup directory dentry for
> those modules which creates pipes, because without RPC pipefs mount point
> general lookup can't be performed.
> 3) These methods must garantee, that pipefs superblock will be active during
> pipes creation and destruction.
>
> So, here is the idea of making RPC pipefs works per network namespace context:
> 1) RPC pipefs superblock should holds network namespcae context while active.
> 2) RPC pipefs should send notification events on superblock creation and
> destruction.
> 3) RPC pipefs should provide "lookup dentry by name" method for notification
> subscribers.
> 4) RPC pipefs should place superblock reference on current network namespace
> context on creation and remove it on destruction.
> 5) RPC pipefs should provide safe "lookup dentry by name" method for per-net
> operations, which garantees, that superblock is active, while
> per-net-operations are performing.
> 6) Client and cache directories creation and destruction should be performed
> also on superblock creation and destruction notification events. Note: generic
> creation (like now) can fail (if no superblock is not created yet).
> 7) Pipes creation and destruction should be performed on superblock creation
> and destruction events. Also pipes operations should be performed during
> per-net operation and in this case they could fail (due to the same reason as
> in statement above).
>
> This patch-set implements first 5 points and thus doesn't affects current RPC
> pipefs logic.
>
> The only problem about I'm not sure how to solve properly yet, is auth gss
> pipes creations operations. Hoping for some help with it.
>
>
> The following series consists of:
>
> ---
>
> Stanislav Kinsbursky (5):
>        SUNRPC: hold current network namespace while pipefs superblock is active
>        SUNRPC: send notification events on pipefs sb creation and destruction
>        SUNRPC: pipefs dentry lookup helper introduced
>        SUNRPC: put pipefs superblock link on network namespace
>        SUNRPC: pipefs per-net operations helper introduced
>
>
>   include/linux/sunrpc/rpc_pipe_fs.h |   16 ++++++
>   net/sunrpc/netns.h                 |    3 +
>   net/sunrpc/rpc_pipe.c              |  103 ++++++++++++++++++++++++++++++++++++
>   net/sunrpc/sunrpc_syms.c           |    1
>   4 files changed, 122 insertions(+), 1 deletions(-)
>


-- 
Best regards,
Stanislav Kinsbursky

^ permalink raw reply

* [IEEE802.15.4][6LoWPAN] draft for fragmentation support
From: Alexander Smirnov @ 2011-10-20 11:17 UTC (permalink / raw)
  To: davem
  Cc: dbaryshkov, slapin, linux-zigbee-devel, netdev, eric.dumazet,
	kernel-janitors, jonsmirl, alex.bluesman.smirnov

[-- Attachment #1: Type: text/plain, Size: 567 bytes --]

Hello everybody,

below is the patch which adds support for fragmentation in 6LoWPAN
point-to-point networks. This is needed because of the difference
in MTU: 1280 for IPv6 and 128 for IEEE 802.15.4.

This patch is just a draft. Could anyone please look at
it and let me know your opinion.

The most doubtful moments for me are:
1. Should the list 'frag_list' and the mutex 'flist_lock' be
included into dev private data?
2. Can I use 'dev_queue_xmit' to send fragments to queue?
3. Creating new 'skb' instead of copying and modifying main one.

With best regards,
Alexander


[-- Attachment #2: 0001-6LoWPAN-fragmentation-support.patch --]
[-- Type: text/plain, Size: 10375 bytes --]

>From 48472bae269b7b1a4047967ec21eadb217c4fd6d Mon Sep 17 00:00:00 2001
From: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Date: Thu, 20 Oct 2011 15:02:36 +0400
Subject: [PATCH] 6LoWPAN fragmentation support

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 net/ieee802154/6lowpan.c |  286 +++++++++++++++++++++++++++++++++++++++++++++-
 net/ieee802154/6lowpan.h |    3 +
 2 files changed, 288 insertions(+), 1 deletions(-)

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 96877bd..1923ec7 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -113,6 +113,24 @@ struct lowpan_dev_record {
 	struct list_head list;
 };
 
+struct lowpan_fragment {
+	u8 in_progress;			/* assembling is in progress */
+	struct sk_buff *skb;		/* skb to be assembled */
+	u8 *data;			/* data to be stored */
+	struct mutex lock;		/* concurency lock */
+	u16 length;			/* frame length to be assemled */
+	u32 bytes_rcv;			/* bytes received */
+	u16 tag;			/* current fragment tag */
+	struct timer_list timer;	/* assembling timer */
+	struct list_head list;		/* fragments list handler */	
+};
+
+static unsigned short fragment_tag;
+
+/* TODO: bind mutex and list to device */
+static LIST_HEAD(lowpan_fragments);
+struct mutex flist_lock;
+
 static inline struct
 lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
 {
@@ -244,6 +262,18 @@ static u8 lowpan_fetch_skb_u8(struct sk_buff *skb)
 	return ret;
 }
 
+static u16 lowpan_fetch_skb_u16(struct sk_buff *skb)
+{
+	u16 ret;
+
+	BUG_ON(skb->len < 2);
+
+	ret = skb->data[0] | (skb->data[1] << 8);
+	skb_pull(skb, 2);
+	return ret;
+}
+
+static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev);
 static int lowpan_header_create(struct sk_buff *skb,
 			   struct net_device *dev,
 			   unsigned short type, const void *_daddr,
@@ -467,9 +497,102 @@ static int lowpan_header_create(struct sk_buff *skb,
 		memcpy(&(sa.hwaddr), saddr, 8);
 
 		mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
+
+		/* frame fragmentation */
+
+		/*
+		 * if payload + mac header doesn't fit MTU-sized frame
+		 * we need to fragment it.
+		 */
+		if (skb->len > (127 - 24)) { /* MTU - MAC_HEADER_LENGTH */
+			struct sk_buff *fr_skb;
+			u16 b_sent = 0;
+			unsigned short payload_len = skb->len;
+			int stat = 0;
+
+			pr_debug("%s: the frame is too big (0x%x),"
+				 "fragmentation needed, using tag = 0x%x\n",
+				 __func__, payload_len, fragment_tag);
+
+			fr_skb = skb_copy(skb, GFP_KERNEL);
+			if (!fr_skb)
+				goto error;
+
+			/* 40-bit - fragment dispatch size */
+			head = kzalloc(5, GFP_KERNEL);
+			if (!head)
+				goto error;
+
+			/* first fagment header */
+			head[0] = LOWPAN_DISPATCH_FRAG1 | (payload_len & 0x7);
+			head[1] = (payload_len >> 3) & 0xff;
+			head[2] = fragment_tag & 0xff;
+			head[3] = fragment_tag >> 8;
+
+
+			lowpan_raw_dump_inline(__func__, "first header",
+							head, 4);
+
+			memcpy(skb_push(fr_skb, 4), head, 4);
+			skb_trim(fr_skb, LOWPAN_FRAG_SIZE);
+
+			dev_hard_header(fr_skb, lowpan_dev_info(dev)->real_dev,
+				type, (void *)&da, (void *)&sa, fr_skb->len);
+
+			/* send fragment to dev queue */
+			dev_queue_xmit(fr_skb);
+
+			/* next fragments headers */
+			head[0] |= 0x20;
+
+			lowpan_raw_dump_inline(__func__, "next headers",
+							head, 5);
+
+			while (b_sent < payload_len) {
+				/* not the first fragment */
+				if (b_sent)
+					skb_pull(skb, LOWPAN_FRAG_SIZE);
+
+				pr_debug("%s: preparing fragment %d\n",
+				    __func__, b_sent / LOWPAN_FRAG_SIZE);
+
+				/*
+				 * create copy of current buffer and trim it
+				 * down to fragment size
+				 */
+				fr_skb = skb_copy(skb, GFP_KERNEL);
+				if (!fr_skb)
+					goto error;
+
+				skb_trim(fr_skb, LOWPAN_FRAG_SIZE);
+
+				/* add fragment header */
+				head[4] = b_sent / 8;
+				memcpy(skb_push(fr_skb, 5), head, 5);
+
+				b_sent += LOWPAN_FRAG_SIZE;
+
+				lowpan_raw_dump_table(__func__,
+				   "fragment data", fr_skb->data, fr_skb->len);
+
+				stat = dev_hard_header(fr_skb,
+					lowpan_dev_info(dev)->real_dev, type,
+					(void *)&da, (void *)&sa, fr_skb->len);
+
+				dev_queue_xmit(fr_skb);
+			}
+
+			/* TODO: what's the correct way to skip default skb? */
+
+			fragment_tag++;
+			return stat;
+		} else
 		return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
 				type, (void *)&da, (void *)&sa, skb->len);
 	}
+error:
+	kfree_skb(skb);
+	return -ENOMEM;
 }
 
 static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr)
@@ -511,6 +634,23 @@ static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr)
 	return stat;
 }
 
+static void lowpan_fragment_timer_expired(unsigned long tag)
+{
+	struct lowpan_fragment *entry, *tmp;
+
+	pr_debug("%s: timer expired for frame with tag %lu\n", __func__, tag);
+
+	mutex_lock(&flist_lock);
+	list_for_each_entry_safe(entry, tmp, &lowpan_fragments, list)
+		if (entry->tag == tag) {
+			list_del(&entry->list);
+			kfree(entry->data);
+			kfree(entry);
+			break;
+		}
+	mutex_unlock(&flist_lock);
+}
+
 static int
 lowpan_process_data(struct sk_buff *skb)
 {
@@ -525,6 +665,139 @@ lowpan_process_data(struct sk_buff *skb)
 	if (skb->len < 2)
 		goto drop;
 	iphc0 = lowpan_fetch_skb_u8(skb);
+
+	/* fragments assmebling */
+	switch (iphc0 & 0xf8) {
+	/* first fragment of the frame */
+	case LOWPAN_DISPATCH_FRAG1:
+	{
+		struct lowpan_fragment *entry, *frame;
+		u16 tag;
+
+		lowpan_raw_dump_inline(__func__, "first frame fragment header",
+								skb->data, 3);
+
+		tmp = lowpan_fetch_skb_u8(skb);
+		tag = lowpan_fetch_skb_u16(skb);
+
+		/*
+		 * check if frame assembling with the same tag is
+		 * already in progress
+		 */
+		rcu_read_lock();
+		list_for_each_entry_rcu(entry, &lowpan_fragments, list)
+			if (entry->tag == tag) {
+				pr_debug("%s ERROR: frame with this tag is"
+					 "alredy in assembling", __func__);
+				goto drop_rcu;
+			}
+		rcu_read_unlock();
+
+		/* alloc new frame structure */
+		frame = kzalloc(sizeof(struct lowpan_fragment), GFP_KERNEL);
+		if (!frame)
+			goto drop;
+
+		INIT_LIST_HEAD(&frame->list);
+
+		frame->bytes_rcv = 0;
+		frame->length = (iphc0 & 7) | (tmp << 3);
+		frame->tag = tag;
+		/* allocate buffer for frame assembling */
+		frame->data = kzalloc(frame->length, GFP_KERNEL);
+		if (!frame->data) {
+			kfree(frame);
+			goto drop;
+		}
+
+		pr_debug("%s: frame to be assembled: length = 0x%x, "
+			 "tag = 0x%x\n", __func__, frame->length, frame->tag);
+
+		init_timer(&frame->timer);
+		/* (number of fragments) * (fragment processing time-out) */
+		frame->timer.expires = jiffies +
+		  (frame->length / LOWPAN_FRAG_SIZE + 1) * LOWPAN_FRAG_TIMEOUT;
+		frame->timer.data = tag;
+		frame->timer.function = lowpan_fragment_timer_expired;
+
+		add_timer(&frame->timer);
+
+		mutex_lock(&flist_lock);
+		list_add_tail(&frame->list, &lowpan_fragments);
+		mutex_unlock(&flist_lock);
+
+		return kfree_skb(skb), 0;
+	}
+	/* second and next fragment of the frame */
+	case LOWPAN_DISPATCH_FRAGN:
+	{
+		u16 tag;
+		struct lowpan_fragment *entry, *t;
+
+		lowpan_raw_dump_inline(__func__, "next fragment header",
+					skb->data, 4);
+
+		lowpan_fetch_skb_u8(skb); /* skip frame length byte */
+		tag = lowpan_fetch_skb_u16(skb);
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(entry, &lowpan_fragments, list)
+			if (entry->tag == tag)
+				break;
+		rcu_read_unlock();
+
+		if (entry->tag != tag) {
+			pr_debug("%s ERROR: no frame structure found for this"
+				 "fragment", __func__);
+			goto drop;
+		}
+
+		tmp = lowpan_fetch_skb_u8(skb); /* fetch offset */
+
+		lowpan_raw_dump_table(__func__, "next fragment payload",
+					skb->data, skb->len);
+
+		/* if payload fits buffer, copy it */
+		if ((tmp * 8 + skb->len) <= entry->length) /* TODO: likely? */
+			memcpy(entry->data + tmp * 8, skb->data, skb->len);
+		else
+			goto drop;
+
+		entry->bytes_rcv += skb->len;
+
+		pr_debug("%s: frame length = 0x%x, bytes received = 0x%x\n",
+			 __func__, entry->length, entry->bytes_rcv);
+
+		/* frame assembling complete */
+		if (entry->bytes_rcv == entry->length) {
+			struct sk_buff *tmp = skb;
+
+			mutex_lock(&flist_lock);
+			list_for_each_entry_safe(entry, t, &lowpan_fragments, list)
+				if (entry->tag == tag) {
+					list_del(&entry->list);
+					/* copy and clear skb */
+					skb = skb_copy_expand(skb, entry->length, skb_tailroom(skb), GFP_KERNEL);
+					skb_pull(skb, skb->len);
+					/* copy new data to skb */
+					memcpy(skb_push(skb, entry->length), entry->data, entry->length);
+					kfree_skb(tmp);
+					del_timer(&entry->timer);
+					kfree(entry->data);
+					kfree(entry);
+
+					iphc0 = lowpan_fetch_skb_u8(skb);
+					break;
+				}
+			mutex_unlock(&flist_lock);
+			break;
+		}
+		return kfree_skb(skb), 0;
+	}
+	default:
+		break;
+	}
+
 	iphc1 = lowpan_fetch_skb_u8(skb);
 
 	_saddr = mac_cb(skb)->sa.hwaddr;
@@ -674,6 +947,8 @@ lowpan_process_data(struct sk_buff *skb)
 	lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr,
 							sizeof(hdr));
 	return lowpan_skb_deliver(skb, &hdr);
+drop_rcu:
+	rcu_read_unlock();
 drop:
 	kfree(skb);
 	return -EINVAL;
@@ -765,8 +1040,15 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
 		goto drop;
 
 	/* check that it's our buffer */
-	if ((skb->data[0] & 0xe0) == 0x60)
+	switch (skb->data[0] & 0xe0) {
+	case 0x60:		/* ipv6 datagram */
+	case 0xc0:		/* first fragment header */
+	case 0xe0:		/* next fragments headers */
 		lowpan_process_data(skb);
+		break;
+	default:
+		break;
+	}
 
 	return NET_RX_SUCCESS;
 
@@ -793,6 +1075,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
 	lowpan_dev_info(dev)->real_dev = real_dev;
 	mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
 
+	mutex_init(&flist_lock);
+
 	entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
 	if (!entry)
 		return -ENOMEM;
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index 5d8cf80..e8e57c4 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -159,6 +159,9 @@
 #define LOWPAN_DISPATCH_FRAG1	0xc0 /* 11000xxx */
 #define LOWPAN_DISPATCH_FRAGN	0xe0 /* 11100xxx */
 
+#define LOWPAN_FRAG_SIZE	40		/* fragment payload size */
+#define LOWPAN_FRAG_TIMEOUT	(HZ * 2)	/* processing time: 2 sec */
+
 /*
  * Values of fields within the IPHC encoding first byte
  * (C stands for compressed and I for inline)
-- 
1.7.2.5


^ permalink raw reply related

* [PATCH] ipvs: Fix compilation error in ip_vs.h for ip_vs_confirm_conntrack function.
From: Krzysztof Wilczynski @ 2011-10-20 12:18 UTC (permalink / raw)
  To: Simon Horman; +Cc: Patrick McHardy, netdev

This is to address the following error during the compilation:

  In file included from kernel/sysctl_binary.c:6:
  include/net/ip_vs.h:1406: error: expected identifier or ‘(’ before ‘{’ token
  make[1]: *** [kernel/sysctl_binary.o] Error 1
  make[1]: *** Waiting for unfinished jobs....

That manifests itself when CONFIG_IP_VS_NFCT is undefined in .config file.

Signed-off-by: Krzysztof Wilczynski <krzysztof.wilczynski@linux.com>
---
 include/net/ip_vs.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 139784e..de527d1 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1395,7 +1395,7 @@ static inline void ip_vs_update_conntrack(struct sk_buff *skb,
 {
 }
 
-static inline int ip_vs_confirm_conntrack(struct sk_buff *skb);
+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb)
 {
 	return NF_ACCEPT;
 }
-- 
1.7.7

^ permalink raw reply related

* Re: [RFC PATCH 0/5] SUNRPC: "RPC pipefs per network namespace" preparations
From: bfields-uC3wQj2KruNg9hUCZPvPmw @ 2011-10-20 12:32 UTC (permalink / raw)
  To: Stanislav Kinsbursky
  Cc: Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Pavel Emelianov, neilb-l3A5Bk7waGM@public.gmane.org,
	netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org,
	devel-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org
In-Reply-To: <4EA000C6.1040502-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>

On Thu, Oct 20, 2011 at 03:06:46PM +0400, Stanislav Kinsbursky wrote:
> Guys, please, spend some of your expensive time to review this patch-set briefly.

I'll try to take a look soon, but I'm travelling tomorrow through the
31st, and things will be a little hectic.

Just one quick comment:

> >The only problem about I'm not sure how to solve properly yet, is auth gss
> >pipes creations operations. Hoping for some help with it.

I suspect one reason it may be a little complicated is the
upcall-version switching.  The old version is deprecated, and there's no
need to support the combination of the old version with a new
feature like containers.  And now that it's been there a while the
version-switching code has already achieved its goal of avoiding a flag day.
So, one approach might be:

	- move all the code for the old gss upcall and for the version
	  switching under a new CONFIG_DEPRECATED_GSS, or similar.
	- print a warning if the old stuff is used, and plan to rip it
	  out completely in a future kernel version.
	- do something that works just in the !CONFIG_DEPRECATED_GSS
	  case.

Would that help?

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [IEEE802.15.4][6LoWPAN] draft for fragmentation support
From: Eric Dumazet @ 2011-10-20 12:39 UTC (permalink / raw)
  To: Alexander Smirnov
  Cc: davem, dbaryshkov, slapin, linux-zigbee-devel, netdev, jonsmirl
In-Reply-To: <20111020111718.GA32181@avtobot.ww600.siemens.net>

Le jeudi 20 octobre 2011 à 15:17 +0400, Alexander Smirnov a écrit :
> Hello everybody,
> 
> below is the patch which adds support for fragmentation in 6LoWPAN
> point to point networks. This activity needs because of difference
> in MTU: 1280 ipv6 and 128 ieee802.15.4
> 
> This patch is just a draft. Could anyone please look at
> it and let me know your opinion.
> 

I removed janitor list, since this patch is certainly not a janitor one.

> The most doubtful moments for me are:
> 1. Should the list 'frag_list' and the mutex 'flist_lock' be
> included into dev private data?

	The mutex is wrong; you need a spinlock since this runs from a softirq handler.
	Allocations should use GFP_ATOMIC for the same reason.

> 2. Can I use 'dev_queue_xmit' to send fragments to queue?

	Well, it is not very clean, but it seems there is no alternative

> 3. Creating new 'skb' instead of copying and modifying main one.

	You can't do that without making sure you own the skb and its data.
	Think about a sniffer running...


4) There is no limit on the number of in-flight fragments.
You can consume a lot of RAM and end up with a list of 65536 elements...





>  net/ieee802154/6lowpan.c |  286
> +++++++++++++++++++++++++++++++++++++++++++++-
>  net/ieee802154/6lowpan.h |    3 +
>  2 files changed, 288 insertions(+), 1 deletions(-)
> 
> diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
> index 96877bd..1923ec7 100644
> --- a/net/ieee802154/6lowpan.c
> +++ b/net/ieee802154/6lowpan.c
> @@ -113,6 +113,24 @@ struct lowpan_dev_record {
>         struct list_head list;
>  };
>  
> +struct lowpan_fragment {
> +       u8 in_progress;                 /* assembling is in progress
> */
> +       struct sk_buff *skb;            /* skb to be assembled */
> +       u8 *data;                       /* data to be stored */
> +       struct mutex lock;              /* concurency lock */
> +       u16 length;                     /* frame length to be assemled
> */
> +       u32 bytes_rcv;                  /* bytes received */
> +       u16 tag;                        /* current fragment tag */
> +       struct timer_list timer;        /* assembling timer */
> +       struct list_head list;          /* fragments list handler
> */    
> +};
> +
> +static unsigned short fragment_tag;
> +
> +/* TODO: bind mutex and list to device */
> +static LIST_HEAD(lowpan_fragments);
> +struct mutex flist_lock;
> +
>  static inline struct
>  lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
>  {
> @@ -244,6 +262,18 @@ static u8 lowpan_fetch_skb_u8(struct sk_buff
> *skb)
>         return ret;
>  }
>  
> +static u16 lowpan_fetch_skb_u16(struct sk_buff *skb)
> +{
> +       u16 ret;
> +
> +       BUG_ON(skb->len < 2);
> 

	Hmm, check pskb_may_pull(skb, 2) here, or in the caller.

	skb->len >= 2 doesn't mean you can access skb->data[0] and
skb->data[1]: the data might be in a fragment, not in the skb head.

> +
> +       ret = skb->data[0] | (skb->data[1] << 8);
> +       skb_pull(skb, 2);
> +       return ret;
> +}
> +
> +static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device
> *dev);
>  static int lowpan_header_create(struct sk_buff *skb,
>                            struct net_device *dev,
>                            unsigned short type, const void *_daddr,
> @@ -467,9 +497,102 @@ static int lowpan_header_create(struct sk_buff
> *skb,
>                 memcpy(&(sa.hwaddr), saddr, 8);
>  
>                 mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
> +
> +               /* frame fragmentation */
> +
> +               /*
> +                * if payload + mac header doesn't fit MTU-sized frame
> +                * we need to fragment it.
> +                */
> +               if (skb->len > (127 - 24)) { /* MTU -
> MAC_HEADER_LENGTH */
> +                       struct sk_buff *fr_skb;
> +                       u16 b_sent = 0;
> +                       unsigned short payload_len = skb->len;
> +                       int stat = 0;
> +
> +                       pr_debug("%s: the frame is too big (0x%x),"
> +                                "fragmentation needed, using tag = 0x
> %x\n",
> +                                __func__, payload_len, fragment_tag);
> +
> +                       fr_skb = skb_copy(skb, GFP_KERNEL);
> 

			GFP_ATOMIC
> 
And I wonder why you skb_copy(). You are not allowed to change skb like
that. ( when you later skb_push(fr_skb, 4), you are modifying this skb
data too...)
> 
> +                       if (!fr_skb)
> +                               goto error;
> +
> +                       /* 40-bit - fragment dispatch size */
> +                       head = kzalloc(5, GFP_KERNEL);

			GFP_ATOMIC


> +                       if (!head)
> +                               goto error;
> +
> +                       /* first fagment header */
> +                       head[0] = LOWPAN_DISPATCH_FRAG1 | (payload_len
> & 0x7);
> +                       head[1] = (payload_len >> 3) & 0xff;
> +                       head[2] = fragment_tag & 0xff;
> +                       head[3] = fragment_tag >> 8;
> +
> +
> +                       lowpan_raw_dump_inline(__func__, "first
> header",
> +                                                       head, 4);
> +
> +                       memcpy(skb_push(fr_skb, 4), head, 4);
> +                       skb_trim(fr_skb, LOWPAN_FRAG_SIZE);
> +
> +                       dev_hard_header(fr_skb,
> lowpan_dev_info(dev)->real_dev,
> +                               type, (void *)&da, (void *)&sa,
> fr_skb->len);
> +
> +                       /* send fragment to dev queue */
> +                       dev_queue_xmit(fr_skb);
> +
> +                       /* next fragments headers */
> +                       head[0] |= 0x20;
> +
> +                       lowpan_raw_dump_inline(__func__, "next
> headers",
> +                                                       head, 5);
> +
> +                       while (b_sent < payload_len) {
> +                               /* not the first fragment */
> +                               if (b_sent)
> +                                       skb_pull(skb,
> LOWPAN_FRAG_SIZE);
> +
> +                               pr_debug("%s: preparing fragment %d
> \n",
> +                                   __func__, b_sent /
> LOWPAN_FRAG_SIZE);
> +
> +                               /*
> +                                * create copy of current buffer and
> trim it
> +                                * down to fragment size
> +                                */
> +                               fr_skb = skb_copy(skb, GFP_KERNEL);
> +                               if (!fr_skb)
> +                                       goto error;
> +
> +                               skb_trim(fr_skb, LOWPAN_FRAG_SIZE);
> +
> +                               /* add fragment header */
> +                               head[4] = b_sent / 8;
> +                               memcpy(skb_push(fr_skb, 5), head, 5);
> +
> +                               b_sent += LOWPAN_FRAG_SIZE;
> +
> +                               lowpan_raw_dump_table(__func__,
> +                                  "fragment data", fr_skb->data,
> fr_skb->len);
> +
> +                               stat = dev_hard_header(fr_skb,
> +                                       lowpan_dev_info(dev)->real_dev, type,
> +                                       (void *)&da, (void *)&sa,
> fr_skb->len);
> +
> +                               dev_queue_xmit(fr_skb);
> +                       }
> +
> +                       /* TODO: what's the correct way to skip
> default skb? */
> +
> +                       fragment_tag++;
> +                       return stat;
> +               } else
>                 return dev_hard_header(skb,
> lowpan_dev_info(dev)->real_dev,
>                                 type, (void *)&da, (void *)&sa,
> skb->len);
>         }
> +error:
> +       kfree_skb(skb);
> +       return -ENOMEM;
>  }
>  
>  static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr
> *hdr)
> @@ -511,6 +634,23 @@ static int lowpan_skb_deliver(struct sk_buff
> *skb, struct ipv6hdr *hdr)
>         return stat;
>  }
>  
> +static void lowpan_fragment_timer_expired(unsigned long tag)
> +{
> +       struct lowpan_fragment *entry, *tmp;
> +
> +       pr_debug("%s: timer expired for frame with tag %lu\n",
> __func__, tag);
> +
> +       mutex_lock(&flist_lock);
> 
> 
	A mutex_lock() is not allowed in this context (softirq).
You must use a spinlock.
> 
> 
> +       list_for_each_entry_safe(entry, tmp, &lowpan_fragments, list)
> +               if (entry->tag == tag) {
> 
> 
	Since you have a timer per entry, instead of doing a lookup to find
'tag', you could just say 'tag' is the pointer to your "struct
lowpan_fragment"

> 
> +                       list_del(&entry->list);
> +                       kfree(entry->data);
> +                       kfree(entry);
> +                       break;
> +               }
> +       mutex_unlock(&flist_lock);
> +}
> 

	struct lowpan_fragment *entry = (struct lowpan_fragment *)tag;
	spin_lock();
	list_del(&entry->list);
	kfree(entry->data);
	kfree(entry);
	spin_unlock();
> 
> +
>  static int
>  lowpan_process_data(struct sk_buff *skb)
>  {
> @@ -525,6 +665,139 @@ lowpan_process_data(struct sk_buff *skb)
>         if (skb->len < 2)
>                 goto drop;
>         iphc0 = lowpan_fetch_skb_u8(skb);
> +
> +       /* fragments assmebling */
> +       switch (iphc0 & 0xf8) {

	What does 0xf8 mean? Please use a macro or something...
> 
> +       /* first fragment of the frame */
> +       case LOWPAN_DISPATCH_FRAG1:
> +       {
> +               struct lowpan_fragment *entry, *frame;
> +               u16 tag;
> +
> +               lowpan_raw_dump_inline(__func__, "first frame fragment
> header",
> +                                                               skb->data, 3);
> +
> +               tmp = lowpan_fetch_skb_u8(skb);
> +               tag = lowpan_fetch_skb_u16(skb);
> +
> +               /*
> +                * check if frame assembling with the same tag is
> +                * already in progress
> +                */
> +               rcu_read_lock();
> +               list_for_each_entry_rcu(entry, &lowpan_fragments,
> list)
> +                       if (entry->tag == tag) {
> +                               pr_debug("%s ERROR: frame with this
> tag is"
> +                                        "alredy in assembling",
> __func__);
> +                               goto drop_rcu;
> +                       }
> +               rcu_read_unlock();
> +
> +               /* alloc new frame structure */
> +               frame = kzalloc(sizeof(struct lowpan_fragment),
> GFP_KERNEL);
> 
> 
   	GFP_ATOMIC
> 
> +               if (!frame)
> +                       goto drop;
> +
> +               INIT_LIST_HEAD(&frame->list);
> +
> +               frame->bytes_rcv = 0;
> +               frame->length = (iphc0 & 7) | (tmp << 3);
> +               frame->tag = tag;
> +               /* allocate buffer for frame assembling */
> +               frame->data = kzalloc(frame->length, GFP_KERNEL);
> 
> 
		GFP_ATOMIC

> +               if (!frame->data) {
> +                       kfree(frame);
> +                       goto drop;
> +               }
> +
> +               pr_debug("%s: frame to be assembled: length = 0x%x, "
> +                        "tag = 0x%x\n", __func__, frame->length,
> frame->tag);
> +
> +               init_timer(&frame->timer);
> +               /* (number of fragments) * (fragment processing
> time-out) */
> +               frame->timer.expires = jiffies +
> +                 (frame->length / LOWPAN_FRAG_SIZE + 1) *
> LOWPAN_FRAG_TIMEOUT;
> +               frame->timer.data = tag;
> +               frame->timer.function = lowpan_fragment_timer_expired;
> +
> +               add_timer(&frame->timer);
> +
> +               mutex_lock(&flist_lock);
> +               list_add_tail(&frame->list, &lowpan_fragments);
> +               mutex_unlock(&flist_lock);
> +
> +               return kfree_skb(skb), 0;
> +       }
> +       /* second and next fragment of the frame */
> +       case LOWPAN_DISPATCH_FRAGN:
> +       {
> +               u16 tag;
> +               struct lowpan_fragment *entry, *t;
> +
> +               lowpan_raw_dump_inline(__func__, "next fragment
> header",
> +                                       skb->data, 4);
> +
> +               lowpan_fetch_skb_u8(skb); /* skip frame length byte */
> +               tag = lowpan_fetch_skb_u16(skb);
> +
> +               rcu_read_lock();
> +               list_for_each_entry_rcu(entry, &lowpan_fragments,
> list)
> +                       if (entry->tag == tag)
> +                               break;
> +               rcu_read_unlock();
> +
> +               if (entry->tag != tag) {
> +                       pr_debug("%s ERROR: no frame structure found
> for this"
> +                                "fragment", __func__);
> +                       goto drop;
> +               }
> +
> +               tmp = lowpan_fetch_skb_u8(skb); /* fetch offset */
> +
> +               lowpan_raw_dump_table(__func__, "next fragment
> payload",
> +                                       skb->data, skb->len);
> +
> +               /* if payload fits buffer, copy it */
> +               if ((tmp * 8 + skb->len) <= entry->length) /* TODO:
> likely? */
> +                       memcpy(entry->data + tmp * 8, skb->data,
> skb->len);
> +               else
> +                       goto drop;
> +
> +               entry->bytes_rcv += skb->len;
> +
> +               pr_debug("%s: frame length = 0x%x, bytes received = 0x
> %x\n",
> +                        __func__, entry->length, entry->bytes_rcv);
> +
> +               /* frame assembling complete */
> +               if (entry->bytes_rcv == entry->length) {
> +                       struct sk_buff *tmp = skb;
> +
> +                       mutex_lock(&flist_lock);
> +                       list_for_each_entry_safe(entry, t,
> &lowpan_fragments, list)
> +                               if (entry->tag == tag) {
> +                                       list_del(&entry->list);
> +                                       /* copy and clear skb */
> +                                       skb = skb_copy_expand(skb,
> entry->length, skb_tailroom(skb), GFP_KERNEL);
> +                                       skb_pull(skb, skb->len);
> +                                       /* copy new data to skb */
> +                                       memcpy(skb_push(skb,
> entry->length), entry->data, entry->length);
> +                                       kfree_skb(tmp);
> +                                       del_timer(&entry->timer);
> +                                       kfree(entry->data);
> +                                       kfree(entry);
> +
> +                                       iphc0 =
> lowpan_fetch_skb_u8(skb);
> +                                       break;
> +                               }
> +                       mutex_unlock(&flist_lock);
> +                       break;
> +               }
> +               return kfree_skb(skb), 0;
> +       }
> +       default:
> +               break;
> +       }
> +
>         iphc1 = lowpan_fetch_skb_u8(skb);
>  
>         _saddr = mac_cb(skb)->sa.hwaddr;
> @@ -674,6 +947,8 @@ lowpan_process_data(struct sk_buff *skb)
>         lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr,
>                                                         sizeof(hdr));
>         return lowpan_skb_deliver(skb, &hdr);
> +drop_rcu:
> +       rcu_read_unlock();
>  drop:
>         kfree(skb);
>         return -EINVAL;
> @@ -765,8 +1040,15 @@ static int lowpan_rcv(struct sk_buff *skb,
> struct net_device *dev,
>                 goto drop;
>  
>         /* check that it's our buffer */
> -       if ((skb->data[0] & 0xe0) == 0x60)
> +       switch (skb->data[0] & 0xe0) {
> +       case 0x60:              /* ipv6 datagram */
> +       case 0xc0:              /* first fragment header */
> +       case 0xe0:              /* next fragments headers */
>                 lowpan_process_data(skb);
> +               break;
> +       default:
> +               break;
> +       }
>  
>         return NET_RX_SUCCESS;
>  
> @@ -793,6 +1075,8 @@ static int lowpan_newlink(struct net *src_net,
> struct net_device *dev,
>         lowpan_dev_info(dev)->real_dev = real_dev;
>         mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
>  
> +       mutex_init(&flist_lock);
> 
> 
	Doing this init each time a link is setup is wrong.
	Do it once.
> 
> +
>         entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
>         if (!entry)
>                 return -ENOMEM;
> diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
> index 5d8cf80..e8e57c4 100644
> --- a/net/ieee802154/6lowpan.h
> +++ b/net/ieee802154/6lowpan.h
> @@ -159,6 +159,9 @@
>  #define LOWPAN_DISPATCH_FRAG1  0xc0 /* 11000xxx */
>  #define LOWPAN_DISPATCH_FRAGN  0xe0 /* 11100xxx */
>  
> +#define LOWPAN_FRAG_SIZE       40              /* fragment payload
> size */
> +#define LOWPAN_FRAG_TIMEOUT    (HZ * 2)        /* processing time: 2
> sec */
> +
>  /*
>   * Values of fields within the IPHC encoding first byte
>   * (C stands for compressed and I for inline)
> -- 
> 1.7.2.5
> 

^ permalink raw reply

* (unknown)
From: Western Union @ 2011-10-20 12:34 UTC (permalink / raw)



You've won $85,000USD by IMF via western union.Confirm with name,age,occupation,
country

^ permalink raw reply

* Re: [IEEE802.15.4][6LoWPAN] draft for fragmentation support
From: Alexander Smirnov @ 2011-10-20 12:50 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, dbaryshkov, slapin, linux-zigbee-devel, netdev, jonsmirl
In-Reply-To: <1319114385.3781.34.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

Hi Eric,

thank you for the replies. And another question I forgot to ask:

when I send fragments, I still have the original skb buffer. What should I
do with it? Is there a "proper/good" way to drop it? I've already
fragmented it and do not need to send the original skb to the queue.

Thank you,
Alexander

2011/10/20 Eric Dumazet <eric.dumazet@gmail.com>:
> Le jeudi 20 octobre 2011 à 15:17 +0400, Alexander Smirnov a écrit :
>> Hello everybody,
>>
>> below is the patch which adds support for fragmentation in 6LoWPAN
>> point to point networks. This activity needs because of difference
>> in MTU: 1280 ipv6 and 128 ieee802.15.4
>>
>> This patch is just a draft. Could anyone please look at
>> it and let me know your opinion.
>>
>
> I removed janitor list, since this patch is certainly not a janitor one.
>
>> The most doubtful moments for me are:
>> 1. Should the list 'frag_list' and the mutex 'flist_lock' be
>> included into dev private data?
>
>        The mutex is wrong, you need a spinlock since run from softirq handler.
>        Allocations should use GFP_ATOMIC for same reason.
>
>> 2. Can I use 'dev_queue_xmit' to send fragments to queue?
>
>        Well, it is not very clean, but it seems there is no alternative
>
>> 3. Creating new 'skb' instead of copying and modifying main one.
>
>        You cant do that without making sure you own the skb and its data.
>        Think about a sniffer running...
>
>
> 4) No limitation on number of in-flight fragments.
> You can consume lot of ram and have a list with 65536 elements...
>
>
>
>
>
>>  net/ieee802154/6lowpan.c |  286
>> +++++++++++++++++++++++++++++++++++++++++++++-
>>  net/ieee802154/6lowpan.h |    3 +
>>  2 files changed, 288 insertions(+), 1 deletions(-)
>>
>> diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
>> index 96877bd..1923ec7 100644
>> --- a/net/ieee802154/6lowpan.c
>> +++ b/net/ieee802154/6lowpan.c
>> @@ -113,6 +113,24 @@ struct lowpan_dev_record {
>>         struct list_head list;
>>  };
>>
>> +struct lowpan_fragment {
>> +       u8 in_progress;                 /* assembling is in progress
>> */
>> +       struct sk_buff *skb;            /* skb to be assembled */
>> +       u8 *data;                       /* data to be stored */
>> +       struct mutex lock;              /* concurency lock */
>> +       u16 length;                     /* frame length to be assemled
>> */
>> +       u32 bytes_rcv;                  /* bytes received */
>> +       u16 tag;                        /* current fragment tag */
>> +       struct timer_list timer;        /* assembling timer */
>> +       struct list_head list;          /* fragments list handler
>> */
>> +};
>> +
>> +static unsigned short fragment_tag;
>> +
>> +/* TODO: bind mutex and list to device */
>> +static LIST_HEAD(lowpan_fragments);
>> +struct mutex flist_lock;
>> +
>>  static inline struct
>>  lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
>>  {
>> @@ -244,6 +262,18 @@ static u8 lowpan_fetch_skb_u8(struct sk_buff
>> *skb)
>>         return ret;
>>  }
>>
>> +static u16 lowpan_fetch_skb_u16(struct sk_buff *skb)
>> +{
>> +       u16 ret;
>> +
>> +       BUG_ON(skb->len < 2);
>>
>
>        Hmm, check pskb_may_pull(skb, 2), or in caller.
>
>        skb->len >= 2 doesnt mean you can access to skb->data[0] and
> skb->data[1] : Data might be on a fragment, not on skb head.
>
>> +
>> +       ret = skb->data[0] | (skb->data[1] << 8);
>> +       skb_pull(skb, 2);
>> +       return ret;
>> +}
>> +
>> +static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device
>> *dev);
>>  static int lowpan_header_create(struct sk_buff *skb,
>>                            struct net_device *dev,
>>                            unsigned short type, const void *_daddr,
>> @@ -467,9 +497,102 @@ static int lowpan_header_create(struct sk_buff
>> *skb,
>>                 memcpy(&(sa.hwaddr), saddr, 8);
>>
>>                 mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
>> +
>> +               /* frame fragmentation */
>> +
>> +               /*
>> +                * if payload + mac header doesn't fit MTU-sized frame
>> +                * we need to fragment it.
>> +                */
>> +               if (skb->len > (127 - 24)) { /* MTU -
>> MAC_HEADER_LENGTH */
>> +                       struct sk_buff *fr_skb;
>> +                       u16 b_sent = 0;
>> +                       unsigned short payload_len = skb->len;
>> +                       int stat = 0;
>> +
>> +                       pr_debug("%s: the frame is too big (0x%x),"
>> +                                "fragmentation needed, using tag = 0x
>> %x\n",
>> +                                __func__, payload_len, fragment_tag);
>> +
>> +                       fr_skb = skb_copy(skb, GFP_KERNEL);
>>
>
>                        GFP_ATOMIC
>>
> And I wonder why you skb_copy(). You are not allowed to change skb like
> that. ( when you later skb_push(fr_skb, 4), you are modifying this skb
> data too...)
>>
>> +                       if (!fr_skb)
>> +                               goto error;
>> +
>> +                       /* 40-bit - fragment dispatch size */
>> +                       head = kzalloc(5, GFP_KERNEL);
>
>                        GFP_ATOMIC
>
>
>> +                       if (!head)
>> +                               goto error;
>> +
>> +                       /* first fagment header */
>> +                       head[0] = LOWPAN_DISPATCH_FRAG1 | (payload_len
>> & 0x7);
>> +                       head[1] = (payload_len >> 3) & 0xff;
>> +                       head[2] = fragment_tag & 0xff;
>> +                       head[3] = fragment_tag >> 8;
>> +
>> +
>> +                       lowpan_raw_dump_inline(__func__, "first
>> header",
>> +                                                       head, 4);
>> +
>> +                       memcpy(skb_push(fr_skb, 4), head, 4);
>> +                       skb_trim(fr_skb, LOWPAN_FRAG_SIZE);
>> +
>> +                       dev_hard_header(fr_skb,
>> lowpan_dev_info(dev)->real_dev,
>> +                               type, (void *)&da, (void *)&sa,
>> fr_skb->len);
>> +
>> +                       /* send fragment to dev queue */
>> +                       dev_queue_xmit(fr_skb);
>> +
>> +                       /* next fragments headers */
>> +                       head[0] |= 0x20;
>> +
>> +                       lowpan_raw_dump_inline(__func__, "next
>> headers",
>> +                                                       head, 5);
>> +
>> +                       while (b_sent < payload_len) {
>> +                               /* not the first fragment */
>> +                               if (b_sent)
>> +                                       skb_pull(skb,
>> LOWPAN_FRAG_SIZE);
>> +
>> +                               pr_debug("%s: preparing fragment %d
>> \n",
>> +                                   __func__, b_sent /
>> LOWPAN_FRAG_SIZE);
>> +
>> +                               /*
>> +                                * create copy of current buffer and
>> trim it
>> +                                * down to fragment size
>> +                                */
>> +                               fr_skb = skb_copy(skb, GFP_KERNEL);
>> +                               if (!fr_skb)
>> +                                       goto error;
>> +
>> +                               skb_trim(fr_skb, LOWPAN_FRAG_SIZE);
>> +
>> +                               /* add fragment header */
>> +                               head[4] = b_sent / 8;
>> +                               memcpy(skb_push(fr_skb, 5), head, 5);
>> +
>> +                               b_sent += LOWPAN_FRAG_SIZE;
>> +
>> +                               lowpan_raw_dump_table(__func__,
>> +                                  "fragment data", fr_skb->data,
>> fr_skb->len);
>> +
>> +                               stat = dev_hard_header(fr_skb,
>> +                                       lowpan_dev_info(dev)->real_dev, type,
>> +                                       (void *)&da, (void *)&sa,
>> fr_skb->len);
>> +
>> +                               dev_queue_xmit(fr_skb);
>> +                       }
>> +
>> +                       /* TODO: what's the correct way to skip
>> default skb? */
>> +
>> +                       fragment_tag++;
>> +                       return stat;
>> +               } else
>>                 return dev_hard_header(skb,
>> lowpan_dev_info(dev)->real_dev,
>>                                 type, (void *)&da, (void *)&sa,
>> skb->len);
>>         }
>> +error:
>> +       kfree_skb(skb);
>> +       return -ENOMEM;
>>  }
>>
>>  static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr
>> *hdr)
>> @@ -511,6 +634,23 @@ static int lowpan_skb_deliver(struct sk_buff
>> *skb, struct ipv6hdr *hdr)
>>         return stat;
>>  }
>>
>> +static void lowpan_fragment_timer_expired(unsigned long tag)
>> +{
>> +       struct lowpan_fragment *entry, *tmp;
>> +
>> +       pr_debug("%s: timer expired for frame with tag %lu\n",
>> __func__, tag);
>> +
>> +       mutex_lock(&flist_lock);
>>
>>
>        A mutex_lock() is not allowed in this context (softirq).
> You must use a spinlock.
>>
>>
>> +       list_for_each_entry_safe(entry, tmp, &lowpan_fragments, list)
>> +               if (entry->tag == tag) {
>>
>>
>        Since you have a timer per entry, instead of doing a lookup to find
> 'tag', you could just say 'tag' is the pointer to your "struct
> lowpan_fragment"
>
>>
>> +                       list_del(&entry->list);
>> +                       kfree(entry->data);
>> +                       kfree(entry);
>> +                       break;
>> +               }
>> +       mutex_unlock(&flist_lock);
>> +}
>>
>
>        struct lowpan_fragment *entry = (struct lowpan_fragment *)tag;
>        spin_lock();
>        list_del(&entry->list);
>        kfree(entry->data);
>        kfree(entry);
>        spin_unlock();
>>
>> +
>>  static int
>>  lowpan_process_data(struct sk_buff *skb)
>>  {
>> @@ -525,6 +665,139 @@ lowpan_process_data(struct sk_buff *skb)
>>         if (skb->len < 2)
>>                 goto drop;
>>         iphc0 = lowpan_fetch_skb_u8(skb);
>> +
>> +       /* fragments assmebling */
>> +       switch (iphc0 & 0xf8) {
>
>        0xf8 means ? Please use a macro or something...
>>
>> +       /* first fragment of the frame */
>> +       case LOWPAN_DISPATCH_FRAG1:
>> +       {
>> +               struct lowpan_fragment *entry, *frame;
>> +               u16 tag;
>> +
>> +               lowpan_raw_dump_inline(__func__, "first frame fragment
>> header",
>> +                                                               skb->data, 3);
>> +
>> +               tmp = lowpan_fetch_skb_u8(skb);
>> +               tag = lowpan_fetch_skb_u16(skb);
>> +
>> +               /*
>> +                * check if frame assembling with the same tag is
>> +                * already in progress
>> +                */
>> +               rcu_read_lock();
>> +               list_for_each_entry_rcu(entry, &lowpan_fragments,
>> list)
>> +                       if (entry->tag == tag) {
>> +                               pr_debug("%s ERROR: frame with this
>> tag is"
>> +                                        "alredy in assembling",
>> __func__);
>> +                               goto drop_rcu;
>> +                       }
>> +               rcu_read_unlock();
>> +
>> +               /* alloc new frame structure */
>> +               frame = kzalloc(sizeof(struct lowpan_fragment),
>> GFP_KERNEL);
>>
>>
>        GFP_ATOMIC
>>
>> +               if (!frame)
>> +                       goto drop;
>> +
>> +               INIT_LIST_HEAD(&frame->list);
>> +
>> +               frame->bytes_rcv = 0;
>> +               frame->length = (iphc0 & 7) | (tmp << 3);
>> +               frame->tag = tag;
>> +               /* allocate buffer for frame assembling */
>> +               frame->data = kzalloc(frame->length, GFP_KERNEL);
>>
>>
>                GFP_ATOMIC
>
>> +               if (!frame->data) {
>> +                       kfree(frame);
>> +                       goto drop;
>> +               }
>> +
>> +               pr_debug("%s: frame to be assembled: length = 0x%x, "
>> +                        "tag = 0x%x\n", __func__, frame->length,
>> frame->tag);
>> +
>> +               init_timer(&frame->timer);
>> +               /* (number of fragments) * (fragment processing
>> time-out) */
>> +               frame->timer.expires = jiffies +
>> +                 (frame->length / LOWPAN_FRAG_SIZE + 1) *
>> LOWPAN_FRAG_TIMEOUT;
>> +               frame->timer.data = tag;
>> +               frame->timer.function = lowpan_fragment_timer_expired;
>> +
>> +               add_timer(&frame->timer);
>> +
>> +               mutex_lock(&flist_lock);
>> +               list_add_tail(&frame->list, &lowpan_fragments);
>> +               mutex_unlock(&flist_lock);
>> +
>> +               return kfree_skb(skb), 0;
>> +       }
>> +       /* second and next fragment of the frame */
>> +       case LOWPAN_DISPATCH_FRAGN:
>> +       {
>> +               u16 tag;
>> +               struct lowpan_fragment *entry, *t;
>> +
>> +               lowpan_raw_dump_inline(__func__, "next fragment
>> header",
>> +                                       skb->data, 4);
>> +
>> +               lowpan_fetch_skb_u8(skb); /* skip frame length byte */
>> +               tag = lowpan_fetch_skb_u16(skb);
>> +
>> +               rcu_read_lock();
>> +               list_for_each_entry_rcu(entry, &lowpan_fragments,
>> list)
>> +                       if (entry->tag == tag)
>> +                               break;
>> +               rcu_read_unlock();
>> +
>> +               if (entry->tag != tag) {
>> +                       pr_debug("%s ERROR: no frame structure found
>> for this"
>> +                                "fragment", __func__);
>> +                       goto drop;
>> +               }
>> +
>> +               tmp = lowpan_fetch_skb_u8(skb); /* fetch offset */
>> +
>> +               lowpan_raw_dump_table(__func__, "next fragment
>> payload",
>> +                                       skb->data, skb->len);
>> +
>> +               /* if payload fits buffer, copy it */
>> +               if ((tmp * 8 + skb->len) <= entry->length) /* TODO:
>> likely? */
>> +                       memcpy(entry->data + tmp * 8, skb->data,
>> skb->len);
>> +               else
>> +                       goto drop;
>> +
>> +               entry->bytes_rcv += skb->len;
>> +
>> +               pr_debug("%s: frame length = 0x%x, bytes received = 0x
>> %x\n",
>> +                        __func__, entry->length, entry->bytes_rcv);
>> +
>> +               /* frame assembling complete */
>> +               if (entry->bytes_rcv == entry->length) {
>> +                       struct sk_buff *tmp = skb;
>> +
>> +                       mutex_lock(&flist_lock);
>> +                       list_for_each_entry_safe(entry, t,
>> &lowpan_fragments, list)
>> +                               if (entry->tag == tag) {
>> +                                       list_del(&entry->list);
>> +                                       /* copy and clear skb */
>> +                                       skb = skb_copy_expand(skb,
>> entry->length, skb_tailroom(skb), GFP_KERNEL);
>> +                                       skb_pull(skb, skb->len);
>> +                                       /* copy new data to skb */
>> +                                       memcpy(skb_push(skb,
>> entry->length), entry->data, entry->length);
>> +                                       kfree_skb(tmp);
>> +                                       del_timer(&entry->timer);
>> +                                       kfree(entry->data);
>> +                                       kfree(entry);
>> +
>> +                                       iphc0 =
>> lowpan_fetch_skb_u8(skb);
>> +                                       break;
>> +                               }
>> +                       mutex_unlock(&flist_lock);
>> +                       break;
>> +               }
>> +               return kfree_skb(skb), 0;
>> +       }
>> +       default:
>> +               break;
>> +       }
>> +
>>         iphc1 = lowpan_fetch_skb_u8(skb);
>>
>>         _saddr = mac_cb(skb)->sa.hwaddr;
>> @@ -674,6 +947,8 @@ lowpan_process_data(struct sk_buff *skb)
>>         lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr,
>>                                                         sizeof(hdr));
>>         return lowpan_skb_deliver(skb, &hdr);
>> +drop_rcu:
>> +       rcu_read_unlock();
>>  drop:
>>         kfree(skb);
>>         return -EINVAL;
>> @@ -765,8 +1040,15 @@ static int lowpan_rcv(struct sk_buff *skb,
>> struct net_device *dev,
>>                 goto drop;
>>
>>         /* check that it's our buffer */
>> -       if ((skb->data[0] & 0xe0) == 0x60)
>> +       switch (skb->data[0] & 0xe0) {
>> +       case 0x60:              /* ipv6 datagram */
>> +       case 0xc0:              /* first fragment header */
>> +       case 0xe0:              /* next fragments headers */
>>                 lowpan_process_data(skb);
>> +               break;
>> +       default:
>> +               break;
>> +       }
>>
>>         return NET_RX_SUCCESS;
>>
>> @@ -793,6 +1075,8 @@ static int lowpan_newlink(struct net *src_net,
>> struct net_device *dev,
>>         lowpan_dev_info(dev)->real_dev = real_dev;
>>         mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
>>
>> +       mutex_init(&flist_lock);
>>
>>
>        Doing this init each time a link is setup is wrong.
>        Do it once.
>>
>> +
>>         entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
>>         if (!entry)
>>                 return -ENOMEM;
>> diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
>> index 5d8cf80..e8e57c4 100644
>> --- a/net/ieee802154/6lowpan.h
>> +++ b/net/ieee802154/6lowpan.h
>> @@ -159,6 +159,9 @@
>>  #define LOWPAN_DISPATCH_FRAG1  0xc0 /* 11000xxx */
>>  #define LOWPAN_DISPATCH_FRAGN  0xe0 /* 11100xxx */
>>
>> +#define LOWPAN_FRAG_SIZE       40              /* fragment payload
>> size */
>> +#define LOWPAN_FRAG_TIMEOUT    (HZ * 2)        /* processing time: 2
>> sec */
>> +
>>  /*
>>   * Values of fields within the IPHC encoding first byte
>>   * (C stands for compressed and I for inline)
>> --
>> 1.7.2.5
>>
>
>
>

^ permalink raw reply

* Re: [RFC PATCH 0/5] SUNRPC: "RPC pipefs per network namespace" preparations
From: Stanislav Kinsbursky @ 2011-10-20 12:56 UTC (permalink / raw)
  To: bfields@fieldses.org
  Cc: Trond.Myklebust@netapp.com, linux-nfs@vger.kernel.org,
	Pavel Emelianov, neilb@suse.de, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, davem@davemloft.net,
	devel@openvz.org
In-Reply-To: <20111020123242.GN5444@fieldses.org>

20.10.2011 16:32, bfields@fieldses.org пишет:
> On Thu, Oct 20, 2011 at 03:06:46PM +0400, Stanislav Kinsbursky wrote:
>> Guys, please, spend some of your expensive time to review this patch-set briefly.
>
> I'll try to take a look soon, but I'm travelling tomorrow through the
> 31st, and things will be a little hectic.
>

Thanks for your time, Bruce.

> Just one quick comment:
>
>>> The only problem about I'm not sure how to solve properly yet, is auth gss
>>> pipes creations operations. Hoping for some help with it.
>
> I suspect one reason it may be a little complicated is the
> upcall-version switching.  The old version is deprecated, and there's no
> need to support the combination of the old version with the a new
> feature like containers.  And now that it's been there a while the
> version-switching code already achieved its goal of avoiding a flag day.
> So, one approach might be:
>
> 	- move all the code for the old gss upcall and for the version
> 	  switching under a new CONFIG_DEPRECATED_GSS, or similar.
> 	- print a warning if the old stuff is used, and plan to rip it
> 	  out completely in a future kernel version.
> 	- do something that works just in the !CONFIG_DEPRECATED_GSS
> 	  case.
>

Thanks for this comment. I'll check the code for the problem you mentioned here.
But I was actually talking about something else.
Currently we create the pipe in gss without any checks, since we assume that the
pipefs client dir has already been created.
But with the approach presented in this patch set, pipes and dirs will be created
only when pipefs has been mounted from user-space. I.e. clients with gss auth may
already be present, and some callback is required for creating the gss pipes.
This approach also assumes that gss auth can exist without a pipe.

> Would that help?
>
> --b.


-- 
Best regards,
Stanislav Kinsbursky

^ permalink raw reply

* Re: [PATCH 9/9] make net/core/scm.c uid comparisons user namespace aware
From: Serge E. Hallyn @ 2011-10-20 12:58 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-kernel, akpm, oleg, richard, mikevs, segoon, gregkh,
	dhowells, eparis, Serge E. Hallyn, netdev
In-Reply-To: <m1sjmpytpf.fsf@fess.ebiederm.org>

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Serge Hallyn <serge@hallyn.com> writes:
> 
> > From: "Serge E. Hallyn" <serge.hallyn@canonical.com>
> >
> > Currently uids are compared without regard for the user namespace.
> > Fix that to prevent tasks in a different user namespace from
> > wrongly matching on SCM_CREDENTIALS.
> >
> > In the past, either your uids had to match, or you had to have
> > CAP_SETXID.  In a namespaced world, you must either (both be in the
> > same user namespace and have your uids match), or you must have
> > CAP_SETXID targeted at the other user namespace.  The latter can
> > happen for instance if uid 500 created a new user namespace and
> > now interacts with uid 0 in it.
> 
> Serge this approach is wrong.

Thanks for looking, Eric.

> Because we pass the cred and the pid through the socket socket itself
> is just a conduit and should be ignored in this context.

Ok, that makes sense, but

> The only interesting test should be are you allowed to impersonate other
> users in your current userk namespace.

Why in your current user namespace?  Shouldn't it be in the
target user ns?  I understand it could be wrong to tie the
user ns owning the socket to the target userns (though I still
kind of like it), but just because I have CAP_SETUID in my
own user_ns doesn't mean I should be able to pose as another
uid in your user_ns.

(Now I also see that cred_to_ucred() translates to the current
user_ns, so that should have been a hint to me before about
your intent, but I'm not convinced I agree with your intent).

And you do the same with the pid.  Why is that a valid assumption?

(I've got that feeling that I'll feel like a dunce once you explain :)

> So it should be possible to simplify the entire patch to just:
>  static __inline__ int scm_check_creds(struct ucred *creds)
>  {
>  	const struct cred *cred = current_cred();
> +	struct user_namespace *ns = cred->user_ns;
> 
> -	if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
> -	    ((creds->uid == cred->uid   || creds->uid == cred->euid ||
> -	      creds->uid == cred->suid) || capable(CAP_SETUID)) &&
> -	    ((creds->gid == cred->gid   || creds->gid == cred->egid ||
> -	      creds->gid == cred->sgid) || capable(CAP_SETGID))) {
> +	if ((creds->pid == task_tgid_vnr(current) || ns_capable(ns, CAP_SYS_ADMIN)) &&
> +	    ((creds->uid == cred->uid   || creds->uid == cred->euid ||
> +	      creds->uid == cred->suid) || ns_capable(ns, CAP_SETUID)) &&
> +	    ((creds->gid == cred->gid   || creds->gid == cred->egid ||
> +	      creds->gid == cred->sgid) || ns_capable(ns, CAP_SETGID))) {
>   	       return 0;
>   	}
>   	return -EPERM;
>   }

^ permalink raw reply

* Re: [IEEE802.15.4][6LoWPAN] draft for fragmentation support
From: Eric Dumazet @ 2011-10-20 13:11 UTC (permalink / raw)
  To: Alexander Smirnov
  Cc: davem, dbaryshkov, slapin, linux-zigbee-devel, netdev, jonsmirl
In-Reply-To: <CAJmB2rCe3BJKD07TOSyAT0vbDq_K1VHLOtECqeOMzaTsg3DokA@mail.gmail.com>

Le jeudi 20 octobre 2011 à 16:50 +0400, Alexander Smirnov a écrit :
> Hi Eric,
> 
> thank you for the replies. And another question I forgot to ask:
> 
> when I send fragments, I still have original skb buffer. What should I
> do with it, is there any
> "proper/good" ways to drop it? Because I've already fragmented it and
> do not need to send
> original skb to queue.

I don't quite understand. Once your xmits are done, you must free the
original skb.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox