Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH v3 4/4] dma debug: introduce debug_dma_assert_idle()
From: Dan Williams @ 2014-01-14 22:04 UTC (permalink / raw)
  To: Andrew Morton
  Cc: dmaengine, Vinod Koul, netdev, Joerg Roedel, linux-kernel,
	James Bottomley, Russell King
In-Reply-To: <20140113171412.dd90c020b103f4a686f8dc34@linux-foundation.org>

On Mon, 2014-01-13 at 17:14 -0800, Andrew Morton wrote:
> On Mon, 13 Jan 2014 16:48:47 -0800 Dan Williams <dan.j.williams@intel.com> wrote:
> 
> > Record actively mapped pages and provide an api for asserting a given
> > page is dma inactive before execution proceeds.  Placing
> > debug_dma_assert_idle() in cow_user_page() flagged the violation of the
> > dma-api in the NET_DMA implementation (see commit 77873803363c "net_dma:
> > mark broken").
> 
> Some discussion of the overlap counter thing would be useful.
> 
[..]
> OK, I think I see what's happening.  The tags thing acts as a crude
> counter and if the map/unmap count ends up imbalanced, we deliberately
> leak an entry in the radix-tree so it can later be reported via undescribed
> means.  Thoughts:
> 
> - RADIX_TREE_MAX_TAGS=3 so the code could count to 7, with a bit of
>   futzing around.
> 
> - from a style/readability point of view it is unexpected that
>   __active_pfn_dec_overlap() actually removes radix-tree items.  It
>   would be better to do:
> 
> 	spin_lock_irqsave(&radix_lock, flags);
> 	if (__active_pfn_dec_overlap(entry) == something) {
> 		/*
> 		 * Nice comment goes here
> 		 */
> 		radix_tree_delete(...);
> 	}
> 	spin_unlock_irqrestore(&radix_lock, flags);
> 
> 

Ok, here is v4, let me know if you prefer a new mail or if the
'scissors' are sufficient:

>8-----------------
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 17 Dec 2013 12:31:34 -0800
Subject: [PATCH v4] dma debug: introduce debug_dma_assert_idle()

Record actively mapped pages and provide an api for asserting a given
page is dma inactive before execution proceeds.  Placing
debug_dma_assert_idle() in cow_user_page() flagged the violation of the
dma-api in the NET_DMA implementation (see commit 77873803363c "net_dma:
mark broken").

The implementation includes the capability to count, in a limited way,
repeat mappings of the same page that occur without an intervening
unmap.  This 'overlap' counter is limited to the few bits of tag space
in a radix tree.  This mechanism is added to mitigate false negative
cases where, for example, a page is dma mapped twice and
debug_dma_assert_idle() is called after the page is un-mapped once.

Cc: Joerg Roedel <joro@8bytes.org>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: James Bottomley <JBottomley@Parallels.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dma-debug.h |    6 ++
 lib/Kconfig.debug         |   12 +++-
 lib/dma-debug.c           |  193 ++++++++++++++++++++++++++++++++++++++++++---
 mm/memory.c               |    3 +
 4 files changed, 199 insertions(+), 15 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index fc0e34ce038f..fe8cb610deac 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -85,6 +85,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,
 
 extern void debug_dma_dump_mappings(struct device *dev);
 
+extern void debug_dma_assert_idle(struct page *page);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_add_bus(struct bus_type *bus)
@@ -183,6 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
 {
 }
 
+static inline void debug_dma_assert_idle(struct page *page)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index db25707aa41b..20073e7156e4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1575,8 +1575,16 @@ config DMA_API_DEBUG
 	  With this option you will be able to detect common bugs in device
 	  drivers like double-freeing of DMA mappings or freeing mappings that
 	  were never allocated.
-	  This option causes a performance degredation.  Use only if you want
-	  to debug device drivers. If unsure, say N.
+
+	  This also attempts to catch cases where a page owned by DMA is
+	  accessed by the cpu in a way that could cause data corruption.  For
+	  example, this enables cow_user_page() to check that the source page is
+	  not undergoing DMA.
+
+	  This option causes a performance degradation.  Use only if you want to
+	  debug device drivers and dma interactions.
+
+	  If unsure, say N.
 
 source "samples/Kconfig"
 
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d87a17a819d0..c38083871f11 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -53,11 +53,26 @@ enum map_err_types {
 
 #define DMA_DEBUG_STACKTRACE_ENTRIES 5
 
+/**
+ * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
+ * @list: node on pre-allocated free_entries list
+ * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
+ * @type: single, page, sg, coherent
+ * @pfn: page frame of the start address
+ * @offset: offset of mapping relative to pfn
+ * @size: length of the mapping
+ * @direction: enum dma_data_direction
+ * @sg_call_ents: 'nents' from dma_map_sg
+ * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
+ * @map_err_type: track whether dma_mapping_error() was checked
+ * @stacktrace: support backtraces when a violation is detected
+ */
 struct dma_debug_entry {
 	struct list_head list;
 	struct device    *dev;
 	int              type;
-	phys_addr_t      paddr;
+	unsigned long	 pfn;
+	size_t		 offset;
 	u64              dev_addr;
 	u64              size;
 	int              direction;
@@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
 	list_del(&entry->list);
 }
 
+static unsigned long long phys_addr(struct dma_debug_entry *entry)
+{
+	return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
+}
+
 /*
  * Dump mapping entries for debugging purposes
  */
@@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev)
 		list_for_each_entry(entry, &bucket->list, list) {
 			if (!dev || dev == entry->dev) {
 				dev_info(entry->dev,
-					 "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
+					 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
 					 type2name[entry->type], idx,
-					 (unsigned long long)entry->paddr,
+					 phys_addr(entry), entry->pfn,
 					 entry->dev_addr, entry->size,
 					 dir2name[entry->direction],
 					 maperr2str[entry->map_err_type]);
@@ -404,6 +424,133 @@ void debug_dma_dump_mappings(struct device *dev)
 EXPORT_SYMBOL(debug_dma_dump_mappings);
 
 /*
+ * For each page mapped (initial page in the case of
+ * dma_alloc_coherent/dma_map_{single|page}, or each page in a
+ * scatterlist) insert into this tree using the pfn as the key. At
+ * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry.  If
+ * the pfn already exists at insertion time add a tag as a reference
+ * count for the overlapping mappings.  For now, the overlap tracking
+ * just ensures that 'unmaps' balance 'maps' before marking the pfn
+ * idle, but we should also be flagging overlaps as an API violation.
+ *
+ * Memory usage is mostly constrained by the maximum number of available
+ * dma-debug entries in that we need a free dma_debug_entry before
+ * inserting into the tree.  In the case of dma_map_{single|page} and
+ * dma_alloc_coherent there is only one dma_debug_entry and one pfn to
+ * track per event.  dma_map_sg(), on the other hand,
+ * consumes a single dma_debug_entry, but inserts 'nents' entries into
+ * the tree.
+ *
+ * At any time debug_dma_assert_idle() can be called to trigger a
+ * warning if the given page is in the active set.
+ */
+static RADIX_TREE(dma_active_pfn, GFP_NOWAIT);
+static DEFINE_SPINLOCK(radix_lock);
+#define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+
+static int active_pfn_read_overlap(unsigned long pfn)
+{
+	int overlap = 0, i;
+
+	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+		if (radix_tree_tag_get(&dma_active_pfn, pfn, i))
+			overlap |= 1 << i;
+	return overlap;
+}
+
+static int active_pfn_set_overlap(unsigned long pfn, int overlap)
+{
+	int i;
+
+	if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0)
+		return 0;
+
+	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+		if (overlap & 1 << i)
+			radix_tree_tag_set(&dma_active_pfn, pfn, i);
+		else
+			radix_tree_tag_clear(&dma_active_pfn, pfn, i);
+
+	return overlap;
+}
+
+static void active_pfn_inc_overlap(unsigned long pfn)
+{
+	int overlap = active_pfn_read_overlap(pfn);
+
+	overlap = active_pfn_set_overlap(pfn, ++overlap);
+
+	/* If we overflowed the overlap counter then we're potentially
+	 * leaking dma-mappings.  Otherwise, if maps and unmaps are
+	 * balanced then this overflow may cause false negatives in
+	 * debug_dma_assert_idle() as the pfn may be marked idle
+	 * prematurely.
+	 */
+	WARN_ONCE(overlap == 0,
+		  "DMA-API: exceeded %d overlapping mappings of pfn %lx\n",
+		  ACTIVE_PFN_MAX_OVERLAP, pfn);
+}
+
+static int active_pfn_dec_overlap(unsigned long pfn)
+{
+	int overlap = active_pfn_read_overlap(pfn);
+
+	return active_pfn_set_overlap(pfn, --overlap);
+}
+
+static int active_pfn_insert(struct dma_debug_entry *entry)
+{
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry);
+	if (rc == -EEXIST)
+		active_pfn_inc_overlap(entry->pfn);
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	return rc;
+}
+
+static void active_pfn_remove(struct dma_debug_entry *entry)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	if (active_pfn_dec_overlap(entry->pfn) == 0)
+		radix_tree_delete(&dma_active_pfn, entry->pfn);
+	spin_unlock_irqrestore(&radix_lock, flags);
+}
+
+/**
+ * debug_dma_assert_idle() - assert that a page is not undergoing dma
+ * @page: page to lookup in the dma_active_pfn tree
+ *
+ * Place a call to this routine in cases where the cpu touching the page
+ * before the dma completes (page is dma_unmapped) will lead to data
+ * corruption.
+ */
+void debug_dma_assert_idle(struct page *page)
+{
+	unsigned long flags;
+	struct dma_debug_entry *entry;
+
+	if (!page)
+		return;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page));
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	if (!entry)
+		return;
+
+	err_printk(entry->dev, entry,
+		   "DMA-API: cpu touching an active dma mapped page "
+		   "[pfn=0x%lx]\n", entry->pfn);
+}
+
+/*
  * Wrapper function for adding an entry to the hash.
  * This function takes care of locking itself.
  */
@@ -411,10 +558,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 {
 	struct hash_bucket *bucket;
 	unsigned long flags;
+	int rc;
 
 	bucket = get_hash_bucket(entry, &flags);
 	hash_bucket_add(bucket, entry);
 	put_hash_bucket(bucket, &flags);
+
+	rc = active_pfn_insert(entry);
+	if (rc == -ENOMEM) {
+		pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n");
+		global_disable = true;
+	}
+
+	/* TODO: report -EEXIST errors here as overlapping mappings are
+	 * not supported by the DMA API
+	 */
 }
 
 static struct dma_debug_entry *__dma_entry_alloc(void)
@@ -469,6 +627,8 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 {
 	unsigned long flags;
 
+	active_pfn_remove(entry);
+
 	/*
 	 * add to beginning of the list - this way the entries are
 	 * more likely cache hot when they are reallocated.
@@ -895,15 +1055,15 @@ static void check_unmap(struct dma_debug_entry *ref)
 			   ref->dev_addr, ref->size,
 			   type2name[entry->type], type2name[ref->type]);
 	} else if ((entry->type == dma_debug_coherent) &&
-		   (ref->paddr != entry->paddr)) {
+		   (phys_addr(ref) != phys_addr(entry))) {
 		err_printk(ref->dev, entry, "DMA-API: device driver frees "
 			   "DMA memory with different CPU address "
 			   "[device address=0x%016llx] [size=%llu bytes] "
 			   "[cpu alloc address=0x%016llx] "
 			   "[cpu free address=0x%016llx]",
 			   ref->dev_addr, ref->size,
-			   (unsigned long long)entry->paddr,
-			   (unsigned long long)ref->paddr);
+			   phys_addr(entry),
+			   phys_addr(ref));
 	}
 
 	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
@@ -1052,7 +1212,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 
 	entry->dev       = dev;
 	entry->type      = dma_debug_page;
-	entry->paddr     = page_to_phys(page) + offset;
+	entry->pfn	 = page_to_pfn(page);
+	entry->offset	 = offset,
 	entry->dev_addr  = dma_addr;
 	entry->size      = size;
 	entry->direction = direction;
@@ -1148,7 +1309,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 
 		entry->type           = dma_debug_sg;
 		entry->dev            = dev;
-		entry->paddr          = sg_phys(s);
+		entry->pfn	      = page_to_pfn(sg_page(s));
+		entry->offset	      = s->offset,
 		entry->size           = sg_dma_len(s);
 		entry->dev_addr       = sg_dma_address(s);
 		entry->direction      = direction;
@@ -1198,7 +1360,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn		= page_to_pfn(sg_page(s)),
+			.offset		= s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = dir,
@@ -1233,7 +1396,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 
 	entry->type      = dma_debug_coherent;
 	entry->dev       = dev;
-	entry->paddr     = virt_to_phys(virt);
+	entry->pfn	 = page_to_pfn(virt_to_page(virt));
+	entry->offset	 = (size_t) virt & PAGE_MASK;
 	entry->size      = size;
 	entry->dev_addr  = dma_addr;
 	entry->direction = DMA_BIDIRECTIONAL;
@@ -1248,7 +1412,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
 	struct dma_debug_entry ref = {
 		.type           = dma_debug_coherent,
 		.dev            = dev,
-		.paddr          = virt_to_phys(virt),
+		.pfn		= page_to_pfn(virt_to_page(virt)),
+		.offset		= (size_t) virt & PAGE_MASK,
 		.dev_addr       = addr,
 		.size           = size,
 		.direction      = DMA_BIDIRECTIONAL,
@@ -1356,7 +1521,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn		= page_to_pfn(sg_page(s)),
+			.offset		= s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = direction,
@@ -1388,7 +1554,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn		= page_to_pfn(sg_page(s)),
+			.offset		= s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = direction,
diff --git a/mm/memory.c b/mm/memory.c
index 5d9025f3b3e1..c89788436f81 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -59,6 +59,7 @@
 #include <linux/gfp.h>
 #include <linux/migrate.h>
 #include <linux/string.h>
+#include <linux/dma-debug.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -2559,6 +2560,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
 
 static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
 {
+	debug_dma_assert_idle(src);
+
 	/*
 	 * If the source page was a PFN mapping, we don't have
 	 * a "struct page" for it. We do a best-effort copy by
-- 
1.7.7.6

^ permalink raw reply related

* Re: [PATCH net-next v2 0/3] make skb_checksum_setup generally available
From: David Miller @ 2014-01-14 22:24 UTC (permalink / raw)
  To: paul.durrant; +Cc: netdev, xen-devel
In-Reply-To: <1389261768-30606-1-git-send-email-paul.durrant@citrix.com>

From: Paul Durrant <paul.durrant@citrix.com>
Date: Thu, 9 Jan 2014 10:02:45 +0000

> Both xen-netfront and xen-netback need to be able to set up the partial
> checksum offset of an skb and may also need to recalculate the pseudo-
> header checksum in the process. This functionality is currently private
> and duplicated between the two drivers.
> 
> Patch #1 of this series moves the implementation into the core network code
> as there is nothing xen-specific about it and it is potentially useful to
> any network driver.
> Patch #2 removes the private implementation from netback.
> Patch #3 removes the private implementation from netfront.
> 
> v2:
> - Put skb_checksum_setup in skbuff.c rather than dev.c
> - remove inline

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH] [trivial] ixgbe: Fix format string in ixgbe_fcoe.c
From: Brown, Aaron F @ 2014-01-14 22:25 UTC (permalink / raw)
  To: standby24x7@gmail.com
  Cc: trivial@kernel.org, e1000-devel@lists.sourceforge.net,
	netdev@vger.kernel.org, Allan, Bruce W,
	linux-kernel@vger.kernel.org, Brandeburg, Jesse
In-Reply-To: <1389716082-23395-1-git-send-email-standby24x7@gmail.com>

On Wed, 2014-01-15 at 01:14 +0900, Masanari Iida wrote:
> cppcheck detected following warning in ixgbe_fcoe.c
> (warning) %d in format string (no. 1) requires 'int' but the
> argument type is 'unsigned int'.
> 
> Signed-off-by: Masanari Iida <standby24x7@gmail.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-) 

Thanks Masanari, I have added it to our internal queue for testing.
------------------------------------------------------------------------------
CenturyLink Cloud: The Leader in Enterprise Cloud Services.
Learn Why More Businesses Are Choosing CenturyLink Cloud For
Critical Workloads, Development Environments & Everything In Between.
Get a Quote or Start a Free Trial Today. 
http://pubads.g.doubleclick.net/gampad/clk?id=119420431&iu=/4140/ostg.clktrk
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel® Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* (unknown), 
From: Yung kyu kim @ 2014-01-14 22:06 UTC (permalink / raw)


Hello,

The Project is about the exportation of 100,000 barrels of Light Crude
Oil daily out from Iraq to Turkey through my client's company in Iraq
at the rate of $92.00 a barrel. This amount to $9,200,000 daily. I ask
for your support as a foreigner to handle this business project with my
client and you are not expected to invest in Iraq

If yes, let me know and we will discuss this project proper.



Kim.

^ permalink raw reply

* Re: [PATCH net-next] hyperv: Add support for physically discontinuous receive buffer
From: David Miller @ 2014-01-14 22:31 UTC (permalink / raw)
  To: haiyangz; +Cc: olaf, netdev, jasowang, driverdev-devel, linux-kernel
In-Reply-To: <1389306287-20057-1-git-send-email-haiyangz@microsoft.com>

From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Thu,  9 Jan 2014 14:24:47 -0800

> This will allow us to use bigger receive buffer, and prevent allocation failure
> due to fragmented memory.
> 
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>

Not until you start using paged SKBs in netvsc_recv_callback.

Whatever fragmentation you think you're avoiding in the hyperv layer,
you're still going to get from the:

	skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);

call there.

This change makes no sense in isolation, therefore I'm not applying it
until you also include the appropriate changes to avoid the same
exact fragmentation issue in netvsc_drv.c as stated above.

^ permalink raw reply

* Re: [PATCH RFC] reciprocal_divide: correction/update of the algorithm
From: Hannes Frederic Sowa @ 2014-01-14 22:39 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, dborkman, linux-kernel, darkjames-ws
In-Reply-To: <1389722825.31367.260.camel@edumazet-glaptop2.roam.corp.google.com>

On Tue, Jan 14, 2014 at 10:07:05AM -0800, Eric Dumazet wrote:
> On Mon, 2014-01-13 at 22:42 +0100, Hannes Frederic Sowa wrote:
> > This patch is a RFC and part of a series Daniel Borkmann and me want to
> > do when introducing prandom_u32_range{,_ro} and prandom_u32_max{,_ro}
> > helpers later this week.
> 
> > -static inline u32 reciprocal_divide(u32 A, u32 R)
> > +struct reciprocal_value reciprocal_value(u32 d);
> > +
> > +static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
> >  {
> > -	return (u32)(((u64)A * R) >> 32);
> > +	u32 t = (u32)(((u64)a * R.m) >> 32);
> > +	return (t + ((a - t) >> R.sh1)) >> R.sh2;
> >  }
> 
> I would rather introduce new helpers and convert users that really need
> them.
>
> For instance, just use a divide in BPF, because doing this on JIT might
> be too complex for the gains. Strangely, libpcap doesn't seem to
> optimize any divide, like divides by a power of two...
> 
> Reciprocal were added 7 years ago, for very specific uses, but current
> cpus have reasonably fast dividers.

Agreed. The new algorithm would need to change the size of struct
sock_filter, which is exported to user space. We will leave BPF as-is
for the time being and check that later.

Greetings,

  Hannes

^ permalink raw reply

* Re: [PATCH net-next 0/3] Improve tracing at the driver/core boundary
From: David Miller @ 2014-01-14 22:47 UTC (permalink / raw)
  To: bhutchings; +Cc: netdev, linux-net-drivers
In-Reply-To: <1389392139.2025.123.camel@bwh-desktop.uk.level5networks.com>

From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 10 Jan 2014 22:15:39 +0000

> These patches add static tracepoints at the driver/core boundary which
> record various skb fields likely to be useful for datapath debugging.
> On the TX side the boundary is where the core calls ndo_start_xmit, and
> on the RX side it is where any of the various exported receive functions
> is called.
> 
> The set of skb fields is mostly based on what I thought would be
> interesting for sfc.
> 
> These patches are basically the same as what I sent as an RFC in
> November, but rebased.  They now depend on 'net: core: explicitly select
> a txq before doing l2 forwarding', so please merge net into net-next
> before trying to apply them.  The first patch fixes a code formatting
> error left behind after that fix.

This looks great, series applied, thanks Ben.

^ permalink raw reply

* [Patch net-next] net_sched: act: fix a bug in tcf_register_action()
From: Cong Wang @ 2014-01-14 22:48 UTC (permalink / raw)
  To: netdev; +Cc: Jamal Hadi Salim, David S. Miller, Cong Wang

In tcf_register_action() we check ->type and ->kind to see if there
is an existing action registered, but ipt action registers two
actions with same type but different kinds. This should be a valid
case, otherwise only xt can be registered.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

---
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 35f89e9..2070ee3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -273,7 +273,7 @@ int tcf_register_action(struct tc_action_ops *act)

 	write_lock(&act_mod_lock);
 	list_for_each_entry(a, &act_base, head) {
-		if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+		if (act->type == a->type && (strcmp(act->kind, a->kind) == 0)) {
 			write_unlock(&act_mod_lock);
 			return -EEXIST;
 		}

^ permalink raw reply related

* Re: [PATCH RFC] reciprocal_divide: correction/update of the algorithm
From: Eric Dumazet @ 2014-01-14 22:45 UTC (permalink / raw)
  To: Austin S Hemmelgarn
  Cc: Hannes Frederic Sowa, netdev, dborkman, linux-kernel,
	darkjames-ws
In-Reply-To: <52D5A3DC.9030107@gmail.com>

On Tue, 2014-01-14 at 15:53 -0500, Austin S Hemmelgarn wrote:
> On 2014-01-14 14:50, Eric Dumazet wrote:
> > On Tue, 2014-01-14 at 14:22 -0500, Austin S Hemmelgarn wrote:
> > 
> >> I disagree with the statement that current CPU's have reasonably fast
> >> dividers.  A lot of embedded processors and many low-end x86 CPU's do
> >> not in-fact have any hardware divider, and usually provide it using
> >> microcode based emulation if they provide it at all.  The AMD Jaguar
> >> micro-architecture in particular comes to mind, it uses an iterative
> >> division algorithm provided by the microcode that only produces 2 bits
> >> of quotient per cycle, even in the best case (2 8-bit integers and an
> >> integral 8-bit quotient) this still takes 4 cycles, which is twice as
> >> slow as any other math operation on the same processor.
> > 
> > I doubt you run any BPF filter with a divide instruction in it on these
> > platform.
> > 
> > Get real, do not over optimize things where it does not matter.
> > 
> Actually, I have three Jaguar based routers, and use BPF regularly as
> part of their iptables rules to log certain packet types.



1) Have you enabled BPF JIT?

2) Do you have divide instructions in a BPF filter, 
   if yes, I would like to have an example.
   (current code works well for small divisors anyway)

3) How much time is spent in BPF compared to the rest of the stack,
   especially if you run iptables...

Spending 2 or 3 days of work to save ~7 cycles for a divide that
probably can be replaced by a shift anyway, while spending 5000 cycles
per packet is what I call not a wise optimization, especially
if dealing with old hardware.

Even on a Jaguar, the proposed alternative 

+       u32 t = (u32)(((u64)a * R.m) >> 32);
+       return (t + ((a - t) >> R.sh1)) >> R.sh2;

will have a similar cost.

^ permalink raw reply

* Re: linux-next: Tree for Jan 14 (lowpan, 802.15.4)
From: Dmitry Eremin-Solenikov @ 2014-01-14 22:54 UTC (permalink / raw)
  To: Randy Dunlap, David S. Miller, Marcel Holtmann
  Cc: Stephen Rothwell, linux-next, kernel list, linux-zigbee-devel,
	Alexander Smirnov, netdev@vger.kernel.org, Jukka Rissanen
In-Reply-To: <52D578A7.8020000@infradead.org>

[-- Attachment #1: Type: text/plain, Size: 638 bytes --]

Hello,


On Tue, Jan 14, 2014 at 9:49 PM, Randy Dunlap <rdunlap@infradead.org> wrote:
>
> On 01/13/2014 09:51 PM, Stephen Rothwell wrote:
> > Hi all,
> >
> > This tree fails (more than usual) the powerpc allyesconfig build.
> >
> > Changes since 20140113:
> >
>
>
> on i386:
>
> net/built-in.o: In function `header_create':
> 6lowpan.c:(.text+0x166149): undefined reference to `lowpan_header_compress'
> net/built-in.o: In function `bt_6lowpan_recv':
> (.text+0x166b3c): undefined reference to `lowpan_process_data'

Ah, nice Makefile hack there.
David, Marcel, could you please consider the attached patch.


-- 
With best wishes
Dmitry

[-- Attachment #2: 0001-net-move-6lowpan-compression-code-to-separate-module.patch --]
[-- Type: text/x-patch, Size: 2784 bytes --]

From 4c3db6d3c244decf434665c30c1bf75aad1f94b2 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dmitry_eremin@mentor.com>
Date: Wed, 15 Jan 2014 02:50:40 +0400
Subject: [PATCH] net: move 6lowpan compression code to separate module

IEEE 802.15.4 and Bluetooth networking stacks share 6lowpan compression
code. Instead of introducing Makefile/Kconfig hacks, build this code as
a separate module referenced from both ieee802154 and bluetooth modules.

This fixes the following build error observed in some kernel
configurations:

net/built-in.o: In function `header_create': 6lowpan.c:(.text+0x166149): undefined reference to `lowpan_header_compress'
net/built-in.o: In function `bt_6lowpan_recv': (.text+0x166b3c): undefined reference to `lowpan_process_data'

Signed-off-by: Dmitry Eremin-Solenikov <dmitry_eremin@mentor.com>
---
 net/Makefile            | 2 +-
 net/bluetooth/Kconfig   | 1 +
 net/ieee802154/Kconfig  | 7 +++++++
 net/ieee802154/Makefile | 3 ++-
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/net/Makefile b/net/Makefile
index 8fa2f91..cbbbe6d 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_CAIF)		+= caif/
 ifneq ($(CONFIG_DCB),)
 obj-y				+= dcb/
 endif
-obj-$(CONFIG_IEEE802154)	+= ieee802154/
+obj-y				+= ieee802154/
 obj-$(CONFIG_MAC802154)		+= mac802154/
 
 ifeq ($(CONFIG_NET),y)
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index d3f3f7b..985b560 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -12,6 +12,7 @@ menuconfig BT
 	select CRYPTO_AES
 	select CRYPTO_ECB
 	select CRYPTO_SHA256
+	select 6LOWPAN_IPHC
 	help
 	  Bluetooth is low-cost, low-power, short-range wireless technology.
 	  It was designed as a replacement for cables and other short-range
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index b2e06df..9c9879d 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -13,5 +13,12 @@ config IEEE802154
 config IEEE802154_6LOWPAN
 	tristate "6lowpan support over IEEE 802.15.4"
 	depends on IEEE802154 && IPV6
+	select 6LOWPAN_IPHC
 	---help---
 	IPv6 compression over IEEE 802.15.4.
+
+config 6LOWPAN_IPHC
+	tristate
+	---help---
+	  6lowpan compression code which is shared between IEEE 802.15.4 and Bluetooth
+	  stacks.
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 951a83e..e8f0588 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o
-obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o 6lowpan_iphc.o
+obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o
+obj-$(CONFIG_6LOWPAN_IPHC) += 6lowpan_iphc.o
 
 ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o
 af_802154-y := af_ieee802154.o raw.o dgram.o
-- 
1.8.5.2


^ permalink raw reply related

* Re: [Bug 68501] New: llc_fixup_skb considers PDU len including ETH_HLEN
From: David Miller @ 2014-01-14 23:01 UTC (permalink / raw)
  To: stephen; +Cc: netdev
In-Reply-To: <20140110151156.37ae2618@nehalam.linuxnetplumber.net>

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 10 Jan 2014 15:11:56 -0800

> In below snip of code in llc_fixup_skb we calculate pdulen from
> eth_hdr(skb)->h_proto which contains total length of packet include ETH_HLEN.
> 
> <snip>
> 119         if (skb->protocol == htons(ETH_P_802_2)) {
> 120                 __be16 pdulen = eth_hdr(skb)->h_proto;
> 121                 s32 data_size = ntohs(pdulen) - llc_len;
> 122 
> 123                 if (data_size < 0 ||
> 124                     !pskb_may_pull(skb, data_size))
> 125                         return 0;
> 
> </snip>
> 
> 
> Line 121 should be changed to 
> 
> 121                 s32 data_size = ntohs(pdulen) - llc_len - ETH_HLEN;

This can't be right, everything I can find says that the length here is:

	the 802.2 packet length - the number of bytes of the 802.2
	(LLC and data) portion of the frame, excluding padding

Which implies that the ethernet header length is not included.

^ permalink raw reply

* Re: [Patch net-next] ipv4: register igmp_notifier even when !CONFIG_PROC_FS
From: David Miller @ 2014-01-14 23:04 UTC (permalink / raw)
  To: xiyou.wangcong; +Cc: netdev, stephen, kaber
In-Reply-To: <1389398985-16198-1-git-send-email-xiyou.wangcong@gmail.com>

From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 10 Jan 2014 16:09:45 -0800

> We still need this notifier even when we don't config
> PROC_FS.
> 
> It should be rare to have a kernel without PROC_FS,
> so just for completeness.
> 
> Cc: Stephen Hemminger <stephen@networkplumber.org>
> Cc: David S. Miller <davem@davemloft.net>
> Cc: Patrick McHardy <kaber@trash.net>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

This looks a little ugly, but I can't suggest anything better.

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next] ipv6: copy traffic class from ping request to reply
From: David Miller @ 2014-01-14 23:09 UTC (permalink / raw)
  To: hannes; +Cc: simon-schneider, netdev
In-Reply-To: <20140111105546.GB30388@order.stressinduktion.org>

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 11 Jan 2014 11:55:46 +0100

> Suggested-by: Simon Schneider <simon-schneider@gmx.net>
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

This matches ipv4 behavior (except ipv4 accomplishes this by adjusting
the ICMP socket TOS value every packet response), so applied, thanks!

^ permalink raw reply

* Re: [PATCH net-next] IPv6: enable TCP to use an anycast address
From: David Miller @ 2014-01-14 23:14 UTC (permalink / raw)
  To: hannes; +Cc: fx.lebail, kuznet, netdev, jmorris, yoshfuji, kaber
In-Reply-To: <20140113011146.GG6586@order.stressinduktion.org>

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 13 Jan 2014 02:11:46 +0100

> Hi!
> 
> On Sun, Jan 12, 2014 at 06:53:47AM -0800, François-Xavier Le Bail wrote:
>> On Sat, 1/11/14, Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
>> 
>> > On Sat, Jan 11, 2014 at 05:38:27PM +0400, Alexey Kuznetsov wrote:
>> > > On Sat, Jan 11, 2014 at 5:06 PM, François-Xavier Le Bail
>> > > <fx.lebail@yahoo.com> wrote:
>> > > > Many DNS root-servers use TCP with anycast (IPv4 and IPV6).
>> > > 
>> > > Actually, I was alerted by reset processing in your patch, it cannot be right.
>> > > 
>> > > Don't you think this should not be enabled for common use? At the very
>> > > least, it should be behind a separate sysctl that is disabled by default.
>> 
>> > The idea I had, was, that if a socket does knowingly bind to an anycast
>> > address, it is allowed to do so and process queries on it with both TCP and
>> > UDP. I don't think we need a sysctl for that? Anycast addresses are either
>> > pre-defined (e.g. the subnet router anycast address) or specified by a flag
>> > when the administrator adds one. Currently one can only add anycast addresses
>> > either by forwarding and gets the per-subnet anycast address or with a
>> > setsockopt IPV6_JOIN_ANYCAST.
>> 
>> > So the problem is what should be allowed when the socket listens on an any
>> > address? Maybe this should be protected by a sysctl?
>> 
>> TCP case:
>> With my two patches (the one for bind and this one for tcp), when a
>> SOCK_STREAM socket listens to in6addr_any, the server is able to
>> send a TCP reply with a unicast or anycast source address, according
>> to the destination address used by the client.
>> 
>> dest request unicast => src reply unicast (current behavior)
>> dest request anycast => src reply anycast (new)
>> 
>> So, I don't think there is a need for a sysctl.
> 
> I am still thinking about the RST-case and am a bit unsure here. But I
> currently don't see any problems.

I think this needs much more discussion and analysis before I can really
seriously consider applying this, sorry.

^ permalink raw reply

* Re: [PATCH net-next] net: replace macros net_random and net_srandom with direct calls to prandom
From: David Miller @ 2014-01-14 23:15 UTC (permalink / raw)
  To: aruna.hewapathirane; +Cc: netdev, hannes
In-Reply-To: <1389442559-13097-1-git-send-email-aruna.hewapathirane@gmail.com>

From: Aruna-Hewapathirane <aruna.hewapathirane@gmail.com>
Date: Sat, 11 Jan 2014 07:15:59 -0500

> This patch removes the net_random and net_srandom macros and replaces
> them with direct calls to the prandom ones. As new commits only seem to
> use prandom_u32 there is no use to keep them around.
> This change makes it easier to grep for users of prandom_u32.
> 
> Signed-off-by: Aruna-Hewapathirane <aruna.hewapathirane@gmail.com>
> Suggested-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

Applied, thanks.

^ permalink raw reply

* [RFC PATCH net-next] etherdevice: Use ether_addr_copy to copy an Ethernet address
From: Joe Perches @ 2014-01-14 23:18 UTC (permalink / raw)
  To: netdev; +Cc: linux-arm-kernel, linux-arch

Some systems can use the normally known u16 alignment of
Ethernet addresses to save some code/text bytes and cycles.

This does not change currently emitted code on x86 by gcc 4.8.

Signed-off-by: Joe Perches <joe@perches.com>
---

Yes, it's a trivial change, but maybe slightly useful...

For instance: with netpoll.c memcpy changed to ether_addr_copy:

arm old (4.6.3):
	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
    27e0:	e4973042 	ldr	r3, [r7], #66	; 0x42
    27e4:	e2860006 	add	r0, r6, #6
    27e8:	e3a02006 	mov	r2, #6
    27ec:	e59311e8 	ldr	r1, [r3, #488]	; 0x1e8
    27f0:	ebfffffe 	bl	0 <memcpy>
			27f0: R_ARM_CALL	memcpy
	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
    27f4:	e1a00006 	mov	r0, r6
    27f8:	e1a01007 	mov	r1, r7
    27fc:	e3a02006 	mov	r2, #6
    2800:	ebfffffe 	bl	0 <memcpy>
			2800: R_ARM_CALL	memcpy

arm new:

	*(u32 *)dst = *(const u32 *)src;
    27dc:	e5932000 	ldr	r2, [r3]
    27e0:	e5802006 	str	r2, [r0, #6]
	*(u16 *)(dst + 4) = *(const u16 *)(src + 4);
    27e4:	e1d330b4 	ldrh	r3, [r3, #4]
    27e8:	e1c030ba 	strh	r3, [r0, #10]
 * Please note: dst & src must both be aligned to u16.
 */
static inline void ether_addr_copy(u8 *dst, const u8 *src)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
	*(u32 *)dst = *(const u32 *)src;
    27ec:	e5953042 	ldr	r3, [r5, #66]	; 0x42
    27f0:	e5803000 	str	r3, [r0]
	*(u16 *)(dst + 4) = *(const u16 *)(src + 4);
    27f4:	e1d534b6 	ldrh	r3, [r5, #70]	; 0x46
    27f8:	e1c030b4 	strh	r3, [r0, #4]

 include/linux/etherdevice.h | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f344ac0..1f26c55 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -218,6 +218,28 @@ static inline void eth_hw_addr_random(struct net_device *dev)
 }
 
 /**
+ * ether_addr_copy - Copy an Ethernet address
+ * @dst: Pointer to a six-byte array Ethernet address destination
+ * @src: Pointer to a six-byte array Ethernet address source
+ *
+ * Please note: dst & src must both be aligned to u16.
+ */
+static inline void ether_addr_copy(u8 *dst, const u8 *src)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	*(u32 *)dst = *(const u32 *)src;
+	*(u16 *)(dst + 4) = *(const u16 *)(src + 4);
+#else
+	u16 *a = (u16 *)dst;
+	const u16 *b = (const u16 *)src;
+
+	a[0] = b[0];
+	a[1] = b[1];
+	a[2] = b[2];
+#endif
+}
+
+/**
  * eth_hw_addr_inherit - Copy dev_addr from another net_device
  * @dst: pointer to net_device to copy dev_addr to
  * @src: pointer to net_device to copy dev_addr from
@@ -229,7 +251,7 @@ static inline void eth_hw_addr_inherit(struct net_device *dst,
 				       struct net_device *src)
 {
 	dst->addr_assign_type = src->addr_assign_type;
-	memcpy(dst->dev_addr, src->dev_addr, ETH_ALEN);
+	ether_addr_copy(dst->dev_addr, src->dev_addr);
 }
 
 /**

^ permalink raw reply related

* Compensation Award !
From: Ruben Alberto Jimenez Cuentas @ 2014-01-14 23:06 UTC (permalink / raw)



Compensation Grant of $5,000,000.00  
USD...Contact:mrspatriciapowell@gmail.com for claims.






El presente correo no representa la opinión o consentimiento oficial por parte de la Universidad del Norte por lo que no adquiere responsabilidad alguna por el contenido del presente mensaje salvo en los casos de funcionarios de la Institución en ejercicio de sus atribuciones reglamentarias. Este correo puede provenir de una cuenta que la Universidad ofrece a los estudiantes como parte del servicio educativo o de funcionarios de la Institución, evento en el cual tanto el mensaje como sus anexos son estrictamente confidenciales. Este mensaje ha sido verificado con software antivirus; no obstante lo anterior, la Universidad del Norte no garantiza que sea seguro o no contenga errores o virus por lo que no se hace responsable de su transmisión.


^ permalink raw reply

* Re: [PATCH net-next v3 2/2] stmmac: Fix kernel crashes for jumbo frames
From: Ben Hutchings @ 2014-01-14 23:20 UTC (permalink / raw)
  To: Vince Bridgers
  Cc: devicetree, netdev, Giuseppe CAVALLARO, robh+dt, pawel.moll,
	mark.rutland, ijc+devicetree, Kumar Gala, Dinh Nguyen,
	Rayagond Kokatanur
In-Reply-To: <CAOwfj2Ne1oRU1z6BaH4g2_EJJFtJOWbgs2oMzSq5nNpz9G6LsA@mail.gmail.com>

On Tue, 2014-01-14 at 14:44 -0600, Vince Bridgers wrote:
> On Tue, Jan 14, 2014 at 1:53 PM, Ben Hutchings
[...]
> >>
> >>       /*
> >>        * Currently only the properties needed on SPEAr600
> >> @@ -60,6 +64,7 @@ static int stmmac_probe_config_dt(struct platform_device *pdev,
> >>       if (of_device_is_compatible(np, "st,spear600-gmac") ||
> >>               of_device_is_compatible(np, "snps,dwmac-3.70a") ||
> >>               of_device_is_compatible(np, "snps,dwmac")) {
> >> +             of_property_read_u32(np, "max-frame-size", &plat->maxmtu);
> > [...]
> >
> > Is it the maximum frame size, including Ethernet header?  (And then, is
> > the CRC or any VLAN header included in the frame size?)
> > Or is it the MTU, excluding all of those?
> > You really need to be very clear about what this number represents.
> 
> max-frame-size as read from the device tree is defined in the ePAPR
> v1.1 specification. I originally used a name of "max-mtu", but was
> asked to change that since this attribute was already defined in the
> ePAPR v1.1 specification.
[...]

Oh dear, this specification is just as confused as I am.  The example
value in §6.3.1.4 is 1518, implying that it is really the frame size,
including the Ethernet header, any VLAN tag, and maybe the CRC.  However
the full example in Appendix B1 has 0x5dc (== 1500) implying that it is
the MTU.

I suppose you should follow whatever interpretation is already used by
other drivers. :-/

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [PATCH RFC] reciprocal_divide: correction/update of the algorithm
From: Borislav Petkov @ 2014-01-14 23:25 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Austin S Hemmelgarn, Hannes Frederic Sowa, netdev, dborkman,
	linux-kernel, darkjames-ws
In-Reply-To: <1389739537.31367.273.camel@edumazet-glaptop2.roam.corp.google.com>

On Tue, Jan 14, 2014 at 02:45:37PM -0800, Eric Dumazet wrote:
> Even on a Jaguar, the proposed alternative

I don't know what Jaguar you guys are talking about but the Jaguar
I know - Fam16h - has an int hardware divider:

http://developer.amd.com/wordpress/media/2012/10/SOG_16h_52128_PUB_Rev1_1.pdf

So all that talk about microcode is plain wrong. The hardware divider
comes from Llano (F12h) so it must be some other Jaguar, maybe Bobcat.

:-)

If it is Bobcat, then it has a 1-bit per cycle ucode int divider.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply

* Re: [Xen-devel][PATCH net-next] xen-netfront: clean up code in xennet_release_rx_bufs
From: David Miller @ 2014-01-14 23:28 UTC (permalink / raw)
  To: Annie.li; +Cc: xen-devel, netdev, konrad.wilk, ian.campbell, wei.liu2
In-Reply-To: <1389307718-2845-1-git-send-email-Annie.li@oracle.com>

From: Annie Li <Annie.li@oracle.com>
Date: Fri, 10 Jan 2014 06:48:38 +0800

> Current netfront only grants pages for grant copy, not for grant transfer, so
> remove corresponding transfer code and add receiving copy code in
> xennet_release_rx_bufs.
> 
> Signed-off-by: Annie Li <Annie.li@oracle.com>

If a Xen networking driver developer would review this I'd appreciate it.

Thanks.

^ permalink raw reply

* Re: pull-request: can-next 2014-01-11
From: David Miller @ 2014-01-14 23:30 UTC (permalink / raw)
  To: mkl; +Cc: netdev, linux-can, kernel
In-Reply-To: <52D191E2.8040703@pengutronix.de>

From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Sat, 11 Jan 2014 19:48:02 +0100

> this is a pull request of three patches for net-next/master.
> 
> Oleg Moroz added support for a new PCI card to the generic SJA1000 PCI
> driver, Guenter Roeck's patch limits the flexcan driver to little
> endian arm (and powerpc) and I fixed a sparse warning found by the
> kbuild robot in the ti_hecc driver.

Pulled, thanks Marc.

^ permalink raw reply

* Re: [PATCH net-next] IPv6: add option to use anycast addresses as source addresses in icmp error messages
From: Hannes Frederic Sowa @ 2014-01-14 23:38 UTC (permalink / raw)
  To: Francois-Xavier Le Bail, netdev, Bill Fink, David S. Miller,
	Alexey Kuznetsov, James Morris, Hideaki Yoshifuji,
	Patrick McHardy
In-Reply-To: <20140114131344.GR6586@order.stressinduktion.org>

On Tue, Jan 14, 2014 at 02:13:44PM +0100, Hannes Frederic Sowa wrote:
> On Mon, Jan 13, 2014 at 06:22:44PM +0100, Francois-Xavier Le Bail wrote:
> > - Add "anycast_src_icmp_error" sysctl to control the use of anycast addresses
> >   as source addresses for ICMPv6 error messages. This sysctl is false by
> >   default to preserve existing behavior.
> > - Use it in icmp6_send().
> > 
> > Suggested-by: Bill Fink <billfink@mindspring.com>
> > Signed-off-by: Francois-Xavier Le Bail <fx.lebail@yahoo.com>
> 
> Regarding the anycast patches, I contacted someone from IETF.
> 
> The number of sysctls that would need to be introduced to provide all the
> flexibility regarding source address selection without breaking backward
> compatibility concerns me a bit.
> 
> Especially on end hosts, where those switches will be important, I think we
> really have to think about sensible defaults without breaking current
> software.
> 
> I am currently considering a per-address flag that controls whether those
> anycast addresses should be available in source address selection (also with
> an enhancement to the current IPV6_JOIN_ANYCAST logic).

Francois, we should really think about this. We should also consider whether
to make the pre-defined subnet address just a normal anycast address in the
long term (one which just happens to get automatically added to an interface
if forwarding is enabled) and bundle all the source address selection
logic on the per-address state.

If that would be the case, we could revert
509aba3b0d366b7f16a9a2eebac1156b25f5f622 ("IPv6: add the option to use
anycast addresses as source addresses in echo reply") and thus would
eliminate one sysctl.

It would be fine if we can make this decision before David merges with
Linus.  I guess we can still make this decision during the -rc phase. But
as soon as the knob is in a released version of Linux we can never take
it back (I really don't like sysctls).

Greetings,

  Hannes

^ permalink raw reply

* Re: [PATCH] ieee802154: Fix memory leak in ieee802154_add_iface()
From: David Miller @ 2014-01-14 23:41 UTC (permalink / raw)
  To: cengelma; +Cc: alex.bluesman.smirnov, dbaryshkov, netdev, linux-zigbee-devel
In-Reply-To: <20140111221930.7255e2be@spike>

From: Christian Engelmayer <cengelma@gmx.at>
Date: Sat, 11 Jan 2014 22:19:30 +0100

> Fix a memory leak in the ieee802154_add_iface() error handling path.
> Detected by Coverity: CID 710490.
> 
> Signed-off-by: Christian Engelmayer <cengelma@gmx.at>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* [PATCH net-next 0/2] net: mvneta: simple cleanups
From: Arnaud Ebalard @ 2014-01-14 23:45 UTC (permalink / raw)
  To: David Miller, Willy Tarreau, Thomas Petazzoni; +Cc: netdev, linux-arm-kernel

These two patches are intended for net-next. They apply on top of
Willy's performance improvement patches for the mvneta driver.
They provide some simple cleanups for unused variables, function
params or return values.

Arnaud Ebalard (2):
  net: mvneta: mvneta_tx_done_gbe() cleanups
  net: mvneta: make mvneta_txq_done() return void

 drivers/net/ethernet/marvell/mvneta.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

-- 
1.8.5.2

^ permalink raw reply

* [PATCH net-next 1/2] net: mvneta: mvneta_tx_done_gbe() cleanups
From: Arnaud Ebalard @ 2014-01-14 23:45 UTC (permalink / raw)
  To: David Miller, Willy Tarreau, Thomas Petazzoni; +Cc: netdev, linux-arm-kernel
In-Reply-To: <cover.1389742334.git.arno@natisbad.org>


The return value and third parameter of mvneta_tx_done_gbe() are no
longer used. This patch changes the function prototype and removes a useless
variable where the function is called.

Signed-off-by: Arnaud Ebalard <arno@natisbad.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index f5fc7a249880..8c5150124b5e 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1704,30 +1704,23 @@ static void mvneta_txq_done_force(struct mvneta_port *pp,
 /* Handle tx done - called in softirq context. The <cause_tx_done> argument
  * must be a valid cause according to MVNETA_TXQ_INTR_MASK_ALL.
  */
-static u32 mvneta_tx_done_gbe(struct mvneta_port *pp, u32 cause_tx_done,
-			      int *tx_todo)
+static void mvneta_tx_done_gbe(struct mvneta_port *pp, u32 cause_tx_done)
 {
 	struct mvneta_tx_queue *txq;
-	u32 tx_done = 0;
 	struct netdev_queue *nq;
 
-	*tx_todo = 0;
 	while (cause_tx_done) {
 		txq = mvneta_tx_done_policy(pp, cause_tx_done);
 
 		nq = netdev_get_tx_queue(pp->dev, txq->id);
 		__netif_tx_lock(nq, smp_processor_id());
 
-		if (txq->count) {
-			tx_done += mvneta_txq_done(pp, txq);
-			*tx_todo += txq->count;
-		}
+		if (txq->count)
+			mvneta_txq_done(pp, txq);
 
 		__netif_tx_unlock(nq);
 		cause_tx_done &= ~((1 << txq->id));
 	}
-
-	return tx_done;
 }
 
 /* Compute crc8 of the specified address, using a unique algorithm ,
@@ -1961,9 +1954,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 
 	/* Release Tx descriptors */
 	if (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL) {
-		int tx_todo = 0;
-
-		mvneta_tx_done_gbe(pp, (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL), &tx_todo);
+		mvneta_tx_done_gbe(pp, (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL));
 		cause_rx_tx &= ~MVNETA_TX_INTR_MASK_ALL;
 	}
 
-- 
1.8.5.2

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox