Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v2] net: ethernet: realtek: atp: checkpatch errors and warnings corrected
From: Roberto Medina @ 2014-10-27 23:51 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, Roberto Medina

From: Roberto Medina <robertoxmed@gmail.com>

Several warnings and errors of coding style rules corrected.
Compile tested.

Signed-off-by: Roberto Medina <robertoxmed@gmail.com>

---
 drivers/net/ethernet/realtek/atp.h | 246 +++++++++++++++++++------------------
 1 file changed, 127 insertions(+), 119 deletions(-)

diff --git a/drivers/net/ethernet/realtek/atp.h b/drivers/net/ethernet/realtek/atp.h
index 040b137..32497f0 100644
--- a/drivers/net/ethernet/realtek/atp.h
+++ b/drivers/net/ethernet/realtek/atp.h
@@ -6,10 +6,10 @@
 
 /* The header prepended to received packets. */
 struct rx_header {
-    ushort pad;			/* Pad. */
-    ushort rx_count;
-    ushort rx_status;		/* Unknown bit assignments :-<.  */
-    ushort cur_addr;		/* Apparently the current buffer address(?) */
+	ushort pad;		/* Pad. */
+	ushort rx_count;
+	ushort rx_status;	/* Unknown bit assignments :-<.  */
+	ushort cur_addr;	/* Apparently the current buffer address(?) */
 };
 
 #define PAR_DATA	0
@@ -29,22 +29,25 @@ struct rx_header {
 #define RdAddr	0xC0
 #define HNib	0x10
 
-enum page0_regs
-{
-    /* The first six registers hold the ethernet physical station address. */
-    PAR0 = 0, PAR1 = 1, PAR2 = 2, PAR3 = 3, PAR4 = 4, PAR5 = 5,
-    TxCNT0 = 6, TxCNT1 = 7,		/* The transmit byte count. */
-    TxSTAT = 8, RxSTAT = 9,		/* Tx and Rx status. */
-    ISR = 10, IMR = 11,			/* Interrupt status and mask. */
-    CMR1 = 12,				/* Command register 1. */
-    CMR2 = 13,				/* Command register 2. */
-    MODSEL = 14,			/* Mode select register. */
-    MAR = 14,				/* Memory address register (?). */
-    CMR2_h = 0x1d, };
-
-enum eepage_regs
-{ PROM_CMD = 6, PROM_DATA = 7 };	/* Note that PROM_CMD is in the "high" bits. */
+enum page0_regs {
+	/* The first six registers hold
+	 * the ethernet physical station address.
+	 */
+	PAR0 = 0, PAR1 = 1, PAR2 = 2, PAR3 = 3, PAR4 = 4, PAR5 = 5,
+	TxCNT0 = 6, TxCNT1 = 7,		/* The transmit byte count. */
+	TxSTAT = 8, RxSTAT = 9,		/* Tx and Rx status. */
+	ISR = 10, IMR = 11,		/* Interrupt status and mask. */
+	CMR1 = 12,			/* Command register 1. */
+	CMR2 = 13,			/* Command register 2. */
+	MODSEL = 14,		/* Mode select register. */
+	MAR = 14,			/* Memory address register (?). */
+	CMR2_h = 0x1d,
+};
 
+enum eepage_regs {
+	PROM_CMD = 6,
+	PROM_DATA = 7	/* Note that PROM_CMD is in the "high" bits. */
+};
 
 #define ISR_TxOK	0x01
 #define ISR_RxOK	0x04
@@ -72,141 +75,146 @@ enum eepage_regs
 #define CMR2h_Normal	2	/* Accept physical and broadcast address. */
 #define CMR2h_PROMISC	3	/* Promiscuous mode. */
 
-/* An inline function used below: it differs from inb() by explicitly return an unsigned
-   char, saving a truncation. */
+/* An inline function used below: it differs from inb() by explicitly
+ * return an unsigned char, saving a truncation.
+ */
 static inline unsigned char inbyte(unsigned short port)
 {
-    unsigned char _v;
-    __asm__ __volatile__ ("inb %w1,%b0" :"=a" (_v):"d" (port));
-    return _v;
+	unsigned char _v;
+
+	__asm__ __volatile__ ("inb %w1,%b0" : "=a" (_v) : "d" (port));
+	return _v;
 }
 
 /* Read register OFFSET.
-   This command should always be terminated with read_end(). */
+ * This command should always be terminated with read_end().
+ */
 static inline unsigned char read_nibble(short port, unsigned char offset)
 {
-    unsigned char retval;
-    outb(EOC+offset, port + PAR_DATA);
-    outb(RdAddr+offset, port + PAR_DATA);
-    inbyte(port + PAR_STATUS);		/* Settling time delay */
-    retval = inbyte(port + PAR_STATUS);
-    outb(EOC+offset, port + PAR_DATA);
-
-    return retval;
+	unsigned char retval;
+
+	outb(EOC+offset, port + PAR_DATA);
+	outb(RdAddr+offset, port + PAR_DATA);
+	inbyte(port + PAR_STATUS);	/* Settling time delay */
+	retval = inbyte(port + PAR_STATUS);
+	outb(EOC+offset, port + PAR_DATA);
+
+	return retval;
 }
 
 /* Functions for bulk data read.  The interrupt line is always disabled. */
 /* Get a byte using read mode 0, reading data from the control lines. */
 static inline unsigned char read_byte_mode0(short ioaddr)
 {
-    unsigned char low_nib;
-
-    outb(Ctrl_LNibRead, ioaddr + PAR_CONTROL);
-    inbyte(ioaddr + PAR_STATUS);
-    low_nib = (inbyte(ioaddr + PAR_STATUS) >> 3) & 0x0f;
-    outb(Ctrl_HNibRead, ioaddr + PAR_CONTROL);
-    inbyte(ioaddr + PAR_STATUS);	/* Settling time delay -- needed!  */
-    inbyte(ioaddr + PAR_STATUS);	/* Settling time delay -- needed!  */
-    return low_nib | ((inbyte(ioaddr + PAR_STATUS) << 1) & 0xf0);
+	unsigned char low_nib;
+
+	outb(Ctrl_LNibRead, ioaddr + PAR_CONTROL);
+	inbyte(ioaddr + PAR_STATUS);
+	low_nib = (inbyte(ioaddr + PAR_STATUS) >> 3) & 0x0f;
+	outb(Ctrl_HNibRead, ioaddr + PAR_CONTROL);
+	inbyte(ioaddr + PAR_STATUS);	/* Settling time delay -- needed!  */
+	inbyte(ioaddr + PAR_STATUS);	/* Settling time delay -- needed!  */
+	return low_nib | ((inbyte(ioaddr + PAR_STATUS) << 1) & 0xf0);
 }
 
 /* The same as read_byte_mode0(), but does multiple inb()s for stability. */
 static inline unsigned char read_byte_mode2(short ioaddr)
 {
-    unsigned char low_nib;
-
-    outb(Ctrl_LNibRead, ioaddr + PAR_CONTROL);
-    inbyte(ioaddr + PAR_STATUS);
-    low_nib = (inbyte(ioaddr + PAR_STATUS) >> 3) & 0x0f;
-    outb(Ctrl_HNibRead, ioaddr + PAR_CONTROL);
-    inbyte(ioaddr + PAR_STATUS);	/* Settling time delay -- needed!  */
-    return low_nib | ((inbyte(ioaddr + PAR_STATUS) << 1) & 0xf0);
+	unsigned char low_nib;
+
+	outb(Ctrl_LNibRead, ioaddr + PAR_CONTROL);
+	inbyte(ioaddr + PAR_STATUS);
+	low_nib = (inbyte(ioaddr + PAR_STATUS) >> 3) & 0x0f;
+	outb(Ctrl_HNibRead, ioaddr + PAR_CONTROL);
+	inbyte(ioaddr + PAR_STATUS);	/* Settling time delay -- needed!  */
+	return low_nib | ((inbyte(ioaddr + PAR_STATUS) << 1) & 0xf0);
 }
 
 /* Read a byte through the data register. */
 static inline unsigned char read_byte_mode4(short ioaddr)
 {
-    unsigned char low_nib;
+	unsigned char low_nib;
 
-    outb(RdAddr | MAR, ioaddr + PAR_DATA);
-    low_nib = (inbyte(ioaddr + PAR_STATUS) >> 3) & 0x0f;
-    outb(RdAddr | HNib | MAR, ioaddr + PAR_DATA);
-    return low_nib | ((inbyte(ioaddr + PAR_STATUS) << 1) & 0xf0);
+	outb(RdAddr | MAR, ioaddr + PAR_DATA);
+	low_nib = (inbyte(ioaddr + PAR_STATUS) >> 3) & 0x0f;
+	outb(RdAddr | HNib | MAR, ioaddr + PAR_DATA);
+	return low_nib | ((inbyte(ioaddr + PAR_STATUS) << 1) & 0xf0);
 }
 
 /* Read a byte through the data register, double reading to allow settling. */
 static inline unsigned char read_byte_mode6(short ioaddr)
 {
-    unsigned char low_nib;
-
-    outb(RdAddr | MAR, ioaddr + PAR_DATA);
-    inbyte(ioaddr + PAR_STATUS);
-    low_nib = (inbyte(ioaddr + PAR_STATUS) >> 3) & 0x0f;
-    outb(RdAddr | HNib | MAR, ioaddr + PAR_DATA);
-    inbyte(ioaddr + PAR_STATUS);
-    return low_nib | ((inbyte(ioaddr + PAR_STATUS) << 1) & 0xf0);
+	unsigned char low_nib;
+
+	outb(RdAddr | MAR, ioaddr + PAR_DATA);
+	inbyte(ioaddr + PAR_STATUS);
+	low_nib = (inbyte(ioaddr + PAR_STATUS) >> 3) & 0x0f;
+	outb(RdAddr | HNib | MAR, ioaddr + PAR_DATA);
+	inbyte(ioaddr + PAR_STATUS);
+	return low_nib | ((inbyte(ioaddr + PAR_STATUS) << 1) & 0xf0);
 }
 
 static inline void
 write_reg(short port, unsigned char reg, unsigned char value)
 {
-    unsigned char outval;
-    outb(EOC | reg, port + PAR_DATA);
-    outval = WrAddr | reg;
-    outb(outval, port + PAR_DATA);
-    outb(outval, port + PAR_DATA);	/* Double write for PS/2. */
-
-    outval &= 0xf0;
-    outval |= value;
-    outb(outval, port + PAR_DATA);
-    outval &= 0x1f;
-    outb(outval, port + PAR_DATA);
-    outb(outval, port + PAR_DATA);
-
-    outb(EOC | outval, port + PAR_DATA);
+	unsigned char outval;
+
+	outb(EOC | reg, port + PAR_DATA);
+	outval = WrAddr | reg;
+	outb(outval, port + PAR_DATA);
+	outb(outval, port + PAR_DATA);	/* Double write for PS/2. */
+
+	outval &= 0xf0;
+	outval |= value;
+	outb(outval, port + PAR_DATA);
+	outval &= 0x1f;
+	outb(outval, port + PAR_DATA);
+	outb(outval, port + PAR_DATA);
+
+	outb(EOC | outval, port + PAR_DATA);
 }
 
 static inline void
 write_reg_high(short port, unsigned char reg, unsigned char value)
 {
-    unsigned char outval = EOC | HNib | reg;
+	unsigned char outval = EOC | HNib | reg;
 
-    outb(outval, port + PAR_DATA);
-    outval &= WrAddr | HNib | 0x0f;
-    outb(outval, port + PAR_DATA);
-    outb(outval, port + PAR_DATA);	/* Double write for PS/2. */
+	outb(outval, port + PAR_DATA);
+	outval &= WrAddr | HNib | 0x0f;
+	outb(outval, port + PAR_DATA);
+	outb(outval, port + PAR_DATA);	/* Double write for PS/2. */
 
-    outval = WrAddr | HNib | value;
-    outb(outval, port + PAR_DATA);
-    outval &= HNib | 0x0f;		/* HNib | value */
-    outb(outval, port + PAR_DATA);
-    outb(outval, port + PAR_DATA);
+	outval = WrAddr | HNib | value;
+	outb(outval, port + PAR_DATA);
+	outval &= HNib | 0x0f;		/* HNib | value */
+	outb(outval, port + PAR_DATA);
+	outb(outval, port + PAR_DATA);
 
-    outb(EOC | HNib | outval, port + PAR_DATA);
+	outb(EOC | HNib | outval, port + PAR_DATA);
 }
 
 /* Write a byte out using nibble mode.  The low nibble is written first. */
 static inline void
 write_reg_byte(short port, unsigned char reg, unsigned char value)
 {
-    unsigned char outval;
-    outb(EOC | reg, port + PAR_DATA); 	/* Reset the address register. */
-    outval = WrAddr | reg;
-    outb(outval, port + PAR_DATA);
-    outb(outval, port + PAR_DATA);	/* Double write for PS/2. */
-
-    outb((outval & 0xf0) | (value & 0x0f), port + PAR_DATA);
-    outb(value & 0x0f, port + PAR_DATA);
-    value >>= 4;
-    outb(value, port + PAR_DATA);
-    outb(0x10 | value, port + PAR_DATA);
-    outb(0x10 | value, port + PAR_DATA);
-
-    outb(EOC  | value, port + PAR_DATA); 	/* Reset the address register. */
+	unsigned char outval;
+
+	outb(EOC | reg, port + PAR_DATA); /* Reset the address register. */
+	outval = WrAddr | reg;
+	outb(outval, port + PAR_DATA);
+	outb(outval, port + PAR_DATA);	/* Double write for PS/2. */
+
+	outb((outval & 0xf0) | (value & 0x0f), port + PAR_DATA);
+	outb(value & 0x0f, port + PAR_DATA);
+	value >>= 4;
+	outb(value, port + PAR_DATA);
+	outb(0x10 | value, port + PAR_DATA);
+	outb(0x10 | value, port + PAR_DATA);
+
+	outb(EOC  | value, port + PAR_DATA); /* Reset the address register. */
 }
 
-/*
- * Bulk data writes to the packet buffer.  The interrupt line remains enabled.
+/* Bulk data writes to the packet buffer.  The interrupt line remains enabled.
  * The first, faster method uses only the dataport (data modes 0, 2 & 4).
  * The second (backup) method uses data and control regs (modes 1, 3 & 5).
  * It should only be needed when there is skew between the individual data
@@ -214,28 +222,28 @@ write_reg_byte(short port, unsigned char reg, unsigned char value)
  */
 static inline void write_byte_mode0(short ioaddr, unsigned char value)
 {
-    outb(value & 0x0f, ioaddr + PAR_DATA);
-    outb((value>>4) | 0x10, ioaddr + PAR_DATA);
+	outb(value & 0x0f, ioaddr + PAR_DATA);
+	outb((value>>4) | 0x10, ioaddr + PAR_DATA);
 }
 
 static inline void write_byte_mode1(short ioaddr, unsigned char value)
 {
-    outb(value & 0x0f, ioaddr + PAR_DATA);
-    outb(Ctrl_IRQEN | Ctrl_LNibWrite, ioaddr + PAR_CONTROL);
-    outb((value>>4) | 0x10, ioaddr + PAR_DATA);
-    outb(Ctrl_IRQEN | Ctrl_HNibWrite, ioaddr + PAR_CONTROL);
+	outb(value & 0x0f, ioaddr + PAR_DATA);
+	outb(Ctrl_IRQEN | Ctrl_LNibWrite, ioaddr + PAR_CONTROL);
+	outb((value>>4) | 0x10, ioaddr + PAR_DATA);
+	outb(Ctrl_IRQEN | Ctrl_HNibWrite, ioaddr + PAR_CONTROL);
 }
 
 /* Write 16bit VALUE to the packet buffer: the same as above just doubled. */
 static inline void write_word_mode0(short ioaddr, unsigned short value)
 {
-    outb(value & 0x0f, ioaddr + PAR_DATA);
-    value >>= 4;
-    outb((value & 0x0f) | 0x10, ioaddr + PAR_DATA);
-    value >>= 4;
-    outb(value & 0x0f, ioaddr + PAR_DATA);
-    value >>= 4;
-    outb((value & 0x0f) | 0x10, ioaddr + PAR_DATA);
+	outb(value & 0x0f, ioaddr + PAR_DATA);
+	value >>= 4;
+	outb((value & 0x0f) | 0x10, ioaddr + PAR_DATA);
+	value >>= 4;
+	outb(value & 0x0f, ioaddr + PAR_DATA);
+	value >>= 4;
+	outb((value & 0x0f) | 0x10, ioaddr + PAR_DATA);
 }
 
 /*  EEPROM_Ctrl bits. */
@@ -248,10 +256,10 @@ static inline void write_word_mode0(short ioaddr, unsigned short value)
 
 /* Delay between EEPROM clock transitions. */
 #define eeprom_delay(ticks) \
-do { int _i = 40; while (--_i > 0) { __SLOW_DOWN_IO; }} while (0)
+do { int _i = 40; while (--_i > 0) { __SLOW_DOWN_IO; } } while (0)
 
 /* The EEPROM commands include the alway-set leading bit. */
 #define EE_WRITE_CMD(offset)	(((5 << 6) + (offset)) << 17)
-#define EE_READ(offset) 	(((6 << 6) + (offset)) << 17)
+#define EE_READ(offset)		(((6 << 6) + (offset)) << 17)
 #define EE_ERASE(offset)	(((7 << 6) + (offset)) << 17)
 #define EE_CMD_SIZE	27	/* The command+address+data size. */
-- 
2.1.2

^ permalink raw reply related

* Re: [PATCH net-next 2/2] udp: Reset flow table for flows over unconnected sockets
From: Eric Dumazet @ 2014-10-27 23:19 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, Linux Netdev List
In-Reply-To: <CA+mtBx_V3WT1bbXY9F731GNdDdb3+ebHwj9hRyVEFynAPYhSXg@mail.gmail.com>

On Mon, 2014-10-27 at 12:36 -0700, Tom Herbert wrote:

> Please try this patch and provide real data to support your points.
> 

Yep. This is not good, I confirm my fear.

Google servers are shifting to serve both TCP & UDP traffic (QUIC
protocol), with an increasing UDP load.

Millions of packets per second per host, from millions of different
sources...

And your patch voids the RFS table, adds another cache miss in fast path
for UDP rx path which is already too expensive.

> If a TCP connection is hot it will continually refresh the table for
> that connection, if connection becomes idle it only takes one received
> packet to restore the CPU. The only time there could be a persistent
> problem is if collision rate is high (which probably means table is
> too small).

RFS already has a low hit/miss rate; this patch helps neither
UDP nor TCP.

Ideally, RFS should be enabled on a protocol base, not an agnostic u32
flow hash.

Whatever strategy you implement, as long as different protocols share a
common hash table, it won't be perfect for mixed workloads.

Fundamental problem is that when a UDP packet comes, it's not possible
to know if it's a 'flow' or 'not', unless we perform an expensive lookup,
and then RPS/RFS cost becomes prohibitive.

While for TCP, the current RFS cache miss is good enough, because about
all packets are for connected flows. We eventually have bad steering for
<not yet established> flows where the stack performs poorly anyway.

^ permalink raw reply

* Re: [PATCH net-next v3 0/5] cleanup on resource check
From: David Miller @ 2014-10-27 23:16 UTC (permalink / raw)
  To: varkabhadram; +Cc: netdev, sergei.shtylyov, varkab
In-Reply-To: <1414116730-4590-1-git-send-email-varkab@cdac.in>

From: Varka Bhadram <varkabhadram@gmail.com>
Date: Fri, 24 Oct 2014 07:42:05 +0530

> This series removes the duplication of sanity check for
> platform_get_resource() return resource. It will be checked 
> with devm_ioremap_resource()
> 
> changes since v2:
> 	- Merge #1 and #2 patches into single patch
> 	- remove the comment
> 
> changes since v1:
> 	- remove NULL dereference on resource_size()

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH net] bpf: split eBPF out of NET
From: David Miller @ 2014-10-27 23:10 UTC (permalink / raw)
  To: ast
  Cc: geert, josh, mingo, rostedt, hannes, edumazet, dborkman, netdev,
	linux-kernel
In-Reply-To: <1414114868-28228-1-git-send-email-ast@plumgrid.com>

From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 23 Oct 2014 18:41:08 -0700

> introduce two configs:
> - hidden CONFIG_BPF to select eBPF interpreter that classic socket filters
>   depend on
> - visible CONFIG_BPF_SYSCALL (default off) that tracing and sockets can use
> 
> that solves several problems:
> - tracing and others that wish to use eBPF don't need to depend on NET.
>   They can use BPF_SYSCALL to allow loading from userspace or select BPF
>   to use it directly from kernel in NET-less configs.
> - in 3.18 programs cannot be attached to events yet, so don't force it on
> - when the rest of eBPF infra is there in 3.19+, it's still useful to
>   switch it off to minimize kernel size
> 
> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
> ---
> 
> bloat-o-meter on x64 shows:
> add/remove: 0/60 grow/shrink: 0/2 up/down: 0/-15601 (-15601)
> 
> tested with many different config combinations. Hopefully didn't miss anything.

Applied with two changes:

1) boolean --> bool
2) Moved bloat-o-meter and testing information into commit message.

Thanks.

^ permalink raw reply

* Re: [Bug 86851] New: Reproducible panic on heavy UDP traffic
From: Nikolay Aleksandrov @ 2014-10-27 23:06 UTC (permalink / raw)
  To: Patrick McLean; +Cc: Eric Dumazet, Florian Westphal, Stephen Hemminger, netdev
In-Reply-To: <20141027155938.28248b5e@gentoo.org>

On 10/27/2014 11:59 PM, Patrick McLean wrote:
> On Mon, 27 Oct 2014 09:48:15 +0100
> Nikolay Aleksandrov <nikolay@redhat.com> wrote:
> 
>> On 10/27/2014 01:47 AM, Eric Dumazet wrote:
>>> On Mon, 2014-10-27 at 00:28 +0100, Nikolay Aleksandrov wrote:
>>>
>>>>
>>>> Thanks for CCing me.
>>>> I'll dig in the code tomorrow but my first thought when I saw this
>>>> was could it be possible that we have a race condition between
>>>> ip_frag_queue() and inet_frag_evict(), more precisely between the
>>>> ipq_kill() calls from ip_frag_queue and inet_frag_evict since the
>>>> frag could be found before we have entered the evictor which then
>>>> can add it to its expire list but the ipq_kill() from
>>>> ip_frag_queue() can do a list_del after we release the chain lock
>>>> in the evictor so we may end up like this ?
>>>
>>> Yes, either we use hlist_del_init() but loose poison aid, or test if
>>> frag was evicted :
>>>
>>> Not sure about refcount.
>>>
>>> diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
>>> index 9eb89f3f0ee4..894ec30c5896 100644
>>> --- a/net/ipv4/inet_fragment.c
>>> +++ b/net/ipv4/inet_fragment.c
>>> @@ -285,7 +285,8 @@ static inline void fq_unlink(struct
>>> inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket
>>> *hb; 
>>>  	hb = get_frag_bucket_locked(fq, f);
>>> -	hlist_del(&fq->list);
>>> +	if (!(fq->flags & INET_FRAG_EVICTED))
>>> +		hlist_del(&fq->list);
>>>  	spin_unlock(&hb->chain_lock);
>>>  }
>>>  
>>>
>>>
>>
>> Exactly, I was thinking about a similar fix since the evict flag is
>> only set with the chain lock. IMO the refcount should be fine.
>> CCing the reporter.
>> Patrick could you please try Eric's patch ?
>>
> 
> It no longer panics with that patch, but it does produce a large amount
> of warnings, here is an example of what I am getting. I will attach the
> full log to the bug.
> 

Great! Thanks for testing.
As I said earlier we have a valid case that can hit the WARN_ON in
inet_evict_frag().
Anyhow, Eric would you mind posting the patch officially ?
If you'd like me to remove the WARN_ON() in a separate one just let me
know, otherwise feel free to remove it in the fix for the race.

Cheers,
 Nik

^ permalink raw reply

* Re: [PATCH] net: ethernet: realtek: atp: checkpatch errors and warnings corrected
From: Roberto Medina @ 2014-10-27 23:04 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141027.185355.1412632964802440855.davem@davemloft.net>

On 10/27/2014 11:53 PM, David Miller wrote:
> From: David Miller <davem@davemloft.net>
> Date: Mon, 27 Oct 2014 18:52:11 -0400 (EDT)
>
>> From: Roberto Medina <robertoxmed@gmail.com>
>> Date: Thu, 23 Oct 2014 19:10:00 +0200
>>
>>> From: Roberto Medina <robertoxmed@gmail.com>
>>>
>>> Several warnings and errors of coding style errors corrected.
>>>
>>> Signed-off-by: Roberto Medina <robertoxmed@gmail.com>
>>
>> Applied, thanks.
>
> Wait, are you serious, you didn't even compile test this change?
>
> For real?
>
> In file included from drivers/net/ethernet/realtek/atp.c:146:0:
> drivers/net/ethernet/realtek/atp.h:79:15: error: expected ‘;’, identifier or ‘(’ before ‘unsigned’
>   static inline unsigned char inbyte(unsigned short port)
>                 ^
> drivers/net/ethernet/realtek/atp.h:79:15: error: ‘inline’ in empty declaration
>

Wow I'm really sorry for that, I did compile my changes, or at least 
that's what I thought when I compiled the directory.

Will submit another patch. Sorry for the inconvenience again.

> Don't even submit changes meant for inclusion if you're not even
> willing to build test them.
>
> And I'm very serious about this.
>

^ permalink raw reply

* Re: [bug] sunhme unable to receive IPv4 unicasts
From: David Miller @ 2014-10-27 23:03 UTC (permalink / raw)
  To: WIMPy; +Cc: netdev
In-Reply-To: <op.xn7d94v644i3fq@lx3.fl.yeti.dk>

From: "Birger Harzenetter" <WIMPy@yeti.dk>
Date: Fri, 24 Oct 2014 00:04:42 +0200

>  from https://bugzilla.kernel.org/show_bug.cgi?id=86731
> 
> Any kind of transmission is fine.
> Reception of IPv4 broadcast works
> Reception of non-IP works (pppoe tested)
> But IPv4 unicasts don't show up at all.
> IPv6 not tested
> 
> Additional tests:
> I tried to set promiscous mode, but still no trace of unicasts seen with  
> tcpdump.
> 
> Last known working version: 3.15.10

Unfortunately there have been zero functional changes to this driver
since v3.15 :-/

^ permalink raw reply

* Re: [PATCH] bridge: Add support for IEEE 802.11 Proxy ARP
From: David Miller @ 2014-10-27 23:02 UTC (permalink / raw)
  To: kyeyoonp; +Cc: jouni, netdev
In-Reply-To: <1414100957-8288-1-git-send-email-kyeyoonp@qca.qualcomm.com>

From: Kyeyoon Park <kyeyoonp@qca.qualcomm.com>
Date: Thu, 23 Oct 2014 14:49:17 -0700

> From: Kyeyoon Park <kyeyoonp@codeaurora.org>
> 
> This feature is defined in IEEE Std 802.11-2012, 10.23.13. It allows
> the AP devices to keep track of the hardware-address-to-IP-address
> mapping of the mobile devices within the WLAN network.
> 
> The AP will learn this mapping via observing DHCP, ARP, and NS/NA
> frames. When a request for such information is made (i.e. ARP request,
> Neighbor Solicitation), the AP will respond on behalf of the
> associated mobile device. In the process of doing so, the AP will drop
> the multicast request frame that was intended to go out to the wireless
> medium.
> 
> It was recommended at the LKS workshop to do this implementation in
> the bridge layer. vxlan.c is already doing something very similar.
> The DHCP snooping code will be added to the userspace application
> (hostapd) per the recommendation.
> 
> This RFC commit is only for IPv4. A similar approach in the bridge
> layer will be taken for IPv6 as well.
> 
> Signed-off-by: Kyeyoon Park <kyeyoonp@codeaurora.org>

Looks good to me, applied, thanks.

^ permalink raw reply

* Re: [PATCH net 0/2] cxgb4 : DCBx fixes for apps/host lldp agents
From: David Miller @ 2014-10-27 23:00 UTC (permalink / raw)
  To: anish; +Cc: netdev, hariprasad, leedom
In-Reply-To: <1414100251-15702-1-git-send-email-anish@chelsio.com>

From: Anish Bhatt <anish@chelsio.com>
Date: Thu, 23 Oct 2014 14:37:29 -0700

> This patchset  contains some minor fixes for cxgb4 DCBx code. Chiefly, cxgb4 
> was not cleaning up any apps added to kernel app table when link was lost.
> Disabling DCBx in firmware would automatically set DCBx state to host-managed
> and enabled, we now wait for an explicit enable call from an lldp agent instead
> 
> First patch was originally sent to net-next, but considering it applies to
> correcting behaviour of code already in net, I think it qualifies as a bug fix.
> -Anish

Series applied, thanks Anish.

^ permalink raw reply

* Re: [Bug 86851] New: Reproducible panic on heavy UDP traffic
From: Patrick McLean @ 2014-10-27 22:59 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: Eric Dumazet, Florian Westphal, Stephen Hemminger, netdev
In-Reply-To: <544E06CF.30709@redhat.com>

On Mon, 27 Oct 2014 09:48:15 +0100
Nikolay Aleksandrov <nikolay@redhat.com> wrote:

> On 10/27/2014 01:47 AM, Eric Dumazet wrote:
> > On Mon, 2014-10-27 at 00:28 +0100, Nikolay Aleksandrov wrote:
> > 
> >>
> >> Thanks for CCing me.
> >> I'll dig in the code tomorrow but my first thought when I saw this
> >> was could it be possible that we have a race condition between
> >> ip_frag_queue() and inet_frag_evict(), more precisely between the
> >> ipq_kill() calls from ip_frag_queue and inet_frag_evict since the
> >> frag could be found before we have entered the evictor which then
> >> can add it to its expire list but the ipq_kill() from
> >> ip_frag_queue() can do a list_del after we release the chain lock
> >> in the evictor so we may end up like this ?
> > 
> > Yes, either we use hlist_del_init() but loose poison aid, or test if
> > frag was evicted :
> > 
> > Not sure about refcount.
> > 
> > diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
> > index 9eb89f3f0ee4..894ec30c5896 100644
> > --- a/net/ipv4/inet_fragment.c
> > +++ b/net/ipv4/inet_fragment.c
> > @@ -285,7 +285,8 @@ static inline void fq_unlink(struct
> > inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket
> > *hb; 
> >  	hb = get_frag_bucket_locked(fq, f);
> > -	hlist_del(&fq->list);
> > +	if (!(fq->flags & INET_FRAG_EVICTED))
> > +		hlist_del(&fq->list);
> >  	spin_unlock(&hb->chain_lock);
> >  }
> >  
> > 
> > 
> 
> Exactly, I was thinking about a similar fix since the evict flag is
> only set with the chain lock. IMO the refcount should be fine.
> CCing the reporter.
> Patrick could you please try Eric's patch ?
> 

It no longer panics with that patch, but it does produce a large amount
of warnings, here is an example of what I am getting. I will attach the
full log to the bug.

> [  205.042923] ------------[ cut here ]------------
> [  205.042933] WARNING: CPU: 4 PID: 615 at net/ipv4/inet_fragment.c:149 inet_evict_bucket+0x172/0x180()
> [  205.042934] Modules linked in: nfs fscache nfsd auth_rpcgss nfs_acl lockd grace sunrpc 8021q garp mrp bonding x86_pkg_temp_thermal joydev sb_edac edac_core ioatdma tpm_tis ext4 mbcache jbd2 igb ixgbe i2c_algo_bit raid1 mdio crc32c_intel megaraid_sas dca
> [  205.042953] CPU: 4 PID: 615 Comm: kworker/4:2 Not tainted 3.18.0-rc2-base-7+ #3
> [  205.042955] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.03.0003.041920141333 04/19/2014
> [  205.042957] Workqueue: events inet_frag_worker
> [  205.042958]  0000000000000000 0000000000000009 ffffffff81624cd2 0000000000000000
> [  205.042960]  ffffffff81117b7d ffff8817c83a4740 0000000000000000 ffffffff81aa6820
> [  205.042962]  ffff8817ce073d70 ffff8817c83a4738 ffffffff81597cb2 ffffffff81aa8e28
> [  205.042964] Call Trace:
> [  205.042969]  [<ffffffff81624cd2>] ? dump_stack+0x41/0x51
> [  205.042973]  [<ffffffff81117b7d>] ? warn_slowpath_common+0x6d/0x90
> [  205.042975]  [<ffffffff81597cb2>] ? inet_evict_bucket+0x172/0x180
> [  205.042976]  [<ffffffff81597d22>] ? inet_frag_worker+0x62/0x210
> [  205.042979]  [<ffffffff8112c312>] ? process_one_work+0x132/0x360
> [  205.042981]  [<ffffffff8112ca23>] ? worker_thread+0x113/0x590
> [  205.042983]  [<ffffffff8112c910>] ? rescuer_thread+0x3d0/0x3d0
> [  205.042986]  [<ffffffff8113123c>] ? kthread+0xbc/0xe0
> [  205.042991]  [<ffffffff81040000>] ? xen_teardown_timer+0x10/0x70
> [  205.042993]  [<ffffffff81131180>] ? kthread_create_on_node+0x170/0x170
> [  205.042996]  [<ffffffff8162a9fc>] ? ret_from_fork+0x7c/0xb0
> [  205.042998]  [<ffffffff81131180>] ? kthread_create_on_node+0x170/0x170
> [  205.043000] ---[ end trace ed2bb7d412e082bc ]---
> [  205.752744] ------------[ cut here ]------------
> [  205.752752] WARNING: CPU: 2 PID: 610 at net/ipv4/inet_fragment.c:149 inet_evict_bucket+0x172/0x180()
> [  205.752754] Modules linked in: nfs fscache nfsd auth_rpcgss nfs_acl lockd grace sunrpc 8021q garp mrp bonding x86_pkg_temp_thermal joydev sb_edac edac_core ioatdma tpm_tis ext4 mbcache jbd2 igb ixgbe i2c_algo_bit raid1 mdio crc32c_intel megaraid_sas dca
> [  205.752773] CPU: 2 PID: 610 Comm: kworker/2:2 Tainted: G        W      3.18.0-rc2-base-7+ #3 
> [  205.752774] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.03.0003.041920141333 04/19/2014
> [  205.752777] Workqueue: events inet_frag_worker
> [  205.752779]  0000000000000000 0000000000000009 ffffffff81624cd2 0000000000000000
> [  205.752780]  ffffffff81117b7d ffff882fc473c740 0000000000000000 ffffffff81aa6820
> [  205.752782]  ffff8817ce7afd70 ffff882fc473c738 ffffffff81597cb2 ffffffff81aa87a8
> [  205.752784] Call Trace:
> [  205.752790]  [<ffffffff81624cd2>] ? dump_stack+0x41/0x51
> [  205.752793]  [<ffffffff81117b7d>] ? warn_slowpath_common+0x6d/0x90
> [  205.752795]  [<ffffffff81597cb2>] ? inet_evict_bucket+0x172/0x180
> [  205.752797]  [<ffffffff81597d22>] ? inet_frag_worker+0x62/0x210
> [  205.752799]  [<ffffffff8112c312>] ? process_one_work+0x132/0x360
> [  205.752801]  [<ffffffff8112ca23>] ? worker_thread+0x113/0x590
> [  205.752803]  [<ffffffff8112c910>] ? rescuer_thread+0x3d0/0x3d0
> [  205.752806]  [<ffffffff8113123c>] ? kthread+0xbc/0xe0
> [  205.752810]  [<ffffffff81040000>] ? xen_teardown_timer+0x10/0x70
> [  205.752812]  [<ffffffff81131180>] ? kthread_create_on_node+0x170/0x170
> [  205.752815]  [<ffffffff8162a9fc>] ? ret_from_fork+0x7c/0xb0
> [  205.752818]  [<ffffffff81131180>] ? kthread_create_on_node+0x170/0x170
> [  205.752820] ---[ end trace ed2bb7d412e082bd ]---
> [  206.737865] ------------[ cut here ]------------

^ permalink raw reply

* Re: [net 1/2] sctp: add transport state in /proc/net/sctp/remaddr
From: David Miller @ 2014-10-27 22:55 UTC (permalink / raw)
  To: michele; +Cc: linux-sctp, vyasevich, nhorman, netdev, dborkman
In-Reply-To: <1414093721-14921-1-git-send-email-michele@acksyn.org>

From: Michele Baldessari <michele@acksyn.org>
Date: Thu, 23 Oct 2014 21:48:40 +0200

> It is often quite helpful to be able to know the state of a transport
> outside of the application itself (for troubleshooting purposes or for
> monitoring purposes). Add it under /proc/net/sctp/remaddr.
> 
> Signed-off-by: Michele Baldessari <michele@acksyn.org>

You can't change the layout of procfs files, applications parse
these files and any modification can potentially break such tools.

Secondly, even if this change were acceptable, targeting this
change at anything other than the net-next tree is not appropriate
because it is a new feature.

^ permalink raw reply

* Re: [PATCH] net: ethernet: realtek: atp: checkpatch errors and warnings corrected
From: David Miller @ 2014-10-27 22:53 UTC (permalink / raw)
  To: robertoxmed; +Cc: netdev, linux-kernel
In-Reply-To: <20141027.185211.1433818002293610983.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Mon, 27 Oct 2014 18:52:11 -0400 (EDT)

> From: Roberto Medina <robertoxmed@gmail.com>
> Date: Thu, 23 Oct 2014 19:10:00 +0200
> 
>> From: Roberto Medina <robertoxmed@gmail.com>
>> 
>> Several warnings and errors of coding style errors corrected.
>> 
>> Signed-off-by: Roberto Medina <robertoxmed@gmail.com>
> 
> Applied, thanks.

Wait, are you serious, you didn't even compile test this change?

For real?

In file included from drivers/net/ethernet/realtek/atp.c:146:0:
drivers/net/ethernet/realtek/atp.h:79:15: error: expected ‘;’, identifier or ‘(’ before ‘unsigned’
 static inline unsigned char inbyte(unsigned short port)
               ^
drivers/net/ethernet/realtek/atp.h:79:15: error: ‘inline’ in empty declaration

Don't even submit changes meant for inclusion if you're not even
willing to build test them.

And I'm very serious about this.

^ permalink raw reply

* Re: [PATCH] net: ethernet: realtek: atp: checkpatch errors and warnings corrected
From: David Miller @ 2014-10-27 22:52 UTC (permalink / raw)
  To: robertoxmed; +Cc: netdev, linux-kernel
In-Reply-To: <1414084200-5377-1-git-send-email-robertoxmed@gmail.com>

From: Roberto Medina <robertoxmed@gmail.com>
Date: Thu, 23 Oct 2014 19:10:00 +0200

> From: Roberto Medina <robertoxmed@gmail.com>
> 
> Several warnings and errors of coding style errors corrected.
> 
> Signed-off-by: Roberto Medina <robertoxmed@gmail.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH 0/8] Netfilter fixes for net
From: David Miller @ 2014-10-27 22:49 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <1414445887-5108-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 27 Oct 2014 22:37:59 +0100

> The following patchset contains Netfilter fixes for your net tree,
> they are:
 ...
> You can pull these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git

Pulled, thanks a lot Pablo!

^ permalink raw reply

* Re: [PATCH v3] ipv6: notify userspace when we added or changed an ipv6 token
From: Daniel Borkmann @ 2014-10-27 22:25 UTC (permalink / raw)
  To: Lubomir Rintel; +Cc: netdev, David S. Miller, Hannes Frederic Sowa
In-Reply-To: <1414427956-20056-1-git-send-email-lkundrak@v3.sk>

On 10/27/2014 05:39 PM, Lubomir Rintel wrote:
> NetworkManager might want to know that it changed when the router advertisement
> arrives.
>
> Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Cc: Daniel Borkmann <dborkman@redhat.com>

Looks better, thanks!

Acked-by: Daniel Borkmann <dborkman@redhat.com>

^ permalink raw reply

* Re: [PATCH] ovs: Turn vports with dependencies into separate modules
From: Thomas Graf @ 2014-10-27 21:47 UTC (permalink / raw)
  To: Pravin Shelar; +Cc: dev@openvswitch.org, netdev
In-Reply-To: <CALnjE+opCLA9KJ5RHaUs1vbx41p6=iUi9B59Q7G0TeSTmWm7_w@mail.gmail.com>

On 10/27/14 at 10:14am, Pravin Shelar wrote:
> On Fri, Oct 24, 2014 at 2:57 PM, Thomas Graf <tgraf@suug.ch> wrote:
> > I was referring to how many other kernel APIs have been designed, a
> > registration API allowing a vport to be implemented exclusively in the
> > scope of a single file tends to be cleaner than having to touch multiple
> > files and maintaining an init list.
> >
> This has never been an issue in openvswitch. Plus we do not need loadable
> vport module to fix this issue.
> 
> > It also allows for OVS to be built into vmlinuz while vports can
> > remain as modules even if vxlan itself is built as a module.
> >
> 
> What is the problem with the current OVS built into the kernel?

What I mean specifically is the following dependency logic which will
no longer be required:

depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m)

The patch also brings additional flexibility to users of
distributions. Distros typically ship something like an allmodconfig
so a user can either run openvswitch.ko with all encaps compiled in
or not run openvswitch.ko. With vports as module, a user can blacklist
a certain encap type.

Another advantage is obviously that users can run additional vport
types on top of their distribution kernels.

Is there anything specific that you are concerned with in regard
to this proposed change?

^ permalink raw reply

* [PATCH 8/8] netfilter: nft_compat: fix wrong target lookup in nft_target_select_ops()
From: Pablo Neira Ayuso @ 2014-10-27 21:38 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1414445887-5108-1-git-send-email-pablo@netfilter.org>

From: Arturo Borrero <arturo.borrero.glez@gmail.com>

The code looks for an already loaded target, and the correct list to search
is nft_target_list, not nft_match_list.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_compat.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 0480f57..9d6d6f6 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -672,7 +672,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 	family = ctx->afi->family;
 
 	/* Re-use the existing target if it's already loaded. */
-	list_for_each_entry(nft_target, &nft_match_list, head) {
+	list_for_each_entry(nft_target, &nft_target_list, head) {
 		struct xt_target *target = nft_target->ops.data;
 
 		if (strcmp(target->name, tg_name) == 0 &&
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 6/8] netfilter: nfnetlink_log: fix maximum packet length logged to userspace
From: Pablo Neira Ayuso @ 2014-10-27 21:38 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1414445887-5108-1-git-send-email-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

don't try to queue payloads > 0xffff - NLA_HDRLEN, it does not work.
The nla length includes the size of the nla struct, so anything larger
results in u16 integer overflow.

This patch is similar to
9cefbbc9c8f9abe (netfilter: nfnetlink_queue: cleanup copy_range usage).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_log.c |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 8117fba..2d02eac3 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -43,7 +43,8 @@
 #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
 #define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
 #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
-#define NFULNL_COPY_RANGE_MAX	0xFFFF	/* max packet size is limited by 16-bit struct nfattr nfa_len field */
+/* max packet size is limited by 16-bit struct nfattr nfa_len field */
+#define NFULNL_COPY_RANGE_MAX	(0xFFFF - NLA_HDRLEN)
 
 #define PRINTR(x, args...)	do { if (net_ratelimit()) \
 				     printk(x, ## args); } while (0);
@@ -252,6 +253,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
 
 	case NFULNL_COPY_PACKET:
 		inst->copy_mode = mode;
+		if (range == 0)
+			range = NFULNL_COPY_RANGE_MAX;
 		inst->copy_range = min_t(unsigned int,
 					 range, NFULNL_COPY_RANGE_MAX);
 		break;
@@ -679,8 +682,7 @@ nfulnl_log_packet(struct net *net,
 		break;
 
 	case NFULNL_COPY_PACKET:
-		if (inst->copy_range == 0
-		    || inst->copy_range > skb->len)
+		if (inst->copy_range > skb->len)
 			data_len = skb->len;
 		else
 			data_len = inst->copy_range;
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 5/8] netfilter: nf_log: account for size of NLMSG_DONE attribute
From: Pablo Neira Ayuso @ 2014-10-27 21:38 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1414445887-5108-1-git-send-email-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

We currently neither account for the nlattr size, nor do we consider
the size of the trailing NLMSG_DONE when allocating nlmsg skb.

This can cause nflog to stop working, as __nfulnl_send() re-tries
sending forever if it failed to append NLMSG_DONE (which will never
work if buffer is not large enough).

Reported-by: Houcheng Lin <houcheng@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_log.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b1e3a05..8117fba 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -649,7 +649,8 @@ nfulnl_log_packet(struct net *net,
 		+ nla_total_size(sizeof(u_int32_t))	/* gid */
 		+ nla_total_size(plen)			/* prefix */
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
-		+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
+		+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp))
+		+ nla_total_size(sizeof(struct nfgenmsg));	/* NLMSG_DONE */
 
 	if (in && skb_mac_header_was_set(skb)) {
 		size +=   nla_total_size(skb->dev->hard_header_len)
@@ -692,8 +693,7 @@ nfulnl_log_packet(struct net *net,
 		goto unlock_and_release;
 	}
 
-	if (inst->skb &&
-	    size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) {
+	if (inst->skb && size > skb_tailroom(inst->skb)) {
 		/* either the queue len is too high or we don't have
 		 * enough room in the skb left. flush to userspace. */
 		__nfulnl_flush(inst);
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 7/8] netfilter: nf_log: release skbuff on nlmsg put failure
From: Pablo Neira Ayuso @ 2014-10-27 21:38 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1414445887-5108-1-git-send-email-pablo@netfilter.org>

From: Houcheng Lin <houcheng@gmail.com>

The kernel should reserve enough room in the skb so that the DONE
message can always be appended.  However, in case of e.g. new attribute
erroneously not being size-accounted for, __nfulnl_send() will still
try to put next nlmsg into this full skbuf, causing the skb to be stuck
forever and blocking delivery of further messages.

Fix issue by releasing skb immediately after nlmsg_put error and
WARN() so we can track down the cause of such size mismatch.

[ fw@strlen.de: add tailroom/len info to WARN ]

Signed-off-by: Houcheng Lin <houcheng@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_log.c |   17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 2d02eac3..5f1be5b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -346,26 +346,25 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
 	return skb;
 }
 
-static int
+static void
 __nfulnl_send(struct nfulnl_instance *inst)
 {
-	int status = -1;
-
 	if (inst->qlen > 1) {
 		struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0,
 						 NLMSG_DONE,
 						 sizeof(struct nfgenmsg),
 						 0);
-		if (!nlh)
+		if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n",
+			      inst->skb->len, skb_tailroom(inst->skb))) {
+			kfree_skb(inst->skb);
 			goto out;
+		}
 	}
-	status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
-				   MSG_DONTWAIT);
-
+	nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
+			  MSG_DONTWAIT);
+out:
 	inst->qlen = 0;
 	inst->skb = NULL;
-out:
-	return status;
 }
 
 static void
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 4/8] bridge: Do not compile options in br_parse_ip_options
From: Pablo Neira Ayuso @ 2014-10-27 21:38 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1414445887-5108-1-git-send-email-pablo@netfilter.org>

From: Herbert Xu <herbert@gondor.apana.org.au>

Commit 462fb2af9788a82a534f8184abfde31574e1cfa0

	bridge : Sanitize skb before it enters the IP stack

broke when IP options are actually used because it mangles the
skb as if it entered the IP stack which is wrong because the
bridge is supposed to operate below the IP stack.

Since nobody has actually requested for parsing of IP options
this patch fixes it by simply reverting to the previous approach
of ignoring all IP options, i.e., zeroing the IPCB.

If and when somebody who uses IP options and actually needs them
to be parsed by the bridge complains then we can revisit this.

Reported-by: David Newall <davidn@davidnewall.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter.c |   24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 1bada53..1a4f32c 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -192,7 +192,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
 
 static int br_parse_ip_options(struct sk_buff *skb)
 {
-	struct ip_options *opt;
 	const struct iphdr *iph;
 	struct net_device *dev = skb->dev;
 	u32 len;
@@ -201,7 +200,6 @@ static int br_parse_ip_options(struct sk_buff *skb)
 		goto inhdr_error;
 
 	iph = ip_hdr(skb);
-	opt = &(IPCB(skb)->opt);
 
 	/* Basic sanity checks */
 	if (iph->ihl < 5 || iph->version != 4)
@@ -227,23 +225,11 @@ static int br_parse_ip_options(struct sk_buff *skb)
 	}
 
 	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
-	if (iph->ihl == 5)
-		return 0;
-
-	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
-	if (ip_options_compile(dev_net(dev), opt, skb))
-		goto inhdr_error;
-
-	/* Check correct handling of SRR option */
-	if (unlikely(opt->srr)) {
-		struct in_device *in_dev = __in_dev_get_rcu(dev);
-		if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev))
-			goto drop;
-
-		if (ip_options_rcv_srr(skb))
-			goto drop;
-	}
-
+	/* We should really parse IP options here but until
+	 * somebody who actually uses IP options complains to
+	 * us we'll just silently ignore the options because
+	 * we're lazy!
+	 */
 	return 0;
 
 inhdr_error:
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 3/8] netfilter: nf_tables: check for NULL in nf_tables_newchain pcpu stats allocation
From: Pablo Neira Ayuso @ 2014-10-27 21:38 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1414445887-5108-1-git-send-email-pablo@netfilter.org>

From: Sabrina Dubroca <sd@queasysnail.net>

alloc_percpu returns NULL on failure, not a negative error code.

Fixes: ff3cd7b3c922 ("netfilter: nf_tables: refactor chain statistic routines")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 65eb2a1..11ab4b0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1328,10 +1328,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 			basechain->stats = stats;
 		} else {
 			stats = netdev_alloc_pcpu_stats(struct nft_stats);
-			if (IS_ERR(stats)) {
+			if (stats == NULL) {
 				module_put(type->owner);
 				kfree(basechain);
-				return PTR_ERR(stats);
+				return -ENOMEM;
 			}
 			rcu_assign_pointer(basechain->stats, stats);
 		}
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 2/8] netfilter: ipset: off by one in ip_set_nfnl_get_byindex()
From: Pablo Neira Ayuso @ 2014-10-27 21:38 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1414445887-5108-1-git-send-email-pablo@netfilter.org>

From: Dan Carpenter <dan.carpenter@oracle.com>

The ->ip_set_list[] array is initialized in ip_set_net_init() and it
has ->ip_set_max elements so this check should be >= instead of >
otherwise we are off by one.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_core.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 912e5a0..86f9d76 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -659,7 +659,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
 	struct ip_set *set;
 	struct ip_set_net *inst = ip_set_pernet(net);
 
-	if (index > inst->ip_set_max)
+	if (index >= inst->ip_set_max)
 		return IPSET_INVALID_ID;
 
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 1/8] netfilter: nf_conntrack: allow server to become a client in TW handling
From: Pablo Neira Ayuso @ 2014-10-27 21:38 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1414445887-5108-1-git-send-email-pablo@netfilter.org>

From: Marcelo Leitner <mleitner@redhat.com>

When a port that was used to listen for inbound connections gets closed
and reused for outgoing connections (like rsh ends up doing for stderr
flow), currently we may reject the SYN/ACK packet for the new connection
because tcp_conntracks states forbids a port to become a client while
there is still a TIME_WAIT entry in there for it.

As TCP may expire the TIME_WAIT socket in 60s and conntrack's timeout
for it is 120s, there is a ~60s window that the application can end up
opening a port that conntrack will end up blocking.

This patch fixes this by simply allowing such state transition: if we
see a SYN, in TIME_WAIT state, on REPLY direction, move it to sSS. Note
that the rest of the code already handles this situation, more
specifically in tcp_packet(), first switch clause.

Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto_tcp.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 44d1ea3..d87b642 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -213,7 +213,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 	{
 /* REPLY */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
-/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
+/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
 /*
  *	sNO -> sIV	Never reached.
  *	sSS -> sS2	Simultaneous open
@@ -223,7 +223,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sFW -> sIV
  *	sCW -> sIV
  *	sLA -> sIV
- *	sTW -> sIV	Reopened connection, but server may not do it.
+ *	sTW -> sSS	Reopened connection, but server may have switched role
  *	sCL -> sIV
  */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 0/8] Netfilter fixes for net
From: Pablo Neira Ayuso @ 2014-10-27 21:37 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev

Hi David,

The following patchset contains Netfilter fixes for your net tree,
they are:

1) Allow recycling a TCP port in conntrack when it changes role from
   server to client, from Marcelo Leitner.

2) Fix possible off by one access in ip_set_nfnl_get_byindex(), patch
   from Dan Carpenter.

3) alloc_percpu returns NULL on error, no need for IS_ERR() in nf_tables
   chain statistic updates. From Sabrina Dubroca.

4) Don't compile ip options in bridge netfilter, this mangles the packet
   and bridge should not alter layer >= 3 headers when forwarding packets.
   Patch from Herbert Xu and tested by Florian Westphal.

5) Account the final NLMSG_DONE message when calculating the size of the
   nflog netlink batches. Patch from Florian Westphal.

6) Fix a possible netlink attribute length overflow with large packets.
   Again from Florian Westphal.

7) Release the skbuff if nfnetlink_log fails to put the final
   NLMSG_DONE message. This fixes a leak on error. This shouldn't ever
   happen though, otherwise this means we miscalculate the netlink batch
   size, so spot a warning if this ever happens so we can track down the
   problem. This patch from Houcheng Lin.

8) Look at the right list when recycling targets in the nft_compat,
   patch from Arturo Borrero.

You can pull these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git

Thanks!

----------------------------------------------------------------

The following changes since commit 7c1c97d54f9bfc810908d3903cb8bcacf734df18:

  net: sched: initialize bstats syncp (2014-10-21 21:45:21 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git master

for you to fetch changes up to 7965ee93719921ea5978f331da653dfa2d7b99f5:

  netfilter: nft_compat: fix wrong target lookup in nft_target_select_ops() (2014-10-27 22:17:46 +0100)

----------------------------------------------------------------
Arturo Borrero (1):
      netfilter: nft_compat: fix wrong target lookup in nft_target_select_ops()

Dan Carpenter (1):
      netfilter: ipset: off by one in ip_set_nfnl_get_byindex()

Florian Westphal (2):
      netfilter: nf_log: account for size of NLMSG_DONE attribute
      netfilter: nfnetlink_log: fix maximum packet length logged to userspace

Herbert Xu (1):
      bridge: Do not compile options in br_parse_ip_options

Houcheng Lin (1):
      netfilter: nf_log: release skbuff on nlmsg put failure

Marcelo Leitner (1):
      netfilter: nf_conntrack: allow server to become a client in TW handling

Sabrina Dubroca (1):
      netfilter: nf_tables: check for NULL in nf_tables_newchain pcpu stats allocation

 net/bridge/br_netfilter.c              |   24 +++++-------------------
 net/netfilter/ipset/ip_set_core.c      |    2 +-
 net/netfilter/nf_conntrack_proto_tcp.c |    4 ++--
 net/netfilter/nf_tables_api.c          |    4 ++--
 net/netfilter/nfnetlink_log.c          |   31 ++++++++++++++++---------------
 net/netfilter/nft_compat.c             |    2 +-
 6 files changed, 27 insertions(+), 40 deletions(-)

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox