Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 0/4] Introduce and use printk pointer extension %pV
From: Joe Perches @ 2010-06-27 11:02 UTC (permalink / raw)
  To: Andrew Morton, David Miller; +Cc: linux-kernel, netdev

Recursive printk can reduce the total image size of an x86 defconfig by about 1%
by reducing duplicated KERN_<level> strings and centralizing the functions
used by macros in new separate functions.

Joe Perches (4):
  vsprintf: Recursive vsnprintf: Add "%pV", struct va_format
  device.h drivers/base/core.c Convert dev_<level> logging macros to functions
  netdevice.h net/core/dev.c: Convert netdev_<level> logging macros to functions
  netdevice.h: Change netif_<level> macros to call netdev_<level> functions

 drivers/base/core.c       |   64 +++++++++++++++++++++++++
 include/linux/device.h    |  112 ++++++++++++++++++++++++++++++++++----------
 include/linux/kernel.h    |    5 ++
 include/linux/netdevice.h |   56 ++++++++++++----------
 lib/vsprintf.c            |    9 ++++
 net/core/dev.c            |   62 +++++++++++++++++++++++++
 6 files changed, 256 insertions(+), 52 deletions(-)


^ permalink raw reply

* [PATCH 4/4] netdevice.h: Change netif_<level> macros to call netdev_<level> functions
From: Joe Perches @ 2010-06-27 11:02 UTC (permalink / raw)
  To: Andrew Morton, David Miller; +Cc: linux-kernel, netdev
In-Reply-To: <cover.1277636090.git.joe@perches.com>

Reduces text by ~300 bytes (woohoo!) in an x86 defconfig

$ size vmlinux*
   text	   data	    bss	    dec	    hex	filename
7198526	 720112	1366288	9284926	 8dad3e	vmlinux
7198862	 720112	1366288	9285262	 8dae8e	vmlinux.netdev

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/netdevice.h |   20 +++++++++++++-------
 1 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7f3197d..489a612 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2291,20 +2291,26 @@ do {					  			\
 		netdev_printk(level, (dev), fmt, ##args);	\
 } while (0)
 
+#define netif_level(level, priv, type, dev, fmt, args...)	\
+do {								\
+	if (netif_msg_##type(priv))				\
+		netdev_##level(dev, fmt, ##args);		\
+} while (0)
+
 #define netif_emerg(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_EMERG, dev, fmt, ##args)
+	netif_level(emerg, priv, type, dev, fmt, ##args)
 #define netif_alert(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_ALERT, dev, fmt, ##args)
+	netif_level(alert, priv, type, dev, fmt, ##args)
 #define netif_crit(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_CRIT, dev, fmt, ##args)
+	netif_level(crit, priv, type, dev, fmt, ##args)
 #define netif_err(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_ERR, dev, fmt, ##args)
+	netif_level(err, priv, type, dev, fmt, ##args)
 #define netif_warn(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_WARNING, dev, fmt, ##args)
+	netif_level(warn, priv, type, dev, fmt, ##args)
 #define netif_notice(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_NOTICE, dev, fmt, ##args)
+	netif_level(notice, priv, type, dev, fmt, ##args)
 #define netif_info(priv, type, dev, fmt, args...)		\
-	netif_printk(priv, type, KERN_INFO, (dev), fmt, ##args)
+	netif_level(info, priv, type, dev, fmt, ##args)
 
 #if defined(DEBUG)
 #define netif_dbg(priv, type, dev, format, args...)		\
-- 
1.7.1.337.g6068.dirty


^ permalink raw reply related

* [PATCH RFC] vhost-net: add dhclient work-around from userspace
From: Michael S. Tsirkin @ 2010-06-27 15:46 UTC (permalink / raw)
  To: Sridhar Samudrala, David S. Miller, Arnd Bergmann,
	Paul E. McKenney, kvm
In-Reply-To: <20100626.200320.43025947.davem@davemloft.net>

Userspace virtio server has the following hack
so guests rely on it, and we have to replicate it, too:

use source port to detect incoming IPv4 DHCP response packets,
and fill in the checksum for these.

The issue we are solving is that on linux guests, some apps
that use recvmsg with AF_PACKET sockets, don't know how to
handle CHECKSUM_PARTIAL;
The interface to return the relevant information was added
in 8dc4194474159660d7f37c495e3fc3f10d0db8cc,
and older userspace does not use it.
One important user of recvmsg with AF_PACKET is dhclient,
so we add a work-around just for DHCP.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

So here's what I came up with: I basically copied
the work-around from userspace virtio.
As suggested by Dave (assuming I understood the suggestion
correctly) this implements the workaround in vhost-net, so
other tun users don't start relying on it.
Untested.

 drivers/vhost/net.c |   42 +++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 41 insertions(+), 1 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 54096ee..9ed4051 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -25,6 +25,10 @@
 #include <linux/if_tun.h>
 #include <linux/if_macvlan.h>
 
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/netdevice.h>
+
 #include <net/sock.h>
 
 #include "vhost.h"
@@ -191,6 +195,42 @@ static void handle_tx(struct vhost_net *net)
 	unuse_mm(net->dev.mm);
 }
 
+static int peek_head(struct sock *sk)
+{
+	struct sk_buff *head;
+	int ret;
+
+	lock_sock(sk);
+	head = skb_peek(&sk->sk_receive_queue);
+	if (likely(head)) {
+		ret = 1;
+		/* Userspace virtio server has the following hack so
+		 * guests rely on it, and we have to replicate it, too: */
+		/* On linux guests, some apps that use recvmsg with AF_PACKET
+		 * sockets, don't know how to handle CHECKSUM_PARTIAL;
+		 * The interface to return the relevant information was added in
+		 * 8dc4194474159660d7f37c495e3fc3f10d0db8cc,
+		 * and older userspace does not use it.
+		 * One important user of recvmsg with AF_PACKET is dhclient,
+		 * so we add a work-around just for DHCP. */
+		/* We use source port to detect DHCP packets. */
+		if (skb->ip_summed == CHECKSUM_PARTIAL &&
+		    skb->protocol == htons(ETH_P_IP) &&
+		    skb_network_header_len(skb) >= sizeof(struct iphdr) &&
+		    ip_hdr(skb)->protocol == IPPRODO_UDP &&
+		    skb_headlen(skb) >= skb_transport_offset(skb) + sizeof(struct udphdr) &&
+		    udp_hdr(skb)->source == htons(0x67)) {
+			skb_checksum_help(skb);
+			/* Restore ip_summed value: tun passes it to user. */
+			skb->ip_summed = CHECKSUM_PARTIAL;
+		}
+	} else {
+		ret = 0;
+	}
+	release_sock(sk);
+	return len;
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_rx(struct vhost_net *net)
@@ -228,7 +268,7 @@ static void handle_rx(struct vhost_net *net)
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
 
-	for (;;) {
+	while (peek_head(sock)) {
 		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
-- 
1.7.1.12.g42b7f

^ permalink raw reply related

* Re: [PATCH] cpmac: do not leak struct net_device on phy_connect errors
From: Florian Fainelli @ 2010-06-27 17:05 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <201006211007.49039.florian@openwrt.org>

Hi David,

Forgot to mention that this is relevant for -stable and current net-next-2.6. Thanks!

Le Monday 21 June 2010 10:07:48, Florian Fainelli a écrit :
> If the call to phy_connect fails, we will return directly instead of
> freeing the previously allocated struct net_device.
> 
> Signed-off-by: Florian Fainelli <florian@openwrt.org>
> CC: stable@kernel.org
> ---
> diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
> index 3c58db5..23786ee 100644
> --- a/drivers/net/cpmac.c
> +++ b/drivers/net/cpmac.c
> @@ -1181,7 +1181,8 @@ static int __devinit cpmac_probe(struct platform_device *pdev)
>  		if (netif_msg_drv(priv))
>  			printk(KERN_ERR "%s: Could not attach to PHY\n",
>  			       dev->name);
> -		return PTR_ERR(priv->phy);
> +		rc = PTR_ERR(priv->phy);
> +		goto fail;
>  	}
> 
>  	if ((rc = register_netdev(dev))) {


^ permalink raw reply

* [REGRESSION] e1000e stopped working
From: Maxim Levitsky @ 2010-06-27 17:27 UTC (permalink / raw)
  To: netdev@vger.kernel.org

Just that,

It doesn't receive anything from my internet router during DHCP.


00:19.0 Ethernet controller [0200]: Intel Corporation 82566DC Gigabit Network Connection [8086:104b] (rev 02)
	Subsystem: Intel Corporation Device [8086:0001]
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0
	Interrupt: pin A routed to IRQ 47
	Region 0: Memory at 50300000 (32-bit, non-prefetchable) [size=128K]
	Region 1: Memory at 50324000 (32-bit, non-prefetchable) [size=4K]
	Region 2: I/O ports at 30e0 [size=32]
	Capabilities: [c8] Power Management version 2
		Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
		Status: D0 PME-Enable- DSel=0 DScale=1 PME-
	Capabilities: [d0] Message Signalled Interrupts: Mask- 64bit+ Queue=0/0 Enable+
		Address: 00000000fee0100c  Data: 41c9
	Kernel driver in use: e1000e
	Kernel modules: e1000e

I use vanilla tree, commit bf2937695fe2330bfd8933a2310e7bdd2581dc2e


Best regards,
	Maxim Levitsky


^ permalink raw reply

* Re: [REGRESSION] e1000e stopped working
From: Maxim Levitsky @ 2010-06-27 17:29 UTC (permalink / raw)
  To: netdev@vger.kernel.org
In-Reply-To: <1277659633.2989.2.camel@localhost.localdomain>

On Sun, 2010-06-27 at 20:27 +0300, Maxim Levitsky wrote:
> Just that,
> 
> It doesn't receive anything from my internet router during DHCP.
> 
> 
> 00:19.0 Ethernet controller [0200]: Intel Corporation 82566DC Gigabit Network Connection [8086:104b] (rev 02)
> 	Subsystem: Intel Corporation Device [8086:0001]
> 	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
> 	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
> 	Latency: 0
> 	Interrupt: pin A routed to IRQ 47
> 	Region 0: Memory at 50300000 (32-bit, non-prefetchable) [size=128K]
> 	Region 1: Memory at 50324000 (32-bit, non-prefetchable) [size=4K]
> 	Region 2: I/O ports at 30e0 [size=32]
> 	Capabilities: [c8] Power Management version 2
> 		Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
> 		Status: D0 PME-Enable- DSel=0 DScale=1 PME-
> 	Capabilities: [d0] Message Signalled Interrupts: Mask- 64bit+ Queue=0/0 Enable+
> 		Address: 00000000fee0100c  Data: 41c9
> 	Kernel driver in use: e1000e
> 	Kernel modules: e1000e
> 
> I use vanilla tree, commit bf2937695fe2330bfd8933a2310e7bdd2581dc2e
> 
> 
> Best regards,
> 	Maxim Levitsky
> 

It appears to work now after reboot.
Will keep a look for this.

Disregard for now.

Best regards,
	Maxim Levitsky


^ permalink raw reply

* [GIT] Networking
From: David Miller @ 2010-06-27 17:30 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, netdev, linux-kernel


Here are the networking bug fixes that have accumulated while you were away,
most notably:

1) Blind ipsec policy dereference leads to OOPS, fixed by Timo Teräs.

2) TEQL devices should not set IFF_XMIT_DST_RELEASE since it is important
   to allow the SKB's dst to get referenced post transmit.  Fix from
   Tom Hughes.

3) ipv6 proxy neighbor discovery regression leads to OOPS, fix from
   Stephen Hemminger.

4) RX timestamping improperly programmed in gianfar, breaking some families
   of cards.  Regression from 2.6.34, fix from Manfred Rudigier.

5) New ID for pcnet_cs and fix for simultaneous use of lan and modem cards
   of smc91c92_cs chips, from Ken Kawasaki.

6) Loopback delivery regression fix from John Fastabend, this hit a lot of
   folks.

7) deliver_clone in bridging can crash on OOM, fix from Herbert Xu.

8) UDP Fragment Offload generates illegal packets, also from Herbert Xu.

9) Fix netdev_mc_count() conversion in lasi82596 driver.  From Helge Deller.
   Similar fix to bluetooth driver from Gustavo F. Padovan.

10) ISDN/gigaset bug fixes via Tilman Schmidt.

11) Fix rewriting erroneously rxhash in __copy_skb_header().

12) sky2_phy_reinit() can leave RX/TX disabled, fix from Brandon Philips.
    This cures a hang hit by several users.

13) netxen bug fixes from Amit Kumar Salecha, memory leaks, and improper
    programming of caching window register.

14) A couple wireless one-liners via John Linville and the wireless crew.

Please pull, thanks a lot!

The following changes since commit 7e27d6e778cd87b6f2415515d7127eba53fe5d02:
  Linus Torvalds (1):
        Linux 2.6.35-rc3

are available in the git repository at:

  master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6.git master

Amit Kumar Salecha (3):
      netxen: fix memory leaks in error path
      netxen: fix rcv buffer leak
      netxen: fix caching window register

Andy Gospodarek (1):
      ixgbe: fix automatic LRO/RSC settings for low latency

Anirban Chakraborty (1):
      net: add dependency on fw class module to qlcnic and netxen_nic

Anton Vorontsov (1):
      gianfar: Fix oversized packets handling

Bob Copeland (1):
      ath5k: initialize ah->ah_current_channel

Brandon Philips (1):
      sky2: enable rx/tx in sky2_phy_reinit()

Christoph Fritz (1):
      mac80211: fix warn, enum may be used uninitialized

David S. Miller (3):
      Merge branch 'master' of git://git.kernel.org/.../kaber/nf-2.6
      Merge branch 'master' of git://git.kernel.org/.../linville/wireless-2.6
      Merge branch 'master' of git://git.kernel.org/.../linville/wireless-2.6

David Woodhouse (1):
      phylib: Add autoload support for the LXT973 phy.

Dmitry Kravkov (1):
      cnic: Disable statistics initialization for eth clients that do not support statistics

Dominik Brodowski (1):
      pcmcia: dev_node removal bugfix

Don Skidmore (2):
      ixgbe: fix for race with 8259(8|9) during shutdown
      ixgbe: add comment on SFP+ ID for Active DA

Eric Dumazet (2):
      net: rxhash already set in __copy_skb_header
      snmp: fix SNMP_ADD_STATS()

FUJITA Tomonori (1):
      bnx2: fix dma_get_ops compilation breakage

Filip Aben (1):
      hso: remove setting of low_latency flag

Florian Fainelli (1):
      cpmac: do not leak struct net_device on phy_connect errors

Gustavo F. Padovan (1):
      Bluetooth: Bring back var 'i' increment

Helge Deller (1):
      lasi82596: fix netdev_mc_count conversion

Herbert Xu (2):
      bridge: Fix OOM crash in deliver_clone
      udp: Fix bogus UFO packet generation

Jan-Bernd Themann (2):
      ehea: fix delayed packet processing
      ehea: Fix kernel deadlock in DLPAR-mem processing

Jiri Slaby (1):
      ISDN: hysdn, fix potential NULL dereference

Joe Perches (1):
      e1000: Fix message logging defect

Joerg Albert (1):
      p54pci: add Symbol AP-300 minipci adapters pciid

John Fastabend (1):
      net: fix deliver_no_wcard regression on loopback device

John W. Linville (1):
      iwlwifi: cancel scan watchdog in iwl_bg_abort_scan

Ken Kawasaki (2):
      pcnet_cs: add new id (TOSHIBA Modem/LAN Card)
      smc91c92_cs: fix the problem that lan & modem does not work simultaneously

Manfred Rudigier (1):
      gianfar: Fix setup of RX time stamping

Michal Schmidt (1):
      vxge: fix memory leak in vxge_alloc_msix() error path

Prarit Bhargava (1):
      libertas_tf: Fix warning in lbtf_rx for stats struct

Ralf Baechle (1):
      NET: MIPSsim: Fix modpost warning.

Randy Dunlap (1):
      enic: fix pci_alloc_consistent argument

Reinette Chatre (1):
      iwlwifi: serialize station management actions

Sergey Matyukevich (1):
      ucc_geth: fix for RX skb buffers recycling

Shanyu Zhao (1):
      iwlagn: verify flow id in compressed BA packet

Sven Wegener (1):
      ipvs: Add missing locking during connection table hashing and unhashing

Tilman Schmidt (5):
      isdn/gigaset: honor CAPI application's buffer size request
      isdn/gigaset: correct CAPI voice connection encoding
      isdn/gigaset: correct CAPI DATA_B3 Delivery Confirmation
      isdn/gigaset: encode HLC and BC together
      isdn/gigaset: correct CAPI connection state storage

Tim Gardner (1):
      hostap: Protect against initialization interrupt

Timo Teräs (1):
      xfrm: check bundle policy existance before dereferencing it

Tom Hughes (1):
      Clear IFF_XMIT_DST_RELEASE for teql interfaces

Zhu Yi (1):
      wireless: orphan ipw2x00 drivers

stephen hemminger (2):
      bridge: fdb cleanup runs too often
      ipv6: fix NULL reference in proxy neighbor discovery

 MAINTAINERS                                 |   10 +-
 drivers/isdn/gigaset/asyncdata.c            |   44 +---
 drivers/isdn/gigaset/capi.c                 |  405 +++++++++++++++++++--------
 drivers/isdn/gigaset/common.c               |   36 +--
 drivers/isdn/gigaset/ev-layer.c             |    4 +-
 drivers/isdn/gigaset/gigaset.h              |   38 ++-
 drivers/isdn/gigaset/i4l.c                  |   21 ++
 drivers/isdn/gigaset/isocdata.c             |   72 ++---
 drivers/isdn/hysdn/hysdn_net.c              |    3 +-
 drivers/net/Kconfig                         |    2 +
 drivers/net/bnx2.c                          |   11 +-
 drivers/net/cnic.c                          |   55 +++--
 drivers/net/cpmac.c                         |    3 +-
 drivers/net/e1000/e1000_main.c              |   17 +-
 drivers/net/ehea/ehea.h                     |    2 +-
 drivers/net/ehea/ehea_main.c                |    9 +-
 drivers/net/enic/vnic_dev.c                 |    2 +-
 drivers/net/gianfar.c                       |   25 ++-
 drivers/net/ixgbe/ixgbe_ethtool.c           |   37 +--
 drivers/net/ixgbe/ixgbe_main.c              |    4 +
 drivers/net/ixgbe/ixgbe_phy.c               |    2 +
 drivers/net/lib82596.c                      |    2 +-
 drivers/net/mipsnet.c                       |    2 +-
 drivers/net/netxen/netxen_nic_ctx.c         |    3 +-
 drivers/net/netxen/netxen_nic_hw.c          |    4 -
 drivers/net/netxen/netxen_nic_init.c        |   13 +-
 drivers/net/pcmcia/pcnet_cs.c               |    1 +
 drivers/net/pcmcia/smc91c92_cs.c            |   19 +-
 drivers/net/phy/lxt.c                       |    1 +
 drivers/net/sky2.c                          |   19 +-
 drivers/net/ucc_geth.c                      |    2 +
 drivers/net/usb/hso.c                       |    1 -
 drivers/net/vxge/vxge-main.c                |   29 ++-
 drivers/net/wireless/ath/ath5k/attach.c     |    1 +
 drivers/net/wireless/hostap/hostap_cs.c     |   15 +-
 drivers/net/wireless/hostap/hostap_hw.c     |   13 +
 drivers/net/wireless/hostap/hostap_wlan.h   |    2 +-
 drivers/net/wireless/iwlwifi/iwl-agn-tx.c   |    5 +
 drivers/net/wireless/iwlwifi/iwl-agn.c      |    8 +-
 drivers/net/wireless/iwlwifi/iwl-scan.c     |    1 +
 drivers/net/wireless/iwlwifi/iwl-sta.c      |    4 +
 drivers/net/wireless/iwlwifi/iwl3945-base.c |    9 +-
 drivers/net/wireless/libertas_tf/main.c     |    2 +-
 drivers/net/wireless/p54/p54pci.c           |    2 +
 drivers/serial/serial_cs.c                  |    1 +
 include/net/snmp.h                          |    2 +-
 net/bluetooth/bnep/netdev.c                 |    2 +
 net/bridge/br_fdb.c                         |    6 +-
 net/bridge/br_forward.c                     |    4 +-
 net/core/skbuff.c                           |    2 +-
 net/ipv4/ip_output.c                        |    9 +-
 net/ipv6/ndisc.c                            |    2 +-
 net/mac80211/work.c                         |    2 +-
 net/netfilter/ipvs/ip_vs_conn.c             |    4 +
 net/sched/sch_teql.c                        |    1 +
 net/xfrm/xfrm_policy.c                      |    3 +-
 56 files changed, 618 insertions(+), 380 deletions(-)

^ permalink raw reply

* Re: [REGRESSION] e1000e stopped working
From: Maxim Levitsky @ 2010-06-27 17:43 UTC (permalink / raw)
  To: netdev@vger.kernel.org
In-Reply-To: <1277659785.4028.1.camel@localhost.localdomain>

On Sun, 2010-06-27 at 20:29 +0300, Maxim Levitsky wrote:
> On Sun, 2010-06-27 at 20:27 +0300, Maxim Levitsky wrote:
> > Just that,
> > 
> > It doesn't receive anything from my internet router during DHCP.
> > 
> > 
> > 00:19.0 Ethernet controller [0200]: Intel Corporation 82566DC Gigabit Network Connection [8086:104b] (rev 02)
> > 	Subsystem: Intel Corporation Device [8086:0001]
> > 	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
> > 	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
> > 	Latency: 0
> > 	Interrupt: pin A routed to IRQ 47
> > 	Region 0: Memory at 50300000 (32-bit, non-prefetchable) [size=128K]
> > 	Region 1: Memory at 50324000 (32-bit, non-prefetchable) [size=4K]
> > 	Region 2: I/O ports at 30e0 [size=32]
> > 	Capabilities: [c8] Power Management version 2
> > 		Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
> > 		Status: D0 PME-Enable- DSel=0 DScale=1 PME-
> > 	Capabilities: [d0] Message Signalled Interrupts: Mask- 64bit+ Queue=0/0 Enable+
> > 		Address: 00000000fee0100c  Data: 41c9
> > 	Kernel driver in use: e1000e
> > 	Kernel modules: e1000e
> > 
> > I use vanilla tree, commit bf2937695fe2330bfd8933a2310e7bdd2581dc2e
> > 
> > 
> > Best regards,
> > 	Maxim Levitsky
> > 
> 
> It appears to work now after reboot.
> Will keep a look for this.
> 
> Disregard for now.


Just s2ram cycle, problem is back.
Did full reboot (power off then on), same thing card doesn't work...


Best regards,
 	Maxim Levitsky
 


^ permalink raw reply

* Re: [REGRESSION] e1000e stopped working
From: Maxim Levitsky @ 2010-06-27 17:47 UTC (permalink / raw)
  To: netdev@vger.kernel.org
In-Reply-To: <1277660638.3321.1.camel@localhost.localdomain>

On Sun, 2010-06-27 at 20:43 +0300, Maxim Levitsky wrote:
> On Sun, 2010-06-27 at 20:29 +0300, Maxim Levitsky wrote:
> > On Sun, 2010-06-27 at 20:27 +0300, Maxim Levitsky wrote:
> > > Just that,
> > > 
> > > It doesn't receive anything from my internet router during DHCP.
> > > 
> > > 
> > > 00:19.0 Ethernet controller [0200]: Intel Corporation 82566DC Gigabit Network Connection [8086:104b] (rev 02)
> > > 	Subsystem: Intel Corporation Device [8086:0001]
> > > 	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
> > > 	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
> > > 	Latency: 0
> > > 	Interrupt: pin A routed to IRQ 47
> > > 	Region 0: Memory at 50300000 (32-bit, non-prefetchable) [size=128K]
> > > 	Region 1: Memory at 50324000 (32-bit, non-prefetchable) [size=4K]
> > > 	Region 2: I/O ports at 30e0 [size=32]
> > > 	Capabilities: [c8] Power Management version 2
> > > 		Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
> > > 		Status: D0 PME-Enable- DSel=0 DScale=1 PME-
> > > 	Capabilities: [d0] Message Signalled Interrupts: Mask- 64bit+ Queue=0/0 Enable+
> > > 		Address: 00000000fee0100c  Data: 41c9
> > > 	Kernel driver in use: e1000e
> > > 	Kernel modules: e1000e
> > > 
> > > I use vanilla tree, commit bf2937695fe2330bfd8933a2310e7bdd2581dc2e
> > > 
> > > 
> > > Best regards,
> > > 	Maxim Levitsky
> > > 
> > 
> > It appears to work now after reboot.
> > Will keep a look for this.
> > 
> > Disregard for now.
> 
> 
> Just s2ram cycle, problem is back.
> Did full reboot (power off then on), same thing card doesn't work...
> 

Yep, s2ram sometimes 'fixes', sometimes breaks the card.
Something got broken in device initialization path.

Best regards,
 	Maxim Levitsky
 



^ permalink raw reply

* [PATCH] drivers/net/Makefile: conditionally descend to wireless
From: Nicolas Kaiser @ 2010-06-27 21:44 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kbuild, linux-kernel

Don't descend to wireless unless it is actually used.

Signed-off-by: Nicolas Kaiser <nikai@nikai.net>
---
 drivers/net/Makefile |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 0a0512a..9715c95 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -275,7 +275,7 @@ obj-$(CONFIG_USB_USBNET)        += usb/
 obj-$(CONFIG_USB_ZD1201)        += usb/
 obj-$(CONFIG_USB_IPHETH)        += usb/
 
-obj-y += wireless/
+obj-$(CONFIG_WLAN) += wireless/
 obj-$(CONFIG_NET_TULIP) += tulip/
 obj-$(CONFIG_HAMRADIO) += hamradio/
 obj-$(CONFIG_IRDA) += irda/
-- 
1.7.1

^ permalink raw reply related

* Re: Reviewing batman-adv for net/
From: Hagen Paul Pfeifer @ 2010-06-27 22:27 UTC (permalink / raw)
  To: Sven Eckelmann; +Cc: David S. Miller, netdev, b.a.t.m.a.n
In-Reply-To: <201006260214.06662.sven.eckelmann@gmx.de>

* Sven Eckelmann | 2010-06-26 02:14:05 [+0200]:

>batman-adv is a meshing protocol currently in drivers/staging/batman-adv. We 
>asked GregKH recently [1] if he thinks that it is ok to send a patch to you 
>for review. He acknowledged that request and now we would like to ask you to 
>review it.
>
>I will send a patch referencing this mail. I hope you find time to send us 
>your opinion and ideas.

Hello Sven,

o Where are the advantages of BATMAN compared to OLSR? OLSR is standardized
  by the IETF which is a great advantage compared to BATMAN. 20 byte IPv4
	header respective 60 byte IPv6 header savings? Network protocol neutrality?
	Where are the differences between BATMAN and 802.11s? Where are the
	similarities? See commit 37c5798968d0ce4 of 802.11s

o Are there any publications which describe the protocol behavior in detail?
  I assume it is a pro-active protocol - similar to standard BATMAN -
	but what about neighbor discovery, TC, HNA and the like? What about multicast
	transport? It is difficult to rate the patchset if no one knows what the
	intended behavior is.

o The major code is twisted about bookkeeping stuff, configuration aspects and
  so on. The minor codebase is about protocol processing. What about a
	generalized architecture and a userspace implementation of the protocol? And
	communication via netlink sockets.


Cheers, Hagen

^ permalink raw reply

* Re: Reviewing batman-adv for net/
From: Marek Lindner @ 2010-06-27 23:15 UTC (permalink / raw)
  To: b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r, David S. Miller
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Hagen Paul Pfeifer
In-Reply-To: <20100627222706.GC8285@nuttenaction>

Hi Hagen,

> o Where are the advantages of BATMAN compared to OLSR? OLSR is standardized
>   by the IETF which is a great advantage compared to BATMAN. 20 byte IPv4
> 	header respective 60 byte IPv6 header savings? Network protocol
> neutrality? Where are the differences between BATMAN and 802.11s? Where
> are the similarities? See commit 37c5798968d0ce4 of 802.11s

the OLSR as standardized by the IETF is known to be flawed when used outside of 
a simulator (even the IETF Manet people know this - I spoke with one of them). 
We have assembled a few documents explaining some of its weaknesses on our 
website (www.open-mesh.org) but I suggest you get in touch with the folks of 
www.olsr.org. They can go into the details of why they don't follow the RFC.

The B.A.T.M.A.N. kernel module implementation operates on layer 2.5 and as 
such, is not limited to wifi networks (as opposed to 802.11s) but can run on any
interface supporting ethernet frames. The project has its roots in the free
wireless network scene (especially Freifunk) and is used by many communities
and companies around the globe.

> o Are there any publications which describe the protocol behavior in
> detail? I assume it is a pro-active protocol - similar to standard BATMAN
> - but what about neighbor discovery, TC, HNA and the like? What about
> multicast transport? It is difficult to rate the patchset if no one knows
> what the intended behavior is.

Yes, it is a pro-active protocol and we have a set of documentation about it. 
Please check our website and feel free to ask if you have more questions.

Regards,
Marek

^ permalink raw reply

* Re: [PATCH] vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)
From: Pedro Garcia @ 2010-06-27 23:21 UTC (permalink / raw)
  To: netdev; +Cc: Patrick McHardy, Ben Hutchings, Eric Dumazet
In-Reply-To: <1276679284.2632.22.camel@edumazet-laptop>

On Wed, 16 Jun 2010 11:08:04 +0200, Eric Dumazet <eric.dumazet@gmail.com>
wrote:
> Le mercredi 16 juin 2010 à 10:49 +0200, Pedro Garcia a écrit :
>> On Mon, 14 Jun 2010 21:12:52 +0200, Eric Dumazet <eric.dumazet@gmail.com>
> 
>> > Good luck for your first patch !
>> 
>> Here it is again. I added the modifications in
>> http://kerneltrap.org/mailarchive/linux-netdev/2010/5/23/6277868 for HW
>> accelerated incoming packets (it did not apply cleanly on the last
>> version of
>> the kernel, so I applied manually). Now, if the VLAN 0 is not explicitly
>> created by the user, VLAN 0 packets will be treated as no VLAN (802.1p
>> packets), instead of dropping them.
>> 
>> The patch is now for two files: vlan_core (accel) and vlan_dev (non
>> accel)
>> 
>> I can not test on HW accelerated devices, so if someone can check it I
>> will appreciate (even though in the thread above it looked like yes). For
>> non accel I tested in 2.6.26. Now the patch is for
>> net-next-2.6, and it compiles OK, but I have to set up a test
>> environment to check it is still OK (should, but better to test).
>> 
>> Signed-off-by: Pedro Garcia <pedro.netdev@dondevamos.com>
> 
> OK, the patch itself is correct.
> 
> Now, could you please send it again with a proper changelog ?
> 
> In this changelog, please explain why patch is needed, and
> keep lines short (< 72 chars), like the one you did in your first mail.
> 
> I'll then add my Signed-off-by, since I wrote the accelerated part ;)
> 
> Note : I wonder if another patch is needed, in case 8021q module is
> _not_ loaded. We probably should accept vlan 0 frames in this case ?

Last version of the patch. Now I think it is OK, of course pending 
Eric's signed-off-by for the accel HW part.

If this is too long for a changelog, tell me and I will try to sum it
up:

- Without the 8021q module loaded in the kernel, all 802.1p packets 
(VLAN 0 but QoS tagging) are silently discarded (as expected, as 
the protocol is not loaded).

- Without this patch in 8021q module, these packets are forwarded to 
the module, but they are discarded also if VLAN 0 is not configured,
which should not be the default behaviour, as VLAN 0 is not really
a VLANed packet but a 802.1p packet. Defining VLAN 0 makes it almost
impossible to communicate with mixed 802.1p and non 802.1p devices on
the same network due to arp table issues.

- Changed logic to skip vlan specific code in vlan_skb_recv if VLAN 
is 0 and we have not defined a VLAN with ID 0, but we accept the 
packet with the encapsulated proto and pass it later to netif_rx.

- In the vlan device event handler, added some logic to add VLAN 0 
to HW filter in devices that support it (this prevented any traffic
in VLAN 0 to reach the stack in e1000e with HW filter under 2.6.35,
and probably also with other HW filtered cards, so we fix it here).

- In the vlan unregister logic, prevent the elimination of VLAN 0 
in devices with HW filter.

- The default behaviour is to ignore the VLAN 0 tagging and accept
the packet as if it was not tagged, but we can still define a 
VLAN 0 if desired (so it is backwards compatible).

Signed-off-by: Pedro Garcia <pedro.netdev@dondevamos.com>
--
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 3c1c8c1..d9abc43 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -155,9 +155,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
        BUG_ON(!grp);
 
        /* Take it out of our own structures, but be sure to interlock with
-        * HW accelerating devices or SW vlan input packet processing.
+        * HW accelerating devices or SW vlan input packet processing if
+        * VLAN is not 0 (leave it there for 802.1p).
         */
-       if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+       if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
                ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
 
        grp->nr_vlans--;
@@ -419,6 +420,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
        if (is_vlan_dev(dev))
                __vlan_device_event(dev, event);
 
+       if ((event == NETDEV_UP) &&
+           (dev->features & NETIF_F_HW_VLAN_FILTER) &&
+           (dev->netdev_ops->ndo_vlan_rx_add_vid)) {
+               pr_info("8021q: adding VLAN 0 to HW filter on device %s\n",
+                       dev->name);
+               dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
+       }
+
        grp = __vlan_find_group(dev);
        if (!grp)
                goto out;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 50f58f5..daaca31 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -8,6 +8,9 @@
 int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
                      u16 vlan_tci, int polling)
 {
+       struct net_device *vlan_dev;
+       u16 vlan_id;
+
        if (netpoll_rx(skb))
                return NET_RX_DROP;
 
@@ -16,10 +19,14 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 
        skb->skb_iif = skb->dev->ifindex;
        __vlan_hwaccel_put_tag(skb, vlan_tci);
-       skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
+       vlan_id = vlan_tci & VLAN_VID_MASK;
+       vlan_dev = vlan_group_get_device(grp, vlan_id);
 
-       if (!skb->dev)
-               goto drop;
+       if (vlan_dev)
+               skb->dev = vlan_dev;
+       else
+               if (vlan_id)
+                       goto drop;
 
        return (polling ? netif_receive_skb(skb) : netif_rx(skb));
 
@@ -82,16 +89,22 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
                unsigned int vlan_tci, struct sk_buff *skb)
 {
        struct sk_buff *p;
+       struct net_device *vlan_dev;
+       u16 vlan_id;
 
        if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
                skb->deliver_no_wcard = 1;
 
        skb->skb_iif = skb->dev->ifindex;
        __vlan_hwaccel_put_tag(skb, vlan_tci);
-       skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
-
-       if (!skb->dev)
-               goto drop;
+       vlan_id = vlan_tci & VLAN_VID_MASK;
+       vlan_dev = vlan_group_get_device(grp, vlan_id);
+
+       if (vlan_dev)
+               skb->dev = vlan_dev;
+       else
+               if (vlan_id)
+                       goto drop;
 
        for (p = napi->gro_list; p; p = p->next) {
                NAPI_GRO_CB(p)->same_flow =
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 5298426..21f7229 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -142,6 +142,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 {
        struct vlan_hdr *vhdr;
        struct vlan_rx_stats *rx_stats;
+       struct net_device *vlan_dev;
        u16 vlan_id;
        u16 vlan_tci;
 
@@ -157,53 +158,69 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
        vlan_id = vlan_tci & VLAN_VID_MASK;
 
        rcu_read_lock();
-       skb->dev = __find_vlan_dev(dev, vlan_id);
-       if (!skb->dev) {
-               pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
-                        __func__, vlan_id, dev->name);
-               goto err_unlock;
-       }
-
-       rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
-                              smp_processor_id());
-       rx_stats->rx_packets++;
-       rx_stats->rx_bytes += skb->len;
-
-       skb_pull_rcsum(skb, VLAN_HLEN);
-
-       skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
+       vlan_dev = __find_vlan_dev(dev, vlan_id);
 
-       pr_debug("%s: priority: %u for TCI: %hu\n",
-                __func__, skb->priority, vlan_tci);
-
-       switch (skb->pkt_type) {
-       case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
-               /* stats->broadcast ++; // no such counter :-( */
-               break;
-
-       case PACKET_MULTICAST:
-               rx_stats->multicast++;
-               break;
+       /* If the VLAN device is defined, we use it.
+        * If not, and the VID is 0, it is a 802.1p packet (not
+        * really a VLAN), so we will just netif_rx it later to the
+        * original interface, but with the skb->proto set to the
+        * wrapped proto: we do nothing here.
+        */
 
-       case PACKET_OTHERHOST:
-               /* Our lower layer thinks this is not local, let's make sure.
-                * This allows the VLAN to have a different MAC than the
-                * underlying device, and still route correctly.
-                */
-               if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-                                       skb->dev->dev_addr))
-                       skb->pkt_type = PACKET_HOST;
-               break;
-       default:
-               break;
+       if (!vlan_dev) {
+               if (vlan_id) {
+                       pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
+                                __func__, vlan_id, dev->name);
+                       goto err_unlock;
+               }
+               rx_stats = NULL;
+       } else {
+               skb->dev = vlan_dev;
+
+               rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
+                                       smp_processor_id());
+               rx_stats->rx_packets++;
+               rx_stats->rx_bytes += skb->len;
+
+               skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
+
+               pr_debug("%s: priority: %u for TCI: %hu\n",
+                        __func__, skb->priority, vlan_tci);
+
+               switch (skb->pkt_type) {
+               case PACKET_BROADCAST:
+                       /* Yeah, stats collect these together.. */
+                       /* stats->broadcast ++; // no such counter :-( */
+                       break;
+
+               case PACKET_MULTICAST:
+                       rx_stats->multicast++;
+                       break;
+
+               case PACKET_OTHERHOST:
+                       /* Our lower layer thinks this is not local, let's make
+                       * sure.
+                       * This allows the VLAN to have a different MAC than the
+                       * underlying device, and still route correctly.
+                       */
+                       if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+                                               skb->dev->dev_addr))
+                               skb->pkt_type = PACKET_HOST;
+                       break;
+               default:
+                       break;
+               }
        }
 
+       skb_pull_rcsum(skb, VLAN_HLEN);
        vlan_set_encap_proto(skb, vhdr);
 
-       skb = vlan_check_reorder_header(skb);
-       if (!skb) {
-               rx_stats->rx_errors++;
-               goto err_unlock;
+       if (vlan_dev) {
+               skb = vlan_check_reorder_header(skb);
+               if (!skb) {
+                       rx_stats->rx_errors++;
+                       goto err_unlock;
+               }
        }
 
        netif_rx(skb);


^ permalink raw reply related

* Re: [RFC PATCH v2 4/5] skb: add tracepoints to freeing skb
From: Koki Sanagi @ 2010-06-28  0:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: linux-kernel, kaneshige.kenji, izumi.taku, netdev
In-Reply-To: <1277496347.2481.12.camel@edumazet-laptop>

(2010/06/26 5:05), Eric Dumazet wrote:
> Le vendredi 25 juin 2010 à 16:12 +0900, Koki Sanagi a écrit :
> 
>>> You might add a trace point to skb_free_datagram_locked() too, since it
>>> contains an inlined consume_skb()
>>>
>>
>> I think it is contrary.
> 
> I think you are _very_ wrong.
> 
>> skb_free_datagram_locked() contains consume_skb(), so tracepoint isn't needed.
>> Because skb_free_datagram_locked() can be traced by trace_consume_skb().
>>
>>
>>
> 
> Koki, it would be good if you worked on net-next-2.6, so that my comment
> applies.

Yes, I was very wrong... I was looking at skb_free_datagram_locked() in linux-2.6.34.
It would be good to add a tracepoint to it.
 
> Also, not sending this kind of patches on netdev is not going to help
> very much.
> 
> Who is supposed to review them on lkml and Ack them ? 
> 
> Nobody.
> 
> Please build your future network related patches against net-next-2.6,
> and send them to netdev and David Miller, the official network
> maintainer, as stated in MAINTAINERS file.
> 
> NETWORKING [GENERAL]
> M:      "David S. Miller" <davem@davemloft.net>
> L:      netdev@vger.kernel.org
> W:      http://www.linuxfoundation.org/en/Net
> W:      http://patchwork.ozlabs.org/project/netdev/list/
> T:      git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git
> T:      git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6.git
> S:      Maintained
> F:      net/
> F:      include/net/
> F:      include/linux/in.h
> F:      include/linux/net.h
> F:      include/linux/netdevice.h

Yes, my submission this time had many problems (address, format and description...).
I'll check and fix them next time.

Thanks,
Koki Sanagi.
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 

^ permalink raw reply

* [PATCH v2] netfilter: xtables target SYNPROXY
From: Changli Gao @ 2010-06-28  2:27 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: David S. Miller, Alexey Kuznetsov, Jan Engelhardt,
	Jozsef Kadlecsik, Pekka Savola (ipv6), James Morris,
	Hideaki YOSHIFUJI, netfilter-devel, netdev, Changli Gao

xtables target SYNPROXY.

This patch implements an xtables target SYNPROXY. As the connection to the
TCP server won't be established until the ACK from the client is received, it
can protect the TCP server from SYN-flood attacks.

It works in the PREROUTING chain of the raw table, before the conntracking system.
Syncookies are used, so no new state is introduced into the conntracking system.
In fact, until the first connection is established, the conntracking system doesn't
see any packets. So when there is a SYN-flood attack, the conntracking system won't
be busy finding and deleting un-assured conntrack entries.

As the SYN-packet of the second connection request is sent locally, the DNAT
rules which are in the PREROUTING chain should be moved to the OUTPUT chain.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/netfilter/nf_conntrack.h        |   10 
 include/net/netfilter/nf_conntrack_core.h   |   21 
 include/net/netfilter/nf_conntrack_extend.h |    2 
 include/net/tcp.h                           |    7 
 net/ipv4/syncookies.c                       |   22 
 net/ipv4/tcp_ipv4.c                         |    9 
 net/netfilter/Kconfig                       |   17 
 net/netfilter/Makefile                      |    1 
 net/netfilter/nf_conntrack_core.c           |   45 +
 net/netfilter/xt_SYNPROXY.c                 |  678 ++++++++++++++++++++++++++++
 10 files changed, 793 insertions(+), 19 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index e624dae..5e6d8e4 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -311,5 +311,15 @@ do {							\
 #define MODULE_ALIAS_NFCT_HELPER(helper) \
         MODULE_ALIAS("nfct-helper-" helper)
 
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+extern unsigned int (*syn_proxy_pre_hook)(struct sk_buff *skb,
+					  struct nf_conn *ct,
+					  enum ip_conntrack_info ctinfo);
+
+extern unsigned int (*syn_proxy_post_hook)(struct sk_buff *skb,
+					   struct nf_conn *ct,
+					   enum ip_conntrack_info ctinfo);
+#endif
 #endif /* __KERNEL__ */
 #endif /* _NF_CONNTRACK_H */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index aced085..637b404 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -54,6 +54,23 @@ nf_conntrack_find_get(struct net *net, u16 zone,
 
 extern int __nf_conntrack_confirm(struct sk_buff *skb);
 
+static inline unsigned int syn_proxy_post_call(struct sk_buff *skb,
+					       struct nf_conn *ct,
+					       enum ip_conntrack_info ctinfo)
+{
+	unsigned int ret = NF_ACCEPT;
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+	unsigned int (*syn_proxy)(struct sk_buff *, struct nf_conn *,
+				  enum ip_conntrack_info);
+	syn_proxy = rcu_dereference(syn_proxy_post_hook);
+	if (syn_proxy)
+		ret = syn_proxy(skb, ct, ctinfo);
+#endif
+
+	return ret;
+}
+
 /* Confirm a connection: returns NF_DROP if packet must be dropped. */
 static inline int nf_conntrack_confirm(struct sk_buff *skb)
 {
@@ -63,8 +80,10 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	if (ct && !nf_ct_is_untracked(ct)) {
 		if (!nf_ct_is_confirmed(ct))
 			ret = __nf_conntrack_confirm(skb);
-		if (likely(ret == NF_ACCEPT))
+		if (likely(ret == NF_ACCEPT)) {
 			nf_ct_deliver_cached_events(ct);
+			ret = syn_proxy_post_call(skb, ct, skb->nfctinfo);
+		}
 	}
 	return ret;
 }
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 32d15bd..b2ae7e9 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -11,6 +11,7 @@ enum nf_ct_ext_id {
 	NF_CT_EXT_ACCT,
 	NF_CT_EXT_ECACHE,
 	NF_CT_EXT_ZONE,
+	NF_CT_EXT_SYNPROXY,
 	NF_CT_EXT_NUM,
 };
 
@@ -19,6 +20,7 @@ enum nf_ct_ext_id {
 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
 #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
 #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
+#define NF_CT_EXT_SYNPROXY_TYPE struct syn_proxy_state
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c2f96c2..06f28d3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -460,8 +460,11 @@ extern int			tcp_disconnect(struct sock *sk, int flags);
 extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 
 				    struct ip_options *opt);
-extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, 
-				     __u16 *mss);
+extern __u32 __cookie_v4_init_sequence(__be32 saddr, __be32 daddr,
+				       __be16 sport, __be16 dport, __u32 seq,
+				       __u16 *mssp);
+extern int cookie_v4_check_sequence(const struct iphdr *iph,
+				    const struct tcphdr *th, __u32 cookie);
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
 extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 650cace..3adcba3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -159,26 +159,21 @@ static __u16 const msstab[] = {
  * Generate a syncookie.  mssp points to the mss, which is returned
  * rounded down to the value encoded in the cookie.
  */
-__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+__u32 __cookie_v4_init_sequence(__be32 saddr, __be32 daddr, __be16 sport,
+				__be16 dport, __u32 seq, __u16 *mssp)
 {
-	const struct iphdr *iph = ip_hdr(skb);
-	const struct tcphdr *th = tcp_hdr(skb);
 	int mssind;
 	const __u16 mss = *mssp;
 
-	tcp_synq_overflow(sk);
-
 	for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
 		if (mss >= msstab[mssind])
 			break;
 	*mssp = msstab[mssind];
 
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
-	return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
-				     th->source, th->dest, ntohl(th->seq),
+	return secure_tcp_syn_cookie(saddr, daddr, sport, dport, seq,
 				     jiffies / (HZ * 60), mssind);
 }
+EXPORT_SYMBOL(__cookie_v4_init_sequence);
 
 /*
  * This (misnamed) value is the age of syncookie which is permitted.
@@ -191,10 +186,9 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
  * Check if a ack sequence number is a valid syncookie.
  * Return the decoded mss if it is, or 0 if not.
  */
-static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+int cookie_v4_check_sequence(const struct iphdr *iph, const struct tcphdr *th,
+			     __u32 cookie)
 {
-	const struct iphdr *iph = ip_hdr(skb);
-	const struct tcphdr *th = tcp_hdr(skb);
 	__u32 seq = ntohl(th->seq) - 1;
 	__u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
 					    th->source, th->dest, seq,
@@ -203,6 +197,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 
 	return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
 }
+EXPORT_SYMBOL(cookie_v4_check_sequence);
 
 static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 					   struct request_sock *req,
@@ -282,7 +277,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		goto out;
 
 	if (tcp_synq_no_recent_overflow(sk) ||
-	    (mss = cookie_check(skb, cookie)) == 0) {
+	    (mss = cookie_v4_check_sequence(ip_hdr(skb), tcp_hdr(skb),
+					    cookie)) == 0) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
 		goto out;
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8fa32f5..3b094c7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1332,7 +1332,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (want_cookie) {
-		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+		struct tcphdr *th;
+
+		tcp_synq_overflow(sk);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+		th = tcp_hdr(skb);
+		isn = __cookie_v4_init_sequence(saddr, daddr, th->source,
+						th->dest, ntohl(th->seq),
+						&req->mss);
 		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
 		struct inet_peer *peer = NULL;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 413ed24..fd8ad8c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -560,6 +560,23 @@ config NETFILTER_XT_TARGET_SECMARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_SYNPROXY
+	tristate '"SYNPROXY" target support (EXPERIMENTAL)'
+	depends on EXPERIMENTAL
+	depends on SYN_COOKIES
+	depends on IP_NF_RAW
+	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
+	help
+	  The SYNPROXY target allows a raw rule to specify that some TCP
+	  connections are relayed to protect the TCP servers from the SYN-flood
+	  DoS attacks. Syn cookies is used to save the initial state, so no
+	  conntrack is needed until the client side connection is established.
+	  It frees the connection tracking system from creating/deleting
+	  conntracks when SYN-flood DoS attack acts.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_TCPMSS
 	tristate '"TCPMSS" target support'
 	depends on (IPV6 || IPV6=n)
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e28420a..4e32834 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) += xt_SYNPROXY.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 16b41b4..dd85d6f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -800,6 +800,26 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	return ct;
 }
 
+static inline unsigned int syn_proxy_pre_call(int protonum, struct sk_buff *skb,
+					      struct nf_conn *ct,
+					      enum ip_conntrack_info ctinfo)
+{
+	unsigned int ret = NF_ACCEPT;
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+	unsigned int (*syn_proxy)(struct sk_buff *, struct nf_conn *,
+				  enum ip_conntrack_info);
+
+	if (protonum == IPPROTO_TCP) {
+		syn_proxy = rcu_dereference(syn_proxy_pre_hook);
+		if (syn_proxy)
+			ret = syn_proxy(skb, ct, ctinfo);
+	}
+#endif
+
+	return ret;
+}
+
 unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		struct sk_buff *skb)
@@ -855,8 +875,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 			       l3proto, l4proto, &set_reply, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
-		NF_CT_STAT_INC_ATOMIC(net, invalid);
-		ret = NF_ACCEPT;
+		ret = syn_proxy_pre_call(protonum, skb, NULL, ctinfo);
+		if (ret == NF_ACCEPT)
+			NF_CT_STAT_INC_ATOMIC(net, invalid);
 		goto out;
 	}
 
@@ -869,6 +890,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 
 	NF_CT_ASSERT(skb->nfct);
 
+	ret = syn_proxy_pre_call(protonum, skb, ct, ctinfo);
+	if (ret != NF_ACCEPT)
+		goto out;
 	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
 	if (ret <= 0) {
 		/* Invalid: inverse of the return code tells
@@ -1476,6 +1500,17 @@ s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			u32 seq);
 EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
 
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+unsigned int (*syn_proxy_pre_hook)(struct sk_buff *skb, struct nf_conn *ct,
+				   enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL(syn_proxy_pre_hook);
+
+unsigned int (*syn_proxy_post_hook)(struct sk_buff *skb, struct nf_conn *ct,
+				    enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL(syn_proxy_post_hook);
+#endif
+
 int nf_conntrack_init(struct net *net)
 {
 	int ret;
@@ -1496,6 +1531,12 @@ int nf_conntrack_init(struct net *net)
 
 		/* Howto get NAT offsets */
 		rcu_assign_pointer(nf_ct_nat_offset, NULL);
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+		rcu_assign_pointer(syn_proxy_pre_hook, NULL);
+		rcu_assign_pointer(syn_proxy_post_hook, NULL);
+#endif
 	}
 	return 0;
 
diff --git a/net/netfilter/xt_SYNPROXY.c b/net/netfilter/xt_SYNPROXY.c
new file mode 100644
index 0000000..5e05259
--- /dev/null
+++ b/net/netfilter/xt_SYNPROXY.c
@@ -0,0 +1,678 @@
+/* (C) 2010- Changli Gao <xiaosuo@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * It bases on ipt_REJECT.c
+ */
+#define pr_fmt(fmt) "SYNPROXY: " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/unaligned/access_ok.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/dst.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Changli Gao <xiaosuo@gmail.com>");
+MODULE_DESCRIPTION("Xtables: \"SYNPROXY\" target for IPv4");
+MODULE_ALIAS("ipt_SYNPROXY");
+
+enum {
+	TCP_SEND_FLAG_NOTRACE	= 0x1,
+	TCP_SEND_FLAG_SYNCOOKIE	= 0x2,
+	TCP_SEND_FLAG_ACK2SYN	= 0x4,
+};
+
+struct syn_proxy_state {
+	u16	seq_inited;
+	__be16	window;
+	u32	seq_diff;
+};
+
+static int get_mtu(const struct dst_entry *dst)
+{
+	int mtu;
+
+	mtu = dst_mtu(dst);
+	if (mtu)
+		return mtu;
+
+	return dst->dev ? dst->dev->mtu : 0;
+}
+
+static int get_advmss(const struct dst_entry *dst)
+{
+	int advmss;
+
+	advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (advmss)
+		return advmss;
+	advmss = get_mtu(dst);
+	if (advmss)
+		return advmss - (sizeof(struct iphdr) + sizeof(struct tcphdr));
+
+	return TCP_MSS_DEFAULT;
+}
+
+static int syn_proxy_route(struct sk_buff *skb, struct net *net, u16 *pmss)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt;
+	struct flowi fl = {};
+	unsigned int type;
+	int flags = 0;
+	int err;
+	u16 mss;
+
+	type = inet_addr_type(net, iph->saddr);
+	if (type != RTN_LOCAL) {
+		type = inet_addr_type(net, iph->daddr);
+		if (type == RTN_LOCAL)
+			flags |= FLOWI_FLAG_ANYSRC;
+	}
+
+	if (type == RTN_LOCAL) {
+		fl.nl_u.ip4_u.daddr = iph->daddr;
+		fl.nl_u.ip4_u.saddr = iph->saddr;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+		fl.flags = flags;
+		err = ip_route_output_key(net, &rt, &fl);
+		if (err)
+			goto out;
+
+		skb_dst_set(skb, &rt->dst);
+	} else {
+		/* non-local src, find valid iif to satisfy
+		 * rp-filter when calling ip_route_input. */
+		fl.nl_u.ip4_u.daddr = iph->saddr;
+		err = ip_route_output_key(net, &rt, &fl);
+		if (err)
+			goto out;
+
+		err = ip_route_input(skb, iph->daddr, iph->saddr,
+				     RT_TOS(iph->tos), rt->dst.dev);
+		if (err) {
+			dst_release(&rt->dst);
+			goto out;
+		}
+		if (pmss) {
+			mss = get_advmss(&rt->dst);
+			if (*pmss > mss)
+				*pmss = mss;
+		}
+		dst_release(&rt->dst);
+	}
+
+	err = skb_dst(skb)->error;
+	if (!err && pmss) {
+		mss = get_advmss(skb_dst(skb));
+		if (*pmss > mss)
+			*pmss = mss;
+	}
+
+out:
+	return err;
+}
+
+static int tcp_send(__be32 src, __be32 dst, __be16 sport, __be16 dport,
+		    u32 seq, u32 ack_seq, __be16 window, u16 mss, u8 tcp_flags,
+		    u8 tos, struct net_device *dev, int flags,
+		    struct sk_buff *oskb)
+{
+	struct sk_buff *skb;
+	struct iphdr *iph;
+	struct tcphdr *th;
+	int err, len;
+
+	len = sizeof(*th);
+	if (mss)
+		len += TCPOLEN_MSS;
+
+	skb = NULL;
+	/* caller must give me a large enough oskb */
+	if (oskb) {
+		unsigned char *odata = oskb->data;
+
+		if (skb_recycle_check(oskb, 0)) {
+			oskb->data = odata;
+			skb_reset_tail_pointer(oskb);
+			skb = oskb;
+			pr_debug("recycle skb\n");
+		}
+	}
+	if (!skb) {
+		skb = alloc_skb(LL_MAX_HEADER + sizeof(*iph) + len, GFP_ATOMIC);
+		if (!skb) {
+			err = -ENOMEM;
+			goto out;
+		}
+		skb_reserve(skb, LL_MAX_HEADER);
+	}
+
+	skb_reset_network_header(skb);
+	if (!(flags & TCP_SEND_FLAG_ACK2SYN) || skb != oskb) {
+		iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+		iph->version	= 4;
+		iph->ihl	= sizeof(*iph) / 4;
+		iph->tos	= tos;
+		/* tot_len is set in ip_local_out() */
+		iph->id		= 0;
+		iph->frag_off	= htons(IP_DF);
+		iph->protocol	= IPPROTO_TCP;
+		iph->saddr	= src;
+		iph->daddr	= dst;
+		th = (struct tcphdr *)skb_put(skb, len);
+		th->source	= sport;
+		th->dest	= dport;
+	} else {
+		iph = (struct iphdr *)skb->data;
+		iph->id		= 0;
+		iph->frag_off	= htons(IP_DF);
+		skb_put(skb, iph->ihl * 4 + len);
+		th = (struct tcphdr *)(skb->data + iph->ihl * 4);
+	}
+
+	th->seq		= htonl(seq);
+	th->ack_seq	= htonl(ack_seq);
+	tcp_flag_byte(th) = tcp_flags;
+	th->doff	= len / 4;
+	th->window	= window;
+	th->urg_ptr	= 0;
+
+	if ((flags & TCP_SEND_FLAG_SYNCOOKIE) && mss)
+		err = syn_proxy_route(skb, dev_net(dev), &mss);
+	else
+		err = syn_proxy_route(skb, dev_net(dev), NULL);
+	if (err)
+		goto err_out;
+
+	if ((flags & TCP_SEND_FLAG_SYNCOOKIE)) {
+		if (mss) {
+			th->seq = htonl(__cookie_v4_init_sequence(dst, src,
+								  dport, sport,
+								  ack_seq - 1,
+								  &mss));
+		} else {
+			mss = TCP_MSS_DEFAULT;
+			th->seq = htonl(__cookie_v4_init_sequence(dst, src,
+								  dport, sport,
+								  ack_seq - 1,
+								  &mss));
+			mss = 0;
+		}
+	}
+
+	if (mss)
+		* (__force __be32 *)(th + 1) = htonl((TCPOPT_MSS << 24) |
+						     (TCPOLEN_MSS << 16) |
+						     mss);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	th->check = ~tcp_v4_check(len, src, dst, 0);
+	skb->csum_start = (unsigned char *)th - skb->head;
+	skb->csum_offset = offsetof(struct tcphdr, check);
+
+	if (!(flags & TCP_SEND_FLAG_ACK2SYN) || skb != oskb)
+		iph->ttl	= dst_metric(skb_dst(skb), RTAX_HOPLIMIT);
+
+	if (skb->len > get_mtu(skb_dst(skb))) {
+		if (printk_ratelimit())
+			pr_warning("%s has smaller mtu: %d\n",
+				   skb_dst(skb)->dev->name,
+				   get_mtu(skb_dst(skb)));
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if ((flags & TCP_SEND_FLAG_NOTRACE)) {
+		skb->nfct = &nf_ct_untracked_get()->ct_general;
+		skb->nfctinfo = IP_CT_NEW;
+		nf_conntrack_get(skb->nfct);
+	}
+
+	pr_debug("ip_local_out: %pI4n:%hu -> %pI4n:%hu (seq=%u, "
+		 "ack_seq=%u mss=%hu flags=%hhx)\n", &src, ntohs(th->source),
+		 &dst, ntohs(th->dest), ntohl(th->seq), ack_seq, mss,
+		 tcp_flags);
+
+	err = ip_local_out(skb);
+	if (err > 0)
+		err = net_xmit_errno(err);
+
+	pr_debug("ip_local_out: return with %d\n", err);
+out:
+	if (oskb && oskb != skb)
+		kfree_skb(oskb);
+
+	return err;
+
+err_out:
+	kfree_skb(skb);
+	goto out;
+}
+
+static int get_mss(u8 *data, int len)
+{
+	u8 olen;
+
+	while (len >= TCPOLEN_MSS) {
+		switch (data[0]) {
+		case TCPOPT_EOL:
+			return 0;
+		case TCPOPT_NOP:
+			data++;
+			len--;
+			break;
+		case TCPOPT_MSS:
+			if (data[1] != TCPOLEN_MSS)
+				return -EINVAL;
+			return get_unaligned_be16(data + 2);
+		default:
+			olen = data[1];
+			if (olen < 2 || olen > len)
+				return -EINVAL;
+			data += olen;
+			len -= olen;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct syn_proxy_state, syn_proxy_state);
+
+/* syn_proxy_pre isn't under the protection of nf_conntrack_proto_tcp.c */
+static unsigned int syn_proxy_pre(struct sk_buff *skb, struct nf_conn *ct,
+				  enum ip_conntrack_info ctinfo)
+{
+	struct syn_proxy_state *state;
+	struct iphdr *iph;
+	struct tcphdr *th, _th;
+
+	/* only support IPv4 now */
+	iph = ip_hdr(skb);
+	if (iph->version != 4)
+		return NF_ACCEPT;
+
+	th = skb_header_pointer(skb, iph->ihl * 4, sizeof(_th), &_th);
+	if (th == NULL)
+		return NF_DROP;
+
+	if (!ct || !nf_ct_is_confirmed(ct)) {
+		int ret;
+
+		if (!th->syn && th->ack) {
+			u16 mss;
+			struct sk_buff *rec_skb;
+
+			mss = cookie_v4_check_sequence(iph, th,
+						       ntohl(th->ack_seq) - 1);
+			if (!mss)
+				return NF_ACCEPT;
+
+			pr_debug("%pI4n:%hu -> %pI4n:%hu(mss=%hu)\n",
+				 &iph->saddr, ntohs(th->source),
+				 &iph->daddr, ntohs(th->dest), mss);
+
+			if (skb_tailroom(skb) < TCPOLEN_MSS &&
+			    skb->len < iph->ihl * 4 + sizeof(*th) + TCPOLEN_MSS)
+				rec_skb = NULL;
+			else
+				rec_skb = skb;
+
+			local_bh_disable();
+			state = &__get_cpu_var(syn_proxy_state);
+			state->seq_inited = 1;
+			state->window = th->window;
+			state->seq_diff = ntohl(th->ack_seq) - 1;
+			if (rec_skb)
+				tcp_send(iph->saddr, iph->daddr, 0, 0,
+					 ntohl(th->seq) - 1, 0, th->window,
+					 mss, TCPHDR_SYN, 0, skb->dev,
+					 TCP_SEND_FLAG_ACK2SYN, rec_skb);
+			else
+				tcp_send(iph->saddr, iph->daddr, th->source,
+					 th->dest, ntohl(th->seq) - 1, 0,
+					 th->window, mss, TCPHDR_SYN,
+					 iph->tos, skb->dev, 0, NULL);
+			state->seq_inited = 0;
+			local_bh_enable();
+
+			if (!rec_skb)
+				kfree_skb(skb);
+
+			return NF_STOLEN;
+		}
+
+		if (!ct || !th->syn || th->ack)
+			return NF_ACCEPT;
+
+		ret = NF_ACCEPT;
+		local_bh_disable();
+		state = &__get_cpu_var(syn_proxy_state);
+		if (state->seq_inited) {
+			struct syn_proxy_state *nstate;
+
+			nstate = nf_ct_ext_add(ct, NF_CT_EXT_SYNPROXY,
+					       GFP_ATOMIC);
+			if (nstate != NULL) {
+				nstate->seq_inited = 0;
+				nstate->window = state->window;
+				nstate->seq_diff = state->seq_diff;
+				pr_debug("seq_diff: %u\n", nstate->seq_diff);
+			} else {
+				ret = NF_DROP;
+			}
+		}
+		local_bh_enable();
+
+		return ret;
+	}
+
+	state = nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY);
+	if (!state)
+		return NF_ACCEPT;
+
+	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+		__be32 newack;
+
+		/* don't need to mangle duplicate SYN packets */
+		if (th->syn && !th->ack)
+			return NF_ACCEPT;
+		if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*th)))
+			return NF_DROP;
+		th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
+		newack = htonl(ntohl(th->ack_seq) - state->seq_diff);
+		inet_proto_csum_replace4(&th->check, skb, th->ack_seq, newack,
+					 0);
+		pr_debug("alter ack seq: %u -> %u\n",
+			 ntohl(th->ack_seq), ntohl(newack));
+		th->ack_seq = newack;
+	} else {
+		/* Simultaneous open ? Oh, no. The connection between
+		 * client and us is established. */
+		if (th->syn && !th->ack)
+			return NF_DROP;
+	}
+
+	return NF_ACCEPT;
+}
+
+static unsigned int syn_proxy_mangle_pkt(struct sk_buff *skb, struct iphdr *iph,
+					 struct tcphdr *th, u32 seq_diff)
+{
+	__be32 new;
+	int olen;
+
+	if (skb->len < (iph->ihl + th->doff) * 4)
+		return NF_DROP;
+	if (!skb_make_writable(skb, (iph->ihl + th->doff) * 4))
+		return NF_DROP;
+	iph = (struct iphdr *)(skb->data);
+	th = (struct tcphdr *)(skb->data + iph->ihl * 4);
+
+	new = tcp_flag_word(th) & (~TCP_FLAG_SYN);
+	inet_proto_csum_replace4(&th->check, skb, tcp_flag_word(th), new, 0);
+	tcp_flag_word(th) = new;
+
+	new = htonl(ntohl(th->seq) + seq_diff);
+	inet_proto_csum_replace4(&th->check, skb, th->seq, new, 0);
+	pr_debug("alter seq: %u -> %u\n", ntohl(th->seq), ntohl(new));
+	th->seq = new;
+
+	olen = th->doff - sizeof(*th) / 4;
+	if (olen) {
+		__be32 *opt;
+
+		opt = (__force __be32 *)(th + 1);
+#define TCPOPT_EOL_WORD ((TCPOPT_EOL << 24) + (TCPOPT_EOL << 16) + \
+			 (TCPOPT_EOL << 8) + TCPOPT_EOL)
+		inet_proto_csum_replace4(&th->check, skb, *opt, TCPOPT_EOL_WORD,
+					 0);
+		*opt = TCPOPT_EOL_WORD;
+	}
+
+	return NF_ACCEPT;
+}
+
+static unsigned int syn_proxy_post(struct sk_buff *skb, struct nf_conn *ct,
+				   enum ip_conntrack_info ctinfo)
+{
+	struct syn_proxy_state *state;
+	struct iphdr *iph;
+	struct tcphdr *th;
+
+	/* untraced packets don't have NF_CT_EXT_SYNPROXY ext, as they don't
+	 * enter syn_proxy_pre() */
+	state = nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY);
+	if (state == NULL)
+		return NF_ACCEPT;
+
+	iph = ip_hdr(skb);
+	if (!skb_make_writable(skb, iph->ihl * 4 + sizeof(*th)))
+		return NF_DROP;
+	th = (struct tcphdr *)(skb->data + iph->ihl * 4);
+	if (!state->seq_inited) {
+		if (th->syn) {
+			/* It must be from original direction, as the ones
+			 * from the other side are dropped in function
+			 * syn_proxy_pre() */
+			if (!th->ack)
+				return NF_ACCEPT;
+
+			pr_debug("SYN-ACK %pI4n:%hu -> %pI4n:%hu "
+				 "(seq=%u ack_seq=%u)\n",
+				 &iph->saddr, ntohs(th->source), &iph->daddr,
+				 ntohs(th->dest), ntohl(th->seq),
+				 ntohl(th->ack_seq));
+
+			/* SYN-ACK from reply direction with the protection
+			 * of conntrack */
+			spin_lock_bh(&ct->lock);
+			if (!state->seq_inited) {
+				state->seq_inited = 1;
+				pr_debug("update seq_diff %u -> %u\n",
+					 state->seq_diff,
+					 state->seq_diff - ntohl(th->seq));
+				state->seq_diff -= ntohl(th->seq);
+			}
+			spin_unlock_bh(&ct->lock);
+			tcp_send(iph->daddr, iph->saddr, th->dest, th->source,
+				 ntohl(th->ack_seq),
+				 ntohl(th->seq) + 1 + state->seq_diff,
+				 state->window, 0, TCPHDR_ACK, iph->tos,
+				 skb->dev, 0, NULL);
+
+			return syn_proxy_mangle_pkt(skb, iph, th,
+						    state->seq_diff + 1);
+		} else {
+			__be32 newseq;
+
+			if (!th->rst)
+				return NF_ACCEPT;
+			newseq = htonl(state->seq_diff + 1);
+			inet_proto_csum_replace4(&th->check, skb, th->seq,
+						 newseq, 0);
+			pr_debug("alter RST seq: %u -> %u\n",
+				 ntohl(th->seq), ntohl(newseq));
+			th->seq = newseq;
+
+			return NF_ACCEPT;
+		}
+	}
+
+	/* ct should be in ESTABLISHED state, but the ack packets from
+	 * us may have been lost, so handle a retransmitted SYN-ACK. */
+	if (th->syn) {
+		if (!th->ack)
+			return NF_ACCEPT;
+
+		tcp_send(iph->daddr, iph->saddr, th->dest, th->source,
+			 ntohl(th->ack_seq),
+			 ntohl(th->seq) + 1 + state->seq_diff,
+			 state->window, 0, TCPHDR_ACK, iph->tos,
+			 skb->dev, 0, NULL);
+
+		return syn_proxy_mangle_pkt(skb, iph, th, state->seq_diff + 1);
+	}
+
+	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+		__be32 newseq;
+
+		newseq = htonl(ntohl(th->seq) + state->seq_diff);
+		inet_proto_csum_replace4(&th->check, skb, th->seq, newseq, 0);
+		pr_debug("alter seq: %u -> %u\n", ntohl(th->seq),
+			 ntohl(newseq));
+		th->seq = newseq;
+	}
+
+	return NF_ACCEPT;
+}
+
+static unsigned int tcp_process(struct sk_buff *skb)
+{
+	const struct iphdr *iph;
+	const struct tcphdr *th;
+	int err;
+	u16 mss;
+
+	iph = ip_hdr(skb);
+	if (iph->frag_off & htons(IP_OFFSET))
+		goto out;
+	if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(*th)))
+		goto out;
+	th = (const struct tcphdr *)(skb->data + iph->ihl * 4);
+	if ((tcp_flag_byte(th) &
+	     (TCPHDR_FIN | TCPHDR_RST | TCPHDR_ACK | TCPHDR_SYN)) != TCPHDR_SYN)
+		goto out;
+
+	if (nf_ip_checksum(skb, NF_INET_PRE_ROUTING, iph->ihl * 4, IPPROTO_TCP))
+		goto out;
+	mss = 0;
+	if (th->doff > sizeof(*th) / 4) {
+		if (!pskb_may_pull(skb, (iph->ihl + th->doff) * 4))
+			goto out;
+		err = get_mss((u8 *)(th + 1), th->doff * 4 - sizeof(*th));
+		if (err < 0)
+			goto out;
+		if (err != 0)
+			mss = err;
+	} else if (th->doff != sizeof(*th) / 4)
+		goto out;
+
+	tcp_send(iph->daddr, iph->saddr, th->dest, th->source, 0,
+		 ntohl(th->seq) + 1, 0, mss, TCPHDR_SYN | TCPHDR_ACK,
+		 iph->tos, skb->dev,
+		 TCP_SEND_FLAG_NOTRACE | TCP_SEND_FLAG_SYNCOOKIE, skb);
+
+	return NF_STOLEN;
+
+out:
+	return NF_DROP;
+}
+
+static unsigned int synproxy_tg(struct sk_buff *skb,
+				const struct xt_action_param *par)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	int ret;
+
+	/* received from lo */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct)
+		return IPT_CONTINUE;
+
+	local_bh_disable();
+	if (!__get_cpu_var(syn_proxy_state).seq_inited)
+		ret = tcp_process(skb);
+	else
+		ret = IPT_CONTINUE;
+	local_bh_enable();
+
+	return ret;
+}
+
+static int synproxy_tg_check(const struct xt_tgchk_param *par)
+{
+	int ret;
+
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0)
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
+
+	return ret;
+}
+
+static void synproxy_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg_reg __read_mostly = {
+	.name		= "SYNPROXY",
+	.family		= NFPROTO_IPV4,
+	.target		= synproxy_tg,
+	.table		= "raw",
+	.hooks		= 1 << NF_INET_PRE_ROUTING,
+	.proto		= IPPROTO_TCP,
+	.checkentry	= synproxy_tg_check,
+	.destroy	= synproxy_tg_destroy,
+	.me		= THIS_MODULE,
+};
+
+static struct nf_ct_ext_type syn_proxy_state_ext __read_mostly = {
+	.len	= sizeof(struct syn_proxy_state),
+	.align	= __alignof__(struct syn_proxy_state),
+	.id	= NF_CT_EXT_SYNPROXY,
+};
+
+static int __init synproxy_tg_init(void)
+{
+	int err;
+
+	rcu_assign_pointer(syn_proxy_pre_hook, syn_proxy_pre);
+	rcu_assign_pointer(syn_proxy_post_hook, syn_proxy_post);
+	err = nf_ct_extend_register(&syn_proxy_state_ext);
+	if (err)
+		goto err_out;
+	err = xt_register_target(&synproxy_tg_reg);
+	if (err)
+		goto err_out2;
+
+	return err;
+
+err_out2:
+	nf_ct_extend_unregister(&syn_proxy_state_ext);
+err_out:
+	rcu_assign_pointer(syn_proxy_post_hook, NULL);
+	rcu_assign_pointer(syn_proxy_pre_hook, NULL);
+	rcu_barrier();
+
+	return err;
+}
+
+static void __exit synproxy_tg_exit(void)
+{
+	xt_unregister_target(&synproxy_tg_reg);
+	nf_ct_extend_unregister(&syn_proxy_state_ext);
+	rcu_assign_pointer(syn_proxy_post_hook, NULL);
+	rcu_assign_pointer(syn_proxy_pre_hook, NULL);
+	rcu_barrier();
+}
+
+module_init(synproxy_tg_init);
+module_exit(synproxy_tg_exit);

^ permalink raw reply related

* Re: [PATCH 2/2 v2] Driver core: reduce duplicated code
From: Eric Miao @ 2010-06-28  4:55 UTC (permalink / raw)
  To: Uwe Kleine-König
  Cc: Greg KH, Randy Dunlap, Dmitry Torokhov, Anisse Astier,
	Greg Kroah-Hartman, Magnus Damm, Rafael J. Wysocki, Paul Mundt,
	linux-doc, linux-kernel, netdev
In-Reply-To: <20100622052314.GA17128@pengutronix.de>

2010/6/22 Uwe Kleine-König <u.kleine-koenig@pengutronix.de>:
> Hi Greg,
>
>> > I changed the semantic slightly to only call
>> > platform_device_add_resources if data != NULL instead of size != 0.  The
>> > idea is to support wrappers like:
>> >
>> >     #define add_blablub(id, pdata) \
>> >             platform_device_register_resndata(NULL, "blablub", id, \
>> >                     NULL, 0, pdata, sizeof(struct blablub_platform_data))
>> >
>> > that don't fail if pdata=NULL.  Ditto for res.
>>
>> That's fine, but why would you want to have a #define for something like
>> this?  Is it really needed?
> Well, what is really needed?  I intend to use it on arm/imx.  I have
> several different machines using similar SoCs and so I want to have a
> function à la:
>
>        struct platform_device *__init imx_add_imx_i2c(int id,
>                resource_size_t iobase, resource_size_t iosize, int irq,
>                const struct imxi2c_platform_data *pdata)
>
> that builds a struct resource[] and then calls
> platform_device_register_resndata().  And then I have a set of macros
> like:
>
>        #define imx21_add_i2c_imx(pdata)        \
>                imx_add_imx_i2c(0, MX2x_I2C_BASE_ADDR, SZ_4K, MX2x_INT_I2C, pdata)
>        #define imx25_add_imx_i2c0(pdata)       \
>                imx_add_imx_i2c(0, MX25_I2C1_BASE_ADDR, SZ_16K, MX25_INT_I2C1, pdata)
>        #define imx25_add_imx_i2c1(pdata)       \
>                imx_add_imx_i2c(1, MX25_I2C2_BASE_ADDR, SZ_16K, MX25_INT_I2C2, pdata)
>
> etc.  The final goal is to get rid of files like
> arch/arm/mach-mx3/devices.c.
>

Hi Uwe,

I suggest you to have a look into arch/arm/mach-mmp/devices.c and
arch/arm/mach-mmp/pxa{168,910}.c as well as
arch/arm/mach-mmp/include/mach/pxa{168,910}.h, maybe we can find
some common practice.

>> Anyway, this version looks fine to me, I'll go apply it.
> \o/
>
> Best regards and thanks
> Uwe
>
> --
> Pengutronix e.K.                           | Uwe Kleine-König            |
> Industrial Linux Solutions                 | http://www.pengutronix.de/  |
>

^ permalink raw reply

* Re: dhclient, checksum and tap
From: David Miller @ 2010-06-28  4:59 UTC (permalink / raw)
  To: mst; +Cc: herbert.xu, netdev
In-Reply-To: <20100627082439.GA8472@redhat.com>

From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 27 Jun 2010 11:24:40 +0300

> Just to spell it out for me, you think the hack should be done
> in vhost-net?

Pretty much, yes.

^ permalink raw reply

* Re: [PATCH 2/2 v2] Driver core: reduce duplicated code
From: Uwe Kleine-König @ 2010-06-28  5:16 UTC (permalink / raw)
  To: Eric Miao, Sascha Hauer
  Cc: Greg KH, Randy Dunlap, Dmitry Torokhov, Anisse Astier,
	Greg Kroah-Hartman, Magnus Damm, Rafael J. Wysocki, Paul Mundt,
	linux-doc, linux-kernel, netdev
In-Reply-To: <AANLkTinMLrXyqKZ76AiWiw6N_glrWWrP-2_aUTzQm5Xr@mail.gmail.com>

Hi Eric,

On Mon, Jun 28, 2010 at 12:55:45PM +0800, Eric Miao wrote:
> I suggest you to have a look into arch/arm/mach-mmp/devices.c and
> arch/arm/mach-mmp/pxa{168,910}.c as well as
> arch/arm/mach-mmp/include/mach/pxa{168,910}.h, maybe we can find
> some common practice.
I think I like this approach in general, I already thought about not
passing all parameters as function/macro arguments, too.  But maybe this
becomes too excessive for imx as I would need too many of these device
desc for the different imx variants?!

Anyhow a few things I thought when looking in the files you suggested:

 - Why not use an array for all uart devdescs, maybe the code for
   pxa168_add_uart could become a bit smaller then?:

	extern struct pxa_device_desc pxa168_device_uart[2];
	...
	static inline int pxa168_add_uart(int id)
	{
		struct pxa_device_desc *d = pxa168_device_uart + id;

		if (id < 0 || id > 2)
			return -EINVAL;

		return pxa_register_device(d, NULL, 0);
	}

   (Ditto for the other types obviously.)

 - shouldn't all these pxa_device_descs and pxa168_add_$device functions
   be __initdata and __init?

 - pxa_register_device is better than my add_resndata function in (at
   least) one aspect as it sets coherent_dma_mask, too.  This is
   something I missed when trying to add mxc-mmc (IIRC) devices.

Thanks
Uwe

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |

^ permalink raw reply

* Re: [PATCH 2/2 v2] Driver core: reduce duplicated code
From: Eric Miao @ 2010-06-28  5:27 UTC (permalink / raw)
  To: Uwe Kleine-König
  Cc: Sascha Hauer, Greg KH, Randy Dunlap, Dmitry Torokhov,
	Anisse Astier, Greg Kroah-Hartman, Magnus Damm, Rafael J. Wysocki,
	Paul Mundt, linux-doc, linux-kernel, netdev
In-Reply-To: <20100628051606.GA16445@pengutronix.de>

2010/6/28 Uwe Kleine-König <u.kleine-koenig@pengutronix.de>:
> Hi Eric,
>
> On Mon, Jun 28, 2010 at 12:55:45PM +0800, Eric Miao wrote:
>> I suggest you to have a look into arch/arm/mach-mmp/devices.c and
>> arch/arm/mach-mmp/pxa{168,910}.c as well as
>> arch/arm/mach-mmp/include/mach/pxa{168,910}.h, maybe we can find
>> some common practice.
> I think I like this approach in general, I already thought about not
> passing all parameters as function/macro arguments, too.  But maybe this
> becomes too excessive for imx as I would need too many of these device
> desc for the different imx variants?!
>
> Anyhow a few things I thought when looking in the files you suggested:
>
>  - Why not use an array for all uart devdescs, maybe the code for
>   pxa168_add_uart could become a bit smaller then?:
>
>        extern struct pxa_device_desc pxa168_device_uart[2];
>        ...
>        static inline int pxa168_add_uart(int id)
>        {
>                struct pxa_device_desc *d = pxa168_device_uart + id;
>
>                if (id < 0 || id > 2)
>                        return -EINVAL;
>
>                return pxa_register_device(d, NULL, 0);
>        }
>
>   (Ditto for the other types obviously.)

That's a good suggestion, yet it came that way for two reasons:

1. the initial naming mess, uart0 was later renamed to uart1, e.g.
2. and the restrictions of PXA{168,910}_DEVICE() macros, these
   macros are handy to simplify the definition, but may require fancy
   tricks to make it support array

>
>  - shouldn't all these pxa_device_descs and pxa168_add_$device functions
>   be __initdata and __init?
>

pxa{168,910}_add_device() are actually 'static inline' so my assumption
is they will be inlined when referenced, otherwise won't occupy any code
space. The *_descs, however, are __initdata, as you can see in the
definitions of the PXA{168,910}_DEVICE() macros.

>  - pxa_register_device is better than my add_resndata function in (at
>   least) one aspect as it sets coherent_dma_mask, too.  This is
>   something I missed when trying to add mxc-mmc (IIRC) devices.
>
> Thanks
> Uwe
>
> --
> Pengutronix e.K.                           | Uwe Kleine-König            |
> Industrial Linux Solutions                 | http://www.pengutronix.de/  |
>

^ permalink raw reply

* Re: Reviewing batman-adv for net/
From: Henning Rogge @ 2010-06-28  5:32 UTC (permalink / raw)
  To: b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Hagen Paul Pfeifer, Marek Lindner,
	David S. Miller
In-Reply-To: <201006280115.25518.lindner_marek-LWAfsSFWpa4@public.gmane.org>

[-- Attachment #1: Type: Text/Plain, Size: 1518 bytes --]

Am Montag 28 Juni 2010, 01:15:23 schrieb Marek Lindner:
> the OLSR as standardized by the IETF is known to be flawed when used
> outside of a simulator (even the IETF Manet people know this - I spoke
> with one of them). We have assembled a few documents explaining some of
> its weaknesses on our website (www.open-mesh.org) but I suggest you get in
> touch with the folks of www.olsr.org. They can go into the details of why
> they don't follow the RFC.
OLSRv1 lacks the support for routing metrics in the IETF RFC document. Most of 
the research groups began integrating routing metrics later, and OLSRv2 (which 
is worked on at the moment, 2 RFC's done, 1 at IESG, 1 as a draft) will 
contain specified ways to include a routing metric on a link and how to use 
it. We were talking about the generic metric encoding some weeks ago on the 
MANET WG list.

Using hopcount metric (as described in the RFC 3626) will result in a "worst 
link first" strategy in wireless networks, because you always optimize for 
long links which will break often.

It's not difficult to integrate a routing metric into OLSRv1, but the old
OLSRv1 RFC (2003) does not specify one. We (the olsr.org team) use a
custom message format to add metric information to our hello/tc messages; the
NRL uses a different one for its own metric-aware OLSR.

Henning Rogge (olsr.org team)

-- 
1) You can't win.
2) You can't break even.
3) You can't leave the game.
— The Laws of Thermodynamics, summarized

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply

* b44: Reset due to FIFO overflow.
From: James Courtier-Dutton @ 2010-06-28  7:41 UTC (permalink / raw)
  To: netdev

Hi,

Reference:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/279102

I can see this bug and can reproduce it 100% on demand.
The problem seems to be that when the b44 has an incoming FIFO buffer
overflow, it resets the entire card, dis-associates with the access
point and therefore takes some time before it can pass traffic again.
Can anyone point me to some code that would just recover the FIFO
instead of reset the entire card?

I am a kernel developer, but I don't have any data sheets on this card
so was hoping someone with more knowledge of its workings, could help
me.

I can then test it, and see if it is a good fix or not.

Kind Regards

James

^ permalink raw reply

* Re: Reviewing batman-adv for net/
From: Sven Eckelmann @ 2010-06-28  8:55 UTC (permalink / raw)
  To: b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Hagen Paul Pfeifer,
	b.a.t.m.a.n-ZwoEplunGu2X36UT3dwlltHuzzzSOjJt, David S. Miller
In-Reply-To: <20100627222706.GC8285@nuttenaction>

[-- Attachment #1: Type: Text/Plain, Size: 2409 bytes --]

Hagen Paul Pfeifer wrote:

Thanks for your comments. Marek already answered the first two questions, and 
I will write something about the last part.

[...]
> o The major code is twisted about bookkeeping stuff, configuration aspects
> and so on. The minor codebase is about protocol processing. What about a
> generalized architecture and a userspace implementation of the protocol?
> And communication via netlink sockets.

Even if it sounds interesting to have a userspace implementation - it just 
doesn't work. Maybe you have something different in mind - so please comment a 
little bit more about your idea.

There were different implementations for that protocol, first one was a 
complete userspace implementation - but it was just slow and not really 
usable. The second one was not using skb because it was only a port of the 
userspace implementation to the kernel. It was faster (better throughput, not 
as much latency added), but still we noticed that we couldn't saturate our 
fast links due to the overhead we added. That was the time the current form of 
processing and forwarding using skbs was implemented. It was quite plain to us 
that those "tiny" parts must be processed as fast as possible and we are not 
able to communicate a lot inside the kernel to get the information we need to 
forward packets or process new ones.

So I would assume that when we communicate with userspace which does some 
processing of the incoming packets and changes them to get forwarded, we would 
have the same overhead problems as before. That means we add unnecessary 
latency and reduce the throughput a lot.

It works quite well for layer 3 mesh protocols (olsr, batman, babel, bmx, ..) 
because they must not care about the actual routing of the packets - 
everything is done by the IP routing code. But it does not work for things 
which must route ethernet frames as there does not exist such a framework and 
it is hard to create one which everyone will like and has enough information 
to provide all features they need. Meshing with ethernet frames is relative 
young (please correct me) and we see all the time that we need more things 
which couldn't be done by layer 3 (or at least with a lot more work). An 
example would be interface alternating, which can reduce 
interference when communicating over many hops.

thanks,
	Sven

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: b44: Reset due to FIFO overflow.
From: Eric Dumazet @ 2010-06-28  9:13 UTC (permalink / raw)
  To: James Courtier-Dutton; +Cc: netdev
In-Reply-To: <AANLkTinwnTw3Yqy3POghliZDRmRYUfJL6Gy67GNNLiYv@mail.gmail.com>

Le lundi 28 juin 2010 à 08:41 +0100, James Courtier-Dutton a écrit :
> Hi,
> 
> Reference:
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/279102
> 
> I can see this bug and can reproduce it 100% on demand.
> The problem seems to be that when the b44 has an incoming FIFO buffer
> overflow, it resets the entire card, dis-associates with the access
> point and therefore takes some time before it can pass traffic again.
> Can anyone point me to some code that would just recover the FIFO
> instead of reset the entire card?
> 
> I am a kernel developer, but I don't have any data sheets on this card
> so was hoping someone with more knowledge of its workings, could help
> me.
> 
> I can then test it, and see if it is a good fix or not.
> 

Hi

The problem is that we don't know whether a receive FIFO overflow is a
minor or a major indication from the b44 chip.

A minor indication would be: the chip tells us one or more frames were lost.
No special action is needed from the driver.

A major indication (as currently implemented in the b44 driver) is:
I am completely out of order and need a reset. Please do it.

A patch to switch from major to minor indication is easy, but we don't know
whether it's valid or not.

diff --git a/drivers/net/b44.h b/drivers/net/b44.h
index e1905a4..514dc3a 100644
--- a/drivers/net/b44.h
+++ b/drivers/net/b44.h
@@ -42,7 +42,7 @@
 #define  ISTAT_EMAC		0x04000000 /* EMAC Interrupt */
 #define  ISTAT_MII_WRITE	0x08000000 /* MII Write Interrupt */
 #define  ISTAT_MII_READ		0x10000000 /* MII Read Interrupt */
-#define  ISTAT_ERRORS (ISTAT_DSCE|ISTAT_DATAE|ISTAT_DPE|ISTAT_RDU|ISTAT_RFO|ISTAT_TFU)
+#define  ISTAT_ERRORS (ISTAT_DSCE|ISTAT_DATAE|ISTAT_DPE|ISTAT_RDU|ISTAT_TFU)
 #define B44_IMASK	0x0024UL /* Interrupt Mask */
 #define  IMASK_DEF		(ISTAT_ERRORS | ISTAT_TO | ISTAT_RX | ISTAT_TX)
 #define B44_GPTIMER	0x0028UL /* General Purpose Timer */

^ permalink raw reply related

* [PATCH -next] e1000e: fail when try to setup unsupported features
From: Stanislaw Gruszka @ 2010-06-28  9:26 UTC (permalink / raw)
  To: netdev; +Cc: Amerigo Wang, Jeff Kirsher

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 drivers/net/e1000e/ethtool.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index 77c5829..6355a1b 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -2051,7 +2051,6 @@ static const struct ethtool_ops e1000_ethtool_ops = {
 	.get_coalesce		= e1000_get_coalesce,
 	.set_coalesce		= e1000_set_coalesce,
 	.get_flags		= ethtool_op_get_flags,
-	.set_flags		= ethtool_op_set_flags,
 };
 
 void e1000e_set_ethtool_ops(struct net_device *netdev)
-- 
1.5.5.6

^ permalink raw reply related

* [PATCH -next] bnx2x: fail when try to setup unsupported features
From: Stanislaw Gruszka @ 2010-06-28  9:28 UTC (permalink / raw)
  To: netdev; +Cc: Amerigo Wang, Eilon Greenstein

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 drivers/net/bnx2x_main.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index 57ff5b3..0809f6c 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -10982,6 +10982,9 @@ static int bnx2x_set_flags(struct net_device *dev, u32 data)
 	int changed = 0;
 	int rc = 0;
 
+	if (data & ~(ETH_FLAG_LRO | ETH_FLAG_RXHASH))
+		return -EOPNOTSUPP;
+
 	if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
 		printk(KERN_ERR "Handling parity error recovery. Try again later\n");
 		return -EAGAIN;
-- 
1.5.5.6


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox