Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH] checkpatch: Add --strict preference for #defines using BIT(foo)
From: Joe Perches @ 2014-11-09 14:22 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: Andrew Morton, David Miller, netdev, LKML
In-Reply-To: <20141109095025.GA1840@nanopsycho.orion>

On Sun, 2014-11-09 at 10:50 +0100, Jiri Pirko wrote:
> Joe, regarding the other Dave's comment, the multiline one, that is also
> not covered by checkpatch. Would you please take care of that as well?

No, as far as I know, it's not feasible given
insertion/deletion, but you are welcome to try.

^ permalink raw reply

* [patch net-next] bridge: rename fdb_*_hw to fdb_*_hw_addr to avoid confusion
From: Jiri Pirko @ 2014-11-09 16:40 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
	azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
	john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
	linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
	buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
	simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
	gospo, bcrl
In-Reply-To: <1415530280-9190-1-git-send-email-jiri@resnulli.us>

The current name might suggest that this actually offloads the fdb entry
to hw. So rename it to make clear that this is for hardware address
addition/removal.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
 net/bridge/br_fdb.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e02d21b..3886f84 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -91,7 +91,7 @@ static void fdb_rcu_free(struct rcu_head *head)
  * are then updated with the new information.
  * Called under RTNL.
  */
-static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr)
+static void fdb_add_hw_addr(struct net_bridge *br, const unsigned char *addr)
 {
 	int err;
 	struct net_bridge_port *p;
@@ -119,7 +119,7 @@ undo:
  * the ports with needed information.
  * Called under RTNL.
  */
-static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr)
+static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
 {
 	struct net_bridge_port *p;
 
@@ -134,7 +134,7 @@ static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr)
 static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
 {
 	if (f->is_static) {
-		fdb_del_hw(br, f->addr.addr);
+		fdb_del_hw_addr(br, f->addr.addr);
 		if (f->dst)
 			netdev_sw_port_fdb_del(f->dst->dev,
 					       f->addr.addr, f->vlan_id);
@@ -519,7 +519,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		return -ENOMEM;
 
 	fdb->is_local = fdb->is_static = 1;
-	fdb_add_hw(br, addr);
+	fdb_add_hw_addr(br, addr);
 	fdb_notify(br, fdb, RTM_NEWNEIGH);
 	return 0;
 }
@@ -759,21 +759,21 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
 			fdb->is_local = 1;
 			if (!fdb->is_static) {
 				fdb->is_static = 1;
-				fdb_add_hw(br, addr);
+				fdb_add_hw_addr(br, addr);
 				netdev_sw_port_fdb_add(source->dev, addr, vid);
 			}
 		} else if (state & NUD_NOARP) {
 			fdb->is_local = 0;
 			if (!fdb->is_static) {
 				fdb->is_static = 1;
-				fdb_add_hw(br, addr);
+				fdb_add_hw_addr(br, addr);
 				netdev_sw_port_fdb_add(source->dev, addr, vid);
 			}
 		} else {
 			fdb->is_local = 0;
 			if (fdb->is_static) {
 				fdb->is_static = 0;
-				fdb_del_hw(br, addr);
+				fdb_del_hw_addr(br, addr);
 				netdev_sw_port_fdb_del(source->dev, addr, vid);
 			}
 		}
-- 
1.9.3

^ permalink raw reply related

* [PATCH] brcmfmac: unlink URB when request timed out
From: Mathy Vanhoef @ 2014-11-09 18:10 UTC (permalink / raw)
  To: brudley-dY08KVG/lbpWk0Htik3J/w, arend-dY08KVG/lbpWk0Htik3J/w,
	frankyl-dY08KVG/lbpWk0Htik3J/w, meuleman-dY08KVG/lbpWk0Htik3J/w,
	linville-2XuSBdqkA4R54TAoqtyWWQ, pieterpg-dY08KVG/lbpWk0Htik3J/w,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	brcm80211-dev-list-dY08KVG/lbpWk0Htik3J/w,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

From: Mathy Vanhoef <vanhoefm-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Unlink the submitted URB in brcmf_usb_dl_cmd if the request timed out. This
ensures the URB is never submitted twice, preventing a driver crash.

Signed-off-by: Mathy Vanhoef <vanhoefm-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
Currently brcmfmac may crash when a USB device is attached (tested with a LG
TWFM-B003D). In particular it fails on the second call to brcmf_usb_dl_cmd in
the while loop of brcmf_usb_resetcfg. The problem is that an URB is being
submitted twice:

[  169.861800] brcmfmac: brcmf_usb_dl_writeimage Enter, fw f14db000, len 348160
[  171.787791] brcmfmac: brcmf_usb_dl_writeimage Exit, err=0
[  171.787797] brcmfmac: brcmf_usb_dlstart Exit, err=0
[  171.787799] brcmfmac: brcmf_usb_dlrun Enter
[  171.791794] brcmfmac: brcmf_usb_resetcfg Enter
[  173.988072] ------------[ cut here ]------------
[  173.988083] WARNING: CPU: 0 PID: 369 at drivers/usb/core/urb.c:339 usb_submit_urb+0x4e6/0x500()
[  173.988085] URB eaf45f00 submitted while active
[  173.988086] Modules linked in: brcmfmac brcmutil vmw_pvscsi pcnet32 mptspi mptscsih mptbase
[  173.988100] CPU: 0 PID: 369 Comm: kworker/0:2 Not tainted 3.18.0-rc3-wl #1
[  173.988102] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013
[  173.988106] Workqueue: events request_firmware_work_func
[  173.988108]  00000000 00000000 ee747db8 c1711f4a ee747df8 ee747de8 c103edaf c18d1e10
[  173.988112]  ee747e14 00000171 c18a8b29 00000153 c1490556 c1490556 eaf45f00 eafdc660
[  173.988115]  f14b8fa0 ee747e00 c103ee4e 00000009 ee747df8 c18d1e10 ee747e14 ee747e50
[  173.988119] Call Trace:
[  173.988129]  [<c1711f4a>] dump_stack+0x41/0x52
[  173.988136]  [<c103edaf>] warn_slowpath_common+0x7f/0xa0
[  173.988139]  [<c1490556>] ? usb_submit_urb+0x4e6/0x500
[  173.988141]  [<c1490556>] ? usb_submit_urb+0x4e6/0x500
[  173.988147]  [<f14b8fa0>] ? brcmf_usb_ioctl_resp_wake+0x40/0x40 [brcmfmac]
[  173.988150]  [<c103ee4e>] warn_slowpath_fmt+0x2e/0x30
[  173.988152]  [<c1490556>] usb_submit_urb+0x4e6/0x500
[  173.988156]  [<c1123de1>] ? __kmalloc+0x21/0x140
[  173.988161]  [<f14b91c3>] ? brcmf_usb_dl_cmd+0x33/0x120 [brcmfmac]
[  173.988166]  [<f14b9243>] brcmf_usb_dl_cmd+0xb3/0x120 [brcmfmac]
[  173.988170]  [<f14ba6c4>] brcmf_usb_probe_phase2+0x4e4/0x640 [brcmfmac]
[  173.988176]  [<f14b4900>] brcmf_fw_request_code_done+0xd0/0xf0 [brcmfmac]
[  173.988178]  [<c1400876>] request_firmware_work_func+0x26/0x50
[  173.988182]  [<c10513ee>] process_one_work+0x11e/0x360
[  173.988184]  [<c1051750>] worker_thread+0xf0/0x3c0
[  173.988205]  [<c106e14a>] ? __wake_up_locked+0x1a/0x20
[  173.988208]  [<c1051660>] ? process_scheduled_works+0x30/0x30
[  173.988211]  [<c1055b56>] kthread+0x96/0xb0
[  173.988214]  [<c1719c81>] ret_from_kernel_thread+0x21/0x30
[  173.988217]  [<c1055ac0>] ? kthread_worker_fn+0x110/0x110
[  173.988219] ---[ end trace 0c88bf46801de083 ]---
[  173.988221] brcmf_usb_dl_cmd: usb_submit_urb failed -16
[  173.988396] brcmfmac: brcmf_usb_probe_phase2 failed: dev=1-1, err=-19
[  173.989503] brcmfmac: brcmf_usb_disconnect Enter

This patch fixes the brcmf_usb_dl_cmd function to prevent an URB from being
submitted twice. Tested using a LG TWFM-B003D, which now works properly.


 drivers/net/wireless/brcm80211/brcmfmac/usb.c |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/brcm80211/brcmfmac/usb.c
index 5265aa7..1bc7858 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/usb.c
@@ -738,10 +738,12 @@ static int brcmf_usb_dl_cmd(struct brcmf_usbdev_info *devinfo, u8 cmd,
 		goto finalize;
 	}
 
-	if (!brcmf_usb_ioctl_resp_wait(devinfo))
+	if (!brcmf_usb_ioctl_resp_wait(devinfo)) {
+		usb_unlink_urb(devinfo->ctl_urb);
 		ret = -ETIMEDOUT;
-	else
+	} else {
 		memcpy(buffer, tmpbuf, buflen);
+	}
 
 finalize:
 	kfree(tmpbuf);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: e100: Laptop battery drain and WoL settings from EEPROM
From: Ondrej Zary @ 2014-11-09 18:18 UTC (permalink / raw)
  To: e1000-devel; +Cc: netdev, Kernel development list
In-Reply-To: <201411090015.41446.linux@rainbow-software.org>

On Sunday 09 November 2014 00:15:41 Ondrej Zary wrote:
> Hello,
> there is a long-standing problem with battery drain after turning off at
> least some Toshiba laptops, see
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/110784
>
> I have the same problem with Toshiba Portege R100. When I shut it down in
> Linux, the battery is drained to zero in a couple of days. I noticed that
> the LAN port is still active, even when AC disconnected.
>
> The WoL is enabled by default by e100 driver:
>
> # ethtool eth0
> Settings for eth0:
>         Supported ports: [ TP MII ]
>         Supported link modes:   10baseT/Half 10baseT/Full
>                                 100baseT/Half 100baseT/Full
>         Supported pause frame use: No
>         Supports auto-negotiation: Yes
>         Advertised link modes:  10baseT/Half 10baseT/Full
>                                 100baseT/Half 100baseT/Full
>         Advertised pause frame use: Symmetric
>         Advertised auto-negotiation: Yes
>         Link partner advertised link modes:  10baseT/Half
>         Link partner advertised pause frame use: No
>         Link partner advertised auto-negotiation: No
>         Speed: 10Mb/s
>         Duplex: Half
>         Port: MII
>         PHYAD: 1
>         Transceiver: internal
>         Auto-negotiation: on
>         Supports Wake-on: g
>         Wake-on: g
>         Current message level: 0x00000007 (7)
>                                drv probe link
>         Link detected: yes
>
> By this code:
>         /* Wol magic packet can be enabled from eeprom */
>         if ((nic->mac >= mac_82558_D101_A4) &&
>            (nic->eeprom[eeprom_id] & eeprom_id_wol)) {
>                 nic->flags |= wol_magic;
>                 device_set_wakeup_enable(&pdev->dev, true);
>         }
>
> because the WoL bit is set in EEPROM ID word:
>
> # ethtool -e eth0
> Offset          Values
> ------          ------
> 0x0000:         xx xx xx xx xx xx 03 1b 00 00 01 02 01 47 00 00
> 0x0010:         00 00 00 00 a2 49 01 00 79 11 7f 00 00 00 00 00
>                             ^^ bit 5 here
> 0x0020:         00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 0x0030:         00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 0x0040:         00 00 00 00 00 00 3d 10 00 00 00 00 00 00 00 00
> 0x0050:         00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 0x0060:         e4 00 3f 40 09 41 00 00 00 00 00 00 00 00 00 00
> 0x0070:         00 00 00 00 00 00 00 00 00 00 00 00 00 00 f7 fe
>
> Looks like this laptop is probably WoL-capable even on battery.
>
> Measured the current from AC adapter:
> around 20mA with WoL inactive (shut down from Windows or by power button in
> GRUB) around 40mA with WoL active (shut down from Linux)
>
> So to work-around this problem, users must disable WoL manually on each
> boot.
>
> Maybe the driver should ignore the EEPROM WoL bit on Toshiba subsystem
> IDs? Or completely, like the Windows driver does?

Suggested patch:

diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 781065e..bd4fe64 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -2949,13 +2949,6 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		}
 	}
 
-	/* Wol magic packet can be enabled from eeprom */
-	if ((nic->mac >= mac_82558_D101_A4) &&
-	   (nic->eeprom[eeprom_id] & eeprom_id_wol)) {
-		nic->flags |= wol_magic;
-		device_set_wakeup_enable(&pdev->dev, true);
-	}
-
 	/* ack any pending wake events, disable PME */
 	pci_pme_active(pdev, false);
 
-- 
Ondrej Zary

------------------------------------------------------------------------------
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel® Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply related

* [RFC PATCH net-next] net: Convert LIMIT_NETDEBUG to net_dbg_ratelimited
From: Joe Perches @ 2014-11-09 19:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel, Remi Denis-Courmont
In-Reply-To: <20141108.204202.2273082249078358608.davem@davemloft.net>

Use the more common dynamic_debug capable net_dbg_ratelimited
and remove the LIMIT_NETDEBUG macro.

This may have some negative impact on messages that were
emitted at KERN_INFO that are now not enabled at all unless
DEBUG is defined or dynamic_debug is enabled.  Even so,
these messages are now _not_ emitted by default.

This eliminates the use of the net_msg_warn sysctl
"/proc/sys/net/core/warnings".

All messages are still ratelimited.

Some KERN_LEVEL uses are changed to KERN_DEBUG.

Miscellanea:

o Update the sysctl documentation
o Remove the embedded uses of pr_fmt
o Coalesce format fragments
o Realign arguments

Signed-off-by: Joe Perches <joe@perches.com>
---

Let me know if you want this consolidated patch broken up
into multiple patches or any of the messages and the
macro kept.

 Documentation/sysctl/net.txt | 12 ++++++++----
 include/net/sock.h           |  8 +-------
 include/net/udplite.h        |  6 +++---
 net/ipv4/icmp.c              |  8 ++++----
 net/ipv4/inet_fragment.c     |  2 +-
 net/ipv4/ip_fragment.c       |  3 +--
 net/ipv4/tcp_input.c         |  8 ++++----
 net/ipv4/tcp_timer.c         | 18 ++++++++++--------
 net/ipv4/udp.c               | 30 +++++++++++++++---------------
 net/ipv6/addrconf.c          |  6 ++----
 net/ipv6/ah6.c               |  7 +++----
 net/ipv6/datagram.c          |  4 ++--
 net/ipv6/esp6.c              |  4 ++--
 net/ipv6/exthdrs.c           | 18 +++++++++---------
 net/ipv6/icmp.c              | 15 +++++++--------
 net/ipv6/mip6.c              | 11 ++++++-----
 net/ipv6/netfilter.c         |  2 +-
 net/ipv6/udp.c               | 31 +++++++++++++------------------
 net/phonet/af_phonet.c       |  9 +++++----
 net/phonet/pep-gprs.c        |  3 +--
 net/phonet/pep.c             | 12 ++++++------
 21 files changed, 104 insertions(+), 113 deletions(-)

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 04892b8..46cd03d 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -120,10 +120,14 @@ seconds.
 warnings
 --------
 
-This controls console messages from the networking stack that can occur because
-of problems on the network like duplicate address or bad checksums. Normally,
-this should be enabled, but if the problem persists the messages can be
-disabled.
+This sysctl is now unused.
+
+This was used to control console messages from the networking stack that
+occur because of problems on the network like duplicate address or bad
+checksums.
+
+These messages are now emitted at KERN_DEBUG and can generally be enabled
+and controlled by the dynamic_debug facility.
 
 netdev_budget
 -------------
diff --git a/include/net/sock.h b/include/net/sock.h
index 6767d75..db363ad 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2276,13 +2276,7 @@ bool sk_ns_capable(const struct sock *sk,
 bool sk_capable(const struct sock *sk, int cap);
 bool sk_net_capable(const struct sock *sk, int cap);
 
-/*
- *	Enable debug/info messages
- */
-extern int net_msg_warn;
-#define LIMIT_NETDEBUG(fmt, args...) \
-	do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0)
-
+extern int net_msg_warn;	/* Unused, but still a sysctl */
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
 
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 2caadab..9a28a51 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -40,7 +40,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
          * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets
          * with a zero checksum field are illegal.                            */
 	if (uh->check == 0) {
-		LIMIT_NETDEBUG(KERN_DEBUG "UDPLite: zeroed checksum field\n");
+		net_dbg_ratelimited("UDPLite: zeroed checksum field\n");
 		return 1;
 	}
 
@@ -52,8 +52,8 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
 		/*
 		 * Coverage length violates RFC 3828: log and discard silently.
 		 */
-		LIMIT_NETDEBUG(KERN_DEBUG "UDPLite: bad csum coverage %d/%d\n",
-			       cscov, skb->len);
+		net_dbg_ratelimited("UDPLite: bad csum coverage %d/%d\n",
+				    cscov, skb->len);
 		return 1;
 
 	} else if (cscov < skb->len) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5882f58..36b7bfa 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -784,8 +784,8 @@ static void icmp_unreach(struct sk_buff *skb)
 			 */
 			switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
 			default:
-				LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
-					       &iph->daddr);
+				net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
+						    &iph->daddr);
 				break;
 			case 2:
 				goto out;
@@ -798,8 +798,8 @@ static void icmp_unreach(struct sk_buff *skb)
 			}
 			break;
 		case ICMP_SR_FAILED:
-			LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: Source Route Failed\n"),
-				       &iph->daddr);
+			net_dbg_ratelimited("%pI4: Source Route Failed\n",
+					    &iph->daddr);
 			break;
 		default:
 			break;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 19419b6..e792035 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -458,6 +458,6 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
 		". Dropping fragment.\n";
 
 	if (PTR_ERR(q) == -ENOBUFS)
-		LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg);
+		net_dbg_ratelimited("%s%s", prefix, msg);
 }
 EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4d964da..e5b6d0d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -618,8 +618,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	return 0;
 
 out_nomem:
-	LIMIT_NETDEBUG(KERN_ERR pr_fmt("queue_glue: no memory for gluing queue %p\n"),
-		       qp);
+	net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp);
 	err = -ENOMEM;
 	goto out_fail;
 out_oversize:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5f979c7..d91436b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5854,12 +5854,12 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
 	struct inet_request_sock *ireq = inet_rsk(req);
 
 	if (family == AF_INET)
-		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
-			       &ireq->ir_rmt_addr, port);
+		net_dbg_ratelimited("drop open request from %pI4/%u\n",
+				    &ireq->ir_rmt_addr, port);
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (family == AF_INET6)
-		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"),
-			       &ireq->ir_v6_rmt_addr, port);
+		net_dbg_ratelimited("drop open request from %pI6/%u\n",
+				    &ireq->ir_v6_rmt_addr, port);
 #endif
 }
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 9b21ae8..1829c7f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -374,17 +374,19 @@ void tcp_retransmit_timer(struct sock *sk)
 		 */
 		struct inet_sock *inet = inet_sk(sk);
 		if (sk->sk_family == AF_INET) {
-			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
-				       &inet->inet_daddr,
-				       ntohs(inet->inet_dport), inet->inet_num,
-				       tp->snd_una, tp->snd_nxt);
+			net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
+					    &inet->inet_daddr,
+					    ntohs(inet->inet_dport),
+					    inet->inet_num,
+					    tp->snd_una, tp->snd_nxt);
 		}
 #if IS_ENABLED(CONFIG_IPV6)
 		else if (sk->sk_family == AF_INET6) {
-			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
-				       &sk->sk_v6_daddr,
-				       ntohs(inet->inet_dport), inet->inet_num,
-				       tp->snd_una, tp->snd_nxt);
+			net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
+					    &sk->sk_v6_daddr,
+					    ntohs(inet->inet_dport),
+					    inet->inet_num,
+					    tp->snd_una, tp->snd_nxt);
 		}
 #endif
 		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5d0fdca..b809f81 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1051,7 +1051,7 @@ back_from_confirm:
 		/* ... which is an evident application bug. --ANK */
 		release_sock(sk);
 
-		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("cork app bug 2\n"));
+		net_dbg_ratelimited("cork app bug 2\n");
 		err = -EINVAL;
 		goto out;
 	}
@@ -1133,7 +1133,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
 	if (unlikely(!up->pending)) {
 		release_sock(sk);
 
-		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("udp cork app bug 3\n"));
+		net_dbg_ratelimited("udp cork app bug 3\n");
 		return -EINVAL;
 	}
 
@@ -1546,8 +1546,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		 * provided by the application."
 		 */
 		if (up->pcrlen == 0) {          /* full coverage was set  */
-			LIMIT_NETDEBUG(KERN_WARNING "UDPLite: partial coverage %d while full coverage %d requested\n",
-				       UDP_SKB_CB(skb)->cscov, skb->len);
+			net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
+					    UDP_SKB_CB(skb)->cscov, skb->len);
 			goto drop;
 		}
 		/* The next case involves violating the min. coverage requested
@@ -1557,8 +1557,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		 * Therefore the above ...()->partial_cov statement is essential.
 		 */
 		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
-			LIMIT_NETDEBUG(KERN_WARNING "UDPLite: coverage %d too small, need min %d\n",
-				       UDP_SKB_CB(skb)->cscov, up->pcrlen);
+			net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
+					    UDP_SKB_CB(skb)->cscov, up->pcrlen);
 			goto drop;
 		}
 	}
@@ -1827,11 +1827,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	return 0;
 
 short_packet:
-	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
-		       proto == IPPROTO_UDPLITE ? "Lite" : "",
-		       &saddr, ntohs(uh->source),
-		       ulen, skb->len,
-		       &daddr, ntohs(uh->dest));
+	net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
+			    proto == IPPROTO_UDPLITE ? "Lite" : "",
+			    &saddr, ntohs(uh->source),
+			    ulen, skb->len,
+			    &daddr, ntohs(uh->dest));
 	goto drop;
 
 csum_error:
@@ -1839,10 +1839,10 @@ csum_error:
 	 * RFC1122: OK.  Discards the bad packet silently (as far as
 	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
 	 */
-	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
-		       proto == IPPROTO_UDPLITE ? "Lite" : "",
-		       &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
-		       ulen);
+	net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
+			    proto == IPPROTO_UDPLITE ? "Lite" : "",
+			    &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
+			    ulen);
 	UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
 drop:
 	UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 06e8978..251fcb4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1411,10 +1411,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
 
 			if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
 				     score->addr_type & IPV6_ADDR_MULTICAST)) {
-				LIMIT_NETDEBUG(KERN_DEBUG
-					       "ADDRCONF: unspecified / multicast address "
-					       "assigned as unicast address on %s",
-					       dev->name);
+				net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
+						    dev->name);
 				continue;
 			}
 
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 6d16eb0..8ab1989 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -272,10 +272,9 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
 				ipv6_rearrange_destopt(iph, exthdr.opth);
 		case NEXTHDR_HOP:
 			if (!zero_out_mutable_opts(exthdr.opth)) {
-				LIMIT_NETDEBUG(
-					KERN_WARNING "overrun %sopts\n",
-					nexthdr == NEXTHDR_HOP ?
-						"hop" : "dest");
+				net_dbg_ratelimited("overrun %sopts\n",
+						    nexthdr == NEXTHDR_HOP ?
+						    "hop" : "dest");
 				return -EINVAL;
 			}
 			break;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 5c6996e..cc11396 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -893,8 +893,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 			break;
 		    }
 		default:
-			LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
-				       cmsg->cmsg_type);
+			net_dbg_ratelimited("invalid cmsg type: %d\n",
+					    cmsg->cmsg_type);
 			err = -EINVAL;
 			goto exit_f;
 		}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index d21d7b2..d2c2d74 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -286,8 +286,8 @@ static int esp_input_done2(struct sk_buff *skb, int err)
 	err = -EINVAL;
 	padlen = nexthdr[0];
 	if (padlen + 2 + alen >= elen) {
-		LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage "
-			       "padlen=%d, elen=%d\n", padlen + 2, elen - alen);
+		net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
+				    padlen + 2, elen - alen);
 		goto out;
 	}
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 601d896..a7bbbe4 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -184,7 +184,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
 	int ret;
 
 	if (opt->dsthao) {
-		LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
+		net_dbg_ratelimited("hao duplicated\n");
 		goto discard;
 	}
 	opt->dsthao = opt->dst1;
@@ -193,14 +193,14 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
 	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
 
 	if (hao->length != 16) {
-		LIMIT_NETDEBUG(
-			KERN_DEBUG "hao invalid option length = %d\n", hao->length);
+		net_dbg_ratelimited("hao invalid option length = %d\n",
+				    hao->length);
 		goto discard;
 	}
 
 	if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
-		LIMIT_NETDEBUG(
-			KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr);
+		net_dbg_ratelimited("hao is not an unicast addr: %pI6\n",
+				    &hao->addr);
 		goto discard;
 	}
 
@@ -551,8 +551,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
 		memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra));
 		return true;
 	}
-	LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
-		       nh[optoff + 1]);
+	net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n",
+			    nh[optoff + 1]);
 	kfree_skb(skb);
 	return false;
 }
@@ -566,8 +566,8 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
 	u32 pkt_len;
 
 	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
-		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
-			       nh[optoff+1]);
+		net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
+				    nh[optoff+1]);
 		IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
 				 IPSTATS_MIB_INHDRERRORS);
 		goto drop;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 62c1037..0929340 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -338,7 +338,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
 	 * anycast.
 	 */
 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
+		net_dbg_ratelimited("icmp6_send: acast source\n");
 		dst_release(dst);
 		return ERR_PTR(-EINVAL);
 	}
@@ -452,7 +452,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	 *	and anycast addresses will be checked later.
 	 */
 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
+		net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n");
 		return;
 	}
 
@@ -460,7 +460,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	 *	Never answer to a ICMP packet.
 	 */
 	if (is_ineligible(skb)) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
+		net_dbg_ratelimited("icmp6_send: no reply to icmp error\n");
 		return;
 	}
 
@@ -509,7 +509,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	len = skb->len - msg.offset;
 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
 	if (len < 0) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
+		net_dbg_ratelimited("icmp: len problem\n");
 		goto out_dst_release;
 	}
 
@@ -706,9 +706,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	daddr = &ipv6_hdr(skb)->daddr;
 
 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
-		LIMIT_NETDEBUG(KERN_DEBUG
-			       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
-			       saddr, daddr);
+		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
+				    saddr, daddr);
 		goto csum_error;
 	}
 
@@ -781,7 +780,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		if (type & ICMPV6_INFOMSG_MASK)
 			break;
 
-		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
+		net_dbg_ratelimited("icmpv6: msg of unknown type\n");
 
 		/*
 		 * error of unknown type.
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index f61429d..b9779d4 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -97,16 +97,17 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 		return -1;
 
 	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
-		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
-			       mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
+		net_dbg_ratelimited("mip6: MH message too short: %d vs >=%d\n",
+				    mh->ip6mh_hdrlen,
+				    mip6_mh_len(mh->ip6mh_type));
 		mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) +
 				skb_network_header_len(skb));
 		return -1;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
-		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
-			       mh->ip6mh_proto);
+		net_dbg_ratelimited("mip6: MH invalid payload proto = %d\n",
+				    mh->ip6mh_proto);
 		mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) +
 				skb_network_header_len(skb));
 		return -1;
@@ -288,7 +289,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 			 * XXX: packet if HAO exists.
 			 */
 			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
-				LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n");
+				net_dbg_ratelimited("mip6: hao exists already, override\n");
 				return offset;
 			}
 
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d38e6a8..398377a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -36,7 +36,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
 	err = dst->error;
 	if (err) {
 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
-		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
+		net_dbg_ratelimited("ip6_route_me_harder: No more route\n");
 		dst_release(dst);
 		return err;
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b756355..db3652c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -659,15 +659,13 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
 
 		if (up->pcrlen == 0) {          /* full coverage was set  */
-			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
-				" %d while full coverage %d requested\n",
-				UDP_SKB_CB(skb)->cscov, skb->len);
+			net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
+					    UDP_SKB_CB(skb)->cscov, skb->len);
 			goto drop;
 		}
 		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
-			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
-						    "too small, need min %d\n",
-				       UDP_SKB_CB(skb)->cscov, up->pcrlen);
+			net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
+					    UDP_SKB_CB(skb)->cscov, up->pcrlen);
 			goto drop;
 		}
 	}
@@ -760,9 +758,9 @@ static void udp6_csum_zero_error(struct sk_buff *skb)
 	/* RFC 2460 section 8.1 says that we SHOULD log
 	 * this error. Well, it is reasonable.
 	 */
-	LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
-		       &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
-		       &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
+	net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+			    &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+			    &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
 }
 
 /*
@@ -930,14 +928,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	return 0;
 
 short_packet:
-	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
-		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
-		       saddr,
-		       ntohs(uh->source),
-		       ulen,
-		       skb->len,
-		       daddr,
-		       ntohs(uh->dest));
+	net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
+			    proto == IPPROTO_UDPLITE ? "-Lite" : "",
+			    saddr, ntohs(uh->source),
+			    ulen, skb->len,
+			    daddr, ntohs(uh->dest));
 	goto discard;
 csum_error:
 	UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
@@ -1289,7 +1284,7 @@ back_from_confirm:
 		/* ... which is an evident application bug. --ANK */
 		release_sock(sk);
 
-		LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
+		net_dbg_ratelimited("udp cork app bug 2\n");
 		err = -EINVAL;
 		goto out;
 	}
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 5a940db..32ab87d 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -426,16 +426,17 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 		out_dev = phonet_route_output(net, pn_sockaddr_get_addr(&sa));
 		if (!out_dev) {
-			LIMIT_NETDEBUG(KERN_WARNING"No Phonet route to %02X\n",
-					pn_sockaddr_get_addr(&sa));
+			net_dbg_ratelimited("No Phonet route to %02X\n",
+					    pn_sockaddr_get_addr(&sa));
 			goto out;
 		}
 
 		__skb_push(skb, sizeof(struct phonethdr));
 		skb->dev = out_dev;
 		if (out_dev == dev) {
-			LIMIT_NETDEBUG(KERN_ERR"Phonet loop to %02X on %s\n",
-					pn_sockaddr_get_addr(&sa), dev->name);
+			net_dbg_ratelimited("Phonet loop to %02X on %s\n",
+					    pn_sockaddr_get_addr(&sa),
+					    dev->name);
 			goto out_dev;
 		}
 		/* Some drivers (e.g. TUN) do not allocate HW header space */
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index e9a83a6..fa8237f 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -203,8 +203,7 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev)
 	len = skb->len;
 	err = pep_write(sk, skb);
 	if (err) {
-		LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n",
-				dev->name, err);
+		net_dbg_ratelimited("%s: TX error (%d)\n", dev->name, err);
 		dev->stats.tx_aborted_errors++;
 		dev->stats.tx_errors++;
 	} else {
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 44b2123..9cd069d 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -272,8 +272,8 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 
 	hdr = pnp_hdr(skb);
 	if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
-		LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n",
-				(unsigned int)hdr->data[0]);
+		net_dbg_ratelimited("Phonet unknown PEP type: %u\n",
+				    (unsigned int)hdr->data[0]);
 		return -EOPNOTSUPP;
 	}
 
@@ -304,8 +304,8 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 		break;
 
 	default:
-		LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP indication: %u\n",
-				(unsigned int)hdr->data[1]);
+		net_dbg_ratelimited("Phonet unknown PEP indication: %u\n",
+				    (unsigned int)hdr->data[1]);
 		return -EOPNOTSUPP;
 	}
 	if (wake)
@@ -451,8 +451,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		break;
 
 	default:
-		LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP message: %u\n",
-				hdr->message_id);
+		net_dbg_ratelimited("Phonet unknown PEP message: %u\n",
+				    hdr->message_id);
 		err = -EINVAL;
 	}
 out:

^ permalink raw reply related

* BUG ? - natsemi: Allow users to disable workaround for DspCfg reset
From: devzero @ 2014-11-09 19:44 UTC (permalink / raw)
  To: netdev; +Cc: Jeff Garzik, Mark Brown

Hi, 

i wanted to get rid of some heavy natsemi driver dmesg spamming like

Apr  1 11:50:06 debian7 kernel: [ 4400.311000] eth0: possible phy reset: re-initializing
Apr  1 11:50:06 debian7 kernel: [ 4400.311319] eth0: DSPCFG accepted after 0 usec.
Apr  1 11:50:06 debian7 kernel: [ 4400.311433] eth0: Wake-up event 0x80000b
Apr  1 11:50:06 debian7 kernel: [ 4400.311537] eth0: Setting full-duplex based on negotiated link capability.

and was happy to find a module param - but i'm confused about the dspcfg_workaround parameter usage.

i can indeed disable the spamming by setting natsemi.dspcfg_workaround=0 at boot.

but shouldn't /sys/devices/pci0000:00/0000:00:0f.0/dspcfg_workaround reflect the current state then, and shouldn't i be able to set this also via echo 0|1 or off|on >..... at runtime? that does not seem to work. it always shows "off" and i cannot change the behaviour via this file.

regards
roland

references:
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=1a14780960888c97371a9918f42c4dbe6957efb4

ps:
jeff/mark - pardon, i sent the mail a second time because i mistyped the mailing list address due to the marc.info list naming...

^ permalink raw reply

* Re: [PATCH 1/4] inet: Add skb_copy_datagram_iter
From: Al Viro @ 2014-11-09 21:19 UTC (permalink / raw)
  To: David Miller; +Cc: herbert, netdev, linux-kernel, bcrl, Michael S. Tsirkin
In-Reply-To: <20141107234253.GE7996@ZenIV.linux.org.uk>

[Michael Cc'd]

On Fri, Nov 07, 2014 at 11:42:53PM +0000, Al Viro wrote:

> I'll finish RTFS drivers/vhost and if it turns out to be OK I'll post the
> series moving those checks to the moment of copying iovec from userland,
> so that kernel-side we could always rely on ->msg_iov elements having been
> verified.

Two questions:
1) does sparc64 access_ok() need to differ for 32bit and 64bit tasks?
AFAICS, x86 and ppc just check that address is OK for 64bit process -
if a 32bit process passes the kernel an address that would be valid
for 64bit process, but not for 32bit one, we just get a pagefault in
__copy_from_user() and friends.  No kernel objects are going to have
a virtual address in that range, so access_ok() doesn't bother preventing
such access attempts there...

2) shouldn't vhost_dev_cleanup() stop the worker thread before doing anything
else?  AFAICS, we do parts of vhost_dev teardown while the thread is
still running; granted, we keep dev->mm pinned down until after it stops
(or we would be _really_ screwed), but is it safe to do all those fput()s, etc.
while it's still running?  Michael?

^ permalink raw reply

* Re: e100: Laptop battery drain and WoL settings from EEPROM
From: Francois Romieu @ 2014-11-09 23:34 UTC (permalink / raw)
  To: Ondrej Zary; +Cc: e1000-devel, netdev, Kernel development list
In-Reply-To: <201411091918.04036.linux@rainbow-software.org>

Ondrej Zary <linux@rainbow-software.org> :
[...]
> > Looks like this laptop is probably WoL-capable even on battery.
> >
> > Measured the current from AC adapter:
> > around 20mA with WoL inactive (shut down from Windows or by power button in
> > GRUB) around 40mA with WoL active (shut down from Linux)
> >
> > So to work-around this problem, users must disable WoL manually on each
> > boot.

Or configure udev so that ethtool disables WoL.

The current policy has been here for ages. Some users may rely on it.

There must be some strong rationale for their setup to have to be changed.

-- 
Ueimor

^ permalink raw reply

* [PATCH net-next] dsa: Use netdev_<level> instead of printk
From: Joe Perches @ 2014-11-10  0:32 UTC (permalink / raw)
  To: netdev; +Cc: LKML, Florian Fainelli

Neaten and standardize the logging output.

Other miscellanea:

o Use pr_notice_once instead of a guard flag.
o Convert existing pr_<level> uses too.

Signed-off-by: Joe Perches <joe@perches.com>
---
 net/dsa/dsa.c   | 28 ++++++++++++----------------
 net/dsa/slave.c | 10 +++++-----
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index dd646a8..4648f12 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -192,12 +192,12 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 	 */
 	drv = dsa_switch_probe(host_dev, pd->sw_addr, &name);
 	if (drv == NULL) {
-		printk(KERN_ERR "%s[%d]: could not detect attached switch\n",
-		       dst->master_netdev->name, index);
+		netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
+			   index);
 		return ERR_PTR(-EINVAL);
 	}
-	printk(KERN_INFO "%s[%d]: detected a %s switch\n",
-		dst->master_netdev->name, index, name);
+	netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n",
+		    index, name);
 
 
 	/*
@@ -225,7 +225,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 
 		if (!strcmp(name, "cpu")) {
 			if (dst->cpu_switch != -1) {
-				printk(KERN_ERR "multiple cpu ports?!\n");
+				netdev_err(dst->master_netdev,
+					   "multiple cpu ports?!\n");
 				ret = -EINVAL;
 				goto out;
 			}
@@ -320,10 +321,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 
 		slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]);
 		if (slave_dev == NULL) {
-			printk(KERN_ERR "%s[%d]: can't create dsa "
-			       "slave device for port %d(%s)\n",
-			       dst->master_netdev->name,
-			       index, i, pd->port_names[i]);
+			netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s)\n",
+				   index, i, pd->port_names[i]);
 			continue;
 		}
 
@@ -701,15 +700,13 @@ static inline void dsa_of_remove(struct platform_device *pdev)
 
 static int dsa_probe(struct platform_device *pdev)
 {
-	static int dsa_version_printed;
 	struct dsa_platform_data *pd = pdev->dev.platform_data;
 	struct net_device *dev;
 	struct dsa_switch_tree *dst;
 	int i, ret;
 
-	if (!dsa_version_printed++)
-		printk(KERN_NOTICE "Distributed Switch Architecture "
-			"driver version %s\n", dsa_driver_version);
+	pr_notice_once("Distributed Switch Architecture driver version %s\n",
+		       dsa_driver_version);
 
 	if (pdev->dev.of_node) {
 		ret = dsa_of_probe(pdev);
@@ -753,9 +750,8 @@ static int dsa_probe(struct platform_device *pdev)
 
 		ds = dsa_switch_setup(dst, i, &pdev->dev, pd->chip[i].host_dev);
 		if (IS_ERR(ds)) {
-			printk(KERN_ERR "%s[%d]: couldn't create dsa switch "
-				"instance (error %ld)\n", dev->name, i,
-				PTR_ERR(ds));
+			netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
+				   i, PTR_ERR(ds));
 			continue;
 		}
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 0ea466d..528380a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -532,7 +532,7 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p,
 		 */
 		ret = of_phy_register_fixed_link(port_dn);
 		if (ret) {
-			pr_err("failed to register fixed PHY\n");
+			netdev_err(slave_dev, "failed to register fixed PHY\n");
 			return;
 		}
 		phy_is_fixed = true;
@@ -558,8 +558,8 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p,
 		phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
 				   p->phy_interface);
 	} else {
-		pr_info("attached PHY at address %d [%s]\n",
-			p->phy->addr, p->phy->drv->name);
+		netdev_info(slave_dev, "attached PHY at address %d [%s]\n",
+			    p->phy->addr, p->phy->drv->name);
 	}
 }
 
@@ -657,8 +657,8 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 
 	ret = register_netdev(slave_dev);
 	if (ret) {
-		printk(KERN_ERR "%s: error %d registering interface %s\n",
-				master->name, ret, slave_dev->name);
+		netdev_err(master, "error %d registering interface %s\n",
+			   ret, slave_dev->name);
 		free_netdev(slave_dev);
 		return NULL;
 	}

^ permalink raw reply related

* Re: Face some error after applying commit 7dfa4b414d4(net/mlx4_en: Code cleanups in tx path)
From: Wei Yang @ 2014-11-10  1:59 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Wei Yang, Amir Vadai, David Miller, netdev
In-Reply-To: <CANn89i+ekXHJSePzQ0rWx2KKqwYGTrok3-ZZ1RdEygVJcGDqRQ@mail.gmail.com>

On Fri, Nov 07, 2014 at 07:38:15PM -0800, Eric Dumazet wrote:
>On Fri, Nov 7, 2014 at 6:57 PM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
>> Eric and Amir
>>
>> I am testing the VF on PowerNV platform with 3.18-rc2.
>> After applying this patch I face some errors.
>>
>> First is the compiling error.
>>
>>     drivers/net/ethernet/mellanox/mlx4//en_tx.c: In function ‘mlx4_en_xmit’:
>>     drivers/net/ethernet/mellanox/mlx4//en_tx.c:802:8: error: ‘shinfo’ undeclared (first use in this function)
>>             shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
>>             ^
>>     include/linux/compiler.h:160:42: note: in definition of macro ‘unlikely’
>>      # define unlikely(x) __builtin_expect(!!(x), 0)
>>                                               ^
>>     drivers/net/ethernet/mellanox/mlx4//en_tx.c:802:8: note: each undeclared identifier is reported only once for each function it appears in
>>             shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
>>             ^
>>     include/linux/compiler.h:160:42: note: in definition of macro ‘unlikely’
>>      # define unlikely(x) __builtin_expect(!!(x), 0)
>>                                               ^
>>     make[1]: *** [drivers/net/ethernet/mellanox/mlx4//en_tx.o] Error 1
>>     make: *** [_module_drivers/net/ethernet/mellanox/mlx4/] Error 2
>>
>
>
>This compilation error seems strange.
>
>Are you sure your tree is pristine, not corrupted in any way ?

I believe I did the revert one by one with git revert.

>
>
>> I tried to fix this with following change:
>>
>>     [root@tian-lp1 3.18]# git diff
>>     diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/m
>>     index eaf23eb..d2f06a7 100644
>>     --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>>     +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>>     @@ -799,8 +799,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_dev
>>              * set flag for further reference
>>              */
>>             if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
>>     -                    shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
>>     -               shinfo->tx_flags |= SKBTX_IN_PROGRESS;
>>     +                    skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
>>     +               skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
>>                     tx_info->ts_requested = 1;
>>             }
>>
>> But seems to face another error.
>>
>
>I suspect your tree is not the official tree, I do not see how you got
>this compilation error.


I checked the upstream git tree again, and find this commit:

https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7dfa4b414d4eec8da56e44fb2b4aea3e549b092f


And I want to say the shinfo local variable is introduced in commit:

https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=b9d8839a44092cb4268ef2813c34d5dbf3363603

And in my log tree (I also checked upstream), this one is applied after the
first one. So the compile error will not disappear until I apply this one.

So this compile issue can't be reproduced on your side? You have reset --hard
to the "Code cleanup" one, and can't see the error? That is strange.

-- 
Richard Yang
Help you, Help me

^ permalink raw reply

* Re: Face some error after applying commit 7dfa4b414d4(net/mlx4_en: Code cleanups in tx path)
From: Wei Yang @ 2014-11-10  2:07 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Amir Vadai, David Miller, netdev
In-Reply-To: <20141110015933.GB6294@richard>

This is the git output on my machine.

[ywywyang@tian-lp1 3.18]$ git status 
# On branch p8-sriov-3.18-rc2-mlx4
# Your branch is behind 'origin/p8-sriov-3.18-rc2-mlx4' by 14 commits, and can be fast-forwarded.
#   (use "git pull" to update your local branch)
#
nothing to commit, working directory clean
[ywywyang@tian-lp1 3.18]$ git oneline -40
40c4198 Revert "net/mlx4_en: Align tx path structures to cache lines"
7d071a0 Revert "net/mlx4_en: Avoid calling bswap in tx fast path"
5aa717e Revert "net/mlx4_en: tx_info allocated with kmalloc() instead of vmalloc
77ab7f7 Revert "net/mlx4_en: Avoid a cache line miss in TX completion for single
6fee4f6 Revert "net/mlx4_en: Use prefetch in tx path"
6509cd2 Revert "net/mlx4_en: Avoid false sharing in mlx4_en_en_process_tx_cq()"
50b7df1 Revert "net/mlx4_en: mlx4_en_xmit() reads ring->cons once, and ahead of 
a0cc8e8 Revert "net/mlx4_en: Use local var in tx flow for skb_shinfo(skb)"
8cc9b1d Revert "net/mlx4_en: Use local var for skb_headlen(skb)"
2894ad1 Revert "net/mlx4_en: tx_info->ts_requested was not cleared"
10f191f Revert "net/mlx4_en: Enable the compiler to make is_inline() inlined"
5b6e300 Revert "net/mlx4_en: Use the new tx_copybreak to set inline threshold"
277c194 Revert "net/mlx4_en: remove NETDEV_TX_BUSY"
3f626de Revert "net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers"
c1d0c02 Revert "mlx4: fix race accessing page->_count"
cac7f24 Linux 3.18-rc2

You can see that the current tree is clean and based on v3.18-rc2.

On Mon, Nov 10, 2014 at 09:59:33AM +0800, Wei Yang wrote:
>On Fri, Nov 07, 2014 at 07:38:15PM -0800, Eric Dumazet wrote:
>>On Fri, Nov 7, 2014 at 6:57 PM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
>>> Eric and Amir
>>>
>>> I am testing the VF on PowerNV platform with 3.18-rc2.
>>> After applying this patch I face some errors.
>>>
>>> First is the compiling error.
>>>
>>>     drivers/net/ethernet/mellanox/mlx4//en_tx.c: In function ‘mlx4_en_xmit’:
>>>     drivers/net/ethernet/mellanox/mlx4//en_tx.c:802:8: error: ‘shinfo’ undeclared (first use in this function)
>>>             shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
>>>             ^
>>>     include/linux/compiler.h:160:42: note: in definition of macro ‘unlikely’
>>>      # define unlikely(x) __builtin_expect(!!(x), 0)
>>>                                               ^
>>>     drivers/net/ethernet/mellanox/mlx4//en_tx.c:802:8: note: each undeclared identifier is reported only once for each function it appears in
>>>             shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
>>>             ^
>>>     include/linux/compiler.h:160:42: note: in definition of macro ‘unlikely’
>>>      # define unlikely(x) __builtin_expect(!!(x), 0)
>>>                                               ^
>>>     make[1]: *** [drivers/net/ethernet/mellanox/mlx4//en_tx.o] Error 1
>>>     make: *** [_module_drivers/net/ethernet/mellanox/mlx4/] Error 2
>>>
>>
>>
>>This compilation error seems strange.
>>
>>Are you sure your tree is pristine, not corrupted in any way ?
>
>I believe I did the revert one by one with git revert.
>
>>
>>
>>> I tried to fix this with following change:
>>>
>>>     [root@tian-lp1 3.18]# git diff
>>>     diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/m
>>>     index eaf23eb..d2f06a7 100644
>>>     --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>>>     +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
>>>     @@ -799,8 +799,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_dev
>>>              * set flag for further reference
>>>              */
>>>             if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
>>>     -                    shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
>>>     -               shinfo->tx_flags |= SKBTX_IN_PROGRESS;
>>>     +                    skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
>>>     +               skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
>>>                     tx_info->ts_requested = 1;
>>>             }
>>>
>>> But seems to face another error.
>>>
>>
>>I suspect your tree is not the official tree, I do not see how you got
>>this compilation error.
>
>
>I checked the upstream git tree again, and find this commit:
>
>https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7dfa4b414d4eec8da56e44fb2b4aea3e549b092f
>
>
>And I want to say the shinfo local variable is introduced in commit:
>
>https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=b9d8839a44092cb4268ef2813c34d5dbf3363603
>
>And in my log tree, also checked the upstream, this one is applyed after the
>first one. And the compiling error will disappear untill I apply this one.
>
>So this compiling issue can't reproduced at your side? You have reset --hard
>to the "Code cleanup" one, and can't see the error? That is strange.
>
>-- 
>Richard Yang
>Help you, Help me

-- 
Richard Yang
Help you, Help me

^ permalink raw reply

* Re: Face some error after applying commit 7dfa4b414d4(net/mlx4_en: Code cleanups in tx path)
From: Eric Dumazet @ 2014-11-10  2:46 UTC (permalink / raw)
  To: Wei Yang; +Cc: Eric Dumazet, Amir Vadai, David Miller, netdev
In-Reply-To: <20141110015933.GB6294@richard>

On Mon, 2014-11-10 at 09:59 +0800, Wei Yang wrote:
> On Fri, Nov 07, 2014 at 07:38:15PM -0800, Eric Dumazet wrote:
> >On Fri, Nov 7, 2014 at 6:57 PM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
> >> Eric and Amir
> >>
> >> I am testing the VF on PowerNV platform with 3.18-rc2.
> >> After applying this patch I face some errors.
> >>
> >> First is the compiling error.
> >>
> >>     drivers/net/ethernet/mellanox/mlx4//en_tx.c: In function ‘mlx4_en_xmit’:
> >>     drivers/net/ethernet/mellanox/mlx4//en_tx.c:802:8: error: ‘shinfo’ undeclared (first use in this function)
> >>             shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
> >>             ^
> >>     include/linux/compiler.h:160:42: note: in definition of macro ‘unlikely’
> >>      # define unlikely(x) __builtin_expect(!!(x), 0)
> >>                                               ^
> >>     drivers/net/ethernet/mellanox/mlx4//en_tx.c:802:8: note: each undeclared identifier is reported only once for each function it appears in
> >>             shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
> >>             ^
> >>     include/linux/compiler.h:160:42: note: in definition of macro ‘unlikely’
> >>      # define unlikely(x) __builtin_expect(!!(x), 0)
> >>                                               ^
> >>     make[1]: *** [drivers/net/ethernet/mellanox/mlx4//en_tx.o] Error 1
> >>     make: *** [_module_drivers/net/ethernet/mellanox/mlx4/] Error 2
> >>
> >
> >
> >This compilation error seems strange.
> >
> >Are you sure your tree is pristine, not corrupted in any way ?
> 
> I believe I did the revert one by one with git revert.
> 
> >
> >
> >> I tried to fix this with following change:
> >>
> >>     [root@tian-lp1 3.18]# git diff
> >>     diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/m
> >>     index eaf23eb..d2f06a7 100644
> >>     --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> >>     +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> >>     @@ -799,8 +799,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_dev
> >>              * set flag for further reference
> >>              */
> >>             if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
> >>     -                    shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
> >>     -               shinfo->tx_flags |= SKBTX_IN_PROGRESS;
> >>     +                    skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
> >>     +               skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
> >>                     tx_info->ts_requested = 1;
> >>             }
> >>
> >> But seems to face another error.
> >>
> >
> >I suspect your tree is not the official tree, I do not see how you got
> >this compilation error.
> 
> 
> I checked the upstream git tree again, and find this commit:
> 
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7dfa4b414d4eec8da56e44fb2b4aea3e549b092f
> 
> 
> And I want to say the shinfo local variable is introduced in commit:
> 
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=b9d8839a44092cb4268ef2813c34d5dbf3363603
> 
> And in my log tree, also checked the upstream, this one is applyed after the
> first one. And the compiling error will disappear untill I apply this one.
> 
> So this compiling issue can't reproduced at your side? You have reset --hard
> to the "Code cleanup" one, and can't see the error? That is strange.
> 

Okay, your message was not clear : I thought you had a compilation error
on current tree.

The true story of these patches is that Mellanox split an initial big
chunk [1] I gave into multiple patches.

Maybe they missed that one patch did not actually compile.

[1] https://patchwork.ozlabs.org/patch/394256/

Now, it is done, there is nothing we can do.

I'll let Mellanox comment, but it looks like your hardware does not like
something.

Have you tried to disable Blue Frame ?

^ permalink raw reply

* linux-next: manual merge of the tiny tree with the net-next tree
From: Stephen Rothwell @ 2014-11-10  3:25 UTC (permalink / raw)
  To: Josh Triplett, David Miller, netdev
  Cc: linux-next, linux-kernel, Hannes Frederic Sowa, Catalina Mocanu

[-- Attachment #1: Type: text/plain, Size: 1456 bytes --]

Hi Josh,

Today's linux-next merge of the tiny tree got a conflict in
lib/Makefile between commit e5a2c8999576 ("fast_hash: avoid indirect
function calls") from the net-next tree and commit 4ecea0db79ef ("lib:
rhashtable: Make rhashtable.c optional") from the tiny tree.

I fixed it up (see below) and can carry the fix as necessary (no action
is required).

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

diff --cc lib/Makefile
index 04e53dd16070,47b8305288e2..000000000000
--- a/lib/Makefile
+++ b/lib/Makefile
@@@ -22,11 -22,14 +22,14 @@@ lib-$(CONFIG_SMP) += cpumask.
  lib-y	+= kobject.o klist.o
  obj-y	+= lockref.o
  
- obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
+ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
  	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
- 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
+ 	 gcd.o lcm.o list_sort.o uuid.o iovec.o clz_ctz.o \
  	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
- 	 percpu-refcount.o percpu_ida.o rhashtable.o
 -	 percpu-refcount.o percpu_ida.o hash.o
++	 percpu-refcount.o percpu_ida.o
+ obj-$(CONFIG_FLEX_ARRAY) += flex_array.o
+ obj-$(CONFIG_HALFMD4) += halfmd4.o
+ obj-$(CONFIG_RHASHTABLE) += rhashtable.o
  obj-y += string_helpers.o
  obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
  obj-y += kstrtox.o

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* RE: [PATCH net-next 2/2] r8152: adjust rtl_start_rx
From: Hayes Wang @ 2014-11-10  3:29 UTC (permalink / raw)
  To: David Miller
  Cc: netdev@vger.kernel.org, nic_swsd, linux-kernel@vger.kernel.org,
	linux-usb@vger.kernel.org
In-Reply-To: <20141107.113522.837502028522211960.davem@davemloft.net>

 David Miller [mailto:davem@davemloft.net] 
> Sent: Saturday, November 08, 2014 12:35 AM
[...]
> Does this even work?
> 
> If you leave a hole in the ring, the device is going to stop there
> anyways.

Excuse me. I'm not sure I understand your meaning clearly.

The behavior is different for PCI(e) and USB ethernet device.
The PCI nic could know the ring buffer by certain way, so
the device could fill the data into the buffer one by one
automatically. However, for usb nic, the driver has to
indicate (i.e. submit) each buffer for each data. The device
doesn't know what is the next buffer by itself. That is,
the driver determines the order by which the data would be
filled.

Therefore, when I try to submit 10 rx buffers and some of
them fail, I could get the data depending on the order of
the successful ones. Besides, the driver has to submit the
buffer for next data continually, so the previous unsuccessful
ones could be tried again at the same time.

Best Regards,
Hayes

^ permalink raw reply

* Re: [patch net-next v2 00/10] introduce rocker switch driver with hardware accelerated datapath api - phase 1: bridge fdb offload
From: Jamal Hadi Salim @ 2014-11-10  3:31 UTC (permalink / raw)
  To: Jiri Pirko, netdev
  Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
	azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
	john.r.fastabend, edumazet, sfeldma, f.fainelli, roopa, linville,
	jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a, buytenh,
	aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye, simon.horman,
	alexander.h.duyck, john.ronciak, mleitner, shrijeet, gospo, bcrl
In-Reply-To: <1415530280-9190-1-git-send-email-jiri@resnulli.us>

Jiri,

I am hoping you have considered Ben LaHaise's, John Fastabend's,
and Roopa's patches after all those discussions and
meetings we have had (in which you promised you will merge patches
in). I am not seeing much of that here or mention of anything of that
sort.
At least please get their sign-off - this is such an important piece of
new work that you should make sure you get consensus.
Otherwise we are back to square one and everyone is going their way with
their patches;

Ben/Roopa/John - please issue a Signed-off-by as well
if you agree with this approach; otherwise I am hoping none of these
patches are merged in.

I will look at the patches and comment.

cheers,
jamal

On 11/09/14 05:51, Jiri Pirko wrote:
> Hi all.
>
> This patchset is just the first phase of switch and switch-ish device
> support api in kernel. Note that the api will extend (our complete work
> can be pulled from https://github.com/jpirko/net-next-rocker).
>
> So what this patchset includes:
> - introduce switchdev api for implementing switch drivers (so far
>    only linux bridge fdb offload is covered)
> - introduce rocker switch driver which implements switchdev api
>
> As to the discussion if there is need to have specific class of device
> representing the switch itself, so far we found no need to introduce that.
> But we are generally ok with the idea and when the time comes and it will
> be needed, it can be easily introduced without any disturbance.
>
> This patchset introduces switch id export through rtnetlink and sysfs,
> which is similar to what we have for port id in SR-IOV. I will send iproute2
> patchset for showing the switch id for port netdevs once this is applied.
>
> For detailed description, please see individual patches.
>
> v1->v2:
> - addressed all DaveM's comments
>
> Jiri Pirko (5):
>    net: rename netdev_phys_port_id to more generic name
>    net: introduce generic switch devices support
>    rtnl: expose physical switch id for particular device
>    net-sysfs: expose physical switch id for particular device
>    rocker: introduce rocker switch driver
>
> Scott Feldman (5):
>    bridge: introduce fdb offloading via switchdev
>    bridge: call netdev_sw_port_stp_update when bridge port STP status
>      changes
>    bridge: add API to notify bridge driver of learned FBD on offloaded
>      device
>    rocker: implement rocker ofdpa flow table manipulation
>    rocker: implement L2 bridge offloading
>
>   Documentation/networking/switchdev.txt           |   59 +
>   MAINTAINERS                                      |   14 +
>   drivers/net/ethernet/Kconfig                     |    1 +
>   drivers/net/ethernet/Makefile                    |    1 +
>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |    2 +-
>   drivers/net/ethernet/intel/i40e/i40e_main.c      |    2 +-
>   drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |    2 +-
>   drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |    2 +-
>   drivers/net/ethernet/rocker/Kconfig              |   27 +
>   drivers/net/ethernet/rocker/Makefile             |    5 +
>   drivers/net/ethernet/rocker/rocker.c             | 4182 ++++++++++++++++++++++
>   drivers/net/ethernet/rocker/rocker.h             |  427 +++
>   include/linux/if_bridge.h                        |   18 +
>   include/linux/netdevice.h                        |   48 +-
>   include/net/switchdev.h                          |   53 +
>   include/uapi/linux/if_link.h                     |    1 +
>   net/Kconfig                                      |    1 +
>   net/Makefile                                     |    3 +
>   net/bridge/br_fdb.c                              |   94 +-
>   net/bridge/br_netlink.c                          |    2 +
>   net/bridge/br_stp.c                              |    4 +
>   net/bridge/br_stp_if.c                           |    3 +
>   net/bridge/br_stp_timer.c                        |    2 +
>   net/core/dev.c                                   |    2 +-
>   net/core/net-sysfs.c                             |   26 +-
>   net/core/rtnetlink.c                             |   30 +-
>   net/switchdev/Kconfig                            |   13 +
>   net/switchdev/Makefile                           |    5 +
>   net/switchdev/switchdev.c                        |   93 +
>   29 files changed, 5104 insertions(+), 18 deletions(-)
>   create mode 100644 Documentation/networking/switchdev.txt
>   create mode 100644 drivers/net/ethernet/rocker/Kconfig
>   create mode 100644 drivers/net/ethernet/rocker/Makefile
>   create mode 100644 drivers/net/ethernet/rocker/rocker.c
>   create mode 100644 drivers/net/ethernet/rocker/rocker.h
>   create mode 100644 include/net/switchdev.h
>   create mode 100644 net/switchdev/Kconfig
>   create mode 100644 net/switchdev/Makefile
>   create mode 100644 net/switchdev/switchdev.c
>

^ permalink raw reply

* Re: [patch net-next v2 01/10] net: rename netdev_phys_port_id to more generic name
From: Jamal Hadi Salim @ 2014-11-10  3:35 UTC (permalink / raw)
  To: Jiri Pirko, netdev
  Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
	azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
	john.r.fastabend, edumazet, sfeldma, f.fainelli, roopa, linville,
	jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a, buytenh,
	aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye, simon.horman,
	alexander.h.duyck, john.ronciak, mleitner, shrijeet, gospo, bcrl
In-Reply-To: <1415530280-9190-2-git-send-email-jiri@resnulli.us>


On 11/09/14 05:51, Jiri Pirko wrote:
> So this can be reused for identification of other "items" as well.
>




>
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index 9dd0669..55dc4da 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -387,7 +387,7 @@ static ssize_t phys_port_id_show(struct device *dev,
>   		return restart_syscall();
>
>   	if (dev_isalive(netdev)) {
> -		struct netdev_phys_port_id ppid;
> +		struct netdev_phys_item_id ppid;
>
>   		ret = dev_get_phys_port_id(netdev, &ppid);
>   		if (!ret)
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index a688268..1087c6d 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -868,7 +868,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
>   	       + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
>   	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
>   	       + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
> -	       + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
> +	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_PORT_ID */
>   }
>

[...]
>   static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
> @@ -952,7 +952,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
>   static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
>   {
>   	int err;
> -	struct netdev_phys_port_id ppid;
> +	struct netdev_phys_item_id ppid;
>
>   	err = dev_get_phys_port_id(dev, &ppid);
>   	if (err) {
> @@ -1196,7 +1196,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
>   	[IFLA_PROMISCUITY]	= { .type = NLA_U32 },
>   	[IFLA_NUM_TX_QUEUES]	= { .type = NLA_U32 },
>   	[IFLA_NUM_RX_QUEUES]	= { .type = NLA_U32 },
> -	[IFLA_PHYS_PORT_ID]	= { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
> +	[IFLA_PHYS_PORT_ID]	= { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
>   	[IFLA_CARRIER_CHANGES]	= { .type = NLA_U32 },  /* ignored */
>   };


Wouldn't this just break an existing ABI? You may need to introduce a new
attribute.

cheers,
jamal

^ permalink raw reply

* Re: [patch net-next v2 03/10] rtnl: expose physical switch id for particular device
From: Jamal Hadi Salim @ 2014-11-10  3:43 UTC (permalink / raw)
  To: Jiri Pirko, netdev
  Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
	azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
	john.r.fastabend, edumazet, sfeldma, f.fainelli, roopa, linville,
	jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a, buytenh,
	aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye, simon.horman,
	alexander.h.duyck, john.ronciak, mleitner, shrijeet, gospo, bcrl
In-Reply-To: <1415530280-9190-4-git-send-email-jiri@resnulli.us>

On 11/09/14 05:51, Jiri Pirko wrote:
> The netdevice represents a port in a switch, it will expose
> IFLA_PHYS_SWITCH_ID value via rtnl. Two netdevices with the same value
> belong to one physical switch.
>
> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
> ---
>   include/uapi/linux/if_link.h |  1 +
>   net/core/rtnetlink.c         | 26 +++++++++++++++++++++++++-
>   2 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 7072d83..4163753 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -145,6 +145,7 @@ enum {
>   	IFLA_CARRIER,
>   	IFLA_PHYS_PORT_ID,
>   	IFLA_CARRIER_CHANGES,
> +	IFLA_PHYS_SWITCH_ID,
>   	__IFLA_MAX
>   };
>


> @@ -1198,6 +1221,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
>   	[IFLA_NUM_RX_QUEUES]	= { .type = NLA_U32 },
>   	[IFLA_PHYS_PORT_ID]	= { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
>   	[IFLA_CARRIER_CHANGES]	= { .type = NLA_U32 },  /* ignored */
> +	[IFLA_PHYS_SWITCH_ID]	= { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
>   };
>

OK, looking at this compared to #1, I can see you are introducing
IFLA_PHYS_SWITCH_ID, but then why did you need to change
IFLA_PHYS_PORT_ID earlier?

cheers,
jamal

>   static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
>

^ permalink raw reply

* Re: [patch net-next v2 00/10] introduce rocker switch driver with hardware accelerated datapath api - phase 1: bridge fdb offload
From: Simon Horman @ 2014-11-10  3:46 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: Jiri Pirko, netdev, davem, nhorman, andy, tgraf, dborkman,
	ogerlitz, jesse, pshelar, azhou, ben, stephen, jeffrey.t.kirsher,
	vyasevic, xiyou.wangcong, john.r.fastabend, edumazet, sfeldma,
	f.fainelli, roopa, linville, jasowang, ebiederm, nicolas.dichtel,
	ryazanov.s.a, buytenh, aviadr, nbd, alexei.starovoitov,
	Neil.Jerram, ronye, alexander.h.duyck, john.ronciak, mleitner,
	shrijeet, gospo, bcrl
In-Reply-To: <5460319B.2010605@mojatatu.com>

Hi Jamal, Hi Jiri,

On a somewhat related note I am also wondering what if any progress has
been made regarding discussions of (and code for) the following:

1. Exposing flow tables to user-space
   - I realise that this is Open vSwitch specific to some extent
     but I am in no way implying that it should be done instead of
     non-Open vSwitch specific work.
   - Jiri, IIRC this was part of ~v2 of your earlier offload patchset

2. Describing Switch Hardware
   - I see John Fastabend moving forwards on this in his git repository
     https://github.com/jrfastab/flow-net-next

The way that I see things is that both of the above could be exposed via
netlink. And that the first at least could be backed by NDOs.  As such I
see this work as complementary and perhaps applying on top of this
patchset. If I am mistaken in this regard it would be good to know :)

I am of course also interested to know if the above are moving forwards.
To be clear I am very interested in being able to use these APIs to
perform Open vSwitch offloads and I am very happy to help.
(Jamal: I'm also interested in non-Open vSwitch offloads :)

On Sun, Nov 09, 2014 at 10:31:39PM -0500, Jamal Hadi Salim wrote:
> Jiri,
> 
> I am hoping you have considered Ben LaHaise's, John Fastabend's,
> and Roopa's patches after all those discussions and
> meetings we have had (in which you promised you will merge patches
> in). I am not seeing much of that here or mention of anything of that
> sort.
> At least please get their sign-off - this is such an important piece of
> new work that you should make sure you get consensus.
> Otherwise we are back to square one and everyone is going their way with
> their patches;
> 
> Ben/Roopa/John - please issue a Signed-off-by as well
> if you agree with this approach; otherwise I am hoping none of these
> patches are merged in.
> 
> I will look at the patches and comment.
> 
> cheers,
> jamal
> 
> On 11/09/14 05:51, Jiri Pirko wrote:
> >Hi all.
> >
> >This patchset is just the first phase of switch and switch-ish device
> >support api in kernel. Note that the api will extend (our complete work
> >can be pulled from https://github.com/jpirko/net-next-rocker).
> >
> >So what this patchset includes:
> >- introduce switchdev api for implementing switch drivers (so far
> >   only linux bridge fdb offload is covered)
> >- introduce rocker switch driver which implements switchdev api
> >
> >As to the discussion if there is need to have specific class of device
> >representing the switch itself, so far we found no need to introduce that.
> >But we are generally ok with the idea and when the time comes and it will
> >be needed, it can be easily introduced without any disturbance.
> >
> >This patchset introduces switch id export through rtnetlink and sysfs,
> >which is similar to what we have for port id in SR-IOV. I will send iproute2
> >patchset for showing the switch id for port netdevs once this is applied.
> >
> >For detailed description, please see individual patches.
> >
> >v1->v2:
> >- addressed all DaveM's comments
> >
> >Jiri Pirko (5):
> >   net: rename netdev_phys_port_id to more generic name
> >   net: introduce generic switch devices support
> >   rtnl: expose physical switch id for particular device
> >   net-sysfs: expose physical switch id for particular device
> >   rocker: introduce rocker switch driver
> >
> >Scott Feldman (5):
> >   bridge: introduce fdb offloading via switchdev
> >   bridge: call netdev_sw_port_stp_update when bridge port STP status
> >     changes
> >   bridge: add API to notify bridge driver of learned FBD on offloaded
> >     device
> >   rocker: implement rocker ofdpa flow table manipulation
> >   rocker: implement L2 bridge offloading
> >
> >  Documentation/networking/switchdev.txt           |   59 +
> >  MAINTAINERS                                      |   14 +
> >  drivers/net/ethernet/Kconfig                     |    1 +
> >  drivers/net/ethernet/Makefile                    |    1 +
> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |    2 +-
> >  drivers/net/ethernet/intel/i40e/i40e_main.c      |    2 +-
> >  drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |    2 +-
> >  drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |    2 +-
> >  drivers/net/ethernet/rocker/Kconfig              |   27 +
> >  drivers/net/ethernet/rocker/Makefile             |    5 +
> >  drivers/net/ethernet/rocker/rocker.c             | 4182 ++++++++++++++++++++++
> >  drivers/net/ethernet/rocker/rocker.h             |  427 +++
> >  include/linux/if_bridge.h                        |   18 +
> >  include/linux/netdevice.h                        |   48 +-
> >  include/net/switchdev.h                          |   53 +
> >  include/uapi/linux/if_link.h                     |    1 +
> >  net/Kconfig                                      |    1 +
> >  net/Makefile                                     |    3 +
> >  net/bridge/br_fdb.c                              |   94 +-
> >  net/bridge/br_netlink.c                          |    2 +
> >  net/bridge/br_stp.c                              |    4 +
> >  net/bridge/br_stp_if.c                           |    3 +
> >  net/bridge/br_stp_timer.c                        |    2 +
> >  net/core/dev.c                                   |    2 +-
> >  net/core/net-sysfs.c                             |   26 +-
> >  net/core/rtnetlink.c                             |   30 +-
> >  net/switchdev/Kconfig                            |   13 +
> >  net/switchdev/Makefile                           |    5 +
> >  net/switchdev/switchdev.c                        |   93 +
> >  29 files changed, 5104 insertions(+), 18 deletions(-)
> >  create mode 100644 Documentation/networking/switchdev.txt
> >  create mode 100644 drivers/net/ethernet/rocker/Kconfig
> >  create mode 100644 drivers/net/ethernet/rocker/Makefile
> >  create mode 100644 drivers/net/ethernet/rocker/rocker.c
> >  create mode 100644 drivers/net/ethernet/rocker/rocker.h
> >  create mode 100644 include/net/switchdev.h
> >  create mode 100644 net/switchdev/Kconfig
> >  create mode 100644 net/switchdev/Makefile
> >  create mode 100644 net/switchdev/switchdev.c
> >
> 

^ permalink raw reply

* Re: [patch net-next v2 06/10] bridge: introduce fdb offloading via switchdev
From: Jamal Hadi Salim @ 2014-11-10  3:47 UTC (permalink / raw)
  To: Jiri Pirko, netdev
  Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
	azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
	john.r.fastabend, edumazet, sfeldma, f.fainelli, roopa, linville,
	jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a, buytenh,
	aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye, simon.horman,
	alexander.h.duyck, john.ronciak, mleitner, shrijeet, gospo, bcrl
In-Reply-To: <1415530280-9190-7-git-send-email-jiri@resnulli.us>

On 11/09/14 05:51, Jiri Pirko wrote:
> From: Scott Feldman <sfeldma@gmail.com>
>
> Add two new ndos: ndo_sw_port_fdb_add/del to offload static bridge
> fdb entries.  Static bridge FDB entries are installed, for example,
> using iproute2 bridge cmd:
>
>         bridge fdb add ADDR dev DEV master vlan VID
>
> This would install ADDR into the bridge's FDB for port DEV on vlan VID.  The
> switch driver implements two ndo_swdev ops to add/delete FDB entries in the
> switch device:
>
>         int ndo_sw_port_fdb_add(struct net_device *dev,
>                                 const unsigned char *addr,
>                                 u16 vid);
>
>         int ndo_sw_port_fdb_del(struct net_device *dev,
>                                 const unsigned char *addr,
>                                 u16 vid);
>
> The driver returns 0 on success, negative error code on failure.
>
> Note: the switch driver would not implement ndo_fdb_add/del/dump on a port
> netdev as these are intended for devices maintaining their own FDB.  In our
> case, we want the Linux bridge to own the FDB.
>
> Note: by default, the bridge does not filter on VLAN and only bridges untagged
> traffic.  To enable VLAN support, turn on VLAN filtering:
>
>        echo 1 >/sys/class/net/<bridge>/bridge/vlan_filtering
>

Sorry - why is the current fdb_add/del insufficient? It needs a vlanid
and the master/self flags should indicate intent to add to h/w vs s/w.

cheers,
jamal

^ permalink raw reply

* Re: [patch net-next v2 10/10] rocker: implement L2 bridge offloading
From: Jamal Hadi Salim @ 2014-11-10  3:53 UTC (permalink / raw)
  To: Jiri Pirko, netdev
  Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
	azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
	john.r.fastabend, edumazet, sfeldma, f.fainelli, roopa, linville,
	jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a, buytenh,
	aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye, simon.horman,
	alexander.h.duyck, john.ronciak, mleitner, shrijeet, gospo, bcrl
In-Reply-To: <1415530280-9190-11-git-send-email-jiri@resnulli.us>

On 11/09/14 05:51, Jiri Pirko wrote:
> From: Scott Feldman <sfeldma@gmail.com>
>
> Add L2 bridge offloading support to rocker driver.  Here, the Linux bridge
> driver is used to collect swdev ports into a tagged (or untagged) VLAN
> bridge.  The swdev will offload from the bridge driver the following L2
> bridging functions:
>
>   - Learning of neighbor MAC addresses on VLAN X  Learned mac/vlan is
> installed in bridge FDB.  (And removed when device unlearns mac/vlan).
> Learning must be turned off on each bridge port to disable the feature in
> the bridge driver.
>

I have quite a few use cases where the above is a no-no. I don't want
learning of any sort (we have a knob for that in the bridge).
And I don't want learning in hardware to be reflected in software.
Basically this is a policy decision. Introduce a knob to choose whether
hardware learnt addresses should be reflected in software.


Have to run, but will comment when I get the time.

cheers,
jamal

^ permalink raw reply

* [GIT net-next] Open vSwitch
From: Pravin B Shelar @ 2014-11-10  3:58 UTC (permalink / raw)
  To: davem; +Cc: netdev

The following batch of patches brings feature parity between the upstream
ovs module and the out-of-tree ovs module.

Two features are added. The first adds support for exporting egress
tunnel information for a packet; this is used to improve
visibility into network traffic. The second feature allows the userspace
vswitchd process to probe ovs module features. The other patches
are optimizations and code cleanup.

----------------------------------------------------------------
The following changes since commit c0560b9c523341516eabf0f3b51832256caa7bbb:

  dccp: Convert DCCP_WARN to net_warn_ratelimited (2014-11-08 21:22:54 -0500)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/pshelar/openvswitch.git net_next_ovs

for you to fetch changes up to 05da5898a96c05e32aa9850c9cd89eef29471b13:

  openvswitch: Add support for OVS_FLOW_ATTR_PROBE. (2014-11-09 18:58:44 -0800)

----------------------------------------------------------------
Jarno Rajahalme (1):
      openvswitch: Add support for OVS_FLOW_ATTR_PROBE.

Pravin B Shelar (3):
      openvswitch: Export symbols as GPL symbols.
      openvswitch: Optimize recirc action.
      openvswitch: Remove redundant key ref from upcall_info.

Thomas Graf (1):
      openvswitch: Constify various function arguments

Wenyu Zhang (1):
      openvswitch: Extend packet attribute for egress tunnel info

 include/uapi/linux/openvswitch.h |  15 ++
 net/openvswitch/actions.c        | 180 ++++++++++++++------
 net/openvswitch/datapath.c       | 129 ++++++++------
 net/openvswitch/datapath.h       |  22 +--
 net/openvswitch/flow.c           |   8 +-
 net/openvswitch/flow.h           |  71 ++++++--
 net/openvswitch/flow_netlink.c   | 357 +++++++++++++++++++++++----------------
 net/openvswitch/flow_netlink.h   |  13 +-
 net/openvswitch/flow_table.c     |  12 +-
 net/openvswitch/flow_table.h     |   8 +-
 net/openvswitch/vport-geneve.c   |  23 ++-
 net/openvswitch/vport-gre.c      |  12 +-
 net/openvswitch/vport-netdev.c   |   2 +-
 net/openvswitch/vport-vxlan.c    |  24 ++-
 net/openvswitch/vport.c          |  81 +++++++--
 net/openvswitch/vport.h          |  20 ++-
 16 files changed, 664 insertions(+), 313 deletions(-)

^ permalink raw reply

* [PATCH net-next 1/6] openvswitch: Export symbols as GPL symbols.
From: Pravin B Shelar @ 2014-11-10  3:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, Pravin B Shelar, Thomas Graf

vports can be compiled as modules, therefore openvswitch needs
to export a few symbols. Export them as GPL symbols.

CC: Thomas Graf <tgraf@noironetworks.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
 net/openvswitch/datapath.c |  4 ++--
 net/openvswitch/vport.c    | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 014485e..6cfb44f 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -59,7 +59,7 @@
 #include "vport-netdev.h"
 
 int ovs_net_id __read_mostly;
-EXPORT_SYMBOL(ovs_net_id);
+EXPORT_SYMBOL_GPL(ovs_net_id);
 
 static struct genl_family dp_packet_genl_family;
 static struct genl_family dp_flow_genl_family;
@@ -131,7 +131,7 @@ int lockdep_ovsl_is_held(void)
 	else
 		return 1;
 }
-EXPORT_SYMBOL(lockdep_ovsl_is_held);
+EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
 #endif
 
 static struct vport *new_vport(const struct vport_parms *);
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 8168ef0..4b5dd18 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -90,7 +90,7 @@ errout:
 	ovs_unlock();
 	return err;
 }
-EXPORT_SYMBOL(ovs_vport_ops_register);
+EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
 
 void ovs_vport_ops_unregister(struct vport_ops *ops)
 {
@@ -98,7 +98,7 @@ void ovs_vport_ops_unregister(struct vport_ops *ops)
 	list_del(&ops->list);
 	ovs_unlock();
 }
-EXPORT_SYMBOL(ovs_vport_ops_unregister);
+EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
 
 /**
  *	ovs_vport_locate - find a port that has already been created
@@ -165,7 +165,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 
 	return vport;
 }
-EXPORT_SYMBOL(ovs_vport_alloc);
+EXPORT_SYMBOL_GPL(ovs_vport_alloc);
 
 /**
  *	ovs_vport_free - uninitialize and free vport
@@ -186,7 +186,7 @@ void ovs_vport_free(struct vport *vport)
 	free_percpu(vport->percpu_stats);
 	kfree(vport);
 }
-EXPORT_SYMBOL(ovs_vport_free);
+EXPORT_SYMBOL_GPL(ovs_vport_free);
 
 static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
 {
@@ -493,7 +493,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
 	}
 	ovs_dp_process_packet(skb, &key);
 }
-EXPORT_SYMBOL(ovs_vport_receive);
+EXPORT_SYMBOL_GPL(ovs_vport_receive);
 
 /**
  *	ovs_vport_send - send a packet on a device
@@ -572,4 +572,4 @@ void ovs_vport_deferred_free(struct vport *vport)
 
 	call_rcu(&vport->rcu, free_vport_rcu);
 }
-EXPORT_SYMBOL(ovs_vport_deferred_free);
+EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 2/6] openvswitch: Extend packet attribute for egress tunnel info
From: Pravin B Shelar @ 2014-11-10  3:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, Wenyu Zhang, Pravin B Shelar

From: Wenyu Zhang <wenyuz@vmware.com>

OVS vswitch has extended the IPFIX exporter to export tunnel headers
to improve network visibility.
To export this information, userspace needs to know the egress tunnel
for a given packet. By extending the packet attributes, the datapath can
export egress tunnel info for a given packet, so that userspace
can ask for egress tunnel info in the userspace action. This
information is used to build IPFIX data for a given flow.

Signed-off-by: Wenyu Zhang <wenyuz@vmware.com>
Acked-by: Romain Lenglet <rlenglet@vmware.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
 include/uapi/linux/openvswitch.h | 13 +++++++++
 net/openvswitch/actions.c        | 19 ++++++++++++
 net/openvswitch/datapath.c       | 21 +++++++++++---
 net/openvswitch/datapath.h       |  2 ++
 net/openvswitch/flow.h           | 62 +++++++++++++++++++++++++++++++---------
 net/openvswitch/flow_netlink.c   | 54 +++++++++++++++++++++++++++-------
 net/openvswitch/flow_netlink.h   |  3 ++
 net/openvswitch/vport-geneve.c   | 21 +++++++++++++-
 net/openvswitch/vport-gre.c      | 12 +++++++-
 net/openvswitch/vport-vxlan.c    | 24 +++++++++++++++-
 net/openvswitch/vport.c          | 61 +++++++++++++++++++++++++++++++++++++++
 net/openvswitch/vport.h          | 14 +++++++++
 12 files changed, 275 insertions(+), 31 deletions(-)

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 26c36c4..cf81856 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -157,6 +157,11 @@ enum ovs_packet_cmd {
  * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
  * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
  * specified there.
+ * @OVS_PACKET_ATTR_EGRESS_TUN_KEY: Present for an %OVS_PACKET_CMD_ACTION
+ * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
+ * %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the
+ * output port is actually a tunnel port. Contains the output tunnel key
+ * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes.
  *
  * These attributes follow the &struct ovs_header within the Generic Netlink
  * payload for %OVS_PACKET_* commands.
@@ -167,6 +172,8 @@ enum ovs_packet_attr {
 	OVS_PACKET_ATTR_KEY,         /* Nested OVS_KEY_ATTR_* attributes. */
 	OVS_PACKET_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
 	OVS_PACKET_ATTR_USERDATA,    /* OVS_ACTION_ATTR_USERSPACE arg. */
+	OVS_PACKET_ATTR_EGRESS_TUN_KEY,  /* Nested OVS_TUNNEL_KEY_ATTR_*
+					    attributes. */
 	__OVS_PACKET_ATTR_MAX
 };
 
@@ -315,6 +322,8 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_CSUM,               /* No argument. CSUM packet. */
 	OVS_TUNNEL_KEY_ATTR_OAM,                /* No argument. OAM frame.  */
 	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,        /* Array of Geneve options. */
+	OVS_TUNNEL_KEY_ATTR_TP_SRC,		/* be16 src Transport Port. */
+	OVS_TUNNEL_KEY_ATTR_TP_DST,		/* be16 dst Transport Port. */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
@@ -480,11 +489,15 @@ enum ovs_sample_attr {
  * message should be sent.  Required.
  * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
  * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
+ * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get
+ * tunnel info.
  */
 enum ovs_userspace_attr {
 	OVS_USERSPACE_ATTR_UNSPEC,
 	OVS_USERSPACE_ATTR_PID,	      /* u32 Netlink PID to receive upcalls. */
 	OVS_USERSPACE_ATTR_USERDATA,  /* Optional user-specified cookie. */
+	OVS_USERSPACE_ATTR_EGRESS_TUN_PORT,  /* Optional, u32 output port
+					      * to get tunnel info. */
 	__OVS_USERSPACE_ATTR_MAX
 };
 
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index f7e5891..ceb618c 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -564,6 +564,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
 static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 			    struct sw_flow_key *key, const struct nlattr *attr)
 {
+	struct ovs_tunnel_info info;
 	struct dp_upcall_info upcall;
 	const struct nlattr *a;
 	int rem;
@@ -572,6 +573,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 	upcall.key = key;
 	upcall.userdata = NULL;
 	upcall.portid = 0;
+	upcall.egress_tun_info = NULL;
 
 	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
 		 a = nla_next(a, &rem)) {
@@ -583,7 +585,24 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 		case OVS_USERSPACE_ATTR_PID:
 			upcall.portid = nla_get_u32(a);
 			break;
+
+		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
+			/* Get out tunnel info. */
+			struct vport *vport;
+
+			vport = ovs_vport_rcu(dp, nla_get_u32(a));
+			if (vport) {
+				int err;
+
+				err = ovs_vport_get_egress_tun_info(vport, skb,
+								    &info);
+				if (!err)
+					upcall.egress_tun_info = &info;
+			}
+			break;
 		}
+
+		} /* End of switch. */
 	}
 
 	return ovs_dp_upcall(dp, skb, &upcall);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6cfb44f..c2ac340 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -274,6 +274,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 		upcall.key = key;
 		upcall.userdata = NULL;
 		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+		upcall.egress_tun_info = NULL;
 		error = ovs_dp_upcall(dp, skb, &upcall);
 		if (unlikely(error))
 			kfree_skb(skb);
@@ -375,7 +376,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
 	return err;
 }
 
-static size_t upcall_msg_size(const struct nlattr *userdata,
+static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
 			      unsigned int hdrlen)
 {
 	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
@@ -383,8 +384,12 @@ static size_t upcall_msg_size(const struct nlattr *userdata,
 		+ nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
 
 	/* OVS_PACKET_ATTR_USERDATA */
-	if (userdata)
-		size += NLA_ALIGN(userdata->nla_len);
+	if (upcall_info->userdata)
+		size += NLA_ALIGN(upcall_info->userdata->nla_len);
+
+	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
+	if (upcall_info->egress_tun_info)
+		size += nla_total_size(ovs_tun_key_attr_size());
 
 	return size;
 }
@@ -440,7 +445,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 	else
 		hlen = skb->len;
 
-	len = upcall_msg_size(upcall_info->userdata, hlen);
+	len = upcall_msg_size(upcall_info, hlen);
 	user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
 	if (!user_skb) {
 		err = -ENOMEM;
@@ -461,6 +466,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 			  nla_len(upcall_info->userdata),
 			  nla_data(upcall_info->userdata));
 
+	if (upcall_info->egress_tun_info) {
+		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+		err = ovs_nla_put_egress_tunnel_key(user_skb,
+						    upcall_info->egress_tun_info);
+		BUG_ON(err);
+		nla_nest_end(user_skb, nla);
+	}
+
 	/* Only reserve room for attribute header, packet data is added
 	 * in skb_zerocopy() */
 	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 1c56a80..2bc577b 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -114,12 +114,14 @@ struct ovs_skb_cb {
  * @pid: Netlink PID to which packet should be sent.  If @pid is 0 then no
  * packet is sent and the packet is accounted in the datapath's @n_lost
  * counter.
+ * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
  */
 struct dp_upcall_info {
 	u8 cmd;
 	const struct sw_flow_key *key;
 	const struct nlattr *userdata;
 	u32 portid;
+	const struct ovs_tunnel_info *egress_tun_info;
 };
 
 /**
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 4962bee..543b358 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -37,8 +37,8 @@ struct sk_buff;
 
 /* Used to memset ovs_key_ipv4_tunnel padding. */
 #define OVS_TUNNEL_KEY_SIZE					\
-	(offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) +	\
-	FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
+	(offsetof(struct ovs_key_ipv4_tunnel, tp_dst) +		\
+	 FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, tp_dst))
 
 struct ovs_key_ipv4_tunnel {
 	__be64 tun_id;
@@ -47,6 +47,8 @@ struct ovs_key_ipv4_tunnel {
 	__be16 tun_flags;
 	u8   ipv4_tos;
 	u8   ipv4_ttl;
+	__be16 tp_src;
+	__be16 tp_dst;
 } __packed __aligned(4); /* Minimize padding. */
 
 struct ovs_tunnel_info {
@@ -64,27 +66,59 @@ struct ovs_tunnel_info {
 			       FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
 			       opt_len))
 
-static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
-					  const struct iphdr *iph,
-					  __be64 tun_id, __be16 tun_flags,
-					  struct geneve_opt *opts,
-					  u8 opts_len)
+static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
+					    __be32 saddr, __be32 daddr,
+					    u8 tos, u8 ttl,
+					    __be16 tp_src,
+					    __be16 tp_dst,
+					    __be64 tun_id,
+					    __be16 tun_flags,
+					    struct geneve_opt *opts,
+					    u8 opts_len)
 {
 	tun_info->tunnel.tun_id = tun_id;
-	tun_info->tunnel.ipv4_src = iph->saddr;
-	tun_info->tunnel.ipv4_dst = iph->daddr;
-	tun_info->tunnel.ipv4_tos = iph->tos;
-	tun_info->tunnel.ipv4_ttl = iph->ttl;
+	tun_info->tunnel.ipv4_src = saddr;
+	tun_info->tunnel.ipv4_dst = daddr;
+	tun_info->tunnel.ipv4_tos = tos;
+	tun_info->tunnel.ipv4_ttl = ttl;
 	tun_info->tunnel.tun_flags = tun_flags;
 
-	/* clear struct padding. */
-	memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
-	       sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
+	/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
+	 * the upper tunnel are used.
+	 * E.g.: for GRE over IPsec, the tp_src and tp_dst are zero.
+	 */
+	tun_info->tunnel.tp_src = tp_src;
+	tun_info->tunnel.tp_dst = tp_dst;
+
+	/* Clear struct padding. */
+	if (sizeof(tun_info->tunnel) != OVS_TUNNEL_KEY_SIZE)
+		memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE,
+		       0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
 
 	tun_info->options = opts;
 	tun_info->options_len = opts_len;
 }
 
+static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
+					  const struct iphdr *iph,
+					  __be16 tp_src,
+					  __be16 tp_dst,
+					  __be64 tun_id,
+					  __be16 tun_flags,
+					  struct geneve_opt *opts,
+					  u8 opts_len)
+{
+	__ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
+				 iph->tos, iph->ttl,
+				 tp_src, tp_dst,
+				 tun_id, tun_flags,
+				 opts, opts_len);
+}
+
+#define OVS_SW_FLOW_KEY_METADATA_SIZE			\
+	(offsetof(struct sw_flow_key, recirc_id) +	\
+	FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+
 struct sw_flow_key {
 	u8 tun_opts[255];
 	u8 tun_opts_len;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ed31097..98a3e96 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -245,6 +245,24 @@ static bool match_validate(const struct sw_flow_match *match,
 	return true;
 }
 
+size_t ovs_tun_key_attr_size(void)
+{
+	/* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
+	 * updating this function.
+	 */
+	return    nla_total_size(8)    /* OVS_TUNNEL_KEY_ATTR_ID */
+		+ nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
+		+ nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
+		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
+		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
+		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
+		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
+		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+}
+
 size_t ovs_key_attr_size(void)
 {
 	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
@@ -254,15 +272,7 @@ size_t ovs_key_attr_size(void)
 
 	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
 		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
-		  + nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
-		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
-		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
-		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
-		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_OAM */
-		  + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+		  + ovs_tun_key_attr_size()
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
@@ -393,6 +403,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
 			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
 			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+			[OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
+			[OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
 			[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
 			[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
 		};
@@ -440,6 +452,14 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 		case OVS_TUNNEL_KEY_ATTR_CSUM:
 			tun_flags |= TUNNEL_CSUM;
 			break;
+		case OVS_TUNNEL_KEY_ATTR_TP_SRC:
+			SW_FLOW_KEY_PUT(match, tun_key.tp_src,
+					nla_get_be16(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_TP_DST:
+			SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
+					nla_get_be16(a), is_mask);
+			break;
 		case OVS_TUNNEL_KEY_ATTR_OAM:
 			tun_flags |= TUNNEL_OAM;
 			break;
@@ -548,6 +568,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if ((output->tun_flags & TUNNEL_CSUM) &&
 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
 		return -EMSGSIZE;
+	if (output->tp_src &&
+	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
+		return -EMSGSIZE;
+	if (output->tp_dst &&
+	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
+		return -EMSGSIZE;
 	if ((output->tun_flags & TUNNEL_OAM) &&
 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 		return -EMSGSIZE;
@@ -559,7 +585,6 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
 	return 0;
 }
 
-
 static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 			      const struct ovs_key_ipv4_tunnel *output,
 			      const struct geneve_opt *tun_opts,
@@ -580,6 +605,14 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 	return 0;
 }
 
+int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
+				  const struct ovs_tunnel_info *egress_tun_info)
+{
+	return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
+				    egress_tun_info->options,
+				    egress_tun_info->options_len);
+}
+
 static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
 				 const struct nlattr **a, bool is_mask)
 {
@@ -1653,6 +1686,7 @@ static int validate_userspace(const struct nlattr *attr)
 	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
 		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
 		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
+		[OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
 	};
 	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
 	int error;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index eb0b177..90bbe37 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -37,6 +37,7 @@
 
 #include "flow.h"
 
+size_t ovs_tun_key_attr_size(void);
 size_t ovs_key_attr_size(void);
 
 void ovs_match_init(struct sw_flow_match *match,
@@ -49,6 +50,8 @@ int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *);
 int ovs_nla_get_match(struct sw_flow_match *match,
 		      const struct nlattr *,
 		      const struct nlattr *);
+int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
+				  const struct ovs_tunnel_info *);
 
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 70c9765..e31f19c 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -97,7 +97,9 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
 
 	key = vni_to_tunnel_id(geneveh->vni);
 
-	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags,
+	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb),
+			       udp_hdr(skb)->source, udp_hdr(skb)->dest,
+			       key, flags,
 			       geneveh->options, opts_len);
 
 	ovs_vport_receive(vport, skb, &tun_info);
@@ -228,6 +230,22 @@ static const char *geneve_get_name(const struct vport *vport)
 	return geneve_port->name;
 }
 
+static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+				      struct ovs_tunnel_info *egress_tun_info)
+{
+	struct geneve_port *geneve_port = geneve_vport(vport);
+	struct net *net = ovs_dp_get_net(vport->dp);
+	__be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
+	__be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
+
+	/* Get tp_src and tp_dst, refer to geneve_build_header().
+	 */
+	return ovs_tunnel_get_egress_info(egress_tun_info,
+					  ovs_dp_get_net(vport->dp),
+					  OVS_CB(skb)->egress_tun_info,
+					  IPPROTO_UDP, skb->mark, sport, dport);
+}
+
 static struct vport_ops ovs_geneve_vport_ops = {
 	.type		= OVS_VPORT_TYPE_GENEVE,
 	.create		= geneve_tnl_create,
@@ -236,6 +254,7 @@ static struct vport_ops ovs_geneve_vport_ops = {
 	.get_options	= geneve_get_options,
 	.send		= geneve_tnl_send,
 	.owner          = THIS_MODULE,
+	.get_egress_tun_info	= geneve_get_egress_tun_info,
 };
 
 static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 00270b6..8e61a5c 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -108,7 +108,7 @@ static int gre_rcv(struct sk_buff *skb,
 		return PACKET_REJECT;
 
 	key = key_to_tunnel_id(tpi->key, tpi->seq);
-	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
+	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key,
 			       filter_tnl_flags(tpi->flags), NULL, 0);
 
 	ovs_vport_receive(vport, skb, &tun_info);
@@ -284,12 +284,22 @@ static void gre_tnl_destroy(struct vport *vport)
 	gre_exit();
 }
 
+static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+				   struct ovs_tunnel_info *egress_tun_info)
+{
+	return ovs_tunnel_get_egress_info(egress_tun_info,
+					  ovs_dp_get_net(vport->dp),
+					  OVS_CB(skb)->egress_tun_info,
+					  IPPROTO_GRE, skb->mark, 0, 0);
+}
+
 static struct vport_ops ovs_gre_vport_ops = {
 	.type		= OVS_VPORT_TYPE_GRE,
 	.create		= gre_create,
 	.destroy	= gre_tnl_destroy,
 	.get_name	= gre_get_name,
 	.send		= gre_tnl_send,
+	.get_egress_tun_info	= gre_get_egress_tun_info,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 965e750..38f95a5 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -69,7 +69,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
 	key = cpu_to_be64(ntohl(vx_vni) >> 8);
-	ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
+	ovs_flow_tun_info_init(&tun_info, iph,
+			       udp_hdr(skb)->source, udp_hdr(skb)->dest,
+			       key, TUNNEL_KEY, NULL, 0);
 
 	ovs_vport_receive(vport, skb, &tun_info);
 }
@@ -189,6 +191,25 @@ error:
 	return err;
 }
 
+static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+				     struct ovs_tunnel_info *egress_tun_info)
+{
+	struct net *net = ovs_dp_get_net(vport->dp);
+	struct vxlan_port *vxlan_port = vxlan_vport(vport);
+	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+	__be16 src_port;
+	int port_min;
+	int port_max;
+
+	inet_get_local_port_range(net, &port_min, &port_max);
+	src_port = udp_flow_src_port(net, skb, 0, 0, true);
+
+	return ovs_tunnel_get_egress_info(egress_tun_info, net,
+					  OVS_CB(skb)->egress_tun_info,
+					  IPPROTO_UDP, skb->mark,
+					  src_port, dst_port);
+}
+
 static const char *vxlan_get_name(const struct vport *vport)
 {
 	struct vxlan_port *vxlan_port = vxlan_vport(vport);
@@ -202,6 +223,7 @@ static struct vport_ops ovs_vxlan_vport_ops = {
 	.get_name	= vxlan_get_name,
 	.get_options	= vxlan_get_options,
 	.send		= vxlan_tnl_send,
+	.get_egress_tun_info	= vxlan_get_egress_tun_info,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 4b5dd18..630e819 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -573,3 +573,64 @@ void ovs_vport_deferred_free(struct vport *vport)
 	call_rcu(&vport->rcu, free_vport_rcu);
 }
 EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
+
+int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
+			       struct net *net,
+			       const struct ovs_tunnel_info *tun_info,
+			       u8 ipproto,
+			       u32 skb_mark,
+			       __be16 tp_src,
+			       __be16 tp_dst)
+{
+	const struct ovs_key_ipv4_tunnel *tun_key;
+	struct rtable *rt;
+	struct flowi4 fl;
+
+	if (unlikely(!tun_info))
+		return -EINVAL;
+
+	tun_key = &tun_info->tunnel;
+
+	/* Route lookup to get source IP address.
+	 * The process may need to be changed if the corresponding process
+	 * in vports ops changed.
+	 */
+	memset(&fl, 0, sizeof(fl));
+	fl.daddr = tun_key->ipv4_dst;
+	fl.saddr = tun_key->ipv4_src;
+	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
+	fl.flowi4_mark = skb_mark;
+	fl.flowi4_proto = IPPROTO_GRE;
+
+	rt = ip_route_output_key(net, &fl);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+
+	ip_rt_put(rt);
+
+	/* Generate egress_tun_info based on tun_info,
+	 * saddr, tp_src and tp_dst
+	 */
+	__ovs_flow_tun_info_init(egress_tun_info,
+				 fl.saddr, tun_key->ipv4_dst,
+				 tun_key->ipv4_tos,
+				 tun_key->ipv4_ttl,
+				 tp_src, tp_dst,
+				 tun_key->tun_id,
+				 tun_key->tun_flags,
+				 tun_info->options,
+				 tun_info->options_len);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
+
+int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+				  struct ovs_tunnel_info *info)
+{
+	/* get_egress_tun_info() is only implemented on tunnel ports. */
+	if (unlikely(!vport->ops->get_egress_tun_info))
+		return -EINVAL;
+
+	return vport->ops->get_egress_tun_info(vport, skb, info);
+}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index e41c3fa..0635d1d 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -58,6 +58,16 @@ u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
 
 int ovs_vport_send(struct vport *, struct sk_buff *);
 
+int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
+			       struct net *net,
+			       const struct ovs_tunnel_info *tun_info,
+			       u8 ipproto,
+			       u32 skb_mark,
+			       __be16 tp_src,
+			       __be16 tp_dst);
+int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+				  struct ovs_tunnel_info *info);
+
 /* The following definitions are for implementers of vport devices: */
 
 struct vport_err_stats {
@@ -146,6 +156,8 @@ struct vport_parms {
  * @get_name: Get the device's name.
  * @send: Send a packet on the device.  Returns the length of the packet sent,
  * zero for dropped packets or negative for error.
+ * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
+ * a packet.
  */
 struct vport_ops {
 	enum ovs_vport_type type;
@@ -161,6 +173,8 @@ struct vport_ops {
 	const char *(*get_name)(const struct vport *);
 
 	int (*send)(struct vport *, struct sk_buff *);
+	int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
+				   struct ovs_tunnel_info *);
 
 	struct module *owner;
 	struct list_head list;
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 3/6] openvswitch: Optimize recirc action.
From: Pravin B Shelar @ 2014-11-10  3:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, Pravin B Shelar

OVS needs the flow key for flow lookup in the recirc action, so it
currently does a key extract in the recirc action. In most cases we
could use the OVS_CB packet key directly and avoid the packet flow key
extract. For SET actions we can update the flow key along with the
packet to keep it consistent. But there are some actions, like MPLS
pop, which force OVS to do a flow extract. In such cases we
can mark the flow key as invalid so that a subsequent recirc
action does a full flow extract.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>
---
 net/openvswitch/actions.c | 151 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 106 insertions(+), 45 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ceb618c..d4c2f73 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -109,6 +109,16 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
 	return da;
 }
 
+static void invalidate_flow_key(struct sw_flow_key *key)
+{
+	key->eth.type = htons(0);
+}
+
+static bool is_flow_key_valid(const struct sw_flow_key *key)
+{
+	return !!key->eth.type;
+}
+
 static int make_writable(struct sk_buff *skb, int write_len)
 {
 	if (!pskb_may_pull(skb, write_len))
@@ -120,7 +130,7 @@ static int make_writable(struct sk_buff *skb, int write_len)
 	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 }
 
-static int push_mpls(struct sk_buff *skb,
+static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 		     const struct ovs_action_push_mpls *mpls)
 {
 	__be32 *new_mpls_lse;
@@ -151,10 +161,12 @@ static int push_mpls(struct sk_buff *skb,
 	skb_set_inner_protocol(skb, skb->protocol);
 	skb->protocol = mpls->mpls_ethertype;
 
+	invalidate_flow_key(key);
 	return 0;
 }
 
-static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
+static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
+		    const __be16 ethertype)
 {
 	struct ethhdr *hdr;
 	int err;
@@ -181,10 +193,13 @@ static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
 	hdr->h_proto = ethertype;
 	if (eth_p_mpls(skb->protocol))
 		skb->protocol = ethertype;
+
+	invalidate_flow_key(key);
 	return 0;
 }
 
-static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
+static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
+		    const __be32 *mpls_lse)
 {
 	__be32 *stack;
 	int err;
@@ -196,13 +211,12 @@ static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
 	stack = (__be32 *)skb_mpls_header(skb);
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		__be32 diff[] = { ~(*stack), *mpls_lse };
-
 		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
 					  ~skb->csum);
 	}
 
 	*stack = *mpls_lse;
-
+	key->mpls.top_lse = *mpls_lse;
 	return 0;
 }
 
@@ -237,7 +251,7 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 	return 0;
 }
 
-static int pop_vlan(struct sk_buff *skb)
+static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	__be16 tci;
 	int err;
@@ -255,9 +269,12 @@ static int pop_vlan(struct sk_buff *skb)
 	}
 	/* move next vlan tag to hw accel tag */
 	if (likely(skb->protocol != htons(ETH_P_8021Q) ||
-		   skb->len < VLAN_ETH_HLEN))
+		   skb->len < VLAN_ETH_HLEN)) {
+		key->eth.tci = 0;
 		return 0;
+	}
 
+	invalidate_flow_key(key);
 	err = __pop_vlan_tci(skb, &tci);
 	if (unlikely(err))
 		return err;
@@ -266,7 +283,8 @@ static int pop_vlan(struct sk_buff *skb)
 	return 0;
 }
 
-static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
+		     const struct ovs_action_push_vlan *vlan)
 {
 	if (unlikely(vlan_tx_tag_present(skb))) {
 		u16 current_tag;
@@ -283,12 +301,15 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
 			skb->csum = csum_add(skb->csum, csum_partial(skb->data
 					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
 
+		invalidate_flow_key(key);
+	} else {
+		key->eth.tci = vlan->vlan_tci;
 	}
 	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 	return 0;
 }
 
-static int set_eth_addr(struct sk_buff *skb,
+static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
 			const struct ovs_key_ethernet *eth_key)
 {
 	int err;
@@ -303,11 +324,13 @@ static int set_eth_addr(struct sk_buff *skb,
 
 	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
+	ether_addr_copy(key->eth.src, eth_key->eth_src);
+	ether_addr_copy(key->eth.dst, eth_key->eth_dst);
 	return 0;
 }
 
 static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
-				__be32 *addr, __be32 new_addr)
+			__be32 *addr, __be32 new_addr)
 {
 	int transport_len = skb->len - skb_transport_offset(skb);
 
@@ -386,7 +409,8 @@ static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
 	nh->ttl = new_ttl;
 }
 
-static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
+		    const struct ovs_key_ipv4 *ipv4_key)
 {
 	struct iphdr *nh;
 	int err;
@@ -398,22 +422,31 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
 
 	nh = ip_hdr(skb);
 
-	if (ipv4_key->ipv4_src != nh->saddr)
+	if (ipv4_key->ipv4_src != nh->saddr) {
 		set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
+		key->ipv4.addr.src = ipv4_key->ipv4_src;
+	}
 
-	if (ipv4_key->ipv4_dst != nh->daddr)
+	if (ipv4_key->ipv4_dst != nh->daddr) {
 		set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
+		key->ipv4.addr.dst = ipv4_key->ipv4_dst;
+	}
 
-	if (ipv4_key->ipv4_tos != nh->tos)
+	if (ipv4_key->ipv4_tos != nh->tos) {
 		ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+		key->ip.tos = nh->tos;
+	}
 
-	if (ipv4_key->ipv4_ttl != nh->ttl)
+	if (ipv4_key->ipv4_ttl != nh->ttl) {
 		set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
+		key->ip.ttl = ipv4_key->ipv4_ttl;
+	}
 
 	return 0;
 }
 
-static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
+		    const struct ovs_key_ipv6 *ipv6_key)
 {
 	struct ipv6hdr *nh;
 	int err;
@@ -429,9 +462,12 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
 	saddr = (__be32 *)&nh->saddr;
 	daddr = (__be32 *)&nh->daddr;
 
-	if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
+	if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
 		set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
 			      ipv6_key->ipv6_src, true);
+		memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src,
+		       sizeof(ipv6_key->ipv6_src));
+	}
 
 	if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
 		unsigned int offset = 0;
@@ -445,12 +481,18 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
 
 		set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
 			      ipv6_key->ipv6_dst, recalc_csum);
+		memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst,
+		       sizeof(ipv6_key->ipv6_dst));
 	}
 
 	set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
+	key->ip.tos = ipv6_get_dsfield(nh);
+
 	set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
-	nh->hop_limit = ipv6_key->ipv6_hlimit;
+	key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
 
+	nh->hop_limit = ipv6_key->ipv6_hlimit;
+	key->ip.ttl = ipv6_key->ipv6_hlimit;
 	return 0;
 }
 
@@ -478,7 +520,8 @@ static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
 	}
 }
 
-static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
+static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
+		   const struct ovs_key_udp *udp_port_key)
 {
 	struct udphdr *uh;
 	int err;
@@ -489,16 +532,21 @@ static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
 		return err;
 
 	uh = udp_hdr(skb);
-	if (udp_port_key->udp_src != uh->source)
+	if (udp_port_key->udp_src != uh->source) {
 		set_udp_port(skb, &uh->source, udp_port_key->udp_src);
+		key->tp.src = udp_port_key->udp_src;
+	}
 
-	if (udp_port_key->udp_dst != uh->dest)
+	if (udp_port_key->udp_dst != uh->dest) {
 		set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
+		key->tp.dst = udp_port_key->udp_dst;
+	}
 
 	return 0;
 }
 
-static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
+		   const struct ovs_key_tcp *tcp_port_key)
 {
 	struct tcphdr *th;
 	int err;
@@ -509,17 +557,21 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
 		return err;
 
 	th = tcp_hdr(skb);
-	if (tcp_port_key->tcp_src != th->source)
+	if (tcp_port_key->tcp_src != th->source) {
 		set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
+		key->tp.src = tcp_port_key->tcp_src;
+	}
 
-	if (tcp_port_key->tcp_dst != th->dest)
+	if (tcp_port_key->tcp_dst != th->dest) {
 		set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
+		key->tp.dst = tcp_port_key->tcp_dst;
+	}
 
 	return 0;
 }
 
-static int set_sctp(struct sk_buff *skb,
-		     const struct ovs_key_sctp *sctp_port_key)
+static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key,
+		    const struct ovs_key_sctp *sctp_port_key)
 {
 	struct sctphdr *sh;
 	int err;
@@ -546,6 +598,8 @@ static int set_sctp(struct sk_buff *skb,
 		sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
 
 		skb_clear_hash(skb);
+		key->tp.src = sctp_port_key->sctp_src;
+		key->tp.dst = sctp_port_key->sctp_dst;
 	}
 
 	return 0;
@@ -675,18 +729,20 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 	key->ovs_flow_hash = hash;
 }
 
-static int execute_set_action(struct sk_buff *skb,
-				 const struct nlattr *nested_attr)
+static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
+			      const struct nlattr *nested_attr)
 {
 	int err = 0;
 
 	switch (nla_type(nested_attr)) {
 	case OVS_KEY_ATTR_PRIORITY:
 		skb->priority = nla_get_u32(nested_attr);
+		key->phy.priority = skb->priority;
 		break;
 
 	case OVS_KEY_ATTR_SKB_MARK:
 		skb->mark = nla_get_u32(nested_attr);
+		key->phy.skb_mark = skb->mark;
 		break;
 
 	case OVS_KEY_ATTR_TUNNEL_INFO:
@@ -694,31 +750,31 @@ static int execute_set_action(struct sk_buff *skb,
 		break;
 
 	case OVS_KEY_ATTR_ETHERNET:
-		err = set_eth_addr(skb, nla_data(nested_attr));
+		err = set_eth_addr(skb, key, nla_data(nested_attr));
 		break;
 
 	case OVS_KEY_ATTR_IPV4:
-		err = set_ipv4(skb, nla_data(nested_attr));
+		err = set_ipv4(skb, key, nla_data(nested_attr));
 		break;
 
 	case OVS_KEY_ATTR_IPV6:
-		err = set_ipv6(skb, nla_data(nested_attr));
+		err = set_ipv6(skb, key, nla_data(nested_attr));
 		break;
 
 	case OVS_KEY_ATTR_TCP:
-		err = set_tcp(skb, nla_data(nested_attr));
+		err = set_tcp(skb, key, nla_data(nested_attr));
 		break;
 
 	case OVS_KEY_ATTR_UDP:
-		err = set_udp(skb, nla_data(nested_attr));
+		err = set_udp(skb, key, nla_data(nested_attr));
 		break;
 
 	case OVS_KEY_ATTR_SCTP:
-		err = set_sctp(skb, nla_data(nested_attr));
+		err = set_sctp(skb, key, nla_data(nested_attr));
 		break;
 
 	case OVS_KEY_ATTR_MPLS:
-		err = set_mpls(skb, nla_data(nested_attr));
+		err = set_mpls(skb, key, nla_data(nested_attr));
 		break;
 	}
 
@@ -730,11 +786,15 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
 			  const struct nlattr *a, int rem)
 {
 	struct deferred_action *da;
-	int err;
 
-	err = ovs_flow_key_update(skb, key);
-	if (err)
-		return err;
+	if (!is_flow_key_valid(key)) {
+		int err;
+
+		err = ovs_flow_key_update(skb, key);
+		if (err)
+			return err;
+	}
+	BUG_ON(!is_flow_key_valid(key));
 
 	if (!nla_is_last(a, rem)) {
 		/* Recirc action is the not the last action
@@ -771,7 +831,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 	/* Every output action needs a separate clone of 'skb', but the common
 	 * case is just a single output action, so that doing a clone and
 	 * then freeing the original skbuff is wasteful.  So the following code
-	 * is slightly obscure just to avoid that. */
+	 * is slightly obscure just to avoid that.
+	 */
 	int prev_port = -1;
 	const struct nlattr *a;
 	int rem;
@@ -803,21 +864,21 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			break;
 
 		case OVS_ACTION_ATTR_PUSH_MPLS:
-			err = push_mpls(skb, nla_data(a));
+			err = push_mpls(skb, key, nla_data(a));
 			break;
 
 		case OVS_ACTION_ATTR_POP_MPLS:
-			err = pop_mpls(skb, nla_get_be16(a));
+			err = pop_mpls(skb, key, nla_get_be16(a));
 			break;
 
 		case OVS_ACTION_ATTR_PUSH_VLAN:
-			err = push_vlan(skb, nla_data(a));
+			err = push_vlan(skb, key, nla_data(a));
 			if (unlikely(err)) /* skb already freed. */
 				return err;
 			break;
 
 		case OVS_ACTION_ATTR_POP_VLAN:
-			err = pop_vlan(skb);
+			err = pop_vlan(skb, key);
 			break;
 
 		case OVS_ACTION_ATTR_RECIRC:
@@ -832,7 +893,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			break;
 
 		case OVS_ACTION_ATTR_SET:
-			err = execute_set_action(skb, nla_data(a));
+			err = execute_set_action(skb, key, nla_data(a));
 			break;
 
 		case OVS_ACTION_ATTR_SAMPLE:
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 4/6] openvswitch: Remove redundant key ref from upcall_info.
From: Pravin B Shelar @ 2014-11-10  3:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, Pravin B Shelar

struct dp_upcall_info has a pointer to pkt_key, which is already
available in OVS_CB.  Removing it also simplifies upcall handling
for GSO packets.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>
---
 net/openvswitch/actions.c  |  3 +--
 net/openvswitch/datapath.c | 45 ++++++++++++++++++++++++++-------------------
 net/openvswitch/datapath.h | 12 +++++-------
 3 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index d4c2f73..10c94ac 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -624,7 +624,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 	int rem;
 
 	upcall.cmd = OVS_PACKET_CMD_ACTION;
-	upcall.key = key;
 	upcall.userdata = NULL;
 	upcall.portid = 0;
 	upcall.egress_tun_info = NULL;
@@ -659,7 +658,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 		} /* End of switch. */
 	}
 
-	return ovs_dp_upcall(dp, skb, &upcall);
+	return ovs_dp_upcall(dp, skb, key, &upcall);
 }
 
 static int sample(struct datapath *dp, struct sk_buff *skb,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index c2ac340..7146b38 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -136,8 +136,10 @@ EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
 
 static struct vport *new_vport(const struct vport_parms *);
 static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
+			     const struct sw_flow_key *,
 			     const struct dp_upcall_info *);
 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
+				  const struct sw_flow_key *,
 				  const struct dp_upcall_info *);
 
 /* Must be called with rcu_read_lock. */
@@ -271,11 +273,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 		int error;
 
 		upcall.cmd = OVS_PACKET_CMD_MISS;
-		upcall.key = key;
 		upcall.userdata = NULL;
 		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
 		upcall.egress_tun_info = NULL;
-		error = ovs_dp_upcall(dp, skb, &upcall);
+		error = ovs_dp_upcall(dp, skb, key, &upcall);
 		if (unlikely(error))
 			kfree_skb(skb);
 		else
@@ -299,6 +300,7 @@ out:
 }
 
 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
+		  const struct sw_flow_key *key,
 		  const struct dp_upcall_info *upcall_info)
 {
 	struct dp_stats_percpu *stats;
@@ -310,9 +312,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 	}
 
 	if (!skb_is_gso(skb))
-		err = queue_userspace_packet(dp, skb, upcall_info);
+		err = queue_userspace_packet(dp, skb, key, upcall_info);
 	else
-		err = queue_gso_packets(dp, skb, upcall_info);
+		err = queue_gso_packets(dp, skb, key, upcall_info);
 	if (err)
 		goto err;
 
@@ -329,39 +331,43 @@ err:
 }
 
 static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
+			     const struct sw_flow_key *key,
 			     const struct dp_upcall_info *upcall_info)
 {
 	unsigned short gso_type = skb_shinfo(skb)->gso_type;
-	struct dp_upcall_info later_info;
 	struct sw_flow_key later_key;
 	struct sk_buff *segs, *nskb;
+	struct ovs_skb_cb ovs_cb;
 	int err;
 
+	ovs_cb = *OVS_CB(skb);
 	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
+	*OVS_CB(skb) = ovs_cb;
 	if (IS_ERR(segs))
 		return PTR_ERR(segs);
 	if (segs == NULL)
 		return -EINVAL;
 
+	if (gso_type & SKB_GSO_UDP) {
+		/* The initial flow key extracted by ovs_flow_key_extract()
+		 * in this case is for a first fragment, so we need to
+		 * properly mark later fragments.
+		 */
+		later_key = *key;
+		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+	}
+
 	/* Queue all of the segments. */
 	skb = segs;
 	do {
-		err = queue_userspace_packet(dp, skb, upcall_info);
+		*OVS_CB(skb) = ovs_cb;
+		if (gso_type & SKB_GSO_UDP && skb != segs)
+			key = &later_key;
+
+		err = queue_userspace_packet(dp, skb, key, upcall_info);
 		if (err)
 			break;
 
-		if (skb == segs && gso_type & SKB_GSO_UDP) {
-			/* The initial flow key extracted by ovs_flow_extract()
-			 * in this case is for a first fragment, so we need to
-			 * properly mark later fragments.
-			 */
-			later_key = *upcall_info->key;
-			later_key.ip.frag = OVS_FRAG_TYPE_LATER;
-
-			later_info = *upcall_info;
-			later_info.key = &later_key;
-			upcall_info = &later_info;
-		}
 	} while ((skb = skb->next));
 
 	/* Free all of the segments. */
@@ -395,6 +401,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
 }
 
 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
+				  const struct sw_flow_key *key,
 				  const struct dp_upcall_info *upcall_info)
 {
 	struct ovs_header *upcall;
@@ -457,7 +464,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 	upcall->dp_ifindex = dp_ifindex;
 
 	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
-	err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
+	err = ovs_nla_put_flow(key, key, user_skb);
 	BUG_ON(err);
 	nla_nest_end(user_skb, nla);
 
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 2bc577b..8de9f7e 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -108,20 +108,18 @@ struct ovs_skb_cb {
 /**
  * struct dp_upcall - metadata to include with a packet to send to userspace
  * @cmd: One of %OVS_PACKET_CMD_*.
- * @key: Becomes %OVS_PACKET_ATTR_KEY.  Must be nonnull.
  * @userdata: If nonnull, its variable-length value is passed to userspace as
  * %OVS_PACKET_ATTR_USERDATA.
- * @pid: Netlink PID to which packet should be sent.  If @pid is 0 then no
- * packet is sent and the packet is accounted in the datapath's @n_lost
+ * @portid: Netlink portid to which packet should be sent.  If @portid is 0
+ * then no packet is sent and the packet is accounted in the datapath's @n_lost
  * counter.
  * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
  */
 struct dp_upcall_info {
-	u8 cmd;
-	const struct sw_flow_key *key;
+	const struct ovs_tunnel_info *egress_tun_info;
 	const struct nlattr *userdata;
 	u32 portid;
-	const struct ovs_tunnel_info *egress_tun_info;
+	u8 cmd;
 };
 
 /**
@@ -187,7 +185,7 @@ extern struct genl_family dp_vport_genl_family;
 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
-		  const struct dp_upcall_info *);
+		  const struct sw_flow_key *, const struct dp_upcall_info *);
 
 const char *ovs_dp_name(const struct datapath *dp);
 struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
-- 
1.9.3

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox