Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 2/3] r8169: Get and enable optional ether_clk clock
From: Hans de Goede @ 2018-09-12  9:34 UTC (permalink / raw)
  To: David S . Miller, Heiner Kallweit, Michael Turquette,
	Stephen Boyd, Andy Shevchenko, Pierre-Louis Bossart
  Cc: Hans de Goede, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, Johannes Stezenbach, Carlo Caione,
	linux-clk-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20180912093456.23400-1-hdegoede-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

On some boards a platform clock is used as clock for the r8169 chip,
this commit adds support for getting and enabling this clock (assuming
it has an "ether_clk" alias set on it).

This is related to commit d31fd43c0f9a ("clk: x86: Do not gate clocks
enabled by the firmware") which is a previous attempt to fix this for some
x86 boards, but this causes all Cherry Trail SoC using boards to not reach
there lowest power states when suspending.

This commit (together with an atom-pmc-clk driver commit adding the alias)
fixes things properly by making the r8169 get the clock and enable it when
it needs it.

Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=193891#c102
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=196861
Cc: Johannes Stezenbach <js-FF7aIK3TAVNeoWH0uzbU5w@public.gmane.org>
Cc: Carlo Caione <carlo-6IF/jdPJHihWk0Htik3J/w@public.gmane.org>
Reported-by: Johannes Stezenbach <js-FF7aIK3TAVNeoWH0uzbU5w@public.gmane.org>
Acked-by: Stephen Boyd <sboyd-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Signed-off-by: Hans de Goede <hdegoede-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
Changes in v2:
-Tweaked the commit msg a bit
-Added: Stephen's Acked-by, Andy's Reviewed-by
---
 drivers/net/ethernet/realtek/r8169.c | 33 ++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b08d51bf7a20..474229548731 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -13,6 +13,7 @@
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/ethtool.h>
 #include <linux/phy.h>
@@ -665,6 +666,7 @@ struct rtl8169_private {
 
 	u16 event_slow;
 	const struct rtl_coalesce_info *coalesce_info;
+	struct clk *clk;
 
 	struct mdio_ops {
 		void (*write)(struct rtl8169_private *, int, int);
@@ -7254,6 +7256,11 @@ static int rtl_jumbo_max(struct rtl8169_private *tp)
 	}
 }
 
+static void rtl_disable_clk(void *data)
+{
+	clk_disable_unprepare(data);
+}
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
@@ -7274,6 +7281,32 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT);
 	tp->supports_gmii = cfg->has_gmii;
 
+	/* Get the *optional* external "ether_clk" used on some boards */
+	tp->clk = devm_clk_get(&pdev->dev, "ether_clk");
+	if (IS_ERR(tp->clk)) {
+		rc = PTR_ERR(tp->clk);
+		if (rc == -ENOENT) {
+			/* clk-core allows NULL (for suspend / resume) */
+			tp->clk = NULL;
+		} else if (rc == -EPROBE_DEFER) {
+			return rc;
+		} else {
+			dev_err(&pdev->dev, "failed to get clk: %d\n", rc);
+			return rc;
+		}
+	} else {
+		rc = clk_prepare_enable(tp->clk);
+		if (rc) {
+			dev_err(&pdev->dev, "failed to enable clk: %d\n", rc);
+			return rc;
+		}
+
+		rc = devm_add_action_or_reset(&pdev->dev, rtl_disable_clk,
+					      tp->clk);
+		if (rc)
+			return rc;
+	}
+
 	/* enable device (incl. PCI PM wakeup and hotplug setup) */
 	rc = pcim_enable_device(pdev);
 	if (rc < 0) {
-- 
2.19.0.rc0

^ permalink raw reply related

* Re: Fw: [Bug 201071] New: Creating a vxlan in state 'up' does not give proper RTM_NEWLINK message
From: Roopa Prabhu @ 2018-09-12  4:48 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev
In-Reply-To: <CAJieiUgvybeEJ8dpDxh2AjMO2Zys7icqLZLEbOjJY6yiOnJ0yA@mail.gmail.com>

On Tue, Sep 11, 2018 at 3:10 PM, Roopa Prabhu <roopa@cumulusnetworks.com> wrote:
> On Mon, Sep 10, 2018 at 11:55 AM, Stephen Hemminger
> <stephen@networkplumber.org> wrote:
>>
>>
>> Begin forwarded message:
>>
>> Date: Mon, 10 Sep 2018 04:04:37 +0000
>> From: bugzilla-daemon@bugzilla.kernel.org
>> To: stephen@networkplumber.org
>> Subject: [Bug 201071] New: Creating a vxlan in state 'up' does not give proper RTM_NEWLINK message
>>
>>
>> https://bugzilla.kernel.org/show_bug.cgi?id=201071
>>
>>             Bug ID: 201071
>>            Summary: Creating a vxlan in state 'up' does not give proper
>>                     RTM_NEWLINK message
>>            Product: Networking
>>            Version: 2.5
>>     Kernel Version: 4.19-rc1
>>           Hardware: All
>>                 OS: Linux
>>               Tree: Mainline
>>             Status: NEW
>>           Severity: normal
>>           Priority: P1
>>          Component: Other
>>           Assignee: stephen@networkplumber.org
>>           Reporter: liam.mcbirnie@boeing.com
>>         Regression: Yes
>>
>> If a vxlan is created with state 'up', the RTM_NEWLINK message shows the state
>> as down, and there no other netlink messages are sent.
>> As a result, processes listening to netlink are never notified that the vxlan
>> link is up.
>
> thanks for the fwd. looking...
>
>

attached a patch to the bug. Will post it here after some confirmation
of the fix from the reporter.

^ permalink raw reply

* Re: [Patch net] tipc: check return value of __tipc_dump_start()
From: Ying Xue @ 2018-09-12 10:04 UTC (permalink / raw)
  To: Cong Wang, netdev; +Cc: tipc-discussion
In-Reply-To: <20180911221217.23392-1-xiyou.wangcong@gmail.com>

On 09/12/2018 06:12 AM, Cong Wang wrote:
> When __tipc_dump_start() fails with running out of memory,
> we have no reason to continue, especially we should avoid
> calling tipc_dump_done().
> 
> Fixes: 8f5c5fcf3533 ("tipc: call start and done ops directly in __tipc_nl_compat_dumpit()")
> Reported-and-tested-by: syzbot+3f8324abccfbf8c74a9f@syzkaller.appspotmail.com
> Cc: Jon Maloy <jon.maloy@ericsson.com>
> Cc: Ying Xue <ying.xue@windriver.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

Acked-by: Ying Xue <ying.xue@windriver.com>

> ---
>  net/tipc/netlink_compat.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
> index 82f665728382..6376467e78f8 100644
> --- a/net/tipc/netlink_compat.c
> +++ b/net/tipc/netlink_compat.c
> @@ -185,7 +185,10 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
>  		return -ENOMEM;
>  
>  	buf->sk = msg->dst_sk;
> -	__tipc_dump_start(&cb, msg->net);
> +	if (__tipc_dump_start(&cb, msg->net)) {
> +		kfree_skb(buf);
> +		return -ENOMEM;
> +	}
>  
>  	do {
>  		int rem;
> 

^ permalink raw reply

* KMSAN: uninit-value in pppoe_rcv
From: syzbot @ 2018-09-12 10:24 UTC (permalink / raw)
  To: linux-kernel, mostrows, netdev, syzkaller-bugs

Hello,

syzbot found the following crash on:

HEAD commit:    d2d741e5d189 kmsan: add initialization for shmem pages
git tree:       https://github.com/google/kmsan.git/master
console output: https://syzkaller.appspot.com/x/log.txt?x=1465fc37800000
kernel config:  https://syzkaller.appspot.com/x/.config?x=48f9de3384bcd0f
dashboard link: https://syzkaller.appspot.com/bug?extid=f5f6080811c849739212
compiler:       clang version 7.0.0 (trunk 329391)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=14d6e607800000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=10a15b5b800000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+f5f6080811c849739212@syzkaller.appspotmail.com

IPVS: ftp: loaded support on port[0] = 21
==================================================================
BUG: KMSAN: uninit-value in __get_item drivers/net/ppp/pppoe.c:172 [inline]
BUG: KMSAN: uninit-value in get_item drivers/net/ppp/pppoe.c:236 [inline]
BUG: KMSAN: uninit-value in pppoe_rcv+0xcef/0x10e0  
drivers/net/ppp/pppoe.c:450
CPU: 0 PID: 4543 Comm: syz-executor355 Not tainted 4.16.0+ #87
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:17 [inline]
  dump_stack+0x185/0x1d0 lib/dump_stack.c:53
  kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
  __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683
  __get_item drivers/net/ppp/pppoe.c:172 [inline]
  get_item drivers/net/ppp/pppoe.c:236 [inline]
  pppoe_rcv+0xcef/0x10e0 drivers/net/ppp/pppoe.c:450
  __netif_receive_skb_core+0x47df/0x4a90 net/core/dev.c:4562
  __netif_receive_skb net/core/dev.c:4627 [inline]
  netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701
  netif_receive_skb+0x230/0x240 net/core/dev.c:4725
  tun_rx_batched drivers/net/tun.c:1555 [inline]
  tun_get_user+0x740f/0x7c60 drivers/net/tun.c:1962
  tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
  call_write_iter include/linux/fs.h:1782 [inline]
  new_sync_write fs/read_write.c:469 [inline]
  __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
  vfs_write+0x463/0x8d0 fs/read_write.c:544
  SYSC_write+0x172/0x360 fs/read_write.c:589
  SyS_write+0x55/0x80 fs/read_write.c:581
  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
RIP: 0033:0x4447c9
RSP: 002b:00007fff64c8fc28 EFLAGS: 00000297 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004447c9
RDX: 000000000000fd87 RSI: 0000000020000600 RDI: 0000000000000004
RBP: 00000000006cf018 R08: 00007fff64c8fda8 R09: 00007fff00006bda
R10: 0000000000005fe7 R11: 0000000000000297 R12: 00000000004020d0
R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000

Uninit was created at:
  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
  kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
  kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
  kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321
  slab_post_alloc_hook mm/slab.h:445 [inline]
  slab_alloc_node mm/slub.c:2737 [inline]
  __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369
  __kmalloc_reserve net/core/skbuff.c:138 [inline]
  __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206
  alloc_skb include/linux/skbuff.h:984 [inline]
  alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234
  sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085
  tun_alloc_skb drivers/net/tun.c:1532 [inline]
  tun_get_user+0x2242/0x7c60 drivers/net/tun.c:1829
  tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
  call_write_iter include/linux/fs.h:1782 [inline]
  new_sync_write fs/read_write.c:469 [inline]
  __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
  vfs_write+0x463/0x8d0 fs/read_write.c:544
  SYSC_write+0x172/0x360 fs/read_write.c:589
  SyS_write+0x55/0x80 fs/read_write.c:581
  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
==================================================================


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with  
syzbot.
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches

^ permalink raw reply

* Re: [PATCH net-next] net: dsa: b53: Uninitialized variable in b53_adjust_link()
From: David Miller @ 2018-09-12  6:00 UTC (permalink / raw)
  To: dan.carpenter; +Cc: f.fainelli, andrew, vivien.didelot, netdev, kernel-janitors
In-Reply-To: <20180908083925.i5j23wi67m5by2ew@kili.mountain>

From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 8 Sep 2018 11:39:25 +0300

> The "pause" variable is only initialized on BCM5301x.
> 
> Fixes: 5e004460f874 ("net: dsa: b53: Add helper to set link parameters")
> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>

Applied, thanks Dan.

^ permalink raw reply

* Re: [PATCH net] powerpc: use big endian to hash len and proto in csum_ipv6_magic
From: David Miller @ 2018-09-12  6:01 UTC (permalink / raw)
  To: lucien.xin; +Cc: netdev, linuxppc-dev, christophe.leroy, mpe, roopa
In-Reply-To: <9183876a4a8ff0099686521d60f395a5230b67ed.1536401712.git.lucien.xin@gmail.com>

From: Xin Long <lucien.xin@gmail.com>
Date: Sat,  8 Sep 2018 18:15:12 +0800

> The function csum_ipv6_magic doesn't convert len and proto to big
> endian before doing ipv6 csum hash, which is not consistent with
> RFC and other arches.
> 
> Jianlin found it when ICMPv6 packets from other hosts were dropped
> in the powerpc64 system.
> 
> This patch is to fix it by using instruction 'lwbrx' to do this
> conversion in powerpc32/64 csum_ipv6_magic.
> 
> Fixes: e9c4943a107b ("powerpc: Implement csum_ipv6_magic in assembly")
> Reported-by: Jianlin Shi <jishi@redhat.com>
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Xin, please address the feedback you were given.

Thank you.

^ permalink raw reply

* Re: [PATCH] net: xenbus: remove redundant condition check before debugfs_remove_recursive
From: David Miller @ 2018-09-12  6:01 UTC (permalink / raw)
  To: zhongjiang; +Cc: paul.durrant, wei.liu2, xen-devel, netdev
In-Reply-To: <1536413706-31838-1-git-send-email-zhongjiang@huawei.com>

From: zhong jiang <zhongjiang@huawei.com>
Date: Sat, 8 Sep 2018 21:35:06 +0800

> debugfs_remove_recursive has taken the IS_ERR_OR_NULL into account. Just
> remove the unnecessary condition check.
> 
> Signed-off-by: zhong jiang <zhongjiang@huawei.com>

Applied to net-next.

^ permalink raw reply

* Re: [PATCH net 00/13] cls_u32 cleanups and fixes.
From: David Miller @ 2018-09-12  6:15 UTC (permalink / raw)
  To: viro; +Cc: netdev, jhs, xiyou.wangcong, jiri
In-Reply-To: <20180909013132.3222-1-viro@ZenIV.linux.org.uk>

From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Sun,  9 Sep 2018 02:31:19 +0100

> A series of net/sched/cls_u32.c cleanups and fixes:

Al, let's separate the serious bug fixes into a separate series
targetting net.  Then we can do all of the cleanups and
simplifications in net-next once I merge net into there.

Thank you.

^ permalink raw reply

* Re: [PATCH net-next 1/8] devlink: Add generic parameter hw_tc_offload
From: Vasundhara Volam @ 2018-09-12  6:17 UTC (permalink / raw)
  To: Jiří Pírko; +Cc: David Miller, michael.chan@broadcom.com, Netdev
In-Reply-To: <20180911095130.GC25110@nanopsycho>

On Tue, Sep 11, 2018 at 3:25 PM Jiri Pirko <jiri@resnulli.us> wrote:
>
> Tue, Sep 11, 2018 at 10:44:58AM CEST, vasundhara-v.volam@broadcom.com wrote:
> >hw_tc_offload - Enable/Disable TC flower offload in the device.
> >
> >Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
> >---
> > include/net/devlink.h | 4 ++++
> > net/core/devlink.c    | 5 +++++
> > 2 files changed, 9 insertions(+)
> >
> >diff --git a/include/net/devlink.h b/include/net/devlink.h
> >index b9b89d6..a0e9ce9 100644
> >--- a/include/net/devlink.h
> >+++ b/include/net/devlink.h
> >@@ -362,6 +362,7 @@ enum devlink_param_generic_id {
> >       DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
> >       DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
> >       DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
> >+      DEVLINK_PARAM_GENERIC_ID_HW_TC_OFFLOAD,
>
> Could you please describe why do you need this here and why the
> tc_offload flag in ethtool is not enough. How do you imagine the user
> should use them together?
Jiri, tc_offload flag in ethtool will modify feature in driver at
runtime. But I am adding
tc_offload param here to toggle this feature in NVM Config of our
adapter, whose configuration
mode is permanent and will be effective only with a reboot of the server.

User has to turn on tc_offload feature in NVM config of the adapter and then
enabling the tc_offload flag in ethtool will completely enable the
feature in driver.

^ permalink raw reply

* Re: [PATCH net] powerpc: use big endian to hash len and proto in csum_ipv6_magic
From: Christophe LEROY @ 2018-09-12  6:27 UTC (permalink / raw)
  To: David Miller, lucien.xin; +Cc: netdev, linuxppc-dev, mpe, roopa
In-Reply-To: <20180911.230105.563027666901362955.davem@davemloft.net>


Le 12/09/2018 à 08:01, David Miller a écrit :
> From: Xin Long <lucien.xin@gmail.com>
> Date: Sat,  8 Sep 2018 18:15:12 +0800
> 
>> The function csum_ipv6_magic doesn't convert len and proto to big
>> endian before doing ipv6 csum hash, which is not consistent with
>> RFC and other arches.
>>
>> Jianlin found it when ICMPv6 packets from other hosts were dropped
>> in the powerpc64 system.
>>
>> This patch is to fix it by using instruction 'lwbrx' to do this
>> conversion in powerpc32/64 csum_ipv6_magic.
>>
>> Fixes: e9c4943a107b ("powerpc: Implement csum_ipv6_magic in assembly")
>> Reported-by: Jianlin Shi <jishi@redhat.com>
>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> 
> Xin, please address the feedback you were given.

I submitted an alternative fix, and Lucien Xin gave its Tested-by:

See https://patchwork.ozlabs.org/patch/967868/

Christophe

^ permalink raw reply

* Re: [PATCH net] powerpc: use big endian to hash len and proto in csum_ipv6_magic
From: Xin Long @ 2018-09-12  6:27 UTC (permalink / raw)
  To: davem
  Cc: network dev, linuxppc-dev, Christophe Leroy, Michael Ellerman,
	Roopa Prabhu
In-Reply-To: <20180911.230105.563027666901362955.davem@davemloft.net>

On Wed, Sep 12, 2018 at 2:01 PM David Miller <davem@davemloft.net> wrote:
>
> From: Xin Long <lucien.xin@gmail.com>
> Date: Sat,  8 Sep 2018 18:15:12 +0800
>
> > The function csum_ipv6_magic doesn't convert len and proto to big
> > endian before doing ipv6 csum hash, which is not consistent with
> > RFC and other arches.
> >
> > Jianlin found it when ICMPv6 packets from other hosts were dropped
> > in the powerpc64 system.
> >
> > This patch is to fix it by using instruction 'lwbrx' to do this
> > conversion in powerpc32/64 csum_ipv6_magic.
> >
> > Fixes: e9c4943a107b ("powerpc: Implement csum_ipv6_magic in assembly")
> > Reported-by: Jianlin Shi <jishi@redhat.com>
> > Signed-off-by: Xin Long <lucien.xin@gmail.com>
>
> Xin, please address the feedback you were given.
Christophe posted another one,
https://lore.kernel.org/patchwork/patch/983905/

Sorry, I didn't notice netdev wasn't in its CC-list.

^ permalink raw reply

* Re: [PATCH v2 net] MIPS: lantiq: dma: add dev pointer
From: David Miller @ 2018-09-12  6:33 UTC (permalink / raw)
  To: hauke
  Cc: netdev, andrew, f.fainelli, john, linux-mips, hauke.mehrtens,
	paul.burton
In-Reply-To: <20180909192623.14998-1-hauke@hauke-m.de>

From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sun,  9 Sep 2018 21:26:23 +0200

> dma_zalloc_coherent() now crashes if no dev pointer is given.
> Add a dev pointer to the ltq_dma_channel structure and fill it in the
> driver using it.
> 
> This fixes a bug introduced in kernel 4.19.
> 
> Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH net-next 1/8] devlink: Add generic parameter hw_tc_offload
From: Jakub Kicinski @ 2018-09-12  6:34 UTC (permalink / raw)
  To: Vasundhara Volam
  Cc: Jiří Pírko, David Miller,
	michael.chan@broadcom.com, Netdev
In-Reply-To: <CAACQVJrNyDXCuEFRVWcVh9tpb4_8WpLJ3U+ufm8TXcVYncHD6A@mail.gmail.com>

On Wed, 12 Sep 2018 11:47:59 +0530, Vasundhara Volam wrote:
> On Tue, Sep 11, 2018 at 3:25 PM Jiri Pirko <jiri@resnulli.us> wrote:
> >
> > Tue, Sep 11, 2018 at 10:44:58AM CEST, vasundhara-v.volam@broadcom.com wrote:  
> > >hw_tc_offload - Enable/Disable TC flower offload in the device.
> > >
> > >Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
> > >---
> > > include/net/devlink.h | 4 ++++
> > > net/core/devlink.c    | 5 +++++
> > > 2 files changed, 9 insertions(+)
> > >
> > >diff --git a/include/net/devlink.h b/include/net/devlink.h
> > >index b9b89d6..a0e9ce9 100644
> > >--- a/include/net/devlink.h
> > >+++ b/include/net/devlink.h
> > >@@ -362,6 +362,7 @@ enum devlink_param_generic_id {
> > >       DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
> > >       DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
> > >       DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
> > >+      DEVLINK_PARAM_GENERIC_ID_HW_TC_OFFLOAD,  
> >
> > Could you please describe why do you need this here and why the
> > tc_offload flag in ethtool is not enough. How do you imagine the user
> > should use them together?  
> Jiri, tc_offload flag in ethtool will modify feature in driver at
> runtime. But I am adding
> tc_offload param here to toggle this feature in NVM Config of our
> adapter, whose configuration
> mode is permanent and will be effective only with a reboot of the server.
> 
> User has to turn on tc_offload feature in NVM config of the adapter and then
> enabling the tc_offload flag in ethtool will completely enable the
> feature in driver.

Thanks for explaining, however, I don't think we have trouble
understanding *what* you are doing, but rather *why* you're doing it.

^ permalink raw reply

* Re: [PATCH net-next] tcp: rate limit synflood warnings further
From: David Miller @ 2018-09-12  6:35 UTC (permalink / raw)
  To: willemdebruijn.kernel; +Cc: netdev, eric.dumazet, willemb
In-Reply-To: <20180909231212.212470-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Date: Sun,  9 Sep 2018 19:12:12 -0400

> From: Willem de Bruijn <willemb@google.com>
> 
> Convert pr_info to net_info_ratelimited to limit the total number of
> synflood warnings.
> 
> Commit 946cedccbd73 ("tcp: Change possible SYN flooding messages")
> rate limits synflood warnings to one per listener.
> 
> Workloads that open many listener sockets can still see a high rate of
> log messages. Syzkaller is one frequent example.
> 
> Signed-off-by: Willem de Bruijn <willemb@google.com>

Applied, thanks Willem.

Is this stable material?

^ permalink raw reply

* Re: [PATCH net-next 0/3] liquidio: Removed droq lock from Rx path
From: David Miller @ 2018-09-12  6:37 UTC (permalink / raw)
  To: felix.manlunas
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	intiyaz.basha
In-Reply-To: <20180910063322.GA4011@felix-thinkpad.cavium.com>

From: Felix Manlunas <felix.manlunas@cavium.com>
Date: Sun, 9 Sep 2018 23:33:22 -0700

> Series of patches for removing droq lock from Rx Path.

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next 0/8] bnxt_en: devlink param updates
From: Vasundhara Volam @ 2018-09-12  6:39 UTC (permalink / raw)
  To: jakub.kicinski
  Cc: David Miller, michael.chan@broadcom.com, Netdev, alexander.duyck
In-Reply-To: <20180911133215.5798ed2a@cakuba>

On Tue, Sep 11, 2018 at 5:04 PM Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
>
> On Tue, 11 Sep 2018 14:14:57 +0530, Vasundhara Volam wrote:
> > This patchset adds support for 4 generic and 1 driver-specific devlink
> > parameters.
> >
> > Also, this patchset adds support to return proper error code if
> > HWRM_NVM_GET/SET_VARIABLE commands return error code
> > HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED.
> >
> > Vasundhara Volam (8):
> >   devlink: Add generic parameter hw_tc_offload
>
> Much like Jiri, I can't help but wonder why do you need this?
There is a request from our customer for a way to toggle tc_offload
feature in our adapter.
>
> >   devlink: Add generic parameter ignore_ari
> >   devlink: Add generic parameter msix_vec_per_pf_max
> >   devlink: Add generic parameter msix_vec_per_pf_min
>
> IMHO more structured API would be preferable if possible.  The string
> keys won't scale if you want to set the parameters per PF, and
> creating more structured API for PCIe which is a relatively slow
> moving HW spec seems tractable.
Sorry, could you please suggest an example? We will try to adapt.
>
> Not to mention the question Alex posed before about where this knobs
> should actually live.  I'm personally fine with devlink.
Okay
>
> >   bnxt_en: Use hw_tc_offload and ignore_ari devlink parameters
> >   bnxt_en: return proper error when FW returns
> >     HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED
> >   bnxt_en: Use msix_vec_per_pf_max and msix_vec_per_pf_min devlink
> >     params.
> >   bnxt_en: Add a driver specific devlink parameter.
>
> The details about the device specific devlink parameter are very much
> lacking.  Why does the patch subject not mention any specifics?  What's
> GRE in the first place?
>
> You really need to provide more details and docs for all these
> parameters.
Sure, I will add all the documentation in my next version of the patch. Thanks.

^ permalink raw reply

* Re: [PATCH net-next 0/8] bnxt_en: devlink param updates
From: Vasundhara Volam @ 2018-09-12  6:40 UTC (permalink / raw)
  To: Jiří Pírko
  Cc: jakub.kicinski, David Miller, michael.chan@broadcom.com, Netdev,
	alexander.duyck
In-Reply-To: <20180911115126.GF25110@nanopsycho>

On Tue, Sep 11, 2018 at 5:25 PM Jiri Pirko <jiri@resnulli.us> wrote:
>
> Tue, Sep 11, 2018 at 01:33:51PM CEST, jakub.kicinski@netronome.com wrote:
> >On Tue, 11 Sep 2018 14:14:57 +0530, Vasundhara Volam wrote:
> >> This patchset adds support for 4 generic and 1 driver-specific devlink
> >> parameters.
> >>
> >> Also, this patchset adds support to return proper error code if
> >> HWRM_NVM_GET/SET_VARIABLE commands return error code
> >> HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED.
> >>
> >> Vasundhara Volam (8):
> >>   devlink: Add generic parameter hw_tc_offload
> >
> >Much like Jiri, I can't help but wonder why do you need this?
> >
> >>   devlink: Add generic parameter ignore_ari
> >>   devlink: Add generic parameter msix_vec_per_pf_max
> >>   devlink: Add generic parameter msix_vec_per_pf_min
> >
> >IMHO more structured API would be preferable if possible.  The string
> >keys won't scale if you want to set the parameters per PF, and
> >creating more structured API for PCIe which is a relatively slow
> >moving HW spec seems tractable.
> >
> >Not to mention the question Alex posed before about where this knobs
> >should actually live.  I'm personally fine with devlink.
> >
> >>   bnxt_en: Use hw_tc_offload and ignore_ari devlink parameters
> >>   bnxt_en: return proper error when FW returns
> >>     HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED
> >>   bnxt_en: Use msix_vec_per_pf_max and msix_vec_per_pf_min devlink
> >>     params.
> >>   bnxt_en: Add a driver specific devlink parameter.
> >
> >The details about the device specific devlink parameter are very much
> >lacking.  Why does the patch subject not mention any specifics?  What's
> >GRE in the first place?
> >
> >You really need to provide more details and docs for all these
> >parameters.
>
> We really need Documentation/networking/devlink-params.txt to describe
> these.
>
Sure. I will add the documentation in my next version of the patchset
for all generic and driver specific parameters.

^ permalink raw reply

* Re: [PATCH net-next v3 0/7] net: aquantia: implement WOL and EEE support
From: David Miller @ 2018-09-12  6:48 UTC (permalink / raw)
  To: igor.russkikh; +Cc: netdev
In-Reply-To: <cover.1536572107.git.igor.russkikh@aquantia.com>

From: Igor Russkikh <igor.russkikh@aquantia.com>
Date: Mon, 10 Sep 2018 12:39:27 +0300

> This is v3 of WOL/EEE functionality patch for atlantic driver.
> 
> In this patchset Yana Esina and Nikita Danilov implemented:
> 
> - Upload function to interact with FW memory
> - Definitions and structures necessary for the correct operation of Wake ON Lan
> - The functionality Wake On Lan via ethtool (Magic packet is supported)
> - The functionality for Energy-Efficient Ethernet configuration via ethtool
> 
> Version 3:
> - use ETH_ALEN instead of raw number
> 
> Version 2 has the following fixes:
> - patchset reorganized to extract renaming and whitespace fixes into separate
>   patches
> - some of magic numbers replaced with defines
> - reverse christmas tree applied

Series applied, thanks.

> Discussion outcome regarding driver version bumps was not finished
> (here https://patchwork.ozlabs.org/patch/954905/)
> David, could you suggest the best way to proceed on this?

Having a channel for your driver that is outside of upstream and Linux
distribution packages creates lots of problems.

When a user reports a problem with an upstream kernel, that verion
dictates which driver source was being used.  There is not confusion
or ambiguity.

For a distribution kernel, the distributor hashes out which driver
they published in their kernel package when evaluating a bug reported
to them.

None of these two entities is ready to evaluate and handle properly
your custom scheme.

So generally I frown against separate distribution schemes.  It is
in the final analysis an inferior experience for the user because
you basically narrow all of their support channels for problems
down to you and you alone.  The whole idea is to make it work the
opposite way.

So in the upstream tree, really, the driver version is pretty useless.

^ permalink raw reply

* Re: [PATCH net] geneve: add ttl inherit support
From: Jiri Benc @ 2018-09-12  6:53 UTC (permalink / raw)
  To: Hangbin Liu; +Cc: netdev, Pravin Shelar, David S. Miller, Stefano Brivio
In-Reply-To: <1536717861-21590-1-git-send-email-liuhangbin@gmail.com>

On Wed, 12 Sep 2018 10:04:21 +0800, Hangbin Liu wrote:
> Similar with commit 72f6d71e491e6 ("vxlan: add ttl inherit support"),
> currently ttl == 0 means "use whatever default value" on geneve instead
> of inherit inner ttl. To respect compatibility with old behavior, let's
> add a new IFLA_GENEVE_TTL_INHERIT for geneve ttl inherit support.
> 
> Reported-by: Jianlin Shi <jishi@redhat.com>
> Suggested-by: Jiri Benc <jbenc@redhat.com>
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>

Reviewed-by: Jiri Benc <jbenc@redhat.com>

Thanks, Hangbin!

 Jiri

^ permalink raw reply

* Re: [PATCH net-next v1] net/tls: Fixed return value when tls_complete_pending_work() fails
From: David Miller @ 2018-09-12  7:04 UTC (permalink / raw)
  To: vakul.garg; +Cc: netdev, borisp, aviadye, davejwatson, doronrk
In-Reply-To: <20180910172346.13181-1-vakul.garg@nxp.com>

From: Vakul Garg <vakul.garg@nxp.com>
Date: Mon, 10 Sep 2018 22:53:46 +0530

> In tls_sw_sendmsg() and tls_sw_sendpage(), the variable 'ret' has
> been set to return value of tls_complete_pending_work(). This allows
> return of proper error code if tls_complete_pending_work() fails.
> 
> Fixes: 3c4d7559159b ("tls: kernel TLS support")
> Signed-off-by: Vakul Garg <vakul.garg@nxp.com>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH][net-next][v2] netlink: remove hash::nelems check in netlink_insert
From: David Miller @ 2018-09-12  7:08 UTC (permalink / raw)
  To: lirongqing; +Cc: netdev
In-Reply-To: <1536627901-18256-1-git-send-email-lirongqing@baidu.com>

From: Li RongQing <lirongqing@baidu.com>
Date: Tue, 11 Sep 2018 09:05:01 +0800

> The type of hash::nelems has been changed from size_t to atom_t
> which in fact is int, so not need to check if BITS_PER_LONG, that
> is bit number of size_t, is bigger than 32
> 
> and rht_grow_above_max() will be called to check if hashtable is
> too big, ensure it can not bigger than 1<<31
> 
> Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> Signed-off-by: Li RongQing <lirongqing@baidu.com>

Applied.

^ permalink raw reply

* Re: [Patch net v2] rds: fix two RCU related problems
From: David Miller @ 2018-09-12  7:10 UTC (permalink / raw)
  To: xiyou.wangcong; +Cc: netdev, sowmini.varadhan, santosh.shilimkar, rds-devel
In-Reply-To: <20180911012726.5353-1-xiyou.wangcong@gmail.com>

From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 10 Sep 2018 18:27:26 -0700

> When a rds sock is bound, it is inserted into the bind_hash_table
> which is protected by RCU. But when releasing rds sock, after it
> is removed from this hash table, it is freed immediately without
> respecting RCU grace period. This could cause some use-after-free
> as reported by syzbot.
> 
> Mark the rds sock with SOCK_RCU_FREE before inserting it into the
> bind_hash_table, so that it would be always freed after a RCU grace
> period.
> 
> The other problem is in rds_find_bound(), the rds sock could be
> freed in between rhashtable_lookup_fast() and rds_sock_addref(),
> so we need to extend RCU read lock protection in rds_find_bound()
> to close this race condition.
> 
> Reported-and-tested-by: syzbot+8967084bcac563795dc6@syzkaller.appspotmail.com
> Reported-by: syzbot+93a5839deb355537440f@syzkaller.appspotmail.com
> Cc: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> Cc: Santosh Shilimkar <santosh.shilimkar@oracle.com>
> Cc: rds-devel@oss.oracle.com
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH] net: ethernet: Use DIV_ROUND_UP instead of reimplementing its function
From: Tariq Toukan @ 2018-09-12 12:14 UTC (permalink / raw)
  To: zhong jiang, davem, claudiu.manoil, saeedm, derek.chickles
  Cc: leon, jdmason, netdev, linux-kernel
In-Reply-To: <1536671295-14432-1-git-send-email-zhongjiang@huawei.com>



On 11/09/2018 4:08 PM, zhong jiang wrote:
> DIV_ROUND_UP has implemented the code-opened function. Therefore, just
> replace the implementation with DIV_ROUND_UP.
> 
> Signed-off-by: zhong jiang <zhongjiang@huawei.com>
> ---
>   drivers/net/ethernet/broadcom/sb1250-mac.c         | 2 +-
>   drivers/net/ethernet/cavium/liquidio/octeon_droq.c | 2 +-
>   drivers/net/ethernet/freescale/gianfar_ethtool.c   | 2 +-
>   drivers/net/ethernet/ibm/emac/mal.h                | 2 +-
>   drivers/net/ethernet/mellanox/mlx4/alloc.c         | 2 +-
>   drivers/net/ethernet/mellanox/mlx4/icm.c           | 2 +-
>   drivers/net/ethernet/mellanox/mlx5/core/srq.c      | 2 +-
>   drivers/net/ethernet/neterion/s2io.c               | 2 +-
>   8 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
> index ef4a0c3..1082529 100644
> --- a/drivers/net/ethernet/broadcom/sb1250-mac.c
> +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
> @@ -156,7 +156,7 @@ enum sbmac_state {
>   			  (d)->sbdma_dscrtable : (d)->f+1)
>   
>   
> -#define NUMCACHEBLKS(x) (((x)+SMP_CACHE_BYTES-1)/SMP_CACHE_BYTES)
> +#define NUMCACHEBLKS(x) DIV_ROUND_UP(x, SMP_CACHE_BYTES)
>   
>   #define SBMAC_MAX_TXDESCR	256
>   #define SBMAC_MAX_RXDESCR	256
> diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
> index a71dbb7..b57008d 100644
> --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
> +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
> @@ -530,7 +530,7 @@ void octeon_droq_check_oom(struct octeon_droq *droq)
>   static inline u32
>   octeon_droq_get_bufcount(u32 buf_size, u32 total_len)
>   {
> -	return ((total_len + buf_size - 1) / buf_size);
> +	return DIV_ROUND_UP(total_len, buf_size);
>   }
>   
>   static int
> diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
> index 395a526..b536eb0 100644
> --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
> +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
> @@ -230,7 +230,7 @@ static unsigned int gfar_usecs2ticks(struct gfar_private *priv,
>   
>   	/* Make sure we return a number greater than 0
>   	 * if usecs > 0 */
> -	return (usecs * 1000 + count - 1) / count;
> +	return DIV_ROUND_UP(usecs * 1000, count);
>   }
>   
>   /* Convert ethernet clock ticks to microseconds */
> diff --git a/drivers/net/ethernet/ibm/emac/mal.h b/drivers/net/ethernet/ibm/emac/mal.h
> index eeade2e..e4c20f0 100644
> --- a/drivers/net/ethernet/ibm/emac/mal.h
> +++ b/drivers/net/ethernet/ibm/emac/mal.h
> @@ -136,7 +136,7 @@ static inline int mal_rx_size(int len)
>   
>   static inline int mal_tx_chunks(int len)
>   {
> -	return (len + MAL_MAX_TX_SIZE - 1) / MAL_MAX_TX_SIZE;
> +	return DIV_ROUND_UP(len, MAL_MAX_TX_SIZE);
>   }
>   
>   #define MAL_CHAN_MASK(n)	(0x80000000 >> (n))
> diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
> index 4bdf250..deef5a9 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
> @@ -614,7 +614,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
>   		int i;
>   
>   		buf->direct.buf = NULL;
> -		buf->nbufs	= (size + PAGE_SIZE - 1) / PAGE_SIZE;
> +		buf->nbufs      = DIV_ROUND_UP(size, PAGE_SIZE);
>   		buf->npages	= buf->nbufs;
>   		buf->page_shift  = PAGE_SHIFT;
>   		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
> diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
> index 7262c63..4b43511 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/icm.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
> @@ -406,7 +406,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
>   	obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
>   	if (WARN_ON(!obj_per_chunk))
>   		return -EINVAL;
> -	num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
> +	num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
>   
>   	table->icm      = kvcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL);
>   	if (!table->icm)
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
> index 23cc337..7e20666 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
> @@ -73,7 +73,7 @@ static int get_pas_size(struct mlx5_srq_attr *in)
>   	u32 rq_sz	  = 1 << (log_srq_size + 4 + log_rq_stride);
>   	u32 page_size	  = 1 << log_page_size;
>   	u32 rq_sz_po      = rq_sz + (page_offset * po_quanta);
> -	u32 rq_num_pas	  = (rq_sz_po + page_size - 1) / page_size;
> +	u32 rq_num_pas    = DIV_ROUND_UP(rq_sz_po, page_size);
>   
>   	return rq_num_pas * sizeof(u64);
>   }
> diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
> index b8983e7..f980f10 100644
> --- a/drivers/net/ethernet/neterion/s2io.c
> +++ b/drivers/net/ethernet/neterion/s2io.c
> @@ -491,7 +491,7 @@ static void do_s2io_copy_mac_addr(struct s2io_nic *sp, int offset, u64 mac_addr)
>   };
>   
>   /* A simplifier macro used both by init and free shared_mem Fns(). */
> -#define TXD_MEM_PAGE_CNT(len, per_each) ((len+per_each - 1) / per_each)
> +#define TXD_MEM_PAGE_CNT(len, per_each) DIV_ROUND_UP(len, per_each)
>   
>   /* netqueue manipulation helper functions */
>   static inline void s2io_stop_all_tx_queue(struct s2io_nic *sp)
> 

For the Mellanox parts:
Acked-by: Tariq Toukan <tariqt@mellanox.com>

^ permalink raw reply

* Re: Fw: Payment approval and SEPA direct debit mandate
From: a.hodin @ 2018-09-12  7:09 UTC (permalink / raw)


[-- Attachment #1: Type: text/plain, Size: 1020 bytes --]

Hello,

As outlined by telephone from your office, enclosed the undersigned 
purchase contract for ELW And payment receipt.

wie telefonisch besprochen von deinem Büro, anbei der Unterzeichnete 
Kaufvertrag zum ELW Und Zahlungsbeleg.

Mit freundlichen Grüßen


I.A. Florian Carolus

Klaase GmbH & Co. KG
Spedition & Logistik
Zum Kratt 8 - 10
D-24787 Fockbek

Tel. +49 (0) 4331-43817-19
Fax +49 (0) 4331-43817-25
info@klaasegmbh.com
www.spedition-kraase.de



Personenbezogene Daten, die im Rahmen dieser Kommunikation ausgetauscht 
werden, unterliegen den Datenschutzbestimmungen der EU-DSGVO.
Weitere Informationen und Ihre Rechte können Sie unter nachfolgendem 
Link abrufen: https://www.spedition-braase.de/datenschutz.html

Steuer-Nr.: 2828202105 UST-ID.Nr.: DE 135375677
Kommanditgesellschaft Sitz 24787 Fockbek, Handelsregister: Amtsgericht 
Kiel, HRA 311RD
Komplementär: FD Verwaltungs-GmbH Sitz 24787 Fockbek, Handelsregister: 
Amtsgericht Kiel, HRB 631RD
Geschäftsführung: Fritz Dethlefsen, Dirk Zernitz

[-- Attachment #2: Invoice Documents GmbH--RDPNMAZSFS-ZTJP-12-09 PDF.zip --]
[-- Type: application/zip, Size: 1166 bytes --]

^ permalink raw reply

* Re: [PATCH net-next v2 2/5] virtio_ring: support creating packed ring
From: Michael S. Tsirkin @ 2018-09-12 12:45 UTC (permalink / raw)
  To: Tiwei Bie
  Cc: jasowang, virtualization, linux-kernel, netdev, virtio-dev, wexu,
	jfreimann
In-Reply-To: <20180910022837.GA11822@debian>

On Mon, Sep 10, 2018 at 10:28:37AM +0800, Tiwei Bie wrote:
> On Fri, Sep 07, 2018 at 10:03:24AM -0400, Michael S. Tsirkin wrote:
> > On Wed, Jul 11, 2018 at 10:27:08AM +0800, Tiwei Bie wrote:
> > > This commit introduces the support for creating packed ring.
> > > All split ring specific functions are added _split suffix.
> > > Some necessary stubs for packed ring are also added.
> > > 
> > > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > 
> > I'd rather have a patch just renaming split functions, then
> > add all packed stuff in as a separate patch on top.
> 
> Sure, I will do that.
> 
> > 
> > 
> > > ---
> > >  drivers/virtio/virtio_ring.c | 801 +++++++++++++++++++++++------------
> > >  include/linux/virtio_ring.h  |   8 +-
> > >  2 files changed, 546 insertions(+), 263 deletions(-)
> > > 
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 814b395007b2..c4f8abc7445a 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -60,11 +60,15 @@ struct vring_desc_state {
> > >  	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
> > >  };
> > >  
> > > +struct vring_desc_state_packed {
> > > +	int next;			/* The next desc state. */
> > 
> > So this can go away with IN_ORDER?
> 
> Yes. If IN_ORDER is negotiated, next won't be needed anymore.
> Currently, IN_ORDER isn't included in this patch set, because
> some changes for split ring are needed to make sure that it
> will use the descs in the expected order. After that,
> optimizations can be done for both of split ring and packed
> ring respectively.
> 
> > 
> > > +};
> > > +
> > >  struct vring_virtqueue {
> > >  	struct virtqueue vq;
> > >  
> > > -	/* Actual memory layout for this queue */
> > > -	struct vring vring;
> > > +	/* Is this a packed ring? */
> > > +	bool packed;
> > >  
> > >  	/* Can we use weak barriers? */
> > >  	bool weak_barriers;
> > > @@ -86,11 +90,39 @@ struct vring_virtqueue {
> > >  	/* Last used index we've seen. */
> > >  	u16 last_used_idx;
> > >  
> > > -	/* Last written value to avail->flags */
> > > -	u16 avail_flags_shadow;
> > > +	union {
> > > +		/* Available for split ring */
> > > +		struct {
> > > +			/* Actual memory layout for this queue. */
> > > +			struct vring vring;
> > >  
> > > -	/* Last written value to avail->idx in guest byte order */
> > > -	u16 avail_idx_shadow;
> > > +			/* Last written value to avail->flags */
> > > +			u16 avail_flags_shadow;
> > > +
> > > +			/* Last written value to avail->idx in
> > > +			 * guest byte order. */
> > > +			u16 avail_idx_shadow;
> > > +		};
> > 
> > Name this field split so it's easier to detect misuse of e.g.
> > packed fields in split code?
> 
> Good point, I'll do that.
> 
> > 
> > > +
> > > +		/* Available for packed ring */
> > > +		struct {
> > > +			/* Actual memory layout for this queue. */
> > > +			struct vring_packed vring_packed;
> > > +
> > > +			/* Driver ring wrap counter. */
> > > +			bool avail_wrap_counter;
> > > +
> > > +			/* Device ring wrap counter. */
> > > +			bool used_wrap_counter;
> > > +
> > > +			/* Index of the next avail descriptor. */
> > > +			u16 next_avail_idx;
> > > +
> > > +			/* Last written value to driver->flags in
> > > +			 * guest byte order. */
> > > +			u16 event_flags_shadow;
> > > +		};
> > > +	};
> [...]
> > > +
> > > +/*
> > > + * The layout for the packed ring is a continuous chunk of memory
> > > + * which looks like this.
> > > + *
> > > + * struct vring_packed {
> > > + *	// The actual descriptors (16 bytes each)
> > > + *	struct vring_packed_desc desc[num];
> > > + *
> > > + *	// Padding to the next align boundary.
> > > + *	char pad[];
> > > + *
> > > + *	// Driver Event Suppression
> > > + *	struct vring_packed_desc_event driver;
> > > + *
> > > + *	// Device Event Suppression
> > > + *	struct vring_packed_desc_event device;
> > > + * };
> > > + */
> > 
> > Why not just allocate event structures separately?
> > Is it a win to have them share a cache line for some reason?
> 
> Will do that.
> 
> > 
> > > +static inline void vring_init_packed(struct vring_packed *vr, unsigned int num,
> > > +				     void *p, unsigned long align)
> > > +{
> > > +	vr->num = num;
> > > +	vr->desc = p;
> > > +	vr->driver = (void *)ALIGN(((uintptr_t)p +
> > > +		sizeof(struct vring_packed_desc) * num), align);
> > > +	vr->device = vr->driver + 1;
> > > +}
> > 
> > What's all this about alignment? Where does it come from?
> 
> It comes from the `vring_align` parameter of vring_create_virtqueue()
> and vring_new_virtqueue():
> 
> https://github.com/torvalds/linux/blob/a49a9dcce802/drivers/virtio/virtio_ring.c#L1061
> https://github.com/torvalds/linux/blob/a49a9dcce802/drivers/virtio/virtio_ring.c#L1123

Note the TODO - we just never got to fixing it. It would be great to fix
this for virtio 1 rings, but if not - at least let's not add this
stuff for packed rings.

> Should I just ignore it in packed ring?
> 
> CCW defined this:
> 
> #define KVM_VIRTIO_CCW_RING_ALIGN 4096
> 
> I'm not familiar with CCW. Currently, in this patch set, packed ring
> isn't enabled on CCW dues to some legacy accessors are not implemented
> in packed ring yet.


Then you need to take steps not to negotiate this feature bit for
ccw drivers.

> > 
> > > +
> > > +static inline unsigned vring_size_packed(unsigned int num, unsigned long align)
> > > +{
> > > +	return ((sizeof(struct vring_packed_desc) * num + align - 1)
> > > +		& ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
> > > +}
> [...]
> > > @@ -1129,10 +1388,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
> > >  				      void (*callback)(struct virtqueue *vq),
> > >  				      const char *name)
> > >  {
> > > -	struct vring vring;
> > > -	vring_init(&vring, num, pages, vring_align);
> > > -	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
> > > -				     notify, callback, name);
> > > +	union vring_union vring;
> > > +	bool packed;
> > > +
> > > +	packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
> > > +	if (packed)
> > > +		vring_init_packed(&vring.vring_packed, num, pages, vring_align);
> > > +	else
> > > +		vring_init(&vring.vring_split, num, pages, vring_align);
> > 
> > 
> > vring_init in the UAPI header is more or less a bug.
> > I'd just stop using it, keep it around for legacy userspace.
> 
> Got it. I'd like to do that. Thanks.
> 
> > 
> > > +
> > > +	return __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
> > > +				     context, notify, callback, name);
> > >  }
> > >  EXPORT_SYMBOL_GPL(vring_new_virtqueue);
> > >  
> > > @@ -1142,7 +1408,9 @@ void vring_del_virtqueue(struct virtqueue *_vq)
> > >  
> > >  	if (vq->we_own_ring) {
> > >  		vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
> > > -				 vq->vring.desc, vq->queue_dma_addr);
> > > +				 vq->packed ? (void *)vq->vring_packed.desc :
> > > +					      (void *)vq->vring.desc,
> > > +				 vq->queue_dma_addr);
> > >  	}
> > >  	list_del(&_vq->list);
> > >  	kfree(vq);
> > > @@ -1184,7 +1452,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
> > >  
> > >  	struct vring_virtqueue *vq = to_vvq(_vq);
> > >  
> > > -	return vq->vring.num;
> > > +	return vq->packed ? vq->vring_packed.num : vq->vring.num;
> > >  }
> > >  EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
> > >  
> > > @@ -1227,6 +1495,10 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
> > >  
> > >  	BUG_ON(!vq->we_own_ring);
> > >  
> > > +	if (vq->packed)
> > > +		return vq->queue_dma_addr + ((char *)vq->vring_packed.driver -
> > > +				(char *)vq->vring_packed.desc);
> > > +
> > >  	return vq->queue_dma_addr +
> > >  		((char *)vq->vring.avail - (char *)vq->vring.desc);
> > >  }
> > > @@ -1238,11 +1510,16 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
> > >  
> > >  	BUG_ON(!vq->we_own_ring);
> > >  
> > > +	if (vq->packed)
> > > +		return vq->queue_dma_addr + ((char *)vq->vring_packed.device -
> > > +				(char *)vq->vring_packed.desc);
> > > +
> > >  	return vq->queue_dma_addr +
> > >  		((char *)vq->vring.used - (char *)vq->vring.desc);
> > >  }
> > >  EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
> > >  
> > > +/* Only available for split ring */
> > >  const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > >  {
> > >  	return &to_vvq(vq)->vring;
> > > diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
> > > index fab02133a919..992142b35f55 100644
> > > --- a/include/linux/virtio_ring.h
> > > +++ b/include/linux/virtio_ring.h
> > > @@ -60,6 +60,11 @@ static inline void virtio_store_mb(bool weak_barriers,
> > >  struct virtio_device;
> > >  struct virtqueue;
> > >  
> > > +union vring_union {
> > > +	struct vring vring_split;
> > > +	struct vring_packed vring_packed;
> > > +};
> > > +
> > >  /*
> > >   * Creates a virtqueue and allocates the descriptor ring.  If
> > >   * may_reduce_num is set, then this may allocate a smaller ring than
> > > @@ -79,7 +84,8 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
> > >  
> > >  /* Creates a virtqueue with a custom layout. */
> > >  struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > > -					struct vring vring,
> > > +					union vring_union vring,
> > > +					bool packed,
> > >  					struct virtio_device *vdev,
> > >  					bool weak_barriers,
> > >  					bool ctx,
> > > -- 
> > > 2.18.0

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox