Netdev List
 help / color / mirror / Atom feed
* [PATCH net v2 01/11] net: core: limit nested device depth
From: Taehee Yoo @ 2019-09-07 13:45 UTC (permalink / raw)
  To: davem, netdev, j.vosburgh, vfalico, andy, jiri, sd, roopa, saeedm,
	manishc, rahulv, kys, haiyangz, sthemmin, sashal, hare, varun,
	ubraun, kgraul, jay.vosburgh
  Cc: ap420073

The current code doesn't limit the number of nested devices.
Nested devices are handled recursively, and this needs a huge amount of
stack memory. So, unlimited nested devices could cause a stack overflow.

This patch adds upper_level and lower_level; they are common variables
and represent the maximum upper/lower depth.
When an upper/lower device is attached or detached,
{lower/upper}_level are updated, and if the maximum depth is bigger than 8,
the attach routine fails and returns -EMLINK.

Test commands:
    ip link add dummy0 type dummy
    ip link add link dummy0 name vlan1 type vlan id 1
    ip link set vlan1 up

    for i in {2..100}
    do
	    let A=$i-1

	    ip link add name vlan$i link vlan$A type vlan id $i
    done

Splat looks like:
[  140.483124] BUG: looking up invalid subclass: 8
[  140.483505] turning off the locking correctness validator.
[  140.483505] CPU: 0 PID: 1324 Comm: ip Not tainted 5.3.0-rc7+ #322
[  140.483505] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015
[  140.483505] Call Trace:
[  140.483505]  dump_stack+0x7c/0xbb
[  140.483505]  register_lock_class+0x64d/0x14d0
[  140.483505]  ? is_dynamic_key+0x230/0x230
[  140.483505]  ? module_assert_mutex_or_preempt+0x41/0x70
[  140.483505]  ? __module_address+0x3f/0x3c0
[  140.483505]  lockdep_init_map+0x24e/0x630
[  140.483505]  vlan_dev_init+0x828/0xce0 [8021q]
[  140.483505]  register_netdevice+0x24f/0xd70
[  140.483505]  ? netdev_change_features+0xa0/0xa0
[  140.483505]  ? dev_get_nest_level+0xe1/0x170
[  140.483505]  register_vlan_dev+0x29b/0x710 [8021q]
[  140.483505]  __rtnl_newlink+0xb75/0x1180
[  ... ]

[  168.446539] WARNING: can't dereference registers at 00000000bef3d701 for ip apic_timer_interrupt+0xf/0x20
[  168.466843] ==================================================================
[  168.469452] BUG: KASAN: slab-out-of-bounds in __unwind_start+0x71/0x850
[  168.480707] Write of size 88 at addr ffff8880b8856d38 by task ip/1758
[  168.480707]
[  168.480707] CPU: 1 PID: 1758 Comm: ip Not tainted 5.3.0-rc7+ #322
[  ... ]
[  168.794493] Rebooting in 5 seconds..

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---

v1 -> v2 : this patch isn't changed

 include/linux/netdevice.h |   4 ++
 net/core/dev.c            | 106 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 88292953aa6f..5bb5756129af 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1624,6 +1624,8 @@ enum netdev_priv_flags {
  *	@type:		Interface hardware type
  *	@hard_header_len: Maximum hardware header length.
  *	@min_header_len:  Minimum hardware header length
+ *	@upper_level:	Maximum depth level of upper devices.
+ *	@lower_level:	Maximum depth level of lower devices.
  *
  *	@needed_headroom: Extra headroom the hardware may need, but not in all
  *			  cases can this be guaranteed
@@ -1854,6 +1856,8 @@ struct net_device {
 	unsigned short		type;
 	unsigned short		hard_header_len;
 	unsigned char		min_header_len;
+	unsigned char		upper_level;
+	unsigned char		lower_level;
 
 	unsigned short		needed_headroom;
 	unsigned short		needed_tailroom;
diff --git a/net/core/dev.c b/net/core/dev.c
index 0891f499c1bb..6a4b4ce62204 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -146,6 +146,7 @@
 #include "net-sysfs.h"
 
 #define MAX_GRO_SKBS 8
+#define MAX_NEST_DEV 8
 
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -6602,6 +6603,21 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
 
+static struct net_device *netdev_next_upper_dev(struct net_device *dev,
+						struct list_head **iter)
+{
+	struct netdev_adjacent *upper;
+
+	upper = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+	if (&upper->list == &dev->adj_list.upper)
+		return NULL;
+
+	*iter = &upper->list;
+
+	return upper->dev;
+}
+
 static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
 						    struct list_head **iter)
 {
@@ -6619,6 +6635,33 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
 	return upper->dev;
 }
 
+int netdev_walk_all_upper_dev(struct net_device *dev,
+			      int (*fn)(struct net_device *dev,
+					void *data),
+			      void *data)
+{
+	struct net_device *udev;
+	struct list_head *iter;
+	int ret;
+
+	for (iter = &dev->adj_list.upper,
+	     udev = netdev_next_upper_dev(dev, &iter);
+	     udev;
+	     udev = netdev_next_upper_dev(dev, &iter)) {
+		/* first is the upper device itself */
+		ret = fn(udev, data);
+		if (ret)
+			return ret;
+
+		/* then look at all of its upper devices */
+		ret = netdev_walk_all_upper_dev(udev, fn, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
 				  int (*fn)(struct net_device *dev,
 					    void *data),
@@ -6785,6 +6828,52 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
 	return lower->dev;
 }
 
+static u8 __netdev_upper_depth(struct net_device *dev)
+{
+	struct net_device *udev;
+	struct list_head *iter;
+	u8 max_depth = 0;
+
+	for (iter = &dev->adj_list.upper,
+	     udev = netdev_next_upper_dev(dev, &iter);
+	     udev;
+	     udev = netdev_next_upper_dev(dev, &iter)) {
+		if (max_depth < udev->upper_level)
+			max_depth = udev->upper_level;
+	}
+
+	return max_depth;
+}
+
+static u8 __netdev_lower_depth(struct net_device *dev)
+{
+	struct net_device *ldev;
+	struct list_head *iter;
+	u8 max_depth = 0;
+
+	for (iter = &dev->adj_list.lower,
+	     ldev = netdev_next_lower_dev(dev, &iter);
+	     ldev;
+	     ldev = netdev_next_lower_dev(dev, &iter)) {
+		if (max_depth < ldev->lower_level)
+			max_depth = ldev->lower_level;
+	}
+
+	return max_depth;
+}
+
+static int __netdev_update_upper_level(struct net_device *dev, void *data)
+{
+	dev->upper_level = __netdev_upper_depth(dev) + 1;
+	return 0;
+}
+
+static int __netdev_update_lower_level(struct net_device *dev, void *data)
+{
+	dev->lower_level = __netdev_lower_depth(dev) + 1;
+	return 0;
+}
+
 int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
 				  int (*fn)(struct net_device *dev,
 					    void *data),
@@ -7063,6 +7152,9 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (netdev_has_upper_dev(upper_dev, dev))
 		return -EBUSY;
 
+	if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
+		return -EMLINK;
+
 	if (!master) {
 		if (netdev_has_upper_dev(dev, upper_dev))
 			return -EEXIST;
@@ -7089,6 +7181,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (ret)
 		goto rollback;
 
+	__netdev_update_upper_level(dev, NULL);
+	netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+	__netdev_update_lower_level(upper_dev, NULL);
+	netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL);
+
 	return 0;
 
 rollback:
@@ -7171,6 +7269,12 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 
 	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
 				      &changeupper_info.info);
+
+	__netdev_update_upper_level(dev, NULL);
+	netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+	__netdev_update_lower_level(upper_dev, NULL);
+	netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL);
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
@@ -9157,6 +9261,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 	dev->gso_max_segs = GSO_MAX_SEGS;
+	dev->upper_level = 1;
+	dev->lower_level = 1;
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
-- 
2.17.1


^ permalink raw reply related

* [PATCH net v2 00/11] net: fix nested device bugs
From: Taehee Yoo @ 2019-09-07 13:45 UTC (permalink / raw)
  To: davem, netdev, j.vosburgh, vfalico, andy, jiri, sd, roopa, saeedm,
	manishc, rahulv, kys, haiyangz, sthemmin, sashal, hare, varun,
	ubraun, kgraul, jay.vosburgh
  Cc: ap420073

This patchset fixes several bugs that are related to the nesting
device infrastructure.
The current nesting infrastructure code doesn't limit the depth level of
devices. Nested devices could be handled recursively; when that happens,
a huge amount of memory is needed and a stack overflow could occur.
The following device types have the same bug:
VLAN, BONDING, TEAM, MACSEC, MACVLAN and VXLAN.

Test commands:
    ip link add dummy0 type dummy
    ip link add vlan1 link dummy0 type vlan id 1

    for i in {2..100}
    do
	    let A=$i-1
	    ip link add name vlan$i link vlan$A type vlan id $i
    done
    ip link del dummy0

The 1st patch actually fixes the root cause.
It adds new common variables {upper/lower}_level that represent
the depth level. The upper_level variable is the depth of upper devices.
The lower_level variable is the depth of lower devices.

      [U][L]       [U][L]
vlan1  1  5  vlan4  1  4
vlan2  2  4  vlan5  2  3
vlan3  3  3    |
  |            |
  +------------+
  |
vlan6  4  2
dummy0 5  1

After this patch, the nesting infrastructure code uses these variables to
check the depth level.

Patches 2, 4, 5, 6 and 7 fix lockdep-related problems.
Before these patches, devices use a static lockdep map.
So, if devices of the same type are nested, lockdep will warn about
a recursive situation.
These patches make these devices use a dynamic lockdep key instead of
a static lock or subclass.

3rd patch fixes unexpected IFF_BONDING bit unset.

8th patch fixes a refcnt leak in the macsec module.

The 9th patch adds an ignore flag to the adjacent structure.
In order to exchange an adjacent node safely, the ignore flag is needed.

10th patch makes vxlan add an adjacent link to limit depth level.

11th patch removes unnecessary variables and callback.

v1 -> v2 :
 - Make the 3rd patch not add a new priv_flag.

Taehee Yoo (11):
  net: core: limit nested device depth
  vlan: use dynamic lockdep key instead of subclass
  bonding: fix unexpected IFF_BONDING bit unset
  bonding: use dynamic lockdep key instead of subclass
  team: use dynamic lockdep key instead of static key
  macsec: use dynamic lockdep key instead of subclass
  macvlan: use dynamic lockdep key instead of subclass
  macsec: fix refcnt leak in module exit routine
  net: core: add ignore flag to netdev_adjacent structure
  vxlan: add adjacent link to limit depth level
  net: remove unnecessary variables and callback

 drivers/net/bonding/bond_alb.c                |   2 +-
 drivers/net/bonding/bond_main.c               |  81 ++++--
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   |   2 +-
 drivers/net/macsec.c                          |  50 ++--
 drivers/net/macvlan.c                         |  36 ++-
 drivers/net/team/team.c                       |  61 ++++-
 drivers/net/vxlan.c                           |  71 ++++-
 include/linux/if_macvlan.h                    |   3 +-
 include/linux/if_team.h                       |   5 +
 include/linux/if_vlan.h                       |  13 +-
 include/linux/netdevice.h                     |  20 +-
 include/net/bonding.h                         |   4 +-
 include/net/vxlan.h                           |   1 +
 net/8021q/vlan.c                              |   1 -
 net/8021q/vlan_dev.c                          |  32 +--
 net/core/dev.c                                | 252 ++++++++++++++++--
 net/core/dev_addr_lists.c                     |  12 +-
 net/smc/smc_core.c                            |   2 +-
 net/smc/smc_pnet.c                            |   2 +-
 19 files changed, 508 insertions(+), 142 deletions(-)

-- 
2.17.1


^ permalink raw reply

* Re: [net-next 00/16][pull request] 100GbE Intel Wired LAN Driver Updates 2019-09-05
From: David Miller @ 2019-09-07 13:27 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, nhorman, sassmann
In-Reply-To: <20190905203406.4152-1-jeffrey.t.kirsher@intel.com>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu,  5 Sep 2019 13:33:50 -0700

> This series contains updates to ice driver.

Pulled, thanks Jeff.

^ permalink raw reply

* Re: [PATCH] net: stmmac: socfpga: re-use the `interface` parameter from platform data
From: kbuild test robot @ 2019-09-07 12:54 UTC (permalink / raw)
  To: Alexandru Ardelean
  Cc: kbuild-all, netdev, linux-stm32, linux-arm-kernel, linux-kernel,
	peppe.cavallaro, alexandre.torgue, joabreu, mcoquelin.stm32,
	davem, Alexandru Ardelean
In-Reply-To: <20190906123054.5514-1-alexandru.ardelean@analog.com>

[-- Attachment #1: Type: text/plain, Size: 7941 bytes --]

Hi Alexandru,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[cannot apply to v5.3-rc7 next-20190904]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Alexandru-Ardelean/net-stmmac-socfpga-re-use-the-interface-parameter-from-platform-data/20190907-190627
config: sparc64-allmodconfig (attached as .config)
compiler: sparc64-linux-gcc (GCC) 7.4.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.4.0 make.cross ARCH=sparc64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/dma-mapping.h:7:0,
                    from include/linux/skbuff.h:30,
                    from include/linux/if_ether.h:19,
                    from include/uapi/linux/ethtool.h:19,
                    from include/linux/ethtool.h:18,
                    from include/linux/phy.h:16,
                    from drivers/net//ethernet/stmicro/stmmac/dwmac-socfpga.c:11:
   drivers/net//ethernet/stmicro/stmmac/dwmac-socfpga.c: In function 'socfpga_gen5_set_phy_mode':
>> drivers/net//ethernet/stmicro/stmmac/dwmac-socfpga.c:264:44: error: 'phymode' undeclared (first use in this function); did you mean 'phy_modes'?
      dev_err(dwmac->dev, "bad phy mode %d\n", phymode);
                                               ^
   include/linux/device.h:1499:32: note: in definition of macro 'dev_err'
     _dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__)
                                   ^~~~~~~~~~~
   drivers/net//ethernet/stmicro/stmmac/dwmac-socfpga.c:264:44: note: each undeclared identifier is reported only once for each function it appears in
      dev_err(dwmac->dev, "bad phy mode %d\n", phymode);
                                               ^
   include/linux/device.h:1499:32: note: in definition of macro 'dev_err'
     _dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__)
                                   ^~~~~~~~~~~
   drivers/net//ethernet/stmicro/stmmac/dwmac-socfpga.c: In function 'socfpga_gen10_set_phy_mode':
   drivers/net//ethernet/stmicro/stmmac/dwmac-socfpga.c:340:6: error: 'phymode' undeclared (first use in this function); did you mean 'phy_modes'?
         phymode == PHY_INTERFACE_MODE_MII ||
         ^~~~~~~
         phy_modes

vim +264 drivers/net//ethernet/stmicro/stmmac/dwmac-socfpga.c

40ae25505fe834 Dinh Nguyen        2019-06-05  255  
40ae25505fe834 Dinh Nguyen        2019-06-05  256  static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac)
40ae25505fe834 Dinh Nguyen        2019-06-05  257  {
40ae25505fe834 Dinh Nguyen        2019-06-05  258  	struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr;
40ae25505fe834 Dinh Nguyen        2019-06-05  259  	u32 reg_offset = dwmac->reg_offset;
40ae25505fe834 Dinh Nguyen        2019-06-05  260  	u32 reg_shift = dwmac->reg_shift;
40ae25505fe834 Dinh Nguyen        2019-06-05  261  	u32 ctrl, val, module;
40ae25505fe834 Dinh Nguyen        2019-06-05  262  
6169afbe4a340b Alexandru Ardelean 2019-09-06  263  	if (socfpga_set_phy_mode_common(dwmac, &val)) {
801d233b7302ee Dinh Nguyen        2014-03-26 @264  		dev_err(dwmac->dev, "bad phy mode %d\n", phymode);
801d233b7302ee Dinh Nguyen        2014-03-26  265  		return -EINVAL;
801d233b7302ee Dinh Nguyen        2014-03-26  266  	}
801d233b7302ee Dinh Nguyen        2014-03-26  267  
b4834c86e11baf Ley Foon Tan       2014-08-20  268  	/* Overwrite val to GMII if splitter core is enabled. The phymode here
b4834c86e11baf Ley Foon Tan       2014-08-20  269  	 * is the actual phy mode on phy hardware, but phy interface from
b4834c86e11baf Ley Foon Tan       2014-08-20  270  	 * EMAC core is GMII.
b4834c86e11baf Ley Foon Tan       2014-08-20  271  	 */
b4834c86e11baf Ley Foon Tan       2014-08-20  272  	if (dwmac->splitter_base)
b4834c86e11baf Ley Foon Tan       2014-08-20  273  		val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
b4834c86e11baf Ley Foon Tan       2014-08-20  274  
70cb136f773083 Joachim Eastwood   2016-05-01  275  	/* Assert reset to the enet controller before changing the phy mode */
bc8a2d9bcbf1ca Dinh Nguyen        2018-06-19  276  	reset_control_assert(dwmac->stmmac_ocp_rst);
70cb136f773083 Joachim Eastwood   2016-05-01  277  	reset_control_assert(dwmac->stmmac_rst);
70cb136f773083 Joachim Eastwood   2016-05-01  278  
801d233b7302ee Dinh Nguyen        2014-03-26  279  	regmap_read(sys_mgr_base_addr, reg_offset, &ctrl);
801d233b7302ee Dinh Nguyen        2014-03-26  280  	ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK << reg_shift);
801d233b7302ee Dinh Nguyen        2014-03-26  281  	ctrl |= val << reg_shift;
801d233b7302ee Dinh Nguyen        2014-03-26  282  
013dae5dbc07aa Stephan Gatzka     2017-08-22  283  	if (dwmac->f2h_ptp_ref_clk ||
013dae5dbc07aa Stephan Gatzka     2017-08-22  284  	    phymode == PHY_INTERFACE_MODE_MII ||
013dae5dbc07aa Stephan Gatzka     2017-08-22  285  	    phymode == PHY_INTERFACE_MODE_GMII ||
013dae5dbc07aa Stephan Gatzka     2017-08-22  286  	    phymode == PHY_INTERFACE_MODE_SGMII) {
43569814fa35b2 Phil Reid          2015-12-14  287  		ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2);
734e00fa02eff5 Phil Reid          2016-04-07  288  		regmap_read(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG,
734e00fa02eff5 Phil Reid          2016-04-07  289  			    &module);
734e00fa02eff5 Phil Reid          2016-04-07  290  		module |= (SYSMGR_FPGAGRP_MODULE_EMAC << (reg_shift / 2));
734e00fa02eff5 Phil Reid          2016-04-07  291  		regmap_write(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG,
734e00fa02eff5 Phil Reid          2016-04-07  292  			     module);
734e00fa02eff5 Phil Reid          2016-04-07  293  	} else {
43569814fa35b2 Phil Reid          2015-12-14  294  		ctrl &= ~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2));
734e00fa02eff5 Phil Reid          2016-04-07  295  	}
43569814fa35b2 Phil Reid          2015-12-14  296  
801d233b7302ee Dinh Nguyen        2014-03-26  297  	regmap_write(sys_mgr_base_addr, reg_offset, ctrl);
734e00fa02eff5 Phil Reid          2016-04-07  298  
70cb136f773083 Joachim Eastwood   2016-05-01  299  	/* Deassert reset for the phy configuration to be sampled by
70cb136f773083 Joachim Eastwood   2016-05-01  300  	 * the enet controller, and operation to start in requested mode
70cb136f773083 Joachim Eastwood   2016-05-01  301  	 */
bc8a2d9bcbf1ca Dinh Nguyen        2018-06-19  302  	reset_control_deassert(dwmac->stmmac_ocp_rst);
70cb136f773083 Joachim Eastwood   2016-05-01  303  	reset_control_deassert(dwmac->stmmac_rst);
fb3bbdb859891e Tien Hock Loh      2016-07-07  304  	if (phymode == PHY_INTERFACE_MODE_SGMII) {
fb3bbdb859891e Tien Hock Loh      2016-07-07  305  		if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) {
fb3bbdb859891e Tien Hock Loh      2016-07-07  306  			dev_err(dwmac->dev, "Unable to initialize TSE PCS");
fb3bbdb859891e Tien Hock Loh      2016-07-07  307  			return -EINVAL;
fb3bbdb859891e Tien Hock Loh      2016-07-07  308  		}
fb3bbdb859891e Tien Hock Loh      2016-07-07  309  	}
70cb136f773083 Joachim Eastwood   2016-05-01  310  
801d233b7302ee Dinh Nguyen        2014-03-26  311  	return 0;
801d233b7302ee Dinh Nguyen        2014-03-26  312  }
801d233b7302ee Dinh Nguyen        2014-03-26  313  

:::::: The code at line 264 was first introduced by commit
:::::: 801d233b7302eeab94750427a623c10c044cb0ca net: stmmac: Add SOCFPGA glue driver

:::::: TO: Dinh Nguyen <dinguyen@altera.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 58668 bytes --]

^ permalink raw reply

* Re: [PATCH] net/skbuff: silence warnings under memory pressure
From: Tetsuo Handa @ 2019-09-07 11:00 UTC (permalink / raw)
  To: Michal Hocko, Sergey Senozhatsky
  Cc: Qian Cai, Eric Dumazet, davem, netdev, linux-mm, linux-kernel,
	Petr Mladek, Sergey Senozhatsky, Steven Rostedt
In-Reply-To: <20190904082540.GI3838@dhcp22.suse.cz>

On 2019/09/04 17:25, Michal Hocko wrote:
> On Wed 04-09-19 16:00:42, Sergey Senozhatsky wrote:
>> On (09/04/19 15:41), Sergey Senozhatsky wrote:
>>> But the thing is different in case of dump_stack() + show_mem() +
>>> some other output. Because now we ratelimit not a single printk() line,
>>> but hundreds of them. The ratelimit becomes - 10 * $$$ lines in 5 seconds
>>> (IOW, now we talk about thousands of lines).
>>
>> And on devices with slow serial consoles this can be somewhat close to
>> "no ratelimit". *Suppose* that warn_alloc() adds 700 lines each time.
>> Within 5 seconds we can call warn_alloc() 10 times, which will add 7000
>> lines to the logbuf. If printk() can evict only 6000 lines in 5 seconds
>> then we have a growing number of pending logbuf messages.
> 
> Yes, ratelimit is problematic when the ratelimited operation is slow. I
> guess that is a well known problem and we would need to rework both the
> api and the implementation to make it work in those cases as well.
> Essentially we need to make the ratelimit act as a gatekeeper to an
> operation section - something like a critical section except you can
> tolerate more code executions but not too many. So effectively
> 
> 	start_throttle(rate, number);
> 	/* here goes your operation */
> 	end_throttle();
> 
> one operation is not considered done until the whole section ends.
> Or something along those lines.

Regarding OOM killer which is serialized by oom_lock mutex, I proposed
"mm, oom: More aggressively ratelimit dump_header()." at
https://lkml.kernel.org/r/1550325895-9291-2-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
but it was ignored.

> 
> In this particular case we can increase the rate limit parameters of
> course but I think that longterm we need a better api.
> 

I proposed "printk: Introduce "store now but print later" prefix." at
https://lkml.kernel.org/r/1550896930-12324-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
but it was not accepted.

But I think that a better solution for warn_alloc() is to defer printing
from the allocating context (which may not be allowed to sleep) to WQ context.
I proposed "mm,oom: Defer dump_tasks() output." at
https://lkml.kernel.org/r/7de2310d-afbd-e616-e83a-d75103b986c6@i-love.sakura.ne.jp
and answered Michal's concerns. I hope we can apply the same thing for
warn_alloc() and show_mem(). Then, we can minimize latency for both
"memory allocation failures" and "OOM killer invocations".


^ permalink raw reply

* Re: [net-next 02/11] devlink: add 'reset_dev_on_drv_probe' param
From: Jiri Pirko @ 2019-09-07 10:28 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Dirk van der Merwe, Simon Horman, David Miller, netdev,
	oss-drivers
In-Reply-To: <20190906211730.5c362b48@cakuba.netronome.com>

Sat, Sep 07, 2019 at 06:17:30AM CEST, jakub.kicinski@netronome.com wrote:
>On Fri, 6 Sep 2019 11:40:54 -0700, Dirk van der Merwe wrote:
>> >> DEVLINK_PARAM_RESET_DEV_VALUE_UNKNOWN (0)
>> >> +			  Unknown or invalid value.  
>> > Why do you need this? Do you have usecase for this value?  
>> 
>> I added this in to avoid having the entire netlink dump fail when there 
>> are invalid values read from hardware.
>> 
>> This way, it can report an unknown or invalid value instead of failing 
>> the operation.
>
>That's the first reason, the second is that we also want to report 
>the unknown value if it's not recognized by the driver. For u8/enum
>parameters the value may possibly be set to a value older driver
>doesn't understand, but users should still be able to set them to one
>of the known ones.

Ok.

>
>We'd also like to add that to 'fw_load_policy'. WDYT?

Ok.

^ permalink raw reply

* Re: INFO: rcu detected stall in igmp_ifc_timer_expire
From: syzbot @ 2019-09-07  9:56 UTC (permalink / raw)
  To: davem, herbert, jhs, jiri, linux-kernel, netdev, steffen.klassert,
	syzkaller-bugs, xiyou.wangcong
In-Reply-To: <000000000000a26437057e4915ff@google.com>

syzbot has found a reproducer for the following crash on:

HEAD commit:    1e3778cb Merge tag 'scsi-fixes' of git://git.kernel.org/pu..
git tree:       upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=12df164e600000
kernel config:  https://syzkaller.appspot.com/x/.config?x=b89bb446a3faaba4
dashboard link: https://syzkaller.appspot.com/bug?extid=041483004a7f45f1f20a
compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=148c3001600000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=13b12cd1600000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+041483004a7f45f1f20a@syzkaller.appspotmail.com

rcu: INFO: rcu_preempt self-detected stall on CPU
rcu: 	1-...!: (1 GPs behind) idle=b8e/0/0x3 softirq=12119/12122 fqs=6
	(t=10500 jiffies g=10289 q=55)
rcu: rcu_preempt kthread starved for 10480 jiffies! g10289 f0x0  
RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=0
rcu: RCU grace-period kthread stack dump:
rcu_preempt     I29520    10      2 0x80004000
Call Trace:
  context_switch kernel/sched/core.c:3254 [inline]
  __schedule+0x755/0x1580 kernel/sched/core.c:3880
  schedule+0xd9/0x260 kernel/sched/core.c:3947
  schedule_timeout+0x486/0xc50 kernel/time/timer.c:1807
  rcu_gp_fqs_loop kernel/rcu/tree.c:1611 [inline]
  rcu_gp_kthread+0x9b2/0x18c0 kernel/rcu/tree.c:1768
  kthread+0x361/0x430 kernel/kthread.c:255
  ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352
NMI backtrace for cpu 1
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.3.0-rc7+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
Call Trace:
  <IRQ>
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x172/0x1f0 lib/dump_stack.c:113
  nmi_cpu_backtrace.cold+0x70/0xb2 lib/nmi_backtrace.c:101
  nmi_trigger_cpumask_backtrace+0x23b/0x28b lib/nmi_backtrace.c:62
  arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
  trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline]
  rcu_dump_cpu_stacks+0x183/0x1cf kernel/rcu/tree_stall.h:254
  print_cpu_stall kernel/rcu/tree_stall.h:455 [inline]
  check_cpu_stall kernel/rcu/tree_stall.h:529 [inline]
  rcu_pending kernel/rcu/tree.c:2736 [inline]
  rcu_sched_clock_irq.cold+0x4dd/0xc13 kernel/rcu/tree.c:2183
  update_process_times+0x32/0x80 kernel/time/timer.c:1639
  tick_sched_handle+0xa2/0x190 kernel/time/tick-sched.c:167
  tick_sched_timer+0x53/0x140 kernel/time/tick-sched.c:1296
  __run_hrtimer kernel/time/hrtimer.c:1389 [inline]
  __hrtimer_run_queues+0x364/0xe40 kernel/time/hrtimer.c:1451
  hrtimer_interrupt+0x314/0x770 kernel/time/hrtimer.c:1509
  local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1106 [inline]
  smp_apic_timer_interrupt+0x160/0x610 arch/x86/kernel/apic/apic.c:1131
  apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830
RIP: 0010:__list_del_entry_valid+0xb3/0xf5 lib/list_debug.c:54
Code: 75 53 49 8b 14 24 4c 39 f2 0f 85 99 00 00 00 49 8d 7d 08 48 b8 00 00  
00 00 00 fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 75 24 <49> 8b 55 08 4c  
39 f2 0f 85 aa 00 00 00 41 5c b8 01 00 00 00 41 5d
RSP: 0018:ffff8880ae909010 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13
RAX: dffffc0000000000 RBX: ffff88808c064338 RCX: ffffffff85c65b39
RDX: 1ffff1101180c87b RSI: ffffffff85c66006 RDI: ffff88808c0643d8
RBP: ffff8880ae909028 R08: ffff8880a98d6340 R09: 0000000000000000
R10: fffffbfff134afaf R11: ffff8880a98d6340 R12: ffff88808c0643d0
R13: ffff88808c0643d0 R14: ffff88808c064338 R15: 0000000000000000
  __list_del_entry include/linux/list.h:131 [inline]
  list_move_tail include/linux/list.h:213 [inline]
  hhf_dequeue+0x5c5/0xa20 net/sched/sch_hhf.c:439
  dequeue_skb net/sched/sch_generic.c:258 [inline]
  qdisc_restart net/sched/sch_generic.c:361 [inline]
  __qdisc_run+0x1e7/0x19d0 net/sched/sch_generic.c:379
  __dev_xmit_skb net/core/dev.c:3533 [inline]
  __dev_queue_xmit+0x16f1/0x3650 net/core/dev.c:3838
  dev_queue_xmit+0x18/0x20 net/core/dev.c:3902
  br_dev_queue_push_xmit+0x3f3/0x5c0 net/bridge/br_forward.c:52
  NF_HOOK include/linux/netfilter.h:305 [inline]
  NF_HOOK include/linux/netfilter.h:299 [inline]
  br_forward_finish+0xfa/0x400 net/bridge/br_forward.c:65
  NF_HOOK include/linux/netfilter.h:305 [inline]
  NF_HOOK include/linux/netfilter.h:299 [inline]
  __br_forward+0x641/0xb00 net/bridge/br_forward.c:109
  deliver_clone+0x61/0xc0 net/bridge/br_forward.c:125
  maybe_deliver+0x2c7/0x390 net/bridge/br_forward.c:181
  br_flood+0x13a/0x3d0 net/bridge/br_forward.c:223
  br_dev_xmit+0x98c/0x15a0 net/bridge/br_device.c:100
  __netdev_start_xmit include/linux/netdevice.h:4406 [inline]
  netdev_start_xmit include/linux/netdevice.h:4420 [inline]
  xmit_one net/core/dev.c:3280 [inline]
  dev_hard_start_xmit+0x1a3/0x9c0 net/core/dev.c:3296
  __dev_queue_xmit+0x2b15/0x3650 net/core/dev.c:3869
  dev_queue_xmit+0x18/0x20 net/core/dev.c:3902
  neigh_hh_output include/net/neighbour.h:500 [inline]
  neigh_output include/net/neighbour.h:509 [inline]
  ip_finish_output2+0x1726/0x2570 net/ipv4/ip_output.c:228
  __ip_finish_output net/ipv4/ip_output.c:308 [inline]
  __ip_finish_output+0x5fc/0xb90 net/ipv4/ip_output.c:290
  ip_finish_output+0x38/0x1f0 net/ipv4/ip_output.c:318
  NF_HOOK_COND include/linux/netfilter.h:294 [inline]
  ip_output+0x21f/0x640 net/ipv4/ip_output.c:432
  dst_output include/net/dst.h:436 [inline]
  ip_local_out+0xbb/0x190 net/ipv4/ip_output.c:125
  igmpv3_sendpack+0x1b5/0x2c0 net/ipv4/igmp.c:426
  igmpv3_send_cr net/ipv4/igmp.c:721 [inline]
  igmp_ifc_timer_expire+0x687/0xa00 net/ipv4/igmp.c:809
  call_timer_fn+0x1ac/0x780 kernel/time/timer.c:1322
  expire_timers kernel/time/timer.c:1366 [inline]
  __run_timers kernel/time/timer.c:1685 [inline]
  __run_timers kernel/time/timer.c:1653 [inline]
  run_timer_softirq+0x697/0x17a0 kernel/time/timer.c:1698
  __do_softirq+0x262/0x98c kernel/softirq.c:292
  invoke_softirq kernel/softirq.c:373 [inline]
  irq_exit+0x19b/0x1e0 kernel/softirq.c:413
  exiting_irq arch/x86/include/asm/apic.h:537 [inline]
  smp_apic_timer_interrupt+0x1a3/0x610 arch/x86/kernel/apic/apic.c:1133
  apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830
  </IRQ>
RIP: 0010:native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:61
Code: 38 73 6e fa eb 8a 90 90 90 90 90 90 e9 07 00 00 00 0f 00 2d 24 1b 4a  
00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 14 1b 4a 00 fb f4 <c3> 90 55 48 89  
e5 41 57 41 56 41 55 41 54 53 e8 ee 32 22 fa e8 39
RSP: 0018:ffff8880a98e7d68 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13
RAX: 1ffffffff11a5e8d RBX: ffff8880a98d6340 RCX: 1ffffffff134b5ee
RDX: dffffc0000000000 RSI: ffffffff8177f14e RDI: ffffffff873e050c
RBP: ffff8880a98e7d98 R08: ffff8880a98d6340 R09: ffffed101531ac69
R10: ffffed101531ac68 R11: ffff8880a98d6347 R12: dffffc0000000000
R13: ffffffff89a57d78 R14: 0000000000000000 R15: 0000000000000001
  arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:571
  default_idle_call+0x84/0xb0 kernel/sched/idle.c:94
  cpuidle_idle_call kernel/sched/idle.c:154 [inline]
  do_idle+0x413/0x760 kernel/sched/idle.c:263
  cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:354
  start_secondary+0x315/0x430 arch/x86/kernel/smpboot.c:264
  secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241


^ permalink raw reply

* Re: [RFC PATCH 3/3] Enable ptp_kvm for arm64
From: Marc Zyngier @ 2019-09-07  9:15 UTC (permalink / raw)
  To: Jianyong Wu (Arm Technology China)
  Cc: netdev@vger.kernel.org, pbonzini@redhat.com,
	sean.j.christopherson@intel.com, richardcochran@gmail.com,
	Mark Rutland, Will Deacon, Suzuki Poulose,
	linux-kernel@vger.kernel.org, Steve Capper,
	Kaly Xin (Arm Technology China), Justin He (Arm Technology China)
In-Reply-To: <HE1PR0801MB16768ED94EA50010EEF634EAF4BA0@HE1PR0801MB1676.eurprd08.prod.outlook.com>

On Fri, 06 Sep 2019 12:58:15 +0100,
"Jianyong Wu (Arm Technology China)" <Jianyong.Wu@arm.com> wrote:
> 
> Hi Marc,
> 
> Very sorry to have missed these comments.
> 
> > -----Original Message-----
> > From: Marc Zyngier <maz@kernel.org>
> > Sent: Thursday, August 29, 2019 6:33 PM
> > To: Jianyong Wu (Arm Technology China) <Jianyong.Wu@arm.com>;
> > netdev@vger.kernel.org; pbonzini@redhat.com;
> > sean.j.christopherson@intel.com; richardcochran@gmail.com; Mark Rutland
> > <Mark.Rutland@arm.com>; Will Deacon <Will.Deacon@arm.com>; Suzuki
> > Poulose <Suzuki.Poulose@arm.com>
> > Cc: linux-kernel@vger.kernel.org; Steve Capper <Steve.Capper@arm.com>;
> > Kaly Xin (Arm Technology China) <Kaly.Xin@arm.com>; Justin He (Arm
> > Technology China) <Justin.He@arm.com>
> > Subject: Re: [RFC PATCH 3/3] Enable ptp_kvm for arm64
> >
> > On 29/08/2019 07:39, Jianyong Wu wrote:
> > > Currently in arm64 virtualization environment, there is no mechanism
> > > to keep time sync between guest and host. Time in guest will drift
> > > compared with host after boot up as they may both use third party time
> > > sources to correct their time respectively. The time deviation will be
> > > in order of milliseconds but some scenarios ask for higher time
> > > precision, like in cloud environment, we want all the VMs running in
> > > the host acquire the same level accuracy from host clock.
> > >
> > > Use of kvm ptp clock, which choose the host clock source clock as a
> > > reference clock to sync time clock between guest and host has been
> > > adopted by x86 which makes the time sync order from milliseconds to
> > nanoseconds.
> > >
> > > This patch enable kvm ptp on arm64 and we get the similar clock drift
> > > as found with x86 with kvm ptp.
> > >
> > > Test result comparison between with kvm ptp and without it in arm64
> > > are as follows. This test derived from the result of command 'chronyc
> > > sources'. we should take more care of the last sample column which
> > > shows the offset between the local clock and the source at the last
> > measurement.
> > >
> > > no kvm ptp in guest:
> > > MS Name/IP address   Stratum Poll Reach LastRx Last sample
> > >
> > ==========================================================
> > ==============
> > > ^* dns1.synet.edu.cn      2   6   377    13  +1040us[+1581us] +/-   21ms
> > > ^* dns1.synet.edu.cn      2   6   377    21  +1040us[+1581us] +/-   21ms
> > > ^* dns1.synet.edu.cn      2   6   377    29  +1040us[+1581us] +/-   21ms
> > > ^* dns1.synet.edu.cn      2   6   377    37  +1040us[+1581us] +/-   21ms
> > > ^* dns1.synet.edu.cn      2   6   377    45  +1040us[+1581us] +/-   21ms
> > > ^* dns1.synet.edu.cn      2   6   377    53  +1040us[+1581us] +/-   21ms
> > > ^* dns1.synet.edu.cn      2   6   377    61  +1040us[+1581us] +/-   21ms
> > > ^* dns1.synet.edu.cn      2   6   377     4   -130us[ +796us] +/-   21ms
> > > ^* dns1.synet.edu.cn      2   6   377    12   -130us[ +796us] +/-   21ms
> > > ^* dns1.synet.edu.cn      2   6   377    20   -130us[ +796us] +/-   21ms
> > >
> > > in host:
> > > MS Name/IP address   Stratum Poll Reach LastRx Last sample
> > >
> > ==========================================================
> > ==============
> > > ^* 120.25.115.20          2   7   377    72   -470us[ -603us] +/-   18ms
> > > ^* 120.25.115.20          2   7   377    92   -470us[ -603us] +/-   18ms
> > > ^* 120.25.115.20          2   7   377   112   -470us[ -603us] +/-   18ms
> > > ^* 120.25.115.20          2   7   377     2   +872ns[-6808ns] +/-   17ms
> > > ^* 120.25.115.20          2   7   377    22   +872ns[-6808ns] +/-   17ms
> > > ^* 120.25.115.20          2   7   377    43   +872ns[-6808ns] +/-   17ms
> > > ^* 120.25.115.20          2   7   377    63   +872ns[-6808ns] +/-   17ms
> > > ^* 120.25.115.20          2   7   377    83   +872ns[-6808ns] +/-   17ms
> > > ^* 120.25.115.20          2   7   377   103   +872ns[-6808ns] +/-   17ms
> > > ^* 120.25.115.20          2   7   377   123   +872ns[-6808ns] +/-   17ms
> > >
> > > The dns1.synet.edu.cn is the network reference clock for guest and
> > > 120.25.115.20 is the network reference clock for host. we can't get
> > > the clock error between guest and host directly, but a roughly
> > > estimated value will be in order of hundreds of us to ms.
> > >
> > > with kvm ptp in guest:
> > > chrony has been disabled in host to remove the disturb by network clock.
> >
> > Is that a realistic use case? Why should the host not use NTP?
> >
> 
> Not really, NTP will change the host clock which will contaminate the data of sync between
> Host and guest. But in reality, we will keep NTP online.
> 
> > >
> > > MS Name/IP address         Stratum Poll Reach LastRx Last sample
> > >
> > ==========================================================
> > ==============
> > > * PHC0                    0   3   377     8     -7ns[   +1ns] +/-    3ns
> > > * PHC0                    0   3   377     8     +1ns[  +16ns] +/-    3ns
> > > * PHC0                    0   3   377     6     -4ns[   -0ns] +/-    6ns
> > > * PHC0                    0   3   377     6     -8ns[  -12ns] +/-    5ns
> > > * PHC0                    0   3   377     5     +2ns[   +4ns] +/-    4ns
> > > * PHC0                    0   3   377    13     +2ns[   +4ns] +/-    4ns
> > > * PHC0                    0   3   377    12     -4ns[   -6ns] +/-    4ns
> > > * PHC0                    0   3   377    11     -8ns[  -11ns] +/-    6ns
> > > * PHC0                    0   3   377    10    -14ns[  -20ns] +/-    4ns
> > > * PHC0                    0   3   377     8     +4ns[   +5ns] +/-    4ns
> > >
> > > The PHC0 is the ptp clock which choose the host clock as its source
> > > clock. So we can be sure to say that the clock error between host and
> > > guest is in order of ns.
> > >
> > > Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
> > > ---
> > >  arch/arm64/include/asm/arch_timer.h  |  3 ++
> > >  arch/arm64/kvm/arch_ptp_kvm.c        | 76
> > ++++++++++++++++++++++++++++
> > >  drivers/clocksource/arm_arch_timer.c |  6 ++-
> > >  drivers/ptp/Kconfig                  |  2 +-
> > >  include/linux/arm-smccc.h            | 14 +++++
> > >  virt/kvm/arm/psci.c                  | 17 +++++++
> > >  6 files changed, 115 insertions(+), 3 deletions(-)  create mode
> > > 100644 arch/arm64/kvm/arch_ptp_kvm.c
> >
> > Please split this patch into two parts: the hypervisor code in a patch and the
> > guest code in another patch. Having both of them together is confusing.
> >
> Ok,  really better.
> 
> > >
> > > diff --git a/arch/arm64/include/asm/arch_timer.h
> > > b/arch/arm64/include/asm/arch_timer.h
> > > index 6756178c27db..880576a814b6 100644
> > > --- a/arch/arm64/include/asm/arch_timer.h
> > > +++ b/arch/arm64/include/asm/arch_timer.h
> > > @@ -229,4 +229,7 @@ static inline int arch_timer_arch_init(void)
> > >     return 0;
> > >  }
> > >
> > > +extern struct clocksource clocksource_counter; extern u64
> > > +arch_counter_read(struct clocksource *cs);
> >
> > I'm definitely not keen on exposing the internals of the arch_timer driver to
> > random subsystems. Furthermore, you seem to expect that the guest kernel
> > will only use the arch timer as a clocksource, and nothing really guarantees
> > that (in which case get_device_system_crosststamp will fail).
> >
> The code here is really ugly, I need a better solution to offer a clock source
> For the guest.
> 
> > It looks to me that we'd be better off exposing a core timekeeping API that
> > populates a struct system_counterval_t based on the *current* timekeeper
> > monotonic clocksource. This would simplify the split between generic and
> > arch-specific code.
> >
> I think it really necessary.
> 
> > Whether or not tglx will be happy with the idea is another problem, but I'm
> > certainly not taking any change to the arch timer code based on this.
> >
> I can have a try, but the detail is not clear for me now.

Something along those lines:

From 5f1c061e55c691d64012bc7c1490a1a8c4432c67 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 7 Sep 2019 10:11:49 +0100
Subject: [PATCH] timekeeping: Expose API allowing retrieval of current
 clocksource and counter value

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/timekeeping.h |  5 +++++
 kernel/time/timekeeping.c   | 12 ++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index b27e2ffa96c1..6df26a913711 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -275,6 +275,11 @@ extern int get_device_system_crosststamp(
 			struct system_time_snapshot *history,
 			struct system_device_crosststamp *xtstamp);
 
+/*
+ * Obtain current monotonic clock and its counter value
+ */
+extern void get_current_counterval(struct system_counterval_t *sc);
+
 /*
  * Simultaneously snapshot realtime and monotonic raw clocks
  */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d911c8470149..de689bbd3808 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1098,6 +1098,18 @@ static bool cycle_between(u64 before, u64 test, u64 after)
 	return false;
 }
 
+/**
+ * get_current_counterval - Snapshot the current clocksource and counter value
+ * @sc:	Pointer to a struct containing the current clocksource and its value
+ */
+void get_current_counterval(struct system_counterval_t *sc)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+
+	sc->cs = READ_ONCE(tk->tkr_mono.clock);
+	sc->cycles = sc->cs->read(sc->cs);
+}
+
 /**
  * get_device_system_crosststamp - Synchronously capture system/device timestamp
  * @get_time_fn:	Callback to get simultaneous device time and

which should do the right thing.

>
> > > +
> > >  #endif
> > > diff --git a/arch/arm64/kvm/arch_ptp_kvm.c
> > > b/arch/arm64/kvm/arch_ptp_kvm.c
> >
> > We don't put non-hypervisor in arch/arm64/kvm. Please move it back to
> > drivers/ptp (as well as its x86 counterpart), and just link the two parts there.
> > This should also allow this to be enabled for 32bit guests.
> >
> Err, sorry, what's mean of "link the two parts there"? should I add
> another two file update driver/ptp/ Both for arm64 and x86 to
> contains these arch-specific code or pack them all into ptp_kvm.c?

What I'm suggesting is that you have 3 files:

  drivers/ptp/ptp_kvm.c
  drivers/ptp/ptp_kvm_x86.c
  drivers/ptp/ptp_kvm_arm.c

and let the Makefile combine them.

[...]

> > Other questions: how does this works with VM migration? Specially when
> > moving from a hypervisor that supports the feature to one that doesn't?
> >
> I think it won't solve the problem generated by VM migration and
> only for VMs in a single machine.  Ptp_kvm only works for VMs in the
> same machine.  But using ptp (not ptp_kvm) clock, all the machines
> in a low latency network environment can keep time sync in high
> precision, Then VMs move from one machine to another will obtain a
> high precision time sync.

That's a problem. Migration must be possible from one host to another,
even if that means temporarily losing some (or a lot of)
precision. The service must be discoverable from userspace on the host
so that the VMM can decide whether a migration is possible or not.

Thanks,

	M.

-- 
Jazz is not dead, it just smells funny.

^ permalink raw reply related

* Re: pull-request: wireless-drivers-next 2019-09-07
From: David Miller @ 2019-09-07  8:34 UTC (permalink / raw)
  To: kvalo; +Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <87blvwlelw.fsf@kamboji.qca.qualcomm.com>

From: Kalle Valo <kvalo@codeaurora.org>
Date: Sat, 07 Sep 2019 11:01:15 +0300

> here's a pull request to net-next for v5.4, more info below. Please let
> me know if there are any problems.

Pulled, thanks Kalle.

^ permalink raw reply

* pull-request: wireless-drivers-next 2019-09-07
From: Kalle Valo @ 2019-09-07  8:01 UTC (permalink / raw)
  To: David Miller; +Cc: linux-wireless, netdev, linux-kernel

Hi Dave,

here's a pull request to net-next for v5.4, more info below. Please let
me know if there are any problems.

Kalle

The following changes since commit 67538eb5c00f08d7fe27f1bb703098b17302bdc0:

  Merge branch 'mvpp2-per-cpu-buffers' (2019-09-02 12:07:46 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git tags/wireless-drivers-next-for-davem-2019-09-07

for you to fetch changes up to 67e974c3ae21c8ced474eae3ce9261a6f827e95c:

  Merge tag 'iwlwifi-next-for-kalle-2019-09-06' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-next (2019-09-07 10:21:07 +0300)

----------------------------------------------------------------
wireless-drivers-next patches for 5.4

Second set of patches for 5.4. Lots of changes for iwlwifi and mt76,
but also smaller changes to other drivers.

Major changes:

iwlwifi

* remove broken and unused runtime power management mode for PCIe
  devices, removes IWLWIFI_PCIE_RTPM Kconfig option as well

* support new ACPI value for per-platform antenna gain

* support for single antenna diversity

* support for new WoWLAN FW API

brcmfmac

* add reset debugfs file for testing firmware restart

mt76

* DFS pattern detector for mt7615 (DFS channels not enabled yet)

* Channel Switch Announcement (CSA) support for mt7615

* new device support for mt76x0

* support for more ciphers in mt7615

* smart carrier sense on mt7615

* survey support on mt7615

* multiple interfaces on mt76x02u

rtw88

* enable MSI interrupt

----------------------------------------------------------------
Alex Malamud (2):
      iwlwifi: LTR updates
      iwlwifi: Set w-pointer upon resume according to SN

Andy Shevchenko (3):
      hostap: use %*ph to print small buffer
      brcmfmac: use %*ph to print small buffer
      zd1211rw: use %*ph to print small buffer

Ayala Beker (2):
      iwlwifi: scan: add support for new scan request command version
      iwlwifi: scan: don't pass large argument by value

Beker Ayala (1):
      iwlwifi: mvm: fix scan config command size

Christoph Hellwig (1):
      iwlwifi: stop passing bogus gfp flags arguments to dma_alloc_coherent

Colin Ian King (4):
      rtw88: remove redundant assignment to pointer debugfs_topdir
      brcmfmac: remove redundant assignment to pointer hash
      ipw2x00: fix spelling mistake "initializationg" -> "initialization"
      bcma: fix incorrect update of BCMA_CORE_PCI_MDIO_DATA

Dan Carpenter (1):
      rtw88: Fix an error message

Emmanuel Grumbach (20):
      iwlwifi: mvm: remove redundant condition in iwl_mvm_set_hw_rfkill_state
      iwlwifi: mvm: start to remove the code for d0i3
      iwlwifi: remove all the d0i3 references
      iwlwifi: mvm: remove the tx defer for d0i3
      iwlwifi: mvm: remove the d0i3 entry/exit flow
      iwlwifi: mvm: iwl_mvm_wowlan_config_key_params is for wowlan only
      iwlwifi: mvm: remove d0i3_ap_sta_id
      iwlwifi: mvm: remove iwl_mvm_update_d0i3_power_mode
      iwlwifi: mvm: remove last leftovers of d0i3
      iwlwifi: remove CMD_HIGH_PRIO
      iwlwifi: trans: remove suspending flag
      iwlwifi: remove the code under IWLWIFI_PCIE_RTPM
      iwlwifi: remove runtime_pm_mode
      iwlwifi: remove the opmode's d0i3 handlers
      iwlwifi: pcie: remove the refs / unrefs from the transport
      iwlwifi: pcie: remove some more d0i3 code from the transport
      iwlwifi: remove the d0i3 related module parameters
      iwlwifi: remove pm_runtime completely
      iwlwifi: mvm: simplify the channel switch flow for newer firmware
      iwlwifi: mvm: don't log un-decrypted frames

Felix Fietkau (16):
      mt76: round up length on mt76_wr_copy
      mt76: mt7615: clean up FWDL TXQ during/after firmware upload
      mt76: mt7603: enable hardware rate up/down selection
      mt76: mt7615: move mt7615_mcu_set_rates to mac.c
      mt76: mt7615: reset rate index/counters on rate table update
      mt76: mt7615: sync with mt7603 rate control changes
      mt76: mt7615: fix using VHT STBC rates
      mt76: mt7615: fix PS buffering of action frames
      mt76: mt7615: fix invalid fallback rates
      mt76: mt7603: fix invalid fallback rates
      mt76: mt7615: add missing register initialization
      mt76: mt7615: apply calibration-free data from OTP
      mt76: dma: reset q->rx_head on rx reset
      mt76: stop rx aggregation on station removal
      mt76: do not send BAR frame on tx aggregation flush stop
      mt76: remove offchannel check in tx scheduling

Gil Adam (1):
      iwlwifi: support per-platform antenna gain

Guenter Roeck (1):
      rtw88: drop unused rtw_coex_coex_dm_reset()

Gustavo A. R. Silva (1):
      zd1211rw: zd_usb: Use struct_size() helper

Haim Dreyfuss (4):
      iwlwifi: remove unused regdb_ptrs allocation
      iwlwifi: add support for suspend-resume flow for new device generation
      iwlwifi: add sta_id to WOWLAN_CONFIG_CMD
      iwlwifi: mvm: add support for single antenna diversity

Hariprasad Kelam (1):
      iwlwifi: fix warning iwl-trans.h is included more than once

Ilan Peer (1):
      iwlwifi: mvm: Block 26-tone RU OFDMA transmissions

Ilia Lin (1):
      iwlwifi: Send DQA enable command only if TVL is on

Jia-Ju Bai (1):
      brcm80211: Avoid possible null-pointer dereferences in wlc_phy_radio_init_2056()

Jian-Hong Pan (1):
      rtw88: pci: Move a mass of jobs in hw IRQ to soft IRQ

Johannes Berg (5):
      iwlwifi: mvm: remove unnecessary forward declarations
      iwlwifi: mvm: use FW thermal monitoring regardless of CONFIG_THERMAL
      iwlwifi: api: fix FTM struct documentation
      iwlwifi: mvm: drop BA sessions on too many old-SN frames
      iwlwifi: mvm: handle BAR_FRAME_RELEASE (0xc2) notification

Kalle Valo (2):
      Merge tag 'mt76-for-kvalo-2019-09-05' of https://github.com/nbd168/wireless
      Merge tag 'iwlwifi-next-for-kalle-2019-09-06' of git://git.kernel.org/.../iwlwifi/iwlwifi-next

Larry Finger (14):
      rtlwifi: rtl8192cu: Fix value set in descriptor
      rtlwifi: rtl_pci: Fix memory leak when hardware init fails
      rtlwifi: rtl8192ee: Remove unused GET_XXX and SET_XXX
      rtlwifi: rtl8192ee: Replace local bit manipulation macros
      rtlwifi: rtl8192ee: Convert macros that set descriptor
      rtlwifi: rtl8192ee: Convert inline routines to little-endian words
      rtlwifi: rtl8192ee: Remove some variable initializations
      rtlwifi: rtl8192cu: Remove unused GET_XXX and SET_XXX
      rtlwifi: rtl8192cu: Replace local bit manipulation macros
      rtlwifi: rtl8192cu: Convert macros that set descriptor
      rtlwifi: rtl8192cu: Convert inline routines to little-endian words
      rtlwifi: rtl8821ae: Fix incorrect returned values
      rtlwifi: rtl8188ee: Fix incorrect returned values
      rtlwifi: rtl8192ce: Fix incorrect returned values

Lorenzo Bianconi (33):
      mt76: mt7615: fix sparse warnings: warning: restricted __le16 degrades to integer
      mt76: mt7615: introduce mt7615_regd_notifier
      mt76: mt7615: add hw dfs pattern detector support
      mt76: mt7615: do not perform txcalibration before cac is complited
      mt76: mt7615: add csa support
      mt76: mt7615: add radar pattern test knob to debugfs
      mt76: mt7615: fall back to sw encryption for unsupported ciphers
      mt76: mt7615: always release sem in mt7615_load_patch
      mt76: mt7615: introduce mt7615_mcu_send_ram_firmware routine
      mt76: mt76u: fix typo in mt76u_fill_rx_sg
      mt76: mt76x0u: add support to TP-Link T2UHP
      mt76: mt7615: move mt7615_mac_get_key_info in mac.c
      mt76: mt7615: add mt7615_mac_wtbl_addr routine
      mt76: mt7615: introduce mt7615_mac_wtbl_set_key routine
      mt76: mt7615: remove wtbl_sec_key definition
      mt76: mt7615: add set_key_cmd and mt76_wcid to mt7615_mac_wtbl_set_key signature
      mt76: introduce mt76_mmio_read_copy routine
      mt76: mt7615: fix MT7615_WATCHDOG_TIME definition
      mt76: mt7603: fix watchdog rescheduling in mt7603_set_channel
      mt76: mt7615: rework locking scheme for mt7615_set_channel
      mt76: mt7615: add Smart Carrier Sense support
      mt76: mt76x02: introduce mt76x02_pre_tbtt_enable and mt76x02_beacon_enable macros
      mt76: mt76x02: do not copy beacon skb in mt76x02_mac_set_beacon_enable
      mt76: mt76x02u: enable multi-vif support
      mt76: mt76x02u: enable survey support
      mt76: mt7603: move survey_time in mt76_dev
      mt76: mt7615: enable survey support
      mt76: move mt76_tx_tasklet in mt76 module
      mt76: mt7603: remove unnecessary mcu queue initialization
      mt76: mt7615: add BIP_CMAC_128 cipher support
      mt76: add default implementation for mt76_sw_scan/mt76_sw_scan_complete
      mt76: mt7615: introduce mt7615_txwi_to_txp utility routine
      mt76: mt7615: add support to read temperature from mcu

Luca Coelho (12):
      iwlwifi: bump FW API to 49 for 22000 series
      iwlwifi: mvm: remove check for lq_sta in __iwl_mvm_rs_tx_status()
      iwlwifi: bump FW API to 50 for 22000 series
      iwlwifi: remove duplicate FW string definitions
      iwlwifi: remove unnecessary IWL_DEVICE_AX200_COMMON definition
      iwlwifi: separate elements from cfg that are needed by trans_alloc
      iwlwifi: pcie: use the cfg we passed to iwl_trans_pcie_alloc()
      iwlwifi: pcie: move some cfg mangling from trans_pcie_alloc to probe
      iwlwifi: pcie: set iwl_trans->cfg later in the probe function
      iwlwifi: pass the iwl_config_trans_params when needed
      iwlwifi: add a pointer to the trans_cfg directly in trans
      iwlwifi: always access the trans configuration via trans

Mordechay Goodstein (1):
      iwlwifi: mvm: name magic numbers with enum

Oliver Neukum (1):
      zd1211rw: remove false assertion from zd_mac_clear()

Rafał Miłecki (3):
      brcmfmac: get chip's default RAM info during PCIe setup
      brcmfmac: add stub version of brcmf_debugfs_get_devdir()
      brcmfmac: add "reset" debugfs entry for testing reset

Ryder Lee (8):
      mt76: mt7615: add 4 WMM sets support
      mt76: mt7615: update cw_min/max related settings
      mt76: Add paragraphs to describe the config symbols fully
      mt76: mt7603: fix some checkpatch warnings
      mt76: mt7615: fix some checkpatch warnings
      mt76: mt76x02: fix some checkpatch warnings
      mt76: switch to SPDX tag instead of verbose boilerplate text
      mt76: fix some checkpatch warnings

Shahar S Matityahu (24):
      iwlwifi: dbg: move monitor recording functionality from header file
      iwlwifi: dbg: move debug recording stop from trans to op mode
      iwlwifi: dbg: support debug recording suspend resume command
      iwlwifi: add ldbg config cmd debug print
      iwlwifi: dbg_ini: align dbg tlv functions names to a single format
      iwlwifi: dbg: add debug periphery registers to 9000 device family
      iwlwifi: dbg_ini: maintain buffer allocations from trans instead of TLVs buffer
      iwlwifi: dbg_ini: use linked list to store debug TLVs
      iwlwifi: dbg_ini: remove periphery phy and aux regions handling
      iwlwifi: dbg_ini: use function to check if ini dbg mode is on
      iwlwifi: dbg_ini: verify debug TLVs at allocation phase
      iwlwifi: dbg_ini: remove debug flow TLV
      iwlwifi: dbg: align wrt log prints to the same format
      iwlwifi: dbg_ini: separate cfg and dump flows to different modules
      iwlwifi: dbg_ini: use linked list for dump TLVs during dump creation
      iwlwifi: dbg_ini: move tx fifo data into fw runtime
      iwlwifi: dbg_ini: make a single ops struct for paging collect
      iwlwifi: dbg_ini: use regions ops array instead of switch case in dump flow
      iwlwifi: add iwl_tlv_array_len()
      iwlwifi: dbg_ini: remove apply point, switch to time point API
      iwlwifi: fw api: add DRAM buffer allocation command
      iwlwifi: dbg_ini: fix dump structs doc
      iwlwifi: dbg_ini: remove periodic trigger
      iwlwifi: dbg: remove iwl_fw_cancel_dumps function

Shaul Triebitz (2):
      iwlwifi: mvm: add the skb length to a print
      iwlwifi: pass the iwl_trans instead of cfg to some functions

Stanislaw Gruszka (9):
      rt2x00: do not set IEEE80211_TX_STAT_AMPDU_NO_BACK on tx status
      mt76: usb: fix endian in mt76u_copy
      mt76: usb: remove unneeded {put,get}_unaligned
      mt76: mt76x02: use params->ssn value directly
      mt76: mt7603: use params->ssn value directly
      mt76: mt7615: use params->ssn value directly
      mt76: make mt76_rx_convert static
      mt76: mt76x0: remove redundant chandef copy
      mt76: mt76x0: remove unneeded return value on set channel

Tova Mussai (2):
      iwlwifi: allocate bigger nvm data in case of UHB
      iwlwifi: mvm: look for the first supported channel when add/remove phy ctxt

Valdis Kletnieks (1):
      rtlwifi: fix non-kerneldoc comment in usb.c

Wei Yongjun (2):
      rtw88: fix seq_file memory leak
      rtlwifi: Fix file release memory leak

Wenwen Wang (1):
      airo: fix memory leaks

Xulin Sun (1):
      brcmfmac: replace strncpy() by strscpy()

Yu-Yen Ting (1):
      rtw88: pci: enable MSI interrupt

YueHaibing (3):
      rtlwifi: remove unused variables 'RTL8712_SDIO_EFUSE_TABLE' and 'MAX_PGPKT_SIZE'
      bcma: remove two unused variables
      mt76: mt7603: use devm_platform_ioremap_resource() to simplify code

zhong jiang (1):
      hostap: remove set but not used variable 'copied' in prism2_io_debug_proc_read

 drivers/bcma/driver_mips.c                         |   16 -
 drivers/bcma/driver_pci.c                          |    4 +-
 .../broadcom/brcm80211/brcmfmac/cfg80211.c         |   22 +-
 .../wireless/broadcom/brcm80211/brcmfmac/chip.c    |    6 +-
 .../wireless/broadcom/brcm80211/brcmfmac/chip.h    |    1 +
 .../wireless/broadcom/brcm80211/brcmfmac/core.c    |   25 +
 .../wireless/broadcom/brcm80211/brcmfmac/debug.h   |    4 +
 .../wireless/broadcom/brcm80211/brcmfmac/msgbuf.c  |    1 -
 .../wireless/broadcom/brcm80211/brcmfmac/pcie.c    |    6 +
 .../broadcom/brcm80211/brcmsmac/phy/phy_n.c        |    2 +-
 drivers/net/wireless/cisco/airo.c                  |   11 +-
 drivers/net/wireless/intel/ipw2x00/ipw2200.c       |    2 +-
 drivers/net/wireless/intel/iwlwifi/Kconfig         |   14 -
 drivers/net/wireless/intel/iwlwifi/cfg/1000.c      |   14 +-
 drivers/net/wireless/intel/iwlwifi/cfg/2000.c      |   26 +-
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c     |   46 +-
 drivers/net/wireless/intel/iwlwifi/cfg/5000.c      |   18 +-
 drivers/net/wireless/intel/iwlwifi/cfg/6000.c      |   44 +-
 drivers/net/wireless/intel/iwlwifi/cfg/7000.c      |   10 +-
 drivers/net/wireless/intel/iwlwifi/cfg/8000.c      |   10 +-
 drivers/net/wireless/intel/iwlwifi/cfg/9000.c      |   10 +-
 drivers/net/wireless/intel/iwlwifi/dvm/devices.c   |    3 +-
 drivers/net/wireless/intel/iwlwifi/dvm/led.c       |    5 +-
 drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c  |    4 +-
 drivers/net/wireless/intel/iwlwifi/dvm/main.c      |   12 +-
 drivers/net/wireless/intel/iwlwifi/dvm/power.c     |    3 +-
 drivers/net/wireless/intel/iwlwifi/dvm/tx.c        |    5 +-
 drivers/net/wireless/intel/iwlwifi/fw/acpi.h       |   12 +
 .../net/wireless/intel/iwlwifi/fw/api/commands.h   |    7 +
 drivers/net/wireless/intel/iwlwifi/fw/api/d3.h     |    6 +-
 .../net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h    |  102 +-
 drivers/net/wireless/intel/iwlwifi/fw/api/debug.h  |   83 +-
 .../net/wireless/intel/iwlwifi/fw/api/location.h   |    4 +-
 drivers/net/wireless/intel/iwlwifi/fw/api/mac.h    |    4 +
 drivers/net/wireless/intel/iwlwifi/fw/api/phy.h    |    7 +
 drivers/net/wireless/intel/iwlwifi/fw/api/power.h  |   12 +
 drivers/net/wireless/intel/iwlwifi/fw/api/rs.h     |   18 +-
 drivers/net/wireless/intel/iwlwifi/fw/api/rx.h     |   32 +
 drivers/net/wireless/intel/iwlwifi/fw/api/scan.h   |   55 +-
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c        | 1155 +++++++-------------
 drivers/net/wireless/intel/iwlwifi/fw/dbg.h        |  121 +-
 drivers/net/wireless/intel/iwlwifi/fw/error-dump.h |   38 +-
 drivers/net/wireless/intel/iwlwifi/fw/file.h       |   29 +-
 drivers/net/wireless/intel/iwlwifi/fw/img.h        |    9 -
 drivers/net/wireless/intel/iwlwifi/fw/init.c       |    2 -
 drivers/net/wireless/intel/iwlwifi/fw/paging.c     |    6 +-
 drivers/net/wireless/intel/iwlwifi/fw/runtime.h    |   23 +-
 drivers/net/wireless/intel/iwlwifi/fw/smem.c       |    2 +-
 drivers/net/wireless/intel/iwlwifi/iwl-config.h    |   51 +-
 drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c   |  236 ++--
 drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h   |   36 +-
 drivers/net/wireless/intel/iwlwifi/iwl-devtrace.h  |    1 -
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c       |   30 +-
 .../net/wireless/intel/iwlwifi/iwl-eeprom-parse.c  |   21 +-
 .../net/wireless/intel/iwlwifi/iwl-eeprom-parse.h  |    4 +-
 .../net/wireless/intel/iwlwifi/iwl-eeprom-read.c   |   14 +-
 drivers/net/wireless/intel/iwlwifi/iwl-fh.h        |    6 +-
 drivers/net/wireless/intel/iwlwifi/iwl-io.c        |   21 +-
 drivers/net/wireless/intel/iwlwifi/iwl-io.h        |   18 +-
 drivers/net/wireless/intel/iwlwifi/iwl-modparams.h |    9 +-
 drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c |   60 +-
 drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h   |   27 +-
 drivers/net/wireless/intel/iwlwifi/iwl-prph.h      |    5 +
 drivers/net/wireless/intel/iwlwifi/iwl-trans.c     |   16 -
 drivers/net/wireless/intel/iwlwifi/iwl-trans.h     |  150 +--
 drivers/net/wireless/intel/iwlwifi/mvm/constants.h |    1 +
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c        |  224 +---
 drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c   |  154 +--
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c        |  183 +++-
 drivers/net/wireless/intel/iwlwifi/mvm/led.c       |    6 +-
 drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c  |    9 +-
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c  |  424 ++-----
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h       |  150 +--
 drivers/net/wireless/intel/iwlwifi/mvm/nvm.c       |    4 +-
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c       |  480 +-------
 drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c  |   11 +-
 drivers/net/wireless/intel/iwlwifi/mvm/power.c     |   82 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c     |   19 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rs.c        |    8 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rx.c        |   18 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c      |  115 +-
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c      |   98 +-
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c       |   29 +-
 drivers/net/wireless/intel/iwlwifi/mvm/tdls.c      |    9 -
 .../net/wireless/intel/iwlwifi/mvm/time-event.c    |    8 +-
 drivers/net/wireless/intel/iwlwifi/mvm/tt.c        |   12 +-
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c        |    9 +-
 drivers/net/wireless/intel/iwlwifi/mvm/utils.c     |   19 +-
 .../wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c   |    4 +-
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c      |  326 ++----
 drivers/net/wireless/intel/iwlwifi/pcie/internal.h |   38 +-
 drivers/net/wireless/intel/iwlwifi/pcie/rx.c       |   77 +-
 .../net/wireless/intel/iwlwifi/pcie/trans-gen2.c   |   19 +-
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c    |  380 +++----
 drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c  |   52 +-
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c       |  176 ++-
 .../net/wireless/intersil/hostap/hostap_download.c |    6 +-
 drivers/net/wireless/intersil/hostap/hostap_plx.c  |    3 +-
 drivers/net/wireless/intersil/hostap/hostap_proc.c |    3 +-
 drivers/net/wireless/mediatek/mt76/agg-rx.c        |   36 +-
 drivers/net/wireless/mediatek/mt76/debugfs.c       |   13 +-
 drivers/net/wireless/mediatek/mt76/dma.c           |   21 +-
 drivers/net/wireless/mediatek/mt76/dma.h           |   13 +-
 drivers/net/wireless/mediatek/mt76/eeprom.c        |   13 +-
 drivers/net/wireless/mediatek/mt76/mac80211.c      |   78 +-
 drivers/net/wireless/mediatek/mt76/mcu.c           |   13 +-
 drivers/net/wireless/mediatek/mt76/mmio.c          |   28 +-
 drivers/net/wireless/mediatek/mt76/mt76.h          |   43 +-
 drivers/net/wireless/mediatek/mt76/mt7603/Kconfig  |    6 +-
 drivers/net/wireless/mediatek/mt76/mt7603/beacon.c |    2 +-
 drivers/net/wireless/mediatek/mt76/mt7603/core.c   |    2 +-
 .../net/wireless/mediatek/mt76/mt7603/debugfs.c    |    2 +-
 drivers/net/wireless/mediatek/mt76/mt7603/dma.c    |   17 +-
 drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c |    2 +-
 drivers/net/wireless/mediatek/mt76/mt7603/init.c   |    6 +-
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c    |   14 +-
 drivers/net/wireless/mediatek/mt76/mt7603/main.c   |   41 +-
 drivers/net/wireless/mediatek/mt76/mt7603/mcu.c    |    2 +-
 drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h |    1 -
 drivers/net/wireless/mediatek/mt76/mt7603/pci.c    |    2 +-
 drivers/net/wireless/mediatek/mt76/mt7603/soc.c    |    5 +-
 drivers/net/wireless/mediatek/mt76/mt7615/Kconfig  |    7 +-
 drivers/net/wireless/mediatek/mt76/mt7615/Makefile |    3 +-
 .../net/wireless/mediatek/mt76/mt7615/debugfs.c    |   91 ++
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c    |   12 +-
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c |   38 +
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |   80 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    |  733 ++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.h    |   28 +
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  111 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    |  474 ++++----
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |   54 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |   99 +-
 drivers/net/wireless/mediatek/mt76/mt7615/pci.c    |    1 +
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h   |   75 ++
 drivers/net/wireless/mediatek/mt76/mt76x0/Kconfig  |   12 +-
 drivers/net/wireless/mediatek/mt76/mt76x0/main.c   |   17 +-
 drivers/net/wireless/mediatek/mt76/mt76x0/mt76x0.h |    2 +-
 drivers/net/wireless/mediatek/mt76/mt76x0/pci.c    |   15 +-
 .../net/wireless/mediatek/mt76/mt76x0/pci_mcu.c    |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x0/phy.c    |   49 +-
 drivers/net/wireless/mediatek/mt76/mt76x0/phy.h    |   10 +-
 drivers/net/wireless/mediatek/mt76/mt76x0/usb.c    |   28 +-
 .../net/wireless/mediatek/mt76/mt76x0/usb_mcu.c    |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02.h       |   45 +-
 .../net/wireless/mediatek/mt76/mt76x02_beacon.c    |   83 +-
 .../net/wireless/mediatek/mt76/mt76x02_debugfs.c   |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c   |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_dfs.h   |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_dma.h   |   13 +-
 .../net/wireless/mediatek/mt76/mt76x02_eeprom.c    |   13 +-
 .../net/wireless/mediatek/mt76/mt76x02_eeprom.h    |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mac.c   |   29 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mac.h   |   15 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c   |   26 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mcu.h   |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c  |   24 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_phy.c   |   16 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_phy.h   |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_regs.h  |   41 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_trace.c |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_trace.h |   16 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_txrx.c  |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_usb.h   |   13 +-
 .../net/wireless/mediatek/mt76/mt76x02_usb_core.c  |   13 +-
 .../net/wireless/mediatek/mt76/mt76x02_usb_mcu.c   |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_util.c  |   75 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/Kconfig  |   14 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c |   23 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.h |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/init.c   |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/mac.c    |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/mac.h    |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/mcu.c    |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/mcu.h    |   16 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h |   13 +-
 .../net/wireless/mediatek/mt76/mt76x2/mt76x2u.h    |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/pci.c    |   13 +-
 .../net/wireless/mediatek/mt76/mt76x2/pci_init.c   |   14 +-
 .../net/wireless/mediatek/mt76/mt76x2/pci_main.c   |   15 +-
 .../net/wireless/mediatek/mt76/mt76x2/pci_mcu.c    |   17 +-
 .../net/wireless/mediatek/mt76/mt76x2/pci_phy.c    |   13 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/phy.c    |   18 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/usb.c    |   14 +-
 .../net/wireless/mediatek/mt76/mt76x2/usb_init.c   |   20 +-
 .../net/wireless/mediatek/mt76/mt76x2/usb_mac.c    |   13 +-
 .../net/wireless/mediatek/mt76/mt76x2/usb_main.c   |   24 +-
 .../net/wireless/mediatek/mt76/mt76x2/usb_mcu.c    |   13 +-
 .../net/wireless/mediatek/mt76/mt76x2/usb_phy.c    |   13 +-
 drivers/net/wireless/mediatek/mt76/trace.c         |   13 +-
 drivers/net/wireless/mediatek/mt76/trace.h         |   22 +-
 drivers/net/wireless/mediatek/mt76/tx.c            |   44 +-
 drivers/net/wireless/mediatek/mt76/usb.c           |   60 +-
 drivers/net/wireless/mediatek/mt76/usb_trace.c     |   13 +-
 drivers/net/wireless/mediatek/mt76/usb_trace.h     |   24 +-
 drivers/net/wireless/mediatek/mt76/util.c          |   13 +-
 drivers/net/wireless/mediatek/mt76/util.h          |    4 +-
 drivers/net/wireless/ralink/rt2x00/rt2x00dev.c     |    3 -
 drivers/net/wireless/realtek/rtlwifi/debug.c       |    2 +-
 drivers/net/wireless/realtek/rtlwifi/efuse.c       |   17 -
 drivers/net/wireless/realtek/rtlwifi/pci.c         |    2 +
 .../net/wireless/realtek/rtlwifi/rtl8188ee/trx.h   |   14 +-
 .../net/wireless/realtek/rtlwifi/rtl8192ce/trx.h   |    6 +-
 .../net/wireless/realtek/rtlwifi/rtl8192cu/mac.c   |    8 +-
 .../net/wireless/realtek/rtlwifi/rtl8192cu/trx.c   |  272 ++---
 .../net/wireless/realtek/rtlwifi/rtl8192cu/trx.h   |  529 ++++-----
 .../net/wireless/realtek/rtlwifi/rtl8192ee/trx.c   |  314 +++---
 .../net/wireless/realtek/rtlwifi/rtl8192ee/trx.h   |  861 +++++++--------
 .../net/wireless/realtek/rtlwifi/rtl8821ae/trx.h   |   10 +-
 drivers/net/wireless/realtek/rtlwifi/usb.c         |   16 +-
 drivers/net/wireless/realtek/rtw88/coex.c          |    7 +-
 drivers/net/wireless/realtek/rtw88/debug.c         |    4 +-
 drivers/net/wireless/realtek/rtw88/pci.c           |   70 +-
 drivers/net/wireless/zydas/zd1211rw/zd_chip.c      |    3 +-
 drivers/net/wireless/zydas/zd1211rw/zd_mac.c       |    1 -
 drivers/net/wireless/zydas/zd1211rw/zd_usb.c       |   11 +-
 216 files changed, 5206 insertions(+), 6244 deletions(-)
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c

^ permalink raw reply

* Re: pull-request: bpf 2019-09-06
From: David Miller @ 2019-09-07  7:53 UTC (permalink / raw)
  To: ast; +Cc: daniel, netdev, bpf, kernel-team
In-Reply-To: <20190906222032.1007163-1-ast@kernel.org>

From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 6 Sep 2019 15:20:32 -0700

> The following pull-request contains BPF updates for your *net* tree.
> 
> The main changes are:
> 
> 1) verifier precision tracking fix, from Alexei.
> 
> Please consider pulling these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git

Pulled, thanks!

^ permalink raw reply

* Re: [PATCH 0/7] libbpf: Fix cast away const qualifiers in btf.h
From: Jiri Olsa @ 2019-09-07  6:54 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann,
	netdev@vger.kernel.org, bpf@vger.kernel.org, Yonghong Song,
	Martin Lau
In-Reply-To: <62e760de-e746-c512-350a-c2188a2bb3ed@fb.com>

On Fri, Sep 06, 2019 at 09:09:17AM +0000, Andrii Nakryiko wrote:
> On 9/6/19 8:31 AM, Jiri Olsa wrote:
> > hi,
> > when including btf.h in bpftrace, I'm getting -Wcast-qual warnings like:
> > 
> >    bpf/btf.h: In function ‘btf_var_secinfo* btf_var_secinfos(const btf_type*)’:
> >    bpf/btf.h:302:41: warning: cast from type ‘const btf_type*’ to type
> >    ‘btf_var_secinfo*’ casts away qualifiers [-Wcast-qual]
> >      302 |  return (struct btf_var_secinfo *)(t + 1);
> >          |                                         ^
> > 
> > I changed the btf.h header to comply with -Wcast-qual checks
> > and used const cast away casting in libbpf objects, where it's
> 
> Hey Jiri,
> 
> We made all those helper funcs return non-const structs intentionally to 
> improve their usability and avoid all those casts that you added back.
> 
> Also, those helpers are now part of public API, so we can't just change 
> them to const, as it can break existing users easily.
> 
> If there is a need to run with -Wcast-qual, we should probably disable 
> those checks where appropriate in libbpf code.
> 
> So this will be a NACK from me, sorry.

ok, I'll disable it in bpftrace code then

thanks,
jirka

> 
> > all related to deduplication code, so I believe losing const
> > is fine there.
> > 
> > thanks,
> > jirka
> > 
> > 
> > ---
> > Jiri Olsa (7):
> >        libbpf: Use const cast for btf_int_* functions
> >        libbpf: Return const btf_array from btf_array inline function
> >        libbpf: Return const btf_enum from btf_enum inline function
> >        libbpf: Return const btf_member from btf_members inline function
> >        libbpf: Return const btf_param from btf_params inline function
> >        libbpf: Return const btf_var from btf_var inline function
> >        libbpf: Return const struct btf_var_secinfo from btf_var_secinfos inline function
> > 
> >   tools/lib/bpf/btf.c    | 21 +++++++++++----------
> >   tools/lib/bpf/btf.h    | 30 +++++++++++++++---------------
> >   tools/lib/bpf/libbpf.c |  2 +-
> >   3 files changed, 27 insertions(+), 26 deletions(-)
> > 
> 

^ permalink raw reply

* Re: [PATCH bpf-next v10 2/4] bpf: new helper to obtain namespace data from current task New bpf helper bpf_get_current_pidns_info.
From: Yonghong Song @ 2019-09-07  6:34 UTC (permalink / raw)
  To: Al Viro
  Cc: Carlos Neira, netdev@vger.kernel.org, ebiederm@xmission.com,
	brouer@redhat.com, bpf@vger.kernel.org
In-Reply-To: <20190907001056.GA1131@ZenIV.linux.org.uk>



On 9/6/19 5:10 PM, Al Viro wrote:
> On Fri, Sep 06, 2019 at 11:21:14PM +0000, Yonghong Song wrote:
> 
>> -bash-4.4$ readlink /proc/self/ns/pid
>> pid:[4026531836]
>> -bash-4.4$ stat /proc/self/ns/pid
>>     File: ‘/proc/self/ns/pid’ -> ‘pid:[4026531836]’
>>     Size: 0               Blocks: 0          IO Block: 1024   symbolic link
>> Device: 4h/4d   Inode: 344795989   Links: 1
>> Access: (0777/lrwxrwxrwx)  Uid: (128203/     yhs)   Gid: (  100/   users)
>> Context: user_u:base_r:base_t
>> Access: 2019-09-06 16:06:09.431616380 -0700
>> Modify: 2019-09-06 16:06:09.431616380 -0700
>> Change: 2019-09-06 16:06:09.431616380 -0700
>>    Birth: -
>> -bash-4.4$
>>
>> Based on a discussion with Eric Biederman back in 2019 Linux
>> Plumbers, Eric suggested that to uniquely identify a
>> namespace, device id (major/minor) number should also
>> be included. Although today's kernel implementation
>> has the same device for all namespace pseudo files,
>> but from uapi perspective, device id should be included.
>>
>> That is the reason why we try to get device id which holds
>> pid namespace pseudo file.
>>
>> Do you have a better suggestion on how to get
>> the device id for 'current' pid namespace? Or from design, we
>> really should not care about device id at all?
> 
> What the hell is "device id for pid namespace"?  This is the
> first time I've heard about that mystery object, so it's
> hard to tell where it could be found.
> 
> I can tell you what device numbers are involved in the areas
> you seem to be looking in.
> 
> 1) there's whatever device number that gets assigned to
> (this) procfs instance.  That, ironically, _is_ per-pidns, but
> that of the procfs instance, not that of your process (and
> those can be different).  That's what you get in ->st_dev
> when doing lstat() of anything in /proc (assuming that
> procfs is mounted there, in the first place).  NOTE:
> that's lstat(2), not stat(2).  stat(1) uses lstat(2),
> unless given -L (in which case it's stat(2) time).  The
> difference:
> 
> root@kvm1:~# stat /proc/self/ns/pid
>    File: /proc/self/ns/pid -> pid:[4026531836]
>    Size: 0               Blocks: 0          IO Block: 1024   symbolic link
> Device: 4h/4d   Inode: 17396       Links: 1
> Access: (0777/lrwxrwxrwx)  Uid: (    0/    root)   Gid: (    0/    root)
> Access: 2019-09-06 19:43:11.871312319 -0400
> Modify: 2019-09-06 19:43:11.871312319 -0400
> Change: 2019-09-06 19:43:11.871312319 -0400
>   Birth: -
> root@kvm1:~# stat -L /proc/self/ns/pid
>    File: /proc/self/ns/pid
>    Size: 0               Blocks: 0          IO Block: 4096   regular empty file
> Device: 3h/3d   Inode: 4026531836  Links: 1
> Access: (0444/-r--r--r--)  Uid: (    0/    root)   Gid: (    0/    root)
> Access: 2019-09-06 19:43:15.955313293 -0400
> Modify: 2019-09-06 19:43:15.955313293 -0400
> Change: 2019-09-06 19:43:15.955313293 -0400
>   Birth: -
> 
> The former is lstat, the latter - stat.
> 
> 2) device number of the filesystem where the symlink target lives.
> In this case, it's nsfs and there's only one instance on the entire
> system.  _That_ would be obtained by looking at st_dev in stat(2) on
> /proc/self/ns/pid (0:3 above).
> 
> 3) device number *OF* the symlink.  That would be st_rdev in lstat(2).
> There's none - it's a symlink, not a character or block device.  It's
> always zero and always will be zero.
> 
> 4) the same for the target; st_rdev in stat(2) results and again,
> there's no such beast - it's neither character nor block device.
> 
> Your code is looking at (3).  Please, reread any textbook on Unix
> in the section that would cover stat(2) and discussion of the
> difference between st_dev and st_rdev.
> 
> I have no idea what Eric had been talking about - it's hard to
> reconstruct by what you said so far.  Making nsfs per-userns,
> perhaps?  But that makes no sense whatsoever, not that userns
> ever had...  Cheap shots aside, I really can't guess what that's
> about.  Sorry.

Thanks for the detailed information. The device number we want
is that of nsfs. Indeed, currently, there is only one instance
on the entire system. But I am not sure how likely it is that there
will be more than one nsfs device in the future. Maybe per-userns
or some other criterion?

> 
> In any case, pathname resolution is *NOT* for the situations where
> you can't block.  Even if it's procfs (and from the same pidns as
> the process) mounted there, there is no promise that the target
> of /proc/self has already been looked up and not evicted from
> memory since then.  And in case of cache miss pathwalk will
> have to call ->lookup(), which requires locking the directory
> (rw_sem, shared).  You can't do that in such context.
> 
> And that doesn't even go into the possibility that process has
> something very different mounted on /proc.
> 
> Again, I don't know what it is that you want to get to, but
> I would strongly recommend finding a way to get to that data
> that would not involve going anywhere near pathname resolution.
> 
> How would you expect the userland to work with that value,
> whatever it might be?  If it's just a 32bit field that will
> never be read, you might as well store there the same value
> you store now (0, that is) in much cheaper and safer way ;-)

Suppose inside pid namespace, user can pass the device number,
say n1, (`stat -L /proc/self/ns/pid`) to bpf program (through map
or JIT). At runtime, bpf program will try to get device number,
say n2, for the 'current' process. If n1 is not the same as
n2, that means they are not in the same namespace. 'current'
is in the same pid namespace as the user iff
n1 == n2 and also pidns id is the same for 'current' and
the one with `lsns -t pid`.

Are you aware of any way to get the pidns device number
for 'current' without going through the pathname
lookup?


^ permalink raw reply

* [PATCH net-next 4/4] net/tls: align non temporal copy to cache lines
From: Jakub Kicinski @ 2019-09-07  5:30 UTC (permalink / raw)
  To: davem
  Cc: netdev, oss-drivers, davejwatson, borisp, aviadye, john.fastabend,
	daniel, Jakub Kicinski, Dirk van der Merwe
In-Reply-To: <20190907053000.23869-1-jakub.kicinski@netronome.com>

Unlike normal TCP code TLS has to touch the cache lines
it copies into to fill header info. On memory-heavy workloads
having non temporal stores and normal accesses targeting
the same cache line leads to significant overhead.

Measured 3% overhead running 3600 round robin connections
with additional memory heavy workload.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 net/tls/tls_device.c | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 916c3c0a99f0..f959487c5cd1 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -372,6 +372,31 @@ static int tls_do_allocation(struct sock *sk,
 	return 0;
 }
 
+static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
+{
+	size_t pre_copy, nocache;
+
+	pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1);
+	if (pre_copy) {
+		pre_copy = min(pre_copy, bytes);
+		if (copy_from_iter(addr, pre_copy, i) != pre_copy)
+			return -EFAULT;
+		bytes -= pre_copy;
+		addr += pre_copy;
+	}
+
+	nocache = round_down(bytes, SMP_CACHE_BYTES);
+	if (copy_from_iter_nocache(addr, nocache, i) != nocache)
+		return -EFAULT;
+	bytes -= nocache;
+	addr += nocache;
+
+	if (bytes && copy_from_iter(addr, bytes, i) != bytes)
+		return -EFAULT;
+
+	return 0;
+}
+
 static int tls_push_data(struct sock *sk,
 			 struct iov_iter *msg_iter,
 			 size_t size, int flags,
@@ -445,12 +470,10 @@ static int tls_push_data(struct sock *sk,
 		copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
 		copy = min_t(size_t, copy, (max_open_record_len - record->len));
 
-		if (copy_from_iter_nocache(page_address(pfrag->page) +
-					       pfrag->offset,
-					   copy, msg_iter) != copy) {
-			rc = -EFAULT;
+		rc = tls_device_copy_data(page_address(pfrag->page) +
+					  pfrag->offset, copy, msg_iter);
+		if (rc)
 			goto handle_error;
-		}
 		tls_append_frag(record, pfrag, copy);
 
 		size -= copy;
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 3/4] net/tls: remove the record tail optimization
From: Jakub Kicinski @ 2019-09-07  5:29 UTC (permalink / raw)
  To: davem
  Cc: netdev, oss-drivers, davejwatson, borisp, aviadye, john.fastabend,
	daniel, Jakub Kicinski, Dirk van der Merwe
In-Reply-To: <20190907053000.23869-1-jakub.kicinski@netronome.com>

For TLS device offload the tag/message authentication code are
filled in by the device. The kernel merely reserves space for
them. Because the device overwrites it, the contents of the tag
do not matter. Current code tries to save space by reusing the
header as the tag. This, however, leads to an additional frag
being created and defeats buffer coalescing (which trickles
all the way down to the drivers).

Remove this optimization, and try to allocate the space for
the tag in the usual way, leave the memory uninitialized.
If memory allocation fails rewind the record pointer so that
we use the already copied user data as tag.

Note that the optimization was actually buggy, as the tag
for TLS 1.2 is 16 bytes, but header is just 13, so the reuse
may have looked past the end of the page.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 net/tls/tls_device.c | 67 +++++++++++++++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 20 deletions(-)

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index b11355e00514..916c3c0a99f0 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -256,29 +256,13 @@ static int tls_push_record(struct sock *sk,
 			   struct tls_context *ctx,
 			   struct tls_offload_context_tx *offload_ctx,
 			   struct tls_record_info *record,
-			   struct page_frag *pfrag,
-			   int flags,
-			   unsigned char record_type)
+			   int flags)
 {
 	struct tls_prot_info *prot = &ctx->prot_info;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct page_frag dummy_tag_frag;
 	skb_frag_t *frag;
 	int i;
 
-	/* fill prepend */
-	frag = &record->frags[0];
-	tls_fill_prepend(ctx,
-			 skb_frag_address(frag),
-			 record->len - prot->prepend_size,
-			 record_type,
-			 prot->version);
-
-	/* HW doesn't care about the data in the tag, because it fills it. */
-	dummy_tag_frag.page = skb_frag_page(frag);
-	dummy_tag_frag.offset = 0;
-
-	tls_append_frag(record, &dummy_tag_frag, prot->tag_size);
 	record->end_seq = tp->write_seq + record->len;
 	list_add_tail_rcu(&record->list, &offload_ctx->records_list);
 	offload_ctx->open_record = NULL;
@@ -302,6 +286,38 @@ static int tls_push_record(struct sock *sk,
 	return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
+static int tls_device_record_close(struct sock *sk,
+				   struct tls_context *ctx,
+				   struct tls_record_info *record,
+				   struct page_frag *pfrag,
+				   unsigned char record_type)
+{
+	struct tls_prot_info *prot = &ctx->prot_info;
+	int ret;
+
+	/* append tag
+	 * device will fill in the tag, we just need to append a placeholder
+	 * use socket memory to improve coalescing (re-using a single buffer
+	 * increases frag count)
+	 * if we can't allocate memory now, steal some back from data
+	 */
+	if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
+					sk->sk_allocation))) {
+		ret = 0;
+		tls_append_frag(record, pfrag, prot->tag_size);
+	} else {
+		ret = prot->tag_size;
+		if (record->len <= prot->overhead_size)
+			return -ENOMEM;
+	}
+
+	/* fill prepend */
+	tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
+			 record->len - prot->overhead_size,
+			 record_type, prot->version);
+	return ret;
+}
+
 static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
 				 struct page_frag *pfrag,
 				 size_t prepend_size)
@@ -452,13 +468,24 @@ static int tls_push_data(struct sock *sk,
 
 		if (done || record->len >= max_open_record_len ||
 		    (record->num_frags >= MAX_SKB_FRAGS - 1)) {
+			rc = tls_device_record_close(sk, tls_ctx, record,
+						     pfrag, record_type);
+			if (rc) {
+				if (rc > 0) {
+					size += rc;
+				} else {
+					size = orig_size;
+					destroy_record(record);
+					ctx->open_record = NULL;
+					break;
+				}
+			}
+
 			rc = tls_push_record(sk,
 					     tls_ctx,
 					     ctx,
 					     record,
-					     pfrag,
-					     tls_push_record_flags,
-					     record_type);
+					     tls_push_record_flags);
 			if (rc < 0)
 				break;
 		}
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 2/4] net/tls: use RCU for the adder to the offload record list
From: Jakub Kicinski @ 2019-09-07  5:29 UTC (permalink / raw)
  To: davem
  Cc: netdev, oss-drivers, davejwatson, borisp, aviadye, john.fastabend,
	daniel, Jakub Kicinski, Dirk van der Merwe
In-Reply-To: <20190907053000.23869-1-jakub.kicinski@netronome.com>

All modifications to TLS record list happen under the socket
lock. Since records form an ordered queue readers are only
concerned about elements being removed, additions can happen
concurrently.

Use RCU primitives to ensure the correct access types
(READ_ONCE/WRITE_ONCE).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 net/tls/tls_device.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 285c9f9e94e4..b11355e00514 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -280,9 +280,7 @@ static int tls_push_record(struct sock *sk,
 
 	tls_append_frag(record, &dummy_tag_frag, prot->tag_size);
 	record->end_seq = tp->write_seq + record->len;
-	spin_lock_irq(&offload_ctx->lock);
-	list_add_tail(&record->list, &offload_ctx->records_list);
-	spin_unlock_irq(&offload_ctx->lock);
+	list_add_tail_rcu(&record->list, &offload_ctx->records_list);
 	offload_ctx->open_record = NULL;
 
 	if (test_bit(TLS_TX_SYNC_SCHED, &ctx->flags))
@@ -535,12 +533,16 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 		/* if retransmit_hint is irrelevant start
 		 * from the beggining of the list
 		 */
-		info = list_first_entry(&context->records_list,
-					struct tls_record_info, list);
+		info = list_first_entry_or_null(&context->records_list,
+						struct tls_record_info, list);
+		if (!info)
+			return NULL;
 		record_sn = context->unacked_record_sn;
 	}
 
-	list_for_each_entry_from(info, &context->records_list, list) {
+	/* We just need the _rcu for the READ_ONCE() */
+	rcu_read_lock();
+	list_for_each_entry_from_rcu(info, &context->records_list, list) {
 		if (before(seq, info->end_seq)) {
 			if (!context->retransmit_hint ||
 			    after(info->end_seq,
@@ -549,12 +551,15 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 				context->retransmit_hint = info;
 			}
 			*p_record_sn = record_sn;
-			return info;
+			goto exit_rcu_unlock;
 		}
 		record_sn++;
 	}
+	info = NULL;
 
-	return NULL;
+exit_rcu_unlock:
+	rcu_read_unlock();
+	return info;
 }
 EXPORT_SYMBOL(tls_get_record);
 
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 1/4] net/tls: unref frags in order
From: Jakub Kicinski @ 2019-09-07  5:29 UTC (permalink / raw)
  To: davem
  Cc: netdev, oss-drivers, davejwatson, borisp, aviadye, john.fastabend,
	daniel, Jakub Kicinski, Dirk van der Merwe
In-Reply-To: <20190907053000.23869-1-jakub.kicinski@netronome.com>

It's generally more cache friendly to walk arrays in order,
especially those which are likely not in cache.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 net/tls/tls_device.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 41c106e45f01..285c9f9e94e4 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -122,13 +122,10 @@ static struct net_device *get_netdev_for_sock(struct sock *sk)
 
 static void destroy_record(struct tls_record_info *record)
 {
-	int nr_frags = record->num_frags;
-	skb_frag_t *frag;
+	int i;
 
-	while (nr_frags-- > 0) {
-		frag = &record->frags[nr_frags];
-		__skb_frag_unref(frag);
-	}
+	for (i = 0; i < record->num_frags; i++)
+		__skb_frag_unref(&record->frags[i]);
 	kfree(record);
 }
 
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 0/4] net/tls: small TX offload optimizations
From: Jakub Kicinski @ 2019-09-07  5:29 UTC (permalink / raw)
  To: davem
  Cc: netdev, oss-drivers, davejwatson, borisp, aviadye, john.fastabend,
	daniel, Jakub Kicinski

Hi!

This set brings small TLS TX device optimizations. The biggest
gain comes from fixing a misuse of non temporal copy instructions.
On a synthetic workload modelled after customer's RFC application
I see a 3-5% gain.

Jakub Kicinski (4):
  net/tls: unref frags in order
  net/tls: use RCU for the adder to the offload record list
  net/tls: remove the record tail optimization
  net/tls: align non temporal copy to cache lines

 net/tls/tls_device.c | 121 ++++++++++++++++++++++++++++++-------------
 1 file changed, 84 insertions(+), 37 deletions(-)

-- 
2.21.0


^ permalink raw reply

* test
From: Rain River @ 2019-09-07  5:01 UTC (permalink / raw)
  To: netdev

Please ignore it.

^ permalink raw reply

* Re: [PATCH net-next, 2/2] hv_netvsc: Sync offloading features to VF NIC
From: Jakub Kicinski @ 2019-09-07  4:25 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: sashal@kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org, KY Srinivasan, Stephen Hemminger,
	olaf@aepfle.de, vkuznets, davem@davemloft.net,
	linux-kernel@vger.kernel.org, Mark Bloch
In-Reply-To: <DM6PR21MB13373166435FD2FC5543D349CABB0@DM6PR21MB1337.namprd21.prod.outlook.com>

On Thu, 5 Sep 2019 23:07:32 +0000, Haiyang Zhang wrote:
> > On Fri, 30 Aug 2019 03:45:38 +0000, Haiyang Zhang wrote:  
> > > VF NIC may go down then come up during host servicing events. This
> > > causes the VF NIC offloading feature settings to roll back to the
> > > defaults. This patch can synchronize features from synthetic NIC to
> > > the VF NIC during ndo_set_features (ethtool -K), and
> > > netvsc_register_vf when VF comes back after host events.
> > >
> > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > Cc: Mark Bloch <markb@mellanox.com>  
> > 
> > If we want to make this change in behaviour we should change net_failover
> > at the same time.  
> 
> After checking the net_failover, I found it's for virtio based SRIOV, and very 
> different from what we did for Hyper-V based SRIOV.
> 
> We let the netvsc driver act as both the synthetic (PV) driver and the transparent
> bonding master for the VF NIC. But net_failover acts as a master device on top
> of both the virtio PV NIC and the VF NIC, and net_failover doesn't implement
> operations like ndo_set_features.
> So the code change for our netvsc driver cannot be applied to net_failover driver.
> 
> I will re-submit my two patches (fixing the extra tab in the 1st one as you pointed 
> out). Thanks!

I think it stands to reason that two modules which implement the same
functionality behave the same.

^ permalink raw reply

* Re: [PATCH] net/ibmvnic: free reset work of removed device from queue
From: kbuild test robot @ 2019-09-07  4:24 UTC (permalink / raw)
  To: Juliet Kim; +Cc: kbuild-all, netdev, julietk, linuxppc-dev
In-Reply-To: <20190905213001.19818-1-julietk@linux.vnet.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 7041 bytes --]

Hi Juliet,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[cannot apply to v5.3-rc7 next-20190904]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Juliet-Kim/net-ibmvnic-free-reset-work-of-removed-device-from-queue/20190906-195317
config: powerpc-allyesconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 7.4.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.4.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   drivers/net/ethernet/ibm/ibmvnic.c: In function '__ibmvnic_reset':
>> drivers/net/ethernet/ibm/ibmvnic.c:1986:3: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
      if (adapter->state == VNIC_REMOVING ||
      ^~
   drivers/net/ethernet/ibm/ibmvnic.c:1989:4: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'if'
       rc = EBUSY;
       ^~
>> drivers/net/ethernet/ibm/ibmvnic.c:2002:4: error: break statement not within loop or switch
       break;
       ^~~~~
   drivers/net/ethernet/ibm/ibmvnic.c: At top level:
>> drivers/net/ethernet/ibm/ibmvnic.c:2007:2: error: expected identifier or '(' before 'if'
     if (adapter->wait_for_reset) {
     ^~
   drivers/net/ethernet/ibm/ibmvnic.c:2013:2: error: expected identifier or '(' before 'if'
     if (rc) {
     ^~
>> drivers/net/ethernet/ibm/ibmvnic.c:2018:9: error: expected '=', ',', ';', 'asm' or '__attribute__' before '->' token
     adapter->resetting = false;
            ^~
   drivers/net/ethernet/ibm/ibmvnic.c:2019:2: error: expected identifier or '(' before 'if'
     if (we_lock_rtnl)
     ^~
>> drivers/net/ethernet/ibm/ibmvnic.c:2021:1: error: expected identifier or '(' before '}' token
    }
    ^
   drivers/net/ethernet/ibm/ibmvnic.c:1953:13: warning: 'free_all_rwi' defined but not used [-Wunused-function]
    static void free_all_rwi(struct ibmvnic_adapter *adapter)
                ^~~~~~~~~~~~

vim +2002 drivers/net/ethernet/ibm/ibmvnic.c

ed651a10875f13 Nathan Fontenot 2017-05-03  1963  
ed651a10875f13 Nathan Fontenot 2017-05-03  1964  static void __ibmvnic_reset(struct work_struct *work)
ed651a10875f13 Nathan Fontenot 2017-05-03  1965  {
ed651a10875f13 Nathan Fontenot 2017-05-03  1966  	struct ibmvnic_rwi *rwi;
ed651a10875f13 Nathan Fontenot 2017-05-03  1967  	struct ibmvnic_adapter *adapter;
a5681e20b541a5 Juliet Kim      2018-11-19  1968  	bool we_lock_rtnl = false;
ed651a10875f13 Nathan Fontenot 2017-05-03  1969  	u32 reset_state;
c26eba03e4073b John Allen      2017-10-26  1970  	int rc = 0;
ed651a10875f13 Nathan Fontenot 2017-05-03  1971  
ed651a10875f13 Nathan Fontenot 2017-05-03  1972  	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
ed651a10875f13 Nathan Fontenot 2017-05-03  1973  
a5681e20b541a5 Juliet Kim      2018-11-19  1974  	/* netif_set_real_num_xx_queues needs to take rtnl lock here
a5681e20b541a5 Juliet Kim      2018-11-19  1975  	 * unless wait_for_reset is set, in which case the rtnl lock
a5681e20b541a5 Juliet Kim      2018-11-19  1976  	 * has already been taken before initializing the reset
a5681e20b541a5 Juliet Kim      2018-11-19  1977  	 */
a5681e20b541a5 Juliet Kim      2018-11-19  1978  	if (!adapter->wait_for_reset) {
a5681e20b541a5 Juliet Kim      2018-11-19  1979  		rtnl_lock();
a5681e20b541a5 Juliet Kim      2018-11-19  1980  		we_lock_rtnl = true;
a5681e20b541a5 Juliet Kim      2018-11-19  1981  	}
ed651a10875f13 Nathan Fontenot 2017-05-03  1982  	reset_state = adapter->state;
ed651a10875f13 Nathan Fontenot 2017-05-03  1983  
ed651a10875f13 Nathan Fontenot 2017-05-03  1984  	rwi = get_next_rwi(adapter);
ed651a10875f13 Nathan Fontenot 2017-05-03  1985  	while (rwi) {
36f1031c51a253 Thomas Falcon   2019-08-27 @1986  		if (adapter->state == VNIC_REMOVING ||
36f1031c51a253 Thomas Falcon   2019-08-27  1987  		    adapter->state == VNIC_REMOVED)
42a863ed7971cb Juliet Kim      2019-09-05  1988  			kfree(rwi);
42a863ed7971cb Juliet Kim      2019-09-05 @1989  			rc = EBUSY;
42a863ed7971cb Juliet Kim      2019-09-05  1990  			break;
42a863ed7971cb Juliet Kim      2019-09-05  1991  		}
36f1031c51a253 Thomas Falcon   2019-08-27  1992  
2770a7984db588 Thomas Falcon   2018-05-23  1993  		if (adapter->force_reset_recovery) {
2770a7984db588 Thomas Falcon   2018-05-23  1994  			adapter->force_reset_recovery = false;
2770a7984db588 Thomas Falcon   2018-05-23  1995  			rc = do_hard_reset(adapter, rwi, reset_state);
2770a7984db588 Thomas Falcon   2018-05-23  1996  		} else {
ed651a10875f13 Nathan Fontenot 2017-05-03  1997  			rc = do_reset(adapter, rwi, reset_state);
2770a7984db588 Thomas Falcon   2018-05-23  1998  		}
ed651a10875f13 Nathan Fontenot 2017-05-03  1999  		kfree(rwi);
2770a7984db588 Thomas Falcon   2018-05-23  2000  		if (rc && rc != IBMVNIC_INIT_FAILED &&
2770a7984db588 Thomas Falcon   2018-05-23  2001  		    !adapter->force_reset_recovery)
ed651a10875f13 Nathan Fontenot 2017-05-03 @2002  			break;
ed651a10875f13 Nathan Fontenot 2017-05-03  2003  
ed651a10875f13 Nathan Fontenot 2017-05-03  2004  		rwi = get_next_rwi(adapter);
ed651a10875f13 Nathan Fontenot 2017-05-03  2005  	}
ed651a10875f13 Nathan Fontenot 2017-05-03  2006  
c26eba03e4073b John Allen      2017-10-26 @2007  	if (adapter->wait_for_reset) {
c26eba03e4073b John Allen      2017-10-26  2008  		adapter->wait_for_reset = false;
c26eba03e4073b John Allen      2017-10-26  2009  		adapter->reset_done_rc = rc;
c26eba03e4073b John Allen      2017-10-26  2010  		complete(&adapter->reset_done);
c26eba03e4073b John Allen      2017-10-26  2011  	}
c26eba03e4073b John Allen      2017-10-26  2012  
ed651a10875f13 Nathan Fontenot 2017-05-03 @2013  	if (rc) {
d1cf33d93166f1 Nathan Fontenot 2017-08-08  2014  		netdev_dbg(adapter->netdev, "Reset failed\n");
ed651a10875f13 Nathan Fontenot 2017-05-03  2015  		free_all_rwi(adapter);
ed651a10875f13 Nathan Fontenot 2017-05-03  2016  	}
42a863ed7971cb Juliet Kim      2019-09-05  2017  
ed651a10875f13 Nathan Fontenot 2017-05-03 @2018  	adapter->resetting = false;
a5681e20b541a5 Juliet Kim      2018-11-19  2019  	if (we_lock_rtnl)
a5681e20b541a5 Juliet Kim      2018-11-19  2020  		rtnl_unlock();
ed651a10875f13 Nathan Fontenot 2017-05-03 @2021  }
ed651a10875f13 Nathan Fontenot 2017-05-03  2022  

:::::: The code at line 2002 was first introduced by commit
:::::: ed651a10875f13135a5f59c1bae4d51b377b3925 ibmvnic: Updated reset handling

:::::: TO: Nathan Fontenot <nfont@linux.vnet.ibm.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 62432 bytes --]

^ permalink raw reply

* Re: [net-next 02/11] devlink: add 'reset_dev_on_drv_probe' param
From: Jakub Kicinski @ 2019-09-07  4:17 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Dirk van der Merwe, Simon Horman, David Miller, netdev,
	oss-drivers
In-Reply-To: <8066ba35-2f9b-c175-100f-e754b4ca65be@netronome.com>

On Fri, 6 Sep 2019 11:40:54 -0700, Dirk van der Merwe wrote:
> >> DEVLINK_PARAM_RESET_DEV_VALUE_UNKNOWN (0)
> >> +			  Unknown or invalid value.  
> > Why do you need this? Do you have usecase for this value?  
> 
> I added this in to avoid having the entire netlink dump fail when there 
> are invalid values read from hardware.
> 
> This way, it can report an unknown or invalid value instead of failing 
> the operation.

That's the first reason, the second is that we also want to report 
the unknown value if it's not recognized by the driver. For u8/enum
parameters the value may possibly be set to a value older driver
doesn't understand, but users should still be able to set them to one
of the known ones.

We'd also like to add that to 'fw_load_policy'. WDYT?

^ permalink raw reply

* Re: [PATCH v3 bpf-next 2/3] bpf: implement CAP_BPF
From: kbuild test robot @ 2019-09-07  4:09 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: kbuild-all, davem, daniel, peterz, luto, netdev, bpf, kernel-team,
	linux-api
In-Reply-To: <20190904184335.360074-2-ast@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 4029 bytes --]

Hi Alexei,

I love your patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Alexei-Starovoitov/capability-introduce-CAP_BPF-and-CAP_TRACING/20190906-215814
base:   https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-randconfig-c003-201935 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-11) 7.4.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/export.h:45:0,
                    from include/linux/linkage.h:7,
                    from include/linux/kernel.h:8,
                    from include/linux/list.h:9,
                    from include/linux/timer.h:5,
                    from include/linux/workqueue.h:9,
                    from include/linux/bpf.h:9,
                    from kernel/bpf/syscall.c:4:
   kernel/bpf/syscall.c: In function 'bpf_prog_test_run':
   kernel/bpf/syscall.c:2087:6: warning: the address of 'capable_bpf_net_admin' will always evaluate as 'true' [-Waddress]
     if (!capable_bpf_net_admin)
         ^
   include/linux/compiler.h:58:52: note: in definition of macro '__trace_if_var'
    #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
                                                       ^~~~
>> kernel/bpf/syscall.c:2087:2: note: in expansion of macro 'if'
     if (!capable_bpf_net_admin)
     ^~
   kernel/bpf/syscall.c:2087:6: warning: the address of 'capable_bpf_net_admin' will always evaluate as 'true' [-Waddress]
     if (!capable_bpf_net_admin)
         ^
   include/linux/compiler.h:58:61: note: in definition of macro '__trace_if_var'
    #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
                                                                ^~~~
>> kernel/bpf/syscall.c:2087:2: note: in expansion of macro 'if'
     if (!capable_bpf_net_admin)
     ^~
   kernel/bpf/syscall.c:2087:6: warning: the address of 'capable_bpf_net_admin' will always evaluate as 'true' [-Waddress]
     if (!capable_bpf_net_admin)
         ^
   include/linux/compiler.h:69:3: note: in definition of macro '__trace_if_value'
     (cond) ?     \
      ^~~~
   include/linux/compiler.h:56:28: note: in expansion of macro '__trace_if_var'
    #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
                               ^~~~~~~~~~~~~~
>> kernel/bpf/syscall.c:2087:2: note: in expansion of macro 'if'
     if (!capable_bpf_net_admin)
     ^~

vim +/if +2087 kernel/bpf/syscall.c

  2080	
  2081	static int bpf_prog_test_run(const union bpf_attr *attr,
  2082				     union bpf_attr __user *uattr)
  2083	{
  2084		struct bpf_prog *prog;
  2085		int ret = -ENOTSUPP;
  2086	
> 2087		if (!capable_bpf_net_admin)
  2088			/* test_run callback is available for networking progs only.
  2089			 * Add capable_bpf_tracing() above when tracing progs become runable.
  2090			 */
  2091			return -EPERM;
  2092		if (CHECK_ATTR(BPF_PROG_TEST_RUN))
  2093			return -EINVAL;
  2094	
  2095		if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
  2096		    (!attr->test.ctx_size_in && attr->test.ctx_in))
  2097			return -EINVAL;
  2098	
  2099		if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
  2100		    (!attr->test.ctx_size_out && attr->test.ctx_out))
  2101			return -EINVAL;
  2102	
  2103		prog = bpf_prog_get(attr->test.prog_fd);
  2104		if (IS_ERR(prog))
  2105			return PTR_ERR(prog);
  2106	
  2107		if (prog->aux->ops->test_run)
  2108			ret = prog->aux->ops->test_run(prog, attr, uattr);
  2109	
  2110		bpf_prog_put(prog);
  2111		return ret;
  2112	}
  2113	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 29761 bytes --]

^ permalink raw reply

* Re: [PATCH] ethernet: micrel: Use DIV_ROUND_CLOSEST directly to make it readable
From: zhong jiang @ 2019-09-07  3:14 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: davem, kstewart, gregkh, netdev, linux-kernel
In-Reply-To: <20190906194050.GB2339@lunn.ch>

On 2019/9/7 3:40, Andrew Lunn wrote:
> On Thu, Sep 05, 2019 at 11:53:48PM +0800, zhong jiang wrote:
>> The kernel.h macro DIV_ROUND_CLOSEST performs the computation (x + d/2)/d
>> but is perhaps more readable.
> Hi Zhong
>
> Did you find this by hand, or did you use a tool. If a tool is used,
> it is normal to give some credit to the tool.
With the help of the following Coccinelle rule:
-(((x) + ((__divisor) / 2)) / (__divisor))
+ DIV_ROUND_CLOSEST(x,__divisor)

Sometimes I add that information to the description; sometimes I don't.

I will certainly add the description when I send a series of patches to modify such cases.

Thanks,
zhong jiang

> Thanks
> 	Andrew
>
> .
>



^ permalink raw reply

* Re: [PATCH bpf-next 8/8] samples: bpf: Makefile: base progs build on Makefile.progs
From: Ivan Khoronzhuk @ 2019-09-07  1:24 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: ast, daniel, yhs, davem, jakub.kicinski, hawk, john.fastabend,
	linux-kernel, netdev, bpf, clang-built-linux
In-Reply-To: <20190906233429.6ass5x5inaypvbpr@ast-mbp.dhcp.thefacebook.com>

On Fri, Sep 06, 2019 at 04:34:31PM -0700, Alexei Starovoitov wrote:
>On Thu, Sep 05, 2019 at 12:22:12AM +0300, Ivan Khoronzhuk wrote:
>> +
>> +If need to use environment of target board, the SYSROOT also can be set,
>> +pointing on FS of target board:
>> +
>> +make samples/bpf/ LLC=~/git/llvm/build/bin/llc \
>> +     CLANG=~/git/llvm/build/bin/clang \
>> +     SYSROOT=~/some_sdk/linux-devkit/sysroots/aarch64-linux-gnu
>
>Patches 7 and 8 look quite heavy. I don't have a way to test them
>which makes me a bit uneasy to accept them as-is.
>Would be great if somebody could give Tested-by.
>
I can try to split patch 8 in v2, but not significantly.

-- 
Regards,
Ivan Khoronzhuk

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox