Netdev List
 help / color / mirror / Atom feed
* [PATCH RFC 10/18] r8168: switch to phy_ethtool_get/set_link_ksettings
From: Heiner Kallweit @ 2017-12-21 20:50 UTC (permalink / raw)
  To: Andrew Lunn, Realtek linux nic maintainers, Chun-Hao Lin
  Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

Use phy_ethtool_get/set_link_ksettings instead of open coding these
ethtool ops.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8168.c | 50 +++---------------------------------
 1 file changed, 3 insertions(+), 47 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index 62d0e0169..e698f13c2 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -2004,50 +2004,6 @@ static void rtl8168_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb)
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff));
 }
 
-static int rtl8168_get_link_ksettings_xmii(struct net_device *dev,
-					   struct ethtool_link_ksettings *cmd)
-{
-	struct rtl8168_private *tp = netdev_priv(dev);
-
-	mii_ethtool_get_link_ksettings(&tp->mii, cmd);
-
-	return 0;
-}
-
-static int rtl8168_get_link_ksettings(struct net_device *dev,
-				      struct ethtool_link_ksettings *cmd)
-{
-	struct rtl8168_private *tp = netdev_priv(dev);
-	int rc;
-
-	rtl_lock_work(tp);
-	rc = rtl8168_get_link_ksettings_xmii(dev, cmd);
-	rtl_unlock_work(tp);
-
-	return rc;
-}
-
-static int rtl8168_set_link_ksettings(struct net_device *dev,
-				      const struct ethtool_link_ksettings *cmd)
-{
-	struct rtl8168_private *tp = netdev_priv(dev);
-	int rc;
-	u32 advertising;
-
-	if (!ethtool_convert_link_mode_to_legacy_u32(&advertising,
-	    cmd->link_modes.advertising))
-		return -EINVAL;
-
-	del_timer_sync(&tp->timer);
-
-	rtl_lock_work(tp);
-	rc = rtl8168_set_speed(dev, cmd->base.autoneg, cmd->base.speed,
-			       cmd->base.duplex, advertising);
-	rtl_unlock_work(tp);
-
-	return rc;
-}
-
 static void rtl8168_get_regs(struct net_device *dev, struct ethtool_regs *regs,
 			     void *p)
 {
@@ -2317,7 +2273,7 @@ static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
 	const struct rtl_coalesce_info *ci;
 	int rc;
 
-	rc = rtl8168_get_link_ksettings(dev, &ecmd);
+	rc = phy_ethtool_get_link_ksettings(dev, &ecmd);
 	if (rc < 0)
 		return ERR_PTR(rc);
 
@@ -2478,8 +2434,8 @@ static const struct ethtool_ops rtl8168_ethtool_ops = {
 	.get_ethtool_stats	= rtl8168_get_ethtool_stats,
 	.get_ts_info		= ethtool_op_get_ts_info,
 	.nway_reset		= rtl8168_nway_reset,
-	.get_link_ksettings	= rtl8168_get_link_ksettings,
-	.set_link_ksettings	= rtl8168_set_link_ksettings,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 };
 
 static void rtl8168_get_mac_version(struct rtl8168_private *tp,
-- 
2.15.1

^ permalink raw reply related

* [PATCH RFC 11/18] r8168: switch to phy_ethtool_nway_reset
From: Heiner Kallweit @ 2017-12-21 20:50 UTC (permalink / raw)
  To: Andrew Lunn, Realtek linux nic maintainers, Chun-Hao Lin
  Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

Switch to phy_ethtool_nway_reset.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/Kconfig | 1 -
 drivers/net/ethernet/realtek/r8168.c | 9 +--------
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig
index 97b24103e..a31aa84be 100644
--- a/drivers/net/ethernet/realtek/Kconfig
+++ b/drivers/net/ethernet/realtek/Kconfig
@@ -100,7 +100,6 @@ config R8168
 	select FW_LOADER
 	select CRC32
 	select PHYLIB
-	select MII
 	---help---
 	  Say Y here if you have a Realtek 8168 PCI Gigabit Ethernet adapter.
 	  This driver supports the PCIE models.
diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index e698f13c2..33f61e100 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -2194,13 +2194,6 @@ static void rtl8168_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	}
 }
 
-static int rtl8168_nway_reset(struct net_device *dev)
-{
-	struct rtl8168_private *tp = netdev_priv(dev);
-
-	return mii_nway_restart(&tp->mii);
-}
-
 /*
  * Interrupt coalescing
  *
@@ -2433,7 +2426,7 @@ static const struct ethtool_ops rtl8168_ethtool_ops = {
 	.get_sset_count		= rtl8168_get_sset_count,
 	.get_ethtool_stats	= rtl8168_get_ethtool_stats,
 	.get_ts_info		= ethtool_op_get_ts_info,
-	.nway_reset		= rtl8168_nway_reset,
+	.nway_reset		= phy_ethtool_nway_reset,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 };
-- 
2.15.1

^ permalink raw reply related

* [PATCH RFC 12/18] r8168: switch to phy_mii_ioctl
From: Heiner Kallweit @ 2017-12-21 20:50 UTC (permalink / raw)
  To: Andrew Lunn, Realtek linux nic maintainers, Chun-Hao Lin
  Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

Use phy_mii_ioctl for handling the ioctls.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8168.c | 25 +++----------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index 33f61e100..d33f93a31 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -4316,31 +4316,12 @@ static int rtl_set_mac_address(struct net_device *dev, void *p)
 	return 0;
 }
 
-static int rtl_xmii_ioctl(struct rtl8168_private *tp,
-			  struct mii_ioctl_data *data, int cmd)
-{
-	switch (cmd) {
-	case SIOCGMIIPHY:
-		data->phy_id = 32; /* Internal PHY */
-		return 0;
-
-	case SIOCGMIIREG:
-		data->val_out = rtl_readphy(tp, data->reg_num & 0x1f);
-		return 0;
-
-	case SIOCSMIIREG:
-		rtl_writephy(tp, data->reg_num & 0x1f, data->val_in);
-		return 0;
-	}
-	return -EOPNOTSUPP;
-}
-
 static int rtl8168_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct rtl8168_private *tp = netdev_priv(dev);
-	struct mii_ioctl_data *data = if_mii(ifr);
+	if (!netif_running(dev))
+		return -ENODEV;
 
-	return netif_running(dev) ? rtl_xmii_ioctl(tp, data, cmd) : -ENODEV;
+	return phy_mii_ioctl(dev->phydev, ifr, cmd);
 }
 
 static void rtl_init_mdio_ops(struct rtl8168_private *tp)
-- 
2.15.1

^ permalink raw reply related

* [PATCH RFC 13/18] r8168: replace speed_down with genphy_restart_aneg
From: Heiner Kallweit @ 2017-12-21 20:50 UTC (permalink / raw)
  To: Andrew Lunn, Realtek linux nic maintainers, Chun-Hao Lin
  Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

Dealing with link partner abilities is handled by phylib, so let's
just trigger autonegotiation here.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8168.c | 26 +-------------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index d33f93a31..6b398915f 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -4360,30 +4360,6 @@ static void rtl_init_mdio_ops(struct rtl8168_private *tp)
 	}
 }
 
-static void rtl_speed_down(struct rtl8168_private *tp)
-{
-	u32 adv;
-	int lpa;
-
-	rtl_writephy(tp, 0x1f, 0x0000);
-	lpa = rtl_readphy(tp, MII_LPA);
-
-	if (lpa & (LPA_10HALF | LPA_10FULL))
-		adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full;
-	else if (lpa & (LPA_100HALF | LPA_100FULL))
-		adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
-		      ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full;
-	else
-		adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
-		      ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
-		      (tp->mii.supports_gmii ?
-		       ADVERTISED_1000baseT_Half |
-		       ADVERTISED_1000baseT_Full : 0);
-
-	rtl8168_set_speed(tp->dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL,
-			  adv);
-}
-
 static void rtl_wol_suspend_quirk(struct rtl8168_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -4424,7 +4400,7 @@ static bool rtl_wol_pll_power_down(struct rtl8168_private *tp)
 	if (!(__rtl8168_get_wol(tp) & WAKE_ANY))
 		return false;
 
-	rtl_speed_down(tp);
+	genphy_restart_aneg(tp->dev->phydev);
 	rtl_wol_suspend_quirk(tp);
 
 	return true;
-- 
2.15.1

^ permalink raw reply related

* [PATCH RFC 14/18] r8168: remove rtl8168_set_speed
From: Heiner Kallweit @ 2017-12-21 20:50 UTC (permalink / raw)
  To: Andrew Lunn, Realtek linux nic maintainers, Chun-Hao Lin
  Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

All these PHY basics are handled by phylib, so let's remove this code.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8168.c | 92 ------------------------------------
 1 file changed, 92 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index 6b398915f..f88fc0fa0 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -1843,91 +1843,6 @@ static int rtl8168_get_regs_len(struct net_device *dev)
 	return R8168_REGS_SIZE;
 }
 
-static int rtl8168_set_speed_xmii(struct net_device *dev,
-				  u8 autoneg, u16 speed, u8 duplex, u32 adv)
-{
-	struct rtl8168_private *tp = netdev_priv(dev);
-	int giga_ctrl, bmcr;
-	int rc = -EINVAL;
-
-	rtl_writephy(tp, 0x1f, 0x0000);
-
-	if (autoneg == AUTONEG_ENABLE) {
-		int auto_nego;
-
-		auto_nego = rtl_readphy(tp, MII_ADVERTISE);
-		auto_nego &= ~(ADVERTISE_10HALF | ADVERTISE_10FULL |
-				ADVERTISE_100HALF | ADVERTISE_100FULL);
-
-		if (adv & ADVERTISED_10baseT_Half)
-			auto_nego |= ADVERTISE_10HALF;
-		if (adv & ADVERTISED_10baseT_Full)
-			auto_nego |= ADVERTISE_10FULL;
-		if (adv & ADVERTISED_100baseT_Half)
-			auto_nego |= ADVERTISE_100HALF;
-		if (adv & ADVERTISED_100baseT_Full)
-			auto_nego |= ADVERTISE_100FULL;
-
-		auto_nego |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
-
-		giga_ctrl = rtl_readphy(tp, MII_CTRL1000);
-		giga_ctrl &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
-
-		/* The 8100e/8101e/8102e do Fast Ethernet only. */
-		if (tp->mii.supports_gmii) {
-			if (adv & ADVERTISED_1000baseT_Half)
-				giga_ctrl |= ADVERTISE_1000HALF;
-			if (adv & ADVERTISED_1000baseT_Full)
-				giga_ctrl |= ADVERTISE_1000FULL;
-		} else if (adv & (ADVERTISED_1000baseT_Half |
-				  ADVERTISED_1000baseT_Full)) {
-			netif_info(tp, link, dev,
-				   "PHY does not support 1000Mbps\n");
-			goto out;
-		}
-
-		bmcr = BMCR_ANENABLE | BMCR_ANRESTART;
-
-		rtl_writephy(tp, MII_ADVERTISE, auto_nego);
-		rtl_writephy(tp, MII_CTRL1000, giga_ctrl);
-	} else {
-		if (speed == SPEED_10)
-			bmcr = 0;
-		else if (speed == SPEED_100)
-			bmcr = BMCR_SPEED100;
-		else
-			goto out;
-
-		if (duplex == DUPLEX_FULL)
-			bmcr |= BMCR_FULLDPLX;
-	}
-
-	rtl_writephy(tp, MII_BMCR, bmcr);
-
-	rc = 0;
-out:
-	return rc;
-}
-
-static int rtl8168_set_speed(struct net_device *dev,
-			     u8 autoneg, u16 speed, u8 duplex, u32 advertising)
-{
-	struct rtl8168_private *tp = netdev_priv(dev);
-	int ret;
-
-	ret = rtl8168_set_speed_xmii(dev, autoneg, speed, duplex, advertising);
-	if (ret < 0)
-		goto out;
-
-	if (netif_running(dev) && (autoneg == AUTONEG_ENABLE) &&
-	    (advertising & ADVERTISED_1000baseT_Full) &&
-	    !pci_is_pcie(tp->pci_dev)) {
-		mod_timer(&tp->timer, jiffies + RTL8168_PHY_TIMEOUT);
-	}
-out:
-	return ret;
-}
-
 static netdev_features_t rtl8168_fix_features(struct net_device *dev,
 	netdev_features_t features)
 {
@@ -4264,13 +4179,6 @@ static void rtl8168_init_phy(struct net_device *dev, struct rtl8168_private *tp)
 	pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
 
 	genphy_soft_reset(dev->phydev);
-
-	rtl8168_set_speed(dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL,
-			  ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
-			  ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
-			  (tp->mii.supports_gmii ?
-			   ADVERTISED_1000baseT_Half |
-			   ADVERTISED_1000baseT_Full : 0));
 }
 
 static void rtl_rar_set(struct rtl8168_private *tp, u8 *addr)
-- 
2.15.1

^ permalink raw reply related

* [PATCH RFC 15/18] r8168: remove rtl_phy_work and rtl8168_phy_timer
From: Heiner Kallweit @ 2017-12-21 20:50 UTC (permalink / raw)
  To: Andrew Lunn, Realtek linux nic maintainers, Chun-Hao Lin
  Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

Remove further code which is replaced by phylib.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8168.c | 60 ------------------------------------
 1 file changed, 60 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index f88fc0fa0..91191d178 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -752,7 +752,6 @@ enum rtl_flag {
 	RTL_FLAG_TASK_ENABLED,
 	RTL_FLAG_TASK_SLOW_PENDING,
 	RTL_FLAG_TASK_RESET_PENDING,
-	RTL_FLAG_TASK_PHY_PENDING,
 	RTL_FLAG_MAX
 };
 
@@ -781,7 +780,6 @@ struct rtl8168_private {
 	dma_addr_t RxPhyAddr;
 	void *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
 	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
-	struct timer_list timer;
 	u16 cp_cmd;
 
 	u16 event_slow;
@@ -1559,24 +1557,6 @@ static void rtl8168_irq_mask_and_ack(struct rtl8168_private *tp)
 	RTL_R8(ChipCmd);
 }
 
-static unsigned int rtl8168_xmii_reset_pending(struct rtl8168_private *tp)
-{
-	return rtl_readphy(tp, MII_BMCR) & BMCR_RESET;
-}
-
-static unsigned int rtl8168_xmii_link_ok(void __iomem *ioaddr)
-{
-	return RTL_R8(PHYstatus) & LinkStatus;
-}
-
-static void rtl8168_xmii_reset_enable(struct rtl8168_private *tp)
-{
-	unsigned int val;
-
-	val = rtl_readphy(tp, MII_BMCR) | BMCR_RESET;
-	rtl_writephy(tp, MII_BMCR, val & 0xffff);
-}
-
 static void rtl_link_chg_patch(struct rtl8168_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -4131,47 +4111,12 @@ static void rtl_hw_phy_config(struct net_device *dev)
 	}
 }
 
-static void rtl_phy_work(struct rtl8168_private *tp)
-{
-	struct timer_list *timer = &tp->timer;
-	void __iomem *ioaddr = tp->mmio_addr;
-	unsigned long timeout = RTL8168_PHY_TIMEOUT;
-
-	assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
-
-	if (rtl8168_xmii_reset_pending(tp)) {
-		/*
-		 * A busy loop could burn quite a few cycles on nowadays CPU.
-		 * Let's delay the execution of the timer for a few ticks.
-		 */
-		timeout = HZ/10;
-		goto out_mod_timer;
-	}
-
-	if (rtl8168_xmii_link_ok(ioaddr))
-		return;
-
-	netif_dbg(tp, link, tp->dev, "PHY reset until link up\n");
-
-	rtl8168_xmii_reset_enable(tp);
-
-out_mod_timer:
-	mod_timer(timer, jiffies + timeout);
-}
-
 static void rtl_schedule_task(struct rtl8168_private *tp, enum rtl_flag flag)
 {
 	if (!test_and_set_bit(flag, tp->wk.flags))
 		schedule_work(&tp->wk.work);
 }
 
-static void rtl8168_phy_timer(struct timer_list *t)
-{
-	struct rtl8168_private *tp = from_timer(tp, t, timer);
-
-	rtl_schedule_task(tp, RTL_FLAG_TASK_PHY_PENDING);
-}
-
 static void rtl8168_init_phy(struct net_device *dev, struct rtl8168_private *tp)
 {
 	rtl_hw_phy_config(dev);
@@ -7120,7 +7065,6 @@ static void rtl_task(struct work_struct *work)
 		/* XXX - keep rtl_slow_event_work() as first element. */
 		{ RTL_FLAG_TASK_SLOW_PENDING,	rtl_slow_event_work },
 		{ RTL_FLAG_TASK_RESET_PENDING,	rtl_reset_work },
-		{ RTL_FLAG_TASK_PHY_PENDING,	rtl_phy_work }
 	};
 	struct rtl8168_private *tp =
 		container_of(work, struct rtl8168_private, wk.work);
@@ -7184,8 +7128,6 @@ static void rtl8168_down(struct net_device *dev)
 
 	phy_stop(dev->phydev);
 
-	del_timer_sync(&tp->timer);
-
 	napi_disable(&tp->napi);
 	netif_stop_queue(dev);
 
@@ -8076,8 +8018,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	tp->opts1_mask = ~(RxBOVF | RxFOVF);
 
-	timer_setup(&tp->timer, rtl8168_phy_timer, 0);
-
 	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 
 	tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters),
-- 
2.15.1

^ permalink raw reply related

* [PATCH RFC 16/18] r8168: use phy_read/write in rtl_readphy/writephy helpers
From: Heiner Kallweit @ 2017-12-21 20:50 UTC (permalink / raw)
  To: Andrew Lunn, Realtek linux nic maintainers, Chun-Hao Lin
  Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

Instead of accessing mdio_ops directly use phy_read/write in these
helpers.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8168.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index 91191d178..64c87509d 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -1167,12 +1167,12 @@ static int r8168dp_2_mdio_read(struct rtl8168_private *tp, int reg)
 
 static void rtl_writephy(struct rtl8168_private *tp, int location, u32 val)
 {
-	tp->mdio_ops.write(tp, location, val);
+	phy_write(tp->dev->phydev, location, val);
 }
 
 static int rtl_readphy(struct rtl8168_private *tp, int location)
 {
-	return tp->mdio_ops.read(tp, location);
+	return phy_read(tp->dev->phydev, location);
 }
 
 static void rtl_patchphy(struct rtl8168_private *tp, int reg_addr, int value)
-- 
2.15.1

^ permalink raw reply related

* [PATCH RFC 17/18] r8168: remove use of struct mii_if_info
From: Heiner Kallweit @ 2017-12-21 20:50 UTC (permalink / raw)
  To: Andrew Lunn, Realtek linux nic maintainers, Chun-Hao Lin
  Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

After switching to phylib we don't need most elements of
struct mii_if_info any longer.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8168.c | 37 +++++++-----------------------------
 1 file changed, 7 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index 64c87509d..765c60bc1 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -816,7 +816,7 @@ struct rtl8168_private {
 
 	unsigned features;
 
-	struct mii_if_info mii;
+	bool supports_gmii;
 	struct mii_bus *mii_bus;
 	dma_addr_t counters_phys_addr;
 	struct rtl8168_counters *counters;
@@ -1188,21 +1188,6 @@ static void rtl_w0w1_phy(struct rtl8168_private *tp, int reg_addr, int p, int m)
 	rtl_writephy(tp, reg_addr, (val & ~m) | p);
 }
 
-static void rtl_mdio_write(struct net_device *dev, int phy_id, int location,
-			   int val)
-{
-	struct rtl8168_private *tp = netdev_priv(dev);
-
-	rtl_writephy(tp, location, val);
-}
-
-static int rtl_mdio_read(struct net_device *dev, int phy_id, int location)
-{
-	struct rtl8168_private *tp = netdev_priv(dev);
-
-	return rtl_readphy(tp, location);
-}
-
 DECLARE_RTL_COND(rtl_ephyar_cond)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -2447,15 +2432,15 @@ static void rtl8168_get_mac_version(struct rtl8168_private *tp,
 			     "unknown MAC, using family default\n");
 		tp->mac_version = default_version;
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_42) {
-		tp->mac_version = tp->mii.supports_gmii ?
+		tp->mac_version = tp->supports_gmii ?
 				  RTL_GIGA_MAC_VER_42 :
 				  RTL_GIGA_MAC_VER_43;
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_45) {
-		tp->mac_version = tp->mii.supports_gmii ?
+		tp->mac_version = tp->supports_gmii ?
 				  RTL_GIGA_MAC_VER_45 :
 				  RTL_GIGA_MAC_VER_47;
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_46) {
-		tp->mac_version = tp->mii.supports_gmii ?
+		tp->mac_version = tp->supports_gmii ?
 				  RTL_GIGA_MAC_VER_46 :
 				  RTL_GIGA_MAC_VER_48;
 	}
@@ -7691,7 +7676,7 @@ static int r8168_phy_connect(struct rtl8168_private *tp)
 	phy_interface_t phy_mode;
 	int ret;
 
-	phy_mode = tp->mii.supports_gmii ? PHY_INTERFACE_MODE_GMII :
+	phy_mode = tp->supports_gmii ? PHY_INTERFACE_MODE_GMII :
 		   PHY_INTERFACE_MODE_MII;
 
 	phydev = phy_find_first(tp->mii_bus);
@@ -7705,7 +7690,7 @@ static int r8168_phy_connect(struct rtl8168_private *tp)
 
 	phy_attached_info(phydev);
 
-	if (!tp->mii.supports_gmii && (phydev->supported &
+	if (!tp->supports_gmii && (phydev->supported &
 	    (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full))) {
 		netif_info(tp, probe, tp->dev, "Restrict PHY to 100Mbit because MAC doesn't support 1GBit");
 		phy_set_max_speed(phydev, SPEED_100);
@@ -7784,7 +7769,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
 	const unsigned int region = cfg->region;
 	struct rtl8168_private *tp;
-	struct mii_if_info *mii;
 	struct net_device *dev;
 	void __iomem *ioaddr;
 	int chipset, i;
@@ -7805,14 +7789,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->dev = dev;
 	tp->pci_dev = pdev;
 	tp->msg_enable = netif_msg_init(debug.msg_enable, R8168_MSG_DEFAULT);
-
-	mii = &tp->mii;
-	mii->dev = dev;
-	mii->mdio_read = rtl_mdio_read;
-	mii->mdio_write = rtl_mdio_write;
-	mii->phy_id_mask = 0x1f;
-	mii->reg_num_mask = 0x1f;
-	mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+	tp->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
 
 	/* disable ASPM completely as that cause random device stop working
 	 * problems as well as full system hangs for some PCIe devices users */
-- 
2.15.1

^ permalink raw reply related

* [PATCH RFC 18/18] r8168: use link speed information as maintained by phylib
From: Heiner Kallweit @ 2017-12-21 20:50 UTC (permalink / raw)
  To: Andrew Lunn, Realtek linux nic maintainers, Chun-Hao Lin
  Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

Let's use the speed information as maintained by phylib instead of
reading it directly from a register.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8168.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index 765c60bc1..6c57af825 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -618,16 +618,6 @@ enum rtl_register_content {
 	INTT_2		= 0x0002,	// 8168
 	INTT_3		= 0x0003,	// 8168
 
-	/* rtl8168_PHYstatus */
-	TBI_Enable	= 0x80,
-	TxFlowCtrl	= 0x40,
-	RxFlowCtrl	= 0x20,
-	_1000bpsF	= 0x10,
-	_100bps		= 0x08,
-	_10bps		= 0x04,
-	LinkStatus	= 0x02,
-	FullDup		= 0x01,
-
 	/* ResetCounterCommand */
 	CounterReset	= 0x1,
 
@@ -1544,20 +1534,20 @@ static void rtl8168_irq_mask_and_ack(struct rtl8168_private *tp)
 
 static void rtl_link_chg_patch(struct rtl8168_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct net_device *dev = tp->dev;
+	struct phy_device *phydev = dev->phydev;
 
 	if (!netif_running(dev))
 		return;
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_38) {
-		if (RTL_R8(PHYstatus) & _1000bpsF) {
+		if (phydev->speed == SPEED_1000) {
 			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
 				      ERIAR_EXGMAC);
 			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
 				      ERIAR_EXGMAC);
-		} else if (RTL_R8(PHYstatus) & _100bps) {
+		} else if (phydev->speed == SPEED_100) {
 			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
 				      ERIAR_EXGMAC);
 			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1575,7 +1565,7 @@ static void rtl_link_chg_patch(struct rtl8168_private *tp)
 			     ERIAR_EXGMAC);
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
 		   tp->mac_version == RTL_GIGA_MAC_VER_36) {
-		if (RTL_R8(PHYstatus) & _1000bpsF) {
+		if (phydev->speed == SPEED_1000) {
 			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
 				      ERIAR_EXGMAC);
 			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1587,7 +1577,7 @@ static void rtl_link_chg_patch(struct rtl8168_private *tp)
 				      ERIAR_EXGMAC);
 		}
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_37) {
-		if (RTL_R8(PHYstatus) & _10bps) {
+		if (phydev->speed == SPEED_10) {
 			rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02,
 				      ERIAR_EXGMAC);
 			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060,
-- 
2.15.1

^ permalink raw reply related

* Re: [Patch net] net_sched: fix a missing rcu barrier in mini_qdisc_pair_swap()
From: Cong Wang @ 2017-12-21 20:54 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: Linux Kernel Network Developers, Jiri Pirko, John Fastabend
In-Reply-To: <CAM_iQpVfYUsJ6D5fpE+Ng8wGxK6uoN3PRntYO_Y0j+7YRKdSqg@mail.gmail.com>

On Thu, Dec 21, 2017 at 11:01 AM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> On Thu, Dec 21, 2017 at 1:03 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>>
>>
>> But again, why don't we just free qdisc in call_rcu and avoid the
>> barrier?
>
>
> Non-sense again. Why qdisc code should be adjusted for your
> miniq code? It is your own responsibility to take care of this shit.
> Don't spread it out of minq.

Also, in case you believe call_rcu to free qdisc is queued after
the call_rcu in miniq, you are wrong again:

https://www.kernel.org/pub/linux/kernel/people/paulmck/Answers/RCU/RCUCBordering.html

The rcu callbacks don't guarantee FIFO ordering.

^ permalink raw reply

* Re: [PATCH] net: Revert "net_sched: no need to free qdisc in RCU callback"
From: Cong Wang @ 2017-12-21 20:59 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: John Fastabend, David Miller, Jakub Kicinski,
	Linux Kernel Network Developers, Eric Dumazet
In-Reply-To: <20171221083908.GA1930@nanopsycho>

On Thu, Dec 21, 2017 at 12:39 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>
> Why just moving qdisc_free to rcu is not enough? It would resolve this
> issue and also avoid using synchronize net. Something like:

If you mean Jakub's issue, apparently not:
https://www.kernel.org/pub/linux/kernel/people/paulmck/Answers/RCU/RCUCBordering.html

Jiri, you have to use an rcu barrier to wait for an rcu callback, not
queue another rcu callback; the ordering is simply NOT guaranteed.

More importantly, you already have one rcu barrier in the
same function. Why keep believing you don't need it?

^ permalink raw reply

* [PATCH bpf-next v2 0/8] bpf: offload: report device back to user space (take 2)
From: Jakub Kicinski @ 2017-12-21 21:01 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski

Hi!

This series is a redo of reporting offload device information to
user space after the first attempt did not take namespaces into
account.  As requested by Kirill, offloads are now protected by an
r/w sem.  This allows us to remove the waitqueue and free the
offload state fully when the device is removed (suggested by Alexei).

Net namespace is reported with a device/inode pair.

The accompanying bpftool support is placed in common code because
maps will have very similar info.  Note that the UAPI information
can't be nicely encapsulated into a struct: if we need to grow the
device information, the new fields will have to be added at the end
of struct bpf_prog_info, since we can't grow structures in the
middle of bpf_prog_info.

v2:
 - rework the locking in patch 1 (use RCU instead of locking
   dependencies);
 - grab RTNL for a short time in patch 6;
 - minor update to the test in patch 8.

Jakub Kicinski (8):
  bpf: offload: don't require rtnl for dev list manipulation
  bpf: offload: don't use prog->aux->offload as boolean
  bpf: offload: allow netdev to disappear while verifier is running
  bpf: offload: free prog->aux->offload when device disappears
  bpf: offload: free program id when device disappears
  bpf: offload: report device information for offloaded programs
  tools: bpftool: report device information for offloaded programs
  selftests/bpf: test device info reporting for bound progs

 drivers/net/ethernet/netronome/nfp/bpf/main.h     |   2 +-
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c |   2 +-
 drivers/net/netdevsim/bpf.c                       |   2 +-
 fs/nsfs.c                                         |   2 +-
 include/linux/bpf.h                               |  16 ++-
 include/linux/bpf_verifier.h                      |  16 +--
 include/linux/netdevice.h                         |   4 +-
 include/linux/proc_ns.h                           |   1 +
 include/uapi/linux/bpf.h                          |   3 +
 kernel/bpf/offload.c                              | 133 ++++++++++++++++------
 kernel/bpf/syscall.c                              |  19 +++-
 kernel/bpf/verifier.c                             |  20 ++--
 tools/bpf/bpftool/common.c                        |  52 +++++++++
 tools/bpf/bpftool/main.h                          |   2 +
 tools/bpf/bpftool/prog.c                          |   3 +
 tools/include/uapi/linux/bpf.h                    |   3 +
 tools/testing/selftests/bpf/test_offload.py       | 112 ++++++++++++++++--
 17 files changed, 308 insertions(+), 84 deletions(-)

-- 
2.15.1

^ permalink raw reply

* [PATCH bpf-next v2 1/8] bpf: offload: don't require rtnl for dev list manipulation
From: Jakub Kicinski @ 2017-12-21 21:01 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171221210120.30166-1-jakub.kicinski@netronome.com>

We don't need the RTNL lock for all operations on offload state.
We only need to hold it around ndo calls.  The device offload
initialization doesn't require it.  The soon-to-come querying
of the offload info will only need it partially.  We will also
be able to remove the waitqueue in following patches.

Use struct rw_semaphore because map offload will require sleeping
with the semaphore held for read.

Suggested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
v2:
 - use dev_get_by_index_rcu() instead of implicit lock dependencies;
 - use DECLARE_RWSEM() instead of init_rwsem() (Kirill).
---
 kernel/bpf/offload.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 8455b89d1bbf..f049073a37e6 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -20,8 +20,12 @@
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/rtnetlink.h>
+#include <linux/rwsem.h>
 
-/* protected by RTNL */
+/* Protects bpf_prog_offload_devs and offload members of all progs.
+ * RTNL lock cannot be taken when holding this lock.
+ */
+static DECLARE_RWSEM(bpf_devs_lock);
 static LIST_HEAD(bpf_prog_offload_devs);
 
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
@@ -43,19 +47,30 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 	offload->prog = prog;
 	init_waitqueue_head(&offload->verifier_done);
 
-	rtnl_lock();
-	offload->netdev = __dev_get_by_index(net, attr->prog_ifindex);
+	rcu_read_lock();
+	offload->netdev = dev_get_by_index_rcu(net, attr->prog_ifindex);
 	if (!offload->netdev) {
-		rtnl_unlock();
-		kfree(offload);
-		return -EINVAL;
+		rcu_read_unlock();
+		goto err_free;
 	}
+	dev_hold(offload->netdev);
+	rcu_read_unlock();
 
+	down_write(&bpf_devs_lock);
+	if (offload->netdev->reg_state != NETREG_REGISTERED)
+		goto err_unlock;
 	prog->aux->offload = offload;
 	list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
-	rtnl_unlock();
+	dev_put(offload->netdev);
+	up_write(&bpf_devs_lock);
 
 	return 0;
+err_unlock:
+	up_write(&bpf_devs_lock);
+	dev_put(offload->netdev);
+err_free:
+	kfree(offload);
+	return -EINVAL;
 }
 
 static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
@@ -126,7 +141,9 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog)
 	wake_up(&offload->verifier_done);
 
 	rtnl_lock();
+	down_write(&bpf_devs_lock);
 	__bpf_prog_offload_destroy(prog);
+	up_write(&bpf_devs_lock);
 	rtnl_unlock();
 
 	kfree(offload);
@@ -181,11 +198,13 @@ static int bpf_offload_notification(struct notifier_block *notifier,
 		if (netdev->reg_state != NETREG_UNREGISTERING)
 			break;
 
+		down_write(&bpf_devs_lock);
 		list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
 					 offloads) {
 			if (offload->netdev == netdev)
 				__bpf_prog_offload_destroy(offload->prog);
 		}
+		up_write(&bpf_devs_lock);
 		break;
 	default:
 		break;
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 2/8] bpf: offload: don't use prog->aux->offload as boolean
From: Jakub Kicinski @ 2017-12-21 21:01 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171221210120.30166-1-jakub.kicinski@netronome.com>

We currently use aux->offload to indicate that a program is bound
to a specific device.  This forces us to keep the offload structure
around even after the device is gone.  Add a bool member to
struct bpf_prog_aux to indicate whether offload was requested.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h  | 3 ++-
 kernel/bpf/syscall.c | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index da54ef644fcd..838eee10e979 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -201,6 +201,7 @@ struct bpf_prog_aux {
 	u32 stack_depth;
 	u32 id;
 	u32 func_cnt;
+	bool offload_requested;
 	struct bpf_prog **func;
 	void *jit_data; /* JIT specific data. arch dependent */
 	struct latch_tree_node ksym_tnode;
@@ -529,7 +530,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
 
 static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
 {
-	return aux->offload;
+	return aux->offload_requested;
 }
 #else
 static inline int bpf_prog_offload_init(struct bpf_prog *prog,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e2e1c78ce1dc..1143db61584c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1151,6 +1151,8 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (!prog)
 		return -ENOMEM;
 
+	prog->aux->offload_requested = !!attr->prog_ifindex;
+
 	err = security_bpf_prog_alloc(prog->aux);
 	if (err)
 		goto free_prog_nouncharge;
@@ -1172,7 +1174,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	atomic_set(&prog->aux->refcnt, 1);
 	prog->gpl_compatible = is_gpl ? 1 : 0;
 
-	if (attr->prog_ifindex) {
+	if (bpf_prog_is_dev_bound(prog->aux)) {
 		err = bpf_prog_offload_init(prog, attr);
 		if (err)
 			goto free_prog;
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 3/8] bpf: offload: allow netdev to disappear while verifier is running
From: Jakub Kicinski @ 2017-12-21 21:01 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171221210120.30166-1-jakub.kicinski@netronome.com>

To allow verifier instruction callbacks without any extra locking, the
NETDEV_UNREGISTER notification would wait on a waitqueue for the verifier
to finish.  This design decision was made when the rtnl lock was providing
all the locking.  Use the read/write lock instead and remove the
waitqueue.

Verifier will now call into the offload code, so dev_ops are moved
to offload structure.  Since verifier calls are all under
bpf_prog_is_dev_bound() we no longer need static inline implementations
to please builds with CONFIG_NET=n.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.h     |  2 +-
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c |  2 +-
 drivers/net/netdevsim/bpf.c                       |  2 +-
 include/linux/bpf.h                               |  9 +++++--
 include/linux/bpf_verifier.h                      | 16 ++----------
 include/linux/netdevice.h                         |  4 +--
 kernel/bpf/offload.c                              | 30 ++++++++++++-----------
 kernel/bpf/verifier.c                             | 20 ++++++---------
 8 files changed, 37 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index aae1be9ed056..89a9b6393882 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -238,7 +238,7 @@ struct nfp_bpf_vnic {
 
 int nfp_bpf_jit(struct nfp_prog *prog);
 
-extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
+extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
 
 struct netdev_bpf;
 struct nfp_app;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 9c2608445bd8..d8870c2f11f3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -260,6 +260,6 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 	return 0;
 }
 
-const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
+const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
 	.insn_hook = nfp_verify_insn,
 };
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index a243fa7ae02f..5134d5c1306c 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -66,7 +66,7 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
 	return 0;
 }
 
-static const struct bpf_ext_analyzer_ops nsim_bpf_analyzer_ops = {
+static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = {
 	.insn_hook = nsim_bpf_verify_insn,
 };
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 838eee10e979..669549f7e3e8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -17,6 +17,7 @@
 #include <linux/numa.h>
 #include <linux/wait.h>
 
+struct bpf_verifier_env;
 struct perf_event;
 struct bpf_prog;
 struct bpf_map;
@@ -184,14 +185,18 @@ struct bpf_verifier_ops {
 				  struct bpf_prog *prog, u32 *target_size);
 };
 
+struct bpf_prog_offload_ops {
+	int (*insn_hook)(struct bpf_verifier_env *env,
+			 int insn_idx, int prev_insn_idx);
+};
+
 struct bpf_dev_offload {
 	struct bpf_prog		*prog;
 	struct net_device	*netdev;
 	void			*dev_priv;
 	struct list_head	offloads;
 	bool			dev_state;
-	bool			verifier_running;
-	wait_queue_head_t	verifier_done;
+	const struct bpf_prog_offload_ops *dev_ops;
 };
 
 struct bpf_prog_aux {
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index aaac589e490c..02ede122d35b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -166,12 +166,6 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log)
 	return log->len_used >= log->len_total - 1;
 }
 
-struct bpf_verifier_env;
-struct bpf_ext_analyzer_ops {
-	int (*insn_hook)(struct bpf_verifier_env *env,
-			 int insn_idx, int prev_insn_idx);
-};
-
 #define BPF_MAX_SUBPROGS 256
 
 /* single container for all structs
@@ -185,7 +179,6 @@ struct bpf_verifier_env {
 	bool strict_alignment;		/* perform strict pointer alignment checks */
 	struct bpf_verifier_state *cur_state; /* current verifier state */
 	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
-	const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	u32 used_map_cnt;		/* number of used maps */
 	u32 id_gen;			/* used to generate unique reg IDs */
@@ -205,13 +198,8 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
 	return cur->frame[cur->curframe]->regs;
 }
 
-#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
-#else
-static inline int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
-{
-	return -EOPNOTSUPP;
-}
-#endif
+int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
+				 int insn_idx, int prev_insn_idx);
 
 #endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc4ce7456e38..0a1a4a111546 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -804,7 +804,7 @@ enum bpf_netdev_command {
 	BPF_OFFLOAD_DESTROY,
 };
 
-struct bpf_ext_analyzer_ops;
+struct bpf_prog_offload_ops;
 struct netlink_ext_ack;
 
 struct netdev_bpf {
@@ -826,7 +826,7 @@ struct netdev_bpf {
 		/* BPF_OFFLOAD_VERIFIER_PREP */
 		struct {
 			struct bpf_prog *prog;
-			const struct bpf_ext_analyzer_ops *ops; /* callee set */
+			const struct bpf_prog_offload_ops *ops; /* callee set */
 		} verifier;
 		/* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */
 		struct {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index f049073a37e6..2f2184408d31 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -45,7 +45,6 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 		return -ENOMEM;
 
 	offload->prog = prog;
-	init_waitqueue_head(&offload->verifier_done);
 
 	rcu_read_lock();
 	offload->netdev = dev_get_by_index_rcu(net, attr->prog_ifindex);
@@ -102,15 +101,28 @@ int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
 	if (err)
 		goto exit_unlock;
 
-	env->dev_ops = data.verifier.ops;
-
+	env->prog->aux->offload->dev_ops = data.verifier.ops;
 	env->prog->aux->offload->dev_state = true;
-	env->prog->aux->offload->verifier_running = true;
 exit_unlock:
 	rtnl_unlock();
 	return err;
 }
 
+int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
+				 int insn_idx, int prev_insn_idx)
+{
+	struct bpf_dev_offload *offload;
+	int ret = -ENODEV;
+
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload->netdev)
+		ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
+	up_read(&bpf_devs_lock);
+
+	return ret;
+}
+
 static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
 	struct bpf_dev_offload *offload = prog->aux->offload;
@@ -122,9 +134,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 
 	data.offload.prog = prog;
 
-	if (offload->verifier_running)
-		wait_event(offload->verifier_done, !offload->verifier_running);
-
 	if (offload->dev_state)
 		WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
 
@@ -137,9 +146,6 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
 	struct bpf_dev_offload *offload = prog->aux->offload;
 
-	offload->verifier_running = false;
-	wake_up(&offload->verifier_done);
-
 	rtnl_lock();
 	down_write(&bpf_devs_lock);
 	__bpf_prog_offload_destroy(prog);
@@ -151,15 +157,11 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog)
 
 static int bpf_prog_offload_translate(struct bpf_prog *prog)
 {
-	struct bpf_dev_offload *offload = prog->aux->offload;
 	struct netdev_bpf data = {};
 	int ret;
 
 	data.offload.prog = prog;
 
-	offload->verifier_running = false;
-	wake_up(&offload->verifier_done);
-
 	rtnl_lock();
 	ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data);
 	rtnl_unlock();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 48b2901cf483..6b95efad5828 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4341,15 +4341,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	return 0;
 }
 
-static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
-				  int insn_idx, int prev_insn_idx)
-{
-	if (env->dev_ops && env->dev_ops->insn_hook)
-		return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
-
-	return 0;
-}
-
 static int do_check(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *state;
@@ -4431,9 +4422,12 @@ static int do_check(struct bpf_verifier_env *env)
 				       env->allow_ptr_leaks);
 		}
 
-		err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
-		if (err)
-			return err;
+		if (bpf_prog_is_dev_bound(env->prog->aux)) {
+			err = bpf_prog_offload_verify_insn(env, insn_idx,
+							   prev_insn_idx);
+			if (err)
+				return err;
+		}
 
 		regs = cur_regs(env);
 		env->insn_aux_data[insn_idx].seen = true;
@@ -5341,7 +5335,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		env->strict_alignment = true;
 
-	if (env->prog->aux->offload) {
+	if (bpf_prog_is_dev_bound(env->prog->aux)) {
 		ret = bpf_prog_offload_verifier_prep(env);
 		if (ret)
 			goto err_unlock;
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 4/8] bpf: offload: free prog->aux->offload when device disappears
From: Jakub Kicinski @ 2017-12-21 21:01 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171221210120.30166-1-jakub.kicinski@netronome.com>

All bpf offload operations should now be under bpf_devs_lock, so
it's safe to free and clear the entire offload structure,
not only the netdev pointer.

__bpf_prog_offload_destroy() will no longer be called multiple
times.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/offload.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 2f2184408d31..60be15b9d8f1 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -75,12 +75,14 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
 			     struct netdev_bpf *data)
 {
-	struct net_device *netdev = prog->aux->offload->netdev;
+	struct bpf_dev_offload *offload = prog->aux->offload;
+	struct net_device *netdev;
 
 	ASSERT_RTNL();
 
-	if (!netdev)
+	if (!offload)
 		return -ENODEV;
+	netdev = offload->netdev;
 	if (!netdev->netdev_ops->ndo_bpf)
 		return -EOPNOTSUPP;
 
@@ -116,7 +118,7 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
 
 	down_read(&bpf_devs_lock);
 	offload = env->prog->aux->offload;
-	if (offload->netdev)
+	if (offload)
 		ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
 	up_read(&bpf_devs_lock);
 
@@ -128,31 +130,24 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 	struct bpf_dev_offload *offload = prog->aux->offload;
 	struct netdev_bpf data = {};
 
-	/* Caution - if netdev is destroyed before the program, this function
-	 * will be called twice.
-	 */
-
 	data.offload.prog = prog;
 
 	if (offload->dev_state)
 		WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
 
-	offload->dev_state = false;
 	list_del_init(&offload->offloads);
-	offload->netdev = NULL;
+	kfree(offload);
+	prog->aux->offload = NULL;
 }
 
 void bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
-	struct bpf_dev_offload *offload = prog->aux->offload;
-
 	rtnl_lock();
 	down_write(&bpf_devs_lock);
-	__bpf_prog_offload_destroy(prog);
+	if (prog->aux->offload)
+		__bpf_prog_offload_destroy(prog);
 	up_write(&bpf_devs_lock);
 	rtnl_unlock();
-
-	kfree(offload);
 }
 
 static int bpf_prog_offload_translate(struct bpf_prog *prog)
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 6/8] bpf: offload: report device information for offloaded programs
From: Jakub Kicinski @ 2017-12-21 21:01 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel
  Cc: ktkhai, oss-drivers, Jakub Kicinski, Eric W . Biederman
In-Reply-To: <20171221210120.30166-1-jakub.kicinski@netronome.com>

Report to the user ifindex and namespace information of offloaded
programs.  If device has disappeared return -ENODEV.  Specify the
namespace using dev/inode combination.

CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
v2:
 - take RTNL lock to grab a coherent snapshot of device state
   (ifindex vs name space) and avoid races with name space
   moves (based on Eric's comment on Kirill's patch to
   peernet2id_alloc()).
---
 fs/nsfs.c                      |  2 +-
 include/linux/bpf.h            |  2 ++
 include/linux/proc_ns.h        |  1 +
 include/uapi/linux/bpf.h       |  3 +++
 kernel/bpf/offload.c           | 44 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  6 ++++++
 tools/include/uapi/linux/bpf.h |  3 +++
 7 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 7c6f76d29f56..e50628675935 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -51,7 +51,7 @@ static void nsfs_evict(struct inode *inode)
 	ns->ops->put(ns);
 }
 
-static void *__ns_get_path(struct path *path, struct ns_common *ns)
+void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
 	struct vfsmount *mnt = nsfs_mnt;
 	struct dentry *dentry;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9a916ab34299..7810ae57b357 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -531,6 +531,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 
 int bpf_prog_offload_compile(struct bpf_prog *prog);
 void bpf_prog_offload_destroy(struct bpf_prog *prog);
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+			       struct bpf_prog *prog);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 2ff18c9840a7..1733359cf713 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -76,6 +76,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
 
 extern struct file *proc_ns_fget(int fd);
 #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
+extern void *__ns_get_path(struct path *path, struct ns_common *ns);
 extern void *ns_get_path(struct path *path, struct task_struct *task,
 			const struct proc_ns_operations *ns_ops);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d01f1cb3cfc0..72b37fc3bc0c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -921,6 +921,9 @@ struct bpf_prog_info {
 	__u32 nr_map_ids;
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
+	__u32 ifindex;
+	__u64 netns_dev;
+	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 1e6064ea3609..4d50000bd1e3 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -16,9 +16,11 @@
 #include <linux/bpf.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bug.h>
+#include <linux/kdev_t.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
+#include <linux/proc_ns.h>
 #include <linux/rtnetlink.h>
 #include <linux/rwsem.h>
 
@@ -181,6 +183,48 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
 	return bpf_prog_offload_translate(prog);
 }
 
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+			       struct bpf_prog *prog)
+{
+	struct bpf_dev_offload *offload;
+	struct inode *ns_inode;
+	struct path ns_path;
+	int ifindex, err;
+	struct net *net;
+
+again:
+	rtnl_lock();
+	down_read(&bpf_devs_lock);
+
+	offload = prog->aux->offload;
+	if (!offload) {
+		up_read(&bpf_devs_lock);
+		rtnl_unlock();
+		return -ENODEV;
+	}
+
+	ifindex = offload->netdev->ifindex;
+	net = dev_net(offload->netdev);
+	get_net(net); /* __ns_get_path() drops the reference */
+
+	up_read(&bpf_devs_lock);
+	rtnl_unlock();
+
+	err = PTR_ERR_OR_ZERO(__ns_get_path(&ns_path, &net->ns));
+	if (err) {
+		if (err == -EAGAIN)
+			goto again;
+		return err;
+	}
+	ns_inode = ns_path.dentry->d_inode;
+
+	info->ifindex = ifindex;
+	info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
+	info->netns_ino = ns_inode->i_ino;
+
+	return 0;
+}
+
 const struct bpf_prog_ops bpf_offload_prog_ops = {
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7d9f5b0f0e49..20444fd678d0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1624,6 +1624,12 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			return -EFAULT;
 	}
 
+	if (bpf_prog_is_dev_bound(prog->aux)) {
+		err = bpf_prog_offload_info_fill(&info, prog);
+		if (err)
+			return err;
+	}
+
 done:
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index db1b0923a308..4e8c60acfa32 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -921,6 +921,9 @@ struct bpf_prog_info {
 	__u32 nr_map_ids;
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
+	__u32 ifindex;
+	__u64 netns_dev;
+	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 5/8] bpf: offload: free program id when device disappears
From: Jakub Kicinski @ 2017-12-21 21:01 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171221210120.30166-1-jakub.kicinski@netronome.com>

Bound programs are quite useless after their device disappears.
They are simply waiting for their reference count to go to zero.
Stop listing them in BPF_PROG_GET_NEXT_ID by freeing their ID
early.

Note that orphaned offload programs will return -ENODEV on
BPF_OBJ_GET_INFO_BY_FD so user will never see ID 0.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h  | 2 ++
 kernel/bpf/offload.c | 3 +++
 kernel/bpf/syscall.c | 9 +++++++--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 669549f7e3e8..9a916ab34299 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -357,6 +357,8 @@ void bpf_prog_put(struct bpf_prog *prog);
 int __bpf_prog_charge(struct user_struct *user, u32 pages);
 void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
 
+void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
+
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
 struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 60be15b9d8f1..1e6064ea3609 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -135,6 +135,9 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 	if (offload->dev_state)
 		WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
 
+	/* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
+	bpf_prog_free_id(prog, true);
+
 	list_del_init(&offload->offloads);
 	kfree(offload);
 	prog->aux->offload = NULL;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1143db61584c..7d9f5b0f0e49 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -905,9 +905,13 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
 	return id > 0 ? 0 : id;
 }
 
-static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
+void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
 {
-	/* cBPF to eBPF migrations are currently not in the idr store. */
+	/* cBPF to eBPF migrations are currently not in the idr store.
+	 * Offloaded programs are removed from the store when their device
+	 * disappears - even if someone grabs an fd to them they are unusable,
+	 * simply waiting for refcnt to drop to be freed.
+	 */
 	if (!prog->aux->id)
 		return;
 
@@ -917,6 +921,7 @@ static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
 		__acquire(&prog_idr_lock);
 
 	idr_remove(&prog_idr, prog->aux->id);
+	prog->aux->id = 0;
 
 	if (do_idr_lock)
 		spin_unlock_bh(&prog_idr_lock);
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 7/8] tools: bpftool: report device information for offloaded programs
From: Jakub Kicinski @ 2017-12-21 21:01 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171221210120.30166-1-jakub.kicinski@netronome.com>

Print the just-exposed information about the device to which the
program is bound.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 tools/bpf/bpftool/common.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.h   |  2 ++
 tools/bpf/bpftool/prog.c   |  3 +++
 3 files changed, 57 insertions(+)

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b62c94e3997a..6601c95a9258 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -44,7 +44,9 @@
 #include <unistd.h>
 #include <linux/limits.h>
 #include <linux/magic.h>
+#include <net/if.h>
 #include <sys/mount.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
 
@@ -412,3 +414,53 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
 		free(obj);
 	}
 }
+
+static char *
+ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
+{
+	struct stat st;
+	int err;
+
+	err = stat("/proc/self/ns/net", &st);
+	if (err) {
+		p_err("Can't stat /proc/self: %s", strerror(errno));
+		return NULL;
+	}
+
+	if (st.st_dev != ns_dev || st.st_ino != ns_ino)
+		return NULL;
+
+	return if_indextoname(ifindex, buf);
+}
+
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+	char name[IF_NAMESIZE];
+
+	if (!ifindex)
+		return;
+
+	printf(" dev ");
+	if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+		printf("%s", name);
+	else
+		printf("ifindex %u ns_dev %llu ns_ino %llu",
+		       ifindex, ns_dev, ns_inode);
+}
+
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+	char name[IF_NAMESIZE];
+
+	if (!ifindex)
+		return;
+
+	jsonw_name(json_wtr, "dev");
+	jsonw_start_object(json_wtr);
+	jsonw_uint_field(json_wtr, "ifindex", ifindex);
+	jsonw_uint_field(json_wtr, "ns_dev", ns_dev);
+	jsonw_uint_field(json_wtr, "ns_inode", ns_inode);
+	if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+		jsonw_string_field(json_wtr, "ifname", name);
+	jsonw_end_object(json_wtr);
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 8f6d3cac0347..65b526fe6e7e 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -96,6 +96,8 @@ struct pinned_obj {
 int build_pinned_obj_table(struct pinned_obj_table *table,
 			   enum bpf_obj_type type);
 void delete_pinned_obj_table(struct pinned_obj_table *tab);
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
 
 struct cmd {
 	const char *cmd;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 037484ceaeaf..4ccf6301f0fe 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -230,6 +230,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
 		     info->tag[0], info->tag[1], info->tag[2], info->tag[3],
 		     info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
 
+	print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
 	if (info->load_time) {
 		char buf[32];
 
@@ -287,6 +289,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 
 	printf("tag ");
 	fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
+	print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
 	printf("\n");
 
 	if (info->load_time) {
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next v2 8/8] selftests/bpf: test device info reporting for bound progs
From: Jakub Kicinski @ 2017-12-21 21:01 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171221210120.30166-1-jakub.kicinski@netronome.com>

Check if bound programs report correct device info.  Test
in local namespace, in remote one, back to the local ns,
remove the device and check that information is cleared.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
v2:
 - check the error code from "prog show pin XX" with device
   removed is -ENODEV.
---
 tools/testing/selftests/bpf/test_offload.py | 112 +++++++++++++++++++++++++---
 1 file changed, 101 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index c940505c2978..e3c750f17cb8 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -18,6 +18,8 @@ import argparse
 import json
 import os
 import pprint
+import random
+import string
 import subprocess
 import time
 
@@ -27,6 +29,7 @@ bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
 pp = pprint.PrettyPrinter()
 devs = [] # devices we created for clean up
 files = [] # files to be removed
+netns = [] # net namespaces to be removed
 
 def log_get_sec(level=0):
     return "*" * (log_level + level)
@@ -128,22 +131,25 @@ files = [] # files to be removed
     if f in files:
         files.remove(f)
 
-def tool(name, args, flags, JSON=True, fail=True):
+def tool(name, args, flags, JSON=True, ns="", fail=True):
     params = ""
     if JSON:
         params += "%s " % (flags["json"])
 
-    ret, out = cmd(name + " " + params + args, fail=fail)
+    if ns != "":
+        ns = "ip netns exec %s " % (ns)
+
+    ret, out = cmd(ns + name + " " + params + args, fail=fail)
     if JSON and len(out.strip()) != 0:
         return ret, json.loads(out)
     else:
         return ret, out
 
-def bpftool(args, JSON=True, fail=True):
-    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail)
+def bpftool(args, JSON=True, ns="", fail=True):
+    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
 
-def bpftool_prog_list(expected=None):
-    _, progs = bpftool("prog show", JSON=True, fail=True)
+def bpftool_prog_list(expected=None, ns=""):
+    _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
     if expected is not None:
         if len(progs) != expected:
             fail(True, "%d BPF programs loaded, expected %d" %
@@ -158,13 +164,13 @@ files = [] # files to be removed
         time.sleep(0.05)
     raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
 
-def ip(args, force=False, JSON=True, fail=True):
+def ip(args, force=False, JSON=True, ns="", fail=True):
     if force:
         args = "-force " + args
-    return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail)
+    return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail)
 
-def tc(args, JSON=True, fail=True):
-    return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail)
+def tc(args, JSON=True, ns="", fail=True):
+    return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
 
 def ethtool(dev, opt, args, fail=True):
     return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
@@ -178,6 +184,15 @@ files = [] # files to be removed
 def bpf_bytecode(bytecode):
     return "bytecode \"%s\"" % (bytecode)
 
+def mknetns(n_retry=10):
+    for i in range(n_retry):
+        name = ''.join([random.choice(string.ascii_letters) for i in range(8)])
+        ret, _ = ip("netns add %s" % (name), fail=False)
+        if ret == 0:
+            netns.append(name)
+            return name
+    return None
+
 class DebugfsDir:
     """
     Class for accessing DebugFS directories as a dictionary.
@@ -237,6 +252,8 @@ files = [] # files to be removed
         self.dev = self._netdevsim_create()
         devs.append(self)
 
+        self.ns = ""
+
         self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
         self.dfs_refresh()
 
@@ -257,7 +274,7 @@ files = [] # files to be removed
 
     def remove(self):
         devs.remove(self)
-        ip("link del dev %s" % (self.dev["ifname"]))
+        ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
 
     def dfs_refresh(self):
         self.dfs = DebugfsDir(self.dfs_dir)
@@ -285,6 +302,11 @@ files = [] # files to be removed
             time.sleep(0.05)
         raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
 
+    def set_ns(self, ns):
+        name = "1" if ns == "" else ns
+        ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
+        self.ns = ns
+
     def set_mtu(self, mtu, fail=True):
         return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
                   fail=fail)
@@ -372,6 +394,8 @@ files = [] # files to be removed
         dev.remove()
     for f in files:
         cmd("rm -f %s" % (f))
+    for ns in netns:
+        cmd("ip netns delete %s" % (ns))
 
 def pin_prog(file_name, idx=0):
     progs = bpftool_prog_list(expected=(idx + 1))
@@ -381,6 +405,35 @@ files = [] # files to be removed
 
     return file_name, bpf_pinned(file_name)
 
+def check_dev_info(other_ns, ns, pin_file=None, removed=False):
+    if removed:
+        bpftool_prog_list(expected=0)
+        ret, err = bpftool("prog show pin %s" % (pin_file), fail=False)
+        fail(ret == 0, "Showing prog with removed device did not fail")
+        fail(err["error"].find("No such device") == -1,
+             "Showing prog with removed device expected ENODEV, error is %s" %
+             (err["error"]))
+        return
+    progs = bpftool_prog_list(expected=int(not removed), ns=ns)
+    prog = progs[0]
+
+    fail("dev" not in prog.keys(), "Device parameters not reported")
+    dev = prog["dev"]
+    fail("ifindex" not in dev.keys(), "Device parameters not reported")
+    fail("ns_dev" not in dev.keys(), "Device parameters not reported")
+    fail("ns_inode" not in dev.keys(), "Device parameters not reported")
+
+    if not removed and not other_ns:
+        fail("ifname" not in dev.keys(), "Ifname not reported")
+        fail(dev["ifname"] != sim["ifname"],
+             "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
+    else:
+        fail("ifname" in dev.keys(), "Ifname is reported for other ns")
+        if removed:
+            fail(dev["ifindex"] != 0, "Device perameters not zero on removed")
+            fail(dev["ns_dev"] != 0, "Device perameters not zero on removed")
+            fail(dev["ns_inode"] != 0, "Device perameters not zero on removed")
+
 # Parse command line
 parser = argparse.ArgumentParser()
 parser.add_argument("--log", help="output verbose log to given file")
@@ -417,6 +470,12 @@ samples = ["sample_ret0.o"]
     skip(ret != 0, "sample %s/%s not found, please compile it" %
          (bpf_test_dir, s))
 
+# Check if net namespaces seem to work
+ns = mknetns()
+skip(ns is None, "Could not create a net namespace")
+cmd("ip netns delete %s" % (ns))
+netns = []
+
 try:
     obj = bpf_obj("sample_ret0.o")
     bytecode = bpf_bytecode("1,6 0 0 4294967295,")
@@ -549,6 +608,8 @@ samples = ["sample_ret0.o"]
     progs = bpftool_prog_list(expected=1)
     fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
          "Loaded program has wrong ID")
+    fail("dev" in progs[0].keys(),
+         "Device parameters reported for non-offloaded program")
 
     start_test("Test XDP prog replace with bad flags...")
     ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
@@ -673,6 +734,35 @@ samples = ["sample_ret0.o"]
     fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
          (time_diff, delay_sec))
 
+    # Remove all pinned files and reinstantiate the netdev
+    clean_up()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+    sim.set_xdp(obj, "offload")
+
+    start_test("Test bpftool bound info reporting (own ns)...")
+    check_dev_info(False, "")
+
+    start_test("Test bpftool bound info reporting (other ns)...")
+    ns = mknetns()
+    sim.set_ns(ns)
+    check_dev_info(True, "")
+
+    start_test("Test bpftool bound info reporting (remote ns)...")
+    check_dev_info(False, ns)
+
+    start_test("Test bpftool bound info reporting (back to own ns)...")
+    sim.set_ns("")
+    check_dev_info(False, "")
+
+    pin_file, _ = pin_prog("/sys/fs/bpf/tmp")
+    sim.remove()
+
+    start_test("Test bpftool bound info reporting (removed dev)...")
+    check_dev_info(True, "", pin_file=pin_file, removed=True)
+
     print("%s: OK" % (os.path.basename(__file__)))
 
 finally:
-- 
2.15.1

^ permalink raw reply related

* Re: [PATCH RFC 00/18] r8169: separate r8168 driver and add experimental phylib support
From: David Miller @ 2017-12-21 21:16 UTC (permalink / raw)
  To: hkallweit1; +Cc: andrew, nic_swsd, hau, netdev
In-Reply-To: <83321b2e-8402-26c5-9703-3fe795cc893d@gmail.com>

From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 21 Dec 2017 21:38:11 +0100

> This experimental series separates drivers for PCI / PCIE NIC's and
> adds initial phylib support to the separated r8168 driver.

Thanks for working on this.

The RX and TX ring handling is basically going to be identical
for the two chips, so it's very undesirable to duplicate that
code in the two drivers.

Getting good test coverage is going to be extremely challenging
for this, so the more code you share between the two drivers
rather than duplicate the better.

^ permalink raw reply

* Re: [PATCH RFC 00/18] r8169: separate r8168 driver and add experimental phylib support
From: Heiner Kallweit @ 2017-12-21 21:27 UTC (permalink / raw)
  To: David Miller; +Cc: andrew, nic_swsd, hau, netdev
In-Reply-To: <20171221.161603.944790580293624594.davem@davemloft.net>

Am 21.12.2017 um 22:16 schrieb David Miller:
> From: Heiner Kallweit <hkallweit1@gmail.com>
> Date: Thu, 21 Dec 2017 21:38:11 +0100
> 
>> This experimental series separates drivers for PCI / PCIE NIC's and
>> adds initial phylib support to the separated r8168 driver.
> 
> Thanks for working on this.
> 
> The RX and TX ring handling is basically going to be identical
> for the two chips, so it's very undesirable to duplicate that
> code in the two drivers.
> 
Agree .. My approach would be:
- remove everything that's not needed from both drivers
- see what's still identical and factor it out into lib(s)

Currently the driver is one source code file with 8,700 LoC.
That's way too big anyway IMO and should be split.

> Getting good test coverage is going to be extremely challenging
> for this, so the more code you share between the two drivers
> rather than duplicate the better.
> 
I'm aware of this (seeing that basically every chip needs certain
quirks) and don't expect the patch set to be mainline-ready
very soon. I have access to one supported chip only, so I hope
others give it a try too.

^ permalink raw reply

* Re: RCU callback crashes
From: Cong Wang @ 2017-12-21 21:31 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: John Fastabend, Jiri Pirko, netdev@vger.kernel.org
In-Reply-To: <20171220164419.42c63ebf@cakuba.netronome.com>

[-- Attachment #1: Type: text/plain, Size: 2276 bytes --]

On Wed, Dec 20, 2017 at 4:50 PM, Jakub Kicinski <kubakici@wp.pl> wrote:
> On Wed, 20 Dec 2017 16:41:14 -0800, Jakub Kicinski wrote:
>> Just as I hit send... :)  but this looks unrelated, "Comm: sshd" -
>> so probably from the management interface.
>>
>> [  154.604041] ==================================================================
>> [  154.612245] BUG: KASAN: slab-out-of-bounds in pfifo_fast_dequeue+0x140/0x2d0
>> [  154.620219] Read of size 8 at addr ffff88086bb64040 by task sshd/983
>> [  154.627403]
>> [  154.629161] CPU: 10 PID: 983 Comm: sshd Not tainted 4.15.0-rc3-perf-00984-g82d3fc87a4aa-dirty #13
>> [  154.639190] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.3.4 11/08/2016
>> [  154.647665] Call Trace:
>> [  154.650494]  dump_stack+0xa6/0x118
>> [  154.654387]  ? _atomic_dec_and_lock+0xe8/0xe8
>> [  154.659355]  ? trace_event_raw_event_rcu_torture_read+0x190/0x190
>> [  154.666263]  ? rcu_segcblist_enqueue+0xe9/0x120
>> [  154.671422]  ? _raw_spin_unlock_bh+0x91/0xc0
>> [  154.676286]  ? pfifo_fast_dequeue+0x140/0x2d0
>> [  154.681251]  print_address_description+0x6a/0x270
>> [  154.686601]  ? pfifo_fast_dequeue+0x140/0x2d0
>> [  154.691565]  kasan_report+0x23f/0x350
>> [  154.695752]  pfifo_fast_dequeue+0x140/0x2d0
>
> If we trust stack decode it's:
>
>    615  static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
>    616  {
>    617          struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
>    618          struct sk_buff *skb = NULL;
>    619          int band;
>    620
>    621          for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
>    622                  struct skb_array *q = band2list(priv, band);
>    623
>>> 624                  if (__skb_array_empty(q))
>    625                          continue;
>    626
>    627                  skb = skb_array_consume_bh(q);
>    628          }
>    629          if (likely(skb)) {
>    630                  qdisc_qstats_cpu_backlog_dec(qdisc, skb);
>    631                  qdisc_bstats_cpu_update(qdisc, skb);
>    632                  qdisc_qstats_cpu_qlen_dec(qdisc);
>    633          }
>    634
>    635          return skb;
>    636  }

Hi, Jakub

Could you test the attached patch? It looks like the __skb_array_empty()
use is unsafe.

Thanks!

[-- Attachment #2: pfifo_fast_dequeue.diff --]
[-- Type: text/plain, Size: 463 bytes --]

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 10aaa3b615ce..8d47fb4aadb4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -621,10 +621,6 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 
 	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
 		struct skb_array *q = band2list(priv, band);
-
-		if (__skb_array_empty(q))
-			continue;
-
 		skb = skb_array_consume_bh(q);
 	}
 	if (likely(skb)) {

^ permalink raw reply related

* [GIT] Networking
From: David Miller @ 2017-12-21 21:32 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, netdev, linux-kernel


What's a holiday weekend without some networking bug fixes?

1) Fix some eBPF JIT bugs wrt. SKB pointers across helper function
   calls, from Daniel Borkmann.

2) Fix regression from errata limiting change to marvell PHY driver,
   from Zhao Qiang.

3) Fix u16 overflow in SCTP, from Xin Long.

4) Fix potential memory leak during bridge newlink, from Nikolay
   Aleksandrov.

5) Fix BPF selftest build on s390, from Hendrik Brueckner.

6) Don't append to cfg80211 automatically generated certs file,
   always write new ones from scratch.  From Thierry Reding.

7) Fix sleep in atomic in mac80211 hwsim, from Jia-Ju Bai.

8) Fix hang on tg3 MTU change with certain chips, from Brian King.

9) Add stall detection to arc emac driver and reset chip when this
   happens, from Alexander Kochetkov.

10) Fix MTU limiting in GRE tunnel drivers, from Xin Long.

11) Fix stmmac timestamping bug due to mis-shifting of field.
    From Fredrik Hallenberg.

12) Fix metrics match when deleting an ipv4 route.  The kernel sets
    some internal metrics bits which the user isn't going to set
    when it makes the delete request.  From Phil Sutter.

13) mvneta driver loop over RX queues limits on "txq_number" :-)
    Fix from Yelena Krivosheev.

14) Fix double free and memory corruption in get_net_ns_by_id, from
    Eric W. Biederman.

15) Flush ipv4 FIB tables in the reverse order.  Some tables can
    share their actual backing data, in particular this happens
    for the MAIN and LOCAL tables.  We have to kill the LOCAL
    table first, because it uses MAIN's backing memory.  Fix from
    Ido Schimmel.

16) Several eBPF verifier value tracking fixes, from Edward Cree,
    Jann Horn, and Alexei Starovoitov.

17) Make changes to ipv6 autoflowlabel sysctl really propagate to
    sockets, unless the socket has set the per-socket value
    explicitly.  From Shaohua Li.

18) Fix leaks and double callback invocations of zerocopy SKBs,
    from Willem de Bruijn.

Please pull, thanks a lot!

The following changes since commit f3b5ad89de16f5d42e8ad36fbdf85f705c1ae051:

  Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma (2017-12-16 13:43:08 -0800)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git 

for you to fetch changes up to c50b7c473f609189da3bccd28ee5dcf3b55109cd:

  Merge branch 'net-zerocopy-fixes' (2017-12-21 15:00:59 -0500)

----------------------------------------------------------------
Adiel Aloni (1):
      mac80211_hwsim: enable TODS BIT in null data frame

Alexander Kochetkov (2):
      net: arc_emac: fix arc_emac_rx() error paths
      net: arc_emac: restart stalled EMAC

Alexei Starovoitov (3):
      Merge branch 'bpf-jit-fixes'
      bpf: fix integer overflows
      bpf: do not allow root to mangle valid pointers

Alexey Khoroshilov (1):
      net: phy: xgene: disable clk on error paths

Alexey Kodanev (1):
      vxlan: restore dev->mtu setting based on lower device

Brendan McGrath (1):
      ipv6: icmp6: Allow icmp messages to be looped back

Brian King (1):
      tg3: Fix rx hang on MTU change with 5717/5719

Daniel Borkmann (6):
      bpf, s390x: do not reload skb pointers in non-skb context
      bpf, ppc64: do not reload skb pointers in non-skb context
      bpf: guarantee r1 to be ctx in case of bpf_helper_changes_pkt_data
      bpf, sparc: fix usage of wrong reg for load_skb_regs after call
      bpf: add test case for ld_abs and helper changing pkt data
      Merge branch 'bpf-verifier-sec-fixes'

David Miller (1):
      bpf: Fix tools and testing build.

David S. Miller (7):
      Merge git://git.kernel.org/.../bpf/bpf
      Merge tag 'mac80211-for-davem-2017-12-19' of git://git.kernel.org/.../jberg/mac80211
      Merge branch 'mvneta-fixes'
      Merge branch 'cls_bpf-fix-offload-state-tracking-with-block-callbacks'
      Merge tag 'mlx5-fixes-2017-12-19' of git://git.kernel.org/.../saeed/linux
      Merge git://git.kernel.org/.../bpf/bpf
      Merge branch 'net-zerocopy-fixes'

Edward Cree (1):
      bpf/verifier: fix bounds calculation on BPF_RSH

Eran Ben Elisha (1):
      net/mlx5: Fix rate limit packet pacing naming and struct

Eric Garver (1):
      openvswitch: Fix pop_vlan action for double tagged frames

Eric W. Biederman (1):
      net: Fix double free and memory corruption in get_net_ns_by_id()

Eugenia Emantayev (2):
      net/mlx5e: Fix defaulting RX ring size when not needed
      net/mlx5: Fix misspelling in the error message and comment

Fredrik Hallenberg (2):
      net: stmmac: Fix TX timestamp calculation
      net: stmmac: Fix bad RX timestamp extraction

Gal Pressman (4):
      net/mlx5e: Fix features check of IPv6 traffic
      net/mlx5e: Fix possible deadlock of VXLAN lock
      net/mlx5e: Add refcount to VXLAN structure
      net/mlx5e: Prevent possible races in VXLAN control flow

Hemanth Puranik (1):
      net: qcom/emac: Change the order of mac up and sgmii open

Hendrik Brueckner (1):
      bpf: fix broken BPF selftest build on s390

Huy Nguyen (1):
      net/mlx5e: Fix ETS BW check

Ido Schimmel (2):
      ipv4: Fix use-after-free when flushing FIB tables
      ipv6: Honor specified parameters in fibmatch lookup

Jakub Kicinski (2):
      cls_bpf: fix offload assumptions after callback conversion
      nfp: bpf: keep track of the offloaded program

Jann Horn (7):
      bpf: fix incorrect sign extension in check_alu_op()
      bpf: fix incorrect tracking of register size truncation
      bpf: fix 32-bit ALU op verification
      bpf: fix missing error return in check_stack_boundary()
      bpf: force strict alignment checks for stack pointers
      bpf: don't prune branches when a scalar is replaced with a pointer
      selftests/bpf: add tests for recent bugfixes

Jia-Ju Bai (1):
      mac80211_hwsim: Fix a possible sleep-in-atomic bug in hwsim_get_radio_nl

Johannes Berg (2):
      nl80211: fix nl80211_send_iface() error paths
      cfg80211: ship certificates as hex files

Jon Maloy (4):
      tipc: fix lost member events bug
      tipc: remove leaving group member from all lists
      tipc: fix list sorting bug in function tipc_group_update_member()
      tipc: remove joining group member from congested list

Jonathan Corbet (1):
      nl80211: Remove obsolete kerneldoc line

Julian Wiedmann (1):
      s390/qeth: fix error handling in checksum cmd callback

Kamal Heib (1):
      net/mlx5: FPGA, return -EINVAL if size is zero

Maor Gottlieb (1):
      net/mlx5: Fix steering memory leak

Moni Shoua (1):
      net/mlx5: Fix error flow in CREATE_QP command

Moshe Shemesh (2):
      net/mlx5: Cleanup IRQs in case of unload failure
      net/mlx5: Stay in polling mode when command EQ destroy fails

Naresh Kamboju (1):
      selftests: net: Adding config fragment CONFIG_NUMA=y

Nikolay Aleksandrov (1):
      net: bridge: fix early call to br_stp_change_bridge_id and plug newlink leaks

Petr Machata (1):
      mlxsw: spectrum_router: Remove batch neighbour deletion causing FW bug

Phil Sutter (1):
      ipv4: fib: Fix metrics match when deleting a route

Russell King (1):
      net: phy: marvell: avoid pause mode on SGMII-to-Copper for 88e151x

Saeed Mahameed (1):
      Revert "mlx5: move affinity hints assignments to generic code"

Sean Wang (1):
      net: mediatek: setup proper state for disabled GMAC on the default

Shaohua Li (1):
      net: reevalulate autoflowlabel setting after sysctl setting

Song Liu (1):
      xdp: linearize skb in netif_receive_generic_xdp()

Thierry Reding (1):
      cfg80211: always rewrite generated files from scratch

Willem de Bruijn (2):
      skbuff: orphan frags before zerocopy clone
      skbuff: skb_copy_ubufs must release uarg even without user frags

Xin Long (6):
      sctp: fix the issue that a __u16 variable may overflow in sctp_ulpq_renege
      sctp: add SCTP_CID_RECONF conversion in sctp_cname
      vxlan: update skb dst pmtu on tx path
      ip_gre: remove the incorrect mtu limit for ipgre tap
      ip6_gre: remove the incorrect mtu limit for ipgre tap
      ip6_tunnel: get the min mtu properly in ip6_tnl_xmit

Yelena Krivosheev (3):
      net: mvneta: clear interface link status on port disable
      net: mvneta: use proper rxq_number in loop on rx queues
      net: mvneta: eliminate wrong call to handle rx descriptor error

Zhao Qiang (1):
      net: phy: marvell: Limit 88m1101 autoneg errata to 88E1145 as well.

 arch/powerpc/net/bpf_jit_comp64.c                     |   6 +-
 arch/s390/net/bpf_jit_comp.c                          |  11 +-
 arch/sparc/net/bpf_jit_comp_64.c                      |   6 +-
 drivers/net/ethernet/arc/emac.h                       |   2 +
 drivers/net/ethernet/arc/emac_main.c                  | 164 ++++++++++++++++++++++++----
 drivers/net/ethernet/broadcom/tg3.c                   |   4 +-
 drivers/net/ethernet/marvell/mvneta.c                 |   8 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c           |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c         |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h          |   9 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c    |  10 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c  |  10 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c     |  63 ++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/eq.c          |  20 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c    |   6 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c     |  16 ++-
 drivers/net/ethernet/mellanox/mlx5/core/health.c      |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c        |  75 ++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/qp.c          |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/rl.c          |  22 ++--
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c       |  64 ++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.h       |   1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c |  15 +--
 drivers/net/ethernet/netronome/nfp/bpf/main.c         |  55 ++++++++--
 drivers/net/ethernet/netronome/nfp/bpf/main.h         |   8 ++
 drivers/net/ethernet/qualcomm/emac/emac.c             |   6 +-
 drivers/net/ethernet/stmicro/stmmac/common.h          |   2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c    |   5 +-
 drivers/net/ethernet/stmicro/stmmac/enh_desc.c        |   3 +-
 drivers/net/ethernet/stmicro/stmmac/norm_desc.c       |   2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c |   6 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c     |   2 +-
 drivers/net/phy/marvell.c                             |  14 ++-
 drivers/net/phy/mdio-xgene.c                          |  21 +++-
 drivers/net/vxlan.c                                   |  19 ++++
 drivers/net/wireless/mac80211_hwsim.c                 |   3 +-
 drivers/s390/net/qeth_core_main.c                     |   9 +-
 include/linux/bpf_verifier.h                          |   4 +-
 include/linux/ipv6.h                                  |   3 +-
 include/linux/mlx5/driver.h                           |   3 +-
 include/linux/mlx5/mlx5_ifc.h                         |   8 +-
 include/net/cfg80211.h                                |   1 -
 include/net/pkt_cls.h                                 |   5 +-
 kernel/bpf/verifier.c                                 | 283 +++++++++++++++++++++++++++--------------------
 lib/test_bpf.c                                        |  43 ++++++++
 net/bridge/br_netlink.c                               |  11 +-
 net/core/dev.c                                        |   2 +-
 net/core/net_namespace.c                              |   2 +-
 net/core/skbuff.c                                     |   7 +-
 net/ipv4/fib_frontend.c                               |   9 +-
 net/ipv4/fib_semantics.c                              |   8 +-
 net/ipv4/ip_gre.c                                     |   1 +
 net/ipv6/af_inet6.c                                   |   1 -
 net/ipv6/ip6_gre.c                                    |   1 +
 net/ipv6/ip6_output.c                                 |  12 +-
 net/ipv6/ip6_tunnel.c                                 |   9 +-
 net/ipv6/ipv6_sockglue.c                              |   1 +
 net/ipv6/route.c                                      |  20 ++--
 net/openvswitch/flow.c                                |  15 ++-
 net/sched/cls_bpf.c                                   |  93 +++++++---------
 net/sctp/debug.c                                      |   3 +
 net/sctp/ulpqueue.c                                   |  24 ++--
 net/tipc/group.c                                      |  16 +--
 net/wireless/Makefile                                 |  31 ++----
 net/wireless/certs/sforshee.hex                       |  86 +++++++++++++++
 net/wireless/certs/sforshee.x509                      | Bin 680 -> 0 bytes
 net/wireless/nl80211.c                                |   6 +-
 tools/arch/s390/include/uapi/asm/bpf_perf_event.h     |   2 +-
 tools/testing/selftests/bpf/Makefile                  |   2 +-
 tools/testing/selftests/bpf/test_progs.c              |   8 +-
 tools/testing/selftests/bpf/test_verifier.c           | 629 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 tools/testing/selftests/net/config                    |   1 +
 73 files changed, 1548 insertions(+), 492 deletions(-)
 create mode 100644 net/wireless/certs/sforshee.hex
 delete mode 100644 net/wireless/certs/sforshee.x509

^ permalink raw reply

* Re: RCU callback crashes
From: Jakub Kicinski @ 2017-12-21 21:45 UTC (permalink / raw)
  To: Cong Wang; +Cc: John Fastabend, Jiri Pirko, netdev@vger.kernel.org
In-Reply-To: <CAM_iQpV8+NknPgGbNDzF+=S8Px4rxO2=PMwV5BLDQJhX5CGmDQ@mail.gmail.com>

On Thu, 21 Dec 2017 13:31:01 -0800, Cong Wang wrote:
> >    629          if (likely(skb)) {
> >    630                  qdisc_qstats_cpu_backlog_dec(qdisc, skb);
> >    631                  qdisc_bstats_cpu_update(qdisc, skb);
> >    632                  qdisc_qstats_cpu_qlen_dec(qdisc);
> >    633          }
> >    634
> >    635          return skb;
> >    636  }  
> 
> Hi, Jakub
> 
> Could you test the attached patch? It looks like the __skb_array_empty()
> use is unsafe.

I don't have a reproducer, unfortunately, I haven't seen the splat
since :(  FWIW the kernel config was with all debug/checks disabled,
only KASAN on.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox