Netdev List

Netdev List
 help / color / mirror / Atom feed

* nf_reject_ipv4: module license 'unspecified' taints kernel
From: Dave Young @ 2014-10-10  9:19 UTC (permalink / raw)
  To: pablo; +Cc: davem, netdev, linux-kernel

Hi,

With today's linus tree, I got below kmsg:
[   23.545204] nf_reject_ipv4: module license 'unspecified' taints kernel.
[   23.551886] Disabling lock debugging due to kernel taint
[   23.557302] nf_reject_ipv4: Unknown symbol lock_is_held (err 0)
[   23.563256] nf_reject_ipv4: Unknown symbol rcu_read_lock_bh_held (err 0)
[   23.569992] nf_reject_ipv4: Unknown symbol ip_local_out_sk (err 0)
[   23.576206] nf_reject_ipv4: Unknown symbol debug_lockdep_rcu_enabled (err 0)
[   23.583290] nf_reject_ipv4: Unknown symbol rcu_is_watching (err 0)
[   23.589523] nf_reject_ipv4: Unknown symbol rcu_lock_map (err

It could be caused by below commit:

commit c8d7b98bec43faaa6583c3135030be5eb4693acb
Author: Pablo Neira Ayuso <pablo@netfilter.org>
Date:   Fri Sep 26 14:35:15 2014 +0200

    netfilter: move nf_send_resetX() code to nf_reject_ipvX modules

    Move nf_send_reset() and nf_send_reset6() to nf_reject_ipv4 and
    nf_reject_ipv6 respectively. This code is shared by x_tables and
    nf_tables.

    Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

I believe there's a similar issue for the ipv6 reject part, though I have disabled ipv6
in my .config.

Let me know if you need any further information.

Thanks
Dave

^ permalink raw reply

* [PATCH v2] ixgbe: check adapter->vfinfo before dereference
From: Thierry Herbelot @ 2014-10-10  8:45 UTC (permalink / raw)
  To: Jeff Kirsher, Jesse Brandeburg, Bruce Allan, netdev; +Cc: Thierry Herbelot

this protects against the following panic:
(before a VF was actually created on p96p1 PF Ethernet port)
ip link set p96p1 vf 0 spoofchk off
BUG: unable to handle kernel NULL pointer dereference at 0000000000000052
IP: [<ffffffffa044a1c1>] ixgbe_ndo_set_vf_spoofchk+0x51/0x150 [ixgbe]

Signed-off-by: Thierry Herbelot <thierry.herbelot@6wind.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c |   73 +++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 706fc69..29279ad 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -316,7 +316,7 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
 	int entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK)
 		       >> IXGBE_VT_MSGINFO_SHIFT;
 	u16 *hash_list = (u16 *)&msgbuf[1];
-	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+	struct vf_data_storage *vfinfo;
 	struct ixgbe_hw *hw = &adapter->hw;
 	int i;
 	u32 vector_bit;
@@ -324,6 +324,11 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
 	u32 mta_reg;
 	u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
 
+	if (!adapter->vfinfo)
+		return -1;
+
+	vfinfo = &adapter->vfinfo[vf];
+
 	/* only so many hash values supported */
 	entries = min(entries, IXGBE_MAX_VF_MC_ENTRIES);
 
@@ -365,6 +370,9 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter)
 	u32 vector_reg;
 	u32 mta_reg;
 
+	if (!adapter->vfinfo)
+		return;
+
 	for (i = 0; i < adapter->num_vfs; i++) {
 		u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(i));
 		vfinfo = &adapter->vfinfo[i];
@@ -418,6 +426,9 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 		u32 reg_offset, vf_shift, vfre;
 		s32 err = 0;
 
+		if (!adapter->vfinfo)
+			return -1;
+
 #ifdef CONFIG_FCOE
 		if (dev->features & NETIF_F_FCOE_MTU)
 			pf_max_frame = max_t(int, pf_max_frame,
@@ -507,6 +518,9 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
 	u8 num_tcs = netdev_get_num_tc(adapter->netdev);
 
+	if (!adapter->vfinfo)
+		return;
+
 	/* add PF assigned VLAN or VLAN 0 */
 	ixgbe_set_vf_vlan(adapter, true, vfinfo->pf_vlan, vf);
 
@@ -543,6 +557,8 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
 			    int vf, unsigned char *mac_addr)
 {
+	if (!adapter->vfinfo)
+		return -1;
 	ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
 	memcpy(adapter->vfinfo[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
 	ixgbe_add_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
@@ -612,6 +628,9 @@ int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask)
 
 	bool enable = ((event_mask & 0x10000000U) != 0);
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if (enable)
 		eth_zero_addr(adapter->vfinfo[vfn].vf_mac_addresses);
 
@@ -622,13 +641,18 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
 {
 	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
 	struct ixgbe_hw *hw = &adapter->hw;
-	unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses;
+	unsigned char *vf_mac;
 	u32 reg, reg_offset, vf_shift;
 	u32 msgbuf[4] = {0, 0, 0, 0};
 	u8 *addr = (u8 *)(&msgbuf[1]);
 	u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
 	int i;
 
+	if (!adapter->vfinfo)
+		return -1;
+
+	vf_mac = adapter->vfinfo[vf].vf_mac_addresses;
+
 	e_info(probe, "VF Reset msg received from vf %d\n", vf);
 
 	/* reset the filters for the device */
@@ -723,6 +747,9 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter,
 {
 	u8 *new_mac = ((u8 *)(&msgbuf[1]));
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if (!is_valid_ether_addr(new_mac)) {
 		e_warn(drv, "VF %d attempted to set invalid mac\n", vf);
 		return -1;
@@ -775,6 +802,9 @@ static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter,
 	u32 bits;
 	u8 tcs = netdev_get_num_tc(adapter->netdev);
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if (adapter->vfinfo[vf].pf_vlan || tcs) {
 		e_warn(drv,
 		       "VF %d attempted to override administratively set VLAN configuration\n"
@@ -841,6 +871,9 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
 		    IXGBE_VT_MSGINFO_SHIFT;
 	int err;
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if (adapter->vfinfo[vf].pf_set_mac && index > 0) {
 		e_warn(drv,
 		       "VF %d requested MACVLAN filter but is administratively denied\n",
@@ -877,6 +910,9 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter,
 {
 	int api = msgbuf[1];
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	switch (api) {
 	case ixgbe_mbox_api_10:
 	case ixgbe_mbox_api_11:
@@ -899,6 +935,9 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
 	unsigned int default_tc = 0;
 	u8 num_tcs = netdev_get_num_tc(dev);
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	/* verify the PF is supporting the correct APIs */
 	switch (adapter->vfinfo[vf].vf_api) {
 	case ixgbe_mbox_api_20:
@@ -937,6 +976,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 	struct ixgbe_hw *hw = &adapter->hw;
 	s32 retval;
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	retval = ixgbe_read_mbx(hw, msgbuf, mbx_size, vf);
 
 	if (retval) {
@@ -1010,6 +1052,9 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 msg = IXGBE_VT_MSGTYPE_NACK;
 
+	if (!adapter->vfinfo)
+		return;
+
 	/* if device isn't clear to send it shouldn't be reading either */
 	if (!adapter->vfinfo[vf].clear_to_send)
 		ixgbe_write_mbx(hw, &msg, 1, vf);
@@ -1053,6 +1098,9 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter)
 	u32 ping;
 	int i;
 
+	if (!adapter->vfinfo)
+		return;
+
 	for (i = 0 ; i < adapter->num_vfs; i++) {
 		ping = IXGBE_PF_CONTROL_MSG;
 		if (adapter->vfinfo[i].clear_to_send)
@@ -1066,6 +1114,9 @@ int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	if (!is_valid_ether_addr(mac) || (vf >= adapter->num_vfs))
 		return -EINVAL;
+	if (!adapter->vfinfo)
+		return -1;
+
 	adapter->vfinfo[vf].pf_set_mac = true;
 	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
 	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
@@ -1085,6 +1136,9 @@ int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7))
 		return -EINVAL;
 	if (vlan || qos) {
@@ -1149,8 +1203,12 @@ static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 bcnrc_val = 0;
 	u16 queue, queues_per_pool;
-	u16 tx_rate = adapter->vfinfo[vf].tx_rate;
+	u16 tx_rate;
 
+	if (!adapter->vfinfo)
+		return;
+
+	tx_rate = adapter->vfinfo[vf].tx_rate;
 	if (tx_rate) {
 		/* start with base link speed value */
 		bcnrc_val = adapter->vf_rate_link_speed;
@@ -1199,6 +1257,9 @@ void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter)
 {
 	int i;
 
+	if (!adapter->vfinfo)
+		return;
+
 	/* VF Tx rate limit was not set */
 	if (!adapter->vf_rate_link_speed)
 		return;
@@ -1261,6 +1322,9 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 regval;
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	adapter->vfinfo[vf].spoofchk_enabled = setting;
 
 	regval = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg));
@@ -1285,6 +1349,9 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev,
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
+	if (!adapter->vfinfo)
+		return -EINVAL;
+
 	ivi->vf = vf;
 	memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN);
 	ivi->max_tx_rate = adapter->vfinfo[vf].tx_rate;
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH v2] ixgbe: check adapter->vfinfo before dereference
From: Thierry Herbelot @ 2014-10-10  8:53 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: Jesse Brandeburg, Bruce Allan, netdev
In-Reply-To: <1412931052.2427.33.camel@jtkirshe-mobl>

On 10/10/2014 10:50 AM, Jeff Kirsher wrote:
> On Fri, 2014-10-10 at 10:45 +0200, Thierry Herbelot wrote:
>> this protects against the following panic:
>> (before a VF was actually created on p96p1 PF Ethernet port)
>> ip link set p96p1 vf 0 spoofchk off
>> BUG: unable to handle kernel NULL pointer dereference at
>> 0000000000000052
>> IP: [<ffffffffa044a1c1>] ixgbe_ndo_set_vf_spoofchk+0x51/0x150 [ixgbe]
>>
>> Signed-off-by: Thierry Herbelot <thierry.herbelot@6wind.com>
>> ---
>>   drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c |   73
>> +++++++++++++++++++++++-
>>   1 file changed, 70 insertions(+), 3 deletions(-)
>
> Thanks for fixing that up Thierry.  I have added your patch to my queue.

Sorry for the miscompile : I wrote the patch back in August, and left it 
to rot while doing other things and I did not check it was really 
correct before sending it yesterday.

	Thierry

>


-- 
Thierry Herbelot
6WIND
Software Engineer

^ permalink raw reply

* Re: [PATCH v2] ixgbe: check adapter->vfinfo before dereference
From: Jeff Kirsher @ 2014-10-10  8:50 UTC (permalink / raw)
  To: Thierry Herbelot; +Cc: Jesse Brandeburg, Bruce Allan, netdev
In-Reply-To: <1412930732-892-1-git-send-email-thierry.herbelot@6wind.com>

[-- Attachment #1: Type: text/plain, Size: 652 bytes --]

On Fri, 2014-10-10 at 10:45 +0200, Thierry Herbelot wrote:
> this protects against the following panic:
> (before a VF was actually created on p96p1 PF Ethernet port)
> ip link set p96p1 vf 0 spoofchk off
> BUG: unable to handle kernel NULL pointer dereference at
> 0000000000000052
> IP: [<ffffffffa044a1c1>] ixgbe_ndo_set_vf_spoofchk+0x51/0x150 [ixgbe]
> 
> Signed-off-by: Thierry Herbelot <thierry.herbelot@6wind.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c |   73
> +++++++++++++++++++++++-
>  1 file changed, 70 insertions(+), 3 deletions(-)

Thanks for fixing that up Thierry.  I have added your patch to my queue.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Mail,View Attach
From: Mr. Stephen Gianino @ 2014-10-09 19:45 UTC (permalink / raw)
  To: Recipients

[-- Attachment #1: Mail message body --]
[-- Type: text/plain, Size: 123 bytes --]



---
This email is free from viruses and malware because avast! Antivirus protection is active.
http://www.avast.com

[-- Attachment #2: Untitled.png --]
[-- Type: image/png, Size: 35094 bytes --]

^ permalink raw reply

* [PATCH 1/1] Checkpatch: coding style errors in Nvidia ethernet driver
From: Akshay Sarode @ 2014-10-10  8:01 UTC (permalink / raw)
  To: John Stultz, netdev, linux-kernel; +Cc: Akshay Sarode

ERROR: "foo* bar" should be "foo *bar"
ERROR: do not initialise statics to 0 or NULL
CHECK: spinlock_t definition without comment
Signed-off-by: Akshay Sarode <akshaysarode21@gmail.com>
---
 drivers/net/ethernet/nvidia/forcedeth.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index f39cae6..dd03d1a 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -748,7 +748,7 @@ struct nv_skb_map {
 
 /* in dev: base, irq */
 struct fe_priv {
-	spinlock_t lock;
+	spinlock_t lock; /* spinlock for SMA lock handling */
 
 	struct net_device *dev;
 	struct napi_struct napi;
@@ -893,6 +893,7 @@ enum {
 	NV_MSI_INT_DISABLED,
 	NV_MSI_INT_ENABLED
 };
+
 static int msi = NV_MSI_INT_ENABLED;
 
 /*
@@ -902,6 +903,7 @@ enum {
 	NV_MSIX_INT_DISABLED,
 	NV_MSIX_INT_ENABLED
 };
+
 static int msix = NV_MSIX_INT_ENABLED;
 
 /*
@@ -911,12 +913,18 @@ enum {
 	NV_DMA_64BIT_DISABLED,
 	NV_DMA_64BIT_ENABLED
 };
+
 static int dma_64bit = NV_DMA_64BIT_ENABLED;
 
 /*
  * Debug output control for tx_timeout
  */
-static bool debug_tx_timeout = false;
+enum {
+	NV_DEBUG_TX_TIMEOUT_DISABLED,
+	NV_DEBUG_TX_TIMEOUT_ENABLED
+};
+
+static bool debug_tx_timeout = NV_DEBUG_TX_TIMEOUT_DISABLED;
 
 /*
  * Crossover Detection
@@ -926,6 +934,7 @@ enum {
 	NV_CROSSOVER_DETECTION_DISABLED,
 	NV_CROSSOVER_DETECTION_ENABLED
 };
+
 static int phy_cross = NV_CROSSOVER_DETECTION_DISABLED;
 
 /*
@@ -4562,7 +4571,8 @@ static int nv_nway_reset(struct net_device *dev)
 	return ret;
 }
 
-static void nv_get_ringparam(struct net_device *dev, struct ethtool_ringparam* ring)
+static void nv_get_ringparam(struct net_device *dev,
+			     struct ethtool_ringparam *ring)
 {
 	struct fe_priv *np = netdev_priv(dev);
 
@@ -4573,7 +4583,8 @@ static void nv_get_ringparam(struct net_device *dev, struct ethtool_ringparam* r
 	ring->tx_pending = np->tx_ring_size;
 }
 
-static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ring)
+static int nv_set_ringparam(struct net_device *dev,
+			    struct ethtool_ringparam *ring)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
@@ -4685,7 +4696,8 @@ exit:
 	return -ENOMEM;
 }
 
-static void nv_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam* pause)
+static void nv_get_pauseparam(struct net_device *dev,
+			      struct ethtool_pauseparam *pause)
 {
 	struct fe_priv *np = netdev_priv(dev);
 
@@ -4694,7 +4706,8 @@ static void nv_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam*
 	pause->tx_pause = (np->pause_flags & NV_PAUSEFRAME_TX_ENABLE) != 0;
 }
 
-static int nv_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam* pause)
+static int nv_set_pauseparam(struct net_device *dev,
+			     struct ethtool_pauseparam *pause)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	int adv, bmcr;
-- 
1.9.1

^ permalink raw reply related

* [PATCH v2] net/phy: micrel: Add clock support for KSZ8021/KSZ8031
From: Sascha Hauer @ 2014-10-10  7:48 UTC (permalink / raw)
  To: Florian Fainelli; +Cc: netdev, linux-kernel, kernel, Sascha Hauer

The KSZ8021 and KSZ8031 support RMII reference input clocks of 25MHz
and 50MHz. Both PHYs differ in the default frequency they expect
after reset. If this differs from the actual input clock, then
register 0x1f bit 7 must be changed.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---

Changes since v1:

- Move clock handling to the probe callback
- Bail out with an error for invalid clock rates

 Documentation/devicetree/bindings/net/micrel.txt |  6 +++++
 drivers/net/phy/micrel.c                         | 31 ++++++++++++++++++++++--
 include/linux/micrel_phy.h                       |  1 +
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/micrel.txt b/Documentation/devicetree/bindings/net/micrel.txt
index 98a3e61..e1d99b9 100644
--- a/Documentation/devicetree/bindings/net/micrel.txt
+++ b/Documentation/devicetree/bindings/net/micrel.txt
@@ -16,3 +16,9 @@ Optional properties:
 	      KSZ8051: register 0x1f, bits 5..4
 
               See the respective PHY datasheet for the mode values.
+
+ - clocks, clock-names: contains clocks according to the common clock bindings.
+
+              supported clocks:
+	      - KSZ8021, KSZ8031: "rmii-ref": The RMII reference input clock. Used
+		to determine the XI input clock.
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 011dbda..492435f 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -26,6 +26,7 @@
 #include <linux/phy.h>
 #include <linux/micrel_phy.h>
 #include <linux/of.h>
+#include <linux/clk.h>
 
 /* Operation Mode Strap Override */
 #define MII_KSZPHY_OMSO				0x16
@@ -72,9 +73,12 @@ static int ksz_config_flags(struct phy_device *phydev)
 {
 	int regval;
 
-	if (phydev->dev_flags & MICREL_PHY_50MHZ_CLK) {
+	if (phydev->dev_flags & (MICREL_PHY_50MHZ_CLK | MICREL_PHY_25MHZ_CLK)) {
 		regval = phy_read(phydev, MII_KSZPHY_CTRL);
-		regval |= KSZ8051_RMII_50MHZ_CLK;
+		if (phydev->dev_flags & MICREL_PHY_50MHZ_CLK)
+			regval |= KSZ8051_RMII_50MHZ_CLK;
+		else
+			regval &= ~KSZ8051_RMII_50MHZ_CLK;
 		return phy_write(phydev, MII_KSZPHY_CTRL, regval);
 	}
 	return 0;
@@ -440,6 +444,27 @@ ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int ptrad, int devnum,
 {
 }
 
+static int ksz8021_probe(struct phy_device *phydev)
+{
+	struct clk *clk;
+
+	clk = devm_clk_get(&phydev->dev, "rmii-ref");
+	if (!IS_ERR(clk)) {
+		unsigned long rate = clk_get_rate(clk);
+
+		if (rate > 24500000 && rate < 25500000) {
+			phydev->dev_flags |= MICREL_PHY_25MHZ_CLK;
+		} else if (rate > 49500000 && rate < 50500000) {
+			phydev->dev_flags |= MICREL_PHY_50MHZ_CLK;
+		} else {
+			dev_err(&phydev->dev, "Clock rate out of range: %ld\n", rate);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static struct phy_driver ksphy_driver[] = {
 {
 	.phy_id		= PHY_ID_KS8737,
@@ -462,6 +487,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
 			   SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.probe		= ksz8021_probe,
 	.config_init	= ksz8021_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -477,6 +503,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
 			   SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.probe		= ksz8021_probe,
 	.config_init	= ksz8021_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 2e5b194..53d33de 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -37,6 +37,7 @@
 
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK	0x00000001
+#define MICREL_PHY_25MHZ_CLK	0x00000002
 
 #define MICREL_KSZ9021_EXTREG_CTRL	0xB
 #define MICREL_KSZ9021_EXTREG_DATA_WRITE	0xC
-- 
2.1.1

^ permalink raw reply related

* Re: [PATCH] net/phy: micrel: Add clock support for KSZ8021/KSZ8031
From: Sascha Hauer @ 2014-10-10  7:44 UTC (permalink / raw)
  To: Florian Fainelli; +Cc: netdev, linux-kernel, kernel
In-Reply-To: <5436BF5B.5060006@gmail.com>

On Thu, Oct 09, 2014 at 10:01:15AM -0700, Florian Fainelli wrote:
> On 10/09/2014 05:32 AM, Sascha Hauer wrote:
> >  static int ksz8021_config_init(struct phy_device *phydev)
> >  {
> > +	struct clk *clk;
> >  	const u16 val = KSZPHY_OMSO_B_CAST_OFF | KSZPHY_OMSO_RMII_OVERRIDE;
> >  	int rc;
> >  
> > +	clk = devm_clk_get(&phydev->dev, "rmii-ref");
> > +	if (!IS_ERR(clk)) {
> > +		unsigned long rate = clk_get_rate(clk);
> > +
> > +		if (rate > 24500000 && rate < 25500000)
> > +			phydev->dev_flags |= MICREL_PHY_25MHZ_CLK;
> > +		else if (rate > 49500000 && rate < 50500000)
> > +			phydev->dev_flags |= MICREL_PHY_50MHZ_CLK;
> > +	}
> 
> I suppose that you could move this to the PHY driver probe() callback,
> and perform the rate checking from here, rejecting a clock whose rate is
> out of the acceptable range, and return an error to prevent the PHY
> driver registration? It is really up to you though.

Oh, it seems doing this in config_init is really wrong as it can be
called multiple times, even as a response to the SIOCSMIIREG ioctl.
We would request the clock each time then without ever releasing it.
I'll send an updated version with clock handling in probe().

Sascha

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply

* Re: [PATCH v3 0/3] Enable FEC pps feather
From: Richard Cochran @ 2014-10-10  7:44 UTC (permalink / raw)
  To: Luwei Zhou
  Cc: davem, netdev, shawn.guo, bhutchings, R49496, b38611, b20596,
	stephen
In-Reply-To: <1412918130-18830-1-git-send-email-b45643@freescale.com>

On Fri, Oct 10, 2014 at 01:15:27PM +0800, Luwei Zhou wrote:
> Change from v2 to v3:
> 	-Using the default channel 0 to be PPS channel not PTP_PIN_SET/GETFUNC interface.
> 	-Using the linux definition of NSEC_PER_SEC.
> 
> Change from v1 to v2:
> 	- Fix the potential 32-bit multiplication overflow issue.
> 	- Optimize the hareware adjustment code to improve efficiency as Richard suggested
> 	- Use ptp PTP_PIN_SET/GETFUNC interface to set PPS channel not device tree
> 	and add PTP_PF_PPS enumeration
> 	- Modify comments style
> 
> 
> Luwei Zhou (3):
>   net: fec: ptp: Use the 31-bit ptp timer.
>   net: fec: ptp: Use hardware algorithm to adjust PTP counter.
>   net: fec: ptp: Enable PPS output based on ptp clock

Acked-by: Richard Cochran <richardcochran@gmail.com>

^ permalink raw reply

* Re: [PATCH] ipv6: honour non local bind
From: Michele Dionisio @ 2014-10-10  7:23 UTC (permalink / raw)
  To: Lukas Tribus; +Cc: netdev@vger.kernel.org, yoshfuji@linux-ipv6.org
In-Reply-To: <DUB123-W108DA950E52906D29ADEC4EDA00@phx.gbl>

I have tested the socket option and it works perfectly, but a lot
of software does not use it and the option is Linux-specific.

Now I have read the previous patch and all the related emails. I see
the reason for rejecting the previous patch, but this one does not add any
new parameter that has to be maintained. It can be useful to use the
ipv4 parameter, also because it is already used if the ipv6 address is
IPV6_ADDR_MAPPED.

Regards,

and many thanks for your time in answering me.

2014-10-09 19:07 GMT+02:00 Lukas Tribus <luky-37@hotmail.com>:
> Hi Michele,
>
>
>> I see that net.ipv4.ip_nonlocal_bind = 1 does not works on ipv6 if
>> you try to bind on not local ip.
>> An easy test to see the issue using python is:
>>
>> set net.ipv4.ip_nonlocal_bind = 1 and in python shell try to execute
>>
>> import socket
>> s = socket.socket(family=socket.AF_INET6)
>> s.bind(('2a01:84a0:1001:A001:0000:0:0015:8', 1234))
>>
>> you receive an error. In ipv4 it works.
>>
>> The patch attached solve the issue
>
>
> This has been proposed in the past and refused:
> http://marc.info/?t=132369669000005&r=1&w=2
>
> You will have to set the available socket options from the application.
>
>
>
> Regards,
>
> Lukas
>
>
>

^ permalink raw reply

* [PATCH iproute2] tests: Allow to run tests recursively
From: Vadim Kochan @ 2014-10-10  6:27 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

Such an approach allows running *.t scripts from any
tests/ subdirectory.

One point is that tests from tests/cls/*.t (which are needed
by tests/cls-testbed.t but do not exist yet) would also
be run alongside tests/cls-testbed.t, which is not good
because in that case they would be run twice, so the path of
these tests is renamed to tests/cls/*.c in tests/cls-testbed.t

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 testsuite/Makefile            | 11 ++++++++++-
 testsuite/tests/cls-testbed.t |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/testsuite/Makefile b/testsuite/Makefile
index d1bf359..2ba9547 100644
--- a/testsuite/Makefile
+++ b/testsuite/Makefile
@@ -4,7 +4,11 @@ PREFIX := sudo -E
 RESULTS_DIR := results
 ## -- End Config --
 
-TESTS := $(patsubst tests/%,%,$(wildcard tests/*.t))
+rwildcard=$(wildcard $1$2) $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2))
+
+TESTS := $(patsubst tests/%,%,$(call rwildcard,tests/,*.t))
+TESTS_DIR := $(dir $(TESTS))
+
 IPVERS := $(filter-out iproute2/Makefile,$(wildcard iproute2/*))
 
 ifneq (,$(wildcard /proc/config.gz))
@@ -34,6 +38,11 @@ distclean: clean
 
 $(TESTS):
 	@mkdir -p $(RESULTS_DIR)
+	
+	@for d in $(TESTS_DIR); do \
+	    mkdir -p $(RESULTS_DIR)/$$d; \
+	done
+	
 	@for i in $(IPVERS); do \
 		o=`echo $$i | sed -e 's/iproute2\///'`; \
 		echo -n "Running $@ [$$o/`uname -r`]: "; \
diff --git a/testsuite/tests/cls-testbed.t b/testsuite/tests/cls-testbed.t
index efae2a5..9fe86f5 100755
--- a/testsuite/tests/cls-testbed.t
+++ b/testsuite/tests/cls-testbed.t
@@ -8,7 +8,7 @@ QDISCS="cbq htb dsmark"
 for q in ${QDISCS}; do
 	ts_log "Preparing classifier testbed with qdisc $q"
 
-	for c in tests/cls/*.t; do
+	for c in tests/cls/*.c; do
 
 		case "$q" in
 		cbq)
-- 
2.1.0

^ permalink raw reply related

* [PATCH v3 1/3] net: fec: ptp: Use the 31-bit ptp timer.
From: Luwei Zhou @ 2014-10-10  5:15 UTC (permalink / raw)
  To: davem, richardcochran
  Cc: netdev, shawn.guo, bhutchings, R49496, b38611, b20596, stephen
In-Reply-To: <1412918130-18830-1-git-send-email-b45643@freescale.com>

When ptp switches from software adjustment to hardware adjustment, linux ptp can't converge.
This is caused by an IP limitation: the hardware adjustment logic has an issue when the ptp counter
runs over 0x80000000 (31-bit counter). The internal IP reference manual has already removed 32-bit
free-running count support. This patch replaces the 32-bit PTP timer with a 31-bit one.

Signed-off-by: Luwei Zhou <b45643@freescale.com>
Signed-off-by: Frank Li <Frank.Li@freescale.com>
---
 drivers/net/ethernet/freescale/fec_ptp.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index cca3617..8016bdd 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -70,6 +70,7 @@
 #define FEC_TS_TIMESTAMP	0x418
 
 #define FEC_CC_MULT	(1 << 31)
+#define FEC_COUNTER_PERIOD	(1 << 31)
 /**
  * fec_ptp_read - read raw cycle counter (to be used by time counter)
  * @cc: the cyclecounter structure
@@ -113,14 +114,15 @@ void fec_ptp_start_cyclecounter(struct net_device *ndev)
 	/* 1ns counter */
 	writel(inc << FEC_T_INC_OFFSET, fep->hwp + FEC_ATIME_INC);
 
-	/* use free running count */
-	writel(0, fep->hwp + FEC_ATIME_EVT_PERIOD);
+	/* use 31-bit timer counter */
+	writel(FEC_COUNTER_PERIOD, fep->hwp + FEC_ATIME_EVT_PERIOD);
 
-	writel(FEC_T_CTRL_ENABLE, fep->hwp + FEC_ATIME_CTRL);
+	writel(FEC_T_CTRL_ENABLE | FEC_T_CTRL_PERIOD_RST,
+		fep->hwp + FEC_ATIME_CTRL);
 
 	memset(&fep->cc, 0, sizeof(fep->cc));
 	fep->cc.read = fec_ptp_read;
-	fep->cc.mask = CLOCKSOURCE_MASK(32);
+	fep->cc.mask = CLOCKSOURCE_MASK(31);
 	fep->cc.shift = 31;
 	fep->cc.mult = FEC_CC_MULT;
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH v3 3/3] net: fec: ptp: Enable PPS output based on ptp clock
From: Luwei Zhou @ 2014-10-10  5:15 UTC (permalink / raw)
  To: davem, richardcochran
  Cc: netdev, shawn.guo, bhutchings, R49496, b38611, b20596, stephen
In-Reply-To: <1412918130-18830-1-git-send-email-b45643@freescale.com>

The FEC ptp timer has a 4-channel compare/trigger function. It can be used to
enable pps output.
The pulse is output high exactly on the N-th second. The "pulse output high
on compare event" mode is used to produce the pulse per second. The pulse
width is one cycle of the ptp timer clock source. Since a 31-bit
ptp hardware timer is used, the timer wraps after a little more than 2 seconds. We
need to reload the compare event about every 1 second.

Signed-off-by: Luwei Zhou <b45643@freescale.com>
---
 drivers/net/ethernet/freescale/fec.h      |   7 ++
 drivers/net/ethernet/freescale/fec_main.c |   2 +
 drivers/net/ethernet/freescale/fec_ptp.c  | 197 +++++++++++++++++++++++++++++-
 3 files changed, 205 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index b0e6025..1e65917 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -487,12 +487,19 @@ struct fec_enet_private {
 
 	/* ptp clock period in ns*/
 	unsigned int ptp_inc;
+
+	/* pps  */
+	int pps_channel;
+	unsigned int reload_period;
+	int pps_enable;
+	unsigned int next_counter;
 };
 
 void fec_ptp_init(struct platform_device *pdev);
 void fec_ptp_start_cyclecounter(struct net_device *ndev);
 int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr);
 int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr);
+uint fec_ptp_check_pps_event(struct fec_enet_private *fep);
 
 /****************************************************************************/
 #endif /* FEC_H */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 87975b5..0167601 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1622,6 +1622,8 @@ fec_enet_interrupt(int irq, void *dev_id)
 		complete(&fep->mdio_done);
 	}
 
+	fec_ptp_check_pps_event(fep);
+
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index f5ee460..0fdcdc9 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -61,6 +61,24 @@
 #define FEC_T_INC_CORR_MASK             0x00007f00
 #define FEC_T_INC_CORR_OFFSET           8
 
+#define FEC_T_CTRL_PINPER		0x00000080
+#define FEC_T_TF0_MASK			0x00000001
+#define FEC_T_TF0_OFFSET		0
+#define FEC_T_TF1_MASK			0x00000002
+#define FEC_T_TF1_OFFSET		1
+#define FEC_T_TF2_MASK			0x00000004
+#define FEC_T_TF2_OFFSET		2
+#define FEC_T_TF3_MASK			0x00000008
+#define FEC_T_TF3_OFFSET		3
+#define FEC_T_TDRE_MASK			0x00000001
+#define FEC_T_TDRE_OFFSET		0
+#define FEC_T_TMODE_MASK		0x0000003C
+#define FEC_T_TMODE_OFFSET		2
+#define FEC_T_TIE_MASK			0x00000040
+#define FEC_T_TIE_OFFSET		6
+#define FEC_T_TF_MASK			0x00000080
+#define FEC_T_TF_OFFSET			7
+
 #define FEC_ATIME_CTRL		0x400
 #define FEC_ATIME		0x404
 #define FEC_ATIME_EVT_OFFSET	0x408
@@ -69,8 +87,143 @@
 #define FEC_ATIME_INC		0x414
 #define FEC_TS_TIMESTAMP	0x418
 
+#define FEC_TGSR		0x604
+#define FEC_TCSR(n)		(0x608 + n * 0x08)
+#define FEC_TCCR(n)		(0x60C + n * 0x08)
+#define MAX_TIMER_CHANNEL	3
+#define FEC_TMODE_TOGGLE	0x05
+#define FEC_HIGH_PULSE		0x0F
+
 #define FEC_CC_MULT	(1 << 31)
 #define FEC_COUNTER_PERIOD	(1 << 31)
+#define PPS_OUPUT_RELOAD_PERIOD	NSEC_PER_SEC
+#define FEC_CHANNLE_0		0
+#define DEFAULT_PPS_CHANNEL	FEC_CHANNLE_0
+
+/**
+ * fec_ptp_enable_pps
+ * @fep: the fec_enet_private structure handle
+ * @enable: enable the channel pps output
+ *
+ * This function enables the PPS output on the timer channel.
+ */
+static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
+{
+	unsigned long flags;
+	u32 val, tempval;
+	int inc;
+	struct timespec ts;
+	u64 ns;
+	u32 remainder;
+	val = 0;
+
+	if (!(fep->hwts_tx_en || fep->hwts_rx_en)) {
+		dev_err(&fep->pdev->dev, "No ptp stack is running\n");
+		return -EINVAL;
+	}
+
+	if (fep->pps_enable == enable)
+		return 0;
+
+	fep->pps_channel = DEFAULT_PPS_CHANNEL;
+	fep->reload_period = PPS_OUPUT_RELOAD_PERIOD;
+	inc = fep->ptp_inc;
+
+	spin_lock_irqsave(&fep->tmreg_lock, flags);
+
+	if (enable) {
+		/* clear capture or output compare interrupt status if have.
+		 */
+		writel(FEC_T_TF_MASK, fep->hwp + FEC_TCSR(fep->pps_channel));
+
+		/* It is recommended to double check the TMODE field in the
+		 * TCSR register to be cleared before the first compare counter
+		 * is written into TCCR register. Just add a double check.
+		 */
+		val = readl(fep->hwp + FEC_TCSR(fep->pps_channel));
+		do {
+			val &= ~(FEC_T_TMODE_MASK);
+			writel(val, fep->hwp + FEC_TCSR(fep->pps_channel));
+			val = readl(fep->hwp + FEC_TCSR(fep->pps_channel));
+		} while (val & FEC_T_TMODE_MASK);
+
+		/* Dummy read counter to update the counter */
+		timecounter_read(&fep->tc);
+		/* We want to find the first compare event in the next
+		 * second point. So we need to know what the ptp time
+		 * is now and how many nanoseconds is ahead to get next second.
+		 * The remaining nanosecond ahead before the next second would be
+		 * NSEC_PER_SEC - ts.tv_nsec. Add the remaining nanoseconds
+		 * to current timer would be next second.
+		 */
+		tempval = readl(fep->hwp + FEC_ATIME_CTRL);
+		tempval |= FEC_T_CTRL_CAPTURE;
+		writel(tempval, fep->hwp + FEC_ATIME_CTRL);
+
+		tempval = readl(fep->hwp + FEC_ATIME);
+		/* Convert the ptp local counter to 1588 timestamp */
+		ns = timecounter_cyc2time(&fep->tc, tempval);
+		ts.tv_sec = div_u64_rem(ns, 1000000000ULL, &remainder);
+		ts.tv_nsec = remainder;
+
+		/* The tempval is  less than 3 seconds, and  so val is less than
+		 * 4 seconds. No overflow for 32bit calculation.
+		 */
+		val = NSEC_PER_SEC - (u32)ts.tv_nsec + tempval;
+
+		/* Need to consider the situation that the current time is
+		 * very close to the second point, which means NSEC_PER_SEC
+		 * - ts.tv_nsec is close to be zero(For example 20ns); Since the timer
+		 * is still running when we calculate the first compare event, it is
+		 * possible that the remaining nanoseconds run out before the compare
+		 * counter is calculated and written into TCCR register. To avoid
+		 * this possibility, we will set the compare event to be the next
+		 * of next second. The current setting is 31-bit timer and wrap
+		 * around over 2 seconds. So it is okay to set the next of next
+		 * second for the timer.
+		 */
+		val += NSEC_PER_SEC;
+
+		/* We add (2 * NSEC_PER_SEC - (u32)ts.tv_nsec) to current
+		 * ptp counter, which maybe cause 32-bit wrap. Since the
+		 * (NSEC_PER_SEC - (u32)ts.tv_nsec) is less than 2 second.
+		 * We can ensure the wrap will not cause issue. If the offset
+		 * is bigger than fep->cc.mask would be a error.
+		 */
+		val &= fep->cc.mask;
+		writel(val, fep->hwp + FEC_TCCR(fep->pps_channel));
+
+		/* Calculate the second the compare event timestamp */
+		fep->next_counter = (val + fep->reload_period) & fep->cc.mask;
+
+		/* * Enable compare event when overflow */
+		val = readl(fep->hwp + FEC_ATIME_CTRL);
+		val |= FEC_T_CTRL_PINPER;
+		writel(val, fep->hwp + FEC_ATIME_CTRL);
+
+		/* Compare channel setting. */
+		val = readl(fep->hwp + FEC_TCSR(fep->pps_channel));
+		val |= (1 << FEC_T_TF_OFFSET | 1 << FEC_T_TIE_OFFSET);
+		val &= ~(1 << FEC_T_TDRE_OFFSET);
+		val &= ~(FEC_T_TMODE_MASK);
+		val |= (FEC_HIGH_PULSE << FEC_T_TMODE_OFFSET);
+		writel(val, fep->hwp + FEC_TCSR(fep->pps_channel));
+
+		/* Write the second compare event timestamp and calculate
+		 * the third timestamp. Refer the TCCR register detail in the spec.
+		 */
+		writel(fep->next_counter, fep->hwp + FEC_TCCR(fep->pps_channel));
+		fep->next_counter = (fep->next_counter + fep->reload_period) & fep->cc.mask;
+	} else {
+		writel(0, fep->hwp + FEC_TCSR(fep->pps_channel));
+	}
+
+	fep->pps_enable = enable;
+	spin_unlock_irqrestore(&fep->tmreg_lock, flags);
+
+	return 0;
+}
+
 /**
  * fec_ptp_read - read raw cycle counter (to be used by time counter)
  * @cc: the cyclecounter structure
@@ -314,6 +467,15 @@ static int fec_ptp_settime(struct ptp_clock_info *ptp,
 static int fec_ptp_enable(struct ptp_clock_info *ptp,
 			  struct ptp_clock_request *rq, int on)
 {
+	struct fec_enet_private *fep =
+	    container_of(ptp, struct fec_enet_private, ptp_caps);
+	int ret = 0;
+
+	if (rq->type == PTP_CLK_REQ_PPS) {
+		ret = fec_ptp_enable_pps(fep, on);
+
+		return ret;
+	}
 	return -EOPNOTSUPP;
 }
 
@@ -428,7 +590,7 @@ void fec_ptp_init(struct platform_device *pdev)
 	fep->ptp_caps.n_ext_ts = 0;
 	fep->ptp_caps.n_per_out = 0;
 	fep->ptp_caps.n_pins = 0;
-	fep->ptp_caps.pps = 0;
+	fep->ptp_caps.pps = 1;
 	fep->ptp_caps.adjfreq = fec_ptp_adjfreq;
 	fep->ptp_caps.adjtime = fec_ptp_adjtime;
 	fep->ptp_caps.gettime = fec_ptp_gettime;
@@ -452,3 +614,36 @@ void fec_ptp_init(struct platform_device *pdev)
 
 	schedule_delayed_work(&fep->time_keep, HZ);
 }
+
+/**
+ * fec_ptp_check_pps_event
+ * @fep: the fec_enet_private structure handle
+ *
+ * This function checks the pps event and reloads the timer compare counter.
+ */
+uint fec_ptp_check_pps_event(struct fec_enet_private *fep)
+{
+	u32 val;
+	u8 channel = fep->pps_channel;
+	struct ptp_clock_event event;
+
+	val = readl(fep->hwp + FEC_TCSR(channel));
+	if (val & FEC_T_TF_MASK) {
+		/* Write the next next compare(not the next according the spec)
+		 * value to the register
+		 */
+		writel(fep->next_counter, fep->hwp + FEC_TCCR(channel));
+		do {
+			writel(val, fep->hwp + FEC_TCSR(channel));
+		} while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK);
+
+		/* Update the counter; */
+		fep->next_counter = (fep->next_counter + fep->reload_period) & fep->cc.mask;
+
+		event.type = PTP_CLOCK_PPS;
+		ptp_clock_event(fep->ptp_clock, &event);
+		return 1;
+	}
+
+	return 0;
+}
-- 
1.9.1

^ permalink raw reply related

* [PATCH v3 0/3] Enable FEC pps feature
From: Luwei Zhou @ 2014-10-10  5:15 UTC (permalink / raw)
  To: davem, richardcochran
  Cc: netdev, shawn.guo, bhutchings, R49496, b38611, b20596, stephen

Change from v2 to v3:
	-Using the default channel 0 to be PPS channel not PTP_PIN_SET/GETFUNC interface.
	-Using the linux definition of NSEC_PER_SEC.

Change from v1 to v2:
	- Fix the potential 32-bit multiplication overflow issue.
	- Optimize the hardware adjustment code to improve efficiency as Richard suggested
	- Use ptp PTP_PIN_SET/GETFUNC interface to set PPS channel not device tree
	and add PTP_PF_PPS enumeration
	- Modify comments style


Luwei Zhou (3):
  net: fec: ptp: Use the 31-bit ptp timer.
  net: fec: ptp: Use hardware algorithm to adjust PTP counter.
  net: fec: ptp: Enable PPS output based on ptp clock

 drivers/net/ethernet/freescale/fec.h      |  10 ++
 drivers/net/ethernet/freescale/fec_main.c |   2 +
 drivers/net/ethernet/freescale/fec_ptp.c  | 272 ++++++++++++++++++++++++++++--
 3 files changed, 267 insertions(+), 17 deletions(-)

-- 
1.9.1

^ permalink raw reply

* [PATCH v3 2/3] net: fec: ptp: Use hardware algorithm to adjust PTP counter.
From: Luwei Zhou @ 2014-10-10  5:15 UTC (permalink / raw)
  To: davem, richardcochran
  Cc: netdev, shawn.guo, bhutchings, R49496, b38611, b20596, stephen
In-Reply-To: <1412918130-18830-1-git-send-email-b45643@freescale.com>

The FEC IP supports hardware adjustment for ptp timer. Refer to the description of
ENET_ATCOR and ENET_ATINC registers in the spec about the hardware adjustment. This
patch uses hardware support to adjust the ptp offset and frequency on the slave side.

Signed-off-by: Luwei Zhou <b45643@freescale.com>
Signed-off-by: Frank Li <Frank.Li@freescale.com>
Signed-off-by: Fugang Duan <b38611@freescale.com>
---
 drivers/net/ethernet/freescale/fec.h     |  3 ++
 drivers/net/ethernet/freescale/fec_ptp.c | 65 ++++++++++++++++++++++++++------
 2 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 1d5e182..b0e6025 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -484,6 +484,9 @@ struct fec_enet_private {
 	unsigned int itr_clk_rate;
 
 	u32 rx_copybreak;
+
+	/* ptp clock period in ns*/
+	unsigned int ptp_inc;
 };
 
 void fec_ptp_init(struct platform_device *pdev);
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 8016bdd..f5ee460 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -145,32 +145,59 @@ void fec_ptp_start_cyclecounter(struct net_device *ndev)
  */
 static int fec_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 {
-	u64 diff;
 	unsigned long flags;
 	int neg_adj = 0;
-	u32 mult = FEC_CC_MULT;
+	u32 i, tmp;
+	u32 corr_inc, corr_period;
+	u32 corr_ns;
+	u64 lhs, rhs;
 
 	struct fec_enet_private *fep =
 	    container_of(ptp, struct fec_enet_private, ptp_caps);
 
+	if (ppb == 0)
+		return 0;
+
 	if (ppb < 0) {
 		ppb = -ppb;
 		neg_adj = 1;
 	}
 
-	diff = mult;
-	diff *= ppb;
-	diff = div_u64(diff, 1000000000ULL);
+	/* In theory, corr_inc/corr_period = ppb/NSEC_PER_SEC;
+	 * Try to find the corr_inc  between 1 to fep->ptp_inc to
+	 * meet adjustment requirement.
+	 */
+	lhs = NSEC_PER_SEC;
+	rhs = (u64)ppb * (u64)fep->ptp_inc;
+	for (i = 1; i <= fep->ptp_inc; i++) {
+		if (lhs >= rhs) {
+			corr_inc = i;
+			corr_period = div_u64(lhs, rhs);
+			break;
+		}
+		lhs += NSEC_PER_SEC;
+	}
+	/* Not found? Set it to high value - double speed
+	 * correct in every clock step.
+	 */
+	if (i > fep->ptp_inc) {
+		corr_inc = fep->ptp_inc;
+		corr_period = 1;
+	}
+
+	if (neg_adj)
+		corr_ns = fep->ptp_inc - corr_inc;
+	else
+		corr_ns = fep->ptp_inc + corr_inc;
 
 	spin_lock_irqsave(&fep->tmreg_lock, flags);
-	/*
-	 * dummy read to set cycle_last in tc to now.
-	 * So use adjusted mult to calculate when next call
-	 * timercounter_read.
-	 */
-	timecounter_read(&fep->tc);
 
-	fep->cc.mult = neg_adj ? mult - diff : mult + diff;
+	tmp = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_MASK;
+	tmp |= corr_ns << FEC_T_INC_CORR_OFFSET;
+	writel(tmp, fep->hwp + FEC_ATIME_INC);
+	writel(corr_period, fep->hwp + FEC_ATIME_CORR);
+	/* dummy read to update the timer. */
+	timecounter_read(&fep->tc);
 
 	spin_unlock_irqrestore(&fep->tmreg_lock, flags);
 
@@ -190,12 +217,19 @@ static int fec_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 	    container_of(ptp, struct fec_enet_private, ptp_caps);
 	unsigned long flags;
 	u64 now;
+	u32 counter;
 
 	spin_lock_irqsave(&fep->tmreg_lock, flags);
 
 	now = timecounter_read(&fep->tc);
 	now += delta;
 
+	/* Get the timer value based on adjusted timestamp.
+	 * Update the counter with the masked value.
+	 */
+	counter = now & fep->cc.mask;
+	writel(counter, fep->hwp + FEC_ATIME);
+
 	/* reset the timecounter */
 	timecounter_init(&fep->tc, &fep->cc, now);
 
@@ -246,6 +280,7 @@ static int fec_ptp_settime(struct ptp_clock_info *ptp,
 
 	u64 ns;
 	unsigned long flags;
+	u32 counter;
 
 	mutex_lock(&fep->ptp_clk_mutex);
 	/* Check the ptp clock */
@@ -256,8 +291,13 @@ static int fec_ptp_settime(struct ptp_clock_info *ptp,
 
 	ns = ts->tv_sec * 1000000000ULL;
 	ns += ts->tv_nsec;
+	/* Get the timer value based on timestamp.
+	 * Update the counter with the masked value.
+	 */
+	counter = ns & fep->cc.mask;
 
 	spin_lock_irqsave(&fep->tmreg_lock, flags);
+	writel(counter, fep->hwp + FEC_ATIME);
 	timecounter_init(&fep->tc, &fep->cc, ns);
 	spin_unlock_irqrestore(&fep->tmreg_lock, flags);
 	mutex_unlock(&fep->ptp_clk_mutex);
@@ -396,6 +436,7 @@ void fec_ptp_init(struct platform_device *pdev)
 	fep->ptp_caps.enable = fec_ptp_enable;
 
 	fep->cycle_speed = clk_get_rate(fep->clk_ptp);
+	fep->ptp_inc = NSEC_PER_SEC / fep->cycle_speed;
 
 	spin_lock_init(&fep->tmreg_lock);
 
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH][net-next][V2] net: filter: fix the comments
From: Alexei Starovoitov @ 2014-10-10  6:01 UTC (permalink / raw)
  To: roy.qing.li, Daniel Borkmann; +Cc: Network Development, Alexei Starovoitov
In-Reply-To: <1412920611-2094-1-git-send-email-roy.qing.li@gmail.com>

On Thu, Oct 9, 2014 at 10:56 PM,  <roy.qing.li@gmail.com> wrote:
> From: Li RongQing <roy.qing.li@gmail.com>
>
> 1. sk_run_filter has been renamed, sk_filter() is using SK_RUN_FILTER.
> 2. Remove wrong comments about storing intermediate value.
> 3. replace sk_run_filter with __bpf_prog_run for check_load_and_stores's
> comments
>
> Cc: Alexei Starovoitov <ast@plumgrid.com>
> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>

Acked-by: Alexei Starovoitov <ast@plumgrid.com>

Thanks!

> ---
>  net/core/filter.c |    9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index fcd3f67..647b122 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -51,9 +51,9 @@
>   *     @skb: buffer to filter
>   *
>   * Run the filter code and then cut skb->data to correct size returned by
> - * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
> + * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
>   * than pkt_len we keep whole skb->data. This is the socket level
> - * wrapper to sk_run_filter. It returns 0 if the packet should
> + * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
>   * be accepted or -EPERM if the packet should be tossed.
>   *
>   */
> @@ -566,11 +566,8 @@ err:
>
>  /* Security:
>   *
> - * A BPF program is able to use 16 cells of memory to store intermediate
> - * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
> - *
>   * As we dont want to clear mem[] array for each packet going through
> - * sk_run_filter(), we check that filter loaded by user never try to read
> + * __bpf_prog_run(), we check that filter loaded by user never try to read
>   * a cell if not previously written, and we check all branches to be sure
>   * a malicious user doesn't try to abuse us.
>   */
> --
> 1.7.10.4
>

^ permalink raw reply

* Re: [PATCH net 1/3] net: bcmgenet: fix off-by-one in incrementing read pointer
From: Petri Gynther @ 2014-10-10  6:01 UTC (permalink / raw)
  To: Florian Fainelli; +Cc: netdev, David Miller, jaedon.shin
In-Reply-To: <1412903197-19193-2-git-send-email-f.fainelli@gmail.com>

Hi Florian,

On Thu, Oct 9, 2014 at 6:06 PM, Florian Fainelli <f.fainelli@gmail.com> wrote:
> Commit b629be5c8399d7c423b92135eb43a86c924d1cbc ("net: bcmgenet: check
> harder for out of memory conditions") moved the increment of the local
> read pointer *before* reading from the hardware descriptor using
> dmadesc_get_length_status(), which creates an off-by-one situation.
>
> Fix this by moving again the read_ptr increment after we have read the
> hardware descriptor to get both the control block and the read pointer
> back in sync.
>
> Fixes: b629be5c8399 ("net: bcmgenet: check harder for out of memory conditions")
> Reported-by: Jaedon Shin <jaedon.shin@gmail.com>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
>  drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
> index fff2634b6f34..f1bcebcbba80 100644
> --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
> +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
> @@ -1287,9 +1287,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv,
>
>                 rxpktprocessed++;
>
> -               priv->rx_read_ptr++;
> -               priv->rx_read_ptr &= (priv->num_rx_bds - 1);
> -

Wouldn't it be better to move the three lines:
rxpktprocessed++;
priv->rx_read_ptr++;
priv->rx_read_ptr &= (priv->num_rx_bds - 1)

as the last lines of the while-loop, after the CB refill?

-- Petri


>                 /* We do not have a backing SKB, so we do not have a
>                  * corresponding DMA mapping for this incoming packet since
>                  * bcmgenet_rx_refill always either has both skb and mapping or
> @@ -1332,6 +1329,9 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv,
>                           __func__, p_index, priv->rx_c_index,
>                           priv->rx_read_ptr, dma_length_status);
>
> +               priv->rx_read_ptr++;
> +               priv->rx_read_ptr &= (priv->num_rx_bds - 1);
> +
>                 if (unlikely(!(dma_flag & DMA_EOP) || !(dma_flag & DMA_SOP))) {
>                         netif_err(priv, rx_status, dev,
>                                   "dropping fragmented packet!\n");
> --
> 1.9.1
>

^ permalink raw reply

* [PATCH][net-next][V2] net: filter: fix the comments
From: roy.qing.li @ 2014-10-10  5:56 UTC (permalink / raw)
  To: netdev; +Cc: ast, alexei.starovoitov

From: Li RongQing <roy.qing.li@gmail.com>

1. sk_run_filter has been renamed, sk_filter() is using SK_RUN_FILTER.
2. Remove wrong comments about storing intermediate value.
3. replace sk_run_filter with __bpf_prog_run for check_load_and_stores's
comments

Cc: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
---
 net/core/filter.c |    9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index fcd3f67..647b122 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -51,9 +51,9 @@
  *	@skb: buffer to filter
  *
  * Run the filter code and then cut skb->data to correct size returned by
- * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to sk_run_filter. It returns 0 if the packet should
+ * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -566,11 +566,8 @@ err:
 
 /* Security:
  *
- * A BPF program is able to use 16 cells of memory to store intermediate
- * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
- *
  * As we dont want to clear mem[] array for each packet going through
- * sk_run_filter(), we check that filter loaded by user never try to read
+ * __bpf_prog_run(), we check that filter loaded by user never try to read
  * a cell if not previously written, and we check all branches to be sure
  * a malicious user doesn't try to abuse us.
  */
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH net 2/5] igb: fix race accessing page->_count
From: Jeff Kirsher @ 2014-10-10  5:55 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, netdev, Alexander Duyck, Andres Lagar-Cavilla,
	Greg Thelen, Hugh Dickins, David Rientjes
In-Reply-To: <1412918694-22882-3-git-send-email-edumazet@google.com>

On Thu, Oct 9, 2014 at 10:24 PM, Eric Dumazet <edumazet@google.com> wrote:
> This is illegal to use atomic_set(&page->_count, 2) even if we 'own'
> the page. Other entities in the kernel need to use get_page_unless_zero()
> to get a reference to the page before testing page properties, so we could
> loose a refcount increment.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Since this is part of a series, if the changes to skbuff are ok, then
the changes to the Intel drivers are ok.

> ---
>  drivers/net/ethernet/intel/igb/igb_main.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
> index ae59c0b108c5..a21b14495ebd 100644
> --- a/drivers/net/ethernet/intel/igb/igb_main.c
> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
> @@ -6545,11 +6545,10 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
>         /* flip page offset to other buffer */
>         rx_buffer->page_offset ^= IGB_RX_BUFSZ;
>
> -       /* since we are the only owner of the page and we need to
> -        * increment it, just set the value to 2 in order to avoid
> -        * an unnecessary locked operation
> +       /* Even if we own the page, we are not allowed to use atomic_set()
> +        * This would break get_page_unless_zero() users.
>          */
> -       atomic_set(&page->_count, 2);
> +       atomic_inc(&page->_count);
>  #else
>         /* move offset up to the next cache line */
>         rx_buffer->page_offset += truesize;
> --
> 2.1.0.rc2.206.gedb03e5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Cheers,
Jeff

^ permalink raw reply

* Re: [PATCH net 3/5] igb: fix race accessing page->_count
From: Jeff Kirsher @ 2014-10-10  5:54 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, netdev, Alexander Duyck, Andres Lagar-Cavilla,
	Greg Thelen, Hugh Dickins, David Rientjes
In-Reply-To: <1412918694-22882-4-git-send-email-edumazet@google.com>

On Thu, Oct 9, 2014 at 10:24 PM, Eric Dumazet <edumazet@google.com> wrote:
> This is illegal to use atomic_set(&page->_count, 2) even if we 'own'
> the page. Other entities in the kernel need to use get_page_unless_zero()
> to get a reference to the page before testing page properties, so we could
> loose a refcount increment.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Change the title to "ixgbe: ...", then you have my ACK.
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Since this is part of a series, if the changes to skbuff are ok, then
the changes to the Intel drivers are ok.

> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 +++-----
>  1 file changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index d677b5a23b58..fec5212d4337 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -1865,12 +1865,10 @@ static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
>         /* flip page offset to other buffer */
>         rx_buffer->page_offset ^= truesize;
>
> -       /*
> -        * since we are the only owner of the page and we need to
> -        * increment it, just set the value to 2 in order to avoid
> -        * an unecessary locked operation
> +       /* Even if we own the page, we are not allowed to use atomic_set()
> +        * This would break get_page_unless_zero() users.
>          */
> -       atomic_set(&page->_count, 2);
> +       atomic_inc(&page->_count);
>  #else
>         /* move offset up to the next cache line */
>         rx_buffer->page_offset += truesize;
> --
> 2.1.0.rc2.206.gedb03e5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Cheers,
Jeff

^ permalink raw reply

* Re: [PATCH net 1/5] fm10k: fix race accessing page->_count
From: Jeff Kirsher @ 2014-10-10  5:53 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, netdev, Alexander Duyck, Andres Lagar-Cavilla,
	Greg Thelen, Hugh Dickins, David Rientjes
In-Reply-To: <1412918694-22882-2-git-send-email-edumazet@google.com>

On Thu, Oct 9, 2014 at 10:24 PM, Eric Dumazet <edumazet@google.com> wrote:
> This is illegal to use atomic_set(&page->_count, 2) even if we 'own'
> the page. Other entities in the kernel need to use get_page_unless_zero()
> to get a reference to the page before testing page properties, so we could
> loose a refcount increment.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Since this is part of a series, if the changes to skbuff are ok, then
the changes to the Intel drivers are ok.

> ---
>  drivers/net/ethernet/intel/fm10k/fm10k_main.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
> index 6c800a330d66..9d7118a0d67a 100644
> --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
> +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
> @@ -219,11 +219,10 @@ static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
>         /* flip page offset to other buffer */
>         rx_buffer->page_offset ^= FM10K_RX_BUFSZ;
>
> -       /* since we are the only owner of the page and we need to
> -        * increment it, just set the value to 2 in order to avoid
> -        * an unnecessary locked operation
> +       /* Even if we own the page, we are not allowed to use atomic_set()
> +        * This would break get_page_unless_zero() users.
>          */
> -       atomic_set(&page->_count, 2);
> +       atomic_inc(&page->_count);
>  #else
>         /* move offset up to the next cache line */
>         rx_buffer->page_offset += truesize;
> --
> 2.1.0.rc2.206.gedb03e5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Cheers,
Jeff

^ permalink raw reply

* Re: [PATCH net] stmmac: correct mc_filter local variable in set_filter and set_mac_addr call
From: Giuseppe CAVALLARO @ 2014-10-10  5:47 UTC (permalink / raw)
  To: Vince Bridgers, netdev, linux-kernel; +Cc: vbridger
In-Reply-To: <1412867436-22153-1-git-send-email-vbridger@opensource.altera.com>

On 10/9/2014 5:10 PM, Vince Bridgers wrote:
> Testing revealed that the local variable mc_filter was dimensioned
> incorrectly for all possible configurations and get_mac_addr should
> have been set_mac_addr (a typo). Make sure mc_filter is dimensioned
> to 8 32-bit unsigned longs - the largest size of the Synopsys
> multicast filter register set.
>
> Signed-off-by: Vince Bridgers <vbridger@opensource.altera.com>

Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>

> ---
>   drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
> index 5efe60e..0adcf73 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
> @@ -134,7 +134,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
>   	void __iomem *ioaddr = (void __iomem *)dev->base_addr;
>   	unsigned int value = 0;
>   	unsigned int perfect_addr_number = hw->unicast_filter_entries;
> -	u32 mc_filter[2];
> +	u32 mc_filter[8];
>   	int mcbitslog2 = hw->mcast_bits_log2;
>
>   	pr_debug("%s: # mcasts %d, # unicast %d\n", __func__,
> @@ -182,7 +182,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
>   		struct netdev_hw_addr *ha;
>
>   		netdev_for_each_uc_addr(ha, dev) {
> -			stmmac_get_mac_addr(ioaddr, ha->addr,
> +			stmmac_set_mac_addr(ioaddr, ha->addr,
>   					    GMAC_ADDR_HIGH(reg),
>   					    GMAC_ADDR_LOW(reg));
>   			reg++;
>

^ permalink raw reply

* Re: [PATCH net 0/5] net: fix races accessing page->_count
From: Eric Dumazet @ 2014-10-10  5:42 UTC (permalink / raw)
  To: Jeff Kirsher
  Cc: Eric Dumazet, David S. Miller, netdev, Alexander Duyck,
	Andres Lagar-Cavilla, Greg Thelen, Hugh Dickins, David Rientjes
In-Reply-To: <CAL3LdT4B3eN55ALtJFxC0ZxgkopOg+5B5JbpDajXJ=0-QOVfuA@mail.gmail.com>

On Thu, 2014-10-09 at 22:37 -0700, Jeff Kirsher wrote:

> Looks like the ixgbe patch has the incorrect title, or you patch igb twice. :-)

Yes, typo in the title, but content is OK, sorry.

^ permalink raw reply

* Re: [PATCH net 0/5] net: fix races accessing page->_count
From: Jeff Kirsher @ 2014-10-10  5:37 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, netdev, Alexander Duyck, Andres Lagar-Cavilla,
	Greg Thelen, Hugh Dickins, David Rientjes
In-Reply-To: <1412918694-22882-1-git-send-email-edumazet@google.com>

On Thu, Oct 9, 2014 at 10:24 PM, Eric Dumazet <edumazet@google.com> wrote:
> This is illegal to use atomic_set(&page->_count, ...) even if we 'own'
> the page. Other entities in the kernel need to use get_page_unless_zero()
> to get a reference to the page before testing page properties, so we could
> loose a refcount increment.
>
> The only case it is valid is when page->_count is 0, we can use this in
> __netdev_alloc_frag()
>
> Note that I never seen crashes caused by these races, the issue was reported
> by Andres Lagar-Cavilla and Hugh Dickins.
>
> Eric Dumazet (5):
>   fm10k: fix race accessing page->_count
>   igb: fix race accessing page->_count
>   igb: fix race accessing page->_count

Looks like the ixgbe patch has the incorrect title, or you patch igb twice. :-)

>   mlx4: fix race accessing page->_count
>   net: fix races in page->_count manipulation
>
>  drivers/net/ethernet/intel/fm10k/fm10k_main.c |  7 +++----
>  drivers/net/ethernet/intel/igb/igb_main.c     |  7 +++----
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  8 +++-----
>  drivers/net/ethernet/mellanox/mlx4/en_rx.c    |  6 +++---
>  net/core/skbuff.c                             | 25 ++++++++++++++++++-------
>  5 files changed, 30 insertions(+), 23 deletions(-)
>
> --
> 2.1.0.rc2.206.gedb03e5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Cheers,
Jeff

^ permalink raw reply

* [PATCH net 5/5] net: fix races in page->_count manipulation
From: Eric Dumazet @ 2014-10-10  5:24 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Alexander Duyck, Andres Lagar-Cavilla, Greg Thelen,
	Hugh Dickins, David Rientjes, Eric Dumazet
In-Reply-To: <1412918694-22882-1-git-send-email-edumazet@google.com>

It is illegal to use atomic_set(&page->_count, ...) even if we 'own'
the page. Other entities in the kernel need to use get_page_unless_zero()
to get a reference to the page before testing page properties, so we could
lose a refcount increment.

The only case where it is valid is when page->_count is 0.

Fixes: 540eb7bf0bbed ("net: Update alloc frag to reduce get/put page usage and recycle pages")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/core/skbuff.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a30d750647e7..829d013745ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -360,18 +360,29 @@ refill:
 				goto end;
 		}
 		nc->frag.size = PAGE_SIZE << order;
-recycle:
-		atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+		/* Even if we own the page, we do not use atomic_set().
+		 * This would break get_page_unless_zero() users.
+		 */
+		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+			   &nc->frag.page->_count);
 		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
 		nc->frag.offset = 0;
 	}
 
 	if (nc->frag.offset + fragsz > nc->frag.size) {
-		/* avoid unnecessary locked operations if possible */
-		if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
-		    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
-			goto recycle;
-		goto refill;
+		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+			if (!atomic_sub_and_test(nc->pagecnt_bias,
+						 &nc->frag.page->_count))
+				goto refill;
+			/* OK, page count is 0, we can safely set it */
+			atomic_set(&nc->frag.page->_count,
+				   NETDEV_PAGECNT_MAX_BIAS);
+		} else {
+			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+				   &nc->frag.page->_count);
+		}
+		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+		nc->frag.offset = 0;
 	}
 
 	data = page_address(nc->frag.page) + nc->frag.offset;
-- 
2.1.0.rc2.206.gedb03e5

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox