* [PATCH ethtool v3 2/2] Ethtool: Implements ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE and PHY downshift
From: Allan W. Nielsen @ 2016-11-17 12:08 UTC (permalink / raw)
To: netdev; +Cc: andrew, f.fainelli, raju.lakkaraju, allan.nielsen, Raju Lakkaraju
In-Reply-To: <1479384480-31201-1-git-send-email-allan.nielsen@microsemi.com>
From: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Add ethtool get and set tunable to access PHY drivers.
Ethtool Help: ethtool -h for PHY tunables
ethtool --set-phy-tunable DEVNAME Set PHY tunable
[ downshift on|off [count N] ]
ethtool --get-phy-tunable DEVNAME Get PHY tunable
[ downshift ]
Ethtool ex:
ethtool --set-phy-tunable eth0 downshift on
ethtool --set-phy-tunable eth0 downshift off
ethtool --set-phy-tunable eth0 downshift on count 2
ethtool --get-phy-tunable eth0 downshift
Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
---
ethtool.8.in | 39 ++++++++++++++++
ethtool.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 183 insertions(+)
diff --git a/ethtool.8.in b/ethtool.8.in
index 9631847..337d0cf 100644
--- a/ethtool.8.in
+++ b/ethtool.8.in
@@ -340,6 +340,18 @@ ethtool \- query or control network driver and hardware settings
.B2 tx-lpi on off
.BN tx-timer
.BN advertise
+.HP
+.B ethtool \-\-set\-phy\-tunable
+.I devname
+.RB [
+.B downshift
+.A1 on off
+.BN count
+.RB ]
+.HP
+.B ethtool \-\-get\-phy\-tunable
+.I devname
+.RB [ downshift ]
.
.\" Adjust lines (i.e. full justification) and hyphenate.
.ad
@@ -947,6 +959,33 @@ Values are as for
Sets the amount of time the device should stay in idle mode prior to asserting
its Tx LPI (in microseconds). This has meaning only when Tx LPI is enabled.
.RE
+.TP
+.B \-\-set\-phy\-tunable
+Sets the PHY tunable parameters.
+.RS 4
+.TP
+.A2 downshift on off
+Specifies whether downshift should be enabled
+.TS
+nokeep;
+lB l.
+.BI count \ N
+Sets the PHY downshift re-tries count.
+.TE
+.PD
+.RE
+.TP
+.B \-\-get\-phy\-tunable
+Gets the PHY tunable parameters.
+.RS 4
+.TP
+.B downshift
+For operation in cabling environments that are incompatible with 1000BASE-T,
+PHY device provides an automatic link speed downshift operation.
+Link speed downshift after N failed 1000BASE-T auto-negotiation attempts.
+
+Gets the PHY downshift count/status.
+.RE
.SH BUGS
Not supported (in part or whole) on all network drivers.
.SH AUTHOR
diff --git a/ethtool.c b/ethtool.c
index 49ac94e..7dcd005 100644
--- a/ethtool.c
+++ b/ethtool.c
@@ -4520,6 +4520,146 @@ static int do_seee(struct cmd_context *ctx)
return 0;
}
+static int do_get_phy_tunable(struct cmd_context *ctx)
+{
+ int argc = ctx->argc;
+ char **argp = ctx->argp;
+ int err, i;
+ u8 downshift_changed = 0;
+
+ if (argc < 1)
+ exit_bad_args();
+ for (i = 0; i < argc; i++) {
+ if (!strcmp(argp[i], "downshift")) {
+ downshift_changed = 1;
+ i += 1;
+ if (i < argc)
+ exit_bad_args();
+ } else {
+ exit_bad_args();
+ }
+ }
+
+ if (downshift_changed) {
+ struct ethtool_tunable ds;
+ u8 count = 0;
+
+ ds.cmd = ETHTOOL_PHY_GTUNABLE;
+ ds.id = ETHTOOL_PHY_DOWNSHIFT;
+ ds.type_id = ETHTOOL_TUNABLE_U8;
+ ds.len = 1;
+ ds.data[0] = &count;
+ err = send_ioctl(ctx, &ds);
+ if (err < 0) {
+ perror("Cannot Get PHY downshift count");
+ return 87;
+ }
+ count = *((u8 *)&ds.data[0]);
+ if (count)
+ fprintf(stdout, "Downshift count: %d\n", count);
+ else
+ fprintf(stdout, "Downshift disabled\n");
+ }
+
+ return err;
+}
+
+static int parse_named_bool(struct cmd_context *ctx, const char *name, u8 *on)
+{
+ if (ctx->argc < 2)
+ return 0;
+
+ if (strcmp(*ctx->argp, name))
+ return 0;
+
+ if (!strcmp(*(ctx->argp + 1), "on")) {
+ *on = 1;
+ } else if (!strcmp(*(ctx->argp + 1), "off")) {
+ *on = 0;
+ } else {
+ fprintf(stderr, "Invalid boolean\n");
+ exit_bad_args();
+ }
+
+ ctx->argc -= 2;
+ ctx->argp += 2;
+
+ return 1;
+}
+
+static int parse_named_u8(struct cmd_context *ctx, const char *name, u8 *val)
+{
+ if (ctx->argc < 2)
+ return 0;
+
+ if (strcmp(*ctx->argp, name))
+ return 0;
+
+ *val = get_uint_range(*(ctx->argp + 1), 0, 0xff);
+
+ ctx->argc -= 2;
+ ctx->argp += 2;
+
+ return 1;
+}
+
+static int do_set_phy_tunable(struct cmd_context *ctx)
+{
+ int err = 0;
+ u8 ds_cnt = DOWNSHIFT_DEV_DEFAULT_COUNT;
+ u8 ds_changed = 0, ds_has_cnt = 0, ds_enable = 0;
+
+ if (ctx->argc == 0)
+ exit_bad_args();
+
+ /* Parse arguments */
+ while (ctx->argc) {
+ if (parse_named_bool(ctx, "downshift", &ds_enable)) {
+ ds_changed = 1;
+ ds_has_cnt = parse_named_u8(ctx, "count", &ds_cnt);
+ } else {
+ exit_bad_args();
+ }
+ }
+
+ /* Validate parameters */
+ if (ds_changed) {
+ if (!ds_enable && ds_has_cnt) {
+ fprintf(stderr, "'count' may not be set when downshift "
+ "is off.\n");
+ exit_bad_args();
+ }
+
+ if (ds_enable && ds_has_cnt && ds_cnt == 0) {
+ fprintf(stderr, "'count' may not be zero.\n");
+ exit_bad_args();
+ }
+
+ if (!ds_enable)
+ ds_cnt = DOWNSHIFT_DEV_DISABLE;
+ }
+
+ /* Do it */
+ if (ds_changed) {
+ struct ethtool_tunable ds;
+ u8 count;
+
+ ds.cmd = ETHTOOL_PHY_STUNABLE;
+ ds.id = ETHTOOL_PHY_DOWNSHIFT;
+ ds.type_id = ETHTOOL_TUNABLE_U8;
+ ds.len = 1;
+ ds.data[0] = &count;
+ *((u8 *)&ds.data[0]) = ds_cnt;
+ err = send_ioctl(ctx, &ds);
+ if (err < 0) {
+ perror("Cannot Set PHY downshift count");
+ err = 87;
+ }
+ }
+
+ return err;
+}
+
#ifndef TEST_ETHTOOL
int send_ioctl(struct cmd_context *ctx, void *cmd)
{
@@ -4681,6 +4821,10 @@ static const struct option {
" [ advertise %x ]\n"
" [ tx-lpi on|off ]\n"
" [ tx-timer %d ]\n"},
+ { "--set-phy-tunable", 1, do_set_phy_tunable, "Set PHY tunable",
+ " [ downshift on|off [count N] ]\n"},
+ { "--get-phy-tunable", 1, do_get_phy_tunable, "Get PHY tunable",
+ " [ downshift ]\n"},
{ "-h|--help", 0, show_usage, "Show this help" },
{ "--version", 0, do_version, "Show version number" },
{}
--
2.7.3
^ permalink raw reply related
* [PATCH net-next v3 4/5] ethtool: Core impl for ETHTOOL_PHY_DOWNSHIFT tunable
From: Allan W. Nielsen @ 2016-11-17 12:07 UTC (permalink / raw)
To: netdev; +Cc: andrew, f.fainelli, raju.lakkaraju, allan.nielsen, Raju Lakkaraju
In-Reply-To: <1479384444-31122-1-git-send-email-allan.nielsen@microsemi.com>
From: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Add validation support for the ETHTOOL_PHY_DOWNSHIFT tunable. The functional
implementation needs to be done in the individual PHY drivers.
Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
---
net/core/ethtool.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 61aebdf..e9b45567 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -122,6 +122,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
static const char
phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
[ETHTOOL_ID_UNSPEC] = "Unspec",
+ [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
};
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
@@ -2435,6 +2436,11 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna)
{
switch (tuna->id) {
+ case ETHTOOL_PHY_DOWNSHIFT:
+ if (tuna->len != sizeof(u8) ||
+ tuna->type_id != ETHTOOL_TUNABLE_U8)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
--
2.7.3
^ permalink raw reply related
* Re: Netperf UDP issue with connected sockets
From: Rick Jones @ 2016-11-17 17:42 UTC (permalink / raw)
To: Jesper Dangaard Brouer, Eric Dumazet; +Cc: netdev, brouer
In-Reply-To: <20161117091638.5fab8494@redhat.com>
On 11/17/2016 12:16 AM, Jesper Dangaard Brouer wrote:
>> time to try IP_MTU_DISCOVER ;)
>
> To Rick, maybe you can find a good solution or option with Eric's hint,
> to send appropriate sized UDP packets with Don't Fragment (DF).
Well, I suppose adding another setsockopt() to the data socket creation
wouldn't be too difficult, along with another command-line option to
cause it to happen.
Could we leave things as "make sure you don't need fragmentation when
you use this" or would netperf have to start processing ICMP messages?
happy benchmarking,
rick jones
^ permalink raw reply
* [PATCH] net: sky2: Fix shutdown crash
From: Jeremy Linton @ 2016-11-17 15:14 UTC (permalink / raw)
To: netdev; +Cc: mlindner, stephen, Sudeep.Holla
The sky2 frequently crashes during machine shutdown with:
sky2_get_stats+0x60/0x3d8 [sky2]
dev_get_stats+0x68/0xd8
rtnl_fill_stats+0x54/0x140
rtnl_fill_ifinfo+0x46c/0xc68
rtmsg_ifinfo_build_skb+0x7c/0xf0
rtmsg_ifinfo.part.22+0x3c/0x70
rtmsg_ifinfo+0x50/0x5c
netdev_state_change+0x4c/0x58
linkwatch_do_dev+0x50/0x88
__linkwatch_run_queue+0x104/0x1a4
linkwatch_event+0x30/0x3c
process_one_work+0x140/0x3e0
worker_thread+0x60/0x44c
kthread+0xdc/0xf0
ret_from_fork+0x10/0x50
This is caused by the sky2 driver being called after it has been shut down.
A previous thread about this can be found here:
https://lkml.org/lkml/2016/4/12/410
An alternative fix is to ensure that IFF_UP gets cleared by
calling dev_close() during shutdown. This is similar to what the
bnx2/tg3/xgene and maybe other drivers are doing to ensure that the driver
isn't being called following _shutdown().
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
---
drivers/net/ethernet/marvell/sky2.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index f05ea56..941c8e2 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -5220,6 +5220,19 @@ static SIMPLE_DEV_PM_OPS(sky2_pm_ops, sky2_suspend, sky2_resume);
static void sky2_shutdown(struct pci_dev *pdev)
{
+ struct sky2_hw *hw = pci_get_drvdata(pdev);
+ int port;
+
+ for (port = 0; port < hw->ports; port++) {
+ struct net_device *ndev = hw->dev[port];
+
+ rtnl_lock();
+ if (netif_running(ndev)) {
+ dev_close(ndev);
+ netif_device_detach(ndev);
+ }
+ rtnl_unlock();
+ }
sky2_suspend(&pdev->dev);
pci_wake_from_d3(pdev, device_may_wakeup(&pdev->dev));
pci_set_power_state(pdev, PCI_D3hot);
--
2.5.5
^ permalink raw reply related
* [PATCH net-next v3 5/5] net: phy: Add downshift get/set support in Microsemi PHYs driver
From: Allan W. Nielsen @ 2016-11-17 12:07 UTC (permalink / raw)
To: netdev; +Cc: andrew, f.fainelli, raju.lakkaraju, allan.nielsen, Raju Lakkaraju
In-Reply-To: <1479384444-31122-1-git-send-email-allan.nielsen@microsemi.com>
From: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Implement the PHY tunable function pointers and the downshift
functionality for MSCC PHYs.
Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
---
drivers/net/phy/mscc.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 100 insertions(+)
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
index d0026ab..92018ba 100644
--- a/drivers/net/phy/mscc.c
+++ b/drivers/net/phy/mscc.c
@@ -46,8 +46,15 @@ enum rgmii_rx_clock_delay {
#define MSCC_EXT_PAGE_ACCESS 31
#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */
+#define MSCC_PHY_PAGE_EXTENDED 0x0001 /* Extended registers */
#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */
+/* Extended Page 1 Registers */
+#define MSCC_PHY_ACTIPHY_CNTL 20
+#define DOWNSHIFT_CNTL_MASK 0x001C
+#define DOWNSHIFT_EN 0x0010
+#define DOWNSHIFT_CNTL_POS 2
+
/* Extended Page 2 Registers */
#define MSCC_PHY_RGMII_CNTL 20
#define RGMII_RX_CLK_DELAY_MASK 0x0070
@@ -75,6 +82,8 @@ enum rgmii_rx_clock_delay {
#define MSCC_VDDMAC_2500 2500
#define MSCC_VDDMAC_3300 3300
+#define DOWNSHIFT_COUNT_MAX 5
+
struct vsc8531_private {
int rate_magic;
};
@@ -101,6 +110,66 @@ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page)
return rc;
}
+static int vsc85xx_downshift_get(struct phy_device *phydev, u8 *count)
+{
+ int rc;
+ u16 reg_val;
+
+ mutex_lock(&phydev->lock);
+ rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED);
+ if (rc != 0)
+ goto out_unlock;
+
+ reg_val = phy_read(phydev, MSCC_PHY_ACTIPHY_CNTL);
+ reg_val &= DOWNSHIFT_CNTL_MASK;
+ if (!(reg_val & DOWNSHIFT_EN))
+ *count = DOWNSHIFT_DEV_DISABLE;
+ else
+ *count = ((reg_val & ~DOWNSHIFT_EN) >> DOWNSHIFT_CNTL_POS) + 2;
+ rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+
+out_unlock:
+ mutex_unlock(&phydev->lock);
+
+ return rc;
+}
+
+static int vsc85xx_downshift_set(struct phy_device *phydev, u8 count)
+{
+ int rc;
+ u16 reg_val;
+
+ if (count == DOWNSHIFT_DEV_DEFAULT_COUNT) {
+ /* Default downshift count 3 (i.e. Bit3:2 = 0b01) */
+ count = ((1 << DOWNSHIFT_CNTL_POS) | DOWNSHIFT_EN);
+ } else if (count > DOWNSHIFT_COUNT_MAX || count == 1) {
+ phydev_err(phydev, "Downshift count should be 2,3,4 or 5\n");
+ return -ERANGE;
+ } else if (count) {
+ /* Downshift count is either 2,3,4 or 5 */
+ count = (((count - 2) << DOWNSHIFT_CNTL_POS) | DOWNSHIFT_EN);
+ }
+
+ mutex_lock(&phydev->lock);
+ rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED);
+ if (rc != 0)
+ goto out_unlock;
+
+ reg_val = phy_read(phydev, MSCC_PHY_ACTIPHY_CNTL);
+ reg_val &= ~(DOWNSHIFT_CNTL_MASK);
+ reg_val |= count;
+ rc = phy_write(phydev, MSCC_PHY_ACTIPHY_CNTL, reg_val);
+ if (rc != 0)
+ goto out_unlock;
+
+ rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+
+out_unlock:
+ mutex_unlock(&phydev->lock);
+
+ return rc;
+}
+
static int vsc85xx_wol_set(struct phy_device *phydev,
struct ethtool_wolinfo *wol)
{
@@ -329,6 +398,29 @@ static int vsc85xx_default_config(struct phy_device *phydev)
return rc;
}
+static int vsc85xx_get_tunable(struct phy_device *phydev,
+ struct ethtool_tunable *tuna, void *data)
+{
+ switch (tuna->id) {
+ case ETHTOOL_PHY_DOWNSHIFT:
+ return vsc85xx_downshift_get(phydev, (u8 *)data);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int vsc85xx_set_tunable(struct phy_device *phydev,
+ struct ethtool_tunable *tuna,
+ const void *data)
+{
+ switch (tuna->id) {
+ case ETHTOOL_PHY_DOWNSHIFT:
+ return vsc85xx_downshift_set(phydev, *(u8 *)data);
+ default:
+ return -EINVAL;
+ }
+}
+
static int vsc85xx_config_init(struct phy_device *phydev)
{
int rc;
@@ -418,6 +510,8 @@ static struct phy_driver vsc85xx_driver[] = {
.probe = &vsc85xx_probe,
.set_wol = &vsc85xx_wol_set,
.get_wol = &vsc85xx_wol_get,
+ .get_tunable = &vsc85xx_get_tunable,
+ .set_tunable = &vsc85xx_set_tunable,
},
{
.phy_id = PHY_ID_VSC8531,
@@ -437,6 +531,8 @@ static struct phy_driver vsc85xx_driver[] = {
.probe = &vsc85xx_probe,
.set_wol = &vsc85xx_wol_set,
.get_wol = &vsc85xx_wol_get,
+ .get_tunable = &vsc85xx_get_tunable,
+ .set_tunable = &vsc85xx_set_tunable,
},
{
.phy_id = PHY_ID_VSC8540,
@@ -456,6 +552,8 @@ static struct phy_driver vsc85xx_driver[] = {
.probe = &vsc85xx_probe,
.set_wol = &vsc85xx_wol_set,
.get_wol = &vsc85xx_wol_get,
+ .get_tunable = &vsc85xx_get_tunable,
+ .set_tunable = &vsc85xx_set_tunable,
},
{
.phy_id = PHY_ID_VSC8541,
@@ -475,6 +573,8 @@ static struct phy_driver vsc85xx_driver[] = {
.probe = &vsc85xx_probe,
.set_wol = &vsc85xx_wol_set,
.get_wol = &vsc85xx_wol_get,
+ .get_tunable = &vsc85xx_get_tunable,
+ .set_tunable = &vsc85xx_set_tunable,
}
};
--
2.7.3
^ permalink raw reply related
* [PATCH net-next v3 2/5] ethtool: Implements ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE
From: Allan W. Nielsen @ 2016-11-17 12:07 UTC (permalink / raw)
To: netdev; +Cc: andrew, f.fainelli, raju.lakkaraju, allan.nielsen, Raju Lakkaraju
In-Reply-To: <1479384444-31122-1-git-send-email-allan.nielsen@microsemi.com>
From: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Add get_tunable/set_tunable function pointers to the phy_driver
structure, and use these function pointers to implement the
ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE ioctls.
Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
---
include/linux/phy.h | 7 +++++
net/core/ethtool.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 94 insertions(+)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 9880d73..3d35c36 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -611,6 +611,13 @@ struct phy_driver {
void (*get_strings)(struct phy_device *dev, u8 *data);
void (*get_stats)(struct phy_device *dev,
struct ethtool_stats *stats, u64 *data);
+
+ /* Get and Set PHY tunables */
+ int (*get_tunable)(struct phy_device *dev,
+ struct ethtool_tunable *tuna, void *data);
+ int (*set_tunable)(struct phy_device *dev,
+ struct ethtool_tunable *tuna,
+ const void *data);
};
#define to_phy_driver(d) container_of(to_mdio_common_driver(d), \
struct phy_driver, mdiodrv)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9774898..61aebdf 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -119,6 +119,11 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
[ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
};
+static const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_ID_UNSPEC] = "Unspec",
+};
+
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gfeatures cmd = {
@@ -227,6 +232,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
if (sset == ETH_SS_TUNABLES)
return ARRAY_SIZE(tunable_strings);
+ if (sset == ETH_SS_PHY_TUNABLES)
+ return ARRAY_SIZE(phy_tunable_strings);
+
if (sset == ETH_SS_PHY_STATS) {
if (dev->phydev)
return phy_get_sset_count(dev->phydev);
@@ -253,6 +261,8 @@ static void __ethtool_get_strings(struct net_device *dev,
sizeof(rss_hash_func_strings));
else if (stringset == ETH_SS_TUNABLES)
memcpy(data, tunable_strings, sizeof(tunable_strings));
+ else if (stringset == ETH_SS_PHY_TUNABLES)
+ memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings));
else if (stringset == ETH_SS_PHY_STATS) {
struct phy_device *phydev = dev->phydev;
@@ -2422,6 +2432,76 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
};
}
+static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna)
+{
+ switch (tuna->id) {
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
+{
+ int ret;
+ struct ethtool_tunable tuna;
+ struct phy_device *phydev = dev->phydev;
+ void *data;
+
+ if (!(phydev && phydev->drv && phydev->drv->get_tunable))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+ return -EFAULT;
+ ret = ethtool_phy_tunable_valid(&tuna);
+ if (ret)
+ return ret;
+ data = kmalloc(tuna.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+ ret = phydev->drv->get_tunable(phydev, &tuna, data);
+ if (ret)
+ goto out;
+ useraddr += sizeof(tuna);
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, data, tuna.len))
+ goto out;
+ ret = 0;
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
+{
+ int ret;
+ struct ethtool_tunable tuna;
+ struct phy_device *phydev = dev->phydev;
+ void *data;
+
+ if (!(phydev && phydev->drv && phydev->drv->set_tunable))
+ return -EOPNOTSUPP;
+ if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+ return -EFAULT;
+ ret = ethtool_phy_tunable_valid(&tuna);
+ if (ret)
+ return ret;
+ data = kmalloc(tuna.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+ useraddr += sizeof(tuna);
+ ret = -EFAULT;
+ if (copy_from_user(data, useraddr, tuna.len))
+ goto out;
+ ret = phydev->drv->set_tunable(phydev, &tuna, data);
+
+out:
+ kfree(data);
+ return ret;
+}
+
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -2479,6 +2559,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GET_TS_INFO:
case ETHTOOL_GEEE:
case ETHTOOL_GTUNABLE:
+ case ETHTOOL_PHY_GTUNABLE:
break;
default:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -2684,6 +2765,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SLINKSETTINGS:
rc = ethtool_set_link_ksettings(dev, useraddr);
break;
+ case ETHTOOL_PHY_GTUNABLE:
+ rc = get_phy_tunable(dev, useraddr);
+ break;
+ case ETHTOOL_PHY_STUNABLE:
+ rc = set_phy_tunable(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
--
2.7.3
^ permalink raw reply related
* [PATCH ethtool v3 0/2] Adding downshift support to ethtool
From: Allan W. Nielsen @ 2016-11-17 12:07 UTC (permalink / raw)
To: netdev; +Cc: andrew, f.fainelli, raju.lakkaraju, allan.nielsen
Hi All,
(This is a re-post of the v3 patch set with a new cover letter - I was not
aware that cover letters were used as commit comments in merge commits).
This patch set implements getting and setting of PHY downshifting.
Downshifting can either be turned on/off, or it can be configured to a
specific count.
"count" is optional.
Tested on Beaglebone Black with VSC 8531 PHY.
Change set:
v1:
- Initial version of set/get phy tunable with downshift feature.
v2:
- (ethtool) Syntax is changed from "--set-phy-tunable downshift on|off|%d"
to "--set-phy-tunable [downshift on|off [count N]]" - as requested by
Andrew.
v3:
- Fixed Spelling in "ethtool-copy.h:sync with net"
- Fixed "if send_ioctl() returns an error, print the error message and then
still print the value of count".
Raju Lakkaraju (2):
ethtool-copy.h:sync with net
Ethtool: Implements ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE and PHY
downshift
ethtool-copy.h | 18 +++++++-
ethtool.8.in | 39 ++++++++++++++++
ethtool.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 200 insertions(+), 1 deletion(-)
--
2.7.4
^ permalink raw reply
* [PATCH ethtool v3 1/2] ethtool-copy.h:sync with net
From: Allan W. Nielsen @ 2016-11-17 12:07 UTC (permalink / raw)
To: netdev; +Cc: andrew, f.fainelli, raju.lakkaraju, allan.nielsen, Raju Lakkaraju
In-Reply-To: <1479384480-31201-1-git-send-email-allan.nielsen@microsemi.com>
From: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
This covers kernel changes up to:
commit f5a4732f85613b3fb43f8bc33a017e3db3b3605a
Author: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Date: Wed Nov 9 16:33:09 2016 +0530
ethtool: (uapi) Add ETHTOOL_PHY_DOWNSHIFT to PHY tunables
For operation in cabling environments that are incompatible with
1000BASE-T, PHY device may provide an automatic link speed downshift
operation. When enabled, the device automatically changes its 1000BASE-T
auto-negotiation to the next slower speed after a configured number of
failed attempts at 1000BASE-T. This feature is useful in setting up in
networks using older cable installations that include only pairs A and B,
and not pairs C and D.
Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
---
ethtool-copy.h | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/ethtool-copy.h b/ethtool-copy.h
index 70748f5..2e2448f 100644
--- a/ethtool-copy.h
+++ b/ethtool-copy.h
@@ -247,6 +247,19 @@ struct ethtool_tunable {
void *data[0];
};
+#define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff
+#define DOWNSHIFT_DEV_DISABLE 0
+
+enum phy_tunable_id {
+ ETHTOOL_PHY_ID_UNSPEC,
+ ETHTOOL_PHY_DOWNSHIFT,
+ /*
+ * Add your fresh new phy tunable attribute above and remember to update
+ * phy_tunable_strings[] in net/core/ethtool.c
+ */
+ __ETHTOOL_PHY_TUNABLE_COUNT,
+};
+
/**
* struct ethtool_regs - hardware register dump
* @cmd: Command number = %ETHTOOL_GREGS
@@ -547,6 +560,7 @@ struct ethtool_pauseparam {
* @ETH_SS_FEATURES: Device feature names
* @ETH_SS_RSS_HASH_FUNCS: RSS hush function names
* @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
+ * @ETH_SS_PHY_TUNABLES: PHY tunable names
*/
enum ethtool_stringset {
ETH_SS_TEST = 0,
@@ -557,6 +571,7 @@ enum ethtool_stringset {
ETH_SS_RSS_HASH_FUNCS,
ETH_SS_TUNABLES,
ETH_SS_PHY_STATS,
+ ETH_SS_PHY_TUNABLES,
};
/**
@@ -1312,7 +1327,8 @@ struct ethtool_per_queue_op {
#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */
#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */
-
+#define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */
+#define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */
/* compatibility with older code */
#define SPARC_ETH_GSET ETHTOOL_GSET
--
2.7.3
^ permalink raw reply related
* [PATCH net-next v3 3/5] ethtool: (uapi) Add ETHTOOL_PHY_DOWNSHIFT to PHY tunables
From: Allan W. Nielsen @ 2016-11-17 12:07 UTC (permalink / raw)
To: netdev; +Cc: andrew, f.fainelli, raju.lakkaraju, allan.nielsen, Raju Lakkaraju
In-Reply-To: <1479384444-31122-1-git-send-email-allan.nielsen@microsemi.com>
From: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
For operation in cabling environments that are incompatible with
1000BASE-T, PHY device may provide an automatic link speed downshift
operation. When enabled, the device automatically changes its 1000BASE-T
auto-negotiation to the next slower speed after a configured number of
failed attempts at 1000BASE-T. This feature is useful when setting up
networks using older cable installations that include only pairs A and B,
and not pairs C and D.
Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
include/uapi/linux/ethtool.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 42f696f..f0db778 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -248,9 +248,12 @@ struct ethtool_tunable {
void *data[0];
};
+#define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff
+#define DOWNSHIFT_DEV_DISABLE 0
+
enum phy_tunable_id {
ETHTOOL_PHY_ID_UNSPEC,
-
+ ETHTOOL_PHY_DOWNSHIFT,
/*
* Add your fresh new phy tunable attribute above and remember to update
* phy_tunable_strings[] in net/core/ethtool.c
--
2.7.3
^ permalink raw reply related
* [PATCH net-next v3 0/5] Adding PHY-Tunables and downshift support
From: Allan W. Nielsen @ 2016-11-17 12:07 UTC (permalink / raw)
To: netdev; +Cc: andrew, f.fainelli, raju.lakkaraju, allan.nielsen
Hi All,
(This is a re-post of the v3 patch set with a new cover letter - I was not
aware that cover letters were used as commit comments in merge commits).
This series adds support for PHY tunables, and uses this facility to
configure downshifting. The downshifting mechanism is implemented for MSCC
PHYs.
This series tries to address the comments provided back in mid October when
this feature was posted along with fast-link-failure. Fast-link-failure has
been separated out, but we would like to continue with that if/when we
agree on how the phy-tunables and downshifting should be done.
The proposed generic interface is similar to
ETHTOOL_GTUNABLE/ETHTOOL_STUNABLE, it uses the same type
(ethtool_tunable/tunable_type_id) but a new enum (phy_tunable_id) is added
to reflect the PHY tunable.
The implementation just calls the newly added get_tunable/set_tunable
function pointers in the phy_driver structure.
To configure downshifting, the ethtool_tunable structure is used. 'id' must
be set to 'ETHTOOL_PHY_DOWNSHIFT', 'type_id' must be set to
'ETHTOOL_TUNABLE_U8' and the 'data' value configures the number of downshift
re-tries.
If configured to DOWNSHIFT_DEV_DISABLE, then downshift is disabled. If
configured to DOWNSHIFT_DEV_DEFAULT_COUNT, then it is up to the device to
choose a device-specific re-try count.
Tested on Beaglebone Black with VSC 8531 PHY.
Change set:
v0:
- Link Speed downshift and Fast Link failure-2 features coded by using
Device tree.
v1:
- Split the Downshift and FLF2 features in different set of patches.
- Removed DT access and implemented IOCTL access suggested by Andrew.
- Added function pointers in get_tunable/set_tunable phy_device structure
v2:
- Added a trace message with a hint, printed when downshifting could not
be enabled with the requested count
- (ethtool) Syntax is changed from "--set-phy-tunable downshift on|off|%d"
to "--set-phy-tunable [downshift on|off [count N]]" - as requested by
Andrew.
v3:
- Fixed Spelling in "net: phy: Add downshift get/set support in Microsemi
PHYs driver"
Raju Lakkaraju (5):
ethtool: (uapi) Add ETHTOOL_PHY_GTUNABLE and ETHTOOL_PHY_STUNABLE
ethtool: Implements ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE
ethtool: (uapi) Add ETHTOOL_PHY_DOWNSHIFT to PHY tunables
ethtool: Core impl for ETHTOOL_PHY_DOWNSHIFT tunable
net: phy: Add downshift get/set support in Microsemi PHYs driver
drivers/net/phy/mscc.c | 100 +++++++++++++++++++++++++++++++++++++++++++
include/linux/phy.h | 7 +++
include/uapi/linux/ethtool.h | 18 +++++++-
net/core/ethtool.c | 93 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 217 insertions(+), 1 deletion(-)
--
2.7.4
^ permalink raw reply
* RE: Netperf UDP issue with connected sockets
From: David Laight @ 2016-11-17 17:34 UTC (permalink / raw)
To: 'Jesper Dangaard Brouer', Eric Dumazet
Cc: Rick Jones, netdev@vger.kernel.org
In-Reply-To: <20161117155753.17b76f5a@redhat.com>
From: Jesper Dangaard Brouer
> Sent: 17 November 2016 14:58
> On Thu, 17 Nov 2016 06:17:38 -0800
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> > On Thu, 2016-11-17 at 14:42 +0100, Jesper Dangaard Brouer wrote:
> >
> > > I can see that qdisc layer does not activate xmit_more in this case.
> > >
> >
> > Sure. Not enough pressure from the sender(s).
> >
> > The bottleneck is not the NIC or qdisc in your case, meaning that BQL
> > limit is kept at a small value.
> >
> > (BTW not all NIC have expensive doorbells)
>
> I believe this NIC mlx5 (50G edition) does.
>
> I'm seeing UDP TX of 1656017.55 pps, which is per packet:
> 2414 cycles(tsc) 603.86 ns
>
> Perf top shows (with my own udp_flood, that avoids __ip_select_ident):
>
> Samples: 56K of event 'cycles', Event count (approx.): 51613832267
> Overhead Command Shared Object Symbol
> + 8.92% udp_flood [kernel.vmlinux] [k] _raw_spin_lock
> - _raw_spin_lock
> + 90.78% __dev_queue_xmit
> + 7.83% dev_queue_xmit
> + 1.30% ___slab_alloc
> + 5.59% udp_flood [kernel.vmlinux] [k] skb_set_owner_w
> + 4.77% udp_flood [mlx5_core] [k] mlx5e_sq_xmit
> + 4.09% udp_flood [kernel.vmlinux] [k] fib_table_lookup
> + 4.00% swapper [mlx5_core] [k] mlx5e_poll_tx_cq
> + 3.11% udp_flood [kernel.vmlinux] [k] __ip_route_output_key_hash
> + 2.49% swapper [kernel.vmlinux] [k] __slab_free
>
> In this setup the spinlock in __dev_queue_xmit should be uncongested.
> An uncongested spin_lock+unlock cost 32 cycles(tsc) 8.198 ns on this system.
>
> But 8.92% of the time is spend on it, which corresponds to a cost of 215
> cycles (2414*0.0892). This cost is too high, thus something else is
> going on... I claim this mysterious extra cost is the tailptr/doorbell.
Try adding code to ring the doorbell twice.
If this doesn't slow things down then it isn't (likely to be) responsible
for the delay you are seeing.
David
^ permalink raw reply
* Re: [PATCH] net: sky2: Fix shutdown crash
From: Sudeep Holla @ 2016-11-17 16:14 UTC (permalink / raw)
To: Jeremy Linton, netdev; +Cc: Sudeep Holla, mlindner, stephen
In-Reply-To: <1479395665-27784-1-git-send-email-jeremy.linton@arm.com>
On 17/11/16 15:14, Jeremy Linton wrote:
> The sky2 frequently crashes during machine shutdown with:
>
> sky2_get_stats+0x60/0x3d8 [sky2]
> dev_get_stats+0x68/0xd8
> rtnl_fill_stats+0x54/0x140
> rtnl_fill_ifinfo+0x46c/0xc68
> rtmsg_ifinfo_build_skb+0x7c/0xf0
> rtmsg_ifinfo.part.22+0x3c/0x70
> rtmsg_ifinfo+0x50/0x5c
> netdev_state_change+0x4c/0x58
> linkwatch_do_dev+0x50/0x88
> __linkwatch_run_queue+0x104/0x1a4
> linkwatch_event+0x30/0x3c
> process_one_work+0x140/0x3e0
> worker_thread+0x60/0x44c
> kthread+0xdc/0xf0
> ret_from_fork+0x10/0x50
>
> This is caused by the sky2 being called after it has been shutdown.
> A previous thread about this can be found here:
>
> https://lkml.org/lkml/2016/4/12/410
>
> An alternative fix is to assure that IFF_UP gets cleared by
> calling dev_close() during shutdown. This is similar to what the
> bnx2/tg3/xgene and maybe others are doing to assure that the driver
> isn't being called following _shutdown().
>
> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Since this issue has been very random and on/off recently, it's quite
hard to test this and confirm. However I did around 20 reboot/shutdown
and could not reproduce the issue after applying this patch. So,
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
--
Regards,
Sudeep
^ permalink raw reply
* Re: Netperf UDP issue with connected sockets
From: Jesper Dangaard Brouer @ 2016-11-17 14:57 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Rick Jones, netdev, brouer
In-Reply-To: <1479392258.8455.249.camel@edumazet-glaptop3.roam.corp.google.com>
On Thu, 17 Nov 2016 06:17:38 -0800
Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2016-11-17 at 14:42 +0100, Jesper Dangaard Brouer wrote:
>
> > I can see that qdisc layer does not activate xmit_more in this case.
> >
>
> Sure. Not enough pressure from the sender(s).
>
> The bottleneck is not the NIC or qdisc in your case, meaning that BQL
> limit is kept at a small value.
>
> (BTW not all NIC have expensive doorbells)
I believe this NIC mlx5 (50G edition) does.
I'm seeing UDP TX of 1656017.55 pps, which is per packet:
2414 cycles(tsc) 603.86 ns
Perf top shows (with my own udp_flood, that avoids __ip_select_ident):
Samples: 56K of event 'cycles', Event count (approx.): 51613832267
Overhead Command Shared Object Symbol
+ 8.92% udp_flood [kernel.vmlinux] [k] _raw_spin_lock
- _raw_spin_lock
+ 90.78% __dev_queue_xmit
+ 7.83% dev_queue_xmit
+ 1.30% ___slab_alloc
+ 5.59% udp_flood [kernel.vmlinux] [k] skb_set_owner_w
+ 4.77% udp_flood [mlx5_core] [k] mlx5e_sq_xmit
+ 4.09% udp_flood [kernel.vmlinux] [k] fib_table_lookup
+ 4.00% swapper [mlx5_core] [k] mlx5e_poll_tx_cq
+ 3.11% udp_flood [kernel.vmlinux] [k] __ip_route_output_key_hash
+ 2.49% swapper [kernel.vmlinux] [k] __slab_free
In this setup the spinlock in __dev_queue_xmit should be uncongested.
An uncongested spin_lock+unlock cost 32 cycles(tsc) 8.198 ns on this system.
But 8.92% of the time is spent on it, which corresponds to a cost of 215
cycles (2414*0.0892). This cost is too high, thus something else is
going on... I claim this mysterious extra cost is the tailptr/doorbell.
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
Author of http://www.iptv-analyzer.org
LinkedIn: http://www.linkedin.com/in/brouer
^ permalink raw reply
* Re: Netperf UDP issue with connected sockets
From: Eric Dumazet @ 2016-11-17 14:17 UTC (permalink / raw)
To: Jesper Dangaard Brouer; +Cc: Rick Jones, netdev
In-Reply-To: <20161117144248.23500001@redhat.com>
On Thu, 2016-11-17 at 14:42 +0100, Jesper Dangaard Brouer wrote:
> I can see that qdisc layer does not activate xmit_more in this case.
>
Sure. Not enough pressure from the sender(s).
The bottleneck is not the NIC or qdisc in your case, meaning that BQL
limit is kept at a small value.
(BTW not all NIC have expensive doorbells)
^ permalink raw reply
* Re: [patch net-next 6/8] ipv4: fib: Add an API to request a FIB dump
From: Hannes Frederic Sowa @ 2016-11-17 17:20 UTC (permalink / raw)
To: David Miller
Cc: idosch, jiri, netdev, idosch, eladr, yotamg, nogahf, arkadis,
ogerlitz, roopa, dsa, nikolay, andy, vivien.didelot, andrew,
f.fainelli, alexander.h.duyck, kuznet, jmorris, yoshfuji, kaber
In-Reply-To: <20161117.114529.1851585222876647915.davem@davemloft.net>
Hi,
On 17.11.2016 17:45, David Miller wrote:
> From: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Date: Thu, 17 Nov 2016 15:36:48 +0100
>
>> The other way is the journal idea I had, which uses an rb-tree with
>> timestamps as keys (can be lamport timestamps). You insert into the tree
>> until the dump is finished and use it as queue later to shuffle stuff
>> into the hardware.
>
> If you have this "place" where pending inserts are stored, you have
> a policy decision to make.
>
> First of all what do other lookups see when there are pending entries?
I think this is a problem with the current approach already, as the
delayed work queue already postpones the insert for an undecidable
amount of time (and reorders depending on which CPU the entry was
inserted and the fib notifier was called).
For user space queries we would still query the in-kernel table.
> If, once inserted into the pending queue, you return success to the
> inserting entity, then you must make those pending entries visible
> to lookups.
I think this same problem is in this patch set already. The way I
understood it, is, that if a problem during insert emerges, the driver
calls abort and every packet will be sent to user space, as the routing
table cannot be offloaded and it won't try it again, Ido?
Probably this is an artifact of the mellanox implementation and we can
implement this differently for other cards, but the schema to abort all
if the modification doesn't work, seems to be fundamental (I think we
require the all-or-nothing approach for now).
> If you block the inserting entity, well that doesn't make a lot of
> sense. If blocking is a workable solution, then we can just block the
> entire insert while this FIB dump to the device driver is happening.
I don't think we should really block (as in kernel-block) at any time.
I was suggesting something like:
rtnl_lock();
synchronize_rcu_expedited(); // barrier, all routing modifications are
stable and no new can be added due to rtnl_lock
register notifier(); // notifier adds entries also into journal();
rtnl_unlock();
walk_fib_tree_rcu_into_journal();
// walk finished
start_syncing_journal_to_hw(); // if new entries show up we sync them
asap after this point
The journal would need a spin lock to protect its internal state and
order events correctly.
> But I am pretty sure the idea of blocking modifications for so long
> was considered undesirable.
Yes, this is also still my opinion.
Bye,
Hannes
^ permalink raw reply
* Re: [PATCH net v2 7/7] net: ethernet: ti: cpsw: fix fixed-link phy probe deferral
From: Johan Hovold @ 2016-11-17 17:19 UTC (permalink / raw)
To: David Miller
Cc: johan, mugunthanvnm, grygorii.strashko, linux-omap, netdev,
linux-kernel
In-Reply-To: <20161117.120416.1443476818715961812.davem@davemloft.net>
On Thu, Nov 17, 2016 at 12:04:16PM -0500, David Miller wrote:
> From: Johan Hovold <johan@kernel.org>
> Date: Thu, 17 Nov 2016 17:40:04 +0100
>
> > Make sure to propagate errors from of_phy_register_fixed_link() which
> > can fail with -EPROBE_DEFER.
> >
> > Fixes: 1f71e8c96fc6 ("drivers: net: cpsw: Add support for fixed-link
> > PHY")
> > Signed-off-by: Johan Hovold <johan@kernel.org>
>
> Johan, when you update a patch within a series you must post the
> entire series freshly to the lists, cover posting and all.
I'm quite sure that is exactly what I did. Did you only get this last
patch out of the seven?
Johan
^ permalink raw reply
* Re: [PATCH net-next v3 4/7] vxlan: improve vxlan route lookup checks.
From: Jiri Benc @ 2016-11-17 15:59 UTC (permalink / raw)
To: David Laight; +Cc: Pravin B Shelar, netdev@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6DB0222355@AcuExch.aculab.com>
On Thu, 17 Nov 2016 10:17:01 +0000, David Laight wrote:
> Worse than arbitrary, it adds 4 bytes of pad on 64bit systems.
It does not, this is not a struct.
Jiri
^ permalink raw reply
* Re: Netperf UDP issue with connected sockets
From: Jesper Dangaard Brouer @ 2016-11-17 13:42 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Rick Jones, netdev, brouer
In-Reply-To: <1479388850.8455.240.camel@edumazet-glaptop3.roam.corp.google.com>
On Thu, 17 Nov 2016 05:20:50 -0800
Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2016-11-17 at 09:16 +0100, Jesper Dangaard Brouer wrote:
>
> >
> > I noticed there is a Send-Q, and the perf-top2 is _raw_spin_lock, which
> > looks like it comes from __dev_queue_xmit(), but we know from
> > experience that this stall is actually caused by writing the
> > tailptr/doorbell in the HW. Thus, this could benefit a lot from
> > bulk/xmit_more into the qdisc layer.
>
> The Send-Q is there because of TX-completions being delayed a bit,
> because of IRQ mitigation.
>
> (ethtool -c eth0)
>
> It happens even if you do not have a qdisc in the first place.
>
> And we do have xmit_more in the qdisc layer already.
I can see that qdisc layer does not activate xmit_more in this case.
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
Author of http://www.iptv-analyzer.org
LinkedIn: http://www.linkedin.com/in/brouer
$ ethtool -c mlx5p4
Coalesce parameters for mlx5p4:
Adaptive RX: on TX: off
stats-block-usecs: 0
sample-interval: 0
pkt-rate-low: 0
pkt-rate-high: 0
rx-usecs: 3
rx-frames: 32
rx-usecs-irq: 0
rx-frames-irq: 0
tx-usecs: 16
tx-frames: 32
tx-usecs-irq: 0
tx-frames-irq: 0
rx-usecs-low: 0
rx-frame-low: 0
tx-usecs-low: 0
tx-frame-low: 0
rx-usecs-high: 0
rx-frame-high: 0
tx-usecs-high: 0
tx-frame-high: 0
^ permalink raw reply
* [PATCH net-next] amd-xgbe: Update connection validation for backplane mode
From: Tom Lendacky @ 2016-11-17 14:43 UTC (permalink / raw)
To: netdev; +Cc: David Miller
Update the connection type enumeration for backplane mode and return
an error when there is a mismatch between the mode and the connection
type.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 348cc8c..9d8c9530 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -164,6 +164,7 @@ enum xgbe_conn_type {
XGBE_CONN_TYPE_NONE = 0,
XGBE_CONN_TYPE_SFP,
XGBE_CONN_TYPE_MDIO,
+ XGBE_CONN_TYPE_RSVD1,
XGBE_CONN_TYPE_BACKPLANE,
XGBE_CONN_TYPE_MAX,
};
@@ -2831,6 +2832,7 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
if (xgbe_phy_conn_type_mismatch(pdata)) {
dev_err(pdata->dev, "phy mode/connection mismatch (%#x/%#x)\n",
phy_data->port_mode, phy_data->conn_type);
+ return -EINVAL;
}
/* Validate the mode requested */
^ permalink raw reply related
* Re: [patch net-next 6/8] ipv4: fib: Add an API to request a FIB dump
From: Hannes Frederic Sowa @ 2016-11-17 14:36 UTC (permalink / raw)
To: Ido Schimmel
Cc: Jiri Pirko, netdev, davem, idosch, eladr, yotamg, nogahf, arkadis,
ogerlitz, roopa, dsa, nikolay, andy, vivien.didelot, andrew,
f.fainelli, alexander.h.duyck, kuznet, jmorris, yoshfuji, kaber
In-Reply-To: <20161117131049.pevkla5os75h4zlc@splinter.mtl.com>
On 17.11.2016 14:10, Ido Schimmel wrote:
> Hi Hannes,
>
> On Wed, Nov 16, 2016 at 08:43:25PM +0100, Hannes Frederic Sowa wrote:
>> On 16.11.2016 19:51, Ido Schimmel wrote:
>>> On Wed, Nov 16, 2016 at 06:35:45PM +0100, Hannes Frederic Sowa wrote:
>>>> On 16.11.2016 16:18, Ido Schimmel wrote:
>>>>> On Wed, Nov 16, 2016 at 03:51:01PM +0100, Hannes Frederic Sowa wrote:
>>>>>> On 16.11.2016 15:09, Jiri Pirko wrote:
>>>>>>> From: Ido Schimmel <idosch@mellanox.com>
>>>>>>>
>>>>>>> Commit b90eb7549499 ("fib: introduce FIB notification infrastructure")
>>>>>>> introduced a new notification chain to notify listeners (f.e., switchdev
>>>>>>> drivers) about addition and deletion of routes.
>>>>>>>
>>>>>>> However, upon registration to the chain the FIB tables can already be
>>>>>>> populated, which means potential listeners will have an incomplete view
>>>>>>> of the tables.
>>>>>>>
>>>>>>> Solve that by adding an API to request a FIB dump. The dump itself is
>>>>>>> done using RCU in order not to starve consumers that need RTNL to make
>>>>>>> progress.
>>>>>>>
>>>>>>> Signed-off-by: Ido Schimmel <idosch@mellanox.com>
>>>>>>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>>>>>>
>>>>>> Have you looked at potential inconsistencies resulting of RCU walking
>>>>>> the table and having concurrent inserts?
>>>>>
>>>>> Yes. I did try to think about situations in which this approach will
>>>>> fail, but I could only find problems with concurrent removals, which I
>>>>> addressed in 5/8. In case of concurrent insertions, even if you missed
>>>>> the node, you would still get the ENTRY_ADD event to your listener.
>>>>
>>>> Theoretically a node could still be installed while the deletion event
>>>> fired before registering the notifier. E.g. a synchronize_net before
>>>> dumping could help here?
>>>
>>> If the deletion event fired for some fib alias, then by 5/8 we are
>>> guaranteed that it was already unlinked from the fib alias list in the
>>> leaf in which it was contained. So, while it's possible we didn't
>>> register our listener in time for the deletion event, we won't traverse
>>> this fib alias while dumping the trie anyway. Did I understand you
>>> correctly?
>>>
>>
>> Theoretically we can have the same problem for insertion:
>>
>> You receive a delete event from the notifier that is queued up first but
>> the dump will still see the entry in the fib due to being managed by RCU
>> (the notifier running on another CPU).
>>
>> The problem is that the fib_remove_alias->hlist_del_rcu->WRITE_ONCE is
>> still not strongly ordered against the local fib dump trie walk.
>
> OK, so I believe my analysis in 5/8 was wrong. Despite CPU A invoking
> fib_remove_alias() in t0 for fa1, it's possible for CPU B doing the fib
> dump to see fa1 in t1, which will lead to fa1 being permanently present
> in the listener's table.
Yep. :(
Also the delayed work queue is only partially ordered (only on one CPU),
so you don't know about when an event is processed from the dump and the
notifier. I think you need to create your own workqueue for doing so.
> Given the above, I think Dave's suggestion is the only applicable
> solution. Do you agree? Any other suggestions?
As I wrote before the problem with seqcounter is, that with a quagga
running on top and pushing updates you end up never getting a stable
view, so maybe some logic to abort in case it cannot be loaded after a
few tries would be fine.
The other way is the journal idea I had, which uses an rb-tree with
timestamps as keys (can be lamport timestamps). You insert into the tree
until the dump is finished and use it as queue later to shuffle stuff
into the hardware.
Because you should be able to hold refs to the fa_info or other structs
directly, I don't expect gigantic memory overhead, just for a cell to
store timestamp and pointer (rb_node).
Bye,
Hannes
^ permalink raw reply
* Re: [PATCH net v2] net: nsid cannot be allocated for a dead netns
From: Cong Wang @ 2016-11-17 16:50 UTC (permalink / raw)
To: Nicolas Dichtel
Cc: David Miller, Andrey Wagin, Linux Kernel Network Developers
In-Reply-To: <0fb84877-d43d-736a-c5a8-1ca954f90925@6wind.com>
On Thu, Nov 17, 2016 at 12:41 AM, Nicolas Dichtel
<nicolas.dichtel@6wind.com> wrote:
> Le 17/11/2016 à 07:32, Cong Wang a écrit :
> [snip]
>> since Nicolas' patch doesn't even compile...
> It's always surprising how aggressive you are, really :(
> Can you show me the compilation error of this patch (we are talking about the v2
> patch here)?
Sorry about that, I didn't even look at your v2, because your patch
(no matter v2 or v1) is already wrong to me, the idr_for_each() before
alloc_netid() is clearly a use-after-destroy.
Based on the same reason, my patch is not your v3, we are patching
different places.
^ permalink raw reply
* Re: Netperf UDP issue with connected sockets
From: Eric Dumazet @ 2016-11-17 13:20 UTC (permalink / raw)
To: Jesper Dangaard Brouer; +Cc: Rick Jones, netdev
In-Reply-To: <20161117091638.5fab8494@redhat.com>
On Thu, 2016-11-17 at 09:16 +0100, Jesper Dangaard Brouer wrote:
>
> I noticed there is a Send-Q, and the perf-top2 is _raw_spin_lock, which
> looks like it comes from __dev_queue_xmit(), but we know from
> experience that this stall is actually caused by writing the
> tailptr/doorbell in the HW. Thus, this could benefit a lot from
> bulk/xmit_more into the qdisc layer.
The Send-Q is there because of TX-completions being delayed a bit,
because of IRQ mitigation.
(ethtool -c eth0)
It happens even if you do not have a qdisc in the first place.
And we do have xmit_more in the qdisc layer already.
^ permalink raw reply
* Re: [patch] amd-xgbe: Signedness bug in xgbe_phy_link_status()
From: Tom Lendacky @ 2016-11-17 14:40 UTC (permalink / raw)
To: Dan Carpenter; +Cc: netdev, kernel-janitors
In-Reply-To: <20161117105932.GA32143@mwanda>
On 11/17/2016 4:59 AM, Dan Carpenter wrote:
> "ret" needs to be signed for the error handling to work.
>
> Fixes: abf0a1c2b26a ("amd-xgbe: Add support for SFP+ modules")
> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
>
Hi Dan,
This was already identified and patched:
8c5385cbb036 ("amd-xgbe: Fix up some coccinelle identified warnings")
Thanks,
Tom
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> index 4ba4332..a2559c2 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
> @@ -2346,7 +2346,8 @@ static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed)
> static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
> {
> struct xgbe_phy_data *phy_data = pdata->phy_data;
> - unsigned int ret, reg;
> + unsigned int reg;
> + int ret;
>
> *an_restart = 0;
>
>
^ permalink raw reply
* [PATCH net 1/3] net/mlx4_en: Remove netif_device_detach from stop port flow
From: Tariq Toukan @ 2016-11-17 15:40 UTC (permalink / raw)
To: David S. Miller
Cc: netdev, Eran Ben Elisha, Saeed Mahameed, Eugenia Emantayev,
Tariq Toukan
In-Reply-To: <1479397251-6932-1-git-send-email-tariqt@mellanox.com>
From: Eugenia Emantayev <eugenia@mellanox.com>
netif_device_detach() should be called from shutdown flow only,
in any other scenario netif_device_detach is not needed and may
result in -ENODEV error in certain cases.
In order to prevent TX timeout issue during heavy CPU load
netif_carrier_off will be called.
Fixes: 3484aac16149 ("net/mlx4_en: Fix transmit timeout when driver restarts port")
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 8 ++++----
drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 22 ++++++++--------------
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +-
3 files changed, 13 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index bdda17d2ea0f..3bb30f66aa07 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -932,7 +932,7 @@ static __be32 speed_set_ptys_admin(struct mlx4_en_priv *priv, u32 speed,
mutex_lock(&priv->mdev->state_lock);
if (priv->port_up) {
en_warn(priv, "Port link mode changed, restarting port...\n");
- mlx4_en_stop_port(dev, 1);
+ mlx4_en_stop_port(dev);
if (mlx4_en_start_port(dev))
en_err(priv, "Failed restarting port %d\n", priv->port);
}
@@ -1077,7 +1077,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
if (priv->port_up) {
port_up = 1;
- mlx4_en_stop_port(dev, 1);
+ mlx4_en_stop_port(dev);
}
mlx4_en_safe_replace_resources(priv, tmp);
@@ -1205,7 +1205,7 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index,
mutex_lock(&mdev->state_lock);
if (priv->port_up) {
port_up = 1;
- mlx4_en_stop_port(dev, 1);
+ mlx4_en_stop_port(dev);
}
if (ring_index)
@@ -1751,7 +1751,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
if (priv->port_up) {
port_up = 1;
- mlx4_en_stop_port(dev, 1);
+ mlx4_en_stop_port(dev);
}
mlx4_en_safe_replace_resources(priv, tmp);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 3a47e83d3e07..01a680b66177 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1741,8 +1741,7 @@ int mlx4_en_start_port(struct net_device *dev)
napi_schedule(&priv->rx_cq[i]->napi);
netif_tx_start_all_queues(dev);
- netif_device_attach(dev);
-
+ netif_carrier_on(dev);
return 0;
tx_err:
@@ -1767,7 +1766,7 @@ int mlx4_en_start_port(struct net_device *dev)
}
-void mlx4_en_stop_port(struct net_device *dev, int detach)
+void mlx4_en_stop_port(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_dev *mdev = priv->mdev;
@@ -1785,12 +1784,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
mlx4_CLOSE_PORT(mdev->dev, priv->port);
/* Synchronize with tx routine */
- netif_tx_lock_bh(dev);
- if (detach)
- netif_device_detach(dev);
- netif_tx_stop_all_queues(dev);
- netif_tx_unlock_bh(dev);
-
+ netif_carrier_off(dev);
netif_tx_disable(dev);
/* Set port as not active */
@@ -1903,7 +1897,7 @@ static void mlx4_en_restart(struct work_struct *work)
rtnl_lock();
mutex_lock(&mdev->state_lock);
if (priv->port_up) {
- mlx4_en_stop_port(dev, 1);
+ mlx4_en_stop_port(dev);
if (mlx4_en_start_port(dev))
en_err(priv, "Failed restarting port %d\n", priv->port);
}
@@ -1987,7 +1981,7 @@ static int mlx4_en_close(struct net_device *dev)
mutex_lock(&mdev->state_lock);
- mlx4_en_stop_port(dev, 0);
+ mlx4_en_stop_port(dev);
netif_carrier_off(dev);
mutex_unlock(&mdev->state_lock);
@@ -2231,7 +2225,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
* the port */
en_dbg(DRV, priv, "Change MTU called with card down!?\n");
} else {
- mlx4_en_stop_port(dev, 1);
+ mlx4_en_stop_port(dev);
err = mlx4_en_start_port(dev);
if (err) {
en_err(priv, "Failed restarting port:%d\n",
@@ -2687,7 +2681,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
mutex_lock(&mdev->state_lock);
if (priv->port_up) {
port_up = 1;
- mlx4_en_stop_port(dev, 1);
+ mlx4_en_stop_port(dev);
}
priv->xdp_ring_num = xdp_ring_num;
@@ -3406,7 +3400,7 @@ int mlx4_en_reset_config(struct net_device *dev,
if (priv->port_up) {
port_up = 1;
- mlx4_en_stop_port(dev, 1);
+ mlx4_en_stop_port(dev);
}
en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index a3528dd1e72e..fb17acdfe528 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -659,7 +659,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
struct mlx4_en_port_profile *prof);
int mlx4_en_start_port(struct net_device *dev);
-void mlx4_en_stop_port(struct net_device *dev, int detach);
+void mlx4_en_stop_port(struct net_device *dev);
void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
struct mlx4_en_stats_bitmap *stats_bitmap,
--
1.8.3.1
^ permalink raw reply related
* [PATCH net 2/3] net: Check netdevice presence on dev_get_phys_port_id
From: Tariq Toukan @ 2016-11-17 15:40 UTC (permalink / raw)
To: David S. Miller
Cc: netdev, Eran Ben Elisha, Saeed Mahameed, Eugenia Emantayev,
Tariq Toukan, Jiri Pirko
In-Reply-To: <1479397251-6932-1-git-send-email-tariqt@mellanox.com>
From: Eugenia Emantayev <eugenia@mellanox.com>
Check presence of network device before calling device driver
ndo_get_phys_port_id callback. Otherwise callback may access
non-existing data structures and cause kernel panic.
Fixes: 66b52b0dc82c ("net: add ndo to get id of physical port of the device")
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reported-by: Steve Wise <swise@opengridcomputing.com>
Cc: Jiri Pirko <jiri@mellanox.com>
---
net/core/dev.c | 2 ++
net/core/rtnetlink.c | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6666b28b6815..7de0d000a9f8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6602,6 +6602,8 @@ int dev_get_phys_port_id(struct net_device *dev,
if (!ops->ndo_get_phys_port_id)
return -EOPNOTSUPP;
+ if (!netif_device_present(dev))
+ return -ENODEV;
return ops->ndo_get_phys_port_id(dev, ppid);
}
EXPORT_SYMBOL(dev_get_phys_port_id);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a6529c55ffb7..b3dd81f82e70 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1036,7 +1036,7 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
err = dev_get_phys_port_id(dev, &ppid);
if (err) {
- if (err == -EOPNOTSUPP)
+ if (err == -EOPNOTSUPP || err == -ENODEV)
return 0;
return err;
}
--
1.8.3.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox