Netdev List
 help / color / mirror / Atom feed
* [PATCH ethtool v5 0/2]  Adding downshift support to ethtool
From: Allan W. Nielsen @ 2016-11-24  8:56 UTC (permalink / raw)
  To: netdev; +Cc: linville, andrew, f.fainelli, raju.lakkaraju, allan.nielsen

(downshift feature is applied in the net-next tree - d3c19c0a72)

This series adds support for downshift (using phy-tunables).

Downshifting can either be turned on/off, or it can be configured to a
specifc count. "count" is optional.

Change set:
v1:
- Initial version of set/get phy tunable with downshift feature.
v2:
- (ethtool) Syntax is changed from "--set-phy-tunable downshift on|off|%d"
  to "--set-phy-tunable [downshift on|off [count N]]" - as requested by
  Andrew.
v3:
- Fixed Spelling in "ethtool-copy.h:sync with net" 
- Fixed "if send_ioctl() returns an error, print the error message and then
  still print th value of count".
v4:
- Fixing spelling in the example included in the commit message
- Improve the description in the man-page
v5:
- re-sync ethtool.h from the net-next tree.


Allan W. Nielsen (1):
  ethtool-copy.h:sync with net-next

Raju Lakkaraju (1):
  Ethtool: Implements ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE and PHY
    downshift

 ethtool-copy.h |  21 +++++++--
 ethtool.8.in   |  40 ++++++++++++++++
 ethtool.c      | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 202 insertions(+), 3 deletions(-)

-- 
2.7.3

^ permalink raw reply

* [PATCH ethtool v5 1/2] ethtool-copy.h:sync with net-next
From: Allan W. Nielsen @ 2016-11-24  8:56 UTC (permalink / raw)
  To: netdev; +Cc: linville, andrew, f.fainelli, raju.lakkaraju, allan.nielsen
In-Reply-To: <1479977811-5603-1-git-send-email-allan.nielsen@microsemi.com>

This covers kernel changes upto:

commit 607c7029146790201e90b58c4235ddff0304d6e0
Author: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Date:   Thu Nov 17 13:07:22 2016 +0100

    ethtool: (uapi) Add ETHTOOL_PHY_DOWNSHIFT to PHY tunables

    For operation in cabling environments that are incompatible with
    1000BASE-T, PHY device may provide an automatic link speed downshift
    operation. When enabled, the device automatically changes its 1000BASE-T
    auto-negotiation to the next slower speed after a configured number of
    failed attempts at 1000BASE-T.  This feature is useful in setting up in
    networks using older cable installations that include only pairs A and B,
    and not pairs C and D.

    Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
    Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
    Reviewed-by: Andrew Lunn <andrew@lunn.ch>
    Signed-off-by: David S. Miller <davem@davemloft.net>

Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
---
 ethtool-copy.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/ethtool-copy.h b/ethtool-copy.h
index 70748f5..3d299e3 100644
--- a/ethtool-copy.h
+++ b/ethtool-copy.h
@@ -117,8 +117,7 @@ struct ethtool_cmd {
 static __inline__ void ethtool_cmd_speed_set(struct ethtool_cmd *ep,
 					 __u32 speed)
 {
-
-	ep->speed = (__u16)speed;
+	ep->speed = (__u16)(speed & 0xFFFF);
 	ep->speed_hi = (__u16)(speed >> 16);
 }
 
@@ -247,6 +246,19 @@ struct ethtool_tunable {
 	void	*data[0];
 };
 
+#define DOWNSHIFT_DEV_DEFAULT_COUNT	0xff
+#define DOWNSHIFT_DEV_DISABLE		0
+
+enum phy_tunable_id {
+	ETHTOOL_PHY_ID_UNSPEC,
+	ETHTOOL_PHY_DOWNSHIFT,
+	/*
+	 * Add your fresh new phy tunable attribute above and remember to update
+	 * phy_tunable_strings[] in net/core/ethtool.c
+	 */
+	__ETHTOOL_PHY_TUNABLE_COUNT,
+};
+
 /**
  * struct ethtool_regs - hardware register dump
  * @cmd: Command number = %ETHTOOL_GREGS
@@ -547,6 +559,7 @@ struct ethtool_pauseparam {
  * @ETH_SS_FEATURES: Device feature names
  * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names
  * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
+ * @ETH_SS_PHY_TUNABLES: PHY tunable names
  */
 enum ethtool_stringset {
 	ETH_SS_TEST		= 0,
@@ -557,6 +570,7 @@ enum ethtool_stringset {
 	ETH_SS_RSS_HASH_FUNCS,
 	ETH_SS_TUNABLES,
 	ETH_SS_PHY_STATS,
+	ETH_SS_PHY_TUNABLES,
 };
 
 /**
@@ -1312,7 +1326,8 @@ struct ethtool_per_queue_op {
 
 #define ETHTOOL_GLINKSETTINGS	0x0000004c /* Get ethtool_link_settings */
 #define ETHTOOL_SLINKSETTINGS	0x0000004d /* Set ethtool_link_settings */
-
+#define ETHTOOL_PHY_GTUNABLE	0x0000004e /* Get PHY tunable configuration */
+#define ETHTOOL_PHY_STUNABLE	0x0000004f /* Set PHY tunable configuration */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
-- 
2.7.3

^ permalink raw reply related

* [PATCH ethtool v5 2/2] Ethtool: Implements ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE and PHY downshift
From: Allan W. Nielsen @ 2016-11-24  8:56 UTC (permalink / raw)
  To: netdev
  Cc: linville, andrew, f.fainelli, raju.lakkaraju, allan.nielsen,
	Raju Lakkaraju
In-Reply-To: <1479977811-5603-1-git-send-email-allan.nielsen@microsemi.com>

From: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>

Add ethtool get and set tunable to access PHY drivers.

Ethtool Help: ethtool -h for PHY tunables
    ethtool --set-phy-tunable DEVNAME      Set PHY tunable
                [ downshift on|off [count N] ]
    ethtool --get-phy-tunable DEVNAME      Get PHY tunable
                [ downshift ]

Ethtool ex:
  ethtool --set-phy-tunable eth0 downshift on
  ethtool --set-phy-tunable eth0 downshift off
  ethtool --set-phy-tunable eth0 downshift on count 2

  ethtool --get-phy-tunable eth0 downshift

Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microsemi.com>
Signed-off-by: Allan W. Nielsen <allan.nielsen@microsemi.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
---
 ethtool.8.in |  40 +++++++++++++++++
 ethtool.c    | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 184 insertions(+)

diff --git a/ethtool.8.in b/ethtool.8.in
index 9631847..5c36c06 100644
--- a/ethtool.8.in
+++ b/ethtool.8.in
@@ -340,6 +340,18 @@ ethtool \- query or control network driver and hardware settings
 .B2 tx-lpi on off
 .BN tx-timer
 .BN advertise
+.HP
+.B ethtool \-\-set\-phy\-tunable
+.I devname
+.RB [
+.B downshift
+.A1 on off
+.BN count
+.RB ]
+.HP
+.B ethtool \-\-get\-phy\-tunable
+.I devname
+.RB [ downshift ]
 .
 .\" Adjust lines (i.e. full justification) and hyphenate.
 .ad
@@ -947,6 +959,34 @@ Values are as for
 Sets the amount of time the device should stay in idle mode prior to asserting
 its Tx LPI (in microseconds). This has meaning only when Tx LPI is enabled.
 .RE
+.TP
+.B \-\-set\-phy\-tunable
+Sets the PHY tunable parameters.
+.RS 4
+.TP
+.A2 downshift on off
+Specifies whether downshift should be enabled
+.TS
+nokeep;
+lB	l.
+.BI count \ N
+Sets the PHY downshift re-tries count.
+.TE
+.PD
+.RE
+.TP
+.B \-\-get\-phy\-tunable
+Gets the PHY tunable parameters.
+.RS 4
+.TP
+.B downshift
+For operation in cabling environments that are incompatible with 1000BASE-T,
+PHY device provides an automatic link speed downshift operation.
+Link speed downshift after N failed 1000BASE-T auto-negotiation attempts.
+Downshift is useful where cable does not have the 4 pairs instance.
+
+Gets the PHY downshift count/status.
+.RE
 .SH BUGS
 Not supported (in part or whole) on all network drivers.
 .SH AUTHOR
diff --git a/ethtool.c b/ethtool.c
index 49ac94e..7dcd005 100644
--- a/ethtool.c
+++ b/ethtool.c
@@ -4520,6 +4520,146 @@ static int do_seee(struct cmd_context *ctx)
 	return 0;
 }
 
+static int do_get_phy_tunable(struct cmd_context *ctx)
+{
+	int argc = ctx->argc;
+	char **argp = ctx->argp;
+	int err, i;
+	u8 downshift_changed = 0;
+
+	if (argc < 1)
+		exit_bad_args();
+	for (i = 0; i < argc; i++) {
+		if (!strcmp(argp[i], "downshift")) {
+			downshift_changed = 1;
+			i += 1;
+			if (i < argc)
+				exit_bad_args();
+		} else  {
+			exit_bad_args();
+		}
+	}
+
+	if (downshift_changed) {
+		struct ethtool_tunable ds;
+		u8 count = 0;
+
+		ds.cmd = ETHTOOL_PHY_GTUNABLE;
+		ds.id = ETHTOOL_PHY_DOWNSHIFT;
+		ds.type_id = ETHTOOL_TUNABLE_U8;
+		ds.len = 1;
+		ds.data[0] = &count;
+		err = send_ioctl(ctx, &ds);
+		if (err < 0) {
+			perror("Cannot Get PHY downshift count");
+			return 87;
+		}
+		count = *((u8 *)&ds.data[0]);
+		if (count)
+			fprintf(stdout, "Downshift count: %d\n", count);
+		else
+			fprintf(stdout, "Downshift disabled\n");
+	}
+
+	return err;
+}
+
+static int parse_named_bool(struct cmd_context *ctx, const char *name, u8 *on)
+{
+	if (ctx->argc < 2)
+		return 0;
+
+	if (strcmp(*ctx->argp, name))
+		return 0;
+
+	if (!strcmp(*(ctx->argp + 1), "on")) {
+		*on = 1;
+	} else if (!strcmp(*(ctx->argp + 1), "off")) {
+		*on = 0;
+	} else {
+		fprintf(stderr, "Invalid boolean\n");
+		exit_bad_args();
+	}
+
+	ctx->argc -= 2;
+	ctx->argp += 2;
+
+	return 1;
+}
+
+static int parse_named_u8(struct cmd_context *ctx, const char *name, u8 *val)
+{
+	if (ctx->argc < 2)
+		return 0;
+
+	if (strcmp(*ctx->argp, name))
+		return 0;
+
+	*val = get_uint_range(*(ctx->argp + 1), 0, 0xff);
+
+	ctx->argc -= 2;
+	ctx->argp += 2;
+
+	return 1;
+}
+
+static int do_set_phy_tunable(struct cmd_context *ctx)
+{
+	int err = 0;
+	u8 ds_cnt = DOWNSHIFT_DEV_DEFAULT_COUNT;
+	u8 ds_changed = 0, ds_has_cnt = 0, ds_enable = 0;
+
+	if (ctx->argc == 0)
+		exit_bad_args();
+
+	/* Parse arguments */
+	while (ctx->argc) {
+		if (parse_named_bool(ctx, "downshift", &ds_enable)) {
+			ds_changed = 1;
+			ds_has_cnt = parse_named_u8(ctx, "count", &ds_cnt);
+		} else {
+			exit_bad_args();
+		}
+	}
+
+	/* Validate parameters */
+	if (ds_changed) {
+		if (!ds_enable && ds_has_cnt) {
+			fprintf(stderr, "'count' may not be set when downshift "
+				        "is off.\n");
+			exit_bad_args();
+		}
+
+		if (ds_enable && ds_has_cnt && ds_cnt == 0) {
+			fprintf(stderr, "'count' may not be zero.\n");
+			exit_bad_args();
+		}
+
+		if (!ds_enable)
+			ds_cnt = DOWNSHIFT_DEV_DISABLE;
+	}
+
+	/* Do it */
+	if (ds_changed) {
+		struct ethtool_tunable ds;
+		u8 count;
+
+		ds.cmd = ETHTOOL_PHY_STUNABLE;
+		ds.id = ETHTOOL_PHY_DOWNSHIFT;
+		ds.type_id = ETHTOOL_TUNABLE_U8;
+		ds.len = 1;
+		ds.data[0] = &count;
+		*((u8 *)&ds.data[0]) = ds_cnt;
+		err = send_ioctl(ctx, &ds);
+		if (err < 0) {
+			perror("Cannot Set PHY downshift count");
+			err = 87;
+		}
+	}
+
+	return err;
+}
+
 #ifndef TEST_ETHTOOL
 int send_ioctl(struct cmd_context *ctx, void *cmd)
 {
@@ -4681,6 +4821,10 @@ static const struct option {
 	  "		[ advertise %x ]\n"
 	  "		[ tx-lpi on|off ]\n"
 	  "		[ tx-timer %d ]\n"},
+	{ "--set-phy-tunable", 1, do_set_phy_tunable, "Set PHY tunable",
+	  "		[ downshift on|off [count N] ]\n"},
+	{ "--get-phy-tunable", 1, do_get_phy_tunable, "Get PHY tunable",
+	  "		[ downshift ]\n"},
 	{ "-h|--help", 0, show_usage, "Show this help" },
 	{ "--version", 0, do_version, "Show version number" },
 	{}
-- 
2.7.3

^ permalink raw reply related

* Re: [PATCH net-next 1/4] net: mvneta: Convert to be 64 bits compatible
From: Arnd Bergmann @ 2016-11-24  9:00 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Jisheng Zhang, Marcin Wojtas, Gregory CLEMENT, Thomas Petazzoni,
	Andrew Lunn, Jason Cooper, netdev, linux-kernel, David S. Miller,
	Sebastian Hesselbarth
In-Reply-To: <20161124163327.1cc261ab@xhacker>

On Thursday, November 24, 2016 4:37:36 PM CET Jisheng Zhang wrote:
> solB (a SW shadow cookie) perhaps gives a better performance: in hot path,
> such as mvneta_rx(), the driver accesses buf_cookie and buf_phys_addr of
> rx_desc which is allocated by dma_alloc_coherent, it's noncacheable if the
> device isn't cache-coherent. I didn't measure the performance difference,
> because in fact we take solA as well internally. From your experience,
> can the performance gain deserve the complex code?

Yes, a read from uncached memory is fairly slow, so if you have a chance
to avoid that it will probably help. When adding complexity to the code,
it probably makes sense to take a runtime profile anyway quantify how
much it gains.

On machines that have cache-coherent DMA, accessing the descriptor
should be fine, as you already have to load the entire cache line
to read the status field.

Looking at this snippet:

                rx_status = rx_desc->status;
                rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
                data = (unsigned char *)rx_desc->buf_cookie;
                phys_addr = rx_desc->buf_phys_addr;
                pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
                bm_pool = &pp->bm_priv->bm_pools[pool_id];

                if (!mvneta_rxq_desc_is_first_last(rx_status) ||
                    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
err_drop_frame_ret_pool:
                        /* Return the buffer to the pool */
                        mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
                                              rx_desc->buf_phys_addr);
err_drop_frame:


I think there is more room for optimizing if you start: you read
the status field twice (the second one in MVNETA_RX_GET_BM_POOL_ID)
and you can cache the buf_phys_addr along with the virtual address
once you add that.

Generally speaking, I'd recommend using READ_ONCE()/WRITE_ONCE()
to access the descriptor fields, to ensure the compiler doesn't
add extra references as well as to annotate the expensive
operations.

	Arnd

^ permalink raw reply

* (unknown), 
From: Llorente Santos Jesus @ 2016-11-24  8:54 UTC (permalink / raw)
  To: netdev@vger.kernel.org

unsubscribe

^ permalink raw reply

* Re: net/arp: ARP cache aging failed.
From: YueHaibing @ 2016-11-24  9:06 UTC (permalink / raw)
  To: Julian Anastasov, Eric Dumazet; +Cc: Hannes Frederic Sowa, davem, netdev
In-Reply-To: <alpine.LFD.2.11.1611240931560.1685@ja.home.ssi.bg>

On 2016/11/24 15:51, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Wed, 23 Nov 2016, Eric Dumazet wrote:
> 
>> On Wed, 2016-11-23 at 15:37 +0100, Hannes Frederic Sowa wrote:
>>
>>> Irregardless about the question if bonding should keep the MAC address
>>> alive, a MAC address can certainly change below a TCP connection.
>>
>> Of course ;)
>>

I configured bonding fail_over_mac=1 ,so the bonding MAC always be the MAC
address of the currently active slave.

>>>
>>> dst_entry is 1:n to neigh_entry and as such we can end up confirming an
>>> aging neighbor while sending a reply with dst->pending_confirm set while
>>> the confirming packet actually came from a different neighbor.
>>>
>>> I agree with Julian, pending_confirm became useless in this way.
>>
>> Let's kill it then ;)
> 
> 	It works for traffic via gateway. I now see that
> we can even avoid write in dst_confirm:
> 
> 	if (!dst->pending_confirm)
> 		dst->pending_confirm = 1;
> 
> 	because it is called by non-dup TCP ACKs.
> 
> 	But for traffic to hosts on LAN we need different solution,
> i.e. for cached dsts with rt_gateway = 0 (last entry below).
> 
> rt_uses_gateway rt_gateway DST_NOCACHE Description
> ====================================================================
> 1               nh_gw      ANY         Traffic via gateway
> 0               LAN_host   1           FLOWI_FLAG_KNOWN_NH (nexthop
>                                        set by IPVS, hdrincl, xt_TEE)
> 0               0          0           1 dst for many subnet hosts
> 
> Regards
> 
> --
> Julian Anastasov <ja@ssi.bg>
> 
> .
> 

As above,Is there a plan to fix the problem ? Should we just not call dst_confirm
when in the case rt->rt_uses_gateway/DST_NOCACHE?

^ permalink raw reply

* Re: [PATCH 12/20] net/iucv: Convert to hotplug state machine
From: Sebastian Andrzej Siewior @ 2016-11-24  9:10 UTC (permalink / raw)
  To: Ursula Braun; +Cc: linux-kernel, rt, David S. Miller, linux-s390, netdev
In-Reply-To: <62f8abea-093c-cac5-ffb1-7b8710aa0a91@linux.vnet.ibm.com>

On 2016-11-23 19:04:16 [+0100], Ursula Braun wrote:
> Sebastian,
Hallo Ursula,

> your patch looks good to me. I run successfully some small tests with it.
> I want to suggest a small change in iucv_init() to keep the uniform technique
> of undo labels below. Do you agree?

So what you ask for is:

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index f0d6afc5d4a9..8f7ef167c45a 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -2038,16 +2038,16 @@ static int __init iucv_init(void)
 	rc = cpuhp_setup_state(CPUHP_NET_IUCV_PREPARE, "net/iucv:prepare",
 			       iucv_cpu_prepare, iucv_cpu_dead);
 	if (rc)
-		goto out_free;
+		goto out_dev;
 	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "net/iucv:online",
 			       iucv_cpu_online, iucv_cpu_down_prep);
 	if (rc < 0)
-		goto out_free;
+		goto out_prep;
 	iucv_online = rc;
 
 	rc = register_reboot_notifier(&iucv_reboot_notifier);
 	if (rc)
-		goto out_free;
+		goto out_remove_hp;
 	ASCEBC(iucv_error_no_listener, 16);
 	ASCEBC(iucv_error_no_memory, 16);
 	ASCEBC(iucv_error_pathid, 16);
@@ -2061,11 +2061,11 @@ static int __init iucv_init(void)
 
 out_reboot:
 	unregister_reboot_notifier(&iucv_reboot_notifier);
-out_free:
-	if (iucv_online)
-		cpuhp_remove_state(iucv_online);
+out_remove_hp:
+	cpuhp_remove_state(iucv_online);
+out_prep:
 	cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
-
+out_dev:
 	root_device_unregister(iucv_root);
 out_int:
 	unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);

This is your change including the removal of the `if' check in the
`out_remove' label which got renamed to `out_remove_hp'.
Of course, I agree with this change and a proper patch will follow in a
few hours if nobody objects.

> Kind regards, Ursula

Sebastian

^ permalink raw reply related

* Re: [PATCH net-next 1/4] net: mvneta: Convert to be 64 bits compatible
From: Jisheng Zhang @ 2016-11-24  9:11 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linux-arm-kernel, Marcin Wojtas, Gregory CLEMENT,
	Thomas Petazzoni, Andrew Lunn, Jason Cooper, netdev, linux-kernel,
	David S. Miller, Sebastian Hesselbarth
In-Reply-To: <21520380.oWTKcrq8DS@wuerfel>

On Thu, 24 Nov 2016 10:00:36 +0100
Arnd Bergmann <arnd@arndb.de> wrote:

> On Thursday, November 24, 2016 4:37:36 PM CET Jisheng Zhang wrote:
> > solB (a SW shadow cookie) perhaps gives a better performance: in hot path,
> > such as mvneta_rx(), the driver accesses buf_cookie and buf_phys_addr of
> > rx_desc which is allocated by dma_alloc_coherent, it's noncacheable if the
> > device isn't cache-coherent. I didn't measure the performance difference,
> > because in fact we take solA as well internally. From your experience,
> > can the performance gain deserve the complex code?  
> 
> Yes, a read from uncached memory is fairly slow, so if you have a chance
> to avoid that it will probably help. When adding complexity to the code,
> it probably makes sense to take a runtime profile anyway quantify how
> much it gains.
> 
> On machines that have cache-coherent DMA, accessing the descriptor
> should be fine, as you already have to load the entire cache line
> to read the status field.
> 
> Looking at this snippet:
> 
>                 rx_status = rx_desc->status;
>                 rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
>                 data = (unsigned char *)rx_desc->buf_cookie;
>                 phys_addr = rx_desc->buf_phys_addr;
>                 pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
>                 bm_pool = &pp->bm_priv->bm_pools[pool_id];
> 
>                 if (!mvneta_rxq_desc_is_first_last(rx_status) ||
>                     (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
> err_drop_frame_ret_pool:
>                         /* Return the buffer to the pool */
>                         mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
>                                               rx_desc->buf_phys_addr);
> err_drop_frame:
> 
> 
> I think there is more room for optimizing if you start: you read
> the status field twice (the second one in MVNETA_RX_GET_BM_POOL_ID)
> and you can cache the buf_phys_addr along with the virtual address
> once you add that.

oh, yeah! buf_phy_addr could be included too.

> 
> Generally speaking, I'd recommend using READ_ONCE()/WRITE_ONCE()
> to access the descriptor fields, to ensure the compiler doesn't
> add extra references as well as to annotate the expensive
> operations.
> 
> 	Arnd

Got it. Thanks so much for the detailed guide. 

^ permalink raw reply

* [PATCH v3] cpsw: ethtool: add support for getting/setting EEE registers
From: yegorslists @ 2016-11-24  9:17 UTC (permalink / raw)
  To: netdev
  Cc: linux-omap, grygorii.strashko, mugunthanvnm, roszenrami,
	f.fainelli, Yegor Yefremov

From: Yegor Yefremov <yegorslists@googlemail.com>

Add the ability to query and set Energy Efficient Ethernet parameters
via ethtool for applicable devices.

This patch doesn't activate full EEE support in cpsw driver, but it
enables reading and writing EEE advertising settings. This way one
can disable advertising EEE for certain speeds.

Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
Acked-by: Rami Rosen <roszenrami@gmail.com>
---
Changes:
	v3: explain what features will be available with this patch (Florian Fainelli)
	v2: make routines static (Rami Rosen)

 drivers/net/ethernet/ti/cpsw.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c6cff3d..c706540 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2239,6 +2239,30 @@ static int cpsw_set_channels(struct net_device *ndev,
 	return ret;
 }
 
+static int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
+
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_get_eee(cpsw->slaves[slave_no].phy, edata);
+	else
+		return -EOPNOTSUPP;
+}
+
+static int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
+
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_set_eee(cpsw->slaves[slave_no].phy, edata);
+	else
+		return -EOPNOTSUPP;
+}
+
 static const struct ethtool_ops cpsw_ethtool_ops = {
 	.get_drvinfo	= cpsw_get_drvinfo,
 	.get_msglevel	= cpsw_get_msglevel,
@@ -2262,6 +2286,8 @@ static const struct ethtool_ops cpsw_ethtool_ops = {
 	.complete	= cpsw_ethtool_op_complete,
 	.get_channels	= cpsw_get_channels,
 	.set_channels	= cpsw_set_channels,
+	.get_eee	= cpsw_get_eee,
+	.set_eee	= cpsw_set_eee,
 };
 
 static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw,
-- 
2.1.4

^ permalink raw reply related

* [patch added to 3.12-stable] net: sctp, forbid negative length
From: Jiri Slaby @ 2016-11-24  9:17 UTC (permalink / raw)
  To: stable
  Cc: Jiri Slaby, Vlad Yasevich, Neil Horman, David S. Miller,
	linux-sctp, netdev
In-Reply-To: <20161124091800.14160-1-jslaby@suse.cz>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

[ Upstream commit a4b8e71b05c27bae6bad3bdecddbc6b68a3ad8cf ]

Most of getsockopt handlers in net/sctp/socket.c check len against
sizeof some structure like:
        if (len < sizeof(int))
                return -EINVAL;

On the first look, the check seems to be correct. But since len is int
and sizeof returns size_t, int gets promoted to unsigned size_t too. So
the test returns false for negative lengths. Yes, (-1 < sizeof(long)) is
false.

Fix this in sctp by explicitly checking len < 0 before any getsockopt
handler is called.

Note that sctp_getsockopt_events already handled the negative case.
Since we added the < 0 check elsewhere, this one can be removed.

If not checked, this is the result:
UBSAN: Undefined behaviour in ../mm/page_alloc.c:2722:19
shift exponent 52 is too large for 32-bit type 'int'
CPU: 1 PID: 24535 Comm: syz-executor Not tainted 4.8.1-0-syzkaller #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
 0000000000000000 ffff88006d99f2a8 ffffffffb2f7bdea 0000000041b58ab3
 ffffffffb4363c14 ffffffffb2f7bcde ffff88006d99f2d0 ffff88006d99f270
 0000000000000000 0000000000000000 0000000000000034 ffffffffb5096422
Call Trace:
 [<ffffffffb3051498>] ? __ubsan_handle_shift_out_of_bounds+0x29c/0x300
...
 [<ffffffffb273f0e4>] ? kmalloc_order+0x24/0x90
 [<ffffffffb27416a4>] ? kmalloc_order_trace+0x24/0x220
 [<ffffffffb2819a30>] ? __kmalloc+0x330/0x540
 [<ffffffffc18c25f4>] ? sctp_getsockopt_local_addrs+0x174/0xca0 [sctp]
 [<ffffffffc18d2bcd>] ? sctp_getsockopt+0x10d/0x1b0 [sctp]
 [<ffffffffb37c1219>] ? sock_common_getsockopt+0xb9/0x150
 [<ffffffffb37be2f5>] ? SyS_getsockopt+0x1a5/0x270

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-sctp@vger.kernel.org
Cc: netdev@vger.kernel.org
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 net/sctp/socket.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ead3a8adca08..98cd6606f4a4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4247,7 +4247,7 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len,
 static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval,
 				  int __user *optlen)
 {
-	if (len <= 0)
+	if (len == 0)
 		return -EINVAL;
 	if (len > sizeof(struct sctp_event_subscribe))
 		len = sizeof(struct sctp_event_subscribe);
@@ -5758,6 +5758,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	if (get_user(len, optlen))
 		return -EFAULT;
 
+	if (len < 0)
+		return -EINVAL;
+
 	sctp_lock_sock(sk);
 
 	switch (optname) {
-- 
2.10.2

^ permalink raw reply related

* [PATCH 0/4] net: thunderx: Support for 80xx, RED, PFC e.t.c
From: sunil.kovvuri @ 2016-11-24  9:17 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, linux-arm-kernel, Sunil Goutham

From: Sunil Goutham <sgoutham@cavium.com>

This patch series adds support for SLM modules present on 80xx
silicon, enables ramdom early discard, backpressure generation,
PFC and some ethtool changes to display supported link modes e.t.c.

Sunil Goutham (3):
  net: thunderx: 80xx BGX0 configuration changes
  net: thunderx: Configure RED and backpressure levels
  net: thunderx: Pause frame support

Thanneeru Srinivasulu (1):
  net: thunderx: Add ethtool support for supported ports and link modes.

 drivers/net/ethernet/cavium/thunder/nic.h          | 19 +++++
 drivers/net/ethernet/cavium/thunder/nic_main.c     | 37 +++++++++
 .../net/ethernet/cavium/thunder/nicvf_ethtool.c    | 87 +++++++++++++++++++++-
 drivers/net/ethernet/cavium/thunder/nicvf_main.c   |  7 ++
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c |  9 ++-
 drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 24 ++++--
 drivers/net/ethernet/cavium/thunder/q_struct.h     |  8 +-
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c  | 74 +++++++++++++++++-
 drivers/net/ethernet/cavium/thunder/thunder_bgx.h  | 12 +++
 9 files changed, 262 insertions(+), 15 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH 1/4] net: thunderx: 80xx BGX0 configuration changes
From: sunil.kovvuri @ 2016-11-24  9:18 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, linux-arm-kernel, Sunil Goutham
In-Reply-To: <1479979083-15963-1-git-send-email-sunil.kovvuri@gmail.com>

From: Sunil Goutham <sgoutham@cavium.com>

On 80xx only one lane of DLM0 and DLM1 (of BGX0) can be used
, so even though lmac count may be 2 but LMAC1 should use
serdes lane of DLM1. Since it's not possible to distinguish
80xx from 81xx as PCI devid are same, this patch adds this
config support by replying on what firmware configures the
lmacs with.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
---
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 050e21f..1d6214b 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -970,11 +970,25 @@ static void bgx_set_lmac_config(struct bgx *bgx, u8 idx)
 		lmac_set_training(bgx, lmac, lmac->lmacid);
 		lmac_set_lane2sds(bgx, lmac);
 
-		/* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */
 		olmac = &bgx->lmac[idx + 1];
-		olmac->lmac_type = lmac->lmac_type;
+		/*  Check if other LMAC on the same DLM is already configured by
+		 *  firmware, if so use the same config or else set as same, as
+		 *  that of LMAC 0/2.
+		 *  This check is needed as on 80xx only one lane of each of the
+		 *  DLM of BGX0 is used, so have to rely on firmware for
+		 *  distingushing 80xx from 81xx.
+		 */
+		cmr_cfg = bgx_reg_read(bgx, idx + 1, BGX_CMRX_CFG);
+		lmac_type = (u8)((cmr_cfg >> 8) & 0x07);
+		lane_to_sds = (u8)(cmr_cfg & 0xFF);
+		if ((lmac_type == 0) && (lane_to_sds == 0xE4)) {
+			olmac->lmac_type = lmac->lmac_type;
+			lmac_set_lane2sds(bgx, olmac);
+		} else {
+			olmac->lmac_type = lmac_type;
+			olmac->lane_to_sds = lane_to_sds;
+		}
 		lmac_set_training(bgx, olmac, olmac->lmacid);
-		lmac_set_lane2sds(bgx, olmac);
 	}
 }
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH 2/4] net: thunderx: Add ethtool support for supported ports and link modes.
From: sunil.kovvuri @ 2016-11-24  9:18 UTC (permalink / raw)
  To: netdev; +Cc: Sunil Goutham, Thanneeru Srinivasulu, linux-kernel,
	linux-arm-kernel
In-Reply-To: <1479979083-15963-1-git-send-email-sunil.kovvuri@gmail.com>

From: Thanneeru Srinivasulu <tsrinivasulu@cavium.com>

Signed-off-by: Thanneeru Srinivasulu <tsrinivasulu@cavium.com>
Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
---
 drivers/net/ethernet/cavium/thunder/nic.h          |  2 ++
 drivers/net/ethernet/cavium/thunder/nic_main.c     |  1 +
 .../net/ethernet/cavium/thunder/nicvf_ethtool.c    | 36 ++++++++++++++++++++--
 drivers/net/ethernet/cavium/thunder/nicvf_main.c   |  1 +
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c  |  1 +
 5 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 86bd93c..be8404a 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -292,6 +292,7 @@ struct nicvf {
 	u8			node;
 	u8			cpi_alg;
 	bool			link_up;
+	u8			mac_type;
 	u8			duplex;
 	u32			speed;
 	bool			tns_mode;
@@ -446,6 +447,7 @@ struct bgx_stats_msg {
 /* Physical interface link status */
 struct bgx_link_status {
 	u8    msg;
+	u8    mac_type;
 	u8    link_up;
 	u8    duplex;
 	u32   speed;
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 6677b96..b87d416 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -1258,6 +1258,7 @@ static void nic_poll_for_link(struct work_struct *work)
 			mbx.link_status.link_up = link.link_up;
 			mbx.link_status.duplex = link.duplex;
 			mbx.link_status.speed = link.speed;
+			mbx.link_status.mac_type = link.mac_type;
 			nic_send_msg_to_vf(nic, vf, &mbx);
 		}
 	}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index 432bf6b..d4d76a7 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -130,12 +130,42 @@ static int nicvf_get_settings(struct net_device *netdev,
 		return 0;
 	}
 
-	if (nic->speed <= 1000) {
-		cmd->port = PORT_MII;
+	switch (nic->speed) {
+	case SPEED_1000:
+		cmd->port = PORT_MII | PORT_TP;
 		cmd->autoneg = AUTONEG_ENABLE;
-	} else {
+		cmd->supported |= SUPPORTED_MII | SUPPORTED_TP;
+		cmd->supported |= SUPPORTED_1000baseT_Full |
+				  SUPPORTED_1000baseT_Half |
+				  SUPPORTED_100baseT_Full  |
+				  SUPPORTED_100baseT_Half  |
+				  SUPPORTED_10baseT_Full   |
+				  SUPPORTED_10baseT_Half;
+		cmd->supported |= SUPPORTED_Autoneg;
+		cmd->advertising |= ADVERTISED_1000baseT_Full |
+				    ADVERTISED_1000baseT_Half |
+				    ADVERTISED_100baseT_Full  |
+				    ADVERTISED_100baseT_Half  |
+				    ADVERTISED_10baseT_Full   |
+				    ADVERTISED_10baseT_Half;
+		break;
+	case SPEED_10000:
+		if (nic->mac_type == BGX_MODE_RXAUI) {
+			cmd->port = PORT_TP;
+			cmd->supported |= SUPPORTED_TP;
+		} else {
+			cmd->port = PORT_FIBRE;
+			cmd->supported |= SUPPORTED_FIBRE;
+		}
+		cmd->autoneg = AUTONEG_DISABLE;
+		cmd->supported |= SUPPORTED_10000baseT_Full;
+		break;
+	case SPEED_40000:
 		cmd->port = PORT_FIBRE;
 		cmd->autoneg = AUTONEG_DISABLE;
+		cmd->supported |= SUPPORTED_FIBRE;
+		cmd->supported |= SUPPORTED_40000baseCR4_Full;
+		break;
 	}
 	cmd->duplex = nic->duplex;
 	ethtool_cmd_speed_set(cmd, nic->speed);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 7c2c373..c6c2303 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -221,6 +221,7 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
 		nic->link_up = mbx.link_status.link_up;
 		nic->duplex = mbx.link_status.duplex;
 		nic->speed = mbx.link_status.speed;
+		nic->mac_type = mbx.link_status.mac_type;
 		if (nic->link_up) {
 			netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n",
 				    nic->netdev->name, nic->speed,
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 1d6214b..29c727f 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -161,6 +161,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
 		return;
 
 	lmac = &bgx->lmac[lmacid];
+	link->mac_type = lmac->lmac_type;
 	link->link_up = lmac->link_up;
 	link->duplex = lmac->last_duplex;
 	link->speed = lmac->last_speed;
-- 
2.7.4

^ permalink raw reply related

* [PATCH 3/4] net: thunderx: Configure RED and backpressure levels
From: sunil.kovvuri @ 2016-11-24  9:18 UTC (permalink / raw)
  To: netdev; +Cc: Sunil Goutham, linux-kernel, linux-arm-kernel
In-Reply-To: <1479979083-15963-1-git-send-email-sunil.kovvuri@gmail.com>

From: Sunil Goutham <sgoutham@cavium.com>

This patch enables moving average calculation of Rx pkt's resources
and configures RED and backpressure levels for both CQ and RBDR.
Also initialize SQ's CQ_LIMIT properly.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
---
 drivers/net/ethernet/cavium/thunder/nic_main.c     |  9 ++++++++
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c |  9 ++++++--
 drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 24 +++++++++++++++++-----
 drivers/net/ethernet/cavium/thunder/q_struct.h     |  8 ++++++--
 4 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index b87d416..17490ec 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -809,6 +809,15 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
 
 	bgx_lmac_internal_loopback(nic->node, bgx_idx, lmac_idx, lbk->enable);
 
+	/* Enable moving average calculation.
+	 * Keep the LVL/AVG delay to HW enforced minimum so that, not too many
+	 * packets sneek in between average calculations.
+	 */
+	nic_reg_write(nic, NIC_PF_CQ_AVG_CFG,
+		      (BIT_ULL(20) | 0x2ull << 14 | 0x1));
+	nic_reg_write(nic, NIC_PF_RRM_AVG_CFG,
+		      (BIT_ULL(20) | 0x3ull << 14 | 0x1));
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 747ef08..7b336cd 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -544,14 +544,18 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 	nicvf_send_msg_to_pf(nic, &mbx);
 
 	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
-	mbx.rq.cfg = (1ULL << 63) | (1ULL << 62) | (qs->vnic_id << 0);
+	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
+		     (RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) |
+		     (qs->vnic_id << 0);
 	nicvf_send_msg_to_pf(nic, &mbx);
 
 	/* RQ drop config
 	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
 	 */
 	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
-	mbx.rq.cfg = (1ULL << 62) | (RQ_CQ_DROP << 8);
+	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
+		     (RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) |
+		     (RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8);
 	nicvf_send_msg_to_pf(nic, &mbx);
 
 	if (!nic->sqs_mode && (qidx == 0)) {
@@ -650,6 +654,7 @@ static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
 	sq_cfg.ldwb = 0;
 	sq_cfg.qsize = SND_QSIZE;
 	sq_cfg.tstmp_bgx_intf = 0;
+	sq_cfg.cq_limit = 0;
 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);
 
 	/* Set threshold value for interrupt generation */
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 2e3c940..20511f2 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -85,12 +85,26 @@
 
 #define MAX_CQES_FOR_TX		((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
 				 MAX_CQE_PER_PKT_XMIT)
-/* Calculate number of CQEs to reserve for all SQEs.
- * Its 1/256th level of CQ size.
- * '+ 1' to account for pipelining
+
+/* RED and Backpressure levels of CQ for pkt reception
+ * For CQ, level is a measure of emptiness i.e 0x0 means full
+ * eg: For CQ of size 4K, and for pass/drop levels of 128/96
+ * HW accepts pkt if unused CQE >= 2048
+ * RED accepts pkt if unused CQE < 2048 & >= 1536
+ * DROPs pkts if unused CQE < 1536
+ */
+#define RQ_PASS_CQ_LVL		128ULL
+#define RQ_DROP_CQ_LVL		96ULL
+
+/* RED and Backpressure levels of RBDR for pkt reception
+ * For RBDR, level is a measure of fullness i.e 0x0 means empty
+ * eg: For RBDR of size 8K, and for pass/drop levels of 4/0
+ * HW accepts pkt if unused RBs >= 256
+ * RED accepts pkt if unused RBs < 256 & >= 0
+ * DROPs pkts if unused RBs < 0
  */
-#define RQ_CQ_DROP		((256 / (CMP_QUEUE_LEN / \
-				 (CMP_QUEUE_LEN - MAX_CQES_FOR_TX))) + 1)
+#define RQ_PASS_RBDR_LVL	8ULL
+#define RQ_DROP_RBDR_LVL	0ULL
 
 /* Descriptor size in bytes */
 #define SND_QUEUE_DESC_SIZE	16
diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h
index 9e6d987..f363472 100644
--- a/drivers/net/ethernet/cavium/thunder/q_struct.h
+++ b/drivers/net/ethernet/cavium/thunder/q_struct.h
@@ -624,7 +624,9 @@ struct cq_cfg {
 
 struct sq_cfg {
 #if defined(__BIG_ENDIAN_BITFIELD)
-	u64 reserved_20_63:44;
+	u64 reserved_32_63:32;
+	u64 cq_limit:8;
+	u64 reserved_20_23:4;
 	u64 ena:1;
 	u64 reserved_18_18:1;
 	u64 reset:1;
@@ -642,7 +644,9 @@ struct sq_cfg {
 	u64 reset:1;
 	u64 reserved_18_18:1;
 	u64 ena:1;
-	u64 reserved_20_63:44;
+	u64 reserved_20_23:4;
+	u64 cq_limit:8;
+	u64 reserved_32_63:32;
 #endif
 };
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH 4/4] net: thunderx: Pause frame support
From: sunil.kovvuri @ 2016-11-24  9:18 UTC (permalink / raw)
  To: netdev; +Cc: Sunil Goutham, linux-kernel, linux-arm-kernel
In-Reply-To: <1479979083-15963-1-git-send-email-sunil.kovvuri@gmail.com>

From: Sunil Goutham <sgoutham@cavium.com>

Enable pause frames on both Rx and Tx side, configure pause
interval e.t.c. Also support for enable/disable pause frames
on Rx/Tx via ethtool has been added.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
---
 drivers/net/ethernet/cavium/thunder/nic.h          | 17 +++++++
 drivers/net/ethernet/cavium/thunder/nic_main.c     | 27 +++++++++++
 .../net/ethernet/cavium/thunder/nicvf_ethtool.c    | 51 +++++++++++++++++++++
 drivers/net/ethernet/cavium/thunder/nicvf_main.c   |  6 +++
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c  | 53 ++++++++++++++++++++++
 drivers/net/ethernet/cavium/thunder/thunder_bgx.h  | 12 +++++
 6 files changed, 166 insertions(+)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index be8404a..e739c71 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -149,6 +149,12 @@ struct nicvf_rss_info {
 	u64 key[RSS_HASH_KEY_SIZE];
 } ____cacheline_aligned_in_smp;
 
+struct nicvf_pfc {
+	u8    autoneg;
+	u8    fc_rx;
+	u8    fc_tx;
+};
+
 enum rx_stats_reg_offset {
 	RX_OCTS = 0x0,
 	RX_UCAST = 0x1,
@@ -298,6 +304,7 @@ struct nicvf {
 	bool			tns_mode;
 	bool			loopback_supported;
 	struct nicvf_rss_info	rss_info;
+	struct nicvf_pfc	pfc;
 	struct tasklet_struct	qs_err_task;
 	struct work_struct	reset_task;
 
@@ -358,6 +365,7 @@ struct nicvf {
 #define	NIC_MBOX_MSG_SNICVF_PTR		0x15	/* Send sqet nicvf ptr to PVF */
 #define	NIC_MBOX_MSG_LOOPBACK		0x16	/* Set interface in loopback */
 #define	NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17	/* Reset statistics counters */
+#define	NIC_MBOX_MSG_PFC		0x18	/* Pause frame control */
 #define	NIC_MBOX_MSG_CFG_DONE		0xF0	/* VF configuration done */
 #define	NIC_MBOX_MSG_SHUTDOWN		0xF1	/* VF is being shutdown */
 
@@ -500,6 +508,14 @@ struct reset_stat_cfg {
 	u16   sq_stat_mask;
 };
 
+struct pfc {
+	u8    msg;
+	u8    get; /* Get or set PFC settings */
+	u8    autoneg;
+	u8    fc_rx;
+	u8    fc_tx;
+};
+
 /* 128 bit shared memory between PF and each VF */
 union nic_mbx {
 	struct { u8 msg; }	msg;
@@ -518,6 +534,7 @@ union nic_mbx {
 	struct nicvf_ptr	nicvf;
 	struct set_loopback	lbk;
 	struct reset_stat_cfg	reset_stat;
+	struct pfc		pfc;
 };
 
 #define NIC_NODE_ID_MASK	0x03
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 17490ec..767234e 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -898,6 +898,30 @@ static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
 	bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, enable);
 }
 
+static void nic_pause_frame(struct nicpf *nic, int vf, struct pfc *cfg)
+{
+	int bgx, lmac;
+	struct pfc pfc;
+	union nic_mbx mbx = {};
+
+	if (vf >= nic->num_vf_en)
+		return;
+	bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+	lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+	if (cfg->get) {
+		bgx_lmac_get_pfc(nic->node, bgx, lmac, &pfc);
+		mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+		mbx.pfc.autoneg = pfc.autoneg;
+		mbx.pfc.fc_rx = pfc.fc_rx;
+		mbx.pfc.fc_tx = pfc.fc_tx;
+		nic_send_msg_to_vf(nic, vf, &mbx);
+	} else {
+		bgx_lmac_set_pfc(nic->node, bgx, lmac, cfg);
+		nic_mbx_send_ack(nic, vf);
+	}
+}
+
 /* Interrupt handler to handle mailbox messages from VFs */
 static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 {
@@ -1037,6 +1061,9 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 	case NIC_MBOX_MSG_RESET_STAT_COUNTER:
 		ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat);
 		break;
+	case NIC_MBOX_MSG_PFC:
+		nic_pause_frame(nic, vf, &mbx.pfc);
+		goto unlock;
 	default:
 		dev_err(&nic->pdev->dev,
 			"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index d4d76a7..b048241 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -720,6 +720,55 @@ static int nicvf_set_channels(struct net_device *dev,
 	return err;
 }
 
+static void nicvf_get_pauseparam(struct net_device *dev,
+				 struct ethtool_pauseparam *pause)
+{
+	struct nicvf *nic = netdev_priv(dev);
+	union nic_mbx mbx = {};
+
+	/* Supported only for 10G/40G interfaces */
+	if ((nic->mac_type == BGX_MODE_SGMII) ||
+	    (nic->mac_type == BGX_MODE_QSGMII) ||
+	    (nic->mac_type == BGX_MODE_RGMII))
+		return;
+
+	mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+	mbx.pfc.get = 1;
+	if (!nicvf_send_msg_to_pf(nic, &mbx)) {
+		pause->autoneg = nic->pfc.autoneg;
+		pause->rx_pause = nic->pfc.fc_rx;
+		pause->tx_pause = nic->pfc.fc_tx;
+	}
+}
+
+static int nicvf_set_pauseparam(struct net_device *dev,
+				struct ethtool_pauseparam *pause)
+{
+	struct nicvf *nic = netdev_priv(dev);
+	union nic_mbx mbx = {};
+
+	/* Supported only for 10G/40G interfaces */
+	if ((nic->mac_type == BGX_MODE_SGMII) ||
+	    (nic->mac_type == BGX_MODE_QSGMII) ||
+	    (nic->mac_type == BGX_MODE_RGMII))
+		return -EOPNOTSUPP;
+
+	if (pause->autoneg)
+		return -EOPNOTSUPP;
+
+	mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+	mbx.pfc.get = 0;
+	mbx.pfc.fc_rx = pause->rx_pause;
+	mbx.pfc.fc_tx = pause->tx_pause;
+	if (nicvf_send_msg_to_pf(nic, &mbx))
+		return -EAGAIN;
+
+	nic->pfc.fc_rx = pause->rx_pause;
+	nic->pfc.fc_tx = pause->tx_pause;
+
+	return 0;
+}
+
 static const struct ethtool_ops nicvf_ethtool_ops = {
 	.get_settings		= nicvf_get_settings,
 	.get_link		= nicvf_get_link,
@@ -741,6 +790,8 @@ static const struct ethtool_ops nicvf_ethtool_ops = {
 	.set_rxfh		= nicvf_set_rxfh,
 	.get_channels		= nicvf_get_channels,
 	.set_channels		= nicvf_set_channels,
+	.get_pauseparam         = nicvf_get_pauseparam,
+	.set_pauseparam         = nicvf_set_pauseparam,
 	.get_ts_info		= ethtool_op_get_ts_info,
 };
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index c6c2303..1eacec8 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -256,6 +256,12 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
 		nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
 		nic->pf_acked = true;
 		break;
+	case NIC_MBOX_MSG_PFC:
+		nic->pfc.autoneg = mbx.pfc.autoneg;
+		nic->pfc.fc_rx = mbx.pfc.fc_rx;
+		nic->pfc.fc_tx = mbx.pfc.fc_tx;
+		nic->pf_acked = true;
+		break;
 	default:
 		netdev_err(nic->netdev,
 			   "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 29c727f..9211c75 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -212,6 +212,47 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 }
 EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
 
+void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
+{
+	struct pfc *pfc = (struct pfc *)pause;
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct lmac *lmac;
+	u64 cfg;
+
+	if (!bgx)
+		return;
+	lmac = &bgx->lmac[lmacid];
+	if (lmac->is_sgmii)
+		return;
+
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_CBFC_CTL);
+	pfc->fc_rx = cfg & RX_EN;
+	pfc->fc_tx = cfg & TX_EN;
+	pfc->autoneg = 0;
+}
+EXPORT_SYMBOL(bgx_lmac_get_pfc);
+
+void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
+{
+	struct pfc *pfc = (struct pfc *)pause;
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct lmac *lmac;
+	u64 cfg;
+
+	if (!bgx)
+		return;
+	lmac = &bgx->lmac[lmacid];
+	if (lmac->is_sgmii)
+		return;
+
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_CBFC_CTL);
+	cfg &= ~(RX_EN | TX_EN);
+	cfg |= (pfc->fc_rx ? RX_EN : 0x00);
+	cfg |= (pfc->fc_tx ? TX_EN : 0x00);
+	bgx_reg_write(bgx, lmacid, BGX_SMUX_CBFC_CTL, cfg);
+}
+EXPORT_SYMBOL(bgx_lmac_set_pfc);
+
 static void bgx_sgmii_change_link_state(struct lmac *lmac)
 {
 	struct bgx *bgx = lmac->bgx;
@@ -525,6 +566,18 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac)
 	cfg |= SMU_TX_CTL_DIC_EN;
 	bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_CTL, cfg);
 
+	/* Enable receive and transmission of pause frames */
+	bgx_reg_write(bgx, lmacid, BGX_SMUX_CBFC_CTL, ((0xffffULL << 32) |
+		      BCK_EN | DRP_EN | TX_EN | RX_EN));
+	/* Configure pause time and interval */
+	bgx_reg_write(bgx, lmacid,
+		      BGX_SMUX_TX_PAUSE_PKT_TIME, DEFAULT_PAUSE_TIME);
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_PAUSE_PKT_INTERVAL);
+	cfg &= ~0xFFFFull;
+	bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_PAUSE_PKT_INTERVAL,
+		      cfg | (DEFAULT_PAUSE_TIME - 0x1000));
+	bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_PAUSE_ZERO, 0x01);
+
 	/* take lmac_count into account */
 	bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_THRESH, (0x100 - 1));
 	/* max packet size */
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 01cc7c8..c18ebfe 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -27,6 +27,7 @@
 #define    MAX_BGX_CHANS_PER_LMAC		16
 #define    MAX_DMAC_PER_LMAC			8
 #define    MAX_FRAME_SIZE			9216
+#define    DEFAULT_PAUSE_TIME			0xFFFF
 
 #define	   BGX_ID_MASK				0x3
 
@@ -126,7 +127,10 @@
 #define  SMU_RX_CTL_STATUS			(3ull << 0)
 #define BGX_SMUX_TX_APPEND		0x20100
 #define  SMU_TX_APPEND_FCS_D			BIT_ULL(2)
+#define BGX_SMUX_TX_PAUSE_PKT_TIME	0x20110
 #define BGX_SMUX_TX_MIN_PKT		0x20118
+#define BGX_SMUX_TX_PAUSE_PKT_INTERVAL	0x20120
+#define BGX_SMUX_TX_PAUSE_ZERO		0x20138
 #define BGX_SMUX_TX_INT			0x20140
 #define BGX_SMUX_TX_CTL			0x20178
 #define  SMU_TX_CTL_DIC_EN			BIT_ULL(0)
@@ -136,6 +140,11 @@
 #define BGX_SMUX_CTL			0x20200
 #define  SMU_CTL_RX_IDLE			BIT_ULL(0)
 #define  SMU_CTL_TX_IDLE			BIT_ULL(1)
+#define	BGX_SMUX_CBFC_CTL		0x20218
+#define	RX_EN					BIT_ULL(0)
+#define	TX_EN					BIT_ULL(1)
+#define	BCK_EN					BIT_ULL(2)
+#define	DRP_EN					BIT_ULL(3)
 
 #define BGX_GMP_PCS_MRX_CTL		0x30000
 #define	 PCS_MRX_CTL_RST_AN			BIT_ULL(9)
@@ -207,6 +216,9 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac);
 void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status);
 void bgx_lmac_internal_loopback(int node, int bgx_idx,
 				int lmac_idx, bool enable);
+void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause);
+void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause);
+
 void xcv_init_hw(void);
 void xcv_setup_link(bool link_up, int link_speed);
 
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH v2] cpsw: ethtool: add support for getting/setting EEE registers
From: Yegor Yefremov @ 2016-11-24  9:25 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: netdev, linux-omap@vger.kernel.org, Grygorii Strashko,
	N, Mugunthan V, Rami Rosen
In-Reply-To: <c91dcd74-3983-3ab6-91f6-3657b1e601af@gmail.com>

On Wed, Nov 23, 2016 at 9:15 PM, Florian Fainelli <f.fainelli@gmail.com> wrote:
> On 11/23/2016 12:08 PM, Yegor Yefremov wrote:
>> On Wed, Nov 23, 2016 at 6:33 PM, Florian Fainelli <f.fainelli@gmail.com> wrote:
>>> On 11/23/2016 06:38 AM, yegorslists@googlemail.com wrote:
>>>> From: Yegor Yefremov <yegorslists@googlemail.com>
>>>>
>>>> Add the ability to query and set Energy Efficient Ethernet parameters
>>>> via ethtool for applicable devices.
>>>
>>> Are you sure this is enough to actually enable EEE? I don't see where
>>> phy_init_eee() is called here, nor is the cpsw Ethernet controller part
>>> configured to enable/disable EEE. EEE is not just a PHY thing, it
>>> usually also needs to be configured properly at the Ethernet MAC/switch
>>> level as well.
>>>
>>> Just curious here.
>>
>> I'm sure I want to disable EEE :-) So I need this patch in order to
>> check and disable EEE advertising.
>
> OK, so you need this to disable EEE advertisement, which is great, but
> this also allows you to enable EEE, is it enough to just advertise EEE
> with your link partner for cpsw to work correctly? Just wondering, since
> your commit message is more than short.

I've sent v3 with a little bit more info. Basically this is needed for
this kind of issues [1]

As for enabling advertising and correct working of cpsw do you mean it
would be better to disable EEE in any PHY on cpsw initialization as
long as cpsw doesn't provide support for EEE?

We observe some strange behavior with our gigabit PHYs and a link
partner in a EEE-capable unmanaged NetGear switch. Disabling
advertising seems to help. Though we're still investigating the issue.

[1] http://www.spinics.net/lists/netdev/msg405396.html

Yegor

^ permalink raw reply

* RE: [RFC PATCH v2 1/2] macb: Add 1588 support in Cadence GEM.
From: Andrei.Pistirica @ 2016-11-24  9:36 UTC (permalink / raw)
  To: richardcochran
  Cc: tbultel, boris.brezillon, netdev, alexandre.belloni,
	nicolas.ferre, linux-kernel, harinikatakamlinux, michals, anirudh,
	punnaia, harini.katakam, davem, linux-arm-kernel
In-Reply-To: <20161123210318.GB2845@localhost.localdomain>



> -----Original Message-----
> From: Richard Cochran [mailto:richardcochran@gmail.com]
> Sent: Wednesday, November 23, 2016 11:03 PM
> To: Andrei Pistirica - M16132
> Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org; linux-arm-
> kernel@lists.infradead.org; davem@davemloft.net;
> nicolas.ferre@atmel.com; harinikatakamlinux@gmail.com;
> harini.katakam@xilinx.com; punnaia@xilinx.com; michals@xilinx.com;
> anirudh@xilinx.com; boris.brezillon@free-electrons.com;
> alexandre.belloni@free-electrons.com; tbultel@pixelsurmer.com
> Subject: Re: [RFC PATCH v2 1/2] macb: Add 1588 support in Cadence GEM.
> 
> On Wed, Nov 23, 2016 at 02:34:03PM +0100, Andrei Pistirica wrote:
> > From what I understand, your suggestion is:
> > (ns | frac) * ppb = (total_ns | total_frac) (total_ns | total_frac) /
> > 10^9 = (adj_ns | adj_frac) This is correct iff total_ns/10^9 >= 1, but
> > the problem is that there are missed fractions due to the following
> > approximation:
> > frac*ppb =~
> > (ns*ppb+frac*ppb*2^16)*2^16-10^9*2^16*flor(ns*ppb+frac*ppb*2^16,
> > 10^9).
> 
> -ENOPARSE;
> 
> > An example which uses values from a real test:
> > let ppb=4891, ns=12 and frac=3158
> 
> That is a very strange example for nominal frequency.  The clock period is
> 12.048187255859375 nanoseconds, and so the frequency is
> 83000037.99 Hz.
> 
> But hey, let's go with it...
> 
> > - using suggested algorithm, yields: adj_ns = 0 and adj_frac = 0
> > - using in-place algorithm, yields: adj_ns = 0, adj_frac = 4 You can
> > check the calculus.
> 
> The test program, below, shows you what I meant.  (Of course, you should
> adjust this to fit the adjfine() method.)
> 
> Unfortunately, this device has a very coarse frequency resolution.
> Using a nominal period of ns=12 as an example, the resolution is
> 2^-16 / 12 or 1.27 ppm.  The 24 bit device is much better in this repect.
> 
> The output using your example numbers is:
> 
>    $ ./a.out 12 3158 4891
>    ns=12 frac=3158
>    ns=12 frac=3162
> 
>    $ ./a.out 12 3158 -4891
>    ns=12 frac=3158
>    ns=12 frac=3154
> 
> See how you get a result of +/- 4 with just one division?
> 
> Thanks,
> Richard
> 
> ---
> #include <stdint.h>
> #include <stdio.h>
> #include <stdlib.h>
> 
> static void adjfreq(uint32_t ns, uint32_t frac, int32_t ppb) {
> 	uint64_t adj;
> 	uint32_t diff, word;
> 	int neg_adj = 0;
> 
> 	printf("ns=%u frac=%u\n", ns, frac);
> 
> 	if (ppb < 0) {
> 		neg_adj = 1;
> 		ppb = -ppb;
> 	}
> 	word = (ns << 16) + frac;
> 	adj = word;
> 	adj *= ppb;
> 	adj += 500000000UL;
> 	diff = adj / 1000000000UL;
> 
> 	word = neg_adj ? word - diff : word + diff;
> 	printf("ns=%u frac=%u\n", word >> 16, word & 0xffff); }
> 
> int main(int argc, char *argv[])
> {
> 	uint32_t ns, frac;
> 	int32_t ppb;
> 
> 	if (argc != 4) {
> 		puts("need ns, frac, and ppb");
> 		return -1;
> 	}
> 	ns = atoi(argv[1]);
> 	frac = atoi(argv[2]);
> 	ppb = atoi(argv[3]);
> 	adjfreq(ns, frac, ppb);
> 	return 0;
> }

Ok, thanks.
I will use this one then.

Regards,
Andrei

^ permalink raw reply

* Re: [PATCH 3/5] ath10k: Remove unused wmi_p2p_noa_descriptor 'noa' in wmi-tlv
From: Michal Kazior @ 2016-11-24  9:50 UTC (permalink / raw)
  To: Kirtika Ruchandani
  Cc: Kalle Valo, Arnd Bergmann, Network Development, linux-wireless,
	Raja Mani
In-Reply-To: <99d0ff42e57d5f62560e72d926b4d69d5d7c418b.1479974100.git.kirtika-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>

On 24 November 2016 at 09:01, Kirtika Ruchandani
<kirtika.ruchandani-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> Commit ca996ec56608 (ath10k: implement wmi-tlv backend)
> introduced ath10k_wmi_tlv_op_gen_vdev_start() where
> 'struct wmi_p2p_noa_descriptor *noa' is defined and set but not used.
> Compiling with W=1 gives the following warning, fix it.
> drivers/net/wireless/ath/ath10k/wmi-tlv.c: In function ‘ath10k_wmi_tlv_op_gen_vdev_start’:
> drivers/net/wireless/ath/ath10k/wmi-tlv.c:1647:33: warning: variable ‘noa’ set but not used [-Wunused-but-set-variable]
>
> Fixes: ca996ec56608 ("ath10k: implement wmi-tlv backend")
> Cc: Michal Kazior <michal.kazior-++hxYGjEMp0AvxtiuMwx3w@public.gmane.org>
> Cc: Kalle Valo <kvalo-A+ZNKFmMK5xy9aJCnZT0Uw@public.gmane.org>
> Signed-off-by: Kirtika Ruchandani <kirtika-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
> ---
>  drivers/net/wireless/ath/ath10k/wmi-tlv.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
> index e64f593..0e4bd29 100644
> --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
> +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
> @@ -1644,7 +1644,6 @@ ath10k_wmi_tlv_op_gen_vdev_start(struct ath10k *ar,
>  {
>         struct wmi_tlv_vdev_start_cmd *cmd;
>         struct wmi_channel *ch;
> -       struct wmi_p2p_noa_descriptor *noa;
>         struct wmi_tlv *tlv;
>         struct sk_buff *skb;
>         size_t len;
> @@ -1702,7 +1701,6 @@ ath10k_wmi_tlv_op_gen_vdev_start(struct ath10k *ar,
>         tlv = ptr;
>         tlv->tag = __cpu_to_le16(WMI_TLV_TAG_ARRAY_STRUCT);
>         tlv->len = 0;
> -       noa = (void *)tlv->value;
>
>         /* Note: This is a nested TLV containing:
>          * [wmi_tlv][wmi_p2p_noa_descriptor][wmi_tlv]..

I would rather keep this one as it serves as documentation. Would
"(void) noa;" be enough satisfy the compiler?


Michał

^ permalink raw reply

* [PATCH] net: ethtool: don't require CAP_NET_ADMIN for ETHTOOL_GLINKSETTINGS
From: Miroslav Lichvar @ 2016-11-24  9:55 UTC (permalink / raw)
  To: netdev; +Cc: David Decotigny

The ETHTOOL_GLINKSETTINGS command is deprecating the ETHTOOL_GSET
command and likewise it shouldn't require the CAP_NET_ADMIN capability.

Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
---
 net/core/ethtool.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9774898..047a175 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2479,6 +2479,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GET_TS_INFO:
 	case ETHTOOL_GEEE:
 	case ETHTOOL_GTUNABLE:
+	case ETHTOOL_GLINKSETTINGS:
 		break;
 	default:
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-- 
2.9.3

^ permalink raw reply related

* Re: [Patch net-next] net_sched: move the empty tp check from ->destroy() to ->delete()
From: Daniel Borkmann @ 2016-11-24 10:14 UTC (permalink / raw)
  To: Roi Dayan, Cong Wang, netdev; +Cc: jiri, John Fastabend
In-Reply-To: <5836A4D4.2010500@mellanox.com>

On 11/24/2016 09:29 AM, Roi Dayan wrote:
> Hi,
>
> I'm testing this patch with KASAN enabled and got into a new kernel crash I didn't hit before.
>
> [ 1860.725065] ==================================================================
> [ 1860.733893] BUG: KASAN: use-after-free in __netif_receive_skb_core+0x1ebe/0x29a0 at addr ffff880a68b04028
> [ 1860.745415] Read of size 8 by task CPU 0/KVM/5334
> [ 1860.751368] CPU: 8 PID: 5334 Comm: CPU 0/KVM Tainted: G O 4.9.0-rc3+ #18
> [ 1860.760547] Hardware name: HP ProLiant DL380p Gen8, BIOS P70 07/01/2015
> [ 1860.768036] Call Trace:
> [ 1860.771307]  [<ffffffffa9b6dc42>] dump_stack+0x63/0x81
> [ 1860.777167]  [<ffffffffa95fb751>] kasan_object_err+0x21/0x70
> [ 1860.783826]  [<ffffffffa95fb9dd>] kasan_report_error+0x1ed/0x4e0
> [ 1860.790640]  [<ffffffffa9b9b841>] ? csum_partial+0x11/0x20
> [ 1860.796871]  [<ffffffffaa44a6b9>] ? csum_partial_ext+0x9/0x10
> [ 1860.803571]  [<ffffffffaa453155>] ? __skb_checksum+0x115/0x8d0
> [ 1860.810370]  [<ffffffffa95fbe81>] __asan_report_load8_noabort+0x61/0x70
> [ 1860.818263]  [<ffffffffaa49c3fe>] ? __netif_receive_skb_core+0x1ebe/0x29a0
> [ 1860.826215]  [<ffffffffaa49c3fe>] __netif_receive_skb_core+0x1ebe/0x29a0
> [ 1860.833991]  [<ffffffffaa49a540>] ? netdev_info+0x100/0x100
> [ 1860.840529]  [<ffffffffaa671792>] ? udp4_gro_receive+0x802/0x1090
> [ 1860.847783]  [<ffffffffa9bb9a08>] ? find_next_bit+0x18/0x20
> [ 1860.854126]  [<ffffffffaa49cf04>] __netif_receive_skb+0x24/0x150
> [ 1860.861695]  [<ffffffffaa49d0d1>] netif_receive_skb_internal+0xa1/0x1d0
> [ 1860.869366]  [<ffffffffaa49d030>] ? __netif_receive_skb+0x150/0x150
> [ 1860.876464]  [<ffffffffaa49f7e9>] ? dev_gro_receive+0x969/0x1660
> [ 1860.883924]  [<ffffffffaa4a0e1f>] napi_gro_receive+0x1df/0x300
> [ 1860.890744]  [<ffffffffc02e885d>] mlx5e_handle_rx_cqe_rep+0x83d/0xd30 [mlx5_core]
>
> checking with gdb
>
> (gdb) l *(__netif_receive_skb_core+0x1ebe)
> 0xffffffff8249c3fe is in __netif_receive_skb_core (net/core/dev.c:3937).
> 3932                    *pt_prev = NULL;
> 3933            }
> 3934
> 3935            qdisc_skb_cb(skb)->pkt_len = skb->len;
> 3936            skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
> 3937            qdisc_bstats_cpu_update(cl->q, skb);
> 3938
> 3939            switch (tc_classify(skb, cl, &cl_res, false)) {
> 3940            case TC_ACT_OK:
> 3941            case TC_ACT_RECLASSIFY:

Can you elaborate some more on your test-case? Adding/dropping ingress qdisc with
some classifier on it in a loop while traffic goes through?

Thanks,
Daniel

^ permalink raw reply

* [PATCH 0/3] virtio/vringh: kill off ACCESS_ONCE()
From: Mark Rutland @ 2016-11-24 10:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: dave, dbueso, dvyukov, jasowang, kvm, mark.rutland, mst, netdev,
	paulmck, virtualization

For several reasons, it would be beneficial to kill off ACCESS_ONCE()
tree-wide, in favour of {READ,WRITE}_ONCE(). These work with aggregate types,
more obviously document their intended behaviour, and are necessary for tools
like KTSAN to work correctly (as otherwise reads and writes cannot be
instrumented separately).

While it's possible to script the bulk of this tree-wide conversion, some cases
such as the virtio code, require some manual intervention. This series moves
the virtio and vringh code over to {READ,WRITE}_ONCE(), in the process fixing a
bug in the virtio headers.

Thanks,
Mark.

Mark Rutland (3):
  tools/virtio: fix READ_ONCE()
  vringh: kill off ACCESS_ONCE()
  tools/virtio: use {READ,WRITE}_ONCE() in uaccess.h

 drivers/vhost/vringh.c        | 5 +++--
 tools/virtio/linux/compiler.h | 2 +-
 tools/virtio/linux/uaccess.h  | 9 +++++----
 3 files changed, 9 insertions(+), 7 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH 1/3] tools/virtio: fix READ_ONCE()
From: Mark Rutland @ 2016-11-24 10:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: dave, dbueso, dvyukov, jasowang, kvm, mark.rutland, mst, netdev,
	paulmck, virtualization
In-Reply-To: <1479983114-17190-1-git-send-email-mark.rutland@arm.com>

The virtio tools implementation of READ_ONCE() has a single parameter called
'var', but erroneously refers to 'val' for its cast, and thus won't work unless
there's a variable of the correct type that happens to be called 'var'.

Fix this with s/var/val/, making READ_ONCE() work as expected regardless.

Fixes: a7c490333df3cff5 ("tools/virtio: use virt_xxx barriers")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
---
 tools/virtio/linux/compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
index 845960e..c9ccfd4 100644
--- a/tools/virtio/linux/compiler.h
+++ b/tools/virtio/linux/compiler.h
@@ -4,6 +4,6 @@
 #define WRITE_ONCE(var, val) \
 	(*((volatile typeof(val) *)(&(var))) = (val))
 
-#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var))))
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
 
 #endif
-- 
2.7.4

^ permalink raw reply related

* [PATCH 3/3] tools/virtio: use {READ,WRITE}_ONCE() in uaccess.h
From: Mark Rutland @ 2016-11-24 10:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: dave, dbueso, dvyukov, jasowang, kvm, mark.rutland, mst, netdev,
	paulmck, virtualization
In-Reply-To: <1479983114-17190-1-git-send-email-mark.rutland@arm.com>

As a step towards killing off ACCESS_ONCE, use {READ,WRITE}_ONCE() for the
virtio tools uaccess primitives, pulling these in from <linux/compiler.h>.

With this done, we can kill off the now-unused ACCESS_ONCE() definition.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
---
 tools/virtio/linux/uaccess.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/virtio/linux/uaccess.h b/tools/virtio/linux/uaccess.h
index 0a578fe..fa05d01 100644
--- a/tools/virtio/linux/uaccess.h
+++ b/tools/virtio/linux/uaccess.h
@@ -1,8 +1,9 @@
 #ifndef UACCESS_H
 #define UACCESS_H
-extern void *__user_addr_min, *__user_addr_max;
 
-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+#include <linux/compiler.h>
+
+extern void *__user_addr_min, *__user_addr_max;
 
 static inline void __chk_user_ptr(const volatile void *p, size_t size)
 {
@@ -13,7 +14,7 @@ static inline void __chk_user_ptr(const volatile void *p, size_t size)
 ({								\
 	typeof(ptr) __pu_ptr = (ptr);				\
 	__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr));		\
-	ACCESS_ONCE(*(__pu_ptr)) = x;				\
+	WRITE_ONCE(*(__pu_ptr), x);				\
 	0;							\
 })
 
@@ -21,7 +22,7 @@ static inline void __chk_user_ptr(const volatile void *p, size_t size)
 ({								\
 	typeof(ptr) __pu_ptr = (ptr);				\
 	__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr));		\
-	x = ACCESS_ONCE(*(__pu_ptr));				\
+	x = READ_ONCE(*(__pu_ptr));				\
 	0;							\
 })
 
-- 
2.7.4


^ permalink raw reply related

* [PATCH 2/3] vringh: kill off ACCESS_ONCE()
From: Mark Rutland @ 2016-11-24 10:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: dave, dbueso, dvyukov, jasowang, kvm, mark.rutland, mst, netdev,
	paulmck, virtualization
In-Reply-To: <1479983114-17190-1-git-send-email-mark.rutland@arm.com>

Despite living under drivers/ vringh.c is also used as part of the userspace
virtio tools. Before we can kill off the ACCESS_ONCE()definition in the tools,
we must convert vringh.c to use {READ,WRITE}_ONCE().

This patch does so, along with the required include of <linux/compiler.h> for
the relevant definitions. The userspace tools provide their own definitions in
their own <linux/compiler.h>.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
---
 drivers/vhost/vringh.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 3bb02c6..bb8971f 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -3,6 +3,7 @@
  *
  * Since these may be in userspace, we use (inline) accessors.
  */
+#include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/vringh.h>
 #include <linux/virtio_ring.h>
@@ -820,13 +821,13 @@ EXPORT_SYMBOL(vringh_need_notify_user);
 static inline int getu16_kern(const struct vringh *vrh,
 			      u16 *val, const __virtio16 *p)
 {
-	*val = vringh16_to_cpu(vrh, ACCESS_ONCE(*p));
+	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
 	return 0;
 }
 
 static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
 {
-	ACCESS_ONCE(*p) = cpu_to_vringh16(vrh, val);
+	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
 	return 0;
 }
 
-- 
2.7.4

^ permalink raw reply related

* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
From: Pavel Machek @ 2016-11-24 10:29 UTC (permalink / raw)
  To: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
	fabrice.gasnier
In-Reply-To: <20161124085506.GA25007@amd>

[-- Attachment #1: Type: text/plain, Size: 3938 bytes --]

Hi!

What is going on with stmmac_tso_xmit() vs. stmmac_xmit()? One seems
to be copy of another, with subtle differences -- like calling
netif_queue_stopped() under spin_lock(&priv->tx_lock), or not.

What is going on with all these likely()s? Likely new hardware owners
will not be happy... or anyone running a lot of jumbo frames. (Perhaps
CPU's branch prediction can do better job here, without explicit hints?)

     if (unlikely(is_jumbo) && likely(priv->synopsys_id <
                                              DWMAC_CORE_4_00)) {

Are you sure this is okay?

        if (unlikely(netif_queue_stopped(priv->dev) &&
                     stmmac_tx_avail(priv) > STMMAC_TX_THRESH)) {
		netif_tx_lock(priv->dev);
                if (netif_queue_stopped(priv->dev) &&
                    stmmac_tx_avail(priv) > STMMAC_TX_THRESH) {

---

Fix english in comments.

Signed-off-by: Pavel Machek <pavel@ucw.cz>

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1f9ec02..e5a5a05 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1747,11 +1747,11 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane)
 		priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0);
 
-	/*  set TX ring length */
+	/* Set TX ring length */
 	if (priv->hw->dma->set_tx_ring_len)
 		priv->hw->dma->set_tx_ring_len(priv->ioaddr,
 					       (DMA_TX_SIZE - 1));
-	/*  set RX ring length */
+	/* Set RX ring length */
 	if (priv->hw->dma->set_rx_ring_len)
 		priv->hw->dma->set_rx_ring_len(priv->ioaddr,
 					       (DMA_RX_SIZE - 1));
@@ -2212,7 +2212,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->tx_skbuff[first_entry] = skb;
 
 	enh_desc = priv->plat->enh_desc;
-	/* To program the descriptors according to the size of the frame */
+	/* Program the descriptors according to the size of the frame */
 	if (enh_desc)
 		is_jumbo = priv->hw->mode->is_jumbo_frm(skb->len, enh_desc);
 
@@ -2665,7 +2665,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
  *  @budget : maximum number of packets that the current CPU can receive from
  *	      all interfaces.
  *  Description :
- *  To look at the incoming frames and clear the tx resources.
+ *  Look at the incoming frames and clear the tx resources.
  */
 static int stmmac_poll(struct napi_struct *napi, int budget)
 {
@@ -2828,7 +2828,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 		return IRQ_NONE;
 	}
 
-	/* To handle GMAC own interrupts */
+	/* Handle GMAC own interrupts */
 	if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {
 		int status = priv->hw->mac->host_irq_status(priv->hw,
 							    &priv->xstats);
@@ -2853,7 +2853,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 		}
 	}
 
-	/* To handle DMA interrupts */
+	/* Handle DMA interrupts */
 	stmmac_dma_interrupt(priv);
 
 	return IRQ_HANDLED;
@@ -3145,7 +3145,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 
 	priv->hw = mac;
 
-	/* To use the chained or ring mode */
+	/* Use the chained or ring mode */
 	if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 		priv->hw->mode = &dwmac4_ring_mode_ops;
 	} else {
@@ -3191,7 +3191,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 	} else
 		pr_info(" No HW DMA feature register supported");
 
-	/* To use alternate (extended), normal or GMAC4 descriptor structures */
+	/* Use alternate (extended), normal or GMAC4 descriptor structures */
 	if (priv->synopsys_id >= DWMAC_CORE_4_00)
 		priv->hw->desc = &dwmac4_desc_ops;
 	else



-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox