Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 1/2] tcp: add max_quickacks param to tcp_incr_quickack and tcp_enter_quickack_mode
From: Eric Dumazet @ 2018-05-21 22:08 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Van Jacobson, Neal Cardwell, Yuchung Cheng,
	Soheil Hassas Yeganeh, Eric Dumazet, Eric Dumazet
In-Reply-To: <20180521220857.229273-1-edumazet@google.com>

We want to add finer control of the number of ACK packets sent after
ECN events.

This patch is not changing current behavior, it only enables following
change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/tcp_input.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index aebb29ab2fdf2ceaa182cd11928f145a886149ff..2e970e9f4e09d966b703af2d14d521a4328eba7e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -203,21 +203,23 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
-static void tcp_incr_quickack(struct sock *sk)
+static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
 
 	if (quickacks == 0)
 		quickacks = 2;
+	quickacks = min(quickacks, max_quickacks);
 	if (quickacks > icsk->icsk_ack.quick)
-		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
+		icsk->icsk_ack.quick = quickacks;
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk)
+static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	tcp_incr_quickack(sk);
+
+	tcp_incr_quickack(sk, max_quickacks);
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
@@ -261,7 +263,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode((struct sock *)tp);
+			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
 		break;
 	case INET_ECN_CE:
 		if (tcp_ca_needs_ecn((struct sock *)tp))
@@ -269,7 +271,7 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode((struct sock *)tp);
+			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;
@@ -686,7 +688,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 		/* The _first_ data packet received, initialize
 		 * delayed ACK engine.
 		 */
-		tcp_incr_quickack(sk);
+		tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
 		icsk->icsk_ack.ato = TCP_ATO_MIN;
 	} else {
 		int m = now - icsk->icsk_ack.lrcvtime;
@@ -702,7 +704,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 			/* Too long gap. Apparently sender failed to
 			 * restart window, so that we send ACKs quickly.
 			 */
-			tcp_incr_quickack(sk);
+			tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
 			sk_mem_reclaim(sk);
 		}
 	}
@@ -4179,7 +4181,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 
 		if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -4706,7 +4708,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 
 out_of_window:
-		tcp_enter_quickack_mode(sk);
+		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 		inet_csk_schedule_ack(sk);
 drop:
 		tcp_drop(sk, skb);
@@ -5790,7 +5792,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			 * to stand against the temptation 8)     --ANK
 			 */
 			inet_csk_schedule_ack(sk);
-			tcp_enter_quickack_mode(sk);
+			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
 
-- 
2.17.0.441.gb46fe60e1d-goog

^ permalink raw reply related

* [PATCH net-next 0/2] tcp: reduce quickack pressure for ECN
From: Eric Dumazet @ 2018-05-21 22:08 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Van Jacobson, Neal Cardwell, Yuchung Cheng,
	Soheil Hassas Yeganeh, Eric Dumazet, Eric Dumazet

Small patch series changing TCP behavior vs quickack and ECN

First patch is a refactoring, adding parameter to tcp_incr_quickack()
and tcp_enter_quickack_mode() helpers.

Second patch implements the change, lowering number of ACK packets
sent after an ECN event.

Eric Dumazet (2):
  tcp: add max_quickacks param to tcp_incr_quickack and
    tcp_enter_quickack_mode
  tcp: do not aggressively quick ack after ECN events

 net/ipv4/tcp_input.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

-- 
2.17.0.441.gb46fe60e1d-goog

^ permalink raw reply

* Re: [PATCH net] tuntap: raise EPOLLOUT on device up
From: Michael S. Tsirkin @ 2018-05-21 22:08 UTC (permalink / raw)
  To: David Miller; +Cc: jasowang, netdev, linux-kernel, hannes, edumazet
In-Reply-To: <20180521.114742.427929977852677864.davem@davemloft.net>

On Mon, May 21, 2018 at 11:47:42AM -0400, David Miller wrote:
> From: Jason Wang <jasowang@redhat.com>
> Date: Fri, 18 May 2018 21:00:43 +0800
> 
> > We return -EIO on device down but can not raise EPOLLOUT after it was
> > up. This may confuse user like vhost which expects tuntap to raise
> > EPOLLOUT to re-enable its TX routine after tuntap is down. This could
> > be easily reproduced by transmitting packets from VM while down and up
> > the tap device. Fixing this by set SOCKWQ_ASYNC_NOSPACE on -EIO.
> > 
> > Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
> > Cc: Eric Dumazet <edumazet@google.com>
> > Fixes: 1bd4978a88ac2 ("tun: honor IFF_UP in tun_get_user()")
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> 
> I'm no so sure what to do with this patch.
> 
> Like Michael says, this flag bit is only checks upon transmit which
> may or may not happen after this point.  It doesn't seem to be
> guaranteed.

Jason, can't we detect a link up transition and respond accordingly?
What do you think?

-- 
MST

^ permalink raw reply

* Re: [PATCH net] tuntap: raise EPOLLOUT on device up
From: Michael S. Tsirkin @ 2018-05-21 22:06 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, linux-kernel, Hannes Frederic Sowa, Eric Dumazet
In-Reply-To: <e178f169-43c4-8ac4-8334-bc3eba1bd10e@redhat.com>

On Sat, May 19, 2018 at 09:09:11AM +0800, Jason Wang wrote:
> 
> 
> On 2018年05月18日 22:46, Michael S. Tsirkin wrote:
> > On Fri, May 18, 2018 at 10:11:54PM +0800, Jason Wang wrote:
> > > 
> > > On 2018年05月18日 22:06, Michael S. Tsirkin wrote:
> > > > On Fri, May 18, 2018 at 10:00:31PM +0800, Jason Wang wrote:
> > > > > On 2018年05月18日 21:26, Jason Wang wrote:
> > > > > > On 2018年05月18日 21:13, Michael S. Tsirkin wrote:
> > > > > > > On Fri, May 18, 2018 at 09:00:43PM +0800, Jason Wang wrote:
> > > > > > > > We return -EIO on device down but can not raise EPOLLOUT after it was
> > > > > > > > up. This may confuse user like vhost which expects tuntap to raise
> > > > > > > > EPOLLOUT to re-enable its TX routine after tuntap is down. This could
> > > > > > > > be easily reproduced by transmitting packets from VM while down and up
> > > > > > > > the tap device. Fixing this by set SOCKWQ_ASYNC_NOSPACE on -EIO.
> > > > > > > > 
> > > > > > > > Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
> > > > > > > > Cc: Eric Dumazet <edumazet@google.com>
> > > > > > > > Fixes: 1bd4978a88ac2 ("tun: honor IFF_UP in tun_get_user()")
> > > > > > > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > > > > > > ---
> > > > > > > >     drivers/net/tun.c | 4 +++-
> > > > > > > >     1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > > > 
> > > > > > > > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > > > > > > > index d45ac37..1b29761 100644
> > > > > > > > --- a/drivers/net/tun.c
> > > > > > > > +++ b/drivers/net/tun.c
> > > > > > > > @@ -1734,8 +1734,10 @@ static ssize_t tun_get_user(struct
> > > > > > > > tun_struct *tun, struct tun_file *tfile,
> > > > > > > >         int skb_xdp = 1;
> > > > > > > >         bool frags = tun_napi_frags_enabled(tun);
> > > > > > > >     -    if (!(tun->dev->flags & IFF_UP))
> > > > > > > > +    if (!(tun->dev->flags & IFF_UP)) {
> > > > > > > Isn't this racy?  What if flag is cleared at this point?
> > > > > > I think you mean "set at this point"? Then yes, so we probably need to
> > > > > > set the bit during tun_net_close().
> > > > > > 
> > > > > > Thanks
> > > > > Looks no need, vhost will poll socket after it see EIO. So we are ok here?
> > > > > 
> > > > > Thanks
> > > > In fact I don't even understand why does this help any longer.
> > > > 
> > > We disable tx polling and only enable it on demand for a better rx
> > > performance. You may want to have a look at :
> > > 
> > > commit feb8892cb441c742d4220cf7ced001e7fa070731
> > > Author: Jason Wang <jasowang@redhat.com>
> > > Date:   Mon Nov 13 11:45:34 2017 +0800
> > > 
> > >      vhost_net: conditionally enable tx polling
> > > 
> > > Thanks
> > 
> > Question is, what looks at SOCKWQ_ASYNC_NOSPACE.
> > I think it's tested when packet is transmitted,
> > but there is no guarantee here any packet will
> > ever be transmitted.
> > 
> 
> Well, actually, I do plan to disable vq polling from the beginning. But
> looks like you do not want this:
> 
> See https://patchwork.kernel.org/patch/10034025/
> 
> Thanks

Not sure I understand what you are saying, it's enabling polling we are
talking about.

-- 
MST

^ permalink raw reply

* [PATCH net-next 12/12] amd-xgbe: Improve SFP 100Mbps auto-negotiation
From: Tom Lendacky @ 2018-05-21 22:00 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

After changing speed to 100Mbps as a result of auto-negotiation (AN),
some 10/100/1000Mbps SFPs indicate a successful link (no faults or loss
of signal), but cannot successfully transmit or receive data.  These
SFPs required an extra auto-negotiation (AN) after the speed change in
order to operate properly.  Add a quirk for these SFPs so that if the
outcome of the AN actually results in changing to a new speed, re-initiate
AN at that new speed.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c   |   77 +++++++++++++++------------
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c |    6 ++
 drivers/net/ethernet/amd/xgbe/xgbe.h        |    1 
 3 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 450b89c..4b5d625 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -331,13 +331,15 @@ static void xgbe_switch_mode(struct xgbe_prv_data *pdata)
 	xgbe_change_mode(pdata, pdata->phy_if.phy_impl.switch_mode(pdata));
 }
 
-static void xgbe_set_mode(struct xgbe_prv_data *pdata,
+static bool xgbe_set_mode(struct xgbe_prv_data *pdata,
 			  enum xgbe_mode mode)
 {
 	if (mode == xgbe_cur_mode(pdata))
-		return;
+		return false;
 
 	xgbe_change_mode(pdata, mode);
+
+	return true;
 }
 
 static bool xgbe_use_mode(struct xgbe_prv_data *pdata,
@@ -1178,21 +1180,23 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata)
 	return 0;
 }
 
-static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
+static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata, bool set_mode)
 {
 	int ret;
 
+	mutex_lock(&pdata->an_mutex);
+
 	set_bit(XGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = jiffies;
 
 	ret = pdata->phy_if.phy_impl.an_config(pdata);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (pdata->phy.autoneg != AUTONEG_ENABLE) {
 		ret = xgbe_phy_config_fixed(pdata);
 		if (ret || !pdata->kr_redrv)
-			return ret;
+			goto out;
 
 		netif_dbg(pdata, link, pdata->netdev, "AN redriver support\n");
 	} else {
@@ -1202,24 +1206,27 @@ static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
 	/* Disable auto-negotiation interrupt */
 	disable_irq(pdata->an_irq);
 
-	/* Start auto-negotiation in a supported mode */
-	if (xgbe_use_mode(pdata, XGBE_MODE_KR)) {
-		xgbe_set_mode(pdata, XGBE_MODE_KR);
-	} else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) {
-		xgbe_set_mode(pdata, XGBE_MODE_KX_2500);
-	} else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) {
-		xgbe_set_mode(pdata, XGBE_MODE_KX_1000);
-	} else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) {
-		xgbe_set_mode(pdata, XGBE_MODE_SFI);
-	} else if (xgbe_use_mode(pdata, XGBE_MODE_X)) {
-		xgbe_set_mode(pdata, XGBE_MODE_X);
-	} else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) {
-		xgbe_set_mode(pdata, XGBE_MODE_SGMII_1000);
-	} else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) {
-		xgbe_set_mode(pdata, XGBE_MODE_SGMII_100);
-	} else {
-		enable_irq(pdata->an_irq);
-		return -EINVAL;
+	if (set_mode) {
+		/* Start auto-negotiation in a supported mode */
+		if (xgbe_use_mode(pdata, XGBE_MODE_KR)) {
+			xgbe_set_mode(pdata, XGBE_MODE_KR);
+		} else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) {
+			xgbe_set_mode(pdata, XGBE_MODE_KX_2500);
+		} else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) {
+			xgbe_set_mode(pdata, XGBE_MODE_KX_1000);
+		} else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) {
+			xgbe_set_mode(pdata, XGBE_MODE_SFI);
+		} else if (xgbe_use_mode(pdata, XGBE_MODE_X)) {
+			xgbe_set_mode(pdata, XGBE_MODE_X);
+		} else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) {
+			xgbe_set_mode(pdata, XGBE_MODE_SGMII_1000);
+		} else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) {
+			xgbe_set_mode(pdata, XGBE_MODE_SGMII_100);
+		} else {
+			enable_irq(pdata->an_irq);
+			ret = -EINVAL;
+			goto out;
+		}
 	}
 
 	/* Disable and stop any in progress auto-negotiation */
@@ -1239,16 +1246,7 @@ static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
 	xgbe_an_init(pdata);
 	xgbe_an_restart(pdata);
 
-	return 0;
-}
-
-static int xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
-{
-	int ret;
-
-	mutex_lock(&pdata->an_mutex);
-
-	ret = __xgbe_phy_config_aneg(pdata);
+out:
 	if (ret)
 		set_bit(XGBE_LINK_ERR, &pdata->dev_state);
 	else
@@ -1259,6 +1257,16 @@ static int xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
 	return ret;
 }
 
+static int xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
+{
+	return __xgbe_phy_config_aneg(pdata, true);
+}
+
+static int xgbe_phy_reconfig_aneg(struct xgbe_prv_data *pdata)
+{
+	return __xgbe_phy_config_aneg(pdata, false);
+}
+
 static bool xgbe_phy_aneg_done(struct xgbe_prv_data *pdata)
 {
 	return (pdata->an_result == XGBE_AN_COMPLETE);
@@ -1315,7 +1323,8 @@ static void xgbe_phy_status_result(struct xgbe_prv_data *pdata)
 
 	pdata->phy.duplex = DUPLEX_FULL;
 
-	xgbe_set_mode(pdata, mode);
+	if (xgbe_set_mode(pdata, mode) && pdata->an_again)
+		xgbe_phy_reconfig_aneg(pdata);
 }
 
 static void xgbe_phy_status(struct xgbe_prv_data *pdata)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 194a569..3ceb4f9 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -902,6 +902,9 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
 		   XGBE_BEL_FUSE_VENDOR, XGBE_SFP_BASE_VENDOR_NAME_LEN))
 		return false;
 
+	/* For Bel-Fuse, use the extra AN flag */
+	pdata->an_again = 1;
+
 	if (memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN],
 		   XGBE_BEL_FUSE_PARTNO, XGBE_SFP_BASE_VENDOR_PN_LEN))
 		return false;
@@ -978,6 +981,9 @@ static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata)
 	if (phy_data->phydev)
 		return 0;
 
+	/* Clear the extra AN flag */
+	pdata->an_again = 0;
+
 	/* Check for the use of an external PHY */
 	if (phy_data->phydev_mode == XGBE_MDIO_MODE_NONE)
 		return 0;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 7a412cf..47bcbcf 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -1261,6 +1261,7 @@ struct xgbe_prv_data {
 	enum xgbe_rx kr_state;
 	enum xgbe_rx kx_state;
 	struct work_struct an_work;
+	unsigned int an_again;
 	unsigned int an_supported;
 	unsigned int parallel_detect;
 	unsigned int fec_ability;

^ permalink raw reply related

* [PATCH net-next 11/12] amd-xgbe: Update the BelFuse quirk to support SGMII
From: Tom Lendacky @ 2018-05-21 22:00 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Instead of using a quirk to make the BelFuse 1GBT-SFP06 part look like
a 1000baseX part, program the SFP PHY to support SGMII and 10/100/1000
baseT.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c |  109 +++++++++++++++++++--------
 1 file changed, 75 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index dd747f6..194a569 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -860,6 +860,9 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata)
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
 	unsigned int phy_id = phy_data->phydev->phy_id;
 
+	if (phy_data->port_mode != XGBE_PORT_MODE_SFP)
+		return false;
+
 	if ((phy_id & 0xfffffff0) != 0x01ff0cc0)
 		return false;
 
@@ -885,8 +888,80 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata)
 	return true;
 }
 
+static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom;
+	unsigned int phy_id = phy_data->phydev->phy_id;
+	int reg;
+
+	if (phy_data->port_mode != XGBE_PORT_MODE_SFP)
+		return false;
+
+	if (memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME],
+		   XGBE_BEL_FUSE_VENDOR, XGBE_SFP_BASE_VENDOR_NAME_LEN))
+		return false;
+
+	if (memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN],
+		   XGBE_BEL_FUSE_PARTNO, XGBE_SFP_BASE_VENDOR_PN_LEN))
+		return false;
+
+	if ((phy_id & 0xfffffff0) != 0x03625d10)
+		return false;
+
+	/* Disable RGMII mode */
+	phy_write(phy_data->phydev, 0x18, 0x7007);
+	reg = phy_read(phy_data->phydev, 0x18);
+	phy_write(phy_data->phydev, 0x18, reg & ~0x0080);
+
+	/* Enable fiber register bank */
+	phy_write(phy_data->phydev, 0x1c, 0x7c00);
+	reg = phy_read(phy_data->phydev, 0x1c);
+	reg &= 0x03ff;
+	reg &= ~0x0001;
+	phy_write(phy_data->phydev, 0x1c, 0x8000 | 0x7c00 | reg | 0x0001);
+
+	/* Power down SerDes */
+	reg = phy_read(phy_data->phydev, 0x00);
+	phy_write(phy_data->phydev, 0x00, reg | 0x00800);
+
+	/* Configure SGMII-to-Copper mode */
+	phy_write(phy_data->phydev, 0x1c, 0x7c00);
+	reg = phy_read(phy_data->phydev, 0x1c);
+	reg &= 0x03ff;
+	reg &= ~0x0006;
+	phy_write(phy_data->phydev, 0x1c, 0x8000 | 0x7c00 | reg | 0x0004);
+
+	/* Power up SerDes */
+	reg = phy_read(phy_data->phydev, 0x00);
+	phy_write(phy_data->phydev, 0x00, reg & ~0x00800);
+
+	/* Enable copper register bank */
+	phy_write(phy_data->phydev, 0x1c, 0x7c00);
+	reg = phy_read(phy_data->phydev, 0x1c);
+	reg &= 0x03ff;
+	reg &= ~0x0001;
+	phy_write(phy_data->phydev, 0x1c, 0x8000 | 0x7c00 | reg);
+
+	/* Power up SerDes */
+	reg = phy_read(phy_data->phydev, 0x00);
+	phy_write(phy_data->phydev, 0x00, reg & ~0x00800);
+
+	phy_data->phydev->supported = PHY_GBIT_FEATURES;
+	phy_data->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+	phy_data->phydev->advertising = phy_data->phydev->supported;
+
+	netif_dbg(pdata, drv, pdata->netdev,
+		  "BelFuse PHY quirk in place\n");
+
+	return true;
+}
+
 static void xgbe_phy_external_phy_quirks(struct xgbe_prv_data *pdata)
 {
+	if (xgbe_phy_belfuse_phy_quirks(pdata))
+		return;
+
 	if (xgbe_phy_finisar_phy_quirks(pdata))
 		return;
 }
@@ -1027,37 +1102,6 @@ static bool xgbe_phy_check_sfp_mod_absent(struct xgbe_phy_data *phy_data)
 	return false;
 }
 
-static bool xgbe_phy_belfuse_parse_quirks(struct xgbe_prv_data *pdata)
-{
-	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom;
-
-	if (memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME],
-		   XGBE_BEL_FUSE_VENDOR, XGBE_SFP_BASE_VENDOR_NAME_LEN))
-		return false;
-
-	if (!memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN],
-		    XGBE_BEL_FUSE_PARTNO, XGBE_SFP_BASE_VENDOR_PN_LEN)) {
-		phy_data->sfp_base = XGBE_SFP_BASE_1000_SX;
-		phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE;
-		phy_data->sfp_speed = XGBE_SFP_SPEED_1000;
-		if (phy_data->sfp_changed)
-			netif_dbg(pdata, drv, pdata->netdev,
-				  "Bel-Fuse SFP quirk in place\n");
-		return true;
-	}
-
-	return false;
-}
-
-static bool xgbe_phy_sfp_parse_quirks(struct xgbe_prv_data *pdata)
-{
-	if (xgbe_phy_belfuse_parse_quirks(pdata))
-		return true;
-
-	return false;
-}
-
 static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
@@ -1076,9 +1120,6 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata)
 	phy_data->sfp_tx_fault = xgbe_phy_check_sfp_tx_fault(phy_data);
 	phy_data->sfp_rx_los = xgbe_phy_check_sfp_rx_los(phy_data);
 
-	if (xgbe_phy_sfp_parse_quirks(pdata))
-		return;
-
 	/* Assume ACTIVE cable unless told it is PASSIVE */
 	if (sfp_base[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_PASSIVE) {
 		phy_data->sfp_cable = XGBE_SFP_CABLE_PASSIVE;

^ permalink raw reply related

* [PATCH net-next 10/12] amd-xgbe: Advertise FEC support with the KR re-driver
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

When a KR re-driver is present, indicate the FEC support is available
during auto-negotiation.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 141bb13..dd747f6 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -1720,6 +1720,10 @@ static void xgbe_phy_an_advertising(struct xgbe_prv_data *pdata,
 	XGBE_CLR_ADV(dlks, 1000baseKX_Full);
 	XGBE_CLR_ADV(dlks, 10000baseKR_Full);
 
+	/* Advertise FEC support is present */
+	if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
+		XGBE_SET_ADV(dlks, 10000baseR_FEC);
+
 	switch (phy_data->port_mode) {
 	case XGBE_PORT_MODE_BACKPLANE:
 		XGBE_SET_ADV(dlks, 10000baseKR_Full);

^ permalink raw reply related

* [PATCH net-next 09/12] amd-xgbe: Always attempt link training in KR mode
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Link training is always attempted when in KR mode, but the code is
structured to check if link training has been enabled before attempting
to perform it.  Since that check will always be true, simplify the code
to always enable and start link training during KR auto-negotiation.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c |   69 +++++++----------------------
 1 file changed, 16 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 9c39c72..450b89c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -216,31 +216,8 @@ static void xgbe_an_clear_interrupts_all(struct xgbe_prv_data *pdata)
 	xgbe_an37_clear_interrupts(pdata);
 }
 
-static void xgbe_an73_enable_kr_training(struct xgbe_prv_data *pdata)
-{
-	unsigned int reg;
-
-	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
-
-	reg |= XGBE_KR_TRAINING_ENABLE;
-	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
-}
-
-static void xgbe_an73_disable_kr_training(struct xgbe_prv_data *pdata)
-{
-	unsigned int reg;
-
-	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
-
-	reg &= ~XGBE_KR_TRAINING_ENABLE;
-	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
-}
-
 static void xgbe_kr_mode(struct xgbe_prv_data *pdata)
 {
-	/* Enable KR training */
-	xgbe_an73_enable_kr_training(pdata);
-
 	/* Set MAC to 10G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_10000);
 
@@ -250,9 +227,6 @@ static void xgbe_kr_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_kx_2500_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 2.5G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_2500);
 
@@ -262,9 +236,6 @@ static void xgbe_kx_2500_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_kx_1000_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 1G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
@@ -278,9 +249,6 @@ static void xgbe_sfi_mode(struct xgbe_prv_data *pdata)
 	if (pdata->kr_redrv)
 		return xgbe_kr_mode(pdata);
 
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 10G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_10000);
 
@@ -290,9 +258,6 @@ static void xgbe_sfi_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_x_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 1G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
@@ -302,9 +267,6 @@ static void xgbe_x_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_sgmii_1000_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 1G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
@@ -314,9 +276,6 @@ static void xgbe_sgmii_1000_mode(struct xgbe_prv_data *pdata)
 
 static void xgbe_sgmii_100_mode(struct xgbe_prv_data *pdata)
 {
-	/* Disable KR training */
-	xgbe_an73_disable_kr_training(pdata);
-
 	/* Set MAC to 1G speed */
 	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
@@ -425,6 +384,12 @@ static void xgbe_an73_set(struct xgbe_prv_data *pdata, bool enable,
 {
 	unsigned int reg;
 
+	/* Disable KR training for now */
+	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+	reg &= ~XGBE_KR_TRAINING_ENABLE;
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+
+	/* Update AN settings */
 	reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1);
 	reg &= ~MDIO_AN_CTRL1_ENABLE;
 
@@ -522,21 +487,19 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata,
 	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FECCTRL, reg);
 
 	/* Start KR training */
-	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
-	if (reg & XGBE_KR_TRAINING_ENABLE) {
-		if (pdata->phy_if.phy_impl.kr_training_pre)
-			pdata->phy_if.phy_impl.kr_training_pre(pdata);
+	if (pdata->phy_if.phy_impl.kr_training_pre)
+		pdata->phy_if.phy_impl.kr_training_pre(pdata);
 
-		reg |= XGBE_KR_TRAINING_START;
-		XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL,
-			    reg);
+	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+	reg |= XGBE_KR_TRAINING_ENABLE;
+	reg |= XGBE_KR_TRAINING_START;
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
 
-		netif_dbg(pdata, link, pdata->netdev,
-			  "KR training initiated\n");
+	netif_dbg(pdata, link, pdata->netdev,
+		  "KR training initiated\n");
 
-		if (pdata->phy_if.phy_impl.kr_training_post)
-			pdata->phy_if.phy_impl.kr_training_post(pdata);
-	}
+	if (pdata->phy_if.phy_impl.kr_training_post)
+		pdata->phy_if.phy_impl.kr_training_post(pdata);
 
 	return XGBE_AN_PAGE_RECEIVED;
 }

^ permalink raw reply related

* [PATCH net-next 08/12] amd-xgbe: Add ethtool show/set channels support
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Add ethtool support to show and set the device channel configuration.
Changing the channel configuration will result in a device restart.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c     |   25 +++++
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |  131 ++++++++++++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe.h         |    4 +
 3 files changed, 160 insertions(+)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 397e3a0..24f1053 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1329,6 +1329,17 @@ static int xgbe_alloc_memory(struct xgbe_prv_data *pdata)
 	struct net_device *netdev = pdata->netdev;
 	int ret;
 
+	if (pdata->new_tx_ring_count) {
+		pdata->tx_ring_count = pdata->new_tx_ring_count;
+		pdata->tx_q_count = pdata->tx_ring_count;
+		pdata->new_tx_ring_count = 0;
+	}
+
+	if (pdata->new_rx_ring_count) {
+		pdata->rx_ring_count = pdata->new_rx_ring_count;
+		pdata->new_rx_ring_count = 0;
+	}
+
 	/* Calculate the Rx buffer size before allocating rings */
 	pdata->rx_buf_size = xgbe_calc_rx_buf_size(netdev, netdev->mtu);
 
@@ -1482,6 +1493,20 @@ static void xgbe_stopdev(struct work_struct *work)
 	netdev_alert(pdata->netdev, "device stopped\n");
 }
 
+void xgbe_full_restart_dev(struct xgbe_prv_data *pdata)
+{
+	/* If not running, "restart" will happen on open */
+	if (!netif_running(pdata->netdev))
+		return;
+
+	xgbe_stop(pdata);
+
+	xgbe_free_memory(pdata);
+	xgbe_alloc_memory(pdata);
+
+	xgbe_start(pdata);
+}
+
 void xgbe_restart_dev(struct xgbe_prv_data *pdata)
 {
 	/* If not running, "restart" will happen on open */
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index d12f982..d26fd95 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -705,6 +705,135 @@ static int xgbe_set_ringparam(struct net_device *netdev,
 	return 0;
 }
 
+static void xgbe_get_channels(struct net_device *netdev,
+			      struct ethtool_channels *channels)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	unsigned int rx, tx, combined;
+
+	/* Calculate maximums allowed:
+	 *   - Take into account the number of available IRQs
+	 *   - Do not take into account the number of online CPUs so that
+	 *     the user can over-subscribe if desired
+	 *   - Tx is additionally limited by the number of hardware queues
+	 */
+	rx = min(pdata->hw_feat.rx_ch_cnt, pdata->rx_max_channel_count);
+	rx = min(rx, pdata->channel_irq_count);
+	tx = min(pdata->hw_feat.tx_ch_cnt, pdata->tx_max_channel_count);
+	tx = min(tx, pdata->channel_irq_count);
+	tx = min(tx, pdata->tx_max_q_count);
+
+	combined = min(rx, tx);
+
+	channels->max_combined = combined;
+	channels->max_rx = rx;
+	channels->max_tx = tx;
+
+	/* Current running settings */
+	rx = pdata->rx_ring_count;
+	tx = pdata->tx_ring_count;
+
+	combined = min(rx, tx);
+	rx -= combined;
+	tx -= combined;
+
+	channels->combined_count = combined;
+	channels->rx_count = rx;
+	channels->tx_count = tx;
+}
+
+static void xgbe_print_set_channels_input(struct net_device *netdev,
+					  struct ethtool_channels *channels)
+{
+	netdev_err(netdev, "channel inputs: combined=%u, rx-only=%u, tx-only=%u\n",
+		   channels->combined_count, channels->rx_count,
+		   channels->tx_count);
+}
+
+static int xgbe_set_channels(struct net_device *netdev,
+			     struct ethtool_channels *channels)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	unsigned int rx, tx, combined;
+
+	/* Calculate maximums allowed:
+	 *   - Take into account the number of available IRQs
+	 *   - Do not take into account the number of online CPUs so that
+	 *     the user can over-subscribe if desired
+	 *   - Tx is additionally limited by the number of hardware queues
+	 */
+	rx = min(pdata->hw_feat.rx_ch_cnt, pdata->rx_max_channel_count);
+	rx = min(rx, pdata->channel_irq_count);
+	tx = min(pdata->hw_feat.tx_ch_cnt, pdata->tx_max_channel_count);
+	tx = min(tx, pdata->tx_max_q_count);
+	tx = min(tx, pdata->channel_irq_count);
+
+	combined = min(rx, tx);
+
+	/* Should not be setting other count */
+	if (channels->other_count) {
+		netdev_err(netdev,
+			   "other channel count must be zero\n");
+		return -EINVAL;
+	}
+
+	/* Require at least one Rx and Tx channel */
+	if (!channels->combined_count) {
+		if (!channels->rx_count || !channels->tx_count) {
+			netdev_err(netdev,
+				   "at least one Rx and one Tx channel is required\n");
+			xgbe_print_set_channels_input(netdev, channels);
+			return -EINVAL;
+		}
+	}
+
+	/* Check combined channels */
+	if (channels->combined_count > combined) {
+		netdev_err(netdev,
+			   "combined channel count cannot exceed %u\n",
+			   combined);
+		xgbe_print_set_channels_input(netdev, channels);
+		return -EINVAL;
+	}
+
+	/* Check for Rx/Tx specific channels */
+	combined = channels->combined_count;
+	tx -= combined;
+	rx -= combined;
+	if (channels->rx_count > rx) {
+		netdev_err(netdev,
+			   "Rx channel count cannot exceed %u when combined channel count is %u\n",
+			   rx, combined);
+		xgbe_print_set_channels_input(netdev, channels);
+		return -EINVAL;
+	}
+
+	if (channels->tx_count > tx) {
+		netdev_err(netdev,
+			   "Tx channel count cannot exceed %u when combined channel count is %u\n",
+			   tx, combined);
+		xgbe_print_set_channels_input(netdev, channels);
+		return -EINVAL;
+	}
+
+	rx = combined + channels->rx_count;
+	tx = combined + channels->tx_count;
+	netdev_notice(netdev, "final channel count assignment: combined=%u, rx-only=%u, tx-only=%u\n",
+		      min(rx, tx), rx - min(rx, tx), tx - min(rx, tx));
+
+	if ((rx == pdata->rx_ring_count) &&
+	    (tx == pdata->tx_ring_count))
+		goto out;
+
+	pdata->new_rx_ring_count = rx;
+	pdata->new_tx_ring_count = tx;
+
+	xgbe_full_restart_dev(pdata);
+
+out:
+	return 0;
+}
+
 static const struct ethtool_ops xgbe_ethtool_ops = {
 	.get_drvinfo = xgbe_get_drvinfo,
 	.get_msglevel = xgbe_get_msglevel,
@@ -729,6 +858,8 @@ static int xgbe_set_ringparam(struct net_device *netdev,
 	.get_module_eeprom = xgbe_get_module_eeprom,
 	.get_ringparam = xgbe_get_ringparam,
 	.set_ringparam = xgbe_set_ringparam,
+	.get_channels = xgbe_get_channels,
+	.set_channels = xgbe_set_channels,
 };
 
 const struct ethtool_ops *xgbe_get_ethtool_ops(void)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 7dc0fac..7a412cf 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -1122,6 +1122,9 @@ struct xgbe_prv_data {
 	unsigned int rx_ring_count;
 	unsigned int rx_desc_count;
 
+	unsigned int new_tx_ring_count;
+	unsigned int new_rx_ring_count;
+
 	unsigned int tx_max_q_count;
 	unsigned int rx_max_q_count;
 	unsigned int tx_q_count;
@@ -1336,6 +1339,7 @@ void xgbe_dump_rx_desc(struct xgbe_prv_data *, struct xgbe_ring *,
 void xgbe_init_rx_coalesce(struct xgbe_prv_data *);
 void xgbe_init_tx_coalesce(struct xgbe_prv_data *);
 void xgbe_restart_dev(struct xgbe_prv_data *pdata);
+void xgbe_full_restart_dev(struct xgbe_prv_data *pdata);
 
 #ifdef CONFIG_DEBUG_FS
 void xgbe_debugfs_init(struct xgbe_prv_data *);

^ permalink raw reply related

* [PATCH net-next 07/12] amd-xgbe: Prepare for ethtool set-channel support
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

In order to support being able to dynamically set/change the number of
Rx and Tx channels, update the code to:
 - Move alloc and free of device memory into callable functions
 - Move setting of the real number of Rx and Tx channels to device startup
 - Move mapping of the RSS channels to device startup

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c  |  108 ++++++++++++++++++-----------
 drivers/net/ethernet/amd/xgbe/xgbe-main.c |   20 -----
 2 files changed, 68 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 2646c08..397e3a0 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1312,14 +1312,72 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller)
 	return 0;
 }
 
+static void xgbe_free_memory(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+
+	/* Free the ring descriptors and buffers */
+	desc_if->free_ring_resources(pdata);
+
+	/* Free the channel and ring structures */
+	xgbe_free_channels(pdata);
+}
+
+static int xgbe_alloc_memory(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct net_device *netdev = pdata->netdev;
+	int ret;
+
+	/* Calculate the Rx buffer size before allocating rings */
+	pdata->rx_buf_size = xgbe_calc_rx_buf_size(netdev, netdev->mtu);
+
+	/* Allocate the channel and ring structures */
+	ret = xgbe_alloc_channels(pdata);
+	if (ret)
+		return ret;
+
+	/* Allocate the ring descriptors and buffers */
+	ret = desc_if->alloc_ring_resources(pdata);
+	if (ret)
+		goto err_channels;
+
+	/* Initialize the service and Tx timers */
+	xgbe_init_timers(pdata);
+
+	return 0;
+
+err_channels:
+	xgbe_free_memory(pdata);
+
+	return ret;
+}
+
 static int xgbe_start(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_phy_if *phy_if = &pdata->phy_if;
 	struct net_device *netdev = pdata->netdev;
+	unsigned int i;
 	int ret;
 
-	DBGPR("-->xgbe_start\n");
+	/* Set the number of queues */
+	ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count);
+	if (ret) {
+		netdev_err(netdev, "error setting real tx queue count\n");
+		return ret;
+	}
+
+	ret = netif_set_real_num_rx_queues(netdev, pdata->rx_ring_count);
+	if (ret) {
+		netdev_err(netdev, "error setting real rx queue count\n");
+		return ret;
+	}
+
+	/* Set RSS lookup table data for programming */
+	for (i = 0; i < XGBE_RSS_MAX_TABLE_SIZE; i++)
+		XGMAC_SET_BITS(pdata->rss_table[i], MAC_RSSDR, DMCH,
+			       i % pdata->rx_ring_count);
 
 	ret = hw_if->init(pdata);
 	if (ret)
@@ -1347,8 +1405,6 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
 
 	clear_bit(XGBE_STOPPED, &pdata->dev_state);
 
-	DBGPR("<--xgbe_start\n");
-
 	return 0;
 
 err_irqs:
@@ -1823,11 +1879,8 @@ static void xgbe_packet_info(struct xgbe_prv_data *pdata,
 static int xgbe_open(struct net_device *netdev)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
-	struct xgbe_desc_if *desc_if = &pdata->desc_if;
 	int ret;
 
-	DBGPR("-->xgbe_open\n");
-
 	/* Create the various names based on netdev name */
 	snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs",
 		 netdev_name(netdev));
@@ -1872,43 +1925,25 @@ static int xgbe_open(struct net_device *netdev)
 		goto err_sysclk;
 	}
 
-	/* Calculate the Rx buffer size before allocating rings */
-	ret = xgbe_calc_rx_buf_size(netdev, netdev->mtu);
-	if (ret < 0)
-		goto err_ptpclk;
-	pdata->rx_buf_size = ret;
-
-	/* Allocate the channel and ring structures */
-	ret = xgbe_alloc_channels(pdata);
-	if (ret)
-		goto err_ptpclk;
-
-	/* Allocate the ring descriptors and buffers */
-	ret = desc_if->alloc_ring_resources(pdata);
-	if (ret)
-		goto err_channels;
-
 	INIT_WORK(&pdata->service_work, xgbe_service);
 	INIT_WORK(&pdata->restart_work, xgbe_restart);
 	INIT_WORK(&pdata->stopdev_work, xgbe_stopdev);
 	INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp);
-	xgbe_init_timers(pdata);
+
+	ret = xgbe_alloc_memory(pdata);
+	if (ret)
+		goto err_ptpclk;
 
 	ret = xgbe_start(pdata);
 	if (ret)
-		goto err_rings;
+		goto err_mem;
 
 	clear_bit(XGBE_DOWN, &pdata->dev_state);
 
-	DBGPR("<--xgbe_open\n");
-
 	return 0;
 
-err_rings:
-	desc_if->free_ring_resources(pdata);
-
-err_channels:
-	xgbe_free_channels(pdata);
+err_mem:
+	xgbe_free_memory(pdata);
 
 err_ptpclk:
 	clk_disable_unprepare(pdata->ptpclk);
@@ -1928,18 +1963,11 @@ static int xgbe_open(struct net_device *netdev)
 static int xgbe_close(struct net_device *netdev)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
-	struct xgbe_desc_if *desc_if = &pdata->desc_if;
-
-	DBGPR("-->xgbe_close\n");
 
 	/* Stop the device */
 	xgbe_stop(pdata);
 
-	/* Free the ring descriptors and buffers */
-	desc_if->free_ring_resources(pdata);
-
-	/* Free the channel and ring structures */
-	xgbe_free_channels(pdata);
+	xgbe_free_memory(pdata);
 
 	/* Disable the clocks */
 	clk_disable_unprepare(pdata->ptpclk);
@@ -1953,8 +1981,6 @@ static int xgbe_close(struct net_device *netdev)
 
 	set_bit(XGBE_DOWN, &pdata->dev_state);
 
-	DBGPR("<--xgbe_close\n");
-
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 441d0973..b41f236 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -265,7 +265,6 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
 {
 	struct net_device *netdev = pdata->netdev;
 	struct device *dev = pdata->dev;
-	unsigned int i;
 	int ret;
 
 	netdev->irq = pdata->dev_irq;
@@ -324,26 +323,9 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
 				pdata->tx_ring_count, pdata->rx_ring_count);
 	}
 
-	/* Set the number of queues */
-	ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count);
-	if (ret) {
-		dev_err(dev, "error setting real tx queue count\n");
-		return ret;
-	}
-
-	ret = netif_set_real_num_rx_queues(netdev, pdata->rx_ring_count);
-	if (ret) {
-		dev_err(dev, "error setting real rx queue count\n");
-		return ret;
-	}
-
-	/* Initialize RSS hash key and lookup table */
+	/* Initialize RSS hash key */
 	netdev_rss_key_fill(pdata->rss_key, sizeof(pdata->rss_key));
 
-	for (i = 0; i < XGBE_RSS_MAX_TABLE_SIZE; i++)
-		XGMAC_SET_BITS(pdata->rss_table[i], MAC_RSSDR, DMCH,
-			       i % pdata->rx_ring_count);
-
 	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, IP2TE, 1);
 	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, TCP4TE, 1);
 	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1);

^ permalink raw reply related

* [PATCH net-next 06/12] amd-xgbe: Add ethtool show/set ring parameter support
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Add ethtool support to show and set the number of the Rx and Tx ring
descriptors.  Changing the ring configuration will result in a device
restart.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c     |    6 --
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |   65 ++++++++++++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe.h         |    6 ++
 3 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 7c204f0..2646c08 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1426,10 +1426,8 @@ static void xgbe_stopdev(struct work_struct *work)
 	netdev_alert(pdata->netdev, "device stopped\n");
 }
 
-static void xgbe_restart_dev(struct xgbe_prv_data *pdata)
+void xgbe_restart_dev(struct xgbe_prv_data *pdata)
 {
-	DBGPR("-->xgbe_restart_dev\n");
-
 	/* If not running, "restart" will happen on open */
 	if (!netif_running(pdata->netdev))
 		return;
@@ -1440,8 +1438,6 @@ static void xgbe_restart_dev(struct xgbe_prv_data *pdata)
 	xgbe_free_rx_data(pdata);
 
 	xgbe_start(pdata);
-
-	DBGPR("<--xgbe_restart_dev\n");
 }
 
 static void xgbe_restart(struct work_struct *work)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 57394b77..d12f982 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -642,6 +642,69 @@ static int xgbe_get_module_eeprom(struct net_device *netdev,
 	return pdata->phy_if.module_eeprom(pdata, eeprom, data);
 }
 
+static void xgbe_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ringparam)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	ringparam->rx_max_pending = XGBE_RX_DESC_CNT_MAX;
+	ringparam->tx_max_pending = XGBE_TX_DESC_CNT_MAX;
+	ringparam->rx_pending = pdata->rx_desc_count;
+	ringparam->tx_pending = pdata->tx_desc_count;
+}
+
+static int xgbe_set_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ringparam)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	unsigned int rx, tx;
+
+	if (ringparam->rx_mini_pending || ringparam->rx_jumbo_pending) {
+		netdev_err(netdev, "unsupported ring parameter\n");
+		return -EINVAL;
+	}
+
+	if ((ringparam->rx_pending < XGBE_RX_DESC_CNT_MIN) ||
+	    (ringparam->rx_pending > XGBE_RX_DESC_CNT_MAX)) {
+		netdev_err(netdev,
+			   "rx ring parameter must be between %u and %u\n",
+			   XGBE_RX_DESC_CNT_MIN, XGBE_RX_DESC_CNT_MAX);
+		return -EINVAL;
+	}
+
+	if ((ringparam->tx_pending < XGBE_TX_DESC_CNT_MIN) ||
+	    (ringparam->tx_pending > XGBE_TX_DESC_CNT_MAX)) {
+		netdev_err(netdev,
+			   "tx ring parameter must be between %u and %u\n",
+			   XGBE_TX_DESC_CNT_MIN, XGBE_TX_DESC_CNT_MAX);
+		return -EINVAL;
+	}
+
+	rx = __rounddown_pow_of_two(ringparam->rx_pending);
+	if (rx != ringparam->rx_pending)
+		netdev_notice(netdev,
+			      "rx ring parameter rounded to power of two: %u\n",
+			      rx);
+
+	tx = __rounddown_pow_of_two(ringparam->tx_pending);
+	if (tx != ringparam->tx_pending)
+		netdev_notice(netdev,
+			      "tx ring parameter rounded to power of two: %u\n",
+			      tx);
+
+	if ((rx == pdata->rx_desc_count) &&
+	    (tx == pdata->tx_desc_count))
+		goto out;
+
+	pdata->rx_desc_count = rx;
+	pdata->tx_desc_count = tx;
+
+	xgbe_restart_dev(pdata);
+
+out:
+	return 0;
+}
+
 static const struct ethtool_ops xgbe_ethtool_ops = {
 	.get_drvinfo = xgbe_get_drvinfo,
 	.get_msglevel = xgbe_get_msglevel,
@@ -664,6 +727,8 @@ static int xgbe_get_module_eeprom(struct net_device *netdev,
 	.set_link_ksettings = xgbe_set_link_ksettings,
 	.get_module_info = xgbe_get_module_info,
 	.get_module_eeprom = xgbe_get_module_eeprom,
+	.get_ringparam = xgbe_get_ringparam,
+	.set_ringparam = xgbe_set_ringparam,
 };
 
 const struct ethtool_ops *xgbe_get_ethtool_ops(void)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index f0f455b..7dc0fac 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -144,6 +144,11 @@
 #define XGBE_TX_DESC_MAX_PROC	(XGBE_TX_DESC_CNT >> 1)
 #define XGBE_RX_DESC_CNT	512
 
+#define XGBE_TX_DESC_CNT_MIN	64
+#define XGBE_TX_DESC_CNT_MAX	4096
+#define XGBE_RX_DESC_CNT_MIN	64
+#define XGBE_RX_DESC_CNT_MAX	4096
+
 #define XGBE_TX_MAX_BUF_SIZE	(0x3fff & ~(64 - 1))
 
 /* Descriptors required for maximum contiguous TSO/GSO packet */
@@ -1330,6 +1335,7 @@ void xgbe_dump_rx_desc(struct xgbe_prv_data *, struct xgbe_ring *,
 int xgbe_powerdown(struct net_device *, unsigned int);
 void xgbe_init_rx_coalesce(struct xgbe_prv_data *);
 void xgbe_init_tx_coalesce(struct xgbe_prv_data *);
+void xgbe_restart_dev(struct xgbe_prv_data *pdata);
 
 #ifdef CONFIG_DEBUG_FS
 void xgbe_debugfs_init(struct xgbe_prv_data *);

^ permalink raw reply related

* [PATCH net-next 05/12] amd-xgbe: Add ethtool support to retrieve SFP module info
From: Tom Lendacky @ 2018-05-21 21:59 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Add support to get SFP module information using ethtool.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |   18 +++
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c    |   21 ++++
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c  |  137 ++++++++++++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe.h         |   13 ++
 4 files changed, 189 insertions(+)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index ff397bb..57394b77 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -626,6 +626,22 @@ static int xgbe_get_ts_info(struct net_device *netdev,
 	return 0;
 }
 
+static int xgbe_get_module_info(struct net_device *netdev,
+				struct ethtool_modinfo *modinfo)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	return pdata->phy_if.module_info(pdata, modinfo);
+}
+
+static int xgbe_get_module_eeprom(struct net_device *netdev,
+				  struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	return pdata->phy_if.module_eeprom(pdata, eeprom, data);
+}
+
 static const struct ethtool_ops xgbe_ethtool_ops = {
 	.get_drvinfo = xgbe_get_drvinfo,
 	.get_msglevel = xgbe_get_msglevel,
@@ -646,6 +662,8 @@ static int xgbe_get_ts_info(struct net_device *netdev,
 	.get_ts_info = xgbe_get_ts_info,
 	.get_link_ksettings = xgbe_get_link_ksettings,
 	.set_link_ksettings = xgbe_set_link_ksettings,
+	.get_module_info = xgbe_get_module_info,
+	.get_module_eeprom = xgbe_get_module_eeprom,
 };
 
 const struct ethtool_ops *xgbe_get_ethtool_ops(void)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 1b45cd7..9c39c72 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -126,6 +126,24 @@
 #include "xgbe.h"
 #include "xgbe-common.h"
 
+static int xgbe_phy_module_eeprom(struct xgbe_prv_data *pdata,
+				  struct ethtool_eeprom *eeprom, u8 *data)
+{
+	if (!pdata->phy_if.phy_impl.module_eeprom)
+		return -ENXIO;
+
+	return pdata->phy_if.phy_impl.module_eeprom(pdata, eeprom, data);
+}
+
+static int xgbe_phy_module_info(struct xgbe_prv_data *pdata,
+				struct ethtool_modinfo *modinfo)
+{
+	if (!pdata->phy_if.phy_impl.module_info)
+		return -ENXIO;
+
+	return pdata->phy_if.phy_impl.module_info(pdata, modinfo);
+}
+
 static void xgbe_an37_clear_interrupts(struct xgbe_prv_data *pdata)
 {
 	int reg;
@@ -1639,4 +1657,7 @@ void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if)
 	phy_if->phy_valid_speed = xgbe_phy_valid_speed;
 
 	phy_if->an_isr          = xgbe_an_combined_isr;
+
+	phy_if->module_info     = xgbe_phy_module_info;
+	phy_if->module_eeprom   = xgbe_phy_module_eeprom;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index cb15caf..141bb13 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -119,6 +119,7 @@
 #include <linux/kmod.h>
 #include <linux/mdio.h>
 #include <linux/phy.h>
+#include <linux/ethtool.h>
 
 #include "xgbe.h"
 #include "xgbe-common.h"
@@ -270,6 +271,15 @@ struct xgbe_sfp_eeprom {
 	u8 vendor[32];
 };
 
+#define XGBE_SFP_DIAGS_SUPPORTED(_x)			\
+	((_x)->extd[XGBE_SFP_EXTD_SFF_8472] &&		\
+	 !((_x)->extd[XGBE_SFP_EXTD_DIAG] & XGBE_SFP_EXTD_DIAG_ADDR_CHANGE))
+
+#define XGBE_SFP_EEPROM_BASE_LEN	256
+#define XGBE_SFP_EEPROM_DIAG_LEN	256
+#define XGBE_SFP_EEPROM_MAX		(XGBE_SFP_EEPROM_BASE_LEN +	\
+					 XGBE_SFP_EEPROM_DIAG_LEN)
+
 #define XGBE_BEL_FUSE_VENDOR	"BEL-FUSE        "
 #define XGBE_BEL_FUSE_PARTNO	"1GBT-SFP06      "
 
@@ -1301,6 +1311,130 @@ static void xgbe_phy_sfp_detect(struct xgbe_prv_data *pdata)
 	xgbe_phy_put_comm_ownership(pdata);
 }
 
+static int xgbe_phy_module_eeprom(struct xgbe_prv_data *pdata,
+				  struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	u8 eeprom_addr, eeprom_data[XGBE_SFP_EEPROM_MAX];
+	struct xgbe_sfp_eeprom *sfp_eeprom;
+	unsigned int i, j, rem;
+	int ret;
+
+	rem = eeprom->len;
+
+	if (!eeprom->len) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	if ((eeprom->offset + eeprom->len) > XGBE_SFP_EEPROM_MAX) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	if (phy_data->port_mode != XGBE_PORT_MODE_SFP) {
+		ret = -ENXIO;
+		goto done;
+	}
+
+	if (!netif_running(pdata->netdev)) {
+		ret = -EIO;
+		goto done;
+	}
+
+	if (phy_data->sfp_mod_absent) {
+		ret = -EIO;
+		goto done;
+	}
+
+	ret = xgbe_phy_get_comm_ownership(pdata);
+	if (ret) {
+		ret = -EIO;
+		goto done;
+	}
+
+	ret = xgbe_phy_sfp_get_mux(pdata);
+	if (ret) {
+		netdev_err(pdata->netdev, "I2C error setting SFP MUX\n");
+		ret = -EIO;
+		goto put_own;
+	}
+
+	/* Read the SFP serial ID eeprom */
+	eeprom_addr = 0;
+	ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_SERIAL_ID_ADDRESS,
+				&eeprom_addr, sizeof(eeprom_addr),
+				eeprom_data, XGBE_SFP_EEPROM_BASE_LEN);
+	if (ret) {
+		netdev_err(pdata->netdev,
+			   "I2C error reading SFP EEPROM\n");
+		ret = -EIO;
+		goto put_mux;
+	}
+
+	sfp_eeprom = (struct xgbe_sfp_eeprom *)eeprom_data;
+
+	if (XGBE_SFP_DIAGS_SUPPORTED(sfp_eeprom)) {
+		/* Read the SFP diagnostic eeprom */
+		eeprom_addr = 0;
+		ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_DIAG_INFO_ADDRESS,
+					&eeprom_addr, sizeof(eeprom_addr),
+					eeprom_data + XGBE_SFP_EEPROM_BASE_LEN,
+					XGBE_SFP_EEPROM_DIAG_LEN);
+		if (ret) {
+			netdev_err(pdata->netdev,
+				   "I2C error reading SFP DIAGS\n");
+			ret = -EIO;
+			goto put_mux;
+		}
+	}
+
+	for (i = 0, j = eeprom->offset; i < eeprom->len; i++, j++) {
+		if ((j >= XGBE_SFP_EEPROM_BASE_LEN) &&
+		    !XGBE_SFP_DIAGS_SUPPORTED(sfp_eeprom))
+			break;
+
+		data[i] = eeprom_data[j];
+		rem--;
+	}
+
+put_mux:
+	xgbe_phy_sfp_put_mux(pdata);
+
+put_own:
+	xgbe_phy_put_comm_ownership(pdata);
+
+done:
+	eeprom->len -= rem;
+
+	return ret;
+}
+
+static int xgbe_phy_module_info(struct xgbe_prv_data *pdata,
+				struct ethtool_modinfo *modinfo)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+	if (phy_data->port_mode != XGBE_PORT_MODE_SFP)
+		return -ENXIO;
+
+	if (!netif_running(pdata->netdev))
+		return -EIO;
+
+	if (phy_data->sfp_mod_absent)
+		return -EIO;
+
+	if (XGBE_SFP_DIAGS_SUPPORTED(&phy_data->sfp_eeprom)) {
+		modinfo->type = ETH_MODULE_SFF_8472;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+	} else {
+		modinfo->type = ETH_MODULE_SFF_8079;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+	}
+
+	return 0;
+}
+
 static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata)
 {
 	struct ethtool_link_ksettings *lks = &pdata->phy.lks;
@@ -3196,4 +3330,7 @@ void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if)
 
 	phy_impl->kr_training_pre	= xgbe_phy_kr_training_pre;
 	phy_impl->kr_training_post	= xgbe_phy_kr_training_post;
+
+	phy_impl->module_info		= xgbe_phy_module_info;
+	phy_impl->module_eeprom		= xgbe_phy_module_eeprom;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 54e43ad3..f0f455b 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -835,6 +835,7 @@ struct xgbe_hw_if {
  *   Optional routines:
  *     an_pre, an_post
  *     kr_training_pre, kr_training_post
+ *     module_info, module_eeprom
  */
 struct xgbe_phy_impl_if {
 	/* Perform Setup/teardown actions */
@@ -883,6 +884,12 @@ struct xgbe_phy_impl_if {
 	/* Pre/Post KR training enablement support */
 	void (*kr_training_pre)(struct xgbe_prv_data *);
 	void (*kr_training_post)(struct xgbe_prv_data *);
+
+	/* SFP module related info */
+	int (*module_info)(struct xgbe_prv_data *pdata,
+			   struct ethtool_modinfo *modinfo);
+	int (*module_eeprom)(struct xgbe_prv_data *pdata,
+			     struct ethtool_eeprom *eeprom, u8 *data);
 };
 
 struct xgbe_phy_if {
@@ -905,6 +912,12 @@ struct xgbe_phy_if {
 	/* For single interrupt support */
 	irqreturn_t (*an_isr)(struct xgbe_prv_data *);
 
+	/* For ethtool PHY support */
+	int (*module_info)(struct xgbe_prv_data *pdata,
+			   struct ethtool_modinfo *modinfo);
+	int (*module_eeprom)(struct xgbe_prv_data *pdata,
+			     struct ethtool_eeprom *eeprom, u8 *data);
+
 	/* PHY implementation specific services */
 	struct xgbe_phy_impl_if phy_impl;
 };

^ permalink raw reply related

* [PATCH net-next 04/12] amd-xgbe: Remove field that indicates SFP diagnostic support
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

The driver currently sets an indication of whether the SFP supports, and
that the driver can obtain, diagnostics data.  This isn't currently used
by the driver and the logic to set this indicator is flawed because the
field is cleared each time the SFP is checked and only set when a new SFP
is detected.  Remove this field and the logic supporting it.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c |    9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 05003be..cb15caf 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -343,7 +343,6 @@ struct xgbe_phy_data {
 	unsigned int sfp_rx_los;
 	unsigned int sfp_tx_fault;
 	unsigned int sfp_mod_absent;
-	unsigned int sfp_diags;
 	unsigned int sfp_changed;
 	unsigned int sfp_phy_avail;
 	unsigned int sfp_cable_len;
@@ -1211,13 +1210,6 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
 
 		memcpy(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom));
 
-		if (sfp_eeprom.extd[XGBE_SFP_EXTD_SFF_8472]) {
-			u8 diag_type = sfp_eeprom.extd[XGBE_SFP_EXTD_DIAG];
-
-			if (!(diag_type & XGBE_SFP_EXTD_DIAG_ADDR_CHANGE))
-				phy_data->sfp_diags = 1;
-		}
-
 		xgbe_phy_free_phy_device(pdata);
 	} else {
 		phy_data->sfp_changed = 0;
@@ -1267,7 +1259,6 @@ static void xgbe_phy_sfp_reset(struct xgbe_phy_data *phy_data)
 	phy_data->sfp_rx_los = 0;
 	phy_data->sfp_tx_fault = 0;
 	phy_data->sfp_mod_absent = 1;
-	phy_data->sfp_diags = 0;
 	phy_data->sfp_base = XGBE_SFP_BASE_UNKNOWN;
 	phy_data->sfp_cable = XGBE_SFP_CABLE_UNKNOWN;
 	phy_data->sfp_speed = XGBE_SFP_SPEED_UNKNOWN;

^ permalink raw reply related

* [PATCH net-next 03/12] amd-xgbe: Remove use of comm_owned field
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

The comm_owned field can hide logic where double locking is attempted
and prevent multiple threads for the same device from accessing the
mutex properly.  Remove the comm_owned field and use the mutex API
exclusively for gaining ownership.  The current driver has been audited
and is obtaining communications ownership properly.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c |   16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 123ceb0..05003be 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -327,8 +327,6 @@ struct xgbe_phy_data {
 
 	unsigned int mdio_addr;
 
-	unsigned int comm_owned;
-
 	/* SFP Support */
 	enum xgbe_sfp_comm sfp_comm;
 	unsigned int sfp_mux_address;
@@ -382,12 +380,6 @@ struct xgbe_phy_data {
 static int xgbe_phy_i2c_xfer(struct xgbe_prv_data *pdata,
 			     struct xgbe_i2c_op *i2c_op)
 {
-	struct xgbe_phy_data *phy_data = pdata->phy_data;
-
-	/* Be sure we own the bus */
-	if (WARN_ON(!phy_data->comm_owned))
-		return -EIO;
-
 	return pdata->i2c_if.i2c_xfer(pdata, i2c_op);
 }
 
@@ -549,10 +541,6 @@ static int xgbe_phy_sfp_get_mux(struct xgbe_prv_data *pdata)
 
 static void xgbe_phy_put_comm_ownership(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_phy_data *phy_data = pdata->phy_data;
-
-	phy_data->comm_owned = 0;
-
 	mutex_unlock(&xgbe_phy_comm_lock);
 }
 
@@ -562,9 +550,6 @@ static int xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata)
 	unsigned long timeout;
 	unsigned int mutex_id;
 
-	if (phy_data->comm_owned)
-		return 0;
-
 	/* The I2C and MDIO/GPIO bus is multiplexed between multiple devices,
 	 * the driver needs to take the software mutex and then the hardware
 	 * mutexes before being able to use the busses.
@@ -593,7 +578,6 @@ static int xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata)
 		XP_IOWRITE(pdata, XP_I2C_MUTEX, mutex_id);
 		XP_IOWRITE(pdata, XP_MDIO_MUTEX, mutex_id);
 
-		phy_data->comm_owned = 1;
 		return 0;
 	}
 

^ permalink raw reply related

* [PATCH net-next 02/12] amd-xgbe: Read and save the port property registers during probe
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

Read and save the port property registers once during the device probe
and then use the saved values as they are needed.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-pci.c    |   34 ++++++++++----
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c |   68 ++++++++++++---------------
 drivers/net/ethernet/amd/xgbe/xgbe.h        |    7 +++
 3 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index 7b63521..7b86240 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -335,12 +335,29 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pdata->awcr = XGBE_DMA_PCI_AWCR;
 	pdata->awarcr = XGBE_DMA_PCI_AWARCR;
 
+	/* Read the port property registers */
+	pdata->pp0 = XP_IOREAD(pdata, XP_PROP_0);
+	pdata->pp1 = XP_IOREAD(pdata, XP_PROP_1);
+	pdata->pp2 = XP_IOREAD(pdata, XP_PROP_2);
+	pdata->pp3 = XP_IOREAD(pdata, XP_PROP_3);
+	pdata->pp4 = XP_IOREAD(pdata, XP_PROP_4);
+	if (netif_msg_probe(pdata)) {
+		dev_dbg(dev, "port property 0 = %#010x\n", pdata->pp0);
+		dev_dbg(dev, "port property 1 = %#010x\n", pdata->pp1);
+		dev_dbg(dev, "port property 2 = %#010x\n", pdata->pp2);
+		dev_dbg(dev, "port property 3 = %#010x\n", pdata->pp3);
+		dev_dbg(dev, "port property 4 = %#010x\n", pdata->pp4);
+	}
+
 	/* Set the maximum channels and queues */
-	reg = XP_IOREAD(pdata, XP_PROP_1);
-	pdata->tx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_DMA);
-	pdata->rx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_DMA);
-	pdata->tx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_QUEUES);
-	pdata->rx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_QUEUES);
+	pdata->tx_max_channel_count = XP_GET_BITS(pdata->pp1, XP_PROP_1,
+						  MAX_TX_DMA);
+	pdata->rx_max_channel_count = XP_GET_BITS(pdata->pp1, XP_PROP_1,
+						  MAX_RX_DMA);
+	pdata->tx_max_q_count = XP_GET_BITS(pdata->pp1, XP_PROP_1,
+					    MAX_TX_QUEUES);
+	pdata->rx_max_q_count = XP_GET_BITS(pdata->pp1, XP_PROP_1,
+					    MAX_RX_QUEUES);
 	if (netif_msg_probe(pdata)) {
 		dev_dbg(dev, "max tx/rx channel count = %u/%u\n",
 			pdata->tx_max_channel_count,
@@ -353,12 +370,13 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	xgbe_set_counts(pdata);
 
 	/* Set the maximum fifo amounts */
-	reg = XP_IOREAD(pdata, XP_PROP_2);
-	pdata->tx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, TX_FIFO_SIZE);
+	pdata->tx_max_fifo_size = XP_GET_BITS(pdata->pp2, XP_PROP_2,
+					      TX_FIFO_SIZE);
 	pdata->tx_max_fifo_size *= 16384;
 	pdata->tx_max_fifo_size = min(pdata->tx_max_fifo_size,
 				      pdata->vdata->tx_max_fifo_size);
-	pdata->rx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, RX_FIFO_SIZE);
+	pdata->rx_max_fifo_size = XP_GET_BITS(pdata->pp2, XP_PROP_2,
+					      RX_FIFO_SIZE);
 	pdata->rx_max_fifo_size *= 16384;
 	pdata->rx_max_fifo_size = min(pdata->rx_max_fifo_size,
 				      pdata->vdata->rx_max_fifo_size);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index aac8843..123ceb0 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -2421,22 +2421,21 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
 static void xgbe_phy_sfp_gpio_setup(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int reg;
-
-	reg = XP_IOREAD(pdata, XP_PROP_3);
 
 	phy_data->sfp_gpio_address = XGBE_GPIO_ADDRESS_PCA9555 +
-				     XP_GET_BITS(reg, XP_PROP_3, GPIO_ADDR);
+				     XP_GET_BITS(pdata->pp3, XP_PROP_3,
+						 GPIO_ADDR);
 
-	phy_data->sfp_gpio_mask = XP_GET_BITS(reg, XP_PROP_3, GPIO_MASK);
+	phy_data->sfp_gpio_mask = XP_GET_BITS(pdata->pp3, XP_PROP_3,
+					      GPIO_MASK);
 
-	phy_data->sfp_gpio_rx_los = XP_GET_BITS(reg, XP_PROP_3,
+	phy_data->sfp_gpio_rx_los = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 						GPIO_RX_LOS);
-	phy_data->sfp_gpio_tx_fault = XP_GET_BITS(reg, XP_PROP_3,
+	phy_data->sfp_gpio_tx_fault = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 						  GPIO_TX_FAULT);
-	phy_data->sfp_gpio_mod_absent = XP_GET_BITS(reg, XP_PROP_3,
+	phy_data->sfp_gpio_mod_absent = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 						    GPIO_MOD_ABS);
-	phy_data->sfp_gpio_rate_select = XP_GET_BITS(reg, XP_PROP_3,
+	phy_data->sfp_gpio_rate_select = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 						     GPIO_RATE_SELECT);
 
 	if (netif_msg_probe(pdata)) {
@@ -2458,18 +2457,17 @@ static void xgbe_phy_sfp_gpio_setup(struct xgbe_prv_data *pdata)
 static void xgbe_phy_sfp_comm_setup(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int reg, mux_addr_hi, mux_addr_lo;
+	unsigned int mux_addr_hi, mux_addr_lo;
 
-	reg = XP_IOREAD(pdata, XP_PROP_4);
-
-	mux_addr_hi = XP_GET_BITS(reg, XP_PROP_4, MUX_ADDR_HI);
-	mux_addr_lo = XP_GET_BITS(reg, XP_PROP_4, MUX_ADDR_LO);
+	mux_addr_hi = XP_GET_BITS(pdata->pp4, XP_PROP_4, MUX_ADDR_HI);
+	mux_addr_lo = XP_GET_BITS(pdata->pp4, XP_PROP_4, MUX_ADDR_LO);
 	if (mux_addr_lo == XGBE_SFP_DIRECT)
 		return;
 
 	phy_data->sfp_comm = XGBE_SFP_COMM_PCA9545;
 	phy_data->sfp_mux_address = (mux_addr_hi << 2) + mux_addr_lo;
-	phy_data->sfp_mux_channel = XP_GET_BITS(reg, XP_PROP_4, MUX_CHAN);
+	phy_data->sfp_mux_channel = XP_GET_BITS(pdata->pp4, XP_PROP_4,
+						MUX_CHAN);
 
 	if (netif_msg_probe(pdata)) {
 		dev_dbg(pdata->dev, "SFP: mux_address=%#x\n",
@@ -2592,13 +2590,11 @@ static bool xgbe_phy_redrv_error(struct xgbe_phy_data *phy_data)
 static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int reg;
 
 	if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO)
 		return 0;
 
-	reg = XP_IOREAD(pdata, XP_PROP_3);
-	phy_data->mdio_reset = XP_GET_BITS(reg, XP_PROP_3, MDIO_RESET);
+	phy_data->mdio_reset = XP_GET_BITS(pdata->pp3, XP_PROP_3, MDIO_RESET);
 	switch (phy_data->mdio_reset) {
 	case XGBE_MDIO_RESET_NONE:
 	case XGBE_MDIO_RESET_I2C_GPIO:
@@ -2612,12 +2608,12 @@ static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata)
 
 	if (phy_data->mdio_reset == XGBE_MDIO_RESET_I2C_GPIO) {
 		phy_data->mdio_reset_addr = XGBE_GPIO_ADDRESS_PCA9555 +
-					    XP_GET_BITS(reg, XP_PROP_3,
+					    XP_GET_BITS(pdata->pp3, XP_PROP_3,
 							MDIO_RESET_I2C_ADDR);
-		phy_data->mdio_reset_gpio = XP_GET_BITS(reg, XP_PROP_3,
+		phy_data->mdio_reset_gpio = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 							MDIO_RESET_I2C_GPIO);
 	} else if (phy_data->mdio_reset == XGBE_MDIO_RESET_INT_GPIO) {
-		phy_data->mdio_reset_gpio = XP_GET_BITS(reg, XP_PROP_3,
+		phy_data->mdio_reset_gpio = XP_GET_BITS(pdata->pp3, XP_PROP_3,
 							MDIO_RESET_INT_GPIO);
 	}
 
@@ -2707,12 +2703,9 @@ static bool xgbe_phy_conn_type_mismatch(struct xgbe_prv_data *pdata)
 
 static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg;
-
-	reg = XP_IOREAD(pdata, XP_PROP_0);
-	if (!XP_GET_BITS(reg, XP_PROP_0, PORT_SPEEDS))
+	if (!XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_SPEEDS))
 		return false;
-	if (!XP_GET_BITS(reg, XP_PROP_0, CONN_TYPE))
+	if (!XP_GET_BITS(pdata->pp0, XP_PROP_0, CONN_TYPE))
 		return false;
 
 	return true;
@@ -2921,7 +2914,6 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
 	struct ethtool_link_ksettings *lks = &pdata->phy.lks;
 	struct xgbe_phy_data *phy_data;
 	struct mii_bus *mii;
-	unsigned int reg;
 	int ret;
 
 	/* Check if enabled */
@@ -2940,12 +2932,11 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
 		return -ENOMEM;
 	pdata->phy_data = phy_data;
 
-	reg = XP_IOREAD(pdata, XP_PROP_0);
-	phy_data->port_mode = XP_GET_BITS(reg, XP_PROP_0, PORT_MODE);
-	phy_data->port_id = XP_GET_BITS(reg, XP_PROP_0, PORT_ID);
-	phy_data->port_speeds = XP_GET_BITS(reg, XP_PROP_0, PORT_SPEEDS);
-	phy_data->conn_type = XP_GET_BITS(reg, XP_PROP_0, CONN_TYPE);
-	phy_data->mdio_addr = XP_GET_BITS(reg, XP_PROP_0, MDIO_ADDR);
+	phy_data->port_mode = XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_MODE);
+	phy_data->port_id = XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_ID);
+	phy_data->port_speeds = XP_GET_BITS(pdata->pp0, XP_PROP_0, PORT_SPEEDS);
+	phy_data->conn_type = XP_GET_BITS(pdata->pp0, XP_PROP_0, CONN_TYPE);
+	phy_data->mdio_addr = XP_GET_BITS(pdata->pp0, XP_PROP_0, MDIO_ADDR);
 	if (netif_msg_probe(pdata)) {
 		dev_dbg(pdata->dev, "port mode=%u\n", phy_data->port_mode);
 		dev_dbg(pdata->dev, "port id=%u\n", phy_data->port_id);
@@ -2954,12 +2945,11 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
 		dev_dbg(pdata->dev, "mdio addr=%u\n", phy_data->mdio_addr);
 	}
 
-	reg = XP_IOREAD(pdata, XP_PROP_4);
-	phy_data->redrv = XP_GET_BITS(reg, XP_PROP_4, REDRV_PRESENT);
-	phy_data->redrv_if = XP_GET_BITS(reg, XP_PROP_4, REDRV_IF);
-	phy_data->redrv_addr = XP_GET_BITS(reg, XP_PROP_4, REDRV_ADDR);
-	phy_data->redrv_lane = XP_GET_BITS(reg, XP_PROP_4, REDRV_LANE);
-	phy_data->redrv_model = XP_GET_BITS(reg, XP_PROP_4, REDRV_MODEL);
+	phy_data->redrv = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_PRESENT);
+	phy_data->redrv_if = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_IF);
+	phy_data->redrv_addr = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_ADDR);
+	phy_data->redrv_lane = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_LANE);
+	phy_data->redrv_model = XP_GET_BITS(pdata->pp4, XP_PROP_4, REDRV_MODEL);
 	if (phy_data->redrv && netif_msg_probe(pdata)) {
 		dev_dbg(pdata->dev, "redrv present\n");
 		dev_dbg(pdata->dev, "redrv i/f=%u\n", phy_data->redrv_if);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 95d4b56..54e43ad3 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -1027,6 +1027,13 @@ struct xgbe_prv_data {
 	void __iomem *xprop_regs;	/* XGBE property registers */
 	void __iomem *xi2c_regs;	/* XGBE I2C CSRs */
 
+	/* Port property registers */
+	unsigned int pp0;
+	unsigned int pp1;
+	unsigned int pp2;
+	unsigned int pp3;
+	unsigned int pp4;
+
 	/* Overall device lock */
 	spinlock_t lock;
 

^ permalink raw reply related

* [PATCH net-next 01/12] amd-xgbe: Fix debug output of max channel counts
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller
In-Reply-To: <20180521215818.8135.83100.stgit@tlendack-t1.amdoffice.net>

A debug output print statement uses the wrong variable to output the
maximum Rx channel count (cut and paste error, basically).  Fix the
statement to use the proper variable.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-pci.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index 82d1f41..7b63521 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -344,7 +344,7 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (netif_msg_probe(pdata)) {
 		dev_dbg(dev, "max tx/rx channel count = %u/%u\n",
 			pdata->tx_max_channel_count,
-			pdata->tx_max_channel_count);
+			pdata->rx_max_channel_count);
 		dev_dbg(dev, "max tx/rx hw queue count = %u/%u\n",
 			pdata->tx_max_q_count, pdata->rx_max_q_count);
 	}

^ permalink raw reply related

* [PATCH net-next 00/12] amd-xgbe: AMD XGBE driver updates 2018-05-21
From: Tom Lendacky @ 2018-05-21 21:58 UTC (permalink / raw)
  To: netdev; +Cc: David Miller

The following updates are included in this driver update series:

- Fix the debug output for the max channels count
- Read (once) and save the port property registers during probe
- Remove the use of the comm_owned field
- Remove unused SFP diagnostic support indicator field
- Add ethtool --module-info support
- Add ethtool --show-ring/--set-ring support
- Update the driver in preparation for ethtool --set-channels support
- Add ethtool --show-channels/--set-channels support
- Update the driver to always perform link training in KR mode
- Advertise FEC support when using a KR re-driver
- Update the BelFuse quirk to now support SGMII
- Improve 100Mbps auto-negotiation for BelFuse parts

This patch series is based on net-next.

---

Tom Lendacky (12):
      amd-xgbe: Fix debug output of max channel counts
      amd-xgbe: Read and save the port property registers during probe
      amd-xgbe: Remove use of comm_owned field
      amd-xgbe: Remove field that indicates SFP diagnostic support
      amd-xgbe: Add ethtool support to retrieve SFP module info
      amd-xgbe: Add ethtool show/set ring parameter support
      amd-xgbe: Prepare for ethtool set-channel support
      amd-xgbe: Add ethtool show/set channels support
      amd-xgbe: Always attempt link training in KR mode
      amd-xgbe: Advertise FEC support with the KR re-driver
      amd-xgbe: Update the BelFuse quirk to support SGMII
      amd-xgbe: Improve SFP 100Mbps auto-negotiation


 drivers/net/ethernet/amd/xgbe/xgbe-drv.c     |  137 +++++++---
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |  214 ++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe-main.c    |   20 -
 drivers/net/ethernet/amd/xgbe/xgbe-mdio.c    |  167 ++++++------
 drivers/net/ethernet/amd/xgbe/xgbe-pci.c     |   36 ++-
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c  |  349 +++++++++++++++++++-------
 drivers/net/ethernet/amd/xgbe/xgbe.h         |   31 ++
 7 files changed, 696 insertions(+), 258 deletions(-)

-- 
Tom Lendacky

^ permalink raw reply

* Re: [PATCH bpf-next 2/7] bpf: btf: Change how section is supported in btf_header
From: Martin KaFai Lau @ 2018-05-21 21:47 UTC (permalink / raw)
  To: Yonghong Song; +Cc: netdev, Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <eee61147-90a6-b1fb-5278-beb61afe7784@fb.com>

On Mon, May 21, 2018 at 01:15:24PM -0700, Yonghong Song wrote:
> 
> 
> On 5/18/18 5:16 PM, Martin KaFai Lau wrote:
> > There are currently unused section descriptions in the btf_header.  Those
> > sections are here to support future BTF use cases.  For example, the
> > func section (func_off) is to support function signature (e.g. the BPF
> > prog function signature).
> > 
> > Instead of spelling out all potential sections up-front in the btf_header.
> > This patch makes changes to btf_header such that extending it (e.g. adding
> > a section) is possible later.  The unused ones can be removed for now and
> > they can be added back later.
> > 
> > This patch:
> > 1. adds a hdr_len to the btf_header.  It will allow adding
> > sections (and other info like parent_label and parent_name)
> > later.  The check is similar to the existing bpf_attr.
> > If a user passes in a longer hdr_len, the kernel
> > ensures the extra tailing bytes are 0.
> > 
> > 2. allows the section order in the BTF object to be
> > different from its sec_off order in btf_header.
> > 
> > 3. each sec_off is followed by a sec_len.  It must not have gap or
> > overlapping among sections.
> > 
> > The string section is ensured to be at the end due to the 4 bytes
> > alignment requirement of the type section.
> > 
> > The above changes will allow enough flexibility to
> > add new sections (and other info) to the btf_header later.
> > 
> > This patch also removes an unnecessary !err check
> > at the end of btf_parse().
> > 
> > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> > ---
> >   include/uapi/linux/btf.h |   8 +-
> >   kernel/bpf/btf.c         | 207 +++++++++++++++++++++++++++++++++++------------
> >   2 files changed, 158 insertions(+), 57 deletions(-)
> > 
> > diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
> > index bcb56ee47014..4fa479741a02 100644
> > --- a/include/uapi/linux/btf.h
> > +++ b/include/uapi/linux/btf.h
> > @@ -12,15 +12,11 @@ struct btf_header {
> >   	__u16	magic;
> >   	__u8	version;
> >   	__u8	flags;
> > -
> > -	__u32	parent_label;
> > -	__u32	parent_name;
> > +	__u32	hdr_len;
> >   	/* All offsets are in bytes relative to the end of this header */
> > -	__u32	label_off;	/* offset of label section	*/
> > -	__u32	object_off;	/* offset of data object section*/
> > -	__u32	func_off;	/* offset of function section	*/
> >   	__u32	type_off;	/* offset of type section	*/
> > +	__u32	type_len;	/* length of type section	*/
> >   	__u32	str_off;	/* offset of string section	*/
> >   	__u32	str_len;	/* length of string section	*/
> >   };
> > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > index ded10ab47b8a..536e5981ad8c 100644
> > --- a/kernel/bpf/btf.c
> > +++ b/kernel/bpf/btf.c
> > @@ -12,6 +12,7 @@
> >   #include <linux/uaccess.h>
> >   #include <linux/kernel.h>
> >   #include <linux/idr.h>
> > +#include <linux/sort.h>
> >   #include <linux/bpf_verifier.h>
> >   #include <linux/btf.h>
> > @@ -184,15 +185,13 @@ static DEFINE_IDR(btf_idr);
> >   static DEFINE_SPINLOCK(btf_idr_lock);
> >   struct btf {
> > -	union {
> > -		struct btf_header *hdr;
> > -		void *data;
> > -	};
> > +	void *data;
> >   	struct btf_type **types;
> >   	u32 *resolved_ids;
> >   	u32 *resolved_sizes;
> >   	const char *strings;
> >   	void *nohdr_data;
> > +	struct btf_header hdr;
> >   	u32 nr_types;
> >   	u32 types_size;
> >   	u32 data_size;
> > @@ -227,6 +226,12 @@ enum resolve_mode {
> >   };
> >   #define MAX_RESOLVE_DEPTH 32
> > +#define NR_SECS 2
> 
> Not sure whether it is necessary to define NR_SECS 2 here or not.
> See below.
> 
> > +
> > +struct btf_sec_info {
> > +	u32 off;
> > +	u32 len;
> > +};
> >   struct btf_verifier_env {
> >   	struct btf *btf;
> > @@ -418,14 +423,14 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
> >   static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
> >   {
> >   	return !BTF_STR_TBL_ELF_ID(offset) &&
> > -		BTF_STR_OFFSET(offset) < btf->hdr->str_len;
> > +		BTF_STR_OFFSET(offset) < btf->hdr.str_len;
> >   }
> >   static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
> >   {
> >   	if (!BTF_STR_OFFSET(offset))
> >   		return "(anon)";
> > -	else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
> > +	else if (BTF_STR_OFFSET(offset) < btf->hdr.str_len)
> >   		return &btf->strings[BTF_STR_OFFSET(offset)];
> >   	else
> >   		return "(invalid-name-offset)";
> > @@ -536,7 +541,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
> >   	__btf_verifier_log(log, "\n");
> >   }
> > -static void btf_verifier_log_hdr(struct btf_verifier_env *env)
> > +static void btf_verifier_log_hdr(struct btf_verifier_env *env,
> > +				 u32 btf_data_size)
> >   {
> >   	struct bpf_verifier_log *log = &env->log;
> >   	const struct btf *btf = env->btf;
> > @@ -545,19 +551,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
> >   	if (!bpf_verifier_log_needed(log))
> >   		return;
> > -	hdr = btf->hdr;
> > +	hdr = &btf->hdr;
> >   	__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
> >   	__btf_verifier_log(log, "version: %u\n", hdr->version);
> >   	__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
> > -	__btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
> > -	__btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
> > -	__btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
> > -	__btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
> > -	__btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
> > +	__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
> >   	__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
> > +	__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
> >   	__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
> >   	__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
> > -	__btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
> > +	__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
> >   }
> >   static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
> > @@ -1754,9 +1757,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
> >   	struct btf_header *hdr;
> >   	void *cur, *end;
> > -	hdr = btf->hdr;
> > +	hdr = &btf->hdr;
> >   	cur = btf->nohdr_data + hdr->type_off;
> > -	end = btf->nohdr_data + hdr->str_off;
> > +	end = btf->nohdr_data + hdr->type_len;
> >   	env->log_type_id = 1;
> >   	while (cur < end) {
> > @@ -1866,8 +1869,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)
> >   static int btf_parse_type_sec(struct btf_verifier_env *env)
> >   {
> > +	const struct btf_header *hdr = &env->btf->hdr;
> >   	int err;
> > +	/* Type section must align to 4 bytes */
> > +	if (hdr->type_off & (sizeof(u32) - 1)) {
> > +		btf_verifier_log(env, "Unaligned type_off");
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (!hdr->type_len) {
> > +		btf_verifier_log(env, "No type found");
> > +		return -EINVAL;
> > +	}
> > +
> >   	err = btf_check_all_metas(env);
> >   	if (err)
> >   		return err;
> > @@ -1881,10 +1896,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
> >   	struct btf *btf = env->btf;
> >   	const char *start, *end;
> > -	hdr = btf->hdr;
> > +	hdr = &btf->hdr;
> >   	start = btf->nohdr_data + hdr->str_off;
> >   	end = start + hdr->str_len;
> > +	if (end != btf->data + btf->data_size) {
> > +		btf_verifier_log(env, "String section is not at the end");
> > +		return -EINVAL;
> > +	}
> > +
> >   	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
> >   	    start[0] || end[-1]) {
> >   		btf_verifier_log(env, "Invalid string section");
> > @@ -1896,20 +1916,119 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
> >   	return 0;
> >   }
> > -static int btf_parse_hdr(struct btf_verifier_env *env)
> > +static const size_t btf_sec_info_offset[] = {
> > +	offsetof(struct btf_header, type_off),
> > +	offsetof(struct btf_header, str_off),
> > +};
> 
> Maybe we can define NR_SECS is
> ARRAY_SIZE(btf_sec_info_offset)/sizeof(size_t)?
> 
> > +
> > +static int btf_sec_info_cmp(const void *a, const void *b)
> >   {
> > +	const struct btf_sec_info *x = a;
> > +	const struct btf_sec_info *y = b;
> > +
> > +	return (int)(x->off - y->off) ? : (int)(x->len - y->len);
> > +}
> > +
> > +static int btf_check_sec_info(struct btf_verifier_env *env,
> > +			      u32 btf_data_size)
> > +{
> > +	struct btf_sec_info secs[NR_SECS];
> > +	u32 total, expected_total, i;
> >   	const struct btf_header *hdr;
> > -	struct btf *btf = env->btf;
> > -	u32 meta_left;
> > +	const struct btf *btf;
> > +
> > +	BUILD_BUG_ON(ARRAY_SIZE(btf_sec_info_offset) != NR_SECS);
> 
> If we define NR_SECS depending on the size of btf_sec_info_offset, this
> BUILD_BUG_ON is not needed.
Agree, BUILD_BUG_ON() can be avoided.  Will change.

> 
> > +
> > +	btf = env->btf;
> > +	hdr = &btf->hdr;
> > +
> > +	/* Populate the secs from hdr */
> > +	for (i = 0; i < NR_SECS; i++)
> > +		secs[i] = *(struct btf_sec_info *)((void *)hdr +
> > +						   btf_sec_info_offset[i]);
> > +
> > +	sort(secs, NR_SECS, sizeof(struct btf_sec_info),
> > +	     btf_sec_info_cmp, NULL);
> > +
> > +	/* Check for gaps and overlap among sections */
> > +	total = 0;
> > +	expected_total = btf_data_size - hdr->hdr_len;
> > +	for (i = 0; i < NR_SECS; i++) {
> > +		if (expected_total < secs[i].off) {
> > +			btf_verifier_log(env, "Invalid section offset");
> > +			return -EINVAL;
> > +		}
> > +		if (total < secs[i].off) {
> > +			/* gap */
> > +			btf_verifier_log(env, "Unsupported section found");
> > +			return -EINVAL;
> > +		}
> > +		if (total > secs[i].off) {
> > +			btf_verifier_log(env, "Section overlap found");
> > +			return -EINVAL;
> > +		}
> > +		if (expected_total - total < secs[i].len) {
> > +			btf_verifier_log(env,
> > +					 "Total section length too long");
> > +			return -EINVAL;
> > +		}
> > +		total += secs[i].len;
> > +	}
> > +
> > +	/* There is data other than hdr and known sections */
> > +	if (expected_total != total) {
> > +		btf_verifier_log(env, "Unsupported section found");
> > +		return -EINVAL;
> > +	}
> 
> Does this patch try to address compatibility issue? That is, the old btf
> format with 2 sections should still work with future kernel with more
> than 2 sections? The above comparision seems suggesting that newer kernel
> will not support non-compatible number of sections?
Yes, it should.  When an older btf passed in, the newer sec_off
and sec_len in btf->hdr will be zeros.  They will still pass the
for loop.

> 
> > +
> > +	return 0;
> > +}
> > +
> > +static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
> > +			 u32 btf_data_size)
> > +{
> > +	const struct btf_header *hdr;
> > +	u32 hdr_len, hdr_copy;
> > +	struct btf_min_header {
> > +		u16	magic;
> > +		u8	version;
> > +		u8	flags;
> > +		u32	hdr_len;
> > +	} __user *min_hdr;
> 
> This is the partial structure with the first four fields
> for struct btf_header.
> It is okay, but I feel a little bit duplication here.
> 
> Looks like only sizeof(*min_hdr) and min_hdr->hdr_len is used.
> Maybe we can just cast bpf_data to bpf_header and
> sizeof(*min_hdr) being offsetof(bpf_data, type_off), and
> min_hdr->hdr_len being bpf_data->hdr_len.
> 
> Do this work?
offsetof() was what I had.  I found a few lines of offsetof() become
so long that is actually hard to read.  Also, I think spelling out the
'btf_min_header' name here is helpful, so I did not leave it as
anon struct.

Agree that there is duplication but it will never be changed once we
release it to uapi.  I can make some comments here instead.

> 
> > +	struct btf *btf;
> > +	int err;
> > +
> > +	btf = env->btf;
> > +	min_hdr = btf_data;
> > +
> > +	if (btf_data_size < sizeof(*min_hdr)) {
> > +		btf_verifier_log(env, "hdr_len not found");
> > +		return -EINVAL;
> > +	}
> > -	if (btf->data_size < sizeof(*hdr)) {
> > +	if (get_user(hdr_len, &min_hdr->hdr_len))
> > +		return -EFAULT;
> > +
> > +	if (btf_data_size < hdr_len) {
> >   		btf_verifier_log(env, "btf_header not found");
> >   		return -EINVAL;
> >   	}
> > -	btf_verifier_log_hdr(env);
> > +	err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
> > +	if (err) {
> > +		if (err == -E2BIG)
> > +			btf_verifier_log(env, "Unsupported btf_header");
> > +		return err;
> > +	}
> > +
> > +	hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
> > +	if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
> > +		return -EFAULT;
> > +
> > +	hdr = &btf->hdr;
> > +
> > +	btf_verifier_log_hdr(env, btf_data_size);
> > -	hdr = btf->hdr;
> >   	if (hdr->magic != BTF_MAGIC) {
> >   		btf_verifier_log(env, "Invalid magic");
> >   		return -EINVAL;
> > @@ -1925,26 +2044,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
> >   		return -ENOTSUPP;
> >   	}
> > -	meta_left = btf->data_size - sizeof(*hdr);
> > -	if (!meta_left) {
> > +	if (btf_data_size == hdr->hdr_len) {
> >   		btf_verifier_log(env, "No data");
> >   		return -EINVAL;
> >   	}
> > -	if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
> > -	    /* Type section must align to 4 bytes */
> > -	    hdr->type_off & (sizeof(u32) - 1)) {
> > -		btf_verifier_log(env, "Invalid type_off");
> > -		return -EINVAL;
> > -	}
> > -
> > -	if (meta_left < hdr->str_off ||
> > -	    meta_left - hdr->str_off < hdr->str_len) {
> > -		btf_verifier_log(env, "Invalid str_off or str_len");
> > -		return -EINVAL;
> > -	}
> > -
> > -	btf->nohdr_data = btf->hdr + 1;
> > +	err = btf_check_sec_info(env, btf_data_size);
> > +	if (err)
> > +		return err;
> >   	return 0;
> >   }
> > @@ -1987,6 +2094,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
> >   		err = -ENOMEM;
> >   		goto errout;
> >   	}
> > +	env->btf = btf;
> > +
> > +	err = btf_parse_hdr(env, btf_data, btf_data_size);
> > +	if (err)
> > +		goto errout;
> >   	data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
> >   	if (!data) {
> > @@ -1996,18 +2108,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
> >   	btf->data = data;
> >   	btf->data_size = btf_data_size;
> > +	btf->nohdr_data = btf->data + btf->hdr.hdr_len;
> >   	if (copy_from_user(data, btf_data, btf_data_size)) {
> >   		err = -EFAULT;
> >   		goto errout;
> >   	}
> > -	env->btf = btf;
> > -
> > -	err = btf_parse_hdr(env);
> > -	if (err)
> > -		goto errout;
> > -
> >   	err = btf_parse_str_sec(env);
> >   	if (err)
> >   		goto errout;
> > @@ -2016,16 +2123,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
> >   	if (err)
> >   		goto errout;
> > -	if (!err && log->level && bpf_verifier_log_full(log)) {
> > +	if (log->level && bpf_verifier_log_full(log)) {
> >   		err = -ENOSPC;
> >   		goto errout;
> >   	}
> > -	if (!err) {
> > -		btf_verifier_env_free(env);
> > -		refcount_set(&btf->refcnt, 1);
> > -		return btf;
> > -	}
> > +	btf_verifier_env_free(env);
> > +	refcount_set(&btf->refcnt, 1);
> > +	return btf;
> >   errout:
> >   	btf_verifier_env_free(env);
> > 

^ permalink raw reply

* Re: [PATCH] bpf: prevent memory disambiguation attack
From: Daniel Borkmann @ 2018-05-21 21:46 UTC (permalink / raw)
  To: Alexei Starovoitov, David S . Miller; +Cc: netdev, linux-kernel, kernel-team
In-Reply-To: <20180521211743.1492305-1-ast@kernel.org>

On 05/21/2018 11:17 PM, Alexei Starovoitov wrote:
> Detect code patterns where malicious 'speculative store bypass' can be used
> and sanitize such patterns.
> 
>  39: (bf) r3 = r10
>  40: (07) r3 += -216
>  41: (79) r8 = *(u64 *)(r7 +0)   // slow read
>  42: (7a) *(u64 *)(r10 -72) = 0  // verifier inserts this instruction
>  43: (7b) *(u64 *)(r8 +0) = r3   // this store becomes slow due to r8
>  44: (79) r1 = *(u64 *)(r6 +0)   // cpu speculatively executes this load
>  45: (71) r2 = *(u8 *)(r1 +0)    // speculatively arbitrary 'load byte'
>                                  // is now sanitized
> 
> Above code after x86 JIT becomes:
>  e5: mov    %rbp,%rdx
>  e8: add    $0xffffffffffffff28,%rdx
>  ef: mov    0x0(%r13),%r14
>  f3: movq   $0x0,-0x48(%rbp)
>  fb: mov    %rdx,0x0(%r14)
>  ff: mov    0x0(%rbx),%rdi
> 103: movzbq 0x0(%rdi),%rsi
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>

(No further action needed since already in Linus tree [1]. This went via the
 batch of x86 fixes on the speculative store buffer bypass from today [2].)

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=af86ca4e3088fe5eacf2f7e58c01fa68ca067672
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3b78ce4a34b761c7fe13520de822984019ff1a8f

^ permalink raw reply

* [PATCH] pcnet32: add an error handling path in pcnet32_probe_pci()
From: Bo Chen @ 2018-05-21 21:44 UTC (permalink / raw)
  To: pcnet32; +Cc: davem, netdev, linux-kernel, Bo Chen

Make sure to invoke pci_disable_device() when errors occur in
pcnet32_probe_pci().

Signed-off-by: Bo Chen <chenbo@pdx.edu>
---
 drivers/net/ethernet/amd/pcnet32.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index a561705f232c..be198cc0b10c 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1552,22 +1552,26 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!ioaddr) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
 			pr_err("card has no PCI IO resources, aborting\n");
-		return -ENODEV;
+		err = -ENODEV;
+		goto err_disable_dev;
 	}
 
 	err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK);
 	if (err) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
 			pr_err("architecture does not support 32bit PCI busmaster DMA\n");
-		return err;
+		goto err_disable_dev;
 	}
 	if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
 			pr_err("io address range already allocated\n");
-		return -EBUSY;
+		err = -EBUSY;
+		goto err_disable_dev;
 	}
 
 	err = pcnet32_probe1(ioaddr, 1, pdev);
+
+err_disable_dev:
 	if (err < 0)
 		pci_disable_device(pdev);
 
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH 07/33] iwlwifi: mvm: use match_string() helper
From: Andy Shevchenko @ 2018-05-21 21:43 UTC (permalink / raw)
  To: Yisheng Xie
  Cc: Linux Kernel Mailing List, Kalle Valo, Intel Linux Wireless,
	Johannes Berg, Emmanuel Grumbach, open list:TI WILINK WIRELES...,
	netdev
In-Reply-To: <1526903890-35761-8-git-send-email-xieyisheng1@huawei.com>

On Mon, May 21, 2018 at 2:57 PM, Yisheng Xie <xieyisheng1@huawei.com> wrote:
> match_string() returns the index of an array for a matching string,
> which can be used intead of open coded variant.

>         int ret, bt_force_ant_mode;
>
> -       for (bt_force_ant_mode = 0;
> -            bt_force_ant_mode < ARRAY_SIZE(modes_str);
> -            bt_force_ant_mode++) {
> -               if (!strcmp(buf, modes_str[bt_force_ant_mode]))
> -                       break;
> -       }
> -
> -       if (bt_force_ant_mode >= ARRAY_SIZE(modes_str))

> +       bt_force_ant_mode = match_string(modes_str,
> +                                        ARRAY_SIZE(modes_str), buf);

One line?

> +       if (bt_force_ant_mode < 0)
>                 return -EINVAL;

I would rather use

ret = match_string();
if (ret < 0)
 return ret;

bt_force_... = ret;

But it's up tu Loca.

>
>         ret = 0;



-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH 05/33] cxgb4: use match_string() helper
From: Andy Shevchenko @ 2018-05-21 21:39 UTC (permalink / raw)
  To: Yisheng Xie; +Cc: Linux Kernel Mailing List, Ganesh Goudar, netdev
In-Reply-To: <1526903890-35761-6-git-send-email-xieyisheng1@huawei.com>

On Mon, May 21, 2018 at 2:57 PM, Yisheng Xie <xieyisheng1@huawei.com> wrote:
> match_string() returns the index of an array for a matching string,
> which can be used intead of open coded variant.

> -       for (i = 0; i < ARRAY_SIZE(cudbg_region); i++) {
> -               if (!strcmp(cudbg_region[i], region_name)) {
> -                       found = 1;
> -                       idx = i;
> -                       break;
> -               }
> -       }
> -       if (!found)
> -               return -EINVAL;
> +       rc = match_string(cudbg_region, ARRAY_SIZE(cudbg_region), region_name);
> +       if (rc < 0)
> +               return rc;
>
> -       found = 0;
> +       idx = rc;

Is found still in use after this?
If so, is it initialized properly now?

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH net-next] r8169: perform reset synchronously in __rtl8169_resume
From: Francois Romieu @ 2018-05-21 21:24 UTC (permalink / raw)
  To: Heiner Kallweit
  Cc: David Miller, Realtek linux nic maintainers,
	netdev@vger.kernel.org
In-Reply-To: <040eb6ae-bb21-2d27-14e0-b291cb4cc1c9@gmail.com>

Heiner Kallweit <hkallweit1@gmail.com> :
[...]
> diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
> index 75dfac024..1eb4f625a 100644
> --- a/drivers/net/ethernet/realtek/r8169.c
> +++ b/drivers/net/ethernet/realtek/r8169.c
> @@ -7327,9 +7327,9 @@ static void __rtl8169_resume(struct net_device *dev)
>  	rtl_lock_work(tp);
>  	napi_enable(&tp->napi);
>  	set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
> +	if (netif_running(dev))
> +		rtl_reset_work(tp);
>  	rtl_unlock_work(tp);
> -
> -	rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
>  }
>  
>  static int rtl8169_resume(struct device *device)

netif_running() returns true before rtl_open(): issuing rtl_reset_work()
synchronously against uninitialized rings right at the start of rtl_open()
won't work as expected.

pm_runtime_get_sync() could be issued later in rtl_open() (but I would
not mind something different with runtime_{resume/suspend} in open/close).

-- 
Ueimor

^ permalink raw reply

* [net-next 6/6] net/mlx5e: Receive buffer support for DCBX
From: Saeed Mahameed @ 2018-05-21 21:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Huy Nguyen, Saeed Mahameed
In-Reply-To: <20180521210502.11082-1-saeedm@mellanox.com>

From: Huy Nguyen <huyn@mellanox.com>

Add dcbnl's set/get buffer configuration callback that allows user to
set/get buffer size configuration and priority to buffer mapping.

By default, firmware controls receive buffer configuration and priority
of buffer mapping based on the changes in pfc settings. When set buffer
call back is triggered, the buffer configuration changes to manual mode.

The manual mode means mlx5 driver will adjust the buffer configuration
accordingly based on the changes in pfc settings.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   1 +
 .../ethernet/mellanox/mlx5/core/en_dcbnl.c    | 131 +++++++++++++++++-
 2 files changed, 125 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9ab7158a7ce7..c5c7a6d687ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -278,6 +278,7 @@ struct mlx5e_dcbx {
 	u8                         cap;
 
 	/* Buffer configuration */
+	bool                       manual_buffer;
 	u32                        cable_len;
 	u32                        xoff;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index c641d5656b2d..fa6cd8aa077c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -32,8 +32,8 @@
 #include <linux/device.h>
 #include <linux/netdevice.h>
 #include "en.h"
-
-#define MLX5E_MAX_PRIORITY 8
+#include "en/port.h"
+#include "en/port_buffer.h"
 
 #define MLX5E_100MB (100000)
 #define MLX5E_1GB   (1000000)
@@ -41,6 +41,9 @@
 #define MLX5E_CEE_STATE_UP    1
 #define MLX5E_CEE_STATE_DOWN  0
 
+/* Max supported cable length is 1000 meters */
+#define MLX5E_MAX_CABLE_LENGTH 1000
+
 enum {
 	MLX5E_VENDOR_TC_GROUP_NUM = 7,
 	MLX5E_LOWEST_PRIO_GROUP   = 0,
@@ -338,6 +341,9 @@ static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev,
 		pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause);
 	}
 
+	if (MLX5_BUFFER_SUPPORTED(mdev))
+		pfc->delay = priv->dcbx.cable_len;
+
 	return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL);
 }
 
@@ -346,16 +352,39 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
+	u32 old_cable_len = priv->dcbx.cable_len;
+	struct ieee_pfc pfc_new;
+	u32 changed = 0;
 	u8 curr_pfc_en;
-	int ret;
+	int ret = 0;
 
+	/* pfc_en */
 	mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL);
+	if (pfc->pfc_en != curr_pfc_en) {
+		ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en);
+		if (ret)
+			return ret;
+		mlx5_toggle_port_link(mdev);
+		changed |= MLX5E_PORT_BUFFER_PFC;
+	}
 
-	if (pfc->pfc_en == curr_pfc_en)
-		return 0;
+	if (pfc->delay &&
+	    pfc->delay < MLX5E_MAX_CABLE_LENGTH &&
+	    pfc->delay != priv->dcbx.cable_len) {
+		priv->dcbx.cable_len = pfc->delay;
+		changed |= MLX5E_PORT_BUFFER_CABLE_LEN;
+	}
 
-	ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en);
-	mlx5_toggle_port_link(mdev);
+	if (MLX5_BUFFER_SUPPORTED(mdev)) {
+		pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en;
+		if (priv->dcbx.manual_buffer)
+			ret = mlx5e_port_manual_buffer_config(priv, changed,
+							      dev->mtu, &pfc_new,
+							      NULL, NULL);
+
+		if (ret && (changed & MLX5E_PORT_BUFFER_CABLE_LEN))
+			priv->dcbx.cable_len = old_cable_len;
+	}
 
 	if (!ret) {
 		mlx5e_dbg(HW, priv,
@@ -873,6 +902,89 @@ static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
 	cee_cfg->pfc_enable = state;
 }
 
+static int mlx5e_dcbnl_getbuffer(struct net_device *dev,
+				 struct dcbnl_buffer *dcb_buffer)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_port_buffer port_buffer;
+	u8 buffer[MLX5E_MAX_PRIORITY];
+	int i, err;
+
+	if (!MLX5_BUFFER_SUPPORTED(mdev))
+		return -EOPNOTSUPP;
+
+	err = mlx5e_port_query_priority2buffer(mdev, buffer);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+		dcb_buffer->prio2buffer[i] = buffer[i];
+
+	err = mlx5e_port_query_buffer(priv, &port_buffer);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++)
+		dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size;
+
+	return 0;
+}
+
+static int mlx5e_dcbnl_setbuffer(struct net_device *dev,
+				 struct dcbnl_buffer *dcb_buffer)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_port_buffer port_buffer;
+	u8 old_prio2buffer[MLX5E_MAX_PRIORITY];
+	u32 *buffer_size = NULL;
+	u8 *prio2buffer = NULL;
+	u32 changed = 0;
+	int i, err;
+
+	if (!MLX5_BUFFER_SUPPORTED(mdev))
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < DCBX_MAX_BUFFERS; i++)
+		mlx5_core_dbg(mdev, "buffer[%d]=%d\n", i, dcb_buffer->buffer_size[i]);
+
+	for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+		mlx5_core_dbg(mdev, "priority %d buffer%d\n", i, dcb_buffer->prio2buffer[i]);
+
+	err = mlx5e_port_query_priority2buffer(mdev, old_prio2buffer);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_MAX_PRIORITY; i++) {
+		if (dcb_buffer->prio2buffer[i] != old_prio2buffer[i]) {
+			changed |= MLX5E_PORT_BUFFER_PRIO2BUFFER;
+			prio2buffer = dcb_buffer->prio2buffer;
+			break;
+		}
+	}
+
+	err = mlx5e_port_query_buffer(priv, &port_buffer);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) {
+			changed |= MLX5E_PORT_BUFFER_SIZE;
+			buffer_size = dcb_buffer->buffer_size;
+			break;
+		}
+	}
+
+	if (!changed)
+		return 0;
+
+	priv->dcbx.manual_buffer = true;
+	err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL,
+					      buffer_size, prio2buffer);
+	return err;
+}
+
 const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_getets	= mlx5e_dcbnl_ieee_getets,
 	.ieee_setets	= mlx5e_dcbnl_ieee_setets,
@@ -884,6 +996,8 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_delapp    = mlx5e_dcbnl_ieee_delapp,
 	.getdcbx	= mlx5e_dcbnl_getdcbx,
 	.setdcbx	= mlx5e_dcbnl_setdcbx,
+	.dcbnl_getbuffer = mlx5e_dcbnl_getbuffer,
+	.dcbnl_setbuffer = mlx5e_dcbnl_setbuffer,
 
 /* CEE interfaces */
 	.setall         = mlx5e_dcbnl_setall,
@@ -1091,5 +1205,8 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
 	if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
 		priv->dcbx.cap |= DCB_CAP_DCBX_HOST;
 
+	priv->dcbx.manual_buffer = false;
+	priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN;
+
 	mlx5e_ets_init(priv);
 }
-- 
2.17.0

^ permalink raw reply related

* [net-next 5/6] net/mlx5e: Receive buffer configuration
From: Saeed Mahameed @ 2018-05-21 21:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Huy Nguyen, Saeed Mahameed
In-Reply-To: <20180521210502.11082-1-saeedm@mellanox.com>

From: Huy Nguyen <huyn@mellanox.com>

Add APIs for buffer configuration based on the changes in
pfc configuration, cable len, buffer size configuration,
and priority to buffer mapping.

Note that the xoff fomula is as below
  xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]
  xoff_threshold = buffer_size - xoff
  xon_threshold = xoff_threshold - MTU

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   5 +
 .../mellanox/mlx5/core/en/port_buffer.c       | 327 ++++++++++++++++++
 .../mellanox/mlx5/core/en/port_buffer.h       |  75 ++++
 4 files changed, 408 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 651cf3640420..9efbf193ad5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -21,7 +21,7 @@ mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
 
 mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc.o
 
-mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
+mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o en/port_buffer.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d13a86a1d702..9ab7158a7ce7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -65,6 +65,7 @@ struct page_pool;
 #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
 #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
 
+#define MLX5E_MAX_PRIORITY      8
 #define MLX5E_MAX_DSCP          64
 #define MLX5E_MAX_NUM_TC	8
 
@@ -275,6 +276,10 @@ struct mlx5e_dcbx {
 	/* The only setting that cannot be read from FW */
 	u8                         tc_tsa[IEEE_8021QAZ_MAX_TCS];
 	u8                         cap;
+
+	/* Buffer configuration */
+	u32                        cable_len;
+	u32                        xoff;
 };
 
 struct mlx5e_dcbx_dp {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
new file mode 100644
index 000000000000..c047da8752da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "port_buffer.h"
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+			    struct mlx5e_port_buffer *port_buffer)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+	u32 total_used = 0;
+	void *buffer;
+	void *out;
+	int err;
+	int i;
+
+	out = kzalloc(sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5e_port_query_pbmc(mdev, out);
+	if (err)
+		goto out;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
+		port_buffer->buffer[i].lossy =
+			MLX5_GET(bufferx_reg, buffer, lossy);
+		port_buffer->buffer[i].epsb =
+			MLX5_GET(bufferx_reg, buffer, epsb);
+		port_buffer->buffer[i].size =
+			MLX5_GET(bufferx_reg, buffer, size) << MLX5E_BUFFER_CELL_SHIFT;
+		port_buffer->buffer[i].xon =
+			MLX5_GET(bufferx_reg, buffer, xon_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+		port_buffer->buffer[i].xoff =
+			MLX5_GET(bufferx_reg, buffer, xoff_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+		total_used += port_buffer->buffer[i].size;
+
+		mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i,
+			  port_buffer->buffer[i].size,
+			  port_buffer->buffer[i].xon,
+			  port_buffer->buffer[i].xoff,
+			  port_buffer->buffer[i].epsb,
+			  port_buffer->buffer[i].lossy);
+	}
+
+	port_buffer->port_buffer_size =
+		MLX5_GET(pbmc_reg, out, port_buffer_size) << MLX5E_BUFFER_CELL_SHIFT;
+	port_buffer->spare_buffer_size =
+		port_buffer->port_buffer_size - total_used;
+
+	mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n",
+		  port_buffer->port_buffer_size,
+		  port_buffer->spare_buffer_size);
+out:
+	kfree(out);
+	return err;
+}
+
+static int port_set_buffer(struct mlx5e_priv *priv,
+			   struct mlx5e_port_buffer *port_buffer)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+	void *buffer;
+	void *in;
+	int err;
+	int i;
+
+	in = kzalloc(sz, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	err = mlx5e_port_query_pbmc(mdev, in);
+	if (err)
+		goto out;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
+
+		MLX5_SET(bufferx_reg, buffer, size,
+			 port_buffer->buffer[i].size >> MLX5E_BUFFER_CELL_SHIFT);
+		MLX5_SET(bufferx_reg, buffer, lossy,
+			 port_buffer->buffer[i].lossy);
+		MLX5_SET(bufferx_reg, buffer, xoff_threshold,
+			 port_buffer->buffer[i].xoff >> MLX5E_BUFFER_CELL_SHIFT);
+		MLX5_SET(bufferx_reg, buffer, xon_threshold,
+			 port_buffer->buffer[i].xon >> MLX5E_BUFFER_CELL_SHIFT);
+	}
+
+	err = mlx5e_port_set_pbmc(mdev, in);
+out:
+	kfree(in);
+	return err;
+}
+
+/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */
+static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
+{
+	u32 speed;
+	u32 xoff;
+	int err;
+
+	err = mlx5e_port_linkspeed(priv->mdev, &speed);
+	if (err)
+		return 0;
+
+	xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
+
+	mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff);
+	return xoff;
+}
+
+static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
+				 u32 xoff, unsigned int mtu)
+{
+	int i;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		if (port_buffer->buffer[i].lossy) {
+			port_buffer->buffer[i].xoff = 0;
+			port_buffer->buffer[i].xon  = 0;
+			continue;
+		}
+
+		if (port_buffer->buffer[i].size <
+		    (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT)))
+			return -ENOMEM;
+
+		port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
+		port_buffer->buffer[i].xon  = port_buffer->buffer[i].xoff - mtu;
+	}
+
+	return 0;
+}
+
+/**
+ * update_buffer_lossy()
+ *   mtu: device's MTU
+ *   pfc_en: <input> current pfc configuration
+ *   buffer: <input> current prio to buffer mapping
+ *   xoff:   <input> xoff value
+ *   port_buffer: <output> port receive buffer configuration
+ *   change: <output>
+ *
+ *   Update buffer configuration based on pfc configuraiton and priority
+ *   to buffer mapping.
+ *   Buffer's lossy bit is changed to:
+ *     lossless if there is at least one PFC enabled priority mapped to this buffer
+ *     lossy if all priorities mapped to this buffer are PFC disabled
+ *
+ *   Return:
+ *     Return 0 if no error.
+ *     Set change to true if buffer configuration is modified.
+ */
+static int update_buffer_lossy(unsigned int mtu,
+			       u8 pfc_en, u8 *buffer, u32 xoff,
+			       struct mlx5e_port_buffer *port_buffer,
+			       bool *change)
+{
+	bool changed = false;
+	u8 lossy_count;
+	u8 prio_count;
+	u8 lossy;
+	int prio;
+	int err;
+	int i;
+
+	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+		prio_count = 0;
+		lossy_count = 0;
+
+		for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) {
+			if (buffer[prio] != i)
+				continue;
+
+			prio_count++;
+			lossy_count += !(pfc_en & (1 << prio));
+		}
+
+		if (lossy_count == prio_count)
+			lossy = 1;
+		else /* lossy_count < prio_count */
+			lossy = 0;
+
+		if (lossy != port_buffer->buffer[i].lossy) {
+			port_buffer->buffer[i].lossy = lossy;
+			changed = true;
+		}
+	}
+
+	if (changed) {
+		err = update_xoff_threshold(port_buffer, xoff, mtu);
+		if (err)
+			return err;
+
+		*change = true;
+	}
+
+	return 0;
+}
+
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+				    u32 change, unsigned int mtu,
+				    struct ieee_pfc *pfc,
+				    u32 *buffer_size,
+				    u8 *prio2buffer)
+{
+	struct mlx5e_port_buffer port_buffer;
+	u32 xoff = calculate_xoff(priv, mtu);
+	bool update_prio2buffer = false;
+	u8 buffer[MLX5E_MAX_PRIORITY];
+	bool update_buffer = false;
+	u32 total_used = 0;
+	u8 curr_pfc_en;
+	int err;
+	int i;
+
+	mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change);
+
+	err = mlx5e_port_query_buffer(priv, &port_buffer);
+	if (err)
+		return err;
+
+	if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
+		update_buffer = true;
+		err = update_xoff_threshold(&port_buffer, xoff, mtu);
+		if (err)
+			return err;
+	}
+
+	if (change & MLX5E_PORT_BUFFER_PFC) {
+		err = mlx5e_port_query_priority2buffer(priv->mdev, buffer);
+		if (err)
+			return err;
+
+		err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff,
+					  &port_buffer, &update_buffer);
+		if (err)
+			return err;
+	}
+
+	if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) {
+		update_prio2buffer = true;
+		err = mlx5_query_port_pfc(priv->mdev, &curr_pfc_en, NULL);
+		if (err)
+			return err;
+
+		err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff,
+					  &port_buffer, &update_buffer);
+		if (err)
+			return err;
+	}
+
+	if (change & MLX5E_PORT_BUFFER_SIZE) {
+		for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+			mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]);
+			if (!port_buffer.buffer[i].lossy && !buffer_size[i]) {
+				mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n",
+					  __func__, i);
+				return -EINVAL;
+			}
+
+			port_buffer.buffer[i].size = buffer_size[i];
+			total_used += buffer_size[i];
+		}
+
+		mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used);
+
+		if (total_used > port_buffer.port_buffer_size)
+			return -EINVAL;
+
+		update_buffer = true;
+		err = update_xoff_threshold(&port_buffer, xoff, mtu);
+		if (err)
+			return err;
+	}
+
+	/* Need to update buffer configuration if xoff value is changed */
+	if (!update_buffer && xoff != priv->dcbx.xoff) {
+		update_buffer = true;
+		err = update_xoff_threshold(&port_buffer, xoff, mtu);
+		if (err)
+			return err;
+	}
+	priv->dcbx.xoff = xoff;
+
+	/* Apply the settings */
+	if (update_buffer) {
+		err = port_set_buffer(priv, &port_buffer);
+		if (err)
+			return err;
+	}
+
+	if (update_prio2buffer)
+		err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer);
+
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
new file mode 100644
index 000000000000..34f55b81a0de
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_PORT_BUFFER_H__
+#define __MLX5_EN_PORT_BUFFER_H__
+
+#include "en.h"
+#include "port.h"
+
+#define MLX5E_MAX_BUFFER 8
+#define MLX5E_BUFFER_CELL_SHIFT 7
+#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */
+
+#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \
+				     MLX5_CAP_PCAM_REG(mdev, pbmc) && \
+				     MLX5_CAP_PCAM_REG(mdev, pptb))
+
+enum {
+	MLX5E_PORT_BUFFER_CABLE_LEN   = BIT(0),
+	MLX5E_PORT_BUFFER_PFC         = BIT(1),
+	MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2),
+	MLX5E_PORT_BUFFER_SIZE        = BIT(3),
+};
+
+struct mlx5e_bufferx_reg {
+	u8   lossy;
+	u8   epsb;
+	u32  size;
+	u32  xoff;
+	u32  xon;
+};
+
+struct mlx5e_port_buffer {
+	u32                       port_buffer_size;
+	u32                       spare_buffer_size;
+	struct mlx5e_bufferx_reg  buffer[MLX5E_MAX_BUFFER];
+};
+
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+				    u32 change, unsigned int mtu,
+				    struct ieee_pfc *pfc,
+				    u32 *buffer_size,
+				    u8 *prio2buffer);
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+			    struct mlx5e_port_buffer *port_buffer);
+#endif
-- 
2.17.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox