Netdev List
 help / color / mirror / Atom feed
* RE: [PATCH] net/fsl: Add mEMAC MDIO support to XGMAC MDIO
From: Shaohui Xie @ 2015-01-12  3:13 UTC (permalink / raw)
  To: Emilian Medve, shh.xie@gmail.com, netdev@vger.kernel.org,
	davem@davemloft.net
  Cc: Andy Fleming
In-Reply-To: <54B035F4.4020804@Freescale.com>

Hello Emil,

> -----Original Message-----
> From: Emil Medve [mailto:Emilian.Medve@Freescale.com]
> Sent: Saturday, January 10, 2015 4:12 AM
> To: shh.xie@gmail.com; netdev@vger.kernel.org; davem@davemloft.net
> Cc: Andy Fleming; Xie Shaohui-B21989
> Subject: Re: [PATCH] net/fsl: Add mEMAC MDIO support to XGMAC MDIO
> 
> Hello Shao-Hui,
> 
> 
> On 01/04/2015 03:36 AM, shh.xie@gmail.com wrote:
> > From: Andy Fleming <afleming@gmail.com>
> >
> > The Freescale mEMAC supports operating at 10/100/1000/10G, and its
> > associated MDIO controller is likewise capable of operating both
> > Clause 22 and Clause 45 MDIO buses. It is nearly identical to the MDIO
> > controller on the XGMAC, so we just modify that driver.
> >
> > Portions of this driver developed by:
> >
> > Sandeep Singh <sandeep@freescale.com>
> > Roy Zang <tie-fei.zang@freescale.com>
> >
> > Signed-off-by: Andy Fleming <afleming@gmail.com>
> > Signed-off-by: Shaohui Xie <Shaohui.Xie@freescale.com>
> > ---
> >  drivers/net/ethernet/freescale/Kconfig      |  3 +-
> >  drivers/net/ethernet/freescale/xgmac_mdio.c | 64
> > ++++++++++++++++++++++++-----
> >  2 files changed, 55 insertions(+), 12 deletions(-)
> >
> > 	...
> >
> > diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c
> > b/drivers/net/ethernet/freescale/xgmac_mdio.c
> > index a352445..e0fc3d1 100644
> > --- a/drivers/net/ethernet/freescale/xgmac_mdio.c
> > +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
> >
> > 	...
> >
> > @@ -123,21 +144,39 @@ static int xgmac_mdio_write(struct mii_bus *bus,
> > int phy_id, int regnum, u16 val  static int xgmac_mdio_read(struct
> > mii_bus *bus, int phy_id, int regnum)  {
> >  	struct tgec_mdio_controller __iomem *regs = bus->priv;
> > -	uint16_t dev_addr = regnum >> 16;
> > +	uint16_t dev_addr;
> > +	uint32_t mdio_stat;
> >  	uint32_t mdio_ctl;
> >  	uint16_t value;
> >  	int ret;
> >
> > +	mdio_stat = in_be32(&regs->mdio_stat);
> > +	if (regnum & MII_ADDR_C45) {
> > +		dev_addr = (regnum >> 16) & 0x1f;
> > +		mdio_stat |= MDIO_STAT_ENC;
> > +	} else {
> > +		dev_addr = regnum & 0x1f;
> > +		mdio_stat = ~MDIO_STAT_ENC;
> 
> Shouldn't this be 'mdio_stat &= ~MDIO_STAT_ENC'?
[S.H] You are right! should be "mdio_stat &= ~MDIO_STAT_ENC".
We have this bug for a long time!

I'll post a patch to fix it.

Thanks!
Shaohui

^ permalink raw reply

* [PATCH net-next] r8152: replace tasklet with NAPI
From: Hayes Wang @ 2015-01-12  4:06 UTC (permalink / raw)
  To: netdev; +Cc: nic_swsd, linux-kernel, linux-usb, Hayes Wang

Replace tasklet with NAPI.

Add rx_queue to queue the remaining rx packets if the number of the
rx packets is more than the request from poll().

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
---
 drivers/net/usb/r8152.c | 120 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 85 insertions(+), 35 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index b23426e..50387fe 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -27,7 +27,7 @@
 #include <linux/usb/cdc.h>
 
 /* Version Information */
-#define DRIVER_VERSION "v1.07.0 (2014/10/09)"
+#define DRIVER_VERSION "v1.08.0 (2015/01/13)"
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
 #define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters"
 #define MODULENAME "r8152"
@@ -448,6 +448,7 @@ enum rtl_register_content {
 #define RTL8152_RMS		(VLAN_ETH_FRAME_LEN + VLAN_HLEN)
 #define RTL8153_RMS		RTL8153_MAX_PACKET
 #define RTL8152_TX_TIMEOUT	(5 * HZ)
+#define RTL8152_NAPI_WEIGHT	64
 
 /* rtl8152 flags */
 enum rtl8152_flags {
@@ -457,7 +458,7 @@ enum rtl8152_flags {
 	RTL8152_LINK_CHG,
 	SELECTIVE_SUSPEND,
 	PHY_RESET,
-	SCHEDULE_TASKLET,
+	SCHEDULE_NAPI,
 };
 
 /* Define these values to match your device */
@@ -549,14 +550,14 @@ struct tx_agg {
 struct r8152 {
 	unsigned long flags;
 	struct usb_device *udev;
-	struct tasklet_struct tl;
+	struct napi_struct napi;
 	struct usb_interface *intf;
 	struct net_device *netdev;
 	struct urb *intr_urb;
 	struct tx_agg tx_info[RTL8152_MAX_TX];
 	struct rx_agg rx_info[RTL8152_MAX_RX];
 	struct list_head rx_done, tx_free;
-	struct sk_buff_head tx_queue;
+	struct sk_buff_head tx_queue, rx_queue;
 	spinlock_t rx_lock, tx_lock;
 	struct delayed_work schedule;
 	struct mii_if_info mii;
@@ -1062,7 +1063,7 @@ static void read_bulk_callback(struct urb *urb)
 		spin_lock(&tp->rx_lock);
 		list_add_tail(&agg->list, &tp->rx_done);
 		spin_unlock(&tp->rx_lock);
-		tasklet_schedule(&tp->tl);
+		napi_schedule(&tp->napi);
 		return;
 	case -ESHUTDOWN:
 		set_bit(RTL8152_UNPLUG, &tp->flags);
@@ -1126,7 +1127,7 @@ static void write_bulk_callback(struct urb *urb)
 		return;
 
 	if (!skb_queue_empty(&tp->tx_queue))
-		tasklet_schedule(&tp->tl);
+		napi_schedule(&tp->napi);
 }
 
 static void intr_callback(struct urb *urb)
@@ -1245,6 +1246,7 @@ static int alloc_all_mem(struct r8152 *tp)
 	spin_lock_init(&tp->tx_lock);
 	INIT_LIST_HEAD(&tp->tx_free);
 	skb_queue_head_init(&tp->tx_queue);
+	skb_queue_head_init(&tp->rx_queue);
 
 	for (i = 0; i < RTL8152_MAX_RX; i++) {
 		buf = kmalloc_node(agg_buf_sz, GFP_KERNEL, node);
@@ -1649,13 +1651,32 @@ return_result:
 	return checksum;
 }
 
-static void rx_bottom(struct r8152 *tp)
+static int rx_bottom(struct r8152 *tp, int budget)
 {
 	unsigned long flags;
 	struct list_head *cursor, *next, rx_queue;
+	int work_done = 0;
+
+	if (!skb_queue_empty(&tp->rx_queue)) {
+		while (work_done < budget) {
+			struct sk_buff *skb = __skb_dequeue(&tp->rx_queue);
+			struct net_device *netdev = tp->netdev;
+			struct net_device_stats *stats = &netdev->stats;
+			unsigned int pkt_len;
+
+			if (!skb)
+				break;
+
+			pkt_len = skb->len;
+			napi_gro_receive(&tp->napi, skb);
+			work_done++;
+			stats->rx_packets++;
+			stats->rx_bytes += pkt_len;
+		}
+	}
 
 	if (list_empty(&tp->rx_done))
-		return;
+		goto out1;
 
 	INIT_LIST_HEAD(&rx_queue);
 	spin_lock_irqsave(&tp->rx_lock, flags);
@@ -1708,9 +1729,14 @@ static void rx_bottom(struct r8152 *tp)
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, netdev);
 			rtl_rx_vlan_tag(rx_desc, skb);
-			netif_receive_skb(skb);
-			stats->rx_packets++;
-			stats->rx_bytes += pkt_len;
+			if (work_done < budget) {
+				napi_gro_receive(&tp->napi, skb);
+				work_done++;
+				stats->rx_packets++;
+				stats->rx_bytes += pkt_len;
+			} else {
+				__skb_queue_tail(&tp->rx_queue, skb);
+			}
 
 find_next_rx:
 			rx_data = rx_agg_align(rx_data + pkt_len + CRC_SIZE);
@@ -1722,6 +1748,9 @@ find_next_rx:
 submit:
 		r8152_submit_rx(tp, agg, GFP_ATOMIC);
 	}
+
+out1:
+	return work_done;
 }
 
 static void tx_bottom(struct r8152 *tp)
@@ -1761,12 +1790,8 @@ static void tx_bottom(struct r8152 *tp)
 	} while (res == 0);
 }
 
-static void bottom_half(unsigned long data)
+static void bottom_half(struct r8152 *tp)
 {
-	struct r8152 *tp;
-
-	tp = (struct r8152 *)data;
-
 	if (test_bit(RTL8152_UNPLUG, &tp->flags))
 		return;
 
@@ -1778,12 +1803,28 @@ static void bottom_half(unsigned long data)
 	if (!netif_carrier_ok(tp->netdev))
 		return;
 
-	clear_bit(SCHEDULE_TASKLET, &tp->flags);
+	clear_bit(SCHEDULE_NAPI, &tp->flags);
 
-	rx_bottom(tp);
 	tx_bottom(tp);
 }
 
+static int r8152_poll(struct napi_struct *napi, int budget)
+{
+	struct r8152 *tp = container_of(napi, struct r8152, napi);
+	int work_done;
+
+	work_done = rx_bottom(tp, budget);
+	bottom_half(tp);
+
+	if (work_done < budget) {
+		napi_complete(napi);
+		if (!list_empty(&tp->rx_done))
+			napi_schedule(napi);
+	}
+
+	return work_done;
+}
+
 static
 int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags)
 {
@@ -1810,7 +1851,11 @@ int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags)
 		spin_lock_irqsave(&tp->rx_lock, flags);
 		list_add_tail(&agg->list, &tp->rx_done);
 		spin_unlock_irqrestore(&tp->rx_lock, flags);
-		tasklet_schedule(&tp->tl);
+
+		netif_err(tp, rx_err, tp->netdev,
+			  "Couldn't submit rx[%p], ret = %d\n", agg, ret);
+
+		napi_schedule(&tp->napi);
 	}
 
 	return ret;
@@ -1929,11 +1974,11 @@ static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
 
 	if (!list_empty(&tp->tx_free)) {
 		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
-			set_bit(SCHEDULE_TASKLET, &tp->flags);
+			set_bit(SCHEDULE_NAPI, &tp->flags);
 			schedule_delayed_work(&tp->schedule, 0);
 		} else {
 			usb_mark_last_busy(tp->udev);
-			tasklet_schedule(&tp->tl);
+			napi_schedule(&tp->napi);
 		}
 	} else if (skb_queue_len(&tp->tx_queue) > tp->tx_qlen) {
 		netif_stop_queue(netdev);
@@ -2012,6 +2057,7 @@ static int rtl_start_rx(struct r8152 *tp)
 {
 	int i, ret = 0;
 
+	napi_disable(&tp->napi);
 	INIT_LIST_HEAD(&tp->rx_done);
 	for (i = 0; i < RTL8152_MAX_RX; i++) {
 		INIT_LIST_HEAD(&tp->rx_info[i].list);
@@ -2019,6 +2065,7 @@ static int rtl_start_rx(struct r8152 *tp)
 		if (ret)
 			break;
 	}
+	napi_enable(&tp->napi);
 
 	if (ret && ++i < RTL8152_MAX_RX) {
 		struct list_head rx_queue;
@@ -2049,6 +2096,9 @@ static int rtl_stop_rx(struct r8152 *tp)
 	for (i = 0; i < RTL8152_MAX_RX; i++)
 		usb_kill_urb(tp->rx_info[i].urb);
 
+	while (!skb_queue_empty(&tp->rx_queue))
+		dev_kfree_skb(__skb_dequeue(&tp->rx_queue));
+
 	return 0;
 }
 
@@ -2884,9 +2934,9 @@ static void set_carrier(struct r8152 *tp)
 	} else {
 		if (tp->speed & LINK_STATUS) {
 			netif_carrier_off(netdev);
-			tasklet_disable(&tp->tl);
+			napi_disable(&tp->napi);
 			tp->rtl_ops.disable(tp);
-			tasklet_enable(&tp->tl);
+			napi_enable(&tp->napi);
 		}
 	}
 	tp->speed = speed;
@@ -2919,10 +2969,11 @@ static void rtl_work_func_t(struct work_struct *work)
 	if (test_bit(RTL8152_SET_RX_MODE, &tp->flags))
 		_rtl8152_set_rx_mode(tp->netdev);
 
-	if (test_bit(SCHEDULE_TASKLET, &tp->flags) &&
+	/* don't schedule napi before linking */
+	if (test_bit(SCHEDULE_NAPI, &tp->flags) &&
 	    (tp->speed & LINK_STATUS)) {
-		clear_bit(SCHEDULE_TASKLET, &tp->flags);
-		tasklet_schedule(&tp->tl);
+		clear_bit(SCHEDULE_NAPI, &tp->flags);
+		napi_schedule(&tp->napi);
 	}
 
 	if (test_bit(PHY_RESET, &tp->flags))
@@ -2983,7 +3034,7 @@ static int rtl8152_open(struct net_device *netdev)
 			   res);
 		free_all_mem(tp);
 	} else {
-		tasklet_enable(&tp->tl);
+		napi_enable(&tp->napi);
 	}
 
 	mutex_unlock(&tp->control);
@@ -2999,7 +3050,7 @@ static int rtl8152_close(struct net_device *netdev)
 	struct r8152 *tp = netdev_priv(netdev);
 	int res = 0;
 
-	tasklet_disable(&tp->tl);
+	napi_disable(&tp->napi);
 	clear_bit(WORK_ENABLE, &tp->flags);
 	usb_kill_urb(tp->intr_urb);
 	cancel_delayed_work_sync(&tp->schedule);
@@ -3008,6 +3059,7 @@ static int rtl8152_close(struct net_device *netdev)
 	res = usb_autopm_get_interface(tp->intf);
 	if (res < 0) {
 		rtl_drop_queued_tx(tp);
+		rtl_stop_rx(tp);
 	} else {
 		mutex_lock(&tp->control);
 
@@ -3263,7 +3315,7 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
 	if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
 		clear_bit(WORK_ENABLE, &tp->flags);
 		usb_kill_urb(tp->intr_urb);
-		tasklet_disable(&tp->tl);
+		napi_disable(&tp->napi);
 		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
 			rtl_stop_rx(tp);
 			rtl_runtime_suspend_enable(tp, true);
@@ -3271,7 +3323,7 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
 			cancel_delayed_work_sync(&tp->schedule);
 			tp->rtl_ops.down(tp);
 		}
-		tasklet_enable(&tp->tl);
+		napi_enable(&tp->napi);
 	}
 out1:
 	mutex_unlock(&tp->control);
@@ -3855,7 +3907,6 @@ static int rtl8152_probe(struct usb_interface *intf,
 	if (ret)
 		goto out;
 
-	tasklet_init(&tp->tl, bottom_half, (unsigned long)tp);
 	mutex_init(&tp->control);
 	INIT_DELAYED_WORK(&tp->schedule, rtl_work_func_t);
 
@@ -3891,6 +3942,7 @@ static int rtl8152_probe(struct usb_interface *intf,
 	set_ethernet_addr(tp);
 
 	usb_set_intfdata(intf, tp);
+	netif_napi_add(netdev, &tp->napi, r8152_poll, RTL8152_NAPI_WEIGHT);
 
 	ret = register_netdev(netdev);
 	if (ret != 0) {
@@ -3904,15 +3956,13 @@ static int rtl8152_probe(struct usb_interface *intf,
 	else
 		device_set_wakeup_enable(&udev->dev, false);
 
-	tasklet_disable(&tp->tl);
-
 	netif_info(tp, probe, netdev, "%s\n", DRIVER_VERSION);
 
 	return 0;
 
 out1:
+	netif_napi_del(&tp->napi);
 	usb_set_intfdata(intf, NULL);
-	tasklet_kill(&tp->tl);
 out:
 	free_netdev(netdev);
 	return ret;
@@ -3929,7 +3979,7 @@ static void rtl8152_disconnect(struct usb_interface *intf)
 		if (udev->state == USB_STATE_NOTATTACHED)
 			set_bit(RTL8152_UNPLUG, &tp->flags);
 
-		tasklet_kill(&tp->tl);
+		netif_napi_del(&tp->napi);
 		unregister_netdev(tp->netdev);
 		tp->rtl_ops.unload(tp);
 		free_netdev(tp->netdev);
-- 
2.1.0

^ permalink raw reply related

* Re: [PATCH 0/6] netfilter/ipvs fixes for net
From: David Miller @ 2015-01-12  5:15 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <1420915808-7160-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 10 Jan 2015 19:50:02 +0100

> The following patchset contains netfilter/ipvs fixes, they are:
 ...
> Note that this batch comes later than usual because of the short
> winter holidays.
> 
> You can pull these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
> 
> Have a happy new year, thanks!

Happy new year to you too Pablo!

Pulled, thanks a lot.

^ permalink raw reply

* Re: pull-request: wireless-drivers 2015-01-09
From: David Miller @ 2015-01-12  5:24 UTC (permalink / raw)
  To: kvalo-sgV2jX0FEOL9JmXXK+q4OQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <87lhlbj4pp.fsf-HodKDYzPHsUD5k0oWYwrnHL1okKdlPRT@public.gmane.org>

From: Kalle Valo <kvalo-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
Date: Fri, 09 Jan 2015 18:31:30 +0200

> here are few more fixes to 3.19. Please let me know if there are any
> problems.

Pulled, thanks Kalle.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 1/1] csiostor:fix sparse warnings
From: David Miller @ 2015-01-12  5:40 UTC (permalink / raw)
  To: praveenm; +Cc: netdev, linux-scsi, JBottomley, hch, hariprasad
In-Reply-To: <1420818916-12185-1-git-send-email-praveenm@chelsio.com>

From: Praveen Madhavan <praveenm@chelsio.com>
Date: Fri,  9 Jan 2015 21:25:16 +0530

> This patch fixes sparse warning reported by kbuild.
> Apply this on net-next since it depends on previous commit.
> 
> drivers/scsi/csiostor/csio_hw.c:259:17: sparse: cast to restricted __le32
> drivers/scsi/csiostor/csio_hw.c:536:31: sparse: incorrect type in assignment
> (different base types)
> drivers/scsi/csiostor/csio_hw.c:536:31:    expected unsigned int [unsigned]
> [usertype] <noident>
> drivers/scsi/csiostor/csio_hw.c:536:31:    got restricted __be32 [usertype]
> <noident>
>>> drivers/scsi/csiostor/csio_hw.c:2012:5: sparse: symbol 'csio_hw_prep_fw' was
> not declared. Should it be static?
> 
> Signed-off-by: Praveen Madhavan <praveenm@chelsio.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next] doc: fix the compile fix of txtimestamp.c
From: Cong Wang @ 2015-01-12  5:43 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Linux Kernel Network Developers, David Miller, vlee,
	Carlos O'Donell
In-Reply-To: <1420909698-25025-1-git-send-email-willemb@google.com>

On Sat, Jan 10, 2015 at 9:08 AM, Willem de Bruijn <willemb@google.com> wrote:
>
> Due to the many libc headers included in txtimestamp.c, I was unable
> to make it compile cleanly by including the kernel header
> (linux/ipv6.h) first, as opposed to using the libc definition.
>
> Btw, should we add the libc-compat.h directly to that file, as opposed
> to reading it indirectly through linux/in6.h?
>

That seems fine.

Thanks for the fix!

^ permalink raw reply

* [PATCH v5 net-net 0/2] Increase the limit of tuntap queues
From: Pankaj Gupta @ 2015-01-12  6:11 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: davem, jasowang, mst, dgibson, vfalico, edumazet, vyasevic, hkchu,
	wuzhy, xemul, therbert, bhutchings, xii, stephen, jiri,
	sergei.shtylyov, Pankaj Gupta

Networking under KVM works best if we allocate a per-vCPU rx and tx
queue in a virtual NIC. This requires a per-vCPU queue on the host side.
Modern physical NICs have multiqueue support for large number of queues.
To scale vNIC to run multiple queues parallel to maximum number of vCPU's
we need to increase number of queues support in tuntap.   

Changes from v4:
PATCH2: Michael.S.Tsirkin - Updated change comment message.

Changes from v3:
PATCH1: Michael.S.Tsirkin - Some cleanups and updated commit message.
                            Perf numbers on 10 Gbs NIC
Changes from v2:
PATCH 3: David Miller     - flex array adds extra level of indirection
                            for preallocated array.(dropped, as flow array
			    is allocated using kzalloc with failover to zalloc). 
Changes from v1:
PATCH 2: David Miller     - sysctl changes to limit number of queues 
                            not required for unprivileged users(dropped).

Changes from RFC
PATCH 1: Sergei Shtylyov  - Add an empty line after declarations.
PATCH 2: Jiri Pirko -       Do not introduce new module paramaters.
	 Michael.S.Tsirkin- We can use sysctl for limiting max number
                            of queues.

This series is to increase the number of tuntap queues. Original work is being 
done by 'jasowang@redhat.com'. I am taking this 'https://lkml.org/lkml/2013/6/19/29' 
patch series as a reference. As per discussion in the patch series:

There were two reasons which prevented us from increasing number of tun queues:

- The netdev_queue array in netdevice were allocated through kmalloc, which may 
  cause a high order memory allocation too when we have several queues. 
  E.g. sizeof(netdev_queue) is 320, which means a high order allocation would 
  happens when the device has more than 16 queues.

- We store the hash buckets in tun_struct which results a very large size of
  tun_struct, this high order memory allocation fail easily when the memory is
  fragmented.

The patch 60877a32bce00041528576e6b8df5abe9251fa73 increases the number of tx 
queues. Memory allocation fallback to vzalloc() when kmalloc() fails.

This series tries to address following issues:

- Increase the number of netdev_queue queues for rx similarly its done for tx 
  queues by falling back to vzalloc() when memory allocation with kmalloc() fails.

- Increase number of queues to 256, maximum number is equal to maximum number 
  of vCPUS allowed in a guest.

I have also done testing with multiple parallel Netperf sessions for different 
combination of queues and CPU's. It seems to be working fine without much increase 
in cpu load with increase in number of queues. I also see good increase in throughput
with increase in number of queues. Though i had limitation of 8 physical CPU's. 

For this test: Two Hosts(Host1 & Host2) are directly connected with cable  
Host1 is running Guest1. Data is sent from Host2 to Guest1 via Host1.

Host kernel: 3.19.0-rc2+, AMD Opteron(tm) Processor 6320
NIC : Emulex Corporation OneConnect 10Gb NIC (be3)

Patch Applied  %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle  throughput
Single Queue, 2 vCPU's
-------------
Before Patch :all    0.19    0.00    0.16    0.07    0.04    0.10    0.00    0.18    0.00   99.26  57864.18
After  Patch :all    0.99    0.00    0.64    0.69    0.07    0.26    0.00    1.58    0.00   95.77  57735.77

With 2 Queues, 2 vCPU's    
---------------
Before Patch :all    0.19    0.00    0.19    0.10    0.04    0.11    0.00    0.28    0.00   99.08  63083.09
After  Patch :all    0.87    0.00    0.73    0.78    0.09    0.35    0.00    2.04    0.00   95.14  62917.03

With 4 Queues, 4 vCPU's
--------------
Before Patch :all    0.20    0.00    0.21    0.11    0.04    0.12    0.00    0.32    0.00   99.00  80865.06
After  Patch :all    0.71    0.00    0.93    0.85    0.11    0.51    0.00    2.62    0.00   94.27  86463.19

With 8 Queues, 8 vCPU's
--------------
Before Patch :all    0.19    0.00    0.18    0.09    0.04    0.11    0.00    0.23    0.00   99.17  86795.31
After  Patch :all    0.65    0.00    1.18    0.93    0.13    0.68    0.00    3.38    0.00   93.05  89459.93

With 16 Queues, 8 vCPU's
--------------
After  Patch :all    0.61    0.00    1.59    0.97    0.18    0.92    0.00    4.32    0.00   91.41  120951.60

Patches Summary:
  net: allow large number of rx queues
  tuntap: Increase the number of queues in tun

 drivers/net/tun.c |    7 +++----
 net/core/dev.c    |   13 ++++++++-----
 2 files changed, 11 insertions(+), 9 deletions(-)

^ permalink raw reply

* [PATCH v5 net-next 1/2] net: allow large number of rx queues
From: Pankaj Gupta @ 2015-01-12  6:11 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: davem, jasowang, mst, dgibson, vfalico, edumazet, vyasevic, hkchu,
	wuzhy, xemul, therbert, bhutchings, xii, stephen, jiri,
	sergei.shtylyov, Pankaj Gupta
In-Reply-To: <1421043089-30103-1-git-send-email-pagupta@redhat.com>

netif_alloc_rx_queues() uses kcalloc() to allocate memory
for "struct netdev_queue *_rx" array.
If we are doing large rx queue allocation kcalloc() might
fail, so this patch does a fallback to vzalloc().
Similar implementation is done for tx queue allocation in
netif_alloc_netdev_queues().

We avoid failure of high order memory allocation
with the help of vzalloc(), this allows us to do large
rx and tx queue allocation which in turn helps us to
increase the number of queues in tun.

As vmalloc() adds overhead on a critical network path,
__GFP_REPEAT flag is used with kzalloc() to do this fallback
only when really needed.

Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: David Gibson <dgibson@redhat.com>
---
 net/core/dev.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 1ab168e..954591a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6145,13 +6145,16 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 {
 	unsigned int i, count = dev->num_rx_queues;
 	struct netdev_rx_queue *rx;
+	size_t sz = count * sizeof(*rx);
 
 	BUG_ON(count < 1);
 
-	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-	if (!rx)
-		return -ENOMEM;
-
+	rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!rx) {
+		rx = vzalloc(sz);
+		if (!rx)
+			return -ENOMEM;
+	}
 	dev->_rx = rx;
 
 	for (i = 0; i < count; i++)
@@ -6781,7 +6784,7 @@ void free_netdev(struct net_device *dev)
 
 	netif_free_tx_queues(dev);
 #ifdef CONFIG_SYSFS
-	kfree(dev->_rx);
+	kvfree(dev->_rx);
 #endif
 
 	kfree(rcu_dereference_protected(dev->ingress_queue, 1));
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v5 net-next 2/2] tuntap: Increase the number of queues in tun.
From: Pankaj Gupta @ 2015-01-12  6:11 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: davem, jasowang, mst, dgibson, vfalico, edumazet, vyasevic, hkchu,
	wuzhy, xemul, therbert, bhutchings, xii, stephen, jiri,
	sergei.shtylyov, Pankaj Gupta
In-Reply-To: <1421043089-30103-1-git-send-email-pagupta@redhat.com>

Networking under kvm works best if we allocate a per-vCPU RX and TX
queue in a virtual NIC. This requires a per-vCPU queue on the host side.

It is now safe to increase the maximum number of queues.
Preceding patch: 'net: allow large number of rx queues'
made sure this won't cause failures due to high order memory
allocations. Increase it to 256: this is the max number of vCPUs
KVM supports.

Size of tun_struct changes from 8512 to 10496 after this patch. This keeps
pages allocated for tun_struct before and after the patch to 3.

Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
Reviewed-by: David Gibson <dgibson@redhat.com>
---
 drivers/net/tun.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e3fa65a..a3f13f7 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -113,10 +113,9 @@ struct tap_filter {
 	unsigned char	addr[FLT_EXACT_COUNT][ETH_ALEN];
 };
 
-/* DEFAULT_MAX_NUM_RSS_QUEUES were chosen to let the rx/tx queues allocated for
- * the netdevice to be fit in one page. So we can make sure the success of
- * memory allocation. TODO: increase the limit. */
-#define MAX_TAP_QUEUES DEFAULT_MAX_NUM_RSS_QUEUES
+/* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal
+ * to max number of VCPUs in guest. */
+#define MAX_TAP_QUEUES 256
 #define MAX_TAP_FLOWS  4096
 
 #define TUN_FLOW_EXPIRE (3 * HZ)
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH] brcmfmac: avoid duplicated suspend/resume operation
From: Fu, Zhonghui @ 2015-01-12  6:41 UTC (permalink / raw)
  To: brudley, arend@broadcom.com >> Arend van Spriel, Franky Lin,
	meuleman, kvalo, linville, pieterpg, hdegoede, wens,
	linux-wireless, brcm80211-dev-list, netdev,
	linux-kernel@vger.kernel.org

>From 8685c3c2746b4275fc808d9db23c364b2f54b52a Mon Sep 17 00:00:00 2001
From: Zhonghui Fu <zhonghui.fu@linux.intel.com>
Date: Mon, 12 Jan 2015 14:25:46 +0800
Subject: [PATCH] brcmfmac: avoid duplicated suspend/resume operation

WiFi chip has 2 SDIO functions, and PM core will trigger
twice suspend/resume operations for one WiFi chip to do
the same things. This patch avoid this case.

Acked-by: Arend van Spriel<arend@broadcom.com>
Acked-by: Sergei Shtylyov<sergei.shtylyov@cogentembedded.com>
Signed-off-by: Zhonghui Fu <zhonghui.fu@linux.intel.com>
---
 drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c |   21 +++++++++++++++++----
 1 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
index 9880dae..8f71485 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
@@ -1070,7 +1070,7 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func,
 	 */
 	if ((sdio_get_host_pm_caps(sdiodev->func[1]) & MMC_PM_KEEP_POWER) &&
 	    ((sdio_get_host_pm_caps(sdiodev->func[1]) & MMC_PM_WAKE_SDIO_IRQ) ||
-	     (sdiodev->pdata && sdiodev->pdata->oob_irq_supported)))
+	     (sdiodev->pdata->oob_irq_supported)))
 		bus_if->wowl_supported = true;
 #endif
 
@@ -1139,11 +1139,17 @@ void brcmf_sdio_wowl_config(struct device *dev, bool enabled)
 static int brcmf_ops_sdio_suspend(struct device *dev)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
+	struct brcmf_sdio_dev *sdiodev;
 	mmc_pm_flag_t sdio_flags;
+	struct sdio_func *func = dev_to_sdio_func(dev);
 
 	brcmf_dbg(SDIO, "Enter\n");
 
+	if (func->num == 2)
+		return 0;
+
+	sdiodev = bus_if->bus_priv.sdio;
+
 	atomic_set(&sdiodev->suspend, true);
 
 	if (sdiodev->wowl_enabled) {
@@ -1164,10 +1170,17 @@ static int brcmf_ops_sdio_suspend(struct device *dev)
 static int brcmf_ops_sdio_resume(struct device *dev)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
+	struct brcmf_sdio_dev *sdiodev;
+	struct sdio_func *func = dev_to_sdio_func(dev);
 
 	brcmf_dbg(SDIO, "Enter\n");
-	if (sdiodev->pdata && sdiodev->pdata->oob_irq_supported)
+
+	if (func->num == 2)
+		return 0;
+
+	sdiodev = bus_if->bus_priv.sdio;
+
+	if (sdiodev->pdata->oob_irq_supported)
 		disable_irq_wake(sdiodev->pdata->oob_irq_nr);
 	brcmf_sdio_wd_timer(sdiodev->bus, BRCMF_WD_POLL_MS);
 	atomic_set(&sdiodev->suspend, false);
-- 1.7.1

^ permalink raw reply related

* Re: [PATCH] brcmfmac: avoid duplicated suspend/resume operation
From: Fu, Zhonghui @ 2015-01-12  6:43 UTC (permalink / raw)
  To: Arend van Spriel
  Cc: brudley, Franky Lin, meuleman, kvalo, linville, pieterpg,
	hdegoede, wens, linux-wireless, brcm80211-dev-list, netdev,
	linux-kernel
In-Reply-To: <54AA6BEB.9060207@broadcom.com>


I sent a new version of this patch in another mail.


Thanks,
Zhonghui


On 2015/1/5 18:48, Arend van Spriel wrote:
> On 01/05/15 03:34, Fu, Zhonghui wrote:
>> Hi Arend,
>>
>> Where to find your patch for this?
>
> Well, we did not submit it. Hence my "Acked-by:" to your patch below.
>
> Regards,
> Arend
>
>>
>> Thanks,
>> Zhonghui
>>
>> On 2014/12/31 17:56, Arend van Spriel wrote:
>>> On 12/31/14 09:20, Fu, Zhonghui wrote:
>>>>    From e34419970a07bfcd365f9c66bdfa552188a0cd26 Mon Sep 17 00:00:00 2001
>>>> From: Zhonghui Fu<zhonghui.fu@linux.intel.com>
>>>> Date: Mon, 29 Dec 2014 21:25:31 +0800
>>>> Subject: [PATCH] brcmfmac: avoid duplicated suspend/resume operation
>>>>
>>>> WiFi chip has 2 SDIO functions, and PM core will trigger
>>>> twice suspend/resume operations for one WiFi chip to do
>>>> the same things. This patch avoid this case.
>>>
>>> We have a patch queued up for this as well, but this one looks good enough although I personally prefer container_of() instead of dev_to_sdio_func().
>>>
>>> Acked-by: Arend van Spriel<arend@broadcom.com>
>>>> Signed-off-by: Zhonghui Fu<zhonghui.fu@linux.intel.com>
>>>> ---
>>>>    drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c |   19 +++++++++++++++++--
>>>>    1 files changed, 17 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
>>>> index 3c06e93..eee7818 100644
>>>> --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
>>>> +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
>>>> @@ -1139,11 +1139,18 @@ void brcmf_sdio_wowl_config(struct device *dev, bool enabled)
>>>>    static int brcmf_ops_sdio_suspend(struct device *dev)
>>>>    {
>>>>        struct brcmf_bus *bus_if = dev_get_drvdata(dev);
>>>> -    struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
>>>> +    struct brcmf_sdio_dev *sdiodev;
>>>>        mmc_pm_flag_t sdio_flags;
>>>> +    struct sdio_func *func = dev_to_sdio_func(dev);
>>>>
>>>>        brcmf_dbg(SDIO, "Enter\n");
>>>>
>>>> +    if (func->num == 2) {
>>>> +        return 0;
>>>> +    }
>>>> +
>>>> +    sdiodev = bus_if->bus_priv.sdio;
>>>> +
>>>>        atomic_set(&sdiodev->suspend, true);
>>>>
>>>>        if (sdiodev->wowl_enabled) {
>>>> @@ -1164,9 +1171,17 @@ static int brcmf_ops_sdio_suspend(struct device *dev)
>>>>    static int brcmf_ops_sdio_resume(struct device *dev)
>>>>    {
>>>>        struct brcmf_bus *bus_if = dev_get_drvdata(dev);
>>>> -    struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
>>>> +    struct brcmf_sdio_dev *sdiodev;
>>>> +    struct sdio_func *func = dev_to_sdio_func(dev);
>>>>
>>>>        brcmf_dbg(SDIO, "Enter\n");
>>>> +
>>>> +    if (func->num == 2) {
>>>> +        return 0;
>>>> +    }
>>>> +
>>>> +    sdiodev = bus_if->bus_priv.sdio;
>>>> +
>>>>        if (sdiodev->pdata->oob_irq_supported)
>>>>            disable_irq_wake(sdiodev->pdata->oob_irq_nr);
>>>>        brcmf_sdio_wd_timer(sdiodev->bus, BRCMF_WD_POLL_MS);
>>>> -- 1.7.1
>>>>
>>>
>>> -- 
>>> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>
> -- 
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH net-next v2 0/3] remove nl_sk_hash_lock from netlink socket
From: Ying Xue @ 2015-01-12  6:52 UTC (permalink / raw)
  To: tgraf; +Cc: davem, netdev

After tipc socket successfully avoids the involvement of an extra lock
with rhashtable_lookup_insert(), it's possible for netlink socket to
remove its hash socket lock now. But as netlink socket needs a compare
function to look for an object, we first introduce a new function
called rhashtable_lookup_compare_insert() in commit #1 which is
implemented based on original rhashtable_lookup_insert(). We
subsequently remove nl_sk_hash_lock from netlink socket with the new
introduced function in commit #2. Lastly, as Thomas requested, we add
commit #3 to indicate the implementation of what the grow and shrink
decision function must enforce min/max shift.

v2:
 As Thomas pointed out, there was a race between checking portid and
 then setting it in commit #2. Now use socket lock to make the process
 of both checking and setting portid atomic, and then eliminate the
 race.

Ying Xue (3):
  rhashtable: involve rhashtable_lookup_compare_insert routine
  netlink: eliminate nl_sk_hash_lock
  rhashtable: add a note for grow and shrink decision functions

 include/linux/rhashtable.h |    9 +++++++++
 lib/rhashtable.c           |   42 ++++++++++++++++++++++++++++++++++++++++--
 net/netlink/af_netlink.c   |   33 ++++++++++++++++++++-------------
 net/netlink/af_netlink.h   |    1 -
 net/netlink/diag.c         |   10 +++++-----
 5 files changed, 74 insertions(+), 21 deletions(-)

-- 
1.7.9.5

^ permalink raw reply

* [PATCH net-next v2 1/3] rhashtable: involve rhashtable_lookup_compare_insert routine
From: Ying Xue @ 2015-01-12  6:52 UTC (permalink / raw)
  To: tgraf; +Cc: davem, netdev
In-Reply-To: <1421045544-13670-1-git-send-email-ying.xue@windriver.com>

Introduce a new function called rhashtable_lookup_compare_insert()
which is very similar to rhashtable_lookup_insert(). But the former
makes use of users' given compare function to look for an object,
and then inserts it into hash table if found. As the entire process
of search and insertion is under protection of per bucket lock, this
can help users to avoid the involvement of extra lock.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Cc: Thomas Graf <tgraf@suug.ch>
Acked-by: Thomas Graf <tgraf@suug.ch>
---
 include/linux/rhashtable.h |    5 +++++
 lib/rhashtable.c           |   42 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 326acd8..7b9bd77 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -168,7 +168,12 @@ int rhashtable_shrink(struct rhashtable *ht);
 void *rhashtable_lookup(struct rhashtable *ht, const void *key);
 void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
 				bool (*compare)(void *, void *), void *arg);
+
 bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj);
+bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
+				      struct rhash_head *obj,
+				      bool (*compare)(void *, void *),
+				      void *arg);
 
 void rhashtable_destroy(struct rhashtable *ht);
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 8023b55..ed6ae1a 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -727,6 +727,43 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
  */
 bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
 {
+	struct rhashtable_compare_arg arg = {
+		.ht = ht,
+		.key = rht_obj(ht, obj) + ht->p.key_offset,
+	};
+
+	BUG_ON(!ht->p.key_len);
+
+	return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare,
+						&arg);
+}
+EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
+
+/**
+ * rhashtable_lookup_compare_insert - search and insert object to hash table
+ *                                    with compare function
+ * @ht:		hash table
+ * @obj:	pointer to hash head inside object
+ * @compare:	compare function, must return true on match
+ * @arg:	argument passed on to compare function
+ *
+ * Locks down the bucket chain in both the old and new table if a resize
+ * is in progress to ensure that writers can't remove from the old table
+ * and can't insert to the new table during the atomic operation of search
+ * and insertion. Searches for duplicates in both the old and new table if
+ * a resize is in progress.
+ *
+ * Lookups may occur in parallel with hashtable mutations and resizing.
+ *
+ * Will trigger an automatic deferred table resizing if the size grows
+ * beyond the watermark indicated by grow_decision() which can be passed
+ * to rhashtable_init().
+ */
+bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
+				      struct rhash_head *obj,
+				      bool (*compare)(void *, void *),
+				      void *arg)
+{
 	struct bucket_table *new_tbl, *old_tbl;
 	spinlock_t *new_bucket_lock, *old_bucket_lock;
 	u32 new_hash, old_hash;
@@ -747,7 +784,8 @@ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
 	if (unlikely(old_tbl != new_tbl))
 		spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
 
-	if (rhashtable_lookup(ht, rht_obj(ht, obj) + ht->p.key_offset)) {
+	if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
+				      compare, arg)) {
 		success = false;
 		goto exit;
 	}
@@ -763,7 +801,7 @@ exit:
 
 	return success;
 }
-EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
+EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);
 
 static size_t rounded_hashtable_size(struct rhashtable_params *params)
 {
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH net-next v2 3/3] rhashtable: add a note for grow and shrink decision functions
From: Ying Xue @ 2015-01-12  6:52 UTC (permalink / raw)
  To: tgraf; +Cc: davem, netdev
In-Reply-To: <1421045544-13670-1-git-send-email-ying.xue@windriver.com>

As commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for
worker queue") moves condition statements of verifying whether hash
table size exceeds its maximum threshold or reaches its minimum
threshold from resizing functions to resizing decision functions,
we should add a note in rhashtable.h to indicate the implementation
of what the grow and shrink decision function must enforce min/max
shift, otherwise, it's failed to take min/max shift's set watermarks
into effect.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Cc: Thomas Graf <tgraf@suug.ch>
Acked-by: Thomas Graf <tgraf@suug.ch>
---
 include/linux/rhashtable.h |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 7b9bd77..9570832 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -79,6 +79,10 @@ struct rhashtable;
  * @obj_hashfn: Function to hash object
  * @grow_decision: If defined, may return true if table should expand
  * @shrink_decision: If defined, may return true if table should shrink
+ *
+ * Note: when implementing the grow and shrink decision function, min/max
+ * shift must be enforced, otherwise, resizing watermarks they set may be
+ * useless.
  */
 struct rhashtable_params {
 	size_t			nelem_hint;
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH net-next v2 2/3] netlink: eliminate nl_sk_hash_lock
From: Ying Xue @ 2015-01-12  6:52 UTC (permalink / raw)
  To: tgraf; +Cc: davem, netdev
In-Reply-To: <1421045544-13670-1-git-send-email-ying.xue@windriver.com>

As rhashtable_lookup_compare_insert() can guarantee the process
of search and insertion is atomic, it's safe to eliminate the
nl_sk_hash_lock. After this, object insertion or removal will
be protected with per bucket lock on write side while object
lookup is guarded with rcu read lock on read side.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Cc: Thomas Graf <tgraf@suug.ch>
---
 net/netlink/af_netlink.c |   33 ++++++++++++++++++++-------------
 net/netlink/af_netlink.h |    1 -
 net/netlink/diag.c       |   10 +++++-----
 3 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 298e1df..01b702d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -98,7 +98,7 @@ static void netlink_skb_destructor(struct sk_buff *skb);
 
 /* nl_table locking explained:
  * Lookup and traversal are protected with an RCU read-side lock. Insertion
- * and removal are protected with nl_sk_hash_lock while using RCU list
+ * and removal are protected with per bucket lock while using RCU list
  * modification primitives and may run in parallel to RCU protected lookups.
  * Destruction of the Netlink socket may only occur *after* nl_table_lock has
  * been acquired * either during or after the socket has been removed from
@@ -110,10 +110,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
 
 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
 
-/* Protects netlink socket hash table mutations */
-DEFINE_MUTEX(nl_sk_hash_lock);
-EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
-
 static ATOMIC_NOTIFIER_HEAD(netlink_chain);
 
 static DEFINE_SPINLOCK(netlink_tap_lock);
@@ -998,6 +994,19 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
 					 &netlink_compare, &arg);
 }
 
+static bool __netlink_insert(struct netlink_table *table, struct sock *sk,
+			     struct net *net)
+{
+	struct netlink_compare_arg arg = {
+		.net = net,
+		.portid = nlk_sk(sk)->portid,
+	};
+
+	return rhashtable_lookup_compare_insert(&table->hash,
+						&nlk_sk(sk)->node,
+						&netlink_compare, &arg);
+}
+
 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
 {
 	struct netlink_table *table = &nl_table[protocol];
@@ -1043,9 +1052,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
 	struct netlink_table *table = &nl_table[sk->sk_protocol];
 	int err = -EADDRINUSE;
 
-	mutex_lock(&nl_sk_hash_lock);
-	if (__netlink_lookup(table, portid, net))
-		goto err;
+	lock_sock(sk);
 
 	err = -EBUSY;
 	if (nlk_sk(sk)->portid)
@@ -1058,10 +1065,12 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
 
 	nlk_sk(sk)->portid = portid;
 	sock_hold(sk);
-	rhashtable_insert(&table->hash, &nlk_sk(sk)->node);
-	err = 0;
+	if (__netlink_insert(table, sk, net))
+		err = 0;
+	else
+		sock_put(sk);
 err:
-	mutex_unlock(&nl_sk_hash_lock);
+	release_sock(sk);
 	return err;
 }
 
@@ -1069,13 +1078,11 @@ static void netlink_remove(struct sock *sk)
 {
 	struct netlink_table *table;
 
-	mutex_lock(&nl_sk_hash_lock);
 	table = &nl_table[sk->sk_protocol];
 	if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) {
 		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
 		__sock_put(sk);
 	}
-	mutex_unlock(&nl_sk_hash_lock);
 
 	netlink_table_grab();
 	if (nlk_sk(sk)->subscriptions) {
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index fd96fa7..7518375 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -74,6 +74,5 @@ struct netlink_table {
 
 extern struct netlink_table *nl_table;
 extern rwlock_t nl_table_lock;
-extern struct mutex nl_sk_hash_lock;
 
 #endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index fcca36d..bb59a7e 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -103,7 +103,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 {
 	struct netlink_table *tbl = &nl_table[protocol];
 	struct rhashtable *ht = &tbl->hash;
-	const struct bucket_table *htbl = rht_dereference(ht->tbl, ht);
+	const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht);
 	struct net *net = sock_net(skb->sk);
 	struct netlink_diag_req *req;
 	struct netlink_sock *nlsk;
@@ -115,7 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 	for (i = 0; i < htbl->size; i++) {
 		struct rhash_head *pos;
 
-		rht_for_each_entry(nlsk, pos, htbl, i, node) {
+		rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) {
 			sk = (struct sock *)nlsk;
 
 			if (!net_eq(sock_net(sk), net))
@@ -172,7 +172,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	req = nlmsg_data(cb->nlh);
 
-	mutex_lock(&nl_sk_hash_lock);
+	rcu_read_lock();
 	read_lock(&nl_table_lock);
 
 	if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
@@ -186,7 +186,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	} else {
 		if (req->sdiag_protocol >= MAX_LINKS) {
 			read_unlock(&nl_table_lock);
-			mutex_unlock(&nl_sk_hash_lock);
+			rcu_read_unlock();
 			return -ENOENT;
 		}
 
@@ -194,7 +194,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	read_unlock(&nl_table_lock);
-	mutex_unlock(&nl_sk_hash_lock);
+	rcu_read_unlock();
 
 	return skb->len;
 }
-- 
1.7.9.5

^ permalink raw reply related

* RE: [PATCH v4] can: Convert to runtime_pm
From: Appana Durga Kedareswara Rao @ 2015-01-12  6:59 UTC (permalink / raw)
  To: Marc Kleine-Budde
  Cc: linux-can@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, Soren Brinkmann,
	grant.likely@linaro.org, wg@grandegger.com, Michal Simek
In-Reply-To: <54B299A0.1000504@pengutronix.de>

Hi Marc,

> -----Original Message-----
> From: Marc Kleine-Budde [mailto:mkl@pengutronix.de]
> Sent: Sunday, January 11, 2015 9:11 PM
> To: Appana Durga Kedareswara Rao
> Cc: linux-can@vger.kernel.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; Soren Brinkmann; grant.likely@linaro.org;
> wg@grandegger.com
> Subject: Re: [PATCH v4] can: Convert to runtime_pm
>
> On 01/11/2015 06:34 AM, Appana Durga Kedareswara Rao wrote:
> [...]
> >>>             return ret;
> >>>     }
> >>>
> >>>     priv->write_reg(priv, XCAN_MSR_OFFSET, 0);
> >>>     priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_CEN_MASK);
> >>>
> >>>     if (netif_running(ndev)) {
> >>>             priv->can.state = CAN_STATE_ERROR_ACTIVE;
> >>
> >> What happens if the device was not in ACTIVE state prior to the
> >> runtime_suspend?
> >>
> >
> > I am not sure about the state of CAN at this point of time.
> > I just followed what other drivers are following for run time  suspend :).
>
> Please check the state of the hardware if you go with bus off into suspend
> and then resume.
>

        if (netif_running(ndev)) {
                        if (isr & XCAN_IXR_BSOFF_MASK) {
                                priv->can.state = CAN_STATE_BUS_OFF;
                           priv->write_reg(priv, XCAN_SRR_OFFSET,
                                        XCAN_SRR_RESET_MASK);
                } else if ((status & XCAN_SR_ESTAT_MASK) ==
                                        XCAN_SR_ESTAT_MASK) {
                        priv->can.state = CAN_STATE_ERROR_PASSIVE;
                } else if (status & XCAN_SR_ERRWRN_MASK) {
                        priv->can.state = CAN_STATE_ERROR_WARNING;
                } else {
                        priv->can.state = CAN_STATE_ERROR_ACTIVE;
                }
        }

Is the above code snippet ok for you?

> >>>             netif_device_attach(ndev);
> >>>             netif_start_queue(ndev);
> >>>     }
> >>>
> >>>     return 0;
> >>> }
> >>
> >>
> >>>
> >>> @@ -1020,9 +1035,9 @@ static int __maybe_unused xcan_resume(struct
> >>> device *dev)
> >>>
> >>>     priv->write_reg(priv, XCAN_MSR_OFFSET, 0);
> >>>     priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_CEN_MASK);
> >>> -   priv->can.state = CAN_STATE_ERROR_ACTIVE;
> >>>
> >>>     if (netif_running(ndev)) {
> >>> +           priv->can.state = CAN_STATE_ERROR_ACTIVE;
> >>>             netif_device_attach(ndev);
> >>>             netif_start_queue(ndev);
> >>>     }
> >>> @@ -1030,7 +1045,10 @@ static int __maybe_unused
> xcan_resume(struct
> >> device *dev)
> >>>     return 0;
> >>>  }
> >>>
> >>> -static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend,
> >> xcan_resume);
> >>> +static const struct dev_pm_ops xcan_dev_pm_ops = {
> >>> +   SET_SYSTEM_SLEEP_PM_OPS(xcan_suspend, xcan_resume)
> >>> +   SET_PM_RUNTIME_PM_OPS(xcan_runtime_suspend,
> >> xcan_runtime_resume,
> >>> +NULL) };
> >>>
> >>>  /**
> >>>   * xcan_probe - Platform registration call @@ -1071,7 +1089,7 @@
> >>> static int xcan_probe(struct platform_device *pdev)
> >>>             return -ENOMEM;
> >>>
> >>>     priv = netdev_priv(ndev);
> >>> -   priv->dev = ndev;
> >>> +   priv->dev = &pdev->dev;
> >>>     priv->can.bittiming_const = &xcan_bittiming_const;
> >>>     priv->can.do_set_mode = xcan_do_set_mode;
> >>>     priv->can.do_get_berr_counter = xcan_get_berr_counter; @@ -
> >> 1137,15
> >>> +1155,22 @@ static int xcan_probe(struct platform_device *pdev)
> >>>
> >>>     netif_napi_add(ndev, &priv->napi, xcan_rx_poll, rx_max);
> >>>
> >>> +   pm_runtime_set_active(&pdev->dev);
> >>> +   pm_runtime_irq_safe(&pdev->dev);
> >>> +   pm_runtime_enable(&pdev->dev);
> >>> +   pm_runtime_get_sync(&pdev->dev);
> >> Check error values?
> >
> > Ok
> >>> +
> >>>     ret = register_candev(ndev);
> >>>     if (ret) {
> >>>             dev_err(&pdev->dev, "fail to register failed
> >>> (err=%d)\n",
> >> ret);
> >>> +           pm_runtime_put(priv->dev);
> >>
> >> Please move the pm_runtime_put into the common error exit path.
> >>
> >
> > Ok
> >
> >>>             goto err_unprepare_disable_busclk;
> >>>     }
> >>>
> >>>     devm_can_led_init(ndev);
> >>> -   clk_disable_unprepare(priv->bus_clk);
> >>> -   clk_disable_unprepare(priv->can_clk);
> >>> +
> >>> +   pm_runtime_put(&pdev->dev);
> >>> +
> >>>     netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo
> >> depth:%d\n",
> >>>                     priv->reg_base, ndev->irq, priv->can.clock.freq,
> >>>                     priv->tx_max);
> >>>
> >>
> >> I think you have to convert the _remove() function, too. Have a look
> >> at the gpio-zynq.c driver:
> >>
> >>> static int zynq_gpio_remove(struct platform_device *pdev) {
> >>>     struct zynq_gpio *gpio = platform_get_drvdata(pdev);
> >>>
> >>>     pm_runtime_get_sync(&pdev->dev);
> >>
> >> However I don't understand why the get_sync() is here. Maybe Sören
> >> can help?
> >
> > I converted the remove function to use the run-time PM and .
> > Below is the remove code snippet.
> >
> >        ret = pm_runtime_get_sync(&pdev->dev);
> >         if (ret < 0) {
> >                 netdev_err(ndev, "%s: pm_runtime_get failed(%d)\n",
> >                                 __func__, ret);
> >                 return ret;
> >         }
> >
> >         if (set_reset_mode(ndev) < 0)
> >                 netdev_err(ndev, "mode resetting failed!\n");
> >
> >         unregister_candev(ndev);
> >         netif_napi_del(&priv->napi);
> >         free_candev(ndev);
>
> >         pm_runtime_disable(&pdev->dev);
>
> Can this make a call to xcan_runtime_*()? I'm asking since the ndev has
> been unregistered and already free()ed. Better move this directly after the
> set_reset_mode(). This way you are symmetric to the probe() function.

If I move the  pm_runtime_disable after the set_reset_mode
I am getting the below error.
ERROR:
xilinx_can e0008000.can can0 (unregistering): xcan_get_berr_counter: pm_runtime_get fail

If I move the pm_runtime_disable after unregister_candev everything is working fine.

Regards,
Kedar.

>
> > Observed the below things in the /sys/kernel/debug/clk/clk_summary.
> >
> >                                 Modprobe        ifconfig up    ifconfig down   rmmod
> Modprobe  ifconfig up   ifconfig down  rmmod  Modprobe  ifconfig up
> ifconfig down  rmmod
> > Clk_prepare count:                1             1               1               1       2       2               2
> 2       3       3               3               3
> > Clk_enable count:                 0             1               0               1       2       2               2
> 2       3       3               3               3
>
> As the numbers are increasing you have a clk_prepare_enable() leak. Your
> remove() function is missing the clk_disable_unprepare() for the can and
> bus clock (as you have clk_prepare_enable in probe()).
>
> Marc
>
> --
> Pengutronix e.K.                  | Marc Kleine-Budde           |
> Industrial Linux Solutions        | Phone: +49-231-2826-924     |
> Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
> Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |



This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.


^ permalink raw reply

* Re: [PATCH] team: Remove dead code
From: Jiri Pirko @ 2015-01-12  7:10 UTC (permalink / raw)
  To: Kenneth Williams; +Cc: netdev
In-Reply-To: <20150112004351.GA8744@ubuntu>

Mon, Jan 12, 2015 at 01:43:54AM CET, ken@williamsclan.us wrote:
>The deleted lines are called from a function which is called:
>1) Only through __team_options_register via team_options_register and
>2) Only during initialization / mode initialization when there are no
>ports attached.
>Therefore the ports list is guarenteed to be empty and this code will
>never be executed.
>
>Signed-off-by: Kenneth Williams <ken@williamsclan.us>

Acked-by: Jiri Pirko <jiri@resnulli.us>

^ permalink raw reply

* [PATCH] gianfar: correct the bad expression while writing bit-pattern
From: Sanjeev Sharma @ 2015-01-12  7:43 UTC (permalink / raw)
  To: davem
  Cc: claudiu.manoil, peter.senna, shemminger, netdev, linux-kernel,
	Sanjeev Sharma, Sanjeev Sharma

This patch correct the bad expression while writing the
bit-pattern from software's buffer to hardware registers.

Signed-off-by: Sanjeev Sharma <Sanjeev_Sharma@mentor.com>
---
 drivers/net/ethernet/freescale/gianfar_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 3e1a9c1..1ccca72 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -1586,7 +1586,7 @@ static int gfar_write_filer_table(struct gfar_private *priv,
 		return -EBUSY;
 
 	/* Fill regular entries */
-	for (; i < MAX_FILER_IDX - 1 && (tab->fe[i].ctrl | tab->fe[i].ctrl);
+	for (; i < MAX_FILER_IDX - 1 && ( i < tab->fe[i].ctrl);
 	     i++)
 		gfar_write_filer(priv, i, tab->fe[i].ctrl, tab->fe[i].prop);
 	/* Fill the rest with fall-troughs */
-- 
1.7.11.7

^ permalink raw reply related

* [PATCH net-next v11 1/3] Documentation: add Device tree bindings for Hisilicon hip04 ethernet
From: Ding Tianhong @ 2015-01-12  8:03 UTC (permalink / raw)
  To: arnd-r2nGTMty4D4, robh+dt-DgEjT+Ai2ygdnm+yROfE0A,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, grant.likely-QSEj5FYQhm4dnm+yROfE0A
  Cc: sergei.shtylyov-M4DtvfQ/ZS1MRgGoP+s0PdBPR1lH4CV8,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
	xuwei5-C8/M+/jPZTeaMJb+Lgu22Q,
	zhangfei.gao-QSEj5FYQhm4dnm+yROfE0A,
	netdev-u79uwXL29TY76Z2rM5mHXA, devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-lFZ/pmaqli7XmaaqVzeoHQ
In-Reply-To: <1421049832-6224-1-git-send-email-dingtianhong-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

From: Zhangfei Gao <zhangfei.gao-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>

This patch adds the Device Tree bindings for the Hisilicon hip04
Ethernet controller, including 100M / 1000M controller.

Signed-off-by: Zhangfei Gao <zhangfei.gao-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
Signed-off-by: Ding Tianhong <dingtianhong-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 .../bindings/net/hisilicon-hip04-net.txt           | 88 ++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
new file mode 100644
index 0000000..988fc69
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
@@ -0,0 +1,88 @@
+Hisilicon hip04 Ethernet Controller
+
+* Ethernet controller node
+
+Required properties:
+- compatible: should be "hisilicon,hip04-mac".
+- reg: address and length of the register set for the device.
+- interrupts: interrupt for the device.
+- port-handle: <phandle port channel>
+	phandle, specifies a reference to the syscon ppe node
+	port, port number connected to the controller
+	channel, recv channel start from channel * number (RX_DESC_NUM)
+- phy-mode: see ethernet.txt [1].
+
+Optional properties:
+- phy-handle: see ethernet.txt [1].
+
+[1] Documentation/devicetree/bindings/net/ethernet.txt
+
+
+* Ethernet ppe node:
+Control rx & tx fifos of all ethernet controllers.
+Have 2048 recv channels shared by all ethernet controllers, only if no overlap.
+Each controller's recv channel start from channel * number (RX_DESC_NUM).
+
+Required properties:
+- compatible: "hisilicon,hip04-ppe", "syscon".
+- reg: address and length of the register set for the device.
+
+
+* MDIO bus node:
+
+Required properties:
+
+- compatible: should be "hisilicon,hip04-mdio".
+- Inherits from MDIO bus node binding [2]
+[2] Documentation/devicetree/bindings/net/phy.txt
+
+Example:
+	mdio {
+		compatible = "hisilicon,hip04-mdio";
+		reg = <0x28f1000 0x1000>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		phy0: ethernet-phy@0 {
+			compatible = "ethernet-phy-ieee802.3-c22";
+			reg = <0>;
+			marvell,reg-init = <18 0x14 0 0x8001>;
+		};
+
+		phy1: ethernet-phy@1 {
+			compatible = "ethernet-phy-ieee802.3-c22";
+			reg = <1>;
+			marvell,reg-init = <18 0x14 0 0x8001>;
+		};
+	};
+
+	ppe: ppe@28c0000 {
+		compatible = "hisilicon,hip04-ppe", "syscon";
+		reg = <0x28c0000 0x10000>;
+	};
+
+	fe: ethernet@28b0000 {
+		compatible = "hisilicon,hip04-mac";
+		reg = <0x28b0000 0x10000>;
+		interrupts = <0 413 4>;
+		phy-mode = "mii";
+		port-handle = <&ppe 31 0>;
+	};
+
+	ge0: ethernet@2800000 {
+		compatible = "hisilicon,hip04-mac";
+		reg = <0x2800000 0x10000>;
+		interrupts = <0 402 4>;
+		phy-mode = "sgmii";
+		port-handle = <&ppe 0 1>;
+		phy-handle = <&phy0>;
+	};
+
+	ge8: ethernet@2880000 {
+		compatible = "hisilicon,hip04-mac";
+		reg = <0x2880000 0x10000>;
+		interrupts = <0 410 4>;
+		phy-mode = "sgmii";
+		port-handle = <&ppe 8 2>;
+		phy-handle = <&phy1>;
+	};
-- 
1.8.0


--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH net-next v11 0/3] add hisilicon hip04 ethernet driver
From: Ding Tianhong @ 2015-01-12  8:03 UTC (permalink / raw)
  To: arnd, robh+dt, davem, grant.likely
  Cc: sergei.shtylyov, linux-arm-kernel, eric.dumazet, xuwei5,
	zhangfei.gao, netdev, devicetree, linux

v11: Add ethtool support for tx coalecse getting and setting, the xmit_more
is not supported for this patch, but I think it could work for hip04,
will support it later after some tests for performance better.

Here are some performance test results by ping and iperf(add tx_coalesce_frames/users),
it looks that the performance and latency is more better by tx_coalesce_frames/usecs.

- Before:
$ ping 192.168.1.1 ...
--- 192.168.1.1 ping statistics ---
24 packets transmitted, 24 received, 0% packet loss, time 22999ms
rtt min/avg/max/mdev = 0.180/0.202/0.403/0.043 ms

$ iperf -c 192.168.1.1 ...
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 1.0 sec   115 MBytes   945 Mbits/sec

- After:
$ ping 192.168.1.1 ...
--- 192.168.1.1 ping statistics ---
24 packets transmitted, 24 received, 0% packet loss, time 22999ms
rtt min/avg/max/mdev = 0.178/0.190/0.380/0.041 ms

$ iperf -c 192.168.1.1 ...
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 1.0 sec   115 MBytes   965 Mbits/sec

v10:
- According Arnd's suggestion, remove the skb_orphan and use the hrtimer
  for the cleanup of the TX queue and add some modification for the hip04
  drivers.
  1) drop the broken skb_orphan call
  2) drop the workqueue
  3) batch cleanup based on tx_coalesce_frames/usecs for better throughput
  4) use a reasonable default tx timeout (200us, could be shorted
     based on measurements) with a range timer
  5) fix napi poll function return value
  6) use a lockless queue for cleanup

v9:
- There is no tx completion interrupts to free DMAd Tx packets, it means taht
  we rely on new tx packets arriving to run the destructors of completed packets,
  which open up space in their sockets's send queues. Sometimes we don't get such
  new packets causing Tx to stall, a single UDP transmitter is a good example of
  this situation, so we need a clean up workqueue to reclaims completed packets,
  the workqueue will only free the last packets which is already stay for several jiffies.
  Also fix some format cleanups.

v8:
- Use poll to reclaim xmitted buffer as workaround since no tx done interrupt 

v7:
- Remove select NET_CORE in 0002

v6:
- Suggest by Russell: Use netdev_sent_queue & netdev_completed_queue to solve latency issue 
  Also shorten the period of timer, which is used to wakeup the queue since no
  tx completed interrupt.

v5:
- no big change, fix typo

v4:
- Modify accoringly to the suggetion from Arnd, Florian, Eric, David
  Use of_parse_phandle_with_fixed_args & syscon_node_to_regmap get ppe info
  Add skb_orphan() and tx_timer for reclaim since no tx_finished interrupt
  Update timeout, and move of_phy_connect to probe to reuse open/stop

v3:
- Suggest from Arnd, use syscon & regmap_write/read to replace static void __iomem *ppebase.
  Modify hisilicon-hip04-net.txt accrordingly to suggestion from Florian and Sergei.

v2:
- Got many suggestions from Russell, Arnd, Florian, Mark and Sergei
  Remove memcpy, use dma_map/unmap_single, use dma_alloc_coherent rather than dma_pool, etc.
  Refer property in ethernet.txt, change ppe description, etc.

Ding Tianhong (1):
  net: hisilicon: new hip04 ethernet driver

Zhangfei Gao (2):
  Documentation: add Device tree bindings for Hisilicon hip04 ethernet
  net: hisilicon: new hip04 MDIO driver

 .../bindings/net/hisilicon-hip04-net.txt           |  88 ++
 drivers/net/ethernet/hisilicon/Kconfig             |   9 +
 drivers/net/ethernet/hisilicon/Makefile            |   1 +
 drivers/net/ethernet/hisilicon/hip04_eth.c         | 968 +++++++++++++++++++++
 drivers/net/ethernet/hisilicon/hip04_mdio.c        | 186 ++++
 5 files changed, 1252 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
 create mode 100644 drivers/net/ethernet/hisilicon/hip04_eth.c
 create mode 100644 drivers/net/ethernet/hisilicon/hip04_mdio.c

-- 
1.8.0

^ permalink raw reply

* [PATCH net-next v11 3/3] net: hisilicon: new hip04 ethernet driver
From: Ding Tianhong @ 2015-01-12  8:03 UTC (permalink / raw)
  To: arnd, robh+dt, davem, grant.likely
  Cc: sergei.shtylyov, linux-arm-kernel, eric.dumazet, xuwei5,
	zhangfei.gao, netdev, devicetree, linux
In-Reply-To: <1421049832-6224-1-git-send-email-dingtianhong@huawei.com>

Support Hisilicon hip04 ethernet driver, including 100M / 1000M controller.
The controller has no tx done interrupt, reclaim xmitted buffer in the poll.

v11: Add ethtool support for tx coalecse getting and setting, the xmit_more
is not supported for this patch, but I think it could work for hip04,
will support it later after some tests for performance better.

Here are some performance test results by ping and iperf(add tx_coalesce_frames/users),
it looks that the performance and latency is more better by tx_coalesce_frames/usecs.

- Before:
$ ping 192.168.1.1 ...
--- 192.168.1.1 ping statistics ---
24 packets transmitted, 24 received, 0% packet loss, time 22999ms
rtt min/avg/max/mdev = 0.180/0.202/0.403/0.043 ms

$ iperf -c 192.168.1.1 ...
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 1.0 sec   115 MBytes   945 Mbits/sec

- After:
$ ping 192.168.1.1 ...
--- 192.168.1.1 ping statistics ---
24 packets transmitted, 24 received, 0% packet loss, time 22999ms
rtt min/avg/max/mdev = 0.178/0.190/0.380/0.041 ms

$ iperf -c 192.168.1.1 ...
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 1.0 sec   115 MBytes   965 Mbits/sec

v10: According David Miller and Arnd Bergmann's suggestion, add some modification
for v9 version
- drop the workqueue
- batch cleanup based on tx_coalesce_frames/usecs for better throughput
- use a reasonable default tx timeout (200us, could be shorted
  based on measurements) with a range timer
- fix napi poll function return value
- use a lockless queue for cleanup

Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/ethernet/hisilicon/Makefile    |   2 +-
 drivers/net/ethernet/hisilicon/hip04_eth.c | 968 +++++++++++++++++++++++++++++
 2 files changed, 969 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/hisilicon/hip04_eth.c

diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile
index 40115a7..6c14540 100644
--- a/drivers/net/ethernet/hisilicon/Makefile
+++ b/drivers/net/ethernet/hisilicon/Makefile
@@ -3,4 +3,4 @@
 #
 
 obj-$(CONFIG_HIX5HD2_GMAC) += hix5hd2_gmac.o
-obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o
+obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o hip04_eth.o
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
new file mode 100644
index 0000000..c67c8b6
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -0,0 +1,968 @@
+
+/* Copyright (c) 2014 Linaro Ltd.
+ * Copyright (c) 2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/ktime.h>
+#include <linux/of_address.h>
+#include <linux/phy.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+
+#define PPE_CFG_RX_ADDR			0x100
+#define PPE_CFG_POOL_GRP		0x300
+#define PPE_CFG_RX_BUF_SIZE		0x400
+#define PPE_CFG_RX_FIFO_SIZE		0x500
+#define PPE_CURR_BUF_CNT		0xa200
+
+#define GE_DUPLEX_TYPE			0x08
+#define GE_MAX_FRM_SIZE_REG		0x3c
+#define GE_PORT_MODE			0x40
+#define GE_PORT_EN			0x44
+#define GE_SHORT_RUNTS_THR_REG		0x50
+#define GE_TX_LOCAL_PAGE_REG		0x5c
+#define GE_TRANSMIT_CONTROL_REG		0x60
+#define GE_CF_CRC_STRIP_REG		0x1b0
+#define GE_MODE_CHANGE_REG		0x1b4
+#define GE_RECV_CONTROL_REG		0x1e0
+#define GE_STATION_MAC_ADDRESS		0x210
+#define PPE_CFG_CPU_ADD_ADDR		0x580
+#define PPE_CFG_MAX_FRAME_LEN_REG	0x408
+#define PPE_CFG_BUS_CTRL_REG		0x424
+#define PPE_CFG_RX_CTRL_REG		0x428
+#define PPE_CFG_RX_PKT_MODE_REG		0x438
+#define PPE_CFG_QOS_VMID_GEN		0x500
+#define PPE_CFG_RX_PKT_INT		0x538
+#define PPE_INTEN			0x600
+#define PPE_INTSTS			0x608
+#define PPE_RINT			0x604
+#define PPE_CFG_STS_MODE		0x700
+#define PPE_HIS_RX_PKT_CNT		0x804
+
+/* REG_INTERRUPT */
+#define RCV_INT				BIT(10)
+#define RCV_NOBUF			BIT(8)
+#define RCV_DROP			BIT(7)
+#define TX_DROP				BIT(6)
+#define DEF_INT_ERR			(RCV_NOBUF | RCV_DROP | TX_DROP)
+#define DEF_INT_MASK			(RCV_INT | DEF_INT_ERR)
+
+/* TX descriptor config */
+#define TX_FREE_MEM			BIT(0)
+#define TX_READ_ALLOC_L3		BIT(1)
+#define TX_FINISH_CACHE_INV		BIT(2)
+#define TX_CLEAR_WB			BIT(4)
+#define TX_L3_CHECKSUM			BIT(5)
+#define TX_LOOP_BACK			BIT(11)
+
+/* RX error */
+#define RX_PKT_DROP			BIT(0)
+#define RX_L2_ERR			BIT(1)
+#define RX_PKT_ERR			(RX_PKT_DROP | RX_L2_ERR)
+
+#define SGMII_SPEED_1000		0x08
+#define SGMII_SPEED_100			0x07
+#define SGMII_SPEED_10			0x06
+#define MII_SPEED_100			0x01
+#define MII_SPEED_10			0x00
+
+#define GE_DUPLEX_FULL			BIT(0)
+#define GE_DUPLEX_HALF			0x00
+#define GE_MODE_CHANGE_EN		BIT(0)
+
+#define GE_TX_AUTO_NEG			BIT(5)
+#define GE_TX_ADD_CRC			BIT(6)
+#define GE_TX_SHORT_PAD_THROUGH		BIT(7)
+
+#define GE_RX_STRIP_CRC			BIT(0)
+#define GE_RX_STRIP_PAD			BIT(3)
+#define GE_RX_PAD_EN			BIT(4)
+
+#define GE_AUTO_NEG_CTL			BIT(0)
+
+#define GE_RX_INT_THRESHOLD		BIT(6)
+#define GE_RX_TIMEOUT			0x04
+
+#define GE_RX_PORT_EN			BIT(1)
+#define GE_TX_PORT_EN			BIT(2)
+
+#define PPE_CFG_STS_RX_PKT_CNT_RC	BIT(12)
+
+#define PPE_CFG_RX_PKT_ALIGN		BIT(18)
+#define PPE_CFG_QOS_VMID_MODE		BIT(14)
+#define PPE_CFG_QOS_VMID_GRP_SHIFT	8
+
+#define PPE_CFG_RX_FIFO_FSFU		BIT(11)
+#define PPE_CFG_RX_DEPTH_SHIFT		16
+#define PPE_CFG_RX_START_SHIFT		0
+#define PPE_CFG_RX_CTRL_ALIGN_SHIFT	11
+
+#define PPE_CFG_BUS_LOCAL_REL		BIT(14)
+#define PPE_CFG_BUS_BIG_ENDIEN		BIT(0)
+
+#define RX_DESC_NUM			128
+#define TX_DESC_NUM			256
+#define TX_NEXT(N)			(((N) + 1) & (TX_DESC_NUM-1))
+#define RX_NEXT(N)			(((N) + 1) & (RX_DESC_NUM-1))
+
+#define GMAC_PPE_RX_PKT_MAX_LEN		379
+#define GMAC_MAX_PKT_LEN		1516
+#define GMAC_MIN_PKT_LEN		31
+#define RX_BUF_SIZE			1600
+#define RESET_TIMEOUT			1000
+#define TX_TIMEOUT			(6 * HZ)
+
+#define DRV_NAME			"hip04-ether"
+#define DRV_VERSION			"v1.0"
+
+#define HIP04_MAX_TX_COALESCE_USECS	200
+#define HIP04_MIN_TX_COALESCE_USECS	100
+#define HIP04_MAX_TX_COALESCE_FRAMES	200
+#define HIP04_MIN_TX_COALESCE_FRAMES	100
+
+struct tx_desc {
+	u32 send_addr;
+	u32 send_size;
+	u32 next_addr;
+	u32 cfg;
+	u32 wb_addr;
+} __aligned(64);
+
+struct rx_desc {
+	u16 reserved_16;
+	u16 pkt_len;
+	u32 reserve1[3];
+	u32 pkt_err;
+	u32 reserve2[4];
+};
+
+struct hip04_priv {
+	void __iomem *base;
+	int phy_mode;
+	int chan;
+	unsigned int port;
+	unsigned int speed;
+	unsigned int duplex;
+	unsigned int reg_inten;
+
+	struct napi_struct napi;
+	struct net_device *ndev;
+
+	struct tx_desc *tx_desc;
+	dma_addr_t tx_desc_dma;
+	struct sk_buff *tx_skb[TX_DESC_NUM];
+	dma_addr_t tx_phys[TX_DESC_NUM];
+	unsigned int tx_head;
+
+	int tx_coalesce_frames;
+	int tx_coalesce_usecs;
+	struct hrtimer tx_coalesce_timer;
+
+	unsigned char *rx_buf[RX_DESC_NUM];
+	dma_addr_t rx_phys[RX_DESC_NUM];
+	unsigned int rx_head;
+	unsigned int rx_buf_size;
+
+	struct device_node *phy_node;
+	struct phy_device *phy;
+	struct regmap *map;
+	struct work_struct tx_timeout_task;
+
+	/* written only by tx cleanup */
+	unsigned int tx_tail ____cacheline_aligned_in_smp;
+};
+
+static inline unsigned int tx_count(unsigned int head, unsigned int tail)
+{
+	return (head - tail) % (TX_DESC_NUM - 1);
+}
+
+static void hip04_config_port(struct net_device *ndev, u32 speed, u32 duplex)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	u32 val;
+
+	priv->speed = speed;
+	priv->duplex = duplex;
+
+	switch (priv->phy_mode) {
+	case PHY_INTERFACE_MODE_SGMII:
+		if (speed == SPEED_1000)
+			val = SGMII_SPEED_1000;
+		else if (speed == SPEED_100)
+			val = SGMII_SPEED_100;
+		else
+			val = SGMII_SPEED_10;
+		break;
+	case PHY_INTERFACE_MODE_MII:
+		if (speed == SPEED_100)
+			val = MII_SPEED_100;
+		else
+			val = MII_SPEED_10;
+		break;
+	default:
+		netdev_warn(ndev, "not supported mode\n");
+		val = MII_SPEED_10;
+		break;
+	}
+	writel_relaxed(val, priv->base + GE_PORT_MODE);
+
+	val = duplex ? GE_DUPLEX_FULL : GE_DUPLEX_HALF;
+	writel_relaxed(val, priv->base + GE_DUPLEX_TYPE);
+
+	val = GE_MODE_CHANGE_EN;
+	writel_relaxed(val, priv->base + GE_MODE_CHANGE_REG);
+}
+
+static void hip04_reset_ppe(struct hip04_priv *priv)
+{
+	u32 val, tmp, timeout = 0;
+
+	do {
+		regmap_read(priv->map, priv->port * 4 + PPE_CURR_BUF_CNT, &val);
+		regmap_read(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, &tmp);
+		if (timeout++ > RESET_TIMEOUT)
+			break;
+	} while (val & 0xfff);
+}
+
+static void hip04_config_fifo(struct hip04_priv *priv)
+{
+	u32 val;
+
+	val = readl_relaxed(priv->base + PPE_CFG_STS_MODE);
+	val |= PPE_CFG_STS_RX_PKT_CNT_RC;
+	writel_relaxed(val, priv->base + PPE_CFG_STS_MODE);
+
+	val = BIT(priv->port);
+	regmap_write(priv->map, priv->port * 4 + PPE_CFG_POOL_GRP, val);
+
+	val = priv->port << PPE_CFG_QOS_VMID_GRP_SHIFT;
+	val |= PPE_CFG_QOS_VMID_MODE;
+	writel_relaxed(val, priv->base + PPE_CFG_QOS_VMID_GEN);
+
+	val = RX_BUF_SIZE;
+	regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_BUF_SIZE, val);
+
+	val = RX_DESC_NUM << PPE_CFG_RX_DEPTH_SHIFT;
+	val |= PPE_CFG_RX_FIFO_FSFU;
+	val |= priv->chan << PPE_CFG_RX_START_SHIFT;
+	regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_FIFO_SIZE, val);
+
+	val = NET_IP_ALIGN << PPE_CFG_RX_CTRL_ALIGN_SHIFT;
+	writel_relaxed(val, priv->base + PPE_CFG_RX_CTRL_REG);
+
+	val = PPE_CFG_RX_PKT_ALIGN;
+	writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_MODE_REG);
+
+	val = PPE_CFG_BUS_LOCAL_REL | PPE_CFG_BUS_BIG_ENDIEN;
+	writel_relaxed(val, priv->base + PPE_CFG_BUS_CTRL_REG);
+
+	val = GMAC_PPE_RX_PKT_MAX_LEN;
+	writel_relaxed(val, priv->base + PPE_CFG_MAX_FRAME_LEN_REG);
+
+	val = GMAC_MAX_PKT_LEN;
+	writel_relaxed(val, priv->base + GE_MAX_FRM_SIZE_REG);
+
+	val = GMAC_MIN_PKT_LEN;
+	writel_relaxed(val, priv->base + GE_SHORT_RUNTS_THR_REG);
+
+	val = readl_relaxed(priv->base + GE_TRANSMIT_CONTROL_REG);
+	val |= GE_TX_AUTO_NEG | GE_TX_ADD_CRC | GE_TX_SHORT_PAD_THROUGH;
+	writel_relaxed(val, priv->base + GE_TRANSMIT_CONTROL_REG);
+
+	val = GE_RX_STRIP_CRC;
+	writel_relaxed(val, priv->base + GE_CF_CRC_STRIP_REG);
+
+	val = readl_relaxed(priv->base + GE_RECV_CONTROL_REG);
+	val |= GE_RX_STRIP_PAD | GE_RX_PAD_EN;
+	writel_relaxed(val, priv->base + GE_RECV_CONTROL_REG);
+
+	val = GE_AUTO_NEG_CTL;
+	writel_relaxed(val, priv->base + GE_TX_LOCAL_PAGE_REG);
+}
+
+static void hip04_mac_enable(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	u32 val;
+
+	/* enable tx & rx */
+	val = readl_relaxed(priv->base + GE_PORT_EN);
+	val |= GE_RX_PORT_EN | GE_TX_PORT_EN;
+	writel_relaxed(val, priv->base + GE_PORT_EN);
+
+	/* clear rx int */
+	val = RCV_INT;
+	writel_relaxed(val, priv->base + PPE_RINT);
+
+	/* config recv int */
+	val = GE_RX_INT_THRESHOLD | GE_RX_TIMEOUT;
+	writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_INT);
+
+	/* enable interrupt */
+	priv->reg_inten = DEF_INT_MASK;
+	writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+}
+
+static void hip04_mac_disable(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	u32 val;
+
+	/* disable int */
+	priv->reg_inten &= ~(DEF_INT_MASK);
+	writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+
+	/* disable tx & rx */
+	val = readl_relaxed(priv->base + GE_PORT_EN);
+	val &= ~(GE_RX_PORT_EN | GE_TX_PORT_EN);
+	writel_relaxed(val, priv->base + GE_PORT_EN);
+}
+
+static void hip04_set_xmit_desc(struct hip04_priv *priv, dma_addr_t phys)
+{
+	writel(phys, priv->base + PPE_CFG_CPU_ADD_ADDR);
+}
+
+static void hip04_set_recv_desc(struct hip04_priv *priv, dma_addr_t phys)
+{
+	regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, phys);
+}
+
+static u32 hip04_recv_cnt(struct hip04_priv *priv)
+{
+	return readl(priv->base + PPE_HIS_RX_PKT_CNT);
+}
+
+static void hip04_update_mac_address(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+
+	writel_relaxed(((ndev->dev_addr[0] << 8) | (ndev->dev_addr[1])),
+		       priv->base + GE_STATION_MAC_ADDRESS);
+	writel_relaxed(((ndev->dev_addr[2] << 24) | (ndev->dev_addr[3] << 16) |
+			(ndev->dev_addr[4] << 8) | (ndev->dev_addr[5])),
+		       priv->base + GE_STATION_MAC_ADDRESS + 4);
+}
+
+static int hip04_set_mac_address(struct net_device *ndev, void *addr)
+{
+	eth_mac_addr(ndev, addr);
+	hip04_update_mac_address(ndev);
+	return 0;
+}
+
+static int hip04_tx_reclaim(struct net_device *ndev, bool force)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	unsigned tx_tail = priv->tx_tail;
+	struct tx_desc *desc;
+	unsigned int bytes_compl = 0, pkts_compl = 0;
+	unsigned int count;
+
+	smp_rmb();
+	count = tx_count(ACCESS_ONCE(priv->tx_head), tx_tail);
+	if (count == 0)
+		goto out;
+
+	while (count) {
+		desc = &priv->tx_desc[tx_tail];
+		if (desc->send_addr != 0) {
+			if (force)
+				desc->send_addr = 0;
+			else
+				break;
+		}
+
+		if (priv->tx_phys[tx_tail]) {
+			dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail],
+					 priv->tx_skb[tx_tail]->len,
+					 DMA_TO_DEVICE);
+			priv->tx_phys[tx_tail] = 0;
+		}
+		pkts_compl++;
+		bytes_compl += priv->tx_skb[tx_tail]->len;
+		dev_kfree_skb(priv->tx_skb[tx_tail]);
+		priv->tx_skb[tx_tail] = NULL;
+		tx_tail = TX_NEXT(tx_tail);
+		count--;
+	}
+
+	priv->tx_tail = tx_tail;
+	smp_wmb(); /* Ensure tx_tail visible to xmit */
+
+out:
+	if (pkts_compl || bytes_compl)
+		netdev_completed_queue(ndev, pkts_compl, bytes_compl);
+
+	if (unlikely(netif_queue_stopped(ndev)) && (count < (TX_DESC_NUM - 1)))
+		netif_wake_queue(ndev);
+
+	return count;
+}
+
+static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	unsigned int tx_head = priv->tx_head, count;
+	struct tx_desc *desc = &priv->tx_desc[tx_head];
+	dma_addr_t phys;
+
+	smp_rmb();
+	count = tx_count(tx_head, ACCESS_ONCE(priv->tx_tail));
+	if (count == (TX_DESC_NUM - 1)) {
+		netif_stop_queue(ndev);
+		return NETDEV_TX_BUSY;
+	}
+
+	phys = dma_map_single(&ndev->dev, skb->data, skb->len, DMA_TO_DEVICE);
+	if (dma_mapping_error(&ndev->dev, phys)) {
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	priv->tx_skb[tx_head] = skb;
+	priv->tx_phys[tx_head] = phys;
+	desc->send_addr = cpu_to_be32(phys);
+	desc->send_size = cpu_to_be32(skb->len);
+	desc->cfg = cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV);
+	phys = priv->tx_desc_dma + tx_head * sizeof(struct tx_desc);
+	desc->wb_addr = cpu_to_be32(phys);
+	skb_tx_timestamp(skb);
+
+	hip04_set_xmit_desc(priv, phys);
+	priv->tx_head = TX_NEXT(tx_head);
+	count++;
+	netdev_sent_queue(ndev, skb->len);
+
+	stats->tx_bytes += skb->len;
+	stats->tx_packets++;
+
+	/* Ensure tx_head update visible to tx reclaim */
+	smp_wmb();
+
+	/* queue is getting full, better start cleaning up now */
+	if (count >= priv->tx_coalesce_frames) {
+		if (napi_schedule_prep(&priv->napi)) {
+			/* disable rx interrupt and timer */
+			priv->reg_inten &= ~(RCV_INT);
+			writel_relaxed(DEF_INT_MASK & ~RCV_INT,
+				       priv->base + PPE_INTEN);
+			hrtimer_cancel(&priv->tx_coalesce_timer);
+			__napi_schedule(&priv->napi);
+		}
+	} else if (!hrtimer_is_queued(&priv->tx_coalesce_timer)) {
+		/* cleanup not pending yet, start a new timer */
+		hrtimer_start_expires(&priv->tx_coalesce_timer,
+				      HRTIMER_MODE_REL);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static int hip04_rx_poll(struct napi_struct *napi, int budget)
+{
+	struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
+	struct net_device *ndev = priv->ndev;
+	struct net_device_stats *stats = &ndev->stats;
+	unsigned int cnt = hip04_recv_cnt(priv);
+	struct rx_desc *desc;
+	struct sk_buff *skb;
+	unsigned char *buf;
+	bool last = false;
+	dma_addr_t phys;
+	int rx = 0;
+	int tx_remaining;
+	u16 len;
+	u32 err;
+
+	while (cnt && !last) {
+		buf = priv->rx_buf[priv->rx_head];
+		skb = build_skb(buf, priv->rx_buf_size);
+		if (unlikely(!skb))
+			net_dbg_ratelimited("build_skb failed\n");
+
+		dma_unmap_single(&ndev->dev, priv->rx_phys[priv->rx_head],
+				 RX_BUF_SIZE, DMA_FROM_DEVICE);
+		priv->rx_phys[priv->rx_head] = 0;
+
+		desc = (struct rx_desc *)skb->data;
+		len = be16_to_cpu(desc->pkt_len);
+		err = be32_to_cpu(desc->pkt_err);
+
+		if (0 == len) {
+			dev_kfree_skb_any(skb);
+			last = true;
+		} else if ((err & RX_PKT_ERR) || (len >= GMAC_MAX_PKT_LEN)) {
+			dev_kfree_skb_any(skb);
+			stats->rx_dropped++;
+			stats->rx_errors++;
+		} else {
+			skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+			skb_put(skb, len);
+			skb->protocol = eth_type_trans(skb, ndev);
+			napi_gro_receive(&priv->napi, skb);
+			stats->rx_packets++;
+			stats->rx_bytes += len;
+			rx++;
+		}
+
+		buf = netdev_alloc_frag(priv->rx_buf_size);
+		if (!buf)
+			goto done;
+		phys = dma_map_single(&ndev->dev, buf,
+				      RX_BUF_SIZE, DMA_FROM_DEVICE);
+		if (dma_mapping_error(&ndev->dev, phys))
+			goto done;
+		priv->rx_buf[priv->rx_head] = buf;
+		priv->rx_phys[priv->rx_head] = phys;
+		hip04_set_recv_desc(priv, phys);
+
+		priv->rx_head = RX_NEXT(priv->rx_head);
+		if (rx >= budget)
+			goto done;
+
+		if (--cnt == 0)
+			cnt = hip04_recv_cnt(priv);
+	}
+
+	if (!(priv->reg_inten & RCV_INT)) {
+		/* enable rx interrupt */
+		priv->reg_inten |= RCV_INT;
+		writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+	}
+	napi_complete(napi);
+done:
+	/* clean up tx descriptors and start a new timer if necessary */
+	tx_remaining = hip04_tx_reclaim(ndev, false);
+	if (rx < budget && tx_remaining)
+		hrtimer_start_expires(&priv->tx_coalesce_timer, HRTIMER_MODE_REL);
+
+	return rx;
+}
+
+static irqreturn_t hip04_mac_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = (struct net_device *)dev_id;
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	u32 ists = readl_relaxed(priv->base + PPE_INTSTS);
+
+	if (!ists)
+		return IRQ_NONE;
+
+	writel_relaxed(DEF_INT_MASK, priv->base + PPE_RINT);
+
+	if (unlikely(ists & DEF_INT_ERR)) {
+		if (ists & (RCV_NOBUF | RCV_DROP))
+			stats->rx_errors++;
+			stats->rx_dropped++;
+			netdev_err(ndev, "rx drop\n");
+		if (ists & TX_DROP) {
+			stats->tx_dropped++;
+			netdev_err(ndev, "tx drop\n");
+		}
+	}
+
+	if (ists & RCV_INT && napi_schedule_prep(&priv->napi)) {
+		/* disable rx interrupt */
+		priv->reg_inten &= ~(RCV_INT);
+		writel_relaxed(DEF_INT_MASK & ~RCV_INT, priv->base + PPE_INTEN);
+		hrtimer_cancel(&priv->tx_coalesce_timer);
+		__napi_schedule(&priv->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+enum hrtimer_restart tx_done(struct hrtimer *hrtimer)
+{
+	struct hip04_priv *priv;
+	priv = container_of(hrtimer, struct hip04_priv, tx_coalesce_timer);
+
+	if (napi_schedule_prep(&priv->napi)) {
+		/* disable rx interrupt */
+		priv->reg_inten &= ~(RCV_INT);
+		writel_relaxed(DEF_INT_MASK & ~RCV_INT, priv->base + PPE_INTEN);
+		__napi_schedule(&priv->napi);
+	}
+
+	return HRTIMER_NORESTART;
+}
+
+static void hip04_adjust_link(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct phy_device *phy = priv->phy;
+
+	if ((priv->speed != phy->speed) || (priv->duplex != phy->duplex)) {
+		hip04_config_port(ndev, phy->speed, phy->duplex);
+		phy_print_status(phy);
+	}
+}
+
+static int hip04_mac_open(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	priv->rx_head = 0;
+	priv->tx_head = 0;
+	priv->tx_tail = 0;
+	hip04_reset_ppe(priv);
+
+	for (i = 0; i < RX_DESC_NUM; i++) {
+		dma_addr_t phys;
+
+		phys = dma_map_single(&ndev->dev, priv->rx_buf[i],
+				      RX_BUF_SIZE, DMA_FROM_DEVICE);
+		if (dma_mapping_error(&ndev->dev, phys))
+			return -EIO;
+
+		priv->rx_phys[i] = phys;
+		hip04_set_recv_desc(priv, phys);
+	}
+
+	if (priv->phy)
+		phy_start(priv->phy);
+
+	netdev_reset_queue(ndev);
+	netif_start_queue(ndev);
+	hip04_mac_enable(ndev);
+	napi_enable(&priv->napi);
+
+	return 0;
+}
+
+static int hip04_mac_stop(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	napi_disable(&priv->napi);
+	netif_stop_queue(ndev);
+	hip04_mac_disable(ndev);
+	hip04_tx_reclaim(ndev, true);
+	hip04_reset_ppe(priv);
+
+	if (priv->phy)
+		phy_stop(priv->phy);
+
+	for (i = 0; i < RX_DESC_NUM; i++) {
+		if (priv->rx_phys[i]) {
+			dma_unmap_single(&ndev->dev, priv->rx_phys[i],
+					 RX_BUF_SIZE, DMA_FROM_DEVICE);
+			priv->rx_phys[i] = 0;
+		}
+	}
+
+	return 0;
+}
+
+static void hip04_timeout(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+
+	schedule_work(&priv->tx_timeout_task);
+}
+
+static void hip04_tx_timeout_task(struct work_struct *work)
+{
+	struct hip04_priv *priv;
+
+	priv = container_of(work, struct hip04_priv, tx_timeout_task);
+	hip04_mac_stop(priv->ndev);
+	hip04_mac_open(priv->ndev);
+}
+
+static struct net_device_stats *hip04_get_stats(struct net_device *ndev)
+{
+	return &ndev->stats;
+}
+
+static int hip04_get_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec)
+{
+	struct hip04_priv *priv = netdev_priv(netdev);
+
+	ec->tx_coalesce_usecs = priv->tx_coalesce_usecs;
+	ec->tx_max_coalesced_frames = priv->tx_coalesce_frames;
+
+	return 0;
+}
+
+static int hip04_set_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec)
+{
+	struct hip04_priv *priv = netdev_priv(netdev);
+
+	/* Check not supported parameters  */
+	if ((ec->rx_max_coalesced_frames) || (ec->rx_coalesce_usecs_irq) ||
+	    (ec->rx_max_coalesced_frames_irq) || (ec->tx_coalesce_usecs_irq) ||
+	    (ec->use_adaptive_rx_coalesce) || (ec->use_adaptive_tx_coalesce) ||
+	    (ec->pkt_rate_low) || (ec->rx_coalesce_usecs_low) ||
+	    (ec->rx_max_coalesced_frames_low) || (ec->tx_coalesce_usecs_high) ||
+	    (ec->tx_max_coalesced_frames_low) || (ec->pkt_rate_high) ||
+	    (ec->tx_coalesce_usecs_low) || (ec->rx_coalesce_usecs_high) ||
+	    (ec->rx_max_coalesced_frames_high) || (ec->rx_coalesce_usecs) ||
+	    (ec->tx_max_coalesced_frames_irq) ||
+	    (ec->stats_block_coalesce_usecs) ||
+	    (ec->tx_max_coalesced_frames_high) || (ec->rate_sample_interval))
+		return -EOPNOTSUPP;
+
+	if ((ec->tx_coalesce_usecs > HIP04_MAX_TX_COALESCE_USECS ||
+	     ec->tx_coalesce_usecs < HIP04_MIN_TX_COALESCE_USECS) ||
+	    (ec->tx_max_coalesced_frames > HIP04_MAX_TX_COALESCE_FRAMES ||
+	     ec->tx_max_coalesced_frames < HIP04_MIN_TX_COALESCE_FRAMES))
+		return -EINVAL;
+
+	priv->tx_coalesce_usecs = ec->tx_coalesce_usecs;
+	priv->tx_coalesce_frames = ec->tx_max_coalesced_frames;
+
+	return 0;
+}
+
+static void hip04_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
+}
+
+static struct ethtool_ops hip04_ethtool_ops = {
+	.get_coalesce		= hip04_get_coalesce,
+	.set_coalesce		= hip04_set_coalesce,
+	.get_drvinfo		= hip04_get_drvinfo,
+};
+
+static struct net_device_ops hip04_netdev_ops = {
+	.ndo_open		= hip04_mac_open,
+	.ndo_stop		= hip04_mac_stop,
+	.ndo_get_stats		= hip04_get_stats,
+	.ndo_start_xmit		= hip04_mac_start_xmit,
+	.ndo_set_mac_address	= hip04_set_mac_address,
+	.ndo_tx_timeout         = hip04_timeout,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= eth_change_mtu,
+};
+
+static int hip04_alloc_ring(struct net_device *ndev, struct device *d)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	priv->tx_desc = dma_alloc_coherent(d,
+			TX_DESC_NUM * sizeof(struct tx_desc),
+			&priv->tx_desc_dma, GFP_KERNEL);
+	if (!priv->tx_desc)
+		return -ENOMEM;
+
+	priv->rx_buf_size = RX_BUF_SIZE +
+			    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	for (i = 0; i < RX_DESC_NUM; i++) {
+		priv->rx_buf[i] = netdev_alloc_frag(priv->rx_buf_size);
+		if (!priv->rx_buf[i])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void hip04_free_ring(struct net_device *ndev, struct device *d)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	for (i = 0; i < RX_DESC_NUM; i++)
+		if (priv->rx_buf[i])
+			put_page(virt_to_head_page(priv->rx_buf[i]));
+
+	for (i = 0; i < TX_DESC_NUM; i++)
+		if (priv->tx_skb[i])
+			dev_kfree_skb_any(priv->tx_skb[i]);
+
+	dma_free_coherent(d, TX_DESC_NUM * sizeof(struct tx_desc),
+			  priv->tx_desc, priv->tx_desc_dma);
+}
+
+static int hip04_mac_probe(struct platform_device *pdev)
+{
+	struct device *d = &pdev->dev;
+	struct device_node *node = d->of_node;
+	struct of_phandle_args arg;
+	struct net_device *ndev;
+	struct hip04_priv *priv;
+	struct resource *res;
+	unsigned int irq;
+	ktime_t txtime;
+	int ret;
+
+	ndev = alloc_etherdev(sizeof(struct hip04_priv));
+	if (!ndev)
+		return -ENOMEM;
+
+	priv = netdev_priv(ndev);
+	priv->ndev = ndev;
+	platform_set_drvdata(pdev, ndev);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(d, res);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
+		goto init_fail;
+	}
+
+	ret = of_parse_phandle_with_fixed_args(node, "port-handle", 2, 0, &arg);
+	if (ret < 0) {
+		dev_warn(d, "no port-handle\n");
+		goto init_fail;
+	}
+
+	priv->port = arg.args[0];
+	priv->chan = arg.args[1] * RX_DESC_NUM;
+
+	hrtimer_init(&priv->tx_coalesce_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	/*
+	 * BQL will try to keep the TX queue as short as possible, but it can't
+	 * be faster than tx_coalesce_usecs, so we need a fast timeout here,
+	 * but also long enough to gather up enough frames to ensure we don't
+	 * get more interrupts than necessary.
+	 * 200us is enough for 16 frames of 1500 bytes at gigabit ethernet rate
+	 */
+	priv->tx_coalesce_frames = TX_DESC_NUM * 3 / 4;
+	priv->tx_coalesce_usecs = 200;
+	/* allow timer to fire after half the time at the earliest */
+	txtime = ktime_set(0, priv->tx_coalesce_usecs * NSEC_PER_USEC / 2);
+	hrtimer_set_expires_range(&priv->tx_coalesce_timer, txtime, txtime);
+	priv->tx_coalesce_timer.function = tx_done;
+
+	priv->map = syscon_node_to_regmap(arg.np);
+	if (IS_ERR(priv->map)) {
+		dev_warn(d, "no syscon hisilicon,hip04-ppe\n");
+		ret = PTR_ERR(priv->map);
+		goto init_fail;
+	}
+
+	priv->phy_mode = of_get_phy_mode(node);
+	if (priv->phy_mode < 0) {
+		dev_warn(d, "not find phy-mode\n");
+		ret = -EINVAL;
+		goto init_fail;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		ret = -EINVAL;
+		goto init_fail;
+	}
+
+	ret = devm_request_irq(d, irq, hip04_mac_interrupt,
+			       0, pdev->name, ndev);
+	if (ret) {
+		netdev_err(ndev, "devm_request_irq failed\n");
+		goto init_fail;
+	}
+
+	priv->phy_node = of_parse_phandle(node, "phy-handle", 0);
+	if (priv->phy_node) {
+		priv->phy = of_phy_connect(ndev, priv->phy_node,
+			&hip04_adjust_link, 0, priv->phy_mode);
+		if (!priv->phy) {
+			ret = -EPROBE_DEFER;
+			goto init_fail;
+		}
+	}
+
+	INIT_WORK(&priv->tx_timeout_task, hip04_tx_timeout_task);
+
+	ether_setup(ndev);
+	ndev->netdev_ops = &hip04_netdev_ops;
+	ndev->ethtool_ops = &hip04_ethtool_ops;
+	ndev->watchdog_timeo = TX_TIMEOUT;
+	ndev->priv_flags |= IFF_UNICAST_FLT;
+	ndev->irq = irq;
+	netif_napi_add(ndev, &priv->napi, hip04_rx_poll, NAPI_POLL_WEIGHT);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	hip04_reset_ppe(priv);
+	if (priv->phy_mode == PHY_INTERFACE_MODE_MII)
+		hip04_config_port(ndev, SPEED_100, DUPLEX_FULL);
+
+	hip04_config_fifo(priv);
+	random_ether_addr(ndev->dev_addr);
+	hip04_update_mac_address(ndev);
+
+	ret = hip04_alloc_ring(ndev, d);
+	if (ret) {
+		netdev_err(ndev, "alloc ring fail\n");
+		goto alloc_fail;
+	}
+
+	ret = register_netdev(ndev);
+	if (ret) {
+		free_netdev(ndev);
+		goto alloc_fail;
+	}
+
+	return 0;
+
+alloc_fail:
+	hip04_free_ring(ndev, d);
+init_fail:
+	of_node_put(priv->phy_node);
+	free_netdev(ndev);
+	return ret;
+}
+
+static int hip04_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct device *d = &pdev->dev;
+
+	if (priv->phy)
+		phy_disconnect(priv->phy);
+
+	hip04_free_ring(ndev, d);
+	unregister_netdev(ndev);
+	free_irq(ndev->irq, ndev);
+	of_node_put(priv->phy_node);
+	cancel_work_sync(&priv->tx_timeout_task);
+	free_netdev(ndev);
+
+	return 0;
+}
+
+static const struct of_device_id hip04_mac_match[] = {
+	{ .compatible = "hisilicon,hip04-mac" },
+	{ }
+};
+
+static struct platform_driver hip04_mac_driver = {
+	.probe	= hip04_mac_probe,
+	.remove	= hip04_remove,
+	.driver	= {
+		.name		= DRV_NAME,
+		.owner		= THIS_MODULE,
+		.of_match_table	= hip04_mac_match,
+	},
+};
+module_platform_driver(hip04_mac_driver);
+
+MODULE_DESCRIPTION("HISILICON P04 Ethernet driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:hip04-ether");
-- 
1.8.0

^ permalink raw reply related

* [PATCH net-next v11 2/3] net: hisilicon: new hip04 MDIO driver
From: Ding Tianhong @ 2015-01-12  8:03 UTC (permalink / raw)
  To: arnd, robh+dt, davem, grant.likely
  Cc: sergei.shtylyov, linux-arm-kernel, eric.dumazet, xuwei5,
	zhangfei.gao, netdev, devicetree, linux
In-Reply-To: <1421049832-6224-1-git-send-email-dingtianhong@huawei.com>

From: Zhangfei Gao <zhangfei.gao@linaro.org>

Hisilicon hip04 platform mdio driver
Reuse Marvell phy drivers/net/phy/marvell.c

Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/ethernet/hisilicon/Kconfig      |   9 ++
 drivers/net/ethernet/hisilicon/Makefile     |   1 +
 drivers/net/ethernet/hisilicon/hip04_mdio.c | 186 ++++++++++++++++++++++++++++
 3 files changed, 196 insertions(+)
 create mode 100644 drivers/net/ethernet/hisilicon/hip04_mdio.c

diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index e942173..a54d897 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -24,4 +24,13 @@ config HIX5HD2_GMAC
 	help
 	  This selects the hix5hd2 mac family network device.
 
+config HIP04_ETH
+	tristate "HISILICON P04 Ethernet support"
+	select PHYLIB
+	select MARVELL_PHY
+	select MFD_SYSCON
+	---help---
+	  If you wish to compile a kernel for a hardware with hisilicon p04 SoC and
+	  want to use the internal ethernet then you should answer Y to this.
+
 endif # NET_VENDOR_HISILICON
diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile
index 9175e846..40115a7 100644
--- a/drivers/net/ethernet/hisilicon/Makefile
+++ b/drivers/net/ethernet/hisilicon/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_HIX5HD2_GMAC) += hix5hd2_gmac.o
+obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o
diff --git a/drivers/net/ethernet/hisilicon/hip04_mdio.c b/drivers/net/ethernet/hisilicon/hip04_mdio.c
new file mode 100644
index 0000000..b3bac25
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hip04_mdio.c
@@ -0,0 +1,186 @@
+/* Copyright (c) 2014 Linaro Ltd.
+ * Copyright (c) 2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/of_mdio.h>
+#include <linux/delay.h>
+
+#define MDIO_CMD_REG		0x0
+#define MDIO_ADDR_REG		0x4
+#define MDIO_WDATA_REG		0x8
+#define MDIO_RDATA_REG		0xc
+#define MDIO_STA_REG		0x10
+
+#define MDIO_START		BIT(14)
+#define MDIO_R_VALID		BIT(1)
+#define MDIO_READ	        (BIT(12) | BIT(11) | MDIO_START)
+#define MDIO_WRITE	        (BIT(12) | BIT(10) | MDIO_START)
+
+struct hip04_mdio_priv {
+	void __iomem *base;
+};
+
+#define WAIT_TIMEOUT 10
+static int hip04_mdio_wait_ready(struct mii_bus *bus)
+{
+	struct hip04_mdio_priv *priv = bus->priv;
+	int i;
+
+	for (i = 0; readl_relaxed(priv->base + MDIO_CMD_REG) & MDIO_START; i++) {
+		if (i == WAIT_TIMEOUT)
+			return -ETIMEDOUT;
+		msleep(20);
+	}
+
+	return 0;
+}
+
+static int hip04_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+	struct hip04_mdio_priv *priv = bus->priv;
+	u32 val;
+	int ret;
+
+	ret = hip04_mdio_wait_ready(bus);
+	if (ret < 0)
+		goto out;
+
+	val = regnum | (mii_id << 5) | MDIO_READ;
+	writel_relaxed(val, priv->base + MDIO_CMD_REG);
+
+	ret = hip04_mdio_wait_ready(bus);
+	if (ret < 0)
+		goto out;
+
+	val = readl_relaxed(priv->base + MDIO_STA_REG);
+	if (val & MDIO_R_VALID) {
+		dev_err(bus->parent, "SMI bus read not valid\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	val = readl_relaxed(priv->base + MDIO_RDATA_REG);
+	ret = val & 0xFFFF;
+out:
+	return ret;
+}
+
+static int hip04_mdio_write(struct mii_bus *bus, int mii_id,
+			    int regnum, u16 value)
+{
+	struct hip04_mdio_priv *priv = bus->priv;
+	u32 val;
+	int ret;
+
+	ret = hip04_mdio_wait_ready(bus);
+	if (ret < 0)
+		goto out;
+
+	writel_relaxed(value, priv->base + MDIO_WDATA_REG);
+	val = regnum | (mii_id << 5) | MDIO_WRITE;
+	writel_relaxed(val, priv->base + MDIO_CMD_REG);
+out:
+	return ret;
+}
+
+static int hip04_mdio_reset(struct mii_bus *bus)
+{
+	int temp, i;
+
+	for (i = 0; i < PHY_MAX_ADDR; i++) {
+		hip04_mdio_write(bus, i, 22, 0);
+		temp = hip04_mdio_read(bus, i, MII_BMCR);
+		if (temp < 0)
+			continue;
+
+		temp |= BMCR_RESET;
+		if (hip04_mdio_write(bus, i, MII_BMCR, temp) < 0)
+			continue;
+	}
+
+	mdelay(500);
+	return 0;
+}
+
+static int hip04_mdio_probe(struct platform_device *pdev)
+{
+	struct resource *r;
+	struct mii_bus *bus;
+	struct hip04_mdio_priv *priv;
+	int ret;
+
+	bus = mdiobus_alloc_size(sizeof(struct hip04_mdio_priv));
+	if (!bus) {
+		dev_err(&pdev->dev, "Cannot allocate MDIO bus\n");
+		return -ENOMEM;
+	}
+
+	bus->name = "hip04_mdio_bus";
+	bus->read = hip04_mdio_read;
+	bus->write = hip04_mdio_write;
+	bus->reset = hip04_mdio_reset;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(&pdev->dev));
+	bus->parent = &pdev->dev;
+	priv = bus->priv;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
+		goto out_mdio;
+	}
+
+	ret = of_mdiobus_register(bus, pdev->dev.of_node);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Cannot register MDIO bus (%d)\n", ret);
+		goto out_mdio;
+	}
+
+	platform_set_drvdata(pdev, bus);
+
+	return 0;
+
+out_mdio:
+	mdiobus_free(bus);
+	return ret;
+}
+
+static int hip04_mdio_remove(struct platform_device *pdev)
+{
+	struct mii_bus *bus = platform_get_drvdata(pdev);
+
+	mdiobus_unregister(bus);
+	mdiobus_free(bus);
+
+	return 0;
+}
+
+static const struct of_device_id hip04_mdio_match[] = {
+	{ .compatible = "hisilicon,hip04-mdio" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, hip04_mdio_match);
+
+static struct platform_driver hip04_mdio_driver = {
+	.probe = hip04_mdio_probe,
+	.remove = hip04_mdio_remove,
+	.driver = {
+		.name = "hip04-mdio",
+		.owner = THIS_MODULE,
+		.of_match_table = hip04_mdio_match,
+	},
+};
+
+module_platform_driver(hip04_mdio_driver);
+
+MODULE_DESCRIPTION("HISILICON P04 MDIO interface driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:hip04-mdio");
-- 
1.8.0

^ permalink raw reply related

* Re: [PATCH 3/3] x_tables: Factor out 16bit aligment ifname_compare()
From: Richard Weinberger @ 2015-01-12  8:18 UTC (permalink / raw)
  To: David Miller
  Cc: joe, coreteam, netfilter-devel, linux-kernel, netdev, bhutchings,
	john.fastabend, herbert, vyasevic, jiri, vfalico, therbert,
	edumazet, yoshfuji, jmorris, kuznet, kadlec, kaber, pablo, kay,
	stephen
In-Reply-To: <20150111.215050.458358747958425038.davem@davemloft.net>

Am 12.01.2015 um 03:50 schrieb David Miller:
> From: Richard Weinberger <richard@nod.at>
> Date: Sun, 11 Jan 2015 22:42:37 +0100
> 
>> Joe, I really don't care. This is the least significant
>> patch of the series.
>> I'll no longer waste my time with that.
> 
> If you're not willing to fix stylistic issues now, then nobody should
> bother wasting their time on the high level issues of your patch.
> 
> Just fix these things now rather than being difficult, this is a part
> of patch review that everyone has to do, not just you.

I apologize, it was not my intention to be difficult.
Please note that the stylistic issue is not a warning produced
by checkpatch.pl. If you and netfilter folks now prefer bool
for such string compare functions I'll happily address this in
v2 of my series.

Thanks,
//richard

^ permalink raw reply

* Re: [PATCH net-next RFC 5/5] net-timestamp: tx timestamping default mode flag
From: Richard Cochran @ 2015-01-12  8:26 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, David Miller, Eric Dumazet, Andy Lutomirski
In-Reply-To: <CA+FuTSfDVbSCGfJDjY3kv=SuGT221OuYAFVxDvRqouNcrJbvYw@mail.gmail.com>

On Sun, Jan 11, 2015 at 08:49:00PM -0500, Willem de Bruijn wrote:
> Just so I understand: ptp has no use for the sw tstamps
> that would be generated with this flag, but is otherwise
> okay with enabling counters to order tx timestamps
> (OPT_ID) and disabling payload (OPT_TSONLY)?

Yes.

> In the documentation, I would like to strongly suggest all
> processes to enable these, even in absence of this default.
> because that is more robust wrt the sysctl (if merged).

Sounds good.

Thanks,
Richard

^ permalink raw reply

* [PATCH 1/5] net: atarilance: Remove obsolete IRQ_TYPE_PRIO
From: Geert Uytterhoeven @ 2015-01-12  8:40 UTC (permalink / raw)
  To: David S. Miller, Jean-Christophe Plagniol-Villard, Tomi Valkeinen,
	Jaroslav Kysela, Takashi Iwai
  Cc: linux-m68k, linux-kernel, Geert Uytterhoeven, netdev
In-Reply-To: <1421052021-12560-1-git-send-email-geert@linux-m68k.org>

IRQ_TYPE_PRIO is no longer used by the Atari platform interrupt code
since commit 734085651c9b80aa ("[PATCH] m68k: convert atari irq code")
in v2.6.18-rc1, so drop it.

Note that its value has been reused for a different purpose
(IRQ_TYPE_EDGE_FALLING) since commit 6a6de9ef5850d063 ("[PATCH] genirq:
core") in v2.6.18-rc1.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: netdev@vger.kernel.org
---
 drivers/net/ethernet/amd/atarilance.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
index e07ce5ff2d48bf93..b10964e8cb5469ce 100644
--- a/drivers/net/ethernet/amd/atarilance.c
+++ b/drivers/net/ethernet/amd/atarilance.c
@@ -553,8 +553,8 @@ static unsigned long __init lance_probe1( struct net_device *dev,
 	if (lp->cardtype == PAM_CARD ||
 		memaddr == (unsigned short *)0xffe00000) {
 		/* PAMs card and Riebl on ST use level 5 autovector */
-		if (request_irq(IRQ_AUTO_5, lance_interrupt, IRQ_TYPE_PRIO,
-		            "PAM,Riebl-ST Ethernet", dev)) {
+		if (request_irq(IRQ_AUTO_5, lance_interrupt, 0,
+				"PAM,Riebl-ST Ethernet", dev)) {
 			printk( "Lance: request for irq %d failed\n", IRQ_AUTO_5 );
 			return 0;
 		}
@@ -567,8 +567,8 @@ static unsigned long __init lance_probe1( struct net_device *dev,
 			printk( "Lance: request for VME interrupt failed\n" );
 			return 0;
 		}
-		if (request_irq(irq, lance_interrupt, IRQ_TYPE_PRIO,
-		            "Riebl-VME Ethernet", dev)) {
+		if (request_irq(irq, lance_interrupt, 0, "Riebl-VME Ethernet",
+				dev)) {
 			printk( "Lance: request for irq %u failed\n", irq );
 			return 0;
 		}
-- 
1.9.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox