* [PATCH net-next v2 3/3] rhashtable: add a note for grow and shrink decision functions
From: Ying Xue @ 2015-01-12 6:52 UTC (permalink / raw)
To: tgraf; +Cc: davem, netdev
In-Reply-To: <1421045544-13670-1-git-send-email-ying.xue@windriver.com>
As commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for
worker queue") moves condition statements of verifying whether hash
table size exceeds its maximum threshold or reaches its minimum
threshold from resizing functions to resizing decision functions,
we should add a note in rhashtable.h to indicate the implementation
of what the grow and shrink decision function must enforce min/max
shift, otherwise, it's failed to take min/max shift's set watermarks
into effect.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Cc: Thomas Graf <tgraf@suug.ch>
Acked-by: Thomas Graf <tgraf@suug.ch>
---
include/linux/rhashtable.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 7b9bd77..9570832 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -79,6 +79,10 @@ struct rhashtable;
* @obj_hashfn: Function to hash object
* @grow_decision: If defined, may return true if table should expand
* @shrink_decision: If defined, may return true if table should shrink
+ *
+ * Note: when implementing the grow and shrink decision function, min/max
+ * shift must be enforced, otherwise, resizing watermarks they set may be
+ * useless.
*/
struct rhashtable_params {
size_t nelem_hint;
--
1.7.9.5
^ permalink raw reply related
* [PATCH net-next v2 2/3] netlink: eliminate nl_sk_hash_lock
From: Ying Xue @ 2015-01-12 6:52 UTC (permalink / raw)
To: tgraf; +Cc: davem, netdev
In-Reply-To: <1421045544-13670-1-git-send-email-ying.xue@windriver.com>
As rhashtable_lookup_compare_insert() can guarantee the process
of search and insertion is atomic, it's safe to eliminate the
nl_sk_hash_lock. After this, object insertion or removal will
be protected with per bucket lock on write side while object
lookup is guarded with rcu read lock on read side.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Cc: Thomas Graf <tgraf@suug.ch>
---
net/netlink/af_netlink.c | 33 ++++++++++++++++++++-------------
net/netlink/af_netlink.h | 1 -
net/netlink/diag.c | 10 +++++-----
3 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 298e1df..01b702d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -98,7 +98,7 @@ static void netlink_skb_destructor(struct sk_buff *skb);
/* nl_table locking explained:
* Lookup and traversal are protected with an RCU read-side lock. Insertion
- * and removal are protected with nl_sk_hash_lock while using RCU list
+ * and removal are protected with per bucket lock while using RCU list
* modification primitives and may run in parallel to RCU protected lookups.
* Destruction of the Netlink socket may only occur *after* nl_table_lock has
* been acquired * either during or after the socket has been removed from
@@ -110,10 +110,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
-/* Protects netlink socket hash table mutations */
-DEFINE_MUTEX(nl_sk_hash_lock);
-EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
-
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
static DEFINE_SPINLOCK(netlink_tap_lock);
@@ -998,6 +994,19 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
&netlink_compare, &arg);
}
+static bool __netlink_insert(struct netlink_table *table, struct sock *sk,
+ struct net *net)
+{
+ struct netlink_compare_arg arg = {
+ .net = net,
+ .portid = nlk_sk(sk)->portid,
+ };
+
+ return rhashtable_lookup_compare_insert(&table->hash,
+ &nlk_sk(sk)->node,
+ &netlink_compare, &arg);
+}
+
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
struct netlink_table *table = &nl_table[protocol];
@@ -1043,9 +1052,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
struct netlink_table *table = &nl_table[sk->sk_protocol];
int err = -EADDRINUSE;
- mutex_lock(&nl_sk_hash_lock);
- if (__netlink_lookup(table, portid, net))
- goto err;
+ lock_sock(sk);
err = -EBUSY;
if (nlk_sk(sk)->portid)
@@ -1058,10 +1065,12 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
nlk_sk(sk)->portid = portid;
sock_hold(sk);
- rhashtable_insert(&table->hash, &nlk_sk(sk)->node);
- err = 0;
+ if (__netlink_insert(table, sk, net))
+ err = 0;
+ else
+ sock_put(sk);
err:
- mutex_unlock(&nl_sk_hash_lock);
+ release_sock(sk);
return err;
}
@@ -1069,13 +1078,11 @@ static void netlink_remove(struct sock *sk)
{
struct netlink_table *table;
- mutex_lock(&nl_sk_hash_lock);
table = &nl_table[sk->sk_protocol];
if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) {
WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
__sock_put(sk);
}
- mutex_unlock(&nl_sk_hash_lock);
netlink_table_grab();
if (nlk_sk(sk)->subscriptions) {
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index fd96fa7..7518375 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -74,6 +74,5 @@ struct netlink_table {
extern struct netlink_table *nl_table;
extern rwlock_t nl_table_lock;
-extern struct mutex nl_sk_hash_lock;
#endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index fcca36d..bb59a7e 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -103,7 +103,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
{
struct netlink_table *tbl = &nl_table[protocol];
struct rhashtable *ht = &tbl->hash;
- const struct bucket_table *htbl = rht_dereference(ht->tbl, ht);
+ const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht);
struct net *net = sock_net(skb->sk);
struct netlink_diag_req *req;
struct netlink_sock *nlsk;
@@ -115,7 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
for (i = 0; i < htbl->size; i++) {
struct rhash_head *pos;
- rht_for_each_entry(nlsk, pos, htbl, i, node) {
+ rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) {
sk = (struct sock *)nlsk;
if (!net_eq(sock_net(sk), net))
@@ -172,7 +172,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
req = nlmsg_data(cb->nlh);
- mutex_lock(&nl_sk_hash_lock);
+ rcu_read_lock();
read_lock(&nl_table_lock);
if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
@@ -186,7 +186,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
} else {
if (req->sdiag_protocol >= MAX_LINKS) {
read_unlock(&nl_table_lock);
- mutex_unlock(&nl_sk_hash_lock);
+ rcu_read_unlock();
return -ENOENT;
}
@@ -194,7 +194,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
read_unlock(&nl_table_lock);
- mutex_unlock(&nl_sk_hash_lock);
+ rcu_read_unlock();
return skb->len;
}
--
1.7.9.5
^ permalink raw reply related
* RE: [PATCH v4] can: Convert to runtime_pm
From: Appana Durga Kedareswara Rao @ 2015-01-12 6:59 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: linux-can@vger.kernel.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, Soren Brinkmann,
grant.likely@linaro.org, wg@grandegger.com, Michal Simek
In-Reply-To: <54B299A0.1000504@pengutronix.de>
Hi Marc,
> -----Original Message-----
> From: Marc Kleine-Budde [mailto:mkl@pengutronix.de]
> Sent: Sunday, January 11, 2015 9:11 PM
> To: Appana Durga Kedareswara Rao
> Cc: linux-can@vger.kernel.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; Soren Brinkmann; grant.likely@linaro.org;
> wg@grandegger.com
> Subject: Re: [PATCH v4] can: Convert to runtime_pm
>
> On 01/11/2015 06:34 AM, Appana Durga Kedareswara Rao wrote:
> [...]
> >>> return ret;
> >>> }
> >>>
> >>> priv->write_reg(priv, XCAN_MSR_OFFSET, 0);
> >>> priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_CEN_MASK);
> >>>
> >>> if (netif_running(ndev)) {
> >>> priv->can.state = CAN_STATE_ERROR_ACTIVE;
> >>
> >> What happens if the device was not in ACTIVE state prior to the
> >> runtime_suspend?
> >>
> >
> > I am not sure about the state of CAN at this point of time.
> > I just followed what other drivers are following for run time suspend :).
>
> Please check the state of the hardware if you go with bus off into suspend
> and then resume.
>
if (netif_running(ndev)) {
if (isr & XCAN_IXR_BSOFF_MASK) {
priv->can.state = CAN_STATE_BUS_OFF;
priv->write_reg(priv, XCAN_SRR_OFFSET,
XCAN_SRR_RESET_MASK);
} else if ((status & XCAN_SR_ESTAT_MASK) ==
XCAN_SR_ESTAT_MASK) {
priv->can.state = CAN_STATE_ERROR_PASSIVE;
} else if (status & XCAN_SR_ERRWRN_MASK) {
priv->can.state = CAN_STATE_ERROR_WARNING;
} else {
priv->can.state = CAN_STATE_ERROR_ACTIVE;
}
}
Is the above code snippet ok for you?
> >>> netif_device_attach(ndev);
> >>> netif_start_queue(ndev);
> >>> }
> >>>
> >>> return 0;
> >>> }
> >>
> >>
> >>>
> >>> @@ -1020,9 +1035,9 @@ static int __maybe_unused xcan_resume(struct
> >>> device *dev)
> >>>
> >>> priv->write_reg(priv, XCAN_MSR_OFFSET, 0);
> >>> priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_CEN_MASK);
> >>> - priv->can.state = CAN_STATE_ERROR_ACTIVE;
> >>>
> >>> if (netif_running(ndev)) {
> >>> + priv->can.state = CAN_STATE_ERROR_ACTIVE;
> >>> netif_device_attach(ndev);
> >>> netif_start_queue(ndev);
> >>> }
> >>> @@ -1030,7 +1045,10 @@ static int __maybe_unused
> xcan_resume(struct
> >> device *dev)
> >>> return 0;
> >>> }
> >>>
> >>> -static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend,
> >> xcan_resume);
> >>> +static const struct dev_pm_ops xcan_dev_pm_ops = {
> >>> + SET_SYSTEM_SLEEP_PM_OPS(xcan_suspend, xcan_resume)
> >>> + SET_PM_RUNTIME_PM_OPS(xcan_runtime_suspend,
> >> xcan_runtime_resume,
> >>> +NULL) };
> >>>
> >>> /**
> >>> * xcan_probe - Platform registration call @@ -1071,7 +1089,7 @@
> >>> static int xcan_probe(struct platform_device *pdev)
> >>> return -ENOMEM;
> >>>
> >>> priv = netdev_priv(ndev);
> >>> - priv->dev = ndev;
> >>> + priv->dev = &pdev->dev;
> >>> priv->can.bittiming_const = &xcan_bittiming_const;
> >>> priv->can.do_set_mode = xcan_do_set_mode;
> >>> priv->can.do_get_berr_counter = xcan_get_berr_counter; @@ -
> >> 1137,15
> >>> +1155,22 @@ static int xcan_probe(struct platform_device *pdev)
> >>>
> >>> netif_napi_add(ndev, &priv->napi, xcan_rx_poll, rx_max);
> >>>
> >>> + pm_runtime_set_active(&pdev->dev);
> >>> + pm_runtime_irq_safe(&pdev->dev);
> >>> + pm_runtime_enable(&pdev->dev);
> >>> + pm_runtime_get_sync(&pdev->dev);
> >> Check error values?
> >
> > Ok
> >>> +
> >>> ret = register_candev(ndev);
> >>> if (ret) {
> >>> dev_err(&pdev->dev, "fail to register failed
> >>> (err=%d)\n",
> >> ret);
> >>> + pm_runtime_put(priv->dev);
> >>
> >> Please move the pm_runtime_put into the common error exit path.
> >>
> >
> > Ok
> >
> >>> goto err_unprepare_disable_busclk;
> >>> }
> >>>
> >>> devm_can_led_init(ndev);
> >>> - clk_disable_unprepare(priv->bus_clk);
> >>> - clk_disable_unprepare(priv->can_clk);
> >>> +
> >>> + pm_runtime_put(&pdev->dev);
> >>> +
> >>> netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo
> >> depth:%d\n",
> >>> priv->reg_base, ndev->irq, priv->can.clock.freq,
> >>> priv->tx_max);
> >>>
> >>
> >> I think you have to convert the _remove() function, too. Have a look
> >> at the gpio-zynq.c driver:
> >>
> >>> static int zynq_gpio_remove(struct platform_device *pdev) {
> >>> struct zynq_gpio *gpio = platform_get_drvdata(pdev);
> >>>
> >>> pm_runtime_get_sync(&pdev->dev);
> >>
> >> However I don't understand why the get_sync() is here. Maybe Sören
> >> can help?
> >
> > I converted the remove function to use the run-time PM and .
> > Below is the remove code snippet.
> >
> > ret = pm_runtime_get_sync(&pdev->dev);
> > if (ret < 0) {
> > netdev_err(ndev, "%s: pm_runtime_get failed(%d)\n",
> > __func__, ret);
> > return ret;
> > }
> >
> > if (set_reset_mode(ndev) < 0)
> > netdev_err(ndev, "mode resetting failed!\n");
> >
> > unregister_candev(ndev);
> > netif_napi_del(&priv->napi);
> > free_candev(ndev);
>
> > pm_runtime_disable(&pdev->dev);
>
> Can this make a call to xcan_runtime_*()? I'm asking since the ndev has
> been unregistered and already free()ed. Better move this directly after the
> set_reset_mode(). This way you are symmetric to the probe() function.
If I move the pm_runtime_disable after the set_reset_mode
I am getting the below error.
ERROR:
xilinx_can e0008000.can can0 (unregistering): xcan_get_berr_counter: pm_runtime_get fail
If I move the pm_runtime_disable after unregister_candev everything is working fine.
Regards,
Kedar.
>
> > Observed the below things in the /sys/kernel/debug/clk/clk_summary.
> >
> > Modprobe ifconfig up ifconfig down rmmod
> Modprobe ifconfig up ifconfig down rmmod Modprobe ifconfig up
> ifconfig down rmmod
> > Clk_prepare count: 1 1 1 1 2 2 2
> 2 3 3 3 3
> > Clk_enable count: 0 1 0 1 2 2 2
> 2 3 3 3 3
>
> As the numbers are increasing you have a clk_prepare_enable() leak. Your
> remove() function is missing the clk_disable_unprepare() for the can and
> bus clock (as you have clk_prepare_enable in probe()).
>
> Marc
>
> --
> Pengutronix e.K. | Marc Kleine-Budde |
> Industrial Linux Solutions | Phone: +49-231-2826-924 |
> Vertretung West/Dortmund | Fax: +49-5121-206917-5555 |
> Amtsgericht Hildesheim, HRA 2686 | http://www.pengutronix.de |
This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.
^ permalink raw reply
* Re: [PATCH] team: Remove dead code
From: Jiri Pirko @ 2015-01-12 7:10 UTC (permalink / raw)
To: Kenneth Williams; +Cc: netdev
In-Reply-To: <20150112004351.GA8744@ubuntu>
Mon, Jan 12, 2015 at 01:43:54AM CET, ken@williamsclan.us wrote:
>The deleted lines are called from a function which is called:
>1) Only through __team_options_register via team_options_register and
>2) Only during initialization / mode initialization when there are no
>ports attached.
>Therefore the ports list is guarenteed to be empty and this code will
>never be executed.
>
>Signed-off-by: Kenneth Williams <ken@williamsclan.us>
Acked-by: Jiri Pirko <jiri@resnulli.us>
^ permalink raw reply
* [PATCH] gianfar: correct the bad expression while writing bit-pattern
From: Sanjeev Sharma @ 2015-01-12 7:43 UTC (permalink / raw)
To: davem
Cc: claudiu.manoil, peter.senna, shemminger, netdev, linux-kernel,
Sanjeev Sharma, Sanjeev Sharma
This patch correct the bad expression while writing the
bit-pattern from software's buffer to hardware registers.
Signed-off-by: Sanjeev Sharma <Sanjeev_Sharma@mentor.com>
---
drivers/net/ethernet/freescale/gianfar_ethtool.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 3e1a9c1..1ccca72 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -1586,7 +1586,7 @@ static int gfar_write_filer_table(struct gfar_private *priv,
return -EBUSY;
/* Fill regular entries */
- for (; i < MAX_FILER_IDX - 1 && (tab->fe[i].ctrl | tab->fe[i].ctrl);
+ for (; i < MAX_FILER_IDX - 1 && ( i < tab->fe[i].ctrl);
i++)
gfar_write_filer(priv, i, tab->fe[i].ctrl, tab->fe[i].prop);
/* Fill the rest with fall-troughs */
--
1.7.11.7
^ permalink raw reply related
* [PATCH net-next v11 1/3] Documentation: add Device tree bindings for Hisilicon hip04 ethernet
From: Ding Tianhong @ 2015-01-12 8:03 UTC (permalink / raw)
To: arnd-r2nGTMty4D4, robh+dt-DgEjT+Ai2ygdnm+yROfE0A,
davem-fT/PcQaiUtIeIZ0/mPfg9Q, grant.likely-QSEj5FYQhm4dnm+yROfE0A
Cc: sergei.shtylyov-M4DtvfQ/ZS1MRgGoP+s0PdBPR1lH4CV8,
linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
xuwei5-C8/M+/jPZTeaMJb+Lgu22Q,
zhangfei.gao-QSEj5FYQhm4dnm+yROfE0A,
netdev-u79uwXL29TY76Z2rM5mHXA, devicetree-u79uwXL29TY76Z2rM5mHXA,
linux-lFZ/pmaqli7XmaaqVzeoHQ
In-Reply-To: <1421049832-6224-1-git-send-email-dingtianhong-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
From: Zhangfei Gao <zhangfei.gao-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
This patch adds the Device Tree bindings for the Hisilicon hip04
Ethernet controller, including 100M / 1000M controller.
Signed-off-by: Zhangfei Gao <zhangfei.gao-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
Signed-off-by: Ding Tianhong <dingtianhong-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
.../bindings/net/hisilicon-hip04-net.txt | 88 ++++++++++++++++++++++
1 file changed, 88 insertions(+)
create mode 100644 Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
diff --git a/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
new file mode 100644
index 0000000..988fc69
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
@@ -0,0 +1,88 @@
+Hisilicon hip04 Ethernet Controller
+
+* Ethernet controller node
+
+Required properties:
+- compatible: should be "hisilicon,hip04-mac".
+- reg: address and length of the register set for the device.
+- interrupts: interrupt for the device.
+- port-handle: <phandle port channel>
+ phandle, specifies a reference to the syscon ppe node
+ port, port number connected to the controller
+ channel, recv channel start from channel * number (RX_DESC_NUM)
+- phy-mode: see ethernet.txt [1].
+
+Optional properties:
+- phy-handle: see ethernet.txt [1].
+
+[1] Documentation/devicetree/bindings/net/ethernet.txt
+
+
+* Ethernet ppe node:
+Control rx & tx fifos of all ethernet controllers.
+Have 2048 recv channels shared by all ethernet controllers, only if no overlap.
+Each controller's recv channel start from channel * number (RX_DESC_NUM).
+
+Required properties:
+- compatible: "hisilicon,hip04-ppe", "syscon".
+- reg: address and length of the register set for the device.
+
+
+* MDIO bus node:
+
+Required properties:
+
+- compatible: should be "hisilicon,hip04-mdio".
+- Inherits from MDIO bus node binding [2]
+[2] Documentation/devicetree/bindings/net/phy.txt
+
+Example:
+ mdio {
+ compatible = "hisilicon,hip04-mdio";
+ reg = <0x28f1000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ marvell,reg-init = <18 0x14 0 0x8001>;
+ };
+
+ phy1: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <1>;
+ marvell,reg-init = <18 0x14 0 0x8001>;
+ };
+ };
+
+ ppe: ppe@28c0000 {
+ compatible = "hisilicon,hip04-ppe", "syscon";
+ reg = <0x28c0000 0x10000>;
+ };
+
+ fe: ethernet@28b0000 {
+ compatible = "hisilicon,hip04-mac";
+ reg = <0x28b0000 0x10000>;
+ interrupts = <0 413 4>;
+ phy-mode = "mii";
+ port-handle = <&ppe 31 0>;
+ };
+
+ ge0: ethernet@2800000 {
+ compatible = "hisilicon,hip04-mac";
+ reg = <0x2800000 0x10000>;
+ interrupts = <0 402 4>;
+ phy-mode = "sgmii";
+ port-handle = <&ppe 0 1>;
+ phy-handle = <&phy0>;
+ };
+
+ ge8: ethernet@2880000 {
+ compatible = "hisilicon,hip04-mac";
+ reg = <0x2880000 0x10000>;
+ interrupts = <0 410 4>;
+ phy-mode = "sgmii";
+ port-handle = <&ppe 8 2>;
+ phy-handle = <&phy1>;
+ };
--
1.8.0
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH net-next v11 0/3] add hisilicon hip04 ethernet driver
From: Ding Tianhong @ 2015-01-12 8:03 UTC (permalink / raw)
To: arnd, robh+dt, davem, grant.likely
Cc: sergei.shtylyov, linux-arm-kernel, eric.dumazet, xuwei5,
zhangfei.gao, netdev, devicetree, linux
v11: Add ethtool support for tx coalecse getting and setting, the xmit_more
is not supported for this patch, but I think it could work for hip04,
will support it later after some tests for performance better.
Here are some performance test results by ping and iperf(add tx_coalesce_frames/users),
it looks that the performance and latency is more better by tx_coalesce_frames/usecs.
- Before:
$ ping 192.168.1.1 ...
--- 192.168.1.1 ping statistics ---
24 packets transmitted, 24 received, 0% packet loss, time 22999ms
rtt min/avg/max/mdev = 0.180/0.202/0.403/0.043 ms
$ iperf -c 192.168.1.1 ...
[ ID] Interval Transfer Bandwidth
[ 3] 0.0- 1.0 sec 115 MBytes 945 Mbits/sec
- After:
$ ping 192.168.1.1 ...
--- 192.168.1.1 ping statistics ---
24 packets transmitted, 24 received, 0% packet loss, time 22999ms
rtt min/avg/max/mdev = 0.178/0.190/0.380/0.041 ms
$ iperf -c 192.168.1.1 ...
[ ID] Interval Transfer Bandwidth
[ 3] 0.0- 1.0 sec 115 MBytes 965 Mbits/sec
v10:
- According Arnd's suggestion, remove the skb_orphan and use the hrtimer
for the cleanup of the TX queue and add some modification for the hip04
drivers.
1) drop the broken skb_orphan call
2) drop the workqueue
3) batch cleanup based on tx_coalesce_frames/usecs for better throughput
4) use a reasonable default tx timeout (200us, could be shorted
based on measurements) with a range timer
5) fix napi poll function return value
6) use a lockless queue for cleanup
v9:
- There is no tx completion interrupts to free DMAd Tx packets, it means taht
we rely on new tx packets arriving to run the destructors of completed packets,
which open up space in their sockets's send queues. Sometimes we don't get such
new packets causing Tx to stall, a single UDP transmitter is a good example of
this situation, so we need a clean up workqueue to reclaims completed packets,
the workqueue will only free the last packets which is already stay for several jiffies.
Also fix some format cleanups.
v8:
- Use poll to reclaim xmitted buffer as workaround since no tx done interrupt
v7:
- Remove select NET_CORE in 0002
v6:
- Suggest by Russell: Use netdev_sent_queue & netdev_completed_queue to solve latency issue
Also shorten the period of timer, which is used to wakeup the queue since no
tx completed interrupt.
v5:
- no big change, fix typo
v4:
- Modify accoringly to the suggetion from Arnd, Florian, Eric, David
Use of_parse_phandle_with_fixed_args & syscon_node_to_regmap get ppe info
Add skb_orphan() and tx_timer for reclaim since no tx_finished interrupt
Update timeout, and move of_phy_connect to probe to reuse open/stop
v3:
- Suggest from Arnd, use syscon & regmap_write/read to replace static void __iomem *ppebase.
Modify hisilicon-hip04-net.txt accrordingly to suggestion from Florian and Sergei.
v2:
- Got many suggestions from Russell, Arnd, Florian, Mark and Sergei
Remove memcpy, use dma_map/unmap_single, use dma_alloc_coherent rather than dma_pool, etc.
Refer property in ethernet.txt, change ppe description, etc.
Ding Tianhong (1):
net: hisilicon: new hip04 ethernet driver
Zhangfei Gao (2):
Documentation: add Device tree bindings for Hisilicon hip04 ethernet
net: hisilicon: new hip04 MDIO driver
.../bindings/net/hisilicon-hip04-net.txt | 88 ++
drivers/net/ethernet/hisilicon/Kconfig | 9 +
drivers/net/ethernet/hisilicon/Makefile | 1 +
drivers/net/ethernet/hisilicon/hip04_eth.c | 968 +++++++++++++++++++++
drivers/net/ethernet/hisilicon/hip04_mdio.c | 186 ++++
5 files changed, 1252 insertions(+)
create mode 100644 Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
create mode 100644 drivers/net/ethernet/hisilicon/hip04_eth.c
create mode 100644 drivers/net/ethernet/hisilicon/hip04_mdio.c
--
1.8.0
^ permalink raw reply
* [PATCH net-next v11 3/3] net: hisilicon: new hip04 ethernet driver
From: Ding Tianhong @ 2015-01-12 8:03 UTC (permalink / raw)
To: arnd, robh+dt, davem, grant.likely
Cc: sergei.shtylyov, linux-arm-kernel, eric.dumazet, xuwei5,
zhangfei.gao, netdev, devicetree, linux
In-Reply-To: <1421049832-6224-1-git-send-email-dingtianhong@huawei.com>
Support Hisilicon hip04 ethernet driver, including 100M / 1000M controller.
The controller has no tx done interrupt, reclaim xmitted buffer in the poll.
v11: Add ethtool support for tx coalecse getting and setting, the xmit_more
is not supported for this patch, but I think it could work for hip04,
will support it later after some tests for performance better.
Here are some performance test results by ping and iperf(add tx_coalesce_frames/users),
it looks that the performance and latency is more better by tx_coalesce_frames/usecs.
- Before:
$ ping 192.168.1.1 ...
--- 192.168.1.1 ping statistics ---
24 packets transmitted, 24 received, 0% packet loss, time 22999ms
rtt min/avg/max/mdev = 0.180/0.202/0.403/0.043 ms
$ iperf -c 192.168.1.1 ...
[ ID] Interval Transfer Bandwidth
[ 3] 0.0- 1.0 sec 115 MBytes 945 Mbits/sec
- After:
$ ping 192.168.1.1 ...
--- 192.168.1.1 ping statistics ---
24 packets transmitted, 24 received, 0% packet loss, time 22999ms
rtt min/avg/max/mdev = 0.178/0.190/0.380/0.041 ms
$ iperf -c 192.168.1.1 ...
[ ID] Interval Transfer Bandwidth
[ 3] 0.0- 1.0 sec 115 MBytes 965 Mbits/sec
v10: According David Miller and Arnd Bergmann's suggestion, add some modification
for v9 version
- drop the workqueue
- batch cleanup based on tx_coalesce_frames/usecs for better throughput
- use a reasonable default tx timeout (200us, could be shorted
based on measurements) with a range timer
- fix napi poll function return value
- use a lockless queue for cleanup
Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
drivers/net/ethernet/hisilicon/Makefile | 2 +-
drivers/net/ethernet/hisilicon/hip04_eth.c | 968 +++++++++++++++++++++++++++++
2 files changed, 969 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/ethernet/hisilicon/hip04_eth.c
diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile
index 40115a7..6c14540 100644
--- a/drivers/net/ethernet/hisilicon/Makefile
+++ b/drivers/net/ethernet/hisilicon/Makefile
@@ -3,4 +3,4 @@
#
obj-$(CONFIG_HIX5HD2_GMAC) += hix5hd2_gmac.o
-obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o
+obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o hip04_eth.o
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
new file mode 100644
index 0000000..c67c8b6
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -0,0 +1,968 @@
+
+/* Copyright (c) 2014 Linaro Ltd.
+ * Copyright (c) 2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/ktime.h>
+#include <linux/of_address.h>
+#include <linux/phy.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+
+#define PPE_CFG_RX_ADDR 0x100
+#define PPE_CFG_POOL_GRP 0x300
+#define PPE_CFG_RX_BUF_SIZE 0x400
+#define PPE_CFG_RX_FIFO_SIZE 0x500
+#define PPE_CURR_BUF_CNT 0xa200
+
+#define GE_DUPLEX_TYPE 0x08
+#define GE_MAX_FRM_SIZE_REG 0x3c
+#define GE_PORT_MODE 0x40
+#define GE_PORT_EN 0x44
+#define GE_SHORT_RUNTS_THR_REG 0x50
+#define GE_TX_LOCAL_PAGE_REG 0x5c
+#define GE_TRANSMIT_CONTROL_REG 0x60
+#define GE_CF_CRC_STRIP_REG 0x1b0
+#define GE_MODE_CHANGE_REG 0x1b4
+#define GE_RECV_CONTROL_REG 0x1e0
+#define GE_STATION_MAC_ADDRESS 0x210
+#define PPE_CFG_CPU_ADD_ADDR 0x580
+#define PPE_CFG_MAX_FRAME_LEN_REG 0x408
+#define PPE_CFG_BUS_CTRL_REG 0x424
+#define PPE_CFG_RX_CTRL_REG 0x428
+#define PPE_CFG_RX_PKT_MODE_REG 0x438
+#define PPE_CFG_QOS_VMID_GEN 0x500
+#define PPE_CFG_RX_PKT_INT 0x538
+#define PPE_INTEN 0x600
+#define PPE_INTSTS 0x608
+#define PPE_RINT 0x604
+#define PPE_CFG_STS_MODE 0x700
+#define PPE_HIS_RX_PKT_CNT 0x804
+
+/* REG_INTERRUPT */
+#define RCV_INT BIT(10)
+#define RCV_NOBUF BIT(8)
+#define RCV_DROP BIT(7)
+#define TX_DROP BIT(6)
+#define DEF_INT_ERR (RCV_NOBUF | RCV_DROP | TX_DROP)
+#define DEF_INT_MASK (RCV_INT | DEF_INT_ERR)
+
+/* TX descriptor config */
+#define TX_FREE_MEM BIT(0)
+#define TX_READ_ALLOC_L3 BIT(1)
+#define TX_FINISH_CACHE_INV BIT(2)
+#define TX_CLEAR_WB BIT(4)
+#define TX_L3_CHECKSUM BIT(5)
+#define TX_LOOP_BACK BIT(11)
+
+/* RX error */
+#define RX_PKT_DROP BIT(0)
+#define RX_L2_ERR BIT(1)
+#define RX_PKT_ERR (RX_PKT_DROP | RX_L2_ERR)
+
+#define SGMII_SPEED_1000 0x08
+#define SGMII_SPEED_100 0x07
+#define SGMII_SPEED_10 0x06
+#define MII_SPEED_100 0x01
+#define MII_SPEED_10 0x00
+
+#define GE_DUPLEX_FULL BIT(0)
+#define GE_DUPLEX_HALF 0x00
+#define GE_MODE_CHANGE_EN BIT(0)
+
+#define GE_TX_AUTO_NEG BIT(5)
+#define GE_TX_ADD_CRC BIT(6)
+#define GE_TX_SHORT_PAD_THROUGH BIT(7)
+
+#define GE_RX_STRIP_CRC BIT(0)
+#define GE_RX_STRIP_PAD BIT(3)
+#define GE_RX_PAD_EN BIT(4)
+
+#define GE_AUTO_NEG_CTL BIT(0)
+
+#define GE_RX_INT_THRESHOLD BIT(6)
+#define GE_RX_TIMEOUT 0x04
+
+#define GE_RX_PORT_EN BIT(1)
+#define GE_TX_PORT_EN BIT(2)
+
+#define PPE_CFG_STS_RX_PKT_CNT_RC BIT(12)
+
+#define PPE_CFG_RX_PKT_ALIGN BIT(18)
+#define PPE_CFG_QOS_VMID_MODE BIT(14)
+#define PPE_CFG_QOS_VMID_GRP_SHIFT 8
+
+#define PPE_CFG_RX_FIFO_FSFU BIT(11)
+#define PPE_CFG_RX_DEPTH_SHIFT 16
+#define PPE_CFG_RX_START_SHIFT 0
+#define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11
+
+#define PPE_CFG_BUS_LOCAL_REL BIT(14)
+#define PPE_CFG_BUS_BIG_ENDIEN BIT(0)
+
+#define RX_DESC_NUM 128
+#define TX_DESC_NUM 256
+#define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM-1))
+#define RX_NEXT(N) (((N) + 1) & (RX_DESC_NUM-1))
+
+#define GMAC_PPE_RX_PKT_MAX_LEN 379
+#define GMAC_MAX_PKT_LEN 1516
+#define GMAC_MIN_PKT_LEN 31
+#define RX_BUF_SIZE 1600
+#define RESET_TIMEOUT 1000
+#define TX_TIMEOUT (6 * HZ)
+
+#define DRV_NAME "hip04-ether"
+#define DRV_VERSION "v1.0"
+
+#define HIP04_MAX_TX_COALESCE_USECS 200
+#define HIP04_MIN_TX_COALESCE_USECS 100
+#define HIP04_MAX_TX_COALESCE_FRAMES 200
+#define HIP04_MIN_TX_COALESCE_FRAMES 100
+
+struct tx_desc {
+ u32 send_addr;
+ u32 send_size;
+ u32 next_addr;
+ u32 cfg;
+ u32 wb_addr;
+} __aligned(64);
+
+struct rx_desc {
+ u16 reserved_16;
+ u16 pkt_len;
+ u32 reserve1[3];
+ u32 pkt_err;
+ u32 reserve2[4];
+};
+
+struct hip04_priv {
+ void __iomem *base;
+ int phy_mode;
+ int chan;
+ unsigned int port;
+ unsigned int speed;
+ unsigned int duplex;
+ unsigned int reg_inten;
+
+ struct napi_struct napi;
+ struct net_device *ndev;
+
+ struct tx_desc *tx_desc;
+ dma_addr_t tx_desc_dma;
+ struct sk_buff *tx_skb[TX_DESC_NUM];
+ dma_addr_t tx_phys[TX_DESC_NUM];
+ unsigned int tx_head;
+
+ int tx_coalesce_frames;
+ int tx_coalesce_usecs;
+ struct hrtimer tx_coalesce_timer;
+
+ unsigned char *rx_buf[RX_DESC_NUM];
+ dma_addr_t rx_phys[RX_DESC_NUM];
+ unsigned int rx_head;
+ unsigned int rx_buf_size;
+
+ struct device_node *phy_node;
+ struct phy_device *phy;
+ struct regmap *map;
+ struct work_struct tx_timeout_task;
+
+ /* written only by tx cleanup */
+ unsigned int tx_tail ____cacheline_aligned_in_smp;
+};
+
+static inline unsigned int tx_count(unsigned int head, unsigned int tail)
+{
+ return (head - tail) % (TX_DESC_NUM - 1);
+}
+
+static void hip04_config_port(struct net_device *ndev, u32 speed, u32 duplex)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ u32 val;
+
+ priv->speed = speed;
+ priv->duplex = duplex;
+
+ switch (priv->phy_mode) {
+ case PHY_INTERFACE_MODE_SGMII:
+ if (speed == SPEED_1000)
+ val = SGMII_SPEED_1000;
+ else if (speed == SPEED_100)
+ val = SGMII_SPEED_100;
+ else
+ val = SGMII_SPEED_10;
+ break;
+ case PHY_INTERFACE_MODE_MII:
+ if (speed == SPEED_100)
+ val = MII_SPEED_100;
+ else
+ val = MII_SPEED_10;
+ break;
+ default:
+ netdev_warn(ndev, "not supported mode\n");
+ val = MII_SPEED_10;
+ break;
+ }
+ writel_relaxed(val, priv->base + GE_PORT_MODE);
+
+ val = duplex ? GE_DUPLEX_FULL : GE_DUPLEX_HALF;
+ writel_relaxed(val, priv->base + GE_DUPLEX_TYPE);
+
+ val = GE_MODE_CHANGE_EN;
+ writel_relaxed(val, priv->base + GE_MODE_CHANGE_REG);
+}
+
+static void hip04_reset_ppe(struct hip04_priv *priv)
+{
+ u32 val, tmp, timeout = 0;
+
+ do {
+ regmap_read(priv->map, priv->port * 4 + PPE_CURR_BUF_CNT, &val);
+ regmap_read(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, &tmp);
+ if (timeout++ > RESET_TIMEOUT)
+ break;
+ } while (val & 0xfff);
+}
+
+static void hip04_config_fifo(struct hip04_priv *priv)
+{
+ u32 val;
+
+ val = readl_relaxed(priv->base + PPE_CFG_STS_MODE);
+ val |= PPE_CFG_STS_RX_PKT_CNT_RC;
+ writel_relaxed(val, priv->base + PPE_CFG_STS_MODE);
+
+ val = BIT(priv->port);
+ regmap_write(priv->map, priv->port * 4 + PPE_CFG_POOL_GRP, val);
+
+ val = priv->port << PPE_CFG_QOS_VMID_GRP_SHIFT;
+ val |= PPE_CFG_QOS_VMID_MODE;
+ writel_relaxed(val, priv->base + PPE_CFG_QOS_VMID_GEN);
+
+ val = RX_BUF_SIZE;
+ regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_BUF_SIZE, val);
+
+ val = RX_DESC_NUM << PPE_CFG_RX_DEPTH_SHIFT;
+ val |= PPE_CFG_RX_FIFO_FSFU;
+ val |= priv->chan << PPE_CFG_RX_START_SHIFT;
+ regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_FIFO_SIZE, val);
+
+ val = NET_IP_ALIGN << PPE_CFG_RX_CTRL_ALIGN_SHIFT;
+ writel_relaxed(val, priv->base + PPE_CFG_RX_CTRL_REG);
+
+ val = PPE_CFG_RX_PKT_ALIGN;
+ writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_MODE_REG);
+
+ val = PPE_CFG_BUS_LOCAL_REL | PPE_CFG_BUS_BIG_ENDIEN;
+ writel_relaxed(val, priv->base + PPE_CFG_BUS_CTRL_REG);
+
+ val = GMAC_PPE_RX_PKT_MAX_LEN;
+ writel_relaxed(val, priv->base + PPE_CFG_MAX_FRAME_LEN_REG);
+
+ val = GMAC_MAX_PKT_LEN;
+ writel_relaxed(val, priv->base + GE_MAX_FRM_SIZE_REG);
+
+ val = GMAC_MIN_PKT_LEN;
+ writel_relaxed(val, priv->base + GE_SHORT_RUNTS_THR_REG);
+
+ val = readl_relaxed(priv->base + GE_TRANSMIT_CONTROL_REG);
+ val |= GE_TX_AUTO_NEG | GE_TX_ADD_CRC | GE_TX_SHORT_PAD_THROUGH;
+ writel_relaxed(val, priv->base + GE_TRANSMIT_CONTROL_REG);
+
+ val = GE_RX_STRIP_CRC;
+ writel_relaxed(val, priv->base + GE_CF_CRC_STRIP_REG);
+
+ val = readl_relaxed(priv->base + GE_RECV_CONTROL_REG);
+ val |= GE_RX_STRIP_PAD | GE_RX_PAD_EN;
+ writel_relaxed(val, priv->base + GE_RECV_CONTROL_REG);
+
+ val = GE_AUTO_NEG_CTL;
+ writel_relaxed(val, priv->base + GE_TX_LOCAL_PAGE_REG);
+}
+
+static void hip04_mac_enable(struct net_device *ndev)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ u32 val;
+
+ /* enable tx & rx */
+ val = readl_relaxed(priv->base + GE_PORT_EN);
+ val |= GE_RX_PORT_EN | GE_TX_PORT_EN;
+ writel_relaxed(val, priv->base + GE_PORT_EN);
+
+ /* clear rx int */
+ val = RCV_INT;
+ writel_relaxed(val, priv->base + PPE_RINT);
+
+ /* config recv int */
+ val = GE_RX_INT_THRESHOLD | GE_RX_TIMEOUT;
+ writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_INT);
+
+ /* enable interrupt */
+ priv->reg_inten = DEF_INT_MASK;
+ writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+}
+
+static void hip04_mac_disable(struct net_device *ndev)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ u32 val;
+
+ /* disable int */
+ priv->reg_inten &= ~(DEF_INT_MASK);
+ writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+
+ /* disable tx & rx */
+ val = readl_relaxed(priv->base + GE_PORT_EN);
+ val &= ~(GE_RX_PORT_EN | GE_TX_PORT_EN);
+ writel_relaxed(val, priv->base + GE_PORT_EN);
+}
+
+static void hip04_set_xmit_desc(struct hip04_priv *priv, dma_addr_t phys)
+{
+ writel(phys, priv->base + PPE_CFG_CPU_ADD_ADDR);
+}
+
+static void hip04_set_recv_desc(struct hip04_priv *priv, dma_addr_t phys)
+{
+ regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, phys);
+}
+
+static u32 hip04_recv_cnt(struct hip04_priv *priv)
+{
+ return readl(priv->base + PPE_HIS_RX_PKT_CNT);
+}
+
+static void hip04_update_mac_address(struct net_device *ndev)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+
+ writel_relaxed(((ndev->dev_addr[0] << 8) | (ndev->dev_addr[1])),
+ priv->base + GE_STATION_MAC_ADDRESS);
+ writel_relaxed(((ndev->dev_addr[2] << 24) | (ndev->dev_addr[3] << 16) |
+ (ndev->dev_addr[4] << 8) | (ndev->dev_addr[5])),
+ priv->base + GE_STATION_MAC_ADDRESS + 4);
+}
+
+static int hip04_set_mac_address(struct net_device *ndev, void *addr)
+{
+ eth_mac_addr(ndev, addr);
+ hip04_update_mac_address(ndev);
+ return 0;
+}
+
+static int hip04_tx_reclaim(struct net_device *ndev, bool force)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ unsigned tx_tail = priv->tx_tail;
+ struct tx_desc *desc;
+ unsigned int bytes_compl = 0, pkts_compl = 0;
+ unsigned int count;
+
+ smp_rmb();
+ count = tx_count(ACCESS_ONCE(priv->tx_head), tx_tail);
+ if (count == 0)
+ goto out;
+
+ while (count) {
+ desc = &priv->tx_desc[tx_tail];
+ if (desc->send_addr != 0) {
+ if (force)
+ desc->send_addr = 0;
+ else
+ break;
+ }
+
+ if (priv->tx_phys[tx_tail]) {
+ dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail],
+ priv->tx_skb[tx_tail]->len,
+ DMA_TO_DEVICE);
+ priv->tx_phys[tx_tail] = 0;
+ }
+ pkts_compl++;
+ bytes_compl += priv->tx_skb[tx_tail]->len;
+ dev_kfree_skb(priv->tx_skb[tx_tail]);
+ priv->tx_skb[tx_tail] = NULL;
+ tx_tail = TX_NEXT(tx_tail);
+ count--;
+ }
+
+ priv->tx_tail = tx_tail;
+ smp_wmb(); /* Ensure tx_tail visible to xmit */
+
+out:
+ if (pkts_compl || bytes_compl)
+ netdev_completed_queue(ndev, pkts_compl, bytes_compl);
+
+ if (unlikely(netif_queue_stopped(ndev)) && (count < (TX_DESC_NUM - 1)))
+ netif_wake_queue(ndev);
+
+ return count;
+}
+
+static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ struct net_device_stats *stats = &ndev->stats;
+ unsigned int tx_head = priv->tx_head, count;
+ struct tx_desc *desc = &priv->tx_desc[tx_head];
+ dma_addr_t phys;
+
+ smp_rmb();
+ count = tx_count(tx_head, ACCESS_ONCE(priv->tx_tail));
+ if (count == (TX_DESC_NUM - 1)) {
+ netif_stop_queue(ndev);
+ return NETDEV_TX_BUSY;
+ }
+
+ phys = dma_map_single(&ndev->dev, skb->data, skb->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&ndev->dev, phys)) {
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ priv->tx_skb[tx_head] = skb;
+ priv->tx_phys[tx_head] = phys;
+ desc->send_addr = cpu_to_be32(phys);
+ desc->send_size = cpu_to_be32(skb->len);
+ desc->cfg = cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV);
+ phys = priv->tx_desc_dma + tx_head * sizeof(struct tx_desc);
+ desc->wb_addr = cpu_to_be32(phys);
+ skb_tx_timestamp(skb);
+
+ hip04_set_xmit_desc(priv, phys);
+ priv->tx_head = TX_NEXT(tx_head);
+ count++;
+ netdev_sent_queue(ndev, skb->len);
+
+ stats->tx_bytes += skb->len;
+ stats->tx_packets++;
+
+ /* Ensure tx_head update visible to tx reclaim */
+ smp_wmb();
+
+ /* queue is getting full, better start cleaning up now */
+ if (count >= priv->tx_coalesce_frames) {
+ if (napi_schedule_prep(&priv->napi)) {
+ /* disable rx interrupt and timer */
+ priv->reg_inten &= ~(RCV_INT);
+ writel_relaxed(DEF_INT_MASK & ~RCV_INT,
+ priv->base + PPE_INTEN);
+ hrtimer_cancel(&priv->tx_coalesce_timer);
+ __napi_schedule(&priv->napi);
+ }
+ } else if (!hrtimer_is_queued(&priv->tx_coalesce_timer)) {
+ /* cleanup not pending yet, start a new timer */
+ hrtimer_start_expires(&priv->tx_coalesce_timer,
+ HRTIMER_MODE_REL);
+ }
+
+ return NETDEV_TX_OK;
+}
+
+static int hip04_rx_poll(struct napi_struct *napi, int budget)
+{
+ struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
+ struct net_device *ndev = priv->ndev;
+ struct net_device_stats *stats = &ndev->stats;
+ unsigned int cnt = hip04_recv_cnt(priv);
+ struct rx_desc *desc;
+ struct sk_buff *skb;
+ unsigned char *buf;
+ bool last = false;
+ dma_addr_t phys;
+ int rx = 0;
+ int tx_remaining;
+ u16 len;
+ u32 err;
+
+ while (cnt && !last) {
+ buf = priv->rx_buf[priv->rx_head];
+ skb = build_skb(buf, priv->rx_buf_size);
+ if (unlikely(!skb))
+ net_dbg_ratelimited("build_skb failed\n");
+
+ dma_unmap_single(&ndev->dev, priv->rx_phys[priv->rx_head],
+ RX_BUF_SIZE, DMA_FROM_DEVICE);
+ priv->rx_phys[priv->rx_head] = 0;
+
+ desc = (struct rx_desc *)skb->data;
+ len = be16_to_cpu(desc->pkt_len);
+ err = be32_to_cpu(desc->pkt_err);
+
+ if (0 == len) {
+ dev_kfree_skb_any(skb);
+ last = true;
+ } else if ((err & RX_PKT_ERR) || (len >= GMAC_MAX_PKT_LEN)) {
+ dev_kfree_skb_any(skb);
+ stats->rx_dropped++;
+ stats->rx_errors++;
+ } else {
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+ skb_put(skb, len);
+ skb->protocol = eth_type_trans(skb, ndev);
+ napi_gro_receive(&priv->napi, skb);
+ stats->rx_packets++;
+ stats->rx_bytes += len;
+ rx++;
+ }
+
+ buf = netdev_alloc_frag(priv->rx_buf_size);
+ if (!buf)
+ goto done;
+ phys = dma_map_single(&ndev->dev, buf,
+ RX_BUF_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&ndev->dev, phys))
+ goto done;
+ priv->rx_buf[priv->rx_head] = buf;
+ priv->rx_phys[priv->rx_head] = phys;
+ hip04_set_recv_desc(priv, phys);
+
+ priv->rx_head = RX_NEXT(priv->rx_head);
+ if (rx >= budget)
+ goto done;
+
+ if (--cnt == 0)
+ cnt = hip04_recv_cnt(priv);
+ }
+
+ if (!(priv->reg_inten & RCV_INT)) {
+ /* enable rx interrupt */
+ priv->reg_inten |= RCV_INT;
+ writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+ }
+ napi_complete(napi);
+done:
+ /* clean up tx descriptors and start a new timer if necessary */
+ tx_remaining = hip04_tx_reclaim(ndev, false);
+ if (rx < budget && tx_remaining)
+ hrtimer_start_expires(&priv->tx_coalesce_timer, HRTIMER_MODE_REL);
+
+ return rx;
+}
+
+static irqreturn_t hip04_mac_interrupt(int irq, void *dev_id)
+{
+ struct net_device *ndev = (struct net_device *)dev_id;
+ struct hip04_priv *priv = netdev_priv(ndev);
+ struct net_device_stats *stats = &ndev->stats;
+ u32 ists = readl_relaxed(priv->base + PPE_INTSTS);
+
+ if (!ists)
+ return IRQ_NONE;
+
+ writel_relaxed(DEF_INT_MASK, priv->base + PPE_RINT);
+
+ if (unlikely(ists & DEF_INT_ERR)) {
+ if (ists & (RCV_NOBUF | RCV_DROP))
+ stats->rx_errors++;
+ stats->rx_dropped++;
+ netdev_err(ndev, "rx drop\n");
+ if (ists & TX_DROP) {
+ stats->tx_dropped++;
+ netdev_err(ndev, "tx drop\n");
+ }
+ }
+
+ if (ists & RCV_INT && napi_schedule_prep(&priv->napi)) {
+ /* disable rx interrupt */
+ priv->reg_inten &= ~(RCV_INT);
+ writel_relaxed(DEF_INT_MASK & ~RCV_INT, priv->base + PPE_INTEN);
+ hrtimer_cancel(&priv->tx_coalesce_timer);
+ __napi_schedule(&priv->napi);
+ }
+
+ return IRQ_HANDLED;
+}
+
+enum hrtimer_restart tx_done(struct hrtimer *hrtimer)
+{
+ struct hip04_priv *priv;
+ priv = container_of(hrtimer, struct hip04_priv, tx_coalesce_timer);
+
+ if (napi_schedule_prep(&priv->napi)) {
+ /* disable rx interrupt */
+ priv->reg_inten &= ~(RCV_INT);
+ writel_relaxed(DEF_INT_MASK & ~RCV_INT, priv->base + PPE_INTEN);
+ __napi_schedule(&priv->napi);
+ }
+
+ return HRTIMER_NORESTART;
+}
+
+static void hip04_adjust_link(struct net_device *ndev)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ struct phy_device *phy = priv->phy;
+
+ if ((priv->speed != phy->speed) || (priv->duplex != phy->duplex)) {
+ hip04_config_port(ndev, phy->speed, phy->duplex);
+ phy_print_status(phy);
+ }
+}
+
+static int hip04_mac_open(struct net_device *ndev)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ int i;
+
+ priv->rx_head = 0;
+ priv->tx_head = 0;
+ priv->tx_tail = 0;
+ hip04_reset_ppe(priv);
+
+ for (i = 0; i < RX_DESC_NUM; i++) {
+ dma_addr_t phys;
+
+ phys = dma_map_single(&ndev->dev, priv->rx_buf[i],
+ RX_BUF_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&ndev->dev, phys))
+ return -EIO;
+
+ priv->rx_phys[i] = phys;
+ hip04_set_recv_desc(priv, phys);
+ }
+
+ if (priv->phy)
+ phy_start(priv->phy);
+
+ netdev_reset_queue(ndev);
+ netif_start_queue(ndev);
+ hip04_mac_enable(ndev);
+ napi_enable(&priv->napi);
+
+ return 0;
+}
+
+static int hip04_mac_stop(struct net_device *ndev)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ int i;
+
+ napi_disable(&priv->napi);
+ netif_stop_queue(ndev);
+ hip04_mac_disable(ndev);
+ hip04_tx_reclaim(ndev, true);
+ hip04_reset_ppe(priv);
+
+ if (priv->phy)
+ phy_stop(priv->phy);
+
+ for (i = 0; i < RX_DESC_NUM; i++) {
+ if (priv->rx_phys[i]) {
+ dma_unmap_single(&ndev->dev, priv->rx_phys[i],
+ RX_BUF_SIZE, DMA_FROM_DEVICE);
+ priv->rx_phys[i] = 0;
+ }
+ }
+
+ return 0;
+}
+
+static void hip04_timeout(struct net_device *ndev)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+
+ schedule_work(&priv->tx_timeout_task);
+}
+
+static void hip04_tx_timeout_task(struct work_struct *work)
+{
+ struct hip04_priv *priv;
+
+ priv = container_of(work, struct hip04_priv, tx_timeout_task);
+ hip04_mac_stop(priv->ndev);
+ hip04_mac_open(priv->ndev);
+}
+
+static struct net_device_stats *hip04_get_stats(struct net_device *ndev)
+{
+ return &ndev->stats;
+}
+
+static int hip04_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct hip04_priv *priv = netdev_priv(netdev);
+
+ ec->tx_coalesce_usecs = priv->tx_coalesce_usecs;
+ ec->tx_max_coalesced_frames = priv->tx_coalesce_frames;
+
+ return 0;
+}
+
+static int hip04_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct hip04_priv *priv = netdev_priv(netdev);
+
+ /* Check not supported parameters */
+ if ((ec->rx_max_coalesced_frames) || (ec->rx_coalesce_usecs_irq) ||
+ (ec->rx_max_coalesced_frames_irq) || (ec->tx_coalesce_usecs_irq) ||
+ (ec->use_adaptive_rx_coalesce) || (ec->use_adaptive_tx_coalesce) ||
+ (ec->pkt_rate_low) || (ec->rx_coalesce_usecs_low) ||
+ (ec->rx_max_coalesced_frames_low) || (ec->tx_coalesce_usecs_high) ||
+ (ec->tx_max_coalesced_frames_low) || (ec->pkt_rate_high) ||
+ (ec->tx_coalesce_usecs_low) || (ec->rx_coalesce_usecs_high) ||
+ (ec->rx_max_coalesced_frames_high) || (ec->rx_coalesce_usecs) ||
+ (ec->tx_max_coalesced_frames_irq) ||
+ (ec->stats_block_coalesce_usecs) ||
+ (ec->tx_max_coalesced_frames_high) || (ec->rate_sample_interval))
+ return -EOPNOTSUPP;
+
+ if ((ec->tx_coalesce_usecs > HIP04_MAX_TX_COALESCE_USECS ||
+ ec->tx_coalesce_usecs < HIP04_MIN_TX_COALESCE_USECS) ||
+ (ec->tx_max_coalesced_frames > HIP04_MAX_TX_COALESCE_FRAMES ||
+ ec->tx_max_coalesced_frames < HIP04_MIN_TX_COALESCE_FRAMES))
+ return -EINVAL;
+
+ priv->tx_coalesce_usecs = ec->tx_coalesce_usecs;
+ priv->tx_coalesce_frames = ec->tx_max_coalesced_frames;
+
+ return 0;
+}
+
+static void hip04_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
+}
+
+static struct ethtool_ops hip04_ethtool_ops = {
+ .get_coalesce = hip04_get_coalesce,
+ .set_coalesce = hip04_set_coalesce,
+ .get_drvinfo = hip04_get_drvinfo,
+};
+
+static struct net_device_ops hip04_netdev_ops = {
+ .ndo_open = hip04_mac_open,
+ .ndo_stop = hip04_mac_stop,
+ .ndo_get_stats = hip04_get_stats,
+ .ndo_start_xmit = hip04_mac_start_xmit,
+ .ndo_set_mac_address = hip04_set_mac_address,
+ .ndo_tx_timeout = hip04_timeout,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = eth_change_mtu,
+};
+
+static int hip04_alloc_ring(struct net_device *ndev, struct device *d)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ int i;
+
+ priv->tx_desc = dma_alloc_coherent(d,
+ TX_DESC_NUM * sizeof(struct tx_desc),
+ &priv->tx_desc_dma, GFP_KERNEL);
+ if (!priv->tx_desc)
+ return -ENOMEM;
+
+ priv->rx_buf_size = RX_BUF_SIZE +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ for (i = 0; i < RX_DESC_NUM; i++) {
+ priv->rx_buf[i] = netdev_alloc_frag(priv->rx_buf_size);
+ if (!priv->rx_buf[i])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void hip04_free_ring(struct net_device *ndev, struct device *d)
+{
+ struct hip04_priv *priv = netdev_priv(ndev);
+ int i;
+
+ for (i = 0; i < RX_DESC_NUM; i++)
+ if (priv->rx_buf[i])
+ put_page(virt_to_head_page(priv->rx_buf[i]));
+
+ for (i = 0; i < TX_DESC_NUM; i++)
+ if (priv->tx_skb[i])
+ dev_kfree_skb_any(priv->tx_skb[i]);
+
+ dma_free_coherent(d, TX_DESC_NUM * sizeof(struct tx_desc),
+ priv->tx_desc, priv->tx_desc_dma);
+}
+
+static int hip04_mac_probe(struct platform_device *pdev)
+{
+ struct device *d = &pdev->dev;
+ struct device_node *node = d->of_node;
+ struct of_phandle_args arg;
+ struct net_device *ndev;
+ struct hip04_priv *priv;
+ struct resource *res;
+ unsigned int irq;
+ ktime_t txtime;
+ int ret;
+
+ ndev = alloc_etherdev(sizeof(struct hip04_priv));
+ if (!ndev)
+ return -ENOMEM;
+
+ priv = netdev_priv(ndev);
+ priv->ndev = ndev;
+ platform_set_drvdata(pdev, ndev);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ priv->base = devm_ioremap_resource(d, res);
+ if (IS_ERR(priv->base)) {
+ ret = PTR_ERR(priv->base);
+ goto init_fail;
+ }
+
+ ret = of_parse_phandle_with_fixed_args(node, "port-handle", 2, 0, &arg);
+ if (ret < 0) {
+ dev_warn(d, "no port-handle\n");
+ goto init_fail;
+ }
+
+ priv->port = arg.args[0];
+ priv->chan = arg.args[1] * RX_DESC_NUM;
+
+ hrtimer_init(&priv->tx_coalesce_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+ /*
+ * BQL will try to keep the TX queue as short as possible, but it can't
+ * be faster than tx_coalesce_usecs, so we need a fast timeout here,
+ * but also long enough to gather up enough frames to ensure we don't
+ * get more interrupts than necessary.
+ * 200us is enough for 16 frames of 1500 bytes at gigabit ethernet rate
+ */
+ priv->tx_coalesce_frames = TX_DESC_NUM * 3 / 4;
+ priv->tx_coalesce_usecs = 200;
+ /* allow timer to fire after half the time at the earliest */
+ txtime = ktime_set(0, priv->tx_coalesce_usecs * NSEC_PER_USEC / 2);
+ hrtimer_set_expires_range(&priv->tx_coalesce_timer, txtime, txtime);
+ priv->tx_coalesce_timer.function = tx_done;
+
+ priv->map = syscon_node_to_regmap(arg.np);
+ if (IS_ERR(priv->map)) {
+ dev_warn(d, "no syscon hisilicon,hip04-ppe\n");
+ ret = PTR_ERR(priv->map);
+ goto init_fail;
+ }
+
+ priv->phy_mode = of_get_phy_mode(node);
+ if (priv->phy_mode < 0) {
+ dev_warn(d, "not find phy-mode\n");
+ ret = -EINVAL;
+ goto init_fail;
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq <= 0) {
+ ret = -EINVAL;
+ goto init_fail;
+ }
+
+ ret = devm_request_irq(d, irq, hip04_mac_interrupt,
+ 0, pdev->name, ndev);
+ if (ret) {
+ netdev_err(ndev, "devm_request_irq failed\n");
+ goto init_fail;
+ }
+
+ priv->phy_node = of_parse_phandle(node, "phy-handle", 0);
+ if (priv->phy_node) {
+ priv->phy = of_phy_connect(ndev, priv->phy_node,
+ &hip04_adjust_link, 0, priv->phy_mode);
+ if (!priv->phy) {
+ ret = -EPROBE_DEFER;
+ goto init_fail;
+ }
+ }
+
+ INIT_WORK(&priv->tx_timeout_task, hip04_tx_timeout_task);
+
+ ether_setup(ndev);
+ ndev->netdev_ops = &hip04_netdev_ops;
+ ndev->ethtool_ops = &hip04_ethtool_ops;
+ ndev->watchdog_timeo = TX_TIMEOUT;
+ ndev->priv_flags |= IFF_UNICAST_FLT;
+ ndev->irq = irq;
+ netif_napi_add(ndev, &priv->napi, hip04_rx_poll, NAPI_POLL_WEIGHT);
+ SET_NETDEV_DEV(ndev, &pdev->dev);
+
+ hip04_reset_ppe(priv);
+ if (priv->phy_mode == PHY_INTERFACE_MODE_MII)
+ hip04_config_port(ndev, SPEED_100, DUPLEX_FULL);
+
+ hip04_config_fifo(priv);
+ random_ether_addr(ndev->dev_addr);
+ hip04_update_mac_address(ndev);
+
+ ret = hip04_alloc_ring(ndev, d);
+ if (ret) {
+ netdev_err(ndev, "alloc ring fail\n");
+ goto alloc_fail;
+ }
+
+ ret = register_netdev(ndev);
+ if (ret) {
+ free_netdev(ndev);
+ goto alloc_fail;
+ }
+
+ return 0;
+
+alloc_fail:
+ hip04_free_ring(ndev, d);
+init_fail:
+ of_node_put(priv->phy_node);
+ free_netdev(ndev);
+ return ret;
+}
+
+static int hip04_remove(struct platform_device *pdev)
+{
+ struct net_device *ndev = platform_get_drvdata(pdev);
+ struct hip04_priv *priv = netdev_priv(ndev);
+ struct device *d = &pdev->dev;
+
+ if (priv->phy)
+ phy_disconnect(priv->phy);
+
+ hip04_free_ring(ndev, d);
+ unregister_netdev(ndev);
+ free_irq(ndev->irq, ndev);
+ of_node_put(priv->phy_node);
+ cancel_work_sync(&priv->tx_timeout_task);
+ free_netdev(ndev);
+
+ return 0;
+}
+
+static const struct of_device_id hip04_mac_match[] = {
+ { .compatible = "hisilicon,hip04-mac" },
+ { }
+};
+
+static struct platform_driver hip04_mac_driver = {
+ .probe = hip04_mac_probe,
+ .remove = hip04_remove,
+ .driver = {
+ .name = DRV_NAME,
+ .owner = THIS_MODULE,
+ .of_match_table = hip04_mac_match,
+ },
+};
+module_platform_driver(hip04_mac_driver);
+
+MODULE_DESCRIPTION("HISILICON P04 Ethernet driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:hip04-ether");
--
1.8.0
^ permalink raw reply related
* [PATCH net-next v11 2/3] net: hisilicon: new hip04 MDIO driver
From: Ding Tianhong @ 2015-01-12 8:03 UTC (permalink / raw)
To: arnd, robh+dt, davem, grant.likely
Cc: sergei.shtylyov, linux-arm-kernel, eric.dumazet, xuwei5,
zhangfei.gao, netdev, devicetree, linux
In-Reply-To: <1421049832-6224-1-git-send-email-dingtianhong@huawei.com>
From: Zhangfei Gao <zhangfei.gao@linaro.org>
Hisilicon hip04 platform mdio driver
Reuse Marvell phy drivers/net/phy/marvell.c
Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
drivers/net/ethernet/hisilicon/Kconfig | 9 ++
drivers/net/ethernet/hisilicon/Makefile | 1 +
drivers/net/ethernet/hisilicon/hip04_mdio.c | 186 ++++++++++++++++++++++++++++
3 files changed, 196 insertions(+)
create mode 100644 drivers/net/ethernet/hisilicon/hip04_mdio.c
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index e942173..a54d897 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -24,4 +24,13 @@ config HIX5HD2_GMAC
help
This selects the hix5hd2 mac family network device.
+config HIP04_ETH
+ tristate "HISILICON P04 Ethernet support"
+ select PHYLIB
+ select MARVELL_PHY
+ select MFD_SYSCON
+ ---help---
+ If you wish to compile a kernel for a hardware with hisilicon p04 SoC and
+ want to use the internal ethernet then you should answer Y to this.
+
endif # NET_VENDOR_HISILICON
diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile
index 9175e846..40115a7 100644
--- a/drivers/net/ethernet/hisilicon/Makefile
+++ b/drivers/net/ethernet/hisilicon/Makefile
@@ -3,3 +3,4 @@
#
obj-$(CONFIG_HIX5HD2_GMAC) += hix5hd2_gmac.o
+obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o
diff --git a/drivers/net/ethernet/hisilicon/hip04_mdio.c b/drivers/net/ethernet/hisilicon/hip04_mdio.c
new file mode 100644
index 0000000..b3bac25
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hip04_mdio.c
@@ -0,0 +1,186 @@
+/* Copyright (c) 2014 Linaro Ltd.
+ * Copyright (c) 2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/of_mdio.h>
+#include <linux/delay.h>
+
+#define MDIO_CMD_REG 0x0
+#define MDIO_ADDR_REG 0x4
+#define MDIO_WDATA_REG 0x8
+#define MDIO_RDATA_REG 0xc
+#define MDIO_STA_REG 0x10
+
+#define MDIO_START BIT(14)
+#define MDIO_R_VALID BIT(1)
+#define MDIO_READ (BIT(12) | BIT(11) | MDIO_START)
+#define MDIO_WRITE (BIT(12) | BIT(10) | MDIO_START)
+
+struct hip04_mdio_priv {
+ void __iomem *base;
+};
+
+#define WAIT_TIMEOUT 10
+static int hip04_mdio_wait_ready(struct mii_bus *bus)
+{
+ struct hip04_mdio_priv *priv = bus->priv;
+ int i;
+
+ for (i = 0; readl_relaxed(priv->base + MDIO_CMD_REG) & MDIO_START; i++) {
+ if (i == WAIT_TIMEOUT)
+ return -ETIMEDOUT;
+ msleep(20);
+ }
+
+ return 0;
+}
+
+static int hip04_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+ struct hip04_mdio_priv *priv = bus->priv;
+ u32 val;
+ int ret;
+
+ ret = hip04_mdio_wait_ready(bus);
+ if (ret < 0)
+ goto out;
+
+ val = regnum | (mii_id << 5) | MDIO_READ;
+ writel_relaxed(val, priv->base + MDIO_CMD_REG);
+
+ ret = hip04_mdio_wait_ready(bus);
+ if (ret < 0)
+ goto out;
+
+ val = readl_relaxed(priv->base + MDIO_STA_REG);
+ if (val & MDIO_R_VALID) {
+ dev_err(bus->parent, "SMI bus read not valid\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ val = readl_relaxed(priv->base + MDIO_RDATA_REG);
+ ret = val & 0xFFFF;
+out:
+ return ret;
+}
+
+static int hip04_mdio_write(struct mii_bus *bus, int mii_id,
+ int regnum, u16 value)
+{
+ struct hip04_mdio_priv *priv = bus->priv;
+ u32 val;
+ int ret;
+
+ ret = hip04_mdio_wait_ready(bus);
+ if (ret < 0)
+ goto out;
+
+ writel_relaxed(value, priv->base + MDIO_WDATA_REG);
+ val = regnum | (mii_id << 5) | MDIO_WRITE;
+ writel_relaxed(val, priv->base + MDIO_CMD_REG);
+out:
+ return ret;
+}
+
+static int hip04_mdio_reset(struct mii_bus *bus)
+{
+ int temp, i;
+
+ for (i = 0; i < PHY_MAX_ADDR; i++) {
+ hip04_mdio_write(bus, i, 22, 0);
+ temp = hip04_mdio_read(bus, i, MII_BMCR);
+ if (temp < 0)
+ continue;
+
+ temp |= BMCR_RESET;
+ if (hip04_mdio_write(bus, i, MII_BMCR, temp) < 0)
+ continue;
+ }
+
+ mdelay(500);
+ return 0;
+}
+
+static int hip04_mdio_probe(struct platform_device *pdev)
+{
+ struct resource *r;
+ struct mii_bus *bus;
+ struct hip04_mdio_priv *priv;
+ int ret;
+
+ bus = mdiobus_alloc_size(sizeof(struct hip04_mdio_priv));
+ if (!bus) {
+ dev_err(&pdev->dev, "Cannot allocate MDIO bus\n");
+ return -ENOMEM;
+ }
+
+ bus->name = "hip04_mdio_bus";
+ bus->read = hip04_mdio_read;
+ bus->write = hip04_mdio_write;
+ bus->reset = hip04_mdio_reset;
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(&pdev->dev));
+ bus->parent = &pdev->dev;
+ priv = bus->priv;
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ priv->base = devm_ioremap_resource(&pdev->dev, r);
+ if (IS_ERR(priv->base)) {
+ ret = PTR_ERR(priv->base);
+ goto out_mdio;
+ }
+
+ ret = of_mdiobus_register(bus, pdev->dev.of_node);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Cannot register MDIO bus (%d)\n", ret);
+ goto out_mdio;
+ }
+
+ platform_set_drvdata(pdev, bus);
+
+ return 0;
+
+out_mdio:
+ mdiobus_free(bus);
+ return ret;
+}
+
+static int hip04_mdio_remove(struct platform_device *pdev)
+{
+ struct mii_bus *bus = platform_get_drvdata(pdev);
+
+ mdiobus_unregister(bus);
+ mdiobus_free(bus);
+
+ return 0;
+}
+
+static const struct of_device_id hip04_mdio_match[] = {
+ { .compatible = "hisilicon,hip04-mdio" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, hip04_mdio_match);
+
+static struct platform_driver hip04_mdio_driver = {
+ .probe = hip04_mdio_probe,
+ .remove = hip04_mdio_remove,
+ .driver = {
+ .name = "hip04-mdio",
+ .owner = THIS_MODULE,
+ .of_match_table = hip04_mdio_match,
+ },
+};
+
+module_platform_driver(hip04_mdio_driver);
+
+MODULE_DESCRIPTION("HISILICON P04 MDIO interface driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:hip04-mdio");
--
1.8.0
^ permalink raw reply related
* Re: [PATCH 3/3] x_tables: Factor out 16bit aligment ifname_compare()
From: Richard Weinberger @ 2015-01-12 8:18 UTC (permalink / raw)
To: David Miller
Cc: joe, coreteam, netfilter-devel, linux-kernel, netdev, bhutchings,
john.fastabend, herbert, vyasevic, jiri, vfalico, therbert,
edumazet, yoshfuji, jmorris, kuznet, kadlec, kaber, pablo, kay,
stephen
In-Reply-To: <20150111.215050.458358747958425038.davem@davemloft.net>
Am 12.01.2015 um 03:50 schrieb David Miller:
> From: Richard Weinberger <richard@nod.at>
> Date: Sun, 11 Jan 2015 22:42:37 +0100
>
>> Joe, I really don't care. This is the least significant
>> patch of the series.
>> I'll no longer waste my time with that.
>
> If you're not willing to fix stylistic issues now, then nobody should
> bother wasting their time on the high level issues of your patch.
>
> Just fix these things now rather than being difficult, this is a part
> of patch review that everyone has to do, not just you.
I apologize, it was not my intention to be difficult.
Please note that the stylistic issue is not a warning produced
by checkpatch.pl. If you and netfilter folks now prefer bool
for such string compare functions I'll happily address this in
v2 of my series.
Thanks,
//richard
^ permalink raw reply
* Re: [PATCH net-next RFC 5/5] net-timestamp: tx timestamping default mode flag
From: Richard Cochran @ 2015-01-12 8:26 UTC (permalink / raw)
To: Willem de Bruijn
Cc: Network Development, David Miller, Eric Dumazet, Andy Lutomirski
In-Reply-To: <CA+FuTSfDVbSCGfJDjY3kv=SuGT221OuYAFVxDvRqouNcrJbvYw@mail.gmail.com>
On Sun, Jan 11, 2015 at 08:49:00PM -0500, Willem de Bruijn wrote:
> Just so I understand: ptp has no use for the sw tstamps
> that would be generated with this flag, but is otherwise
> okay with enabling counters to order tx timestamps
> (OPT_ID) and disabling payload (OPT_TSONLY)?
Yes.
> In the documentation, I would like to strongly suggest all
> processes to enable these, even in absence of this default.
> because that is more robust wrt the sysctl (if merged).
Sounds good.
Thanks,
Richard
^ permalink raw reply
* [PATCH 1/5] net: atarilance: Remove obsolete IRQ_TYPE_PRIO
From: Geert Uytterhoeven @ 2015-01-12 8:40 UTC (permalink / raw)
To: David S. Miller, Jean-Christophe Plagniol-Villard, Tomi Valkeinen,
Jaroslav Kysela, Takashi Iwai
Cc: linux-m68k, linux-kernel, Geert Uytterhoeven, netdev
In-Reply-To: <1421052021-12560-1-git-send-email-geert@linux-m68k.org>
IRQ_TYPE_PRIO is no longer used by the Atari platform interrupt code
since commit 734085651c9b80aa ("[PATCH] m68k: convert atari irq code")
in v2.6.18-rc1, so drop it.
Note that its value has been reused for a different purpose
(IRQ_TYPE_EDGE_FALLING) since commit 6a6de9ef5850d063 ("[PATCH] genirq:
core") in v2.6.18-rc1.
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: netdev@vger.kernel.org
---
drivers/net/ethernet/amd/atarilance.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
index e07ce5ff2d48bf93..b10964e8cb5469ce 100644
--- a/drivers/net/ethernet/amd/atarilance.c
+++ b/drivers/net/ethernet/amd/atarilance.c
@@ -553,8 +553,8 @@ static unsigned long __init lance_probe1( struct net_device *dev,
if (lp->cardtype == PAM_CARD ||
memaddr == (unsigned short *)0xffe00000) {
/* PAMs card and Riebl on ST use level 5 autovector */
- if (request_irq(IRQ_AUTO_5, lance_interrupt, IRQ_TYPE_PRIO,
- "PAM,Riebl-ST Ethernet", dev)) {
+ if (request_irq(IRQ_AUTO_5, lance_interrupt, 0,
+ "PAM,Riebl-ST Ethernet", dev)) {
printk( "Lance: request for irq %d failed\n", IRQ_AUTO_5 );
return 0;
}
@@ -567,8 +567,8 @@ static unsigned long __init lance_probe1( struct net_device *dev,
printk( "Lance: request for VME interrupt failed\n" );
return 0;
}
- if (request_irq(irq, lance_interrupt, IRQ_TYPE_PRIO,
- "Riebl-VME Ethernet", dev)) {
+ if (request_irq(irq, lance_interrupt, 0, "Riebl-VME Ethernet",
+ dev)) {
printk( "Lance: request for irq %u failed\n", irq );
return 0;
}
--
1.9.1
^ permalink raw reply related
* Re: [PATCH 3/3] x_tables: Factor out 16bit aligment ifname_compare()
From: Joe Perches @ 2015-01-12 8:40 UTC (permalink / raw)
To: Richard Weinberger
Cc: David Miller, coreteam, netfilter-devel, linux-kernel, netdev,
bhutchings, john.fastabend, herbert, vyasevic, jiri, vfalico,
therbert, edumazet, yoshfuji, jmorris, kuznet, kadlec, kaber,
pablo, kay, stephen
In-Reply-To: <54B3834D.6090602@nod.at>
On Mon, 2015-01-12 at 09:18 +0100, Richard Weinberger wrote:
> Am 12.01.2015 um 03:50 schrieb David Miller:
> > From: Richard Weinberger <richard@nod.at>
> > Date: Sun, 11 Jan 2015 22:42:37 +0100
> >
> >> Joe, I really don't care. This is the least significant
> >> patch of the series.
> >> I'll no longer waste my time with that.
> >
> > If you're not willing to fix stylistic issues now, then nobody should
> > bother wasting their time on the high level issues of your patch.
> >
> > Just fix these things now rather than being difficult, this is a part
> > of patch review that everyone has to do, not just you.
>
> I apologize, it was not my intention to be difficult.
No worries.
The unsigned long return is kind of odd with a
compare_<foo> name as those are generally, as Jan
mentioned, signed comparison style return values.
I'd probably use a different function name too
bool ifname_equal(const char *a, const char *b, const char *mask)
{
}
to try to make the return value more obvious too.
> If you and netfilter folks now prefer bool
> for such string compare functions I'll happily address this in
> v2 of my series.
Thanks
^ permalink raw reply
* Re: [PATCH net-next v11 3/3] net: hisilicon: new hip04 ethernet driver
From: Arnd Bergmann @ 2015-01-12 8:52 UTC (permalink / raw)
To: linux-arm-kernel
Cc: Ding Tianhong, robh+dt, davem, grant.likely, devicetree, linux,
sergei.shtylyov, eric.dumazet, netdev, xuwei5, zhangfei.gao
In-Reply-To: <1421049832-6224-4-git-send-email-dingtianhong@huawei.com>
On Monday 12 January 2015 16:03:52 Ding Tianhong wrote:
> Support Hisilicon hip04 ethernet driver, including 100M / 1000M controller.
> The controller has no tx done interrupt, reclaim xmitted buffer in the poll.
Looks great to me in the latest version. Dave, unless you have further
concerns, please apply.
> v11: Add ethtool support for tx coalecse getting and setting, the xmit_more
> is not supported for this patch, but I think it could work for hip04,
> will support it later after some tests for performance better.
>
> Here are some performance test results by ping and iperf(add tx_coalesce_frames/users),
> it looks that the performance and latency is more better by tx_coalesce_frames/usecs.
>
> - Before:
> $ ping 192.168.1.1 ...
> --- 192.168.1.1 ping statistics ---
> 24 packets transmitted, 24 received, 0% packet loss, time 22999ms
> rtt min/avg/max/mdev = 0.180/0.202/0.403/0.043 ms
>
> $ iperf -c 192.168.1.1 ...
> [ ID] Interval Transfer Bandwidth
> [ 3] 0.0- 1.0 sec 115 MBytes 945 Mbits/sec
Just to be sure: you were running the two tests simultaneously, right?
Arnd
^ permalink raw reply
* Re: [PATCH net] alx: fix alx_poll()
From: Johannes Berg @ 2015-01-12 8:55 UTC (permalink / raw)
To: Eric Dumazet
Cc: Oded Gabbay, David S. Miller, Eric Dumazet,
netdev@vger.kernel.org, Willem de Bruijn, Bridgman, John,
Elifaz, Dana
In-Reply-To: <1421001138.5947.100.camel@edumazet-glaptop2.roam.corp.google.com>
On Sun, 2015-01-11 at 18:32 +0000, Eric Dumazet wrote:
> Commit d75b1ade567f ("net: less interrupt masking in NAPI") uncovered
> wrong alx_poll() behavior.
[...]
Thanks Eric and Oded! :)
johannes
^ permalink raw reply
* Re: [PATCH net-next v11 3/3] net: hisilicon: new hip04 ethernet driver
From: Ding Tianhong @ 2015-01-12 9:04 UTC (permalink / raw)
To: Arnd Bergmann, linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
Cc: robh+dt-DgEjT+Ai2ygdnm+yROfE0A, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
grant.likely-QSEj5FYQhm4dnm+yROfE0A,
devicetree-u79uwXL29TY76Z2rM5mHXA, linux-lFZ/pmaqli7XmaaqVzeoHQ,
sergei.shtylyov-M4DtvfQ/ZS1MRgGoP+s0PdBPR1lH4CV8,
eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
netdev-u79uwXL29TY76Z2rM5mHXA, xuwei5-C8/M+/jPZTeaMJb+Lgu22Q,
zhangfei.gao-QSEj5FYQhm4dnm+yROfE0A
In-Reply-To: <19225293.OPozO1Cbk5@wuerfel>
On 2015/1/12 16:52, Arnd Bergmann wrote:
> On Monday 12 January 2015 16:03:52 Ding Tianhong wrote:
>> Support Hisilicon hip04 ethernet driver, including 100M / 1000M controller.
>> The controller has no tx done interrupt, reclaim xmitted buffer in the poll.
>
> Looks great to me in the latest version. Dave, unless you have further
> concerns, please apply.
>
>> v11: Add ethtool support for tx coalecse getting and setting, the xmit_more
>> is not supported for this patch, but I think it could work for hip04,
>> will support it later after some tests for performance better.
>>
>> Here are some performance test results by ping and iperf(add tx_coalesce_frames/users),
>> it looks that the performance and latency is more better by tx_coalesce_frames/usecs.
>>
>> - Before:
>> $ ping 192.168.1.1 ...
>> --- 192.168.1.1 ping statistics ---
>> 24 packets transmitted, 24 received, 0% packet loss, time 22999ms
>> rtt min/avg/max/mdev = 0.180/0.202/0.403/0.043 ms
>>
>> $ iperf -c 192.168.1.1 ...
>> [ ID] Interval Transfer Bandwidth
>> [ 3] 0.0- 1.0 sec 115 MBytes 945 Mbits/sec
>
> Just to be sure: you were running the two tests simultaneously, right?
>
> Arnd
>
Yes, sure, and try several times, record the more appropriate test results. :)
Ding
> .
>
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: Build regressions/improvements in v3.19-rc4
From: Geert Uytterhoeven @ 2015-01-12 9:09 UTC (permalink / raw)
To: linux-kernel@vger.kernel.org; +Cc: netdev@vger.kernel.org
In-Reply-To: <1421052212-13287-1-git-send-email-geert@linux-m68k.org>
On Mon, Jan 12, 2015 at 9:43 AM, Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
> JFYI, when comparing v3.19-rc4[1] to v3.19-rc3[3], the summaries are:
> - build errors: +6/-67
Nothing serious, but
+ error: br_input.c: undefined reference to `arp_send': => .text+0x82e77)
+ error: br_input.c: undefined reference to `arp_tbl': => .text+0x82d8b)
i386-randconfig
> [1] http://kisskb.ellerman.id.au/kisskb/head/8294/ (255 out of 262 configs)
> [3] http://kisskb.ellerman.id.au/kisskb/head/8277/ (255 out of 262 configs)
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* [PATCH] Corrected the comment describing the ndo operations to reflect the actual prototype for couple of operations
From: B Viswanath @ 2015-01-12 9:16 UTC (permalink / raw)
To: netdev; +Cc: B Viswanath
Corrected the comment describing the ndo operations to
reflect the actual prototype for couple of operations
Signed-off-by: B Viswanath <marichika4@gmail.com>
---
include/linux/netdevice.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 679e6e9..7f794db 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -852,11 +852,11 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* 3. Update dev->stats asynchronously and atomically, and define
* neither operation.
*
- * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16t vid);
+ * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid);
* If device support VLAN filtering this function is called when a
* VLAN id is registered.
*
- * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid);
+ * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid);
* If device support VLAN filtering this function is called when a
* VLAN id is unregistered.
*
--
2.1.0
^ permalink raw reply related
* [PATCH net-next] openvswitch: Introduce vport_route_lookup
From: Fan Du @ 2015-01-12 9:14 UTC (permalink / raw)
To: pshelar; +Cc: dev, netdev, fengyuleidian0615
Introduce vport_route_lookup to consolidate route lookup
shared by vxlan, gre, and geneve ports.
Signed-off-by: Fan Du <fan.du@intel.com>
---
net/openvswitch/vport-geneve.c | 11 +----------
net/openvswitch/vport-gre.c | 10 +---------
net/openvswitch/vport-vxlan.c | 10 +---------
net/openvswitch/vport.c | 20 ++++++++++++++++++++
net/openvswitch/vport.h | 6 ++++++
5 files changed, 29 insertions(+), 28 deletions(-)
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 484864d..8eef7f6 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -191,16 +191,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
}
tun_key = &tun_info->tunnel;
-
- /* Route lookup */
- memset(&fl, 0, sizeof(fl));
- fl.daddr = tun_key->ipv4_dst;
- fl.saddr = tun_key->ipv4_src;
- fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
- fl.flowi4_mark = skb->mark;
- fl.flowi4_proto = IPPROTO_UDP;
-
- rt = ip_route_output_key(net, &fl);
+ rt = vport_route_lookup(tun_key, skb, &fl, net, IPPROTO_UDP);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto error;
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index d4168c4..2f8fdbe 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -148,15 +148,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
}
tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
- /* Route lookup */
- memset(&fl, 0, sizeof(fl));
- fl.daddr = tun_key->ipv4_dst;
- fl.saddr = tun_key->ipv4_src;
- fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
- fl.flowi4_mark = skb->mark;
- fl.flowi4_proto = IPPROTO_GRE;
-
- rt = ip_route_output_key(net, &fl);
+ rt = vport_route_lookup(tun_key, skb, &fl, net, IPPROTO_GRE);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto err_free_skb;
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index d7c46b3..13a64ae 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -158,15 +158,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
}
tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
- /* Route lookup */
- memset(&fl, 0, sizeof(fl));
- fl.daddr = tun_key->ipv4_dst;
- fl.saddr = tun_key->ipv4_src;
- fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
- fl.flowi4_mark = skb->mark;
- fl.flowi4_proto = IPPROTO_UDP;
-
- rt = ip_route_output_key(net, &fl);
+ rt = vport_route_lookup(tun_key, skb, &fl, net, IPPROTO_UDP);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto error;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 2034c6d..32b0edf 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -633,3 +633,23 @@ int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
return vport->ops->get_egress_tun_info(vport, skb, info);
}
+
+struct rtable *vport_route_lookup(struct ovs_key_ipv4_tunnel *tun_key,
+ struct sk_buff *skb,
+ struct flowi4 *fl,
+ struct net *net,
+ u8 protocol)
+{
+ struct rtable *rt;
+
+ memset(fl, 0, sizeof(*fl));
+ fl->daddr = tun_key->ipv4_dst;
+ fl->saddr = tun_key->ipv4_src;
+ fl->flowi4_tos = RT_TOS(tun_key->ipv4_tos);
+ fl->flowi4_mark = skb->mark;
+ fl->flowi4_proto = protocol;
+
+ rt = ip_route_output_key(net, fl);
+ return rt;
+}
+EXPORT_SYMBOL_GPL(vport_route_lookup);
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 99c8e71..4487a97 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -68,6 +68,12 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct ovs_tunnel_info *info);
+struct rtable *vport_route_lookup(struct ovs_key_ipv4_tunnel *tun_key,
+ struct sk_buff *skb,
+ struct flowi4 *fl,
+ struct net *net,
+ u8 protocol);
+
/* The following definitions are for implementers of vport devices: */
struct vport_err_stats {
--
1.7.1
^ permalink raw reply related
* [PATCH net-next v3] tcp: avoid reducing cwnd when ACK+DSACK is received
From: Sébastien Barré @ 2015-01-12 9:30 UTC (permalink / raw)
To: David Miller
Cc: Sébastien Barré, Neal Cardwell, Yuchung Cheng,
Eric Dumazet, netdev, Gregory Detal, Nandita Dukkipati
With TLP, the peer may reply to a probe with an
ACK+D-SACK, with ack value set to tlp_high_seq. In the current code,
such ACK+DSACK will be missed and only at next, higher ack will the TLP
episode be considered done. Since the DSACK is not present anymore,
this will cost a cwnd reduction.
This patch ensures that this scenario does not cause a cwnd reduction, since
receiving an ACK+DSACK indicates that both the initial segment and the probe
have been received by the peer.
The following packetdrill test, from Neal Cardwell, validates this patch:
// Establish a connection.
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+.020 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
// Send 1 packet.
+0 write(4, ..., 1000) = 1000
+0 > P. 1:1001(1000) ack 1
// Loss probe retransmission.
// packets_out == 1 => schedule PTO in max(2*RTT, 1.5*RTT + 200ms)
// In this case, this means: 1.5*RTT + 200ms = 230ms
+.230 > P. 1:1001(1000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10 }%
// Receiver ACKs at tlp_high_seq with a DSACK,
// indicating they received the original packet and probe.
+.020 < . 1:1(0) ack 1001 win 257 <sack 1:1001,nop,nop>
+0 %{ assert tcpi_snd_cwnd == 10 }%
// Send another packet.
+0 write(4, ..., 1000) = 1000
+0 > P. 1001:2001(1000) ack 1
// Receiver ACKs above tlp_high_seq, which should end the TLP episode
// if we haven't already. We should not reduce cwnd.
+.020 < . 1:1(0) ack 2001 win 257
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
Credits:
-Gregory helped in finding that tcp_process_tlp_ack was where the cwnd
got reduced in our MPTCP tests.
-Neal wrote the packetdrill test above
-Yuchung reworked the patch to make it more readable.
Cc: Gregory Detal <gregory.detal@uclouvain.be>
Cc: Nandita Dukkipati <nanditad@google.com>
Tested-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Sébastien Barré <sebastien.barre@uclouvain.be>
---
Changes:
-Added Neal's test in commit text
-applied Yuchung's changes to if conditions (for readability)
-removed delayed ack as main reason for triggering an ACK+DSACK, as
Eric mentioned that lost ack has higher chances to be the trigger.
Neal, Yuchung, Eric: I added Tested-by/Reviewed-by as I thought it was
appropriate, please correct if it is not. Thanks again for you help !
net/ipv4/tcp_input.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 075ab4d..71fb37c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3358,34 +3358,34 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
}
/* This routine deals with acks during a TLP episode.
+ * We mark the end of a TLP episode on receiving TLP dupack or when
+ * ack is after tlp_high_seq.
* Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
*/
static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
- bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
- !(flag & (FLAG_SND_UNA_ADVANCED |
- FLAG_NOT_DUP | FLAG_DATA_SACKED));
- /* Mark the end of TLP episode on receiving TLP dupack or when
- * ack is after tlp_high_seq.
- */
- if (is_tlp_dupack) {
- tp->tlp_high_seq = 0;
+ if (before(ack, tp->tlp_high_seq))
return;
- }
- if (after(ack, tp->tlp_high_seq)) {
+ if (flag & FLAG_DSACKING_ACK) {
+ /* This DSACK means original and TLP probe arrived; no loss */
+ tp->tlp_high_seq = 0;
+ } else if (after(ack, tp->tlp_high_seq)) {
+ /* ACK advances: there was a loss, so reduce cwnd. Reset
+ * tlp_high_seq in tcp_init_cwnd_reduction()
+ */
+ tcp_init_cwnd_reduction(sk);
+ tcp_set_ca_state(sk, TCP_CA_CWR);
+ tcp_end_cwnd_reduction(sk);
+ tcp_try_keep_open(sk);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPLOSSPROBERECOVERY);
+ } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
+ FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
+ /* Pure dupack: original and TLP probe arrived; no loss */
tp->tlp_high_seq = 0;
- /* Don't reduce cwnd if DSACK arrives for TLP retrans. */
- if (!(flag & FLAG_DSACKING_ACK)) {
- tcp_init_cwnd_reduction(sk);
- tcp_set_ca_state(sk, TCP_CA_CWR);
- tcp_end_cwnd_reduction(sk);
- tcp_try_keep_open(sk);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPLOSSPROBERECOVERY);
- }
}
}
--
tg: (44d84d7..) net-next/tlp-dsack-handling (depends on: net-next/master)
^ permalink raw reply related
* Re: [PATCH] brcmfmac: avoid duplicated suspend/resume operation
From: Arend van Spriel @ 2015-01-12 10:00 UTC (permalink / raw)
To: Fu, Zhonghui
Cc: brudley, Franky Lin, meuleman, kvalo, linville, pieterpg,
hdegoede, wens, linux-wireless, brcm80211-dev-list, netdev,
linux-kernel@vger.kernel.org
In-Reply-To: <54B36C88.3030609@linux.intel.com>
On 01/12/15 07:41, Fu, Zhonghui wrote:
> From 8685c3c2746b4275fc808d9db23c364b2f54b52a Mon Sep 17 00:00:00 2001
> From: Zhonghui Fu<zhonghui.fu@linux.intel.com>
> Date: Mon, 12 Jan 2015 14:25:46 +0800
> Subject: [PATCH] brcmfmac: avoid duplicated suspend/resume operation
>
> WiFi chip has 2 SDIO functions, and PM core will trigger
> twice suspend/resume operations for one WiFi chip to do
> the same things. This patch avoid this case.
>
> Acked-by: Arend van Spriel<arend@broadcom.com>
> Acked-by: Sergei Shtylyov<sergei.shtylyov@cogentembedded.com>
> Signed-off-by: Zhonghui Fu<zhonghui.fu@linux.intel.com>
This patch needs to be rebased.
Kalle,
Please drop this one.
Regards,
Arend
> ---
> drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 21 +++++++++++++++++----
> 1 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
> index 9880dae..8f71485 100644
> --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
> +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
> @@ -1070,7 +1070,7 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func,
> */
> if ((sdio_get_host_pm_caps(sdiodev->func[1])& MMC_PM_KEEP_POWER)&&
> ((sdio_get_host_pm_caps(sdiodev->func[1])& MMC_PM_WAKE_SDIO_IRQ) ||
> - (sdiodev->pdata&& sdiodev->pdata->oob_irq_supported)))
> + (sdiodev->pdata->oob_irq_supported)))
> bus_if->wowl_supported = true;
> #endif
>
> @@ -1139,11 +1139,17 @@ void brcmf_sdio_wowl_config(struct device *dev, bool enabled)
> static int brcmf_ops_sdio_suspend(struct device *dev)
> {
> struct brcmf_bus *bus_if = dev_get_drvdata(dev);
> - struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
> + struct brcmf_sdio_dev *sdiodev;
> mmc_pm_flag_t sdio_flags;
> + struct sdio_func *func = dev_to_sdio_func(dev);
>
> brcmf_dbg(SDIO, "Enter\n");
>
> + if (func->num == 2)
> + return 0;
> +
> + sdiodev = bus_if->bus_priv.sdio;
> +
> atomic_set(&sdiodev->suspend, true);
>
> if (sdiodev->wowl_enabled) {
> @@ -1164,10 +1170,17 @@ static int brcmf_ops_sdio_suspend(struct device *dev)
> static int brcmf_ops_sdio_resume(struct device *dev)
> {
> struct brcmf_bus *bus_if = dev_get_drvdata(dev);
> - struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
> + struct brcmf_sdio_dev *sdiodev;
> + struct sdio_func *func = dev_to_sdio_func(dev);
>
> brcmf_dbg(SDIO, "Enter\n");
> - if (sdiodev->pdata&& sdiodev->pdata->oob_irq_supported)
> +
> + if (func->num == 2)
> + return 0;
> +
> + sdiodev = bus_if->bus_priv.sdio;
> +
> + if (sdiodev->pdata->oob_irq_supported)
> disable_irq_wake(sdiodev->pdata->oob_irq_nr);
> brcmf_sdio_wd_timer(sdiodev->bus, BRCMF_WD_POLL_MS);
> atomic_set(&sdiodev->suspend, false);
> -- 1.7.1
>
^ permalink raw reply
* [PATCH] moving from pci to dma
From: Quentin Lambert @ 2015-01-12 10:02 UTC (permalink / raw)
To: Chas Williams; +Cc: linux-atm-general, netdev, linux-kernel
This patch replaces the references to the deprecated pci api with the
corresponding dma api.
To ensure that it was possible to access the dev field of pci_dev without
checking for nullity we looked for similar access in the execution
flow.
The most straight forward are "&id->dev" and "pci_get_drvdata(id)" where
id is the variable whose type is pci_dev.
We also found "pci_enable_device(id)" to be satisfying since the call
accesses other field without checking for nullity.
Quentin Lambert (1):
atm: remove deprecated use of pci api
drivers/atm/eni.c | 8 +++++---
drivers/atm/he.c | 2 +-
drivers/atm/lanai.c | 9 +++++----
drivers/atm/nicstar.c | 4 ++--
drivers/atm/solos-pci.c | 2 +-
drivers/atm/zatm.c | 8 +++++---
6 files changed, 19 insertions(+), 14 deletions(-)
--
1.9.1
^ permalink raw reply
* [PATCH] atm: remove deprecated use of pci api
From: Quentin Lambert @ 2015-01-12 10:03 UTC (permalink / raw)
To: Chas Williams; +Cc: linux-atm-general, netdev, linux-kernel
Replace occurences of the pci api by appropriate call to the dma api.
A simplified version of the semantic patch that finds this problem is as
follows: (http://coccinelle.lip6.fr)
@deprecated@
idexpression id;
position p;
@@
(
pci_dma_supported@p ( id, ...)
|
pci_alloc_consistent@p ( id, ...)
)
@bad1@
idexpression id;
position deprecated.p;
@@
...when != &id->dev
when != pci_get_drvdata ( id )
when != pci_enable_device ( id )
(
pci_dma_supported@p ( id, ...)
|
pci_alloc_consistent@p ( id, ...)
)
@depends on !bad1@
idexpression id;
expression direction;
position deprecated.p;
@@
(
- pci_dma_supported@p ( id,
+ dma_supported ( &id->dev,
...
+ , GFP_ATOMIC
)
|
- pci_alloc_consistent@p ( id,
+ dma_alloc_coherent ( &id->dev,
...
+ , GFP_ATOMIC
)
)
Signed-off-by: Quentin Lambert <lambert.quentin@gmail.com>
---
drivers/atm/eni.c | 8 +++++---
drivers/atm/he.c | 2 +-
drivers/atm/lanai.c | 9 +++++----
drivers/atm/nicstar.c | 4 ++--
drivers/atm/solos-pci.c | 2 +-
drivers/atm/zatm.c | 8 +++++---
6 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index c7fab3e..a128020 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -2246,7 +2246,8 @@ static int eni_init_one(struct pci_dev *pci_dev,
goto err_disable;
zero = &eni_dev->zero;
- zero->addr = pci_alloc_consistent(pci_dev, ENI_ZEROES_SIZE, &zero->dma);
+ zero->addr = dma_alloc_coherent(&pci_dev->dev, ENI_ZEROES_SIZE,
+ &zero->dma, GFP_ATOMIC);
if (!zero->addr)
goto err_kfree;
@@ -2277,7 +2278,8 @@ err_eni_release:
err_unregister:
atm_dev_deregister(dev);
err_free_consistent:
- pci_free_consistent(pci_dev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
+ dma_free_coherent(&pci_dev->dev, ENI_ZEROES_SIZE, zero->addr,
+ zero->dma);
err_kfree:
kfree(eni_dev);
err_disable:
@@ -2302,7 +2304,7 @@ static void eni_remove_one(struct pci_dev *pdev)
eni_do_release(dev);
atm_dev_deregister(dev);
- pci_free_consistent(pdev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
+ dma_free_coherent(&pdev->dev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
kfree(ed);
pci_disable_device(pdev);
}
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index c39702b..69a2598 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -359,7 +359,7 @@ static int he_init_one(struct pci_dev *pci_dev,
if (pci_enable_device(pci_dev))
return -EIO;
- if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32)) != 0) {
+ if (dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32)) != 0) {
printk(KERN_WARNING "he: no suitable dma available\n");
err = -EIO;
goto init_one_failure;
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c
index 93eaf8d..70fe734 100644
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -346,7 +346,8 @@ static void lanai_buf_allocate(struct lanai_buffer *buf,
* everything, but the way the lanai uses DMA memory would
* make that a terrific pain. This is much simpler.
*/
- buf->start = pci_alloc_consistent(pci, size, &buf->dmaaddr);
+ buf->start = dma_alloc_coherent(&pci->dev, size, &buf->dmaaddr,
+ GFP_ATOMIC);
if (buf->start != NULL) { /* Success */
/* Lanai requires 256-byte alignment of DMA bufs */
APRINTK((buf->dmaaddr & ~0xFFFFFF00) == 0,
@@ -372,7 +373,7 @@ static void lanai_buf_deallocate(struct lanai_buffer *buf,
struct pci_dev *pci)
{
if (buf->start != NULL) {
- pci_free_consistent(pci, lanai_buf_size(buf),
+ dma_free_coherent(&pci->dev, lanai_buf_size(buf),
buf->start, buf->dmaaddr);
buf->start = buf->end = buf->ptr = NULL;
}
@@ -1953,12 +1954,12 @@ static int lanai_pci_start(struct lanai_dev *lanai)
return -ENXIO;
}
pci_set_master(pci);
- if (pci_set_dma_mask(pci, DMA_BIT_MASK(32)) != 0) {
+ if (dma_set_mask(&pci->dev, DMA_BIT_MASK(32)) != 0) {
printk(KERN_WARNING DEV_LABEL
"(itf %d): No suitable DMA available.\n", lanai->number);
return -EBUSY;
}
- if (pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32)) != 0) {
+ if (dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32)) != 0) {
printk(KERN_WARNING DEV_LABEL
"(itf %d): No suitable DMA available.\n", lanai->number);
return -EBUSY;
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 9988ac9..aabb528 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -370,8 +370,8 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
ns_init_card_error(card, error);
return error;
}
- if ((pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)) != 0) ||
- (pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32)) != 0)) {
+ if ((dma_set_mask(&pcidev->dev, DMA_BIT_MASK(32)) != 0) ||
+ (dma_set_coherent_mask(&pcidev->dev, DMA_BIT_MASK(32)) != 0)) {
printk(KERN_WARNING
"nicstar%d: No suitable DMA available.\n", i);
error = 2;
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 21b0bc6..48531b8 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -1210,7 +1210,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id)
goto out;
}
- err = pci_set_dma_mask(dev, DMA_BIT_MASK(32));
+ err = dma_set_mask(&dev->dev, DMA_BIT_MASK(32));
if (err) {
dev_warn(&dev->dev, "Failed to set 32-bit DMA mask\n");
goto out;
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 969c3c2..b6456b1 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1306,7 +1306,8 @@ static int zatm_start(struct atm_dev *dev)
if (!mbx_entries[i])
continue;
- mbx = pci_alloc_consistent(pdev, 2*MBX_SIZE(i), &mbx_dma);
+ mbx = dma_alloc_coherent(&pdev->dev, 2*MBX_SIZE(i), &mbx_dma,
+ GFP_ATOMIC);
if (!mbx) {
error = -ENOMEM;
goto out;
@@ -1318,7 +1319,8 @@ static int zatm_start(struct atm_dev *dev)
if (((unsigned long)mbx ^ mbx_dma) & 0xffff) {
printk(KERN_ERR DEV_LABEL "(itf %d): system "
"bus incompatible with driver\n", dev->number);
- pci_free_consistent(pdev, 2*MBX_SIZE(i), mbx, mbx_dma);
+ dma_free_coherent(&pdev->dev, 2*MBX_SIZE(i), mbx,
+ mbx_dma);
error = -ENODEV;
goto out;
}
@@ -1354,7 +1356,7 @@ out_tx:
kfree(zatm_dev->tx_map);
out:
while (i-- > 0) {
- pci_free_consistent(pdev, 2*MBX_SIZE(i),
+ dma_free_coherent(&pdev->dev, 2*MBX_SIZE(i),
(void *)zatm_dev->mbx_start[i],
zatm_dev->mbx_dma[i]);
}
--
1.9.1
^ permalink raw reply related
* Re: [PATCH v4 4/4] can: kvaser_usb: Retry first bulk transfer on -ETIMEDOUT
From: Ahmed S. Darwish @ 2015-01-12 10:14 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
Greg Kroah-Hartman, Linux-USB, Linux-CAN, netdev, LKML
In-Reply-To: <54B2E23E.1090108@pengutronix.de>
On Sun, Jan 11, 2015 at 09:51:10PM +0100, Marc Kleine-Budde wrote:
> On 01/11/2015 09:45 PM, Ahmed S. Darwish wrote:
> > From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> >
> > (This is a draft patch, I'm not sure if this fixes the USB
> > bug or only its psymptom. Feedback from the linux-usb folks
> > is really appreciated.)
> >
> > When plugging the Kvaser USB/CAN dongle the first time, everything
> > works as expected and all of the transfers from and to the USB
> > device succeeds.
> >
> > Meanwhile, after unplugging the device and plugging it again, the
> > first bulk transfer _always_ returns an -ETIMEDOUT. The following
> > behaviour was observied:
> >
> > - Setting higher timeout values for the first bulk transfer never
> > solved the issue.
> >
> > - Unloading, then loading, our kvaser_usb module in question
> > __always__ solved the issue.
> >
> > - Checking first bulk transfer status, and retry the transfer
> > again in case of an -ETIMEDOUT also __always__ solved the issue.
> > This is what the patch below does.
> >
> > - In the testing done so far, this issue appears only on laptops
> > but never on PCs (possibly power related?)
> >
> > Signed-off-by: Ahmed S. Darwish <ahmed.darwish@valeo.com>
>
> Does this patch apply apply between 3 and 4? If not, please re-arrange
> the series. As this is a bug fix, patches 1, 2 and 4 will go via
> net/master, 3 will go via net-next/master.
>
Since no one complained earlier, I guess this issue only affects
USBCAN devices. That's why I've based it above patch #3: adding
USBCAN hardware support.
Nonetheless, it won't do any harm for the current Leaf-only
driver. So _if_ this is the correct fix, I will update the commit
log, refactor the check into a 'do { } while()' loop, and then
base it above the Leaf-only net/master fixes on patch #1, and #2.
Any feedback on the USB side of things?
Thanks,
Darwish
^ permalink raw reply
* [patch net-next] net: sched: fix skb->protocol use in case of accelerated vlan path
From: Jiri Pirko @ 2015-01-12 10:19 UTC (permalink / raw)
To: netdev; +Cc: davem, jhs
tc code implicitly considers skb->protocol even in case of accelerated
vlan paths and expects vlan protocol type here. However, on rx path,
if the vlan header was already stripped, skb->protocol contains value
of next header. Similar situation is on tx path.
So for skbs that use skb->vlan_tci for tagging, use skb->vlan_proto instead.
Reported-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
Note that this is present since vlan accel was introduced, pre-git times.
Please consider this for stable.
include/net/pkt_sched.h | 12 ++++++++++++
net/sched/act_csum.c | 2 +-
net/sched/cls_flow.c | 8 ++++----
net/sched/em_ipset.c | 2 +-
net/sched/em_meta.c | 2 +-
net/sched/sch_api.c | 2 +-
net/sched/sch_dsmark.c | 6 +++---
net/sched/sch_teql.c | 4 ++--
8 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 27a3383..cd590f7 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -3,6 +3,7 @@
#include <linux/jiffies.h>
#include <linux/ktime.h>
+#include <linux/if_vlan.h>
#include <net/sch_generic.h>
struct qdisc_walker {
@@ -114,6 +115,17 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res);
+static inline __be16 tc_skb_protocol(struct sk_buff *skb)
+{
+ /* We need to take extra care in case the skb came via
+ * vlan accelerated path. In that case, use skb->vlan_proto
+ * as the original vlan header was already stripped.
+ */
+ if (vlan_tx_tag_present(skb))
+ return skb->vlan_proto;
+ return skb->protocol;
+}
+
/* Calculate maximal size of packet seen by hard_start_xmit
routine of this device.
*/
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index edbf40d..4cd5cf1 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb,
if (unlikely(action == TC_ACT_SHOT))
goto drop;
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
goto drop;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 15d68f2..4614103 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
if (flow->dst)
return ntohl(flow->dst);
- return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
+ return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
@@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys
if (flow->ports)
return ntohs(flow->port16[1]);
- return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
+ return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_iif(const struct sk_buff *skb)
@@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, src.u3.ip));
case htons(ETH_P_IPV6):
@@ -156,7 +156,7 @@ fallback:
static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, dst.u3.ip));
case htons(ETH_P_IPV6):
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 5b4a4ef..a3d79c8 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
struct net_device *dev, *indev = NULL;
int ret, network_offset;
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
acpar.family = NFPROTO_IPV4;
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index c8f8c39..2159981 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -197,7 +197,7 @@ META_COLLECTOR(int_priority)
META_COLLECTOR(int_protocol)
{
/* Let userspace take care of the byte ordering */
- dst->value = skb->protocol;
+ dst->value = tc_skb_protocol(skb);
}
META_COLLECTOR(int_pkttype)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 76f402e..243b7d1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1807,7 +1807,7 @@ done:
int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- __be16 protocol = skb->protocol;
+ __be16 protocol = tc_skb_protocol(skb);
int err;
for (; tp; tp = rcu_dereference_bh(tp->next)) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 227114f..66700a6 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
if (p->set_tc_index) {
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
if (skb_cow_head(skb, sizeof(struct iphdr)))
goto drop;
@@ -289,7 +289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
index = skb->tc_index & (p->indices - 1);
pr_debug("index %d->%d\n", skb->tc_index, index);
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
p->value[index]);
@@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
*/
if (p->mask[index] != 0xff || p->value[index])
pr_warn("%s: unsupported protocol %d\n",
- __func__, ntohs(skb->protocol));
+ __func__, ntohs(tc_skb_protocol(skb)));
break;
}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6ada423..2ad0c40 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -249,8 +249,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
char haddr[MAX_ADDR_LEN];
neigh_ha_snapshot(haddr, n, dev);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
- NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
+ haddr, NULL, skb->len);
if (err < 0)
err = -EINVAL;
--
1.9.3
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox