* [PATCH RFC v3 09/12] vti4: Use the on xfrm_lookup returned dst_entry directly
From: Steffen Klassert @ 2014-01-27 10:29 UTC (permalink / raw)
To: netdev; +Cc: Steffen Klassert, Christophe Gouault, Saurabh Mohan
In-Reply-To: <1390818577-19589-1-git-send-email-steffen.klassert@secunet.com>
We need to be protocol family independent to support
inter address family tunneling with vti. So use a
dst_entry instead of the ipv4 rtable in vti_tunnel_xmit.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/ip_vti.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1432bec..b8d6184 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -111,7 +111,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct rtable *rt; /* Route to the other host */
+ struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
struct flowi fl;
int err;
@@ -123,14 +123,14 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb->mark = be32_to_cpu(tunnel->parms.o_key);
xfrm_decode_session(skb, &fl, AF_INET);
- if (!skb_dst(skb)) {
+ if (!dst) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
- dst_hold(skb_dst(skb));
- rt = (struct rtable *)xfrm_lookup(tunnel->net, skb_dst(skb), &fl, NULL, 0);
- if (IS_ERR(rt)) {
+ dst_hold(dst);
+ dst = xfrm_lookup(tunnel->net, dst, &fl, NULL, 0);
+ if (IS_ERR(dst)) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
@@ -138,16 +138,16 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
/* if there is no transform then this tunnel is not functional.
* Or if the xfrm is not mode tunnel.
*/
- if (!rt->dst.xfrm ||
- rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
+ if (!dst->xfrm ||
+ dst->xfrm->props.mode != XFRM_MODE_TUNNEL) {
dev->stats.tx_carrier_errors++;
- ip_rt_put(rt);
+ dst_release(dst);
goto tx_error_icmp;
}
- tdev = rt->dst.dev;
+ tdev = dst->dev;
if (tdev == dev) {
- ip_rt_put(rt);
+ dst_release(dst);
dev->stats.collisions++;
goto tx_error;
}
@@ -163,7 +163,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
- skb_dst_set(skb, &rt->dst);
+ skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
err = dst_output(skb);
--
1.7.9.5
^ permalink raw reply related
* [PATCH RFC v3 07/12] vti: Update the ipv4 side to use it's own receive hook.
From: Steffen Klassert @ 2014-01-27 10:29 UTC (permalink / raw)
To: netdev; +Cc: Steffen Klassert, Christophe Gouault, Saurabh Mohan
In-Reply-To: <1390818577-19589-1-git-send-email-steffen.klassert@secunet.com>
With this patch, vti uses the IPsec protocol multiplexer to
register its own receive side hooks for ESP, AH and IPCOMP.
Vti now does the following on receive side:
1. Do an input policy check for the IPsec packet we received.
This is required because this packet could already have been
processed by IPsec, so an inbound policy check is needed.
2. Mark the packet with the i_key. The policy and the state
must match this key now. Policy and state belong to the outer
namespace and policy enforcement is done at the further layers.
3. Call the generic xfrm layer to do decryption and decapsulation.
4. Wait for a callback from the xfrm layer to properly clean the
skb to not leak information on namespace and to update the
device statistics.
On transmit side:
1. Mark the packet with the o_key. The policy and the state
must match this key now.
2. Do a xfrm_lookup on the original packet with the mark applied.
3. Check if we got an IPsec route.
4. Clean the skb to not leak information on namespace
transitions.
5. Attach the dst_entry we got from the xfrm_lookup to the skb.
6. Call dst_output to do the IPsec processing.
7. Do the device statistics.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/ip_vti.c | 212 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 166 insertions(+), 46 deletions(-)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 48eafae..1432bec 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -49,7 +49,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly;
static int vti_net_id __read_mostly;
static int vti_tunnel_init(struct net_device *dev);
-/* We dont digest the packet therefore let the packet pass */
static int vti_rcv(struct sk_buff *skb)
{
struct ip_tunnel *tunnel;
@@ -60,66 +59,82 @@ static int vti_rcv(struct sk_buff *skb)
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel != NULL) {
- struct pcpu_sw_netstats *tstats;
- u32 oldmark = skb->mark;
- int ret;
-
-
- /* temporarily mark the skb with the tunnel o_key, to
- * only match policies with this mark.
- */
- skb->mark = be32_to_cpu(tunnel->parms.o_key);
- ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb);
- skb->mark = oldmark;
- if (!ret)
- return -1;
-
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- secpath_reset(skb);
- skb->dev = tunnel->dev;
- return 1;
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+ skb->mark = be32_to_cpu(tunnel->parms.i_key);
+
+ return xfrm4_rcv(skb);
}
return -1;
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int vti_rcv_cb(struct sk_buff *skb, int err)
+{
+ struct net_device *dev;
+ struct pcpu_sw_netstats *tstats;
+ struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
+
+ if (!tunnel)
+ return -1;
+
+ dev = tunnel->dev;
+
+ if (err) {
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+
+ return 0;
+ }
+
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
}
/* This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function.
*/
-
static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct iphdr *tiph = &tunnel->parms.iph;
- u8 tos;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = ip_hdr(skb);
- __be32 dst = tiph->daddr;
- struct flowi4 fl4;
+ struct flowi fl;
int err;
if (skb->protocol != htons(ETH_P_IP))
goto tx_error;
- tos = old_iph->tos;
+ memset(&fl, 0, sizeof(fl));
+ skb->mark = be32_to_cpu(tunnel->parms.o_key);
+ xfrm_decode_session(skb, &fl, AF_INET);
+
+ if (!skb_dst(skb)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
- memset(&fl4, 0, sizeof(fl4));
- flowi4_init_output(&fl4, tunnel->parms.link,
- be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos),
- RT_SCOPE_UNIVERSE,
- IPPROTO_IPIP, 0,
- dst, tiph->saddr, 0, 0);
- rt = ip_route_output_key(dev_net(dev), &fl4);
+ dst_hold(skb_dst(skb));
+ rt = (struct rtable *)xfrm_lookup(tunnel->net, skb_dst(skb), &fl, NULL, 0);
if (IS_ERR(rt)) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
+
/* if there is no transform then this tunnel is not functional.
* Or if the xfrm is not mode tunnel.
*/
@@ -147,9 +162,8 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
}
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- skb_dst_drop(skb);
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
skb_dst_set(skb, &rt->dst);
- nf_reset(skb);
skb->dev = skb_dst(skb)->dev;
err = dst_output(skb);
@@ -166,6 +180,65 @@ tx_error:
return NETDEV_TX_OK;
}
+static int vti4_err(struct sk_buff *skb, u32 info)
+{
+ __be32 spi;
+ struct xfrm_state *x;
+ struct ip_tunnel *tunnel;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah ;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
+ int protocol = iph->protocol;
+ struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->daddr, iph->saddr, 0);
+ if (!tunnel)
+ return -1;
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ switch (icmp_hdr(skb)->type) {
+ case ICMP_DEST_UNREACH:
+ if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ return 0;
+ case ICMP_REDIRECT:
+ break;
+ default:
+ return 0;
+ }
+
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET);
+ if (!x)
+ return 0;
+
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+ ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+ else
+ ipv4_redirect(skb, net, 0, 0, protocol, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
static int
vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -181,12 +254,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EINVAL;
}
+ p.i_flags |= VTI_ISVTI;
err = ip_tunnel_ioctl(dev, &p, cmd);
if (err)
return err;
if (cmd != SIOCDELTUNNEL) {
- p.i_flags |= GRE_KEY | VTI_ISVTI;
+ p.i_flags |= GRE_KEY;
p.o_flags |= GRE_KEY;
}
@@ -241,9 +315,25 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
}
-static struct xfrm_tunnel_notifier vti_handler __read_mostly = {
+static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
+ .handler = vti_rcv,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 100,
+};
+
+static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
+ .handler = vti_rcv,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 100,
+};
+
+static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
.handler = vti_rcv,
- .priority = 1,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 100,
};
static int __net_init vti_init_net(struct net *net)
@@ -287,6 +377,8 @@ static void vti_netlink_parms(struct nlattr *data[],
if (!data)
return;
+ parms->i_flags = VTI_ISVTI;
+
if (data[IFLA_VTI_LINK])
parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
@@ -382,10 +474,31 @@ static int __init vti_init(void)
err = register_pernet_device(&vti_net_ops);
if (err < 0)
return err;
- err = xfrm4_mode_tunnel_input_register(&vti_handler);
+ err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
+ if (err < 0) {
+ unregister_pernet_device(&vti_net_ops);
+ pr_info("vti init: can't register tunnel\n");
+
+ return err;
+ }
+
+ err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
+ if (err < 0) {
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti_net_ops);
+ pr_info("vti init: can't register tunnel\n");
+
+ return err;
+ }
+
+ err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
if (err < 0) {
+ xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
pr_info("vti init: can't register tunnel\n");
+
+ return err;
}
err = rtnl_link_register(&vti_link_ops);
@@ -395,7 +508,9 @@ static int __init vti_init(void)
return err;
rtnl_link_failed:
- xfrm4_mode_tunnel_input_deregister(&vti_handler);
+ xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
+ xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
return err;
}
@@ -403,8 +518,13 @@ rtnl_link_failed:
static void __exit vti_fini(void)
{
rtnl_link_unregister(&vti_link_ops);
- if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
+ if (xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP))
+ pr_info("vti close: can't deregister tunnel\n");
+ if (xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH))
pr_info("vti close: can't deregister tunnel\n");
+ if (xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP))
+ pr_info("vti close: can't deregister tunnel\n");
+
unregister_pernet_device(&vti_net_ops);
}
--
1.7.9.5
^ permalink raw reply related
* [PATCH RFC v3 10/12] vti4: Support inter address family tunneling.
From: Steffen Klassert @ 2014-01-27 10:29 UTC (permalink / raw)
To: netdev; +Cc: Steffen Klassert, Christophe Gouault, Saurabh Mohan
In-Reply-To: <1390818577-19589-1-git-send-email-steffen.klassert@secunet.com>
With this patch we can tunnel ipv6 traffic via a vti4
interface. A vti4 interface can now have an ipv6 address
and ipv6 traffic can be routed via a vti4 interface.
The resulting traffic is xfrm transformed and tunneled
through ipv4 if matching IPsec policies and states are
present.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/ip_vti.c | 48 ++++++++++++++++++++++++++++++++++--------------
1 file changed, 34 insertions(+), 14 deletions(-)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index b8d6184..1708fc1 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -34,6 +34,7 @@
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>
+#include <linux/icmpv6.h>
#include <net/sock.h>
#include <net/ip.h>
@@ -105,31 +106,21 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
return 0;
}
-/* This function assumes it is being called from dev_queue_xmit()
- * and that skb is filled properly by that function.
- */
-static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct flowi *fl)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
- struct flowi fl;
int err;
- if (skb->protocol != htons(ETH_P_IP))
- goto tx_error;
-
- memset(&fl, 0, sizeof(fl));
- skb->mark = be32_to_cpu(tunnel->parms.o_key);
- xfrm_decode_session(skb, &fl, AF_INET);
-
if (!dst) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
dst_hold(dst);
- dst = xfrm_lookup(tunnel->net, dst, &fl, NULL, 0);
+ dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0);
if (IS_ERR(dst)) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
@@ -161,7 +152,6 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
tunnel->err_count = 0;
}
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
@@ -180,6 +170,36 @@ tx_error:
return NETDEV_TX_OK;
}
+/* This function assumes it is being called from dev_queue_xmit()
+ * and that skb is filled properly by that function.
+ */
+static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+
+ skb->mark = be32_to_cpu(tunnel->parms.o_key);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ break;
+ case htons(ETH_P_IPV6):
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ default:
+ dev->stats.tx_errors++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ return vti_xmit(skb, dev, &fl);
+}
+
static int vti4_err(struct sk_buff *skb, u32 info)
{
__be32 spi;
--
1.7.9.5
^ permalink raw reply related
* [PATCH RFC v3 11/12] vti4: Check the tunnel endpoints of the xfrm state and the vti interface
From: Steffen Klassert @ 2014-01-27 10:29 UTC (permalink / raw)
To: netdev; +Cc: Steffen Klassert, Christophe Gouault, Saurabh Mohan
In-Reply-To: <1390818577-19589-1-git-send-email-steffen.klassert@secunet.com>
The tunnel endpoints of the xfrm_state we got from the xfrm_lookup
must match the tunnel endpoints of the vti interface. This patch
ensures this matching.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/ip_vti.c | 29 ++++++++++++++++++++++++-----
1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1708fc1..1415c4a 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -106,10 +106,32 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
return 0;
}
+static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)&dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)&src;
+
+ /* if there is no transform then this tunnel is not functional.
+ * Or if the xfrm is not mode tunnel.
+ */
+ if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+ x->props.family != AF_INET)
+ return false;
+
+ if (!dst)
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET);
+
+ if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET))
+ return false;
+
+ return true;
+}
+
static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
struct flowi *fl)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_parm *parms = &tunnel->parms;
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
int err;
@@ -126,15 +148,12 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error_icmp;
}
- /* if there is no transform then this tunnel is not functional.
- * Or if the xfrm is not mode tunnel.
- */
- if (!dst->xfrm ||
- dst->xfrm->props.mode != XFRM_MODE_TUNNEL) {
+ if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) {
dev->stats.tx_carrier_errors++;
dst_release(dst);
goto tx_error_icmp;
}
+
tdev = dst->dev;
if (tdev == dev) {
--
1.7.9.5
^ permalink raw reply related
* [PATCH RFC v3 12/12] vti4: Enable namespace changing
From: Steffen Klassert @ 2014-01-27 10:29 UTC (permalink / raw)
To: netdev; +Cc: Steffen Klassert, Christophe Gouault, Saurabh Mohan
In-Reply-To: <1390818577-19589-1-git-send-email-steffen.klassert@secunet.com>
vti4 is now fully namespace aware, so allow namespace changing
for vti devices
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/ip_vti.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1415c4a..7b1542c 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -337,7 +337,6 @@ static int vti_tunnel_init(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_LLTX;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
--
1.7.9.5
^ permalink raw reply related
* Re: [PATCH net] net: hyperv: initialize link status correctly
From: Ben Hutchings @ 2014-01-27 10:30 UTC (permalink / raw)
To: Jason Wang; +Cc: netdev, haiyangz, linux-kernel, devel, David Miller
In-Reply-To: <52E634DA.3040200@redhat.com>
[-- Attachment #1.1: Type: text/plain, Size: 1785 bytes --]
On Mon, 2014-01-27 at 18:28 +0800, Jason Wang wrote:
> On 01/27/2014 06:22 PM, Ben Hutchings wrote:
> > On Mon, 2014-01-27 at 17:40 +0800, Jason Wang wrote:
> >> On 01/27/2014 04:35 PM, David Miller wrote:
> >>> From: Jason Wang <jasowang@redhat.com>
> >>> Date: Mon, 27 Jan 2014 15:30:54 +0800
> >>>
> >>>> Call netif_carrier_on() after register_device(). Otherwise it won't work since
> >>>> the device was still in NETREG_UNINITIALIZED state.
> >>>>
> >>>> Fixes a68f9614614749727286f675d15f1e09d13cb54a
> >>>> (hyperv: Fix race between probe and open calls)
> >>>>
> >>>> Cc: Haiyang Zhang <haiyangz@microsoft.com>
> >>>> Cc: K. Y. Srinivasan <kys@microsoft.com>
> >>>> Reported-by: Di Nie <dnie@redhat.com>
> >>>> Tested-by: Di Nie <dnie@redhat.com>
> >>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >>> A device up can occur at the moment you call register_netdevice(),
> >>> therefore that up call can see the carrier as down and fail or
> >>> similar. So you really cannot resolve the carrier to be on in this
> >>> way.
> >> True, we need a workqueue to synchronize them.
> > Whatever for? All you need to do is:
> >
> > rtnl_lock();
> > register_netdevice();
> > netif_carrier_on();
> > rtnl_unlock();
> >
> > It would be nice if we could make the current code work with a change in
> > the core, though.
> >
> > Ben.
> >
>
> Looks like the link status interrupt may happen during this (after
> netvsc_device_add() was called by rndis_filter_device_add()) without any
> synchronization. This may lead a wrong link status here.
Now I'm confused - if there's a link status interrupt, why are you
setting the carrier on initially?
Ben.
--
Ben Hutchings
If at first you don't succeed, you're doing about average.
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]
[-- Attachment #2: Type: text/plain, Size: 169 bytes --]
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
^ permalink raw reply
* BUSINESS DEAL CONTACT ME DR.MA WEIHUA
From: Dr.ma weihua @ 2014-01-27 9:51 UTC (permalink / raw)
^ permalink raw reply
* Re: [PATCH net] net: hyperv: initialize link status correctly
From: Jason Wang @ 2014-01-27 10:37 UTC (permalink / raw)
To: Ben Hutchings; +Cc: netdev, haiyangz, linux-kernel, devel, David Miller
In-Reply-To: <1390818638.2735.136.camel@deadeye.wl.decadent.org.uk>
On 01/27/2014 06:30 PM, Ben Hutchings wrote:
> On Mon, 2014-01-27 at 18:28 +0800, Jason Wang wrote:
>> On 01/27/2014 06:22 PM, Ben Hutchings wrote:
>>> On Mon, 2014-01-27 at 17:40 +0800, Jason Wang wrote:
>>>> On 01/27/2014 04:35 PM, David Miller wrote:
>>>>> From: Jason Wang <jasowang@redhat.com>
>>>>> Date: Mon, 27 Jan 2014 15:30:54 +0800
>>>>>
>>>>>> Call netif_carrier_on() after register_device(). Otherwise it won't work since
>>>>>> the device was still in NETREG_UNINITIALIZED state.
>>>>>>
>>>>>> Fixes a68f9614614749727286f675d15f1e09d13cb54a
>>>>>> (hyperv: Fix race between probe and open calls)
>>>>>>
>>>>>> Cc: Haiyang Zhang <haiyangz@microsoft.com>
>>>>>> Cc: K. Y. Srinivasan <kys@microsoft.com>
>>>>>> Reported-by: Di Nie <dnie@redhat.com>
>>>>>> Tested-by: Di Nie <dnie@redhat.com>
>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>> A device up can occur at the moment you call register_netdevice(),
>>>>> therefore that up call can see the carrier as down and fail or
>>>>> similar. So you really cannot resolve the carrier to be on in this
>>>>> way.
>>>> True, we need a workqueue to synchronize them.
>>> Whatever for? All you need to do is:
>>>
>>> rtnl_lock();
>>> register_netdevice();
>>> netif_carrier_on();
>>> rtnl_unlock();
>>>
>>> It would be nice if we could make the current code work with a change in
>>> the core, though.
>>>
>>> Ben.
>>>
>> Looks like the link status interrupt may happen during this (after
>> netvsc_device_add() was called by rndis_filter_device_add()) without any
>> synchronization. This may lead a wrong link status here.
> Now I'm confused - if there's a link status interrupt, why are you
> setting the carrier on initially?
>
> Ben.
>
I realize that setting carrier on initially was a bug after David's
comment. So I think we need a workqueue.
^ permalink raw reply
* How to identify 6to4 and 6in4 tunnels
From: zhuyj @ 2014-01-27 10:39 UTC (permalink / raw)
To: David S. Miller, netdev, kuznet, jmorris, yoshfuji, kaber,
linux-kernel, zhuyj
Hi, Maintainers
In our scenario, we first create the 6in4/6to4 tunnel and need to
check the tunnel type; after that, we configure the IP address on it.
So, is there any way to determine the actual tunnel type (6in4 or 6to4)
in the current Linux version?
Both 6in4 and 6to4 have the same protocol “IPPROTO_IPV6” in Linux
kernel. The only difference is the ip address on the tunnel. Can we
distinguish them in Linux kernel?
Best Regards
Zhu Yanjun
^ permalink raw reply
* Re: [PATCH 0/2] sctp: fix a problem with net_namespace
From: Neil Horman @ 2014-01-27 11:49 UTC (permalink / raw)
To: Wang Weidong; +Cc: davem, vyasevich, dborkman, netdev
In-Reply-To: <1390794543-1008-1-git-send-email-wangweidong1@huawei.com>
On Mon, Jan 27, 2014 at 11:49:01AM +0800, Wang Weidong wrote:
> fix a problem with net_namespace, and optimize
> the sctp_sysctl_net_register.
>
> Wang Weidong (2):
> sctp: fix a missed .data initialization
> sctp: optimize the sctp_sysctl_net_register
>
> net/sctp/sysctl.c | 17 ++++++++++-------
> 1 file changed, 10 insertions(+), 7 deletions(-)
>
> --
> 1.7.12
>
>
>
I don't see that either of these patches are needed. In sctp_init_net, the
sctp_hmac_alg pointer gets initialized before calling sctp_sysctl_net_register,
and sctp_proc_do_hmac_alg is written to specifically expect NULL values, so this
code may change behavior regarding default cookie selection.
This was coded so that pointers to entries in the string table could be used,
rather than needing to allocate or maintain character buffers. That said, it
does look like that for loop in sctp_sysctl_register_table might compute an odd
offset when cloning the table. I think the right fix for that is likely to just
move the sysctl value initialization in sctp_init_net to below the sysctl
register function.
Neil
^ permalink raw reply
* Re: [PATCH] sky2: initialize napi before registering device
From: Stanislaw Gruszka @ 2014-01-27 11:53 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: netdev, Mirko Lindner
In-Reply-To: <20140125170120.2e78cdb4@nehalam.linuxnetplumber.net>
On Sat, Jan 25, 2014 at 05:01:20PM -0800, Stephen Hemminger wrote:
> On Sat, 25 Jan 2014 11:34:54 +0100
> Stanislaw Gruszka <stf_xl@wp.pl> wrote:
>
> > There is race condition when call netif_napi_add() after
> > register_netdevice(), as ->open() can be called without napi initialized
> > and trigger BUG_ON() on napi_enable(), like on below messages:
> >
> > [ 9.699863] sky2: driver version 1.30
> > [ 9.699960] sky2 0000:02:00.0: Yukon-2 EC Ultra chip revision 2
> > [ 9.700020] sky2 0000:02:00.0: irq 45 for MSI/MSI-X
> > [ 9.700498] ------------[ cut here ]------------
> > [ 9.703391] kernel BUG at include/linux/netdevice.h:501!
> > [ 9.703391] invalid opcode: 0000 [#1] PREEMPT SMP
> > <snip>
> > [ 9.830018] Call Trace:
> > [ 9.830018] [<fa996169>] sky2_open+0x309/0x360 [sky2]
> > [ 9.830018] [<c1007210>] ? via_no_dac+0x40/0x40
> > [ 9.830018] [<c1007210>] ? via_no_dac+0x40/0x40
> > [ 9.830018] [<c135ed4b>] __dev_open+0x9b/0x120
> > [ 9.830018] [<c1431cbe>] ? _raw_spin_unlock_bh+0x1e/0x20
> > [ 9.830018] [<c135efd9>] __dev_change_flags+0x89/0x150
> > [ 9.830018] [<c135f148>] dev_change_flags+0x18/0x50
> > [ 9.830018] [<c13bb8e0>] devinet_ioctl+0x5d0/0x6e0
> > [ 9.830018] [<c13bcced>] inet_ioctl+0x6d/0xa0
> >
> > To fix the problem patch changes the order of initialization.
> >
> > Bug report:
> > https://bugzilla.kernel.org/show_bug.cgi?id=67151
> >
> > Reported-and-tested-by: ebrahim.azarisooreh@gmail.com
> > Signed-off-by: Stanislaw Gruszka <stf_xl@wp.pl>
>
> That looks good, problem was introduced years ago.
> I wonder if netif_napi_del() should be in unwind if registration
> of either devices fails?
netif_napi_del() is called internally by free_netdev(), so we are
fine regarding error unwind and driver remove.
Thanks
Stanislaw
^ permalink raw reply
* [PATCH 0/3] Resubmit: net: via-rhine: add support for on-chip Rhine controllers
From: Alexey Charkov @ 2014-01-27 11:51 UTC (permalink / raw)
To: netdev-u79uwXL29TY76Z2rM5mHXA, linux-ci5G2KO2hbZ+pU9mqzGVBQ,
devicetree-u79uwXL29TY76Z2rM5mHXA, rl-7uj+XXdSDtwfv37vnLkPlQ,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
Cc: Alexey Charkov
In-Reply-To: <1385227995-3956-1-git-send-email-alchark-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
This series introduces platform bus (OpenFirmware) binding for
via-rhine, as used in various ARM-based Systems-on-Chip by
VIA/WonderMedia.
This has been tested in OF configuration by myself on a WM8950-based VIA
APC Rock development board, and in PCI configuration by Roger.
Unfortunately, I can't find my original submission from 30 Nov in any of
the mailing list archives (must have done something stupid while
sending). However, Roger seems to have received the patches fine, tested
them on PCI and signed off, so I'm including his Signed-off-by: with
this submission along with my own.
Please note that this series does not include any ifdefs for either PCI
or OF case, so in Roger's set-up the third patch increased module size
from 39372 to 40868 bytes (+3.8%). I'm following the example of
via-velocity here, which didn't have ifdefs either (and I believe it is
cleaner this way).
Not sure if it's appropriate for 3.14 at this point, but getting it
merged to -next would be much appreciated.
Best regards,
Alexey
Alexey Charkov (3):
net: via-rhine: switch to generic DMA functions
net: via-rhine: reduce usage of the PCI-specific struct
net: via-rhine: add OF bus binding
.../devicetree/bindings/net/via-rhine.txt | 18 +
drivers/net/ethernet/via/Kconfig | 2 +-
drivers/net/ethernet/via/via-rhine.c | 403 ++++++++++++---------
3 files changed, 260 insertions(+), 163 deletions(-)
create mode 100644 Documentation/devicetree/bindings/net/via-rhine.txt
--
1.8.5.1
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH 1/3] net: via-rhine: switch to generic DMA functions
From: Alexey Charkov @ 2014-01-27 11:51 UTC (permalink / raw)
To: netdev, linux, devicetree, rl, linux-kernel; +Cc: Alexey Charkov
In-Reply-To: <1390823503-24087-1-git-send-email-alchark@gmail.com>
Remove legacy PCI DMA wrappers and instead use generic DMA functions
directly in preparation for OF bus binding
Signed-off-by: Alexey Charkov <alchark@gmail.com>
Signed-off-by: Roger Luethi <rl@hellgate.ch>
---
drivers/net/ethernet/via/via-rhine.c | 56 +++++++++++++++++++-----------------
1 file changed, 29 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index ef312bc..fee8732 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -919,10 +919,10 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out;
/* this should always be supported */
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (rc) {
dev_err(&pdev->dev,
- "32-bit PCI DMA addresses not supported by the card!?\n");
+ "32-bit DMA addresses not supported by the card!?\n");
goto err_out;
}
@@ -1094,20 +1094,22 @@ static int alloc_ring(struct net_device* dev)
void *ring;
dma_addr_t ring_dma;
- ring = pci_alloc_consistent(rp->pdev,
+ ring = dma_alloc_coherent(&rp->pdev->dev,
RX_RING_SIZE * sizeof(struct rx_desc) +
TX_RING_SIZE * sizeof(struct tx_desc),
- &ring_dma);
+ &ring_dma,
+ GFP_ATOMIC);
if (!ring) {
netdev_err(dev, "Could not allocate DMA memory\n");
return -ENOMEM;
}
if (rp->quirks & rqRhineI) {
- rp->tx_bufs = pci_alloc_consistent(rp->pdev,
+ rp->tx_bufs = dma_alloc_coherent(&rp->pdev->dev,
PKT_BUF_SZ * TX_RING_SIZE,
- &rp->tx_bufs_dma);
+ &rp->tx_bufs_dma,
+ GFP_ATOMIC);
if (rp->tx_bufs == NULL) {
- pci_free_consistent(rp->pdev,
+ dma_free_coherent(&rp->pdev->dev,
RX_RING_SIZE * sizeof(struct rx_desc) +
TX_RING_SIZE * sizeof(struct tx_desc),
ring, ring_dma);
@@ -1127,14 +1129,14 @@ static void free_ring(struct net_device* dev)
{
struct rhine_private *rp = netdev_priv(dev);
- pci_free_consistent(rp->pdev,
+ dma_free_coherent(&rp->pdev->dev,
RX_RING_SIZE * sizeof(struct rx_desc) +
TX_RING_SIZE * sizeof(struct tx_desc),
rp->rx_ring, rp->rx_ring_dma);
rp->tx_ring = NULL;
if (rp->tx_bufs)
- pci_free_consistent(rp->pdev, PKT_BUF_SZ * TX_RING_SIZE,
+ dma_free_coherent(&rp->pdev->dev, PKT_BUF_SZ * TX_RING_SIZE,
rp->tx_bufs, rp->tx_bufs_dma);
rp->tx_bufs = NULL;
@@ -1172,8 +1174,8 @@ static void alloc_rbufs(struct net_device *dev)
break;
rp->rx_skbuff_dma[i] =
- pci_map_single(rp->pdev, skb->data, rp->rx_buf_sz,
- PCI_DMA_FROMDEVICE);
+ dma_map_single(&rp->pdev->dev, skb->data, rp->rx_buf_sz,
+ DMA_FROM_DEVICE);
if (dma_mapping_error(&rp->pdev->dev, rp->rx_skbuff_dma[i])) {
rp->rx_skbuff_dma[i] = 0;
dev_kfree_skb(skb);
@@ -1195,9 +1197,9 @@ static void free_rbufs(struct net_device* dev)
rp->rx_ring[i].rx_status = 0;
rp->rx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */
if (rp->rx_skbuff[i]) {
- pci_unmap_single(rp->pdev,
+ dma_unmap_single(&rp->pdev->dev,
rp->rx_skbuff_dma[i],
- rp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+ rp->rx_buf_sz, DMA_FROM_DEVICE);
dev_kfree_skb(rp->rx_skbuff[i]);
}
rp->rx_skbuff[i] = NULL;
@@ -1236,10 +1238,10 @@ static void free_tbufs(struct net_device* dev)
rp->tx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */
if (rp->tx_skbuff[i]) {
if (rp->tx_skbuff_dma[i]) {
- pci_unmap_single(rp->pdev,
+ dma_unmap_single(&rp->pdev->dev,
rp->tx_skbuff_dma[i],
rp->tx_skbuff[i]->len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
dev_kfree_skb(rp->tx_skbuff[i]);
}
@@ -1693,8 +1695,8 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
rp->tx_bufs));
} else {
rp->tx_skbuff_dma[entry] =
- pci_map_single(rp->pdev, skb->data, skb->len,
- PCI_DMA_TODEVICE);
+ dma_map_single(&rp->pdev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
if (dma_mapping_error(&rp->pdev->dev, rp->tx_skbuff_dma[entry])) {
dev_kfree_skb(skb);
rp->tx_skbuff_dma[entry] = 0;
@@ -1829,10 +1831,10 @@ static void rhine_tx(struct net_device *dev)
}
/* Free the original skb. */
if (rp->tx_skbuff_dma[entry]) {
- pci_unmap_single(rp->pdev,
+ dma_unmap_single(&rp->pdev->dev,
rp->tx_skbuff_dma[entry],
rp->tx_skbuff[entry]->len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
}
dev_kfree_skb(rp->tx_skbuff[entry]);
rp->tx_skbuff[entry] = NULL;
@@ -1922,19 +1924,19 @@ static int rhine_rx(struct net_device *dev, int limit)
if (pkt_len < rx_copybreak)
skb = netdev_alloc_skb_ip_align(dev, pkt_len);
if (skb) {
- pci_dma_sync_single_for_cpu(rp->pdev,
+ dma_sync_single_for_cpu(&rp->pdev->dev,
rp->rx_skbuff_dma[entry],
rp->rx_buf_sz,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
skb_copy_to_linear_data(skb,
rp->rx_skbuff[entry]->data,
pkt_len);
skb_put(skb, pkt_len);
- pci_dma_sync_single_for_device(rp->pdev,
+ dma_sync_single_for_device(&rp->pdev->dev,
rp->rx_skbuff_dma[entry],
rp->rx_buf_sz,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
} else {
skb = rp->rx_skbuff[entry];
if (skb == NULL) {
@@ -1943,10 +1945,10 @@ static int rhine_rx(struct net_device *dev, int limit)
}
rp->rx_skbuff[entry] = NULL;
skb_put(skb, pkt_len);
- pci_unmap_single(rp->pdev,
+ dma_unmap_single(&rp->pdev->dev,
rp->rx_skbuff_dma[entry],
rp->rx_buf_sz,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
}
if (unlikely(desc_length & DescTag))
@@ -1977,9 +1979,9 @@ static int rhine_rx(struct net_device *dev, int limit)
if (skb == NULL)
break; /* Better luck next round. */
rp->rx_skbuff_dma[entry] =
- pci_map_single(rp->pdev, skb->data,
+ dma_map_single(&rp->pdev->dev, skb->data,
rp->rx_buf_sz,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (dma_mapping_error(&rp->pdev->dev, rp->rx_skbuff_dma[entry])) {
dev_kfree_skb(skb);
rp->rx_skbuff_dma[entry] = 0;
--
1.8.5.1
^ permalink raw reply related
* [PATCH 2/3] net: via-rhine: reduce usage of the PCI-specific struct
From: Alexey Charkov @ 2014-01-27 11:51 UTC (permalink / raw)
To: netdev, linux, devicetree, rl, linux-kernel; +Cc: Alexey Charkov
In-Reply-To: <1390823503-24087-1-git-send-email-alchark@gmail.com>
Use more generic data structures instead of struct pci_dev wherever
possible in preparation for OF bus binding
Signed-off-by: Alexey Charkov <alchark@gmail.com>
Signed-off-by: Roger Luethi <rl@hellgate.ch>
---
drivers/net/ethernet/via/via-rhine.c | 116 +++++++++++++++++++----------------
1 file changed, 62 insertions(+), 54 deletions(-)
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index fee8732..95c2e93 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -446,7 +446,8 @@ struct rhine_private {
unsigned char *tx_bufs;
dma_addr_t tx_bufs_dma;
- struct pci_dev *pdev;
+ int revision;
+ int irq;
long pioaddr;
struct net_device *dev;
struct napi_struct napi;
@@ -701,7 +702,7 @@ static void rhine_reload_eeprom(long pioaddr, struct net_device *dev)
static void rhine_poll(struct net_device *dev)
{
struct rhine_private *rp = netdev_priv(dev);
- const int irq = rp->pdev->irq;
+ const int irq = rp->irq;
disable_irq(irq);
rhine_interrupt(irq, dev);
@@ -871,6 +872,8 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct net_device *dev;
struct rhine_private *rp;
+ struct device *hwdev = &pdev->dev;
+ int revision = pdev->revision;
int i, rc;
u32 quirks;
long pioaddr;
@@ -893,21 +896,19 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
phy_id = 0;
quirks = 0;
name = "Rhine";
- if (pdev->revision < VTunknown0) {
+ if (revision < VTunknown0) {
quirks = rqRhineI;
io_size = 128;
- }
- else if (pdev->revision >= VT6102) {
+ } else if (revision >= VT6102) {
quirks = rqWOL | rqForceReset;
- if (pdev->revision < VT6105) {
+ if (revision < VT6105) {
name = "Rhine II";
quirks |= rqStatusWBRace; /* Rhine-II exclusive */
- }
- else {
+ } else {
phy_id = 1; /* Integrated PHY, phy_id fixed to 1 */
- if (pdev->revision >= VT6105_B0)
+ if (revision >= VT6105_B0)
quirks |= rq6patterns;
- if (pdev->revision < VT6105M)
+ if (revision < VT6105M)
name = "Rhine III";
else
name = "Rhine III (Management Adapter)";
@@ -919,10 +920,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out;
/* this should always be supported */
- rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+ rc = dma_set_mask(hwdev, DMA_BIT_MASK(32));
if (rc) {
- dev_err(&pdev->dev,
- "32-bit DMA addresses not supported by the card!?\n");
+ dev_err(hwdev, "32-bit DMA addresses not supported by the card!?\n");
goto err_out;
}
@@ -930,7 +930,7 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if ((pci_resource_len(pdev, 0) < io_size) ||
(pci_resource_len(pdev, 1) < io_size)) {
rc = -EIO;
- dev_err(&pdev->dev, "Insufficient PCI resources, aborting\n");
+ dev_err(hwdev, "Insufficient PCI resources, aborting\n");
goto err_out;
}
@@ -944,13 +944,13 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = -ENOMEM;
goto err_out;
}
- SET_NETDEV_DEV(dev, &pdev->dev);
+ SET_NETDEV_DEV(dev, hwdev);
rp = netdev_priv(dev);
rp->dev = dev;
+ rp->revision = revision;
rp->quirks = quirks;
rp->pioaddr = pioaddr;
- rp->pdev = pdev;
rp->msg_enable = netif_msg_init(debug, RHINE_MSG_DEFAULT);
rc = pci_request_regions(pdev, DRV_NAME);
@@ -960,9 +960,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
ioaddr = pci_iomap(pdev, bar, io_size);
if (!ioaddr) {
rc = -EIO;
- dev_err(&pdev->dev,
+ dev_err(hwdev,
"ioremap failed for device %s, region 0x%X @ 0x%lX\n",
- pci_name(pdev), io_size, memaddr);
+ dev_name(hwdev), io_size, memaddr);
goto err_out_free_res;
}
@@ -977,7 +977,7 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
unsigned char b = readb(ioaddr+reg);
if (a != b) {
rc = -EIO;
- dev_err(&pdev->dev,
+ dev_err(hwdev,
"MMIO do not match PIO [%02x] (%02x != %02x)\n",
reg, a, b);
goto err_out_unmap;
@@ -986,6 +986,7 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
#endif /* USE_MMIO */
rp->base = ioaddr;
+ rp->irq = pdev->irq;
u64_stats_init(&rp->tx_stats.syncp);
u64_stats_init(&rp->rx_stats.syncp);
@@ -1030,7 +1031,7 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rp->quirks & rqRhineI)
dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM;
- if (pdev->revision >= VT6105M)
+ if (rp->revision >= VT6105M)
dev->features |= NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;
@@ -1047,9 +1048,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
#else
(long)ioaddr,
#endif
- dev->dev_addr, pdev->irq);
+ dev->dev_addr, rp->irq);
- pci_set_drvdata(pdev, dev);
+ dev_set_drvdata(hwdev, dev);
{
u16 mii_cmd;
@@ -1091,10 +1092,11 @@ err_out:
static int alloc_ring(struct net_device* dev)
{
struct rhine_private *rp = netdev_priv(dev);
+ struct device *hwdev = dev->dev.parent;
void *ring;
dma_addr_t ring_dma;
- ring = dma_alloc_coherent(&rp->pdev->dev,
+ ring = dma_alloc_coherent(hwdev,
RX_RING_SIZE * sizeof(struct rx_desc) +
TX_RING_SIZE * sizeof(struct tx_desc),
&ring_dma,
@@ -1104,12 +1106,12 @@ static int alloc_ring(struct net_device* dev)
return -ENOMEM;
}
if (rp->quirks & rqRhineI) {
- rp->tx_bufs = dma_alloc_coherent(&rp->pdev->dev,
+ rp->tx_bufs = dma_alloc_coherent(hwdev,
PKT_BUF_SZ * TX_RING_SIZE,
&rp->tx_bufs_dma,
GFP_ATOMIC);
if (rp->tx_bufs == NULL) {
- dma_free_coherent(&rp->pdev->dev,
+ dma_free_coherent(hwdev,
RX_RING_SIZE * sizeof(struct rx_desc) +
TX_RING_SIZE * sizeof(struct tx_desc),
ring, ring_dma);
@@ -1128,15 +1130,16 @@ static int alloc_ring(struct net_device* dev)
static void free_ring(struct net_device* dev)
{
struct rhine_private *rp = netdev_priv(dev);
+ struct device *hwdev = dev->dev.parent;
- dma_free_coherent(&rp->pdev->dev,
+ dma_free_coherent(hwdev,
RX_RING_SIZE * sizeof(struct rx_desc) +
TX_RING_SIZE * sizeof(struct tx_desc),
rp->rx_ring, rp->rx_ring_dma);
rp->tx_ring = NULL;
if (rp->tx_bufs)
- dma_free_coherent(&rp->pdev->dev, PKT_BUF_SZ * TX_RING_SIZE,
+ dma_free_coherent(hwdev, PKT_BUF_SZ * TX_RING_SIZE,
rp->tx_bufs, rp->tx_bufs_dma);
rp->tx_bufs = NULL;
@@ -1146,6 +1149,7 @@ static void free_ring(struct net_device* dev)
static void alloc_rbufs(struct net_device *dev)
{
struct rhine_private *rp = netdev_priv(dev);
+ struct device *hwdev = dev->dev.parent;
dma_addr_t next;
int i;
@@ -1174,9 +1178,9 @@ static void alloc_rbufs(struct net_device *dev)
break;
rp->rx_skbuff_dma[i] =
- dma_map_single(&rp->pdev->dev, skb->data, rp->rx_buf_sz,
+ dma_map_single(hwdev, skb->data, rp->rx_buf_sz,
DMA_FROM_DEVICE);
- if (dma_mapping_error(&rp->pdev->dev, rp->rx_skbuff_dma[i])) {
+ if (dma_mapping_error(hwdev, rp->rx_skbuff_dma[i])) {
rp->rx_skbuff_dma[i] = 0;
dev_kfree_skb(skb);
break;
@@ -1190,6 +1194,7 @@ static void alloc_rbufs(struct net_device *dev)
static void free_rbufs(struct net_device* dev)
{
struct rhine_private *rp = netdev_priv(dev);
+ struct device *hwdev = dev->dev.parent;
int i;
/* Free all the skbuffs in the Rx queue. */
@@ -1197,7 +1202,7 @@ static void free_rbufs(struct net_device* dev)
rp->rx_ring[i].rx_status = 0;
rp->rx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */
if (rp->rx_skbuff[i]) {
- dma_unmap_single(&rp->pdev->dev,
+ dma_unmap_single(hwdev,
rp->rx_skbuff_dma[i],
rp->rx_buf_sz, DMA_FROM_DEVICE);
dev_kfree_skb(rp->rx_skbuff[i]);
@@ -1230,6 +1235,7 @@ static void alloc_tbufs(struct net_device* dev)
static void free_tbufs(struct net_device* dev)
{
struct rhine_private *rp = netdev_priv(dev);
+ struct device *hwdev = dev->dev.parent;
int i;
for (i = 0; i < TX_RING_SIZE; i++) {
@@ -1238,7 +1244,7 @@ static void free_tbufs(struct net_device* dev)
rp->tx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */
if (rp->tx_skbuff[i]) {
if (rp->tx_skbuff_dma[i]) {
- dma_unmap_single(&rp->pdev->dev,
+ dma_unmap_single(hwdev,
rp->tx_skbuff_dma[i],
rp->tx_skbuff[i]->len,
DMA_TO_DEVICE);
@@ -1469,7 +1475,7 @@ static void init_registers(struct net_device *dev)
rhine_set_rx_mode(dev);
- if (rp->pdev->revision >= VT6105M)
+ if (rp->revision >= VT6105M)
rhine_init_cam_filter(dev);
napi_enable(&rp->napi);
@@ -1581,16 +1587,15 @@ static int rhine_open(struct net_device *dev)
void __iomem *ioaddr = rp->base;
int rc;
- rc = request_irq(rp->pdev->irq, rhine_interrupt, IRQF_SHARED, dev->name,
- dev);
+ rc = request_irq(rp->irq, rhine_interrupt, IRQF_SHARED, dev->name, dev);
if (rc)
return rc;
- netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->pdev->irq);
+ netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->irq);
rc = alloc_ring(dev);
if (rc) {
- free_irq(rp->pdev->irq, dev);
+ free_irq(rp->irq, dev);
return rc;
}
alloc_rbufs(dev);
@@ -1659,6 +1664,7 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
struct net_device *dev)
{
struct rhine_private *rp = netdev_priv(dev);
+ struct device *hwdev = dev->dev.parent;
void __iomem *ioaddr = rp->base;
unsigned entry;
@@ -1695,9 +1701,9 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
rp->tx_bufs));
} else {
rp->tx_skbuff_dma[entry] =
- dma_map_single(&rp->pdev->dev, skb->data, skb->len,
+ dma_map_single(hwdev, skb->data, skb->len,
DMA_TO_DEVICE);
- if (dma_mapping_error(&rp->pdev->dev, rp->tx_skbuff_dma[entry])) {
+ if (dma_mapping_error(hwdev, rp->tx_skbuff_dma[entry])) {
dev_kfree_skb(skb);
rp->tx_skbuff_dma[entry] = 0;
dev->stats.tx_dropped++;
@@ -1788,6 +1794,7 @@ static irqreturn_t rhine_interrupt(int irq, void *dev_instance)
static void rhine_tx(struct net_device *dev)
{
struct rhine_private *rp = netdev_priv(dev);
+ struct device *hwdev = dev->dev.parent;
int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
/* find and cleanup dirty tx descriptors */
@@ -1831,7 +1838,7 @@ static void rhine_tx(struct net_device *dev)
}
/* Free the original skb. */
if (rp->tx_skbuff_dma[entry]) {
- dma_unmap_single(&rp->pdev->dev,
+ dma_unmap_single(hwdev,
rp->tx_skbuff_dma[entry],
rp->tx_skbuff[entry]->len,
DMA_TO_DEVICE);
@@ -1863,6 +1870,7 @@ static inline u16 rhine_get_vlan_tci(struct sk_buff *skb, int data_size)
static int rhine_rx(struct net_device *dev, int limit)
{
struct rhine_private *rp = netdev_priv(dev);
+ struct device *hwdev = dev->dev.parent;
int count;
int entry = rp->cur_rx % RX_RING_SIZE;
@@ -1924,7 +1932,7 @@ static int rhine_rx(struct net_device *dev, int limit)
if (pkt_len < rx_copybreak)
skb = netdev_alloc_skb_ip_align(dev, pkt_len);
if (skb) {
- dma_sync_single_for_cpu(&rp->pdev->dev,
+ dma_sync_single_for_cpu(hwdev,
rp->rx_skbuff_dma[entry],
rp->rx_buf_sz,
DMA_FROM_DEVICE);
@@ -1933,7 +1941,7 @@ static int rhine_rx(struct net_device *dev, int limit)
rp->rx_skbuff[entry]->data,
pkt_len);
skb_put(skb, pkt_len);
- dma_sync_single_for_device(&rp->pdev->dev,
+ dma_sync_single_for_device(hwdev,
rp->rx_skbuff_dma[entry],
rp->rx_buf_sz,
DMA_FROM_DEVICE);
@@ -1945,7 +1953,7 @@ static int rhine_rx(struct net_device *dev, int limit)
}
rp->rx_skbuff[entry] = NULL;
skb_put(skb, pkt_len);
- dma_unmap_single(&rp->pdev->dev,
+ dma_unmap_single(hwdev,
rp->rx_skbuff_dma[entry],
rp->rx_buf_sz,
DMA_FROM_DEVICE);
@@ -1979,10 +1987,11 @@ static int rhine_rx(struct net_device *dev, int limit)
if (skb == NULL)
break; /* Better luck next round. */
rp->rx_skbuff_dma[entry] =
- dma_map_single(&rp->pdev->dev, skb->data,
+ dma_map_single(hwdev, skb->data,
rp->rx_buf_sz,
DMA_FROM_DEVICE);
- if (dma_mapping_error(&rp->pdev->dev, rp->rx_skbuff_dma[entry])) {
+ if (dma_mapping_error(hwdev,
+ rp->rx_skbuff_dma[entry])) {
dev_kfree_skb(skb);
rp->rx_skbuff_dma[entry] = 0;
break;
@@ -2103,7 +2112,7 @@ static void rhine_set_rx_mode(struct net_device *dev)
/* Too many to match, or accept all multicasts. */
iowrite32(0xffffffff, ioaddr + MulticastFilter0);
iowrite32(0xffffffff, ioaddr + MulticastFilter1);
- } else if (rp->pdev->revision >= VT6105M) {
+ } else if (rp->revision >= VT6105M) {
int i = 0;
u32 mCAMmask = 0; /* 32 mCAMs (6105M and better) */
netdev_for_each_mc_addr(ha, dev) {
@@ -2125,7 +2134,7 @@ static void rhine_set_rx_mode(struct net_device *dev)
iowrite32(mc_filter[1], ioaddr + MulticastFilter1);
}
/* enable/disable VLAN receive filtering */
- if (rp->pdev->revision >= VT6105M) {
+ if (rp->revision >= VT6105M) {
if (dev->flags & IFF_PROMISC)
BYTE_REG_BITS_OFF(BCR1_VIDFR, ioaddr + PCIBusConfig1);
else
@@ -2136,11 +2145,11 @@ static void rhine_set_rx_mode(struct net_device *dev)
static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
- struct rhine_private *rp = netdev_priv(dev);
+ struct device *hwdev = dev->dev.parent;
strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
strlcpy(info->version, DRV_VERSION, sizeof(info->version));
- strlcpy(info->bus_info, pci_name(rp->pdev), sizeof(info->bus_info));
+ strlcpy(info->bus_info, dev_name(hwdev), sizeof(info->bus_info));
}
static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
@@ -2277,7 +2286,7 @@ static int rhine_close(struct net_device *dev)
/* Stop the chip's Tx and Rx processes. */
iowrite16(CmdStop, ioaddr + ChipCmd);
- free_irq(rp->pdev->irq, dev);
+ free_irq(rp->irq, dev);
free_rbufs(dev);
free_tbufs(dev);
free_ring(dev);
@@ -2354,8 +2363,7 @@ static void rhine_shutdown (struct pci_dev *pdev)
#ifdef CONFIG_PM_SLEEP
static int rhine_suspend(struct device *device)
{
- struct pci_dev *pdev = to_pci_dev(device);
- struct net_device *dev = pci_get_drvdata(pdev);
+ struct net_device *dev = dev_get_drvdata(device);
struct rhine_private *rp = netdev_priv(dev);
if (!netif_running(dev))
@@ -2367,15 +2375,15 @@ static int rhine_suspend(struct device *device)
netif_device_detach(dev);
- rhine_shutdown(pdev);
+ if (!strncmp(device->bus->name, "pci", 3))
+ rhine_shutdown(to_pci_dev(device));
return 0;
}
static int rhine_resume(struct device *device)
{
- struct pci_dev *pdev = to_pci_dev(device);
- struct net_device *dev = pci_get_drvdata(pdev);
+ struct net_device *dev = dev_get_drvdata(device);
struct rhine_private *rp = netdev_priv(dev);
if (!netif_running(dev))
--
1.8.5.1
^ permalink raw reply related
* [PATCH 3/3] net: via-rhine: add OF bus binding
From: Alexey Charkov @ 2014-01-27 11:51 UTC (permalink / raw)
To: netdev, linux, devicetree, rl, linux-kernel; +Cc: Alexey Charkov
In-Reply-To: <1390823503-24087-1-git-send-email-alchark@gmail.com>
This should make the driver usable with the Rhine III adapters integrated
into VIA/WonderMedia ARM-based Systems-on-Chip. Note that these adapters
are always in MMIO mode and don't have any known EEPROM.
Signed-off-by: Alexey Charkov <alchark@gmail.com>
Signed-off-by: Roger Luethi <rl@hellgate.ch>
---
.../devicetree/bindings/net/via-rhine.txt | 18 ++
drivers/net/ethernet/via/Kconfig | 2 +-
drivers/net/ethernet/via/via-rhine.c | 293 +++++++++++++--------
3 files changed, 200 insertions(+), 113 deletions(-)
create mode 100644 Documentation/devicetree/bindings/net/via-rhine.txt
diff --git a/Documentation/devicetree/bindings/net/via-rhine.txt b/Documentation/devicetree/bindings/net/via-rhine.txt
new file mode 100644
index 0000000..684dd3a
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/via-rhine.txt
@@ -0,0 +1,18 @@
+* VIA Rhine 10/100 Network Controller
+
+Required properties:
+- compatible : Should be "via,rhine"
+- reg : Address and length of the io space
+- interrupts : Should contain the controller interrupt line
+- rhine,revision : Rhine core revision, used to inform the
+ driver of quirks and capabilities to expect from
+ the device. Mimics the respective PCI attribute.
+
+Examples:
+
+ethernet@d8004000 {
+ compatible = "via,rhine";
+ reg = <0xd8004000 0x100>;
+ interrupts = <10>;
+ rhine,revision = <0x84>;
+};
diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig
index 8a049a2..f66ddae 100644
--- a/drivers/net/ethernet/via/Kconfig
+++ b/drivers/net/ethernet/via/Kconfig
@@ -19,7 +19,7 @@ if NET_VENDOR_VIA
config VIA_RHINE
tristate "VIA Rhine support"
- depends on PCI
+ depends on (PCI || USE_OF)
select CRC32
select MII
---help---
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 95c2e93..1c609c0 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -94,6 +94,10 @@ static const int multicast_filter_limit = 32;
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -279,6 +283,11 @@ static DEFINE_PCI_DEVICE_TABLE(rhine_pci_tbl) = {
};
MODULE_DEVICE_TABLE(pci, rhine_pci_tbl);
+static struct of_device_id rhine_of_tbl[] = {
+ { .compatible = "via,rhine" },
+ { } /* terminate list */
+};
+MODULE_DEVICE_TABLE(of, rhine_of_tbl);
/* Offsets to the device registers. */
enum register_offsets {
@@ -847,7 +856,8 @@ static void rhine_hw_init(struct net_device *dev, long pioaddr)
msleep(5);
/* Reload EEPROM controlled bytes cleared by soft reset */
- rhine_reload_eeprom(pioaddr, dev);
+ if (!strncmp(dev->dev.parent->bus->name, "pci", 3))
+ rhine_reload_eeprom(pioaddr, dev);
}
static const struct net_device_ops rhine_netdev_ops = {
@@ -868,56 +878,13 @@ static const struct net_device_ops rhine_netdev_ops = {
#endif
};
-static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int rhine_init_one_common(struct device *hwdev, int revision,
+ long pioaddr, void __iomem *ioaddr, int irq)
{
struct net_device *dev;
struct rhine_private *rp;
- struct device *hwdev = &pdev->dev;
- int revision = pdev->revision;
- int i, rc;
- u32 quirks;
- long pioaddr;
- long memaddr;
- void __iomem *ioaddr;
- int io_size, phy_id;
+ int i, rc, phy_id;
const char *name;
-#ifdef USE_MMIO
- int bar = 1;
-#else
- int bar = 0;
-#endif
-
-/* when built into the kernel, we only print version if device is found */
-#ifndef MODULE
- pr_info_once("%s\n", version);
-#endif
-
- io_size = 256;
- phy_id = 0;
- quirks = 0;
- name = "Rhine";
- if (revision < VTunknown0) {
- quirks = rqRhineI;
- io_size = 128;
- } else if (revision >= VT6102) {
- quirks = rqWOL | rqForceReset;
- if (revision < VT6105) {
- name = "Rhine II";
- quirks |= rqStatusWBRace; /* Rhine-II exclusive */
- } else {
- phy_id = 1; /* Integrated PHY, phy_id fixed to 1 */
- if (revision >= VT6105_B0)
- quirks |= rq6patterns;
- if (revision < VT6105M)
- name = "Rhine III";
- else
- name = "Rhine III (Management Adapter)";
- }
- }
-
- rc = pci_enable_device(pdev);
- if (rc)
- goto err_out;
/* this should always be supported */
rc = dma_set_mask(hwdev, DMA_BIT_MASK(32));
@@ -926,19 +893,6 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out;
}
- /* sanity check */
- if ((pci_resource_len(pdev, 0) < io_size) ||
- (pci_resource_len(pdev, 1) < io_size)) {
- rc = -EIO;
- dev_err(hwdev, "Insufficient PCI resources, aborting\n");
- goto err_out;
- }
-
- pioaddr = pci_resource_start(pdev, 0);
- memaddr = pci_resource_start(pdev, 1);
-
- pci_set_master(pdev);
-
dev = alloc_etherdev(sizeof(struct rhine_private));
if (!dev) {
rc = -ENOMEM;
@@ -949,44 +903,30 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rp = netdev_priv(dev);
rp->dev = dev;
rp->revision = revision;
- rp->quirks = quirks;
rp->pioaddr = pioaddr;
+ rp->base = ioaddr;
+ rp->irq = irq;
rp->msg_enable = netif_msg_init(debug, RHINE_MSG_DEFAULT);
- rc = pci_request_regions(pdev, DRV_NAME);
- if (rc)
- goto err_out_free_netdev;
-
- ioaddr = pci_iomap(pdev, bar, io_size);
- if (!ioaddr) {
- rc = -EIO;
- dev_err(hwdev,
- "ioremap failed for device %s, region 0x%X @ 0x%lX\n",
- dev_name(hwdev), io_size, memaddr);
- goto err_out_free_res;
- }
-
-#ifdef USE_MMIO
- enable_mmio(pioaddr, quirks);
-
- /* Check that selected MMIO registers match the PIO ones */
- i = 0;
- while (mmio_verify_registers[i]) {
- int reg = mmio_verify_registers[i++];
- unsigned char a = inb(pioaddr+reg);
- unsigned char b = readb(ioaddr+reg);
- if (a != b) {
- rc = -EIO;
- dev_err(hwdev,
- "MMIO do not match PIO [%02x] (%02x != %02x)\n",
- reg, a, b);
- goto err_out_unmap;
+ phy_id = 0;
+ name = "Rhine";
+ if (revision < VTunknown0) {
+ rp->quirks = rqRhineI;
+ } else if (revision >= VT6102) {
+ rp->quirks = rqWOL | rqForceReset;
+ if (revision < VT6105) {
+ name = "Rhine II";
+ rp->quirks |= rqStatusWBRace; /* Rhine-II exclusive */
+ } else {
+ phy_id = 1; /* Integrated PHY, phy_id fixed to 1 */
+ if (revision >= VT6105_B0)
+ rp->quirks |= rq6patterns;
+ if (revision < VT6105M)
+ name = "Rhine III";
+ else
+ name = "Rhine III (Management Adapter)";
}
}
-#endif /* USE_MMIO */
-
- rp->base = ioaddr;
- rp->irq = pdev->irq;
u64_stats_init(&rp->tx_stats.syncp);
u64_stats_init(&rp->rx_stats.syncp);
@@ -1039,16 +979,10 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* dev->name not defined before register_netdev()! */
rc = register_netdev(dev);
if (rc)
- goto err_out_unmap;
+ goto err_out_free_netdev;
netdev_info(dev, "VIA %s at 0x%lx, %pM, IRQ %d\n",
- name,
-#ifdef USE_MMIO
- memaddr,
-#else
- (long)ioaddr,
-#endif
- dev->dev_addr, rp->irq);
+ name, (long)ioaddr, dev->dev_addr, rp->irq);
dev_set_drvdata(hwdev, dev);
@@ -1079,16 +1013,118 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
+err_out_free_netdev:
+ free_netdev(dev);
+err_out:
+ return rc;
+}
+
+static int rhine_init_one_pci(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct device *hwdev = &pdev->dev;
+ int i, rc;
+ long pioaddr, memaddr;
+ void __iomem *ioaddr;
+ int io_size = pdev->revision < VTunknown0 ? 128 : 256;
+ u32 quirks = pdev->revision < VTunknown0 ? rqRhineI : 0;
+#ifdef USE_MMIO
+ int bar = 1;
+#else
+ int bar = 0;
+#endif
+
+/* when built into the kernel, we only print version if device is found */
+#ifndef MODULE
+ pr_info_once("%s\n", version);
+#endif
+
+ rc = pci_enable_device(pdev);
+ if (rc)
+ goto err_out;
+
+ /* sanity check */
+ if ((pci_resource_len(pdev, 0) < io_size) ||
+ (pci_resource_len(pdev, 1) < io_size)) {
+ rc = -EIO;
+ dev_err(hwdev, "Insufficient PCI resources, aborting\n");
+ goto err_out;
+ }
+
+ pioaddr = pci_resource_start(pdev, 0);
+ memaddr = pci_resource_start(pdev, 1);
+
+ pci_set_master(pdev);
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+ if (rc)
+ goto err_out;
+
+ ioaddr = pci_iomap(pdev, bar, io_size);
+ if (!ioaddr) {
+ rc = -EIO;
+ dev_err(hwdev,
+ "ioremap failed for device %s, region 0x%X @ 0x%lX\n",
+ dev_name(hwdev), io_size, memaddr);
+ goto err_out_free_res;
+ }
+
+#ifdef USE_MMIO
+ enable_mmio(pioaddr, quirks);
+
+ /* Check that selected MMIO registers match the PIO ones */
+ i = 0;
+ while (mmio_verify_registers[i]) {
+ int reg = mmio_verify_registers[i++];
+ unsigned char a = inb(pioaddr+reg);
+ unsigned char b = readb(ioaddr+reg);
+ if (a != b) {
+ rc = -EIO;
+ dev_err(hwdev,
+ "MMIO do not match PIO [%02x] (%02x != %02x)\n",
+ reg, a, b);
+ goto err_out_unmap;
+ }
+ }
+#endif /* USE_MMIO */
+
+ rc = rhine_init_one_common(&pdev->dev, pdev->revision,
+ pioaddr, ioaddr, pdev->irq);
+ if (!rc)
+ return 0;
+
err_out_unmap:
pci_iounmap(pdev, ioaddr);
err_out_free_res:
pci_release_regions(pdev);
-err_out_free_netdev:
- free_netdev(dev);
err_out:
return rc;
}
+static int rhine_init_one_platform(struct platform_device *pdev)
+{
+ const u32 *revision;
+ int irq;
+ struct resource *res;
+ void __iomem *ioaddr;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ ioaddr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(ioaddr))
+ return PTR_ERR(ioaddr);
+
+ irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+ if (!irq)
+ return -EINVAL;
+
+ revision = of_get_property(pdev->dev.of_node, "rhine,revision", NULL);
+ if (!revision)
+ return -EINVAL;
+
+ return rhine_init_one_common(&pdev->dev, *revision,
+ (long)ioaddr, ioaddr, irq);
+}
+
static int alloc_ring(struct net_device* dev)
{
struct rhine_private *rp = netdev_priv(dev);
@@ -2295,7 +2331,7 @@ static int rhine_close(struct net_device *dev)
}
-static void rhine_remove_one(struct pci_dev *pdev)
+static void rhine_remove_one_pci(struct pci_dev *pdev)
{
struct net_device *dev = pci_get_drvdata(pdev);
struct rhine_private *rp = netdev_priv(dev);
@@ -2309,7 +2345,21 @@ static void rhine_remove_one(struct pci_dev *pdev)
pci_disable_device(pdev);
}
-static void rhine_shutdown (struct pci_dev *pdev)
+static int rhine_remove_one_platform(struct platform_device *pdev)
+{
+ struct net_device *dev = platform_get_drvdata(pdev);
+ struct rhine_private *rp = netdev_priv(dev);
+
+ unregister_netdev(dev);
+
+ iounmap(rp->base);
+
+ free_netdev(dev);
+
+ return 0;
+}
+
+static void rhine_shutdown_pci(struct pci_dev *pdev)
{
struct net_device *dev = pci_get_drvdata(pdev);
struct rhine_private *rp = netdev_priv(dev);
@@ -2376,7 +2426,7 @@ static int rhine_suspend(struct device *device)
netif_device_detach(dev);
if (!strncmp(device->bus->name, "pci", 3))
- rhine_shutdown(to_pci_dev(device));
+ rhine_shutdown_pci(to_pci_dev(device));
return 0;
}
@@ -2416,15 +2466,26 @@ static SIMPLE_DEV_PM_OPS(rhine_pm_ops, rhine_suspend, rhine_resume);
#endif /* !CONFIG_PM_SLEEP */
-static struct pci_driver rhine_driver = {
+static struct pci_driver rhine_driver_pci = {
.name = DRV_NAME,
.id_table = rhine_pci_tbl,
- .probe = rhine_init_one,
- .remove = rhine_remove_one,
- .shutdown = rhine_shutdown,
+ .probe = rhine_init_one_pci,
+ .remove = rhine_remove_one_pci,
+ .shutdown = rhine_shutdown_pci,
.driver.pm = RHINE_PM_OPS,
};
+static struct platform_driver rhine_driver_platform = {
+ .probe = rhine_init_one_platform,
+ .remove = rhine_remove_one_platform,
+ .driver = {
+ .name = DRV_NAME,
+ .owner = THIS_MODULE,
+ .of_match_table = rhine_of_tbl,
+ .pm = RHINE_PM_OPS,
+ }
+};
+
static struct dmi_system_id rhine_dmi_table[] __initdata = {
{
.ident = "EPIA-M",
@@ -2445,6 +2506,8 @@ static struct dmi_system_id rhine_dmi_table[] __initdata = {
static int __init rhine_init(void)
{
+ int ret_pci, ret_platform;
+
/* when a module, this is printed whether or not devices are found in probe */
#ifdef MODULE
pr_info("%s\n", version);
@@ -2457,13 +2520,19 @@ static int __init rhine_init(void)
else if (avoid_D3)
pr_info("avoid_D3 set\n");
- return pci_register_driver(&rhine_driver);
+ ret_pci = pci_register_driver(&rhine_driver_pci);
+ ret_platform = platform_driver_register(&rhine_driver_platform);
+ if ((ret_pci < 0) && (ret_platform < 0))
+ return ret_pci;
+
+ return 0;
}
static void __exit rhine_cleanup(void)
{
- pci_unregister_driver(&rhine_driver);
+ platform_driver_unregister(&rhine_driver_platform);
+ pci_unregister_driver(&rhine_driver_pci);
}
--
1.8.5.1
^ permalink raw reply related
* Re: [PATCH net-next] 8139cp: remove a won't occurred BUG_ON
From: Ben Hutchings @ 2014-01-27 11:54 UTC (permalink / raw)
To: Wang Weidong; +Cc: David Miller, netdev
In-Reply-To: <52E5B309.1080402@huawei.com>
[-- Attachment #1: Type: text/plain, Size: 1087 bytes --]
On Mon, 2014-01-27 at 09:14 +0800, Wang Weidong wrote:
> On 2014/1/27 7:23, Ben Hutchings wrote:
> > On Sun, 2014-01-26 at 16:33 +0800, Wang Weidong wrote:
> >> when variable i go to the BUG_ON the value is equal to the CP_NUM_STATS,
> >> so the BUG_ON won't occur, so remove it
> >
> > We hope that every BUG_ON() does not occur, but that doesn't mean they
> > should be removed. This check is meant to catch mistakes when adding
> > new statistics.
> >
> > Ben.
> >
> Hi, Ben.
>
> Yeah, but I think that if someone adds new statistics, they should take that into
> account themselves instead of relying on the BUG_ON helper.
>
> Also, I found that some other drivers' get_ethtool_stats do not have a BUG_ON. Should we
> add the BUG_ON to them?
[...]
The important thing is that the get_stats, get_sset_count and
get_strings operations are consistent. Depending on how they are
implemented, a BUG_ON or BUILD_BUG_ON may be useful to check that. I
don't think there's any universal best practice.
Ben.
--
Ben Hutchings
If at first you don't succeed, you're doing about average.
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]
^ permalink raw reply
* Re: [Patch net-next v2 1/5] net_sched: act: hide struct tcf_common from API
From: Jamal Hadi Salim @ 2014-01-27 12:11 UTC (permalink / raw)
To: Cong Wang, netdev; +Cc: David S. Miller
In-Reply-To: <1390516525-8556-2-git-send-email-xiyou.wangcong@gmail.com>
On 01/23/14 17:35, Cong Wang wrote:
> Now we can totally hide it from modules. tcf_hash_*() API's
> will operate on struct tc_action, modules don't need to care about
> the details.
>
> Cc: Jamal Hadi Salim <jhs@mojatatu.com>
> Cc: David S. Miller <davem@davemloft.net>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> ---
Cong, this patch does not compile by itself. I did not try the rest.
For git bisect to work:
Every patch should be compilable standalone and all tests should
pass with every single patch (the latter part is my view, the first
is common practice)
Since Dave already closed the door - no rush, when you get the
cycles please fix this up. I am hoping to do a quick test run
when you are done.
cheers,
jamal
^ permalink raw reply
* Re: [PATCH net-next] 8139cp: remove a won't occurred BUG_ON
From: Wang Weidong @ 2014-01-27 12:57 UTC (permalink / raw)
To: Ben Hutchings, Wang Weidong; +Cc: David Miller, netdev
In-Reply-To: <1390823686.2735.138.camel@deadeye.wl.decadent.org.uk>
From: Wang Weidong <wangweidong1@huawei.com>
On 2014/1/27 19:54, Ben Hutchings wrote:
> On Mon, 2014-01-27 at 09:14 +0800, Wang Weidong wrote:
>> On 2014/1/27 7:23, Ben Hutchings wrote:
>>> On Sun, 2014-01-26 at 16:33 +0800, Wang Weidong wrote:
>>>> when variable i go to the BUG_ON the value is equal to the CP_NUM_STATS,
>>>> so the BUG_ON won't occur, so remove it
>>>
>>> We hope that every BUG_ON() does not occur, but that doesn't mean they
>>> should be removed. This check is meant to catch mistakes when adding
>>> new statistics.
>>>
>>> Ben.
>>>
>> Hi, Ben.
>>
>> Yeah, but I think that if someone adds new statistics, they should take that into
>> account themselves instead of relying on the BUG_ON helper.
>>
>> Also, I found that some other drivers' get_ethtool_stats do not have a BUG_ON. Should we
>> add the BUG_ON to them?
> [...]
>
> The important thing is that the get_stats, get_sset_count and
> get_strings operations are consistent. Depending on how they are
> implemented, a BUG_ON or BUILD_BUG_ON may be useful to check that. I
> don't think there's any universal best practice.
>
> Ben.
>
Ok, Got it.
Thanks for your answers.
Regards,
Wang
^ permalink raw reply
* Re: How to identify 6to4 and 6in4 tunnels
From: Nicolas Dichtel @ 2014-01-27 12:59 UTC (permalink / raw)
To: zhuyj, David S. Miller, netdev, kuznet, jmorris, yoshfuji, kaber,
linux-kernel
In-Reply-To: <52E63779.5090101@gmail.com>
Le 27/01/2014 11:39, zhuyj a écrit :
> Hi, Maintainers
>
> In our scene, we will create the 6in4/6to4 tunnel firstly and need to check the
> tunnel type, secondly, we will configure the ip address on it. So, Could we have
> any way to get the actual tunnel for 6in4 and 6to4 from current linux version?
>
> Both 6in4 and 6to4 have the same protocol “IPPROTO_IPV6” in Linux kernel. The
> only difference is the ip address on the tunnel. Can we distinguish them in
> Linux kernel?
Just check the prefix, like it is done in check_6rd().
Regards,
Nicolas
^ permalink raw reply
* Re: [PATCH 0/2] sctp: fix a problem with net_namespace
From: Wang Weidong @ 2014-01-27 13:05 UTC (permalink / raw)
To: Neil Horman, Wang Weidong; +Cc: davem, vyasevich, dborkman, netdev
In-Reply-To: <20140127114904.GA17143@hmsreliant.think-freely.org>
From: Wang Weidong <wangweidong1@huawei.com>
On 2014/1/27 19:49, Neil Horman wrote:
> On Mon, Jan 27, 2014 at 11:49:01AM +0800, Wang Weidong wrote:
>> fix a problem with net_namespace, and optimize
>> the sctp_sysctl_net_register.
>>
>> Wang Weidong (2):
>> sctp: fix a missed .data initialization
>> sctp: optimize the sctp_sysctl_net_register
>>
>> net/sctp/sysctl.c | 17 ++++++++++-------
>> 1 file changed, 10 insertions(+), 7 deletions(-)
>>
>> --
>> 1.7.12
>>
>>
>>
> I don't see that either of these patches are needed. In sctp_init_net, the
> sctp_hmac_alg pointer gets initialized before calling sctp_sysctl_net_register,
> and sctp_proc_do_hmac_alg is written to specifically expect NULL values, so this
> code may change behavior regarding default cookie selection.
>
> This was coded so that pointers to entries in the string table could be used,
> rather than needing to allocate or maintain character buffers. That said, it
> does look like that for loop in sctp_sysctl_register_table might compute an odd
> offset when cloning the table. I think the right fix for that is likely to just
> move the sysctl value initialization in sctp_init_net to below the sysctl
> register function.
>
> Neil
>
Thanks Neil,
I will try to refix it as you said tomorrow because I am not at the office.
Regards,
Wang
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply
* Re: [BUG - v3.10.27] sit: Bad list pointer
From: Nicolas Dichtel @ 2014-01-27 13:21 UTC (permalink / raw)
To: Steven Rostedt, LKML, netdev, stable, David Miller
Cc: Clark Williams, Luis Claudio R. Goncalves
In-Reply-To: <20140125133600.7482d428@gandalf.local.home>
Le 25/01/2014 19:36, Steven Rostedt a écrit :
> On 3.10.27, loading and then unloading the sit module gives me the
> following bug:
>
> [ 35.400878] sit: IPv6 over IPv4 tunneling driver
> [ 36.959308] ------------[ cut here ]------------
> [ 36.963983] WARNING: at /home/rostedt/work/git/linux-rt.git/lib/list_debug.c:59 __list_del_entry+0xa1/0xd0()
> [ 36.973874] list_del corruption. prev->next should be ffff88011656d070, but was ffff880115fe5ea8
> [ 36.982684] Modules linked in: sit(-) ip_tunnel tunnel4 bnep lockd bluetooth nf_conntrack_ipv4 ip6t_REJECT nf_defrag_ipv4 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_intel snd_hda_codec tpm_infineon snd_hwdep hp_wmi rfkill tpm_tis tpm coretemp snd_seq lpc_ich snd_seq_device snd_pcm sparse_keymap uinput serio_raw pcspkr mfd_core tpm_bios i2c_i801 microcode wmi snd_page_alloc snd_timer snd soundcore i915 e1000e i2c_algo_bit ptp drm_kms_helper crc32c_intel drm pps_core i2c_core video sunrpc
> [ 37.034430] CPU: 0 PID: 1071 Comm: rmmod Not tainted 3.10.27-test #143
> [ 37.040972] Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012
> [ 37.049962] ffffffff81a10e78 ffff880115fe5d18 ffffffff8161b3c7 ffff880115fe5d58
> [ 37.057439] ffffffff8104b2a0 ffff880115fe5dd8 ffff880115fe5df8 ffff88011656d070
> [ 37.064911] 0000000000000080 0000000000000018 ffff880115cb4000 ffff880115fe5db8
> [ 37.072405] Call Trace:
> [ 37.074869] [<ffffffff8161b3c7>] dump_stack+0x19/0x1b
> [ 37.080031] [<ffffffff8104b2a0>] warn_slowpath_common+0x70/0xa0
> [ 37.086051] [<ffffffff8104b386>] warn_slowpath_fmt+0x46/0x50
> [ 37.091814] [<ffffffff812f56a1>] __list_del_entry+0xa1/0xd0
> [ 37.097491] [<ffffffff815168a5>] unregister_netdevice_queue+0x35/0xa0
> [ 37.104036] [<ffffffffa038df82>] sit_exit_net+0xc2/0xf0 [sit]
> [ 37.109893] [<ffffffff81511278>] ops_exit_list.isra.4+0x38/0x60
> [ 37.115917] [<ffffffff815113d0>] unregister_pernet_operations+0x70/0xb0
> [ 37.122633] [<ffffffff8151143e>] unregister_pernet_device+0x2e/0x60
> [ 37.129005] [<ffffffffa038f86f>] sit_cleanup+0x2d/0x7be [sit]
> [ 37.134864] [<ffffffff810b0aee>] SyS_delete_module+0x19e/0x2a0
> [ 37.140801] [<ffffffff8162983b>] tracesys+0xdd/0xe2
> [ 37.145779] ---[ end trace e45e22e840e55d00 ]---
> [ 37.150427] ------------[ cut here ]------------
>
> Investigating differences between 3.10.27 and newer kernels, I found
> that the below change is not there. It was part of commit 205983c43700
> "sit: allow to use rtnl ops on fb tunnel" which happens to be
> backported to 3.10 but in 3.10 backport commit 20300db1bd1b9 this part
> of the commit is missing.
Thank you for fixing this. It's the same problem that commit 22c3ec552c29
("ip6tnl: fix use after free of fb_tnl_dev", branch linux-3.10.y).
The upstream commit 205983c43700 ("sit: allow to use rtnl ops on fb tunnel")
(backported into linux-3.10.y) left a bug which was fixed upstream by commit
9434266f2c64 ("sit: fix use after free of fb_tunnel_dev").
The problem is a bit different in linux-3.10.y, because there is no x-netns
support (upstream commit 5e6700b3bf98 ("sit: add support of x-netns")).
When sit.ko is unloaded, FB device is deleted by rtnl_link_unregister()
and then we try to delete it again in sit_exit_net().
>
> When I add this change, the removing of the module no longer gives this
> bug.
>
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
>
> diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
> index 0491264..02300e8 100644
> --- a/net/ipv6/sit.c
> +++ b/net/ipv6/sit.c
> @@ -1592,7 +1592,6 @@ static void __net_exit sit_exit_net(struct net *net)
>
> rtnl_lock();
> sit_destroy_tunnels(sitn, &list);
> - unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
> unregister_netdevice_many(&list);
> rtnl_unlock();
> }
>
^ permalink raw reply
* [PATCH v3 net-next 0/2] bonding: fix locking in bond_ab_arp_probe
From: Veaceslav Falico @ 2014-01-27 13:33 UTC (permalink / raw)
To: netdev; +Cc: Jay Vosburgh, Andy Gospodarek, Veaceslav Falico
Hi,
After the latest patches, on every call of bond_ab_arp_probe() without an
active slave I see the following warning:
[ 7.912314] RTNL: assertion failed at net/core/dev.c (4494)
...
[ 7.922495] [<ffffffff817acc6f>] dump_stack+0x51/0x72
[ 7.923714] [<ffffffff8168795e>] netdev_master_upper_dev_get+0x6e/0x70
[ 7.924940] [<ffffffff816a2a66>] rtnl_link_fill+0x116/0x260
[ 7.926143] [<ffffffff817acc6f>] ? dump_stack+0x51/0x72
[ 7.927333] [<ffffffff816a350c>] rtnl_fill_ifinfo+0x95c/0xb90
[ 7.928529] [<ffffffff8167af2b>] ? __kmalloc_reserve+0x3b/0xa0
[ 7.929681] [<ffffffff8167bfcf>] ? __alloc_skb+0x9f/0x1e0
[ 7.930827] [<ffffffff816a3b64>] rtmsg_ifinfo+0x84/0x100
[ 7.931960] [<ffffffffa00bca07>] bond_ab_arp_probe+0x1a7/0x370 [bonding]
[ 7.933133] [<ffffffffa00bcd78>] bond_activebackup_arp_mon+0x1a8/0x2f0 [bonding]
...
It happens because in bond_ab_arp_probe() we change the flags of a slave
without holding the RTNL lock.
To fix this - remove the useless curr_slave_lock, RCUify the function and lock
RTNL while changing the slave's flags. Also, remove bond_ab_arp_probe() from
under any locks in bond_activebackup_arp_mon().
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: netdev@vger.kernel.org
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_main.c | 48 +++++++++++++++++++++++------------------
1 file changed, 27 insertions(+), 21 deletions(-)
^ permalink raw reply
* [PATCH v3 net-next 1/2] bonding: RCUify bond_ab_arp_probe
From: Veaceslav Falico @ 2014-01-27 13:33 UTC (permalink / raw)
To: netdev; +Cc: Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1390829613-1842-1-git-send-email-vfalico@redhat.com>
Currently bond_ab_arp_probe() is always called under rcu_read_lock(),
however to work with curr_active_slave we're still holding the
curr_slave_lock.
To remove that curr_slave_lock - rcu_dereference the bond's
curr_active_slave and use it further - so that we're sure the slave won't
go away, and we don't care if it will change in the meanwhile.
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_main.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a7db819..27e6fdd 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2605,25 +2605,21 @@ do_failover:
static void bond_ab_arp_probe(struct bonding *bond)
{
struct slave *slave, *before = NULL, *new_slave = NULL,
- *curr_arp_slave = rcu_dereference(bond->current_arp_slave);
+ *curr_arp_slave = rcu_dereference(bond->current_arp_slave),
+ *curr_active_slave = rcu_dereference(bond->curr_active_slave);
struct list_head *iter;
bool found = false;
- read_lock(&bond->curr_slave_lock);
-
- if (curr_arp_slave && bond->curr_active_slave)
+ if (curr_arp_slave && curr_active_slave)
pr_info("PROBE: c_arp %s && cas %s BAD\n",
curr_arp_slave->dev->name,
- bond->curr_active_slave->dev->name);
+ curr_active_slave->dev->name);
- if (bond->curr_active_slave) {
- bond_arp_send_all(bond, bond->curr_active_slave);
- read_unlock(&bond->curr_slave_lock);
+ if (curr_active_slave) {
+ bond_arp_send_all(bond, curr_active_slave);
return;
}
- read_unlock(&bond->curr_slave_lock);
-
/* if we don't have a curr_active_slave, search for the next available
* backup slave from the current_arp_slave and make it the candidate
* for becoming the curr_active_slave
--
1.8.4
^ permalink raw reply related
* [PATCH v3 net-next 2/2] bonding: restructure locking of bond_ab_arp_probe()
From: Veaceslav Falico @ 2014-01-27 13:33 UTC (permalink / raw)
To: netdev; +Cc: Veaceslav Falico, Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1390829613-1842-1-git-send-email-vfalico@redhat.com>
Currently we're calling it from under RCU context, however we're using some
functions that require rtnl to be held.
Fix this by restructuring the locking - don't call it under any locks,
acquire rcu_read_lock() if we're sending _only_ (i.e. we have the active
slave present), and use rtnl locking otherwise - if we need to modify
(in)active flags of a slave.
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
Notes:
v2->v3:
Use rtnl_trylock(), not to race with queue cancelling.
v1->v2:
Add two steps - one for sending/rcu, another for modifying/rtnl.
drivers/net/bonding/bond_main.c | 38 ++++++++++++++++++++++++--------------
1 file changed, 24 insertions(+), 14 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 27e6fdd..7de0256 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2605,11 +2605,14 @@ do_failover:
static void bond_ab_arp_probe(struct bonding *bond)
{
struct slave *slave, *before = NULL, *new_slave = NULL,
- *curr_arp_slave = rcu_dereference(bond->current_arp_slave),
- *curr_active_slave = rcu_dereference(bond->curr_active_slave);
+ *curr_arp_slave, *curr_active_slave;
struct list_head *iter;
bool found = false;
+ rcu_read_lock();
+ curr_arp_slave = rcu_dereference(bond->current_arp_slave);
+ curr_active_slave = rcu_dereference(bond->curr_active_slave);
+
if (curr_arp_slave && curr_active_slave)
pr_info("PROBE: c_arp %s && cas %s BAD\n",
curr_arp_slave->dev->name,
@@ -2617,23 +2620,31 @@ static void bond_ab_arp_probe(struct bonding *bond)
if (curr_active_slave) {
bond_arp_send_all(bond, curr_active_slave);
+ rcu_read_unlock();
return;
}
+ rcu_read_unlock();
/* if we don't have a curr_active_slave, search for the next available
* backup slave from the current_arp_slave and make it the candidate
* for becoming the curr_active_slave
*/
+ rtnl_lock();
+ /* curr_arp_slave might have gone away */
+ curr_arp_slave = rcu_dereference(bond->current_arp_slave);
+
if (!curr_arp_slave) {
- curr_arp_slave = bond_first_slave_rcu(bond);
- if (!curr_arp_slave)
+ curr_arp_slave = bond_first_slave(bond);
+ if (!curr_arp_slave) {
+ rtnl_unlock();
return;
+ }
}
bond_set_slave_inactive_flags(curr_arp_slave);
- bond_for_each_slave_rcu(bond, slave, iter) {
+ bond_for_each_slave(bond, slave, iter) {
if (!found && !before && IS_UP(slave->dev))
before = slave;
@@ -2663,21 +2674,24 @@ static void bond_ab_arp_probe(struct bonding *bond)
if (!new_slave && before)
new_slave = before;
- if (!new_slave)
+ if (!new_slave) {
+ rtnl_unlock();
return;
+ }
new_slave->link = BOND_LINK_BACK;
bond_set_slave_active_flags(new_slave);
bond_arp_send_all(bond, new_slave);
new_slave->jiffies = jiffies;
rcu_assign_pointer(bond->current_arp_slave, new_slave);
+ rtnl_unlock();
}
static void bond_activebackup_arp_mon(struct work_struct *work)
{
struct bonding *bond = container_of(work, struct bonding,
arp_work.work);
- bool should_notify_peers = false;
+ bool should_notify_peers = false, should_commit = false;
int delta_in_ticks;
delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
@@ -2686,12 +2700,11 @@ static void bond_activebackup_arp_mon(struct work_struct *work)
goto re_arm;
rcu_read_lock();
-
should_notify_peers = bond_should_notify_peers(bond);
+ should_commit = bond_ab_arp_inspect(bond);
+ rcu_read_unlock();
- if (bond_ab_arp_inspect(bond)) {
- rcu_read_unlock();
-
+ if (should_commit) {
/* Race avoidance with bond_close flush of workqueue */
if (!rtnl_trylock()) {
delta_in_ticks = 1;
@@ -2700,13 +2713,10 @@ static void bond_activebackup_arp_mon(struct work_struct *work)
}
bond_ab_arp_commit(bond);
-
rtnl_unlock();
- rcu_read_lock();
}
bond_ab_arp_probe(bond);
- rcu_read_unlock();
re_arm:
if (bond->params.arp_interval)
--
1.8.4
^ permalink raw reply related
* Re: [PATCH v3 net-next 2/2] bonding: restructure locking of bond_ab_arp_probe()
From: Veaceslav Falico @ 2014-01-27 13:36 UTC (permalink / raw)
To: netdev; +Cc: Jay Vosburgh, Andy Gospodarek
In-Reply-To: <1390829613-1842-3-git-send-email-vfalico@redhat.com>
On Mon, Jan 27, 2014 at 02:33:33PM +0100, Veaceslav Falico wrote:
>Currently we're calling it from under RCU context, however we're using some
>functions that require rtnl to be held.
>
>Fix this by restructuring the locking - don't call it under any locks,
>acquire rcu_read_lock() if we're sending _only_ (i.e. we have the active
>slave present), and use rtnl locking otherwise - if we need to modify
>(in)active flags of a slave.
>
>CC: Jay Vosburgh <fubar@us.ibm.com>
>CC: Andy Gospodarek <andy@greyhouse.net>
>Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
>---
>
>Notes:
> v2->v3:
> Use rtnl_trylock(), not to race with queue cancelling.
>
> v1->v2:
> Add two steps - one for sending/rcu, another for modifying/rtnl.
>
> drivers/net/bonding/bond_main.c | 38 ++++++++++++++++++++++++--------------
> 1 file changed, 24 insertions(+), 14 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 27e6fdd..7de0256 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -2605,11 +2605,14 @@ do_failover:
> static void bond_ab_arp_probe(struct bonding *bond)
> {
> struct slave *slave, *before = NULL, *new_slave = NULL,
>- *curr_arp_slave = rcu_dereference(bond->current_arp_slave),
>- *curr_active_slave = rcu_dereference(bond->curr_active_slave);
>+ *curr_arp_slave, *curr_active_slave;
> struct list_head *iter;
> bool found = false;
>
>+ rcu_read_lock();
>+ curr_arp_slave = rcu_dereference(bond->current_arp_slave);
>+ curr_active_slave = rcu_dereference(bond->curr_active_slave);
>+
> if (curr_arp_slave && curr_active_slave)
> pr_info("PROBE: c_arp %s && cas %s BAD\n",
> curr_arp_slave->dev->name,
>@@ -2617,23 +2620,31 @@ static void bond_ab_arp_probe(struct bonding *bond)
>
> if (curr_active_slave) {
> bond_arp_send_all(bond, curr_active_slave);
>+ rcu_read_unlock();
> return;
> }
>+ rcu_read_unlock();
>
> /* if we don't have a curr_active_slave, search for the next available
> * backup slave from the current_arp_slave and make it the candidate
> * for becoming the curr_active_slave
> */
>
>+ rtnl_lock();
Right, git commit --amend would be great after git add...
Sorry, forgot to actually commit changes, will re-send.
>+ /* curr_arp_slave might have gone away */
>+ curr_arp_slave = rcu_dereference(bond->current_arp_slave);
>+
> if (!curr_arp_slave) {
>- curr_arp_slave = bond_first_slave_rcu(bond);
>- if (!curr_arp_slave)
>+ curr_arp_slave = bond_first_slave(bond);
>+ if (!curr_arp_slave) {
>+ rtnl_unlock();
> return;
>+ }
> }
>
> bond_set_slave_inactive_flags(curr_arp_slave);
>
>- bond_for_each_slave_rcu(bond, slave, iter) {
>+ bond_for_each_slave(bond, slave, iter) {
> if (!found && !before && IS_UP(slave->dev))
> before = slave;
>
>@@ -2663,21 +2674,24 @@ static void bond_ab_arp_probe(struct bonding *bond)
> if (!new_slave && before)
> new_slave = before;
>
>- if (!new_slave)
>+ if (!new_slave) {
>+ rtnl_unlock();
> return;
>+ }
>
> new_slave->link = BOND_LINK_BACK;
> bond_set_slave_active_flags(new_slave);
> bond_arp_send_all(bond, new_slave);
> new_slave->jiffies = jiffies;
> rcu_assign_pointer(bond->current_arp_slave, new_slave);
>+ rtnl_unlock();
> }
>
> static void bond_activebackup_arp_mon(struct work_struct *work)
> {
> struct bonding *bond = container_of(work, struct bonding,
> arp_work.work);
>- bool should_notify_peers = false;
>+ bool should_notify_peers = false, should_commit = false;
> int delta_in_ticks;
>
> delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
>@@ -2686,12 +2700,11 @@ static void bond_activebackup_arp_mon(struct work_struct *work)
> goto re_arm;
>
> rcu_read_lock();
>-
> should_notify_peers = bond_should_notify_peers(bond);
>+ should_commit = bond_ab_arp_inspect(bond);
>+ rcu_read_unlock();
>
>- if (bond_ab_arp_inspect(bond)) {
>- rcu_read_unlock();
>-
>+ if (should_commit) {
> /* Race avoidance with bond_close flush of workqueue */
> if (!rtnl_trylock()) {
> delta_in_ticks = 1;
>@@ -2700,13 +2713,10 @@ static void bond_activebackup_arp_mon(struct work_struct *work)
> }
>
> bond_ab_arp_commit(bond);
>-
> rtnl_unlock();
>- rcu_read_lock();
> }
>
> bond_ab_arp_probe(bond);
>- rcu_read_unlock();
>
> re_arm:
> if (bond->params.arp_interval)
>--
>1.8.4
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox