Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: linux-next: manual merge of the net-next tree with the net tree
From: David Miller @ 2012-09-25  5:13 UTC (permalink / raw)
  To: eric.dumazet; +Cc: sfr, netdev, linux-next, linux-kernel, edumazet
In-Reply-To: <1348549842.26828.1897.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 25 Sep 2012 07:10:42 +0200

> Oops, my bad, net/ipv4/raw.c changes in 5640f7685831 ("net: use a per
> task frag allocator") should not be there :
> 
> I accidentally left a debugging version of the patch I sent to fix the
> icmp bug.
> 
> Sorry David for this, I am not sure how I can help on this ?

The thing to do is send me a patch to revert the raw.c change from
net-next, right?

^ permalink raw reply

* IT Notification -- Attachment Removed
From: Postmaster @ 2012-09-25  5:01 UTC (permalink / raw)
  To: netdev

-------------------------------------------------------------------
IT has detected restricted attachments within an email message
-------------------------------------------------------------------

>From      : netdev@vger.kernel.org
To        : nandy@ntb.org.np
Subject   : [***SPAM*** Score/Req: 05.1/5.0] Mail System Error - Returned Mail
Message-ID: 

---------------------
Attachment(s) removed
---------------------
TEXT.SCR

^ permalink raw reply

* IT Notification -- Attachment Removed
From: Postmaster @ 2012-09-25  5:01 UTC (permalink / raw)
  To: netdev

-------------------------------------------------------------------
IT has detected restricted attachments within an email message
-------------------------------------------------------------------

>From      : netdev@vger.kernel.org
To        : abaral@ntb.org.np
Subject   : Delivery reports about your e-mail
Message-ID: 

---------------------
Attachment(s) removed
---------------------
text.zip

^ permalink raw reply

* Re: linux-next: manual merge of the net-next tree with the net tree
From: Eric Dumazet @ 2012-09-25  5:10 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: David Miller, netdev, linux-next, linux-kernel, Eric Dumazet
In-Reply-To: <20120925123438.bbf3126889513ee5cc195e5c@canb.auug.org.au>

On Tue, 2012-09-25 at 12:34 +1000, Stephen Rothwell wrote:
> Hi all,
> 
> Today's linux-next merge of the net-next tree got a conflict in
> net/ipv4/raw.c between commit ab43ed8b7490 ("ipv4: raw: fix icmp_filter
> ()") from the net tree and commit 5640f7685831 ("net: use a per task frag
> allocator") from the net-next tree.
> 
> They are basically the same patch (for this file) except the net-next
> version adds two pr_err() calls. I used the net-next version and can carry
> the fix as necessary (no action is required).
> 
> I do wonder if this change belongs in the net-next patch?

Oops, my bad, net/ipv4/raw.c changes in 5640f7685831 ("net: use a per
task frag allocator") should not be there :

I accidentally left a debugging version of the patch I sent to fix the
icmp bug.

Sorry David for this, I am not sure how I can help on this ?

^ permalink raw reply

* [PATCH] team: fix return value check
From: Wei Yongjun @ 2012-09-25  4:29 UTC (permalink / raw)
  To: jpirko; +Cc: yongjun_wei, netdev

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

In case of error, the function genlmsg_put() returns NULL pointer
not ERR_PTR(). The IS_ERR() test in the return value check should
be replaced with NULL test.

dpatch engine is used to auto generate this patch.
(https://github.com/weiyj/dpatch)

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
---
 drivers/net/team/team.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 341b65d..e19da26 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1652,8 +1652,8 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
 
 	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
 			  &team_nl_family, 0, TEAM_CMD_NOOP);
-	if (IS_ERR(hdr)) {
-		err = PTR_ERR(hdr);
+	if (!hdr) {
+		err = -EMSGSIZE;
 		goto err_msg_put;
 	}
 
@@ -1847,8 +1847,8 @@ start_again:
 
 	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags | NLM_F_MULTI,
 			  TEAM_CMD_OPTIONS_GET);
-	if (IS_ERR(hdr))
-		return PTR_ERR(hdr);
+	if (!hdr)
+		return -EMSGSIZE;
 
 	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
 		goto nla_put_failure;
@@ -2067,8 +2067,8 @@ static int team_nl_fill_port_list_get(struct sk_buff *skb,
 
 	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags,
 			  TEAM_CMD_PORT_LIST_GET);
-	if (IS_ERR(hdr))
-		return PTR_ERR(hdr);
+	if (!hdr)
+		return -EMSGSIZE;
 
 	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
 		goto nla_put_failure;

^ permalink raw reply related

* [PATCH] l2tp: fix return value check
From: Wei Yongjun @ 2012-09-25  4:29 UTC (permalink / raw)
  To: davem; +Cc: yongjun_wei, netdev

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

In case of error, the function genlmsg_put() returns NULL pointer
not ERR_PTR(). The IS_ERR() test in the return value check should
be replaced with NULL test.

dpatch engine is used to auto generate this patch.
(https://github.com/weiyj/dpatch)

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
---
 net/l2tp/l2tp_netlink.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index d71cd92..6f93635 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -80,8 +80,8 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
 
 	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
 			  &l2tp_nl_family, 0, L2TP_CMD_NOOP);
-	if (IS_ERR(hdr)) {
-		ret = PTR_ERR(hdr);
+	if (!hdr) {
+		ret = -EMSGSIZE;
 		goto err_out;
 	}
 
@@ -250,8 +250,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
 
 	hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags,
 			  L2TP_CMD_TUNNEL_GET);
-	if (IS_ERR(hdr))
-		return PTR_ERR(hdr);
+	if (!hdr)
+		return -EMSGSIZE;
 
 	if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) ||
 	    nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) ||
@@ -617,8 +617,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags
 	sk = tunnel->sock;
 
 	hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
-	if (IS_ERR(hdr))
-		return PTR_ERR(hdr);
+	if (!hdr)
+		return -EMSGSIZE;
 
 	if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) ||
 	    nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) ||

^ permalink raw reply related

* [PATCH net-next 4/4] tunnel: drop packet if ECN present with not-ECT
From: Stephen Hemminger @ 2012-09-25  4:12 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <20120925041222.056704869@vyatta.com>

[-- Attachment #1: gre-rfc-6040.patch --]
[-- Type: text/plain, Size: 7420 bytes --]

Linux tunnels were written before RFC6040 and therefore never
implemented the corner case of ECN getting set in the outer header
and the inner header not being ready for it.

Section 4.2.  Default Tunnel Egress Behaviour.
 The new code addresses:
 o If the inner ECN field is Not-ECT, the decapsulator MUST NOT
      propagate any other ECN codepoint onwards.  This is because the
      inner Not-ECT marking is set by transports that rely on dropped
      packets as an indication of congestion and would not understand or
      respond to any other ECN codepoint [RFC4774].  Specifically:

      *  If the inner ECN field is Not-ECT and the outer ECN field is
         CE, the decapsulator MUST drop the packet.

      *  If the inner ECN field is Not-ECT and the outer ECN field is
         Not-ECT, ECT(0), or ECT(1), the decapsulator MUST forward the
         outgoing packet with the ECN field cleared to Not-ECT.

Overloads rx_frame_error to keep track of this error.
This was caught by Chris Wright while reviewing the new VXLAN tunnel.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

---
Note: supersedes earlier patch which only did Ipv4 GRE.

 net/ipv4/ip_gre.c            |   23 +++++++++++------
 net/ipv4/ipip.c              |   19 ++++++++------
 net/ipv4/xfrm4_mode_tunnel.c |   12 +++++---
 net/ipv6/ip6_gre.c           |   58 +++++++++++++++++++++++--------------------
 4 files changed, 67 insertions(+), 45 deletions(-)

--- a/net/ipv4/ip_gre.c	2012-09-24 18:11:52.000000000 -0700
+++ b/net/ipv4/ip_gre.c	2012-09-24 18:13:13.098719907 -0700
@@ -204,7 +204,9 @@ static struct rtnl_link_stats64 *ipgre_g
 	tot->rx_crc_errors = dev->stats.rx_crc_errors;
 	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
 	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_frame_errors = dev->stats.rx_frame_errors;
 	tot->rx_errors = dev->stats.rx_errors;
+
 	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
 	tot->tx_dropped = dev->stats.tx_dropped;
@@ -587,15 +589,16 @@ static void ipgre_err(struct sk_buff *sk
 	t->err_time = jiffies;
 }
 
-static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
+static int ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
 {
 	if (INET_ECN_is_ce(iph->tos)) {
 		if (skb->protocol == htons(ETH_P_IP)) {
-			IP_ECN_set_ce(ip_hdr(skb));
+			return IP_ECN_set_ce(ip_hdr(skb));
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
-			IP6_ECN_set_ce(ipv6_hdr(skb));
+			return IP6_ECN_set_ce(ipv6_hdr(skb));
 		}
 	}
+	return 1;
 }
 
 static inline u8
@@ -723,17 +726,21 @@ static int ipgre_rcv(struct sk_buff *skb
 			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 		}
 
+		__skb_tunnel_rx(skb, tunnel->dev);
+
+		skb_reset_network_header(skb);
+		if (!ipgre_ecn_decapsulate(iph, skb)) {
+			++tunnel->dev->stats.rx_frame_errors;
+			++tunnel->dev->stats.rx_errors;
+			goto drop;
+		}
+
 		tstats = this_cpu_ptr(tunnel->dev->tstats);
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->rx_packets++;
 		tstats->rx_bytes += skb->len;
 		u64_stats_update_end(&tstats->syncp);
 
-		__skb_tunnel_rx(skb, tunnel->dev);
-
-		skb_reset_network_header(skb);
-		ipgre_ecn_decapsulate(iph, skb);
-
 		netif_rx(skb);
 
 		return 0;
--- a/net/ipv4/ipip.c	2012-09-24 18:12:33.000000000 -0700
+++ b/net/ipv4/ipip.c	2012-09-24 18:13:13.098719907 -0700
@@ -400,13 +400,14 @@ out:
 	return err;
 }
 
-static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
-					struct sk_buff *skb)
+static int ipip_ecn_decapsulate(const struct iphdr *outer_iph,
+				struct sk_buff *skb)
 {
 	struct iphdr *inner_iph = ip_hdr(skb);
 
 	if (INET_ECN_is_ce(outer_iph->tos))
-		IP_ECN_set_ce(inner_iph);
+		return IP_ECN_set_ce(inner_iph);
+	return 1;
 }
 
 static int ipip_rcv(struct sk_buff *skb)
@@ -430,16 +431,20 @@ static int ipip_rcv(struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_IP);
 		skb->pkt_type = PACKET_HOST;
 
+		__skb_tunnel_rx(skb, tunnel->dev);
+
+		if (!ipip_ecn_decapsulate(iph, skb)) {
+			++tunnel->dev->stats.rx_frame_errors;
+			++tunnel->dev->stats.rx_errors;
+			return 0;
+		}
+
 		tstats = this_cpu_ptr(tunnel->dev->tstats);
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->rx_packets++;
 		tstats->rx_bytes += skb->len;
 		u64_stats_update_end(&tstats->syncp);
 
-		__skb_tunnel_rx(skb, tunnel->dev);
-
-		ipip_ecn_decapsulate(iph, skb);
-
 		netif_rx(skb);
 		return 0;
 	}
--- a/net/ipv6/ip6_gre.c	2012-09-24 18:12:00.783449055 -0700
+++ b/net/ipv6/ip6_gre.c	2012-09-24 18:13:13.098719907 -0700
@@ -149,7 +149,9 @@ static struct rtnl_link_stats64 *ip6gre_
 	tot->rx_crc_errors = dev->stats.rx_crc_errors;
 	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
 	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_frame_errors = dev->stats.rx_frame_errors;
 	tot->rx_errors = dev->stats.rx_errors;
+
 	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
 	tot->tx_dropped = dev->stats.tx_dropped;
@@ -489,26 +491,28 @@ static void ip6gre_err(struct sk_buff *s
 	t->err_time = jiffies;
 }
 
-static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t,
-		const struct ipv6hdr *ipv6h, struct sk_buff *skb)
-{
-	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
-
-	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
-
-	if (INET_ECN_is_ce(dsfield))
-		IP_ECN_set_ce(ip_hdr(skb));
-}
+static int ip6gre_ecn_decapsulate(const struct ip6_tnl *t,
+				  const struct ipv6hdr *ipv6h,
+				  struct sk_buff *skb)
+{
+	__u8 dsfield = ipv6_get_dsfield(ipv6h);
+	if (skb->protocol == htons(ETH_P_IP)) {
+		dsfield  &= ~INET_ECN_MASK;
+
+		if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+			ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK,
+					    dsfield);
+		if (INET_ECN_is_ce(dsfield))
+			return IP_ECN_set_ce(ip_hdr(skb));
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+			ipv6_copy_dscp(dsfield, ipv6_hdr(skb));
 
-static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t,
-		const struct ipv6hdr *ipv6h, struct sk_buff *skb)
-{
-	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
+		if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+			return IP6_ECN_set_ce(ipv6_hdr(skb));
+	}
 
-	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
-		IP6_ECN_set_ce(ipv6_hdr(skb));
+	return 1;
 }
 
 static int ip6gre_rcv(struct sk_buff *skb)
@@ -625,20 +629,22 @@ static int ip6gre_rcv(struct sk_buff *sk
 			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 		}
 
+		__skb_tunnel_rx(skb, tunnel->dev);
+
+		skb_reset_network_header(skb);
+
+		if (!ip6gre_ecn_decapsulate(tunnel, ipv6h, skb)) {
+			++tunnel->dev->stats.rx_frame_errors;
+			++tunnel->dev->stats.rx_errors;
+			goto drop;
+		}
+
 		tstats = this_cpu_ptr(tunnel->dev->tstats);
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->rx_packets++;
 		tstats->rx_bytes += skb->len;
 		u64_stats_update_end(&tstats->syncp);
 
-		__skb_tunnel_rx(skb, tunnel->dev);
-
-		skb_reset_network_header(skb);
-		if (skb->protocol == htons(ETH_P_IP))
-			ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb);
-		else if (skb->protocol == htons(ETH_P_IPV6))
-			ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb);
-
 		netif_rx(skb);
 
 		return 0;

^ permalink raw reply

* [PATCH net-next 3/4] xfrm: remove extranous rcu_read_lock
From: Stephen Hemminger @ 2012-09-25  4:12 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <20120925041222.056704869@vyatta.com>

[-- Attachment #1: ipip-no-rcu.patch --]
[-- Type: text/plain, Size: 3493 bytes --]

The handlers for xfrm_tunnel are always invoked with rcu read lock
already.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

---
 net/ipv4/ip_vti.c |    5 -----
 net/ipv4/ipip.c   |    9 +--------
 net/ipv6/sit.c    |    6 ------
 3 files changed, 1 insertion(+), 19 deletions(-)

--- a/net/ipv4/ip_vti.c	2012-08-15 08:59:22.958704223 -0700
+++ b/net/ipv4/ip_vti.c	2012-09-24 18:01:20.521904773 -0700
@@ -304,7 +304,6 @@ static int vti_err(struct sk_buff *skb,
 
 	err = -ENOENT;
 
-	rcu_read_lock();
 	t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 	if (t == NULL)
 		goto out;
@@ -326,7 +325,6 @@ static int vti_err(struct sk_buff *skb,
 		t->err_count = 1;
 	t->err_time = jiffies;
 out:
-	rcu_read_unlock();
 	return err;
 }
 
@@ -336,7 +334,6 @@ static int vti_rcv(struct sk_buff *skb)
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph = ip_hdr(skb);
 
-	rcu_read_lock();
 	tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 	if (tunnel != NULL) {
 		struct pcpu_tstats *tstats;
@@ -348,10 +345,8 @@ static int vti_rcv(struct sk_buff *skb)
 		u64_stats_update_end(&tstats->syncp);
 
 		skb->dev = tunnel->dev;
-		rcu_read_unlock();
 		return 1;
 	}
-	rcu_read_unlock();
 
 	return -1;
 }
--- a/net/ipv4/ipip.c	2012-09-24 17:59:26.127058210 -0700
+++ b/net/ipv4/ipip.c	2012-09-24 18:00:28.410430210 -0700
@@ -365,8 +365,6 @@ static int ipip_err(struct sk_buff *skb,
 	}
 
 	err = -ENOENT;
-
-	rcu_read_lock();
 	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 	if (t == NULL)
 		goto out;
@@ -398,7 +396,7 @@ static int ipip_err(struct sk_buff *skb,
 		t->err_count = 1;
 	t->err_time = jiffies;
 out:
-	rcu_read_unlock();
+
 	return err;
 }
 
@@ -416,13 +414,11 @@ static int ipip_rcv(struct sk_buff *skb)
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph = ip_hdr(skb);
 
-	rcu_read_lock();
 	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 	if (tunnel != NULL) {
 		struct pcpu_tstats *tstats;
 
 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-			rcu_read_unlock();
 			kfree_skb(skb);
 			return 0;
 		}
@@ -445,11 +441,8 @@ static int ipip_rcv(struct sk_buff *skb)
 		ipip_ecn_decapsulate(iph, skb);
 
 		netif_rx(skb);
-
-		rcu_read_unlock();
 		return 0;
 	}
-	rcu_read_unlock();
 
 	return -1;
 }
--- a/net/ipv6/sit.c	2012-08-15 08:59:22.986703941 -0700
+++ b/net/ipv6/sit.c	2012-09-24 18:02:28.753216804 -0700
@@ -545,7 +545,6 @@ static int ipip6_err(struct sk_buff *skb
 
 	err = -ENOENT;
 
-	rcu_read_lock();
 	t = ipip6_tunnel_lookup(dev_net(skb->dev),
 				skb->dev,
 				iph->daddr,
@@ -579,7 +578,6 @@ static int ipip6_err(struct sk_buff *skb
 		t->err_count = 1;
 	t->err_time = jiffies;
 out:
-	rcu_read_unlock();
 	return err;
 }
 
@@ -599,7 +597,6 @@ static int ipip6_rcv(struct sk_buff *skb
 
 	iph = ip_hdr(skb);
 
-	rcu_read_lock();
 	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
 				     iph->saddr, iph->daddr);
 	if (tunnel != NULL) {
@@ -615,7 +612,6 @@ static int ipip6_rcv(struct sk_buff *skb
 		if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
 		    !isatap_chksrc(skb, iph, tunnel)) {
 			tunnel->dev->stats.rx_errors++;
-			rcu_read_unlock();
 			kfree_skb(skb);
 			return 0;
 		}
@@ -630,12 +626,10 @@ static int ipip6_rcv(struct sk_buff *skb
 
 		netif_rx(skb);
 
-		rcu_read_unlock();
 		return 0;
 	}
 
 	/* no tunnel matched,  let upstream know, ipsec may handle it */
-	rcu_read_unlock();
 	return 1;
 out:
 	kfree_skb(skb);

^ permalink raw reply

* [PATCH net-next 0/4] Tunnel related patches
From: Stephen Hemminger @ 2012-09-25  4:12 UTC (permalink / raw)
  To: davem; +Cc: netdev

These are a set of small tunnel related patches.

^ permalink raw reply

* [PATCH net-next 2/4] gre: remove unnecessary rcu_read_lock/unlock
From: Stephen Hemminger @ 2012-09-25  4:12 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <20120925041222.056704869@vyatta.com>

[-- Attachment #1: gre-rcu.patch --]
[-- Type: text/plain, Size: 4043 bytes --]

The gre function pointers for receive and error handling are
always called (from gre.c) with rcu_read_lock already held.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


---
 net/ipv4/ip_gre.c  |   19 ++++++-------------
 net/ipv6/ip6_gre.c |   10 +---------
 2 files changed, 7 insertions(+), 22 deletions(-)

--- a/net/ipv6/ip6_gre.c	2012-09-24 17:33:22.766821472 -0700
+++ b/net/ipv6/ip6_gre.c	2012-09-24 18:12:00.783449055 -0700
@@ -437,14 +437,12 @@ static void ip6gre_err(struct sk_buff *s
 	ipv6h = (const struct ipv6hdr *)skb->data;
 	p = (__be16 *)(skb->data + offset);
 
-	rcu_read_lock();
-
 	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
 				flags & GRE_KEY ?
 				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
 				p[1]);
 	if (t == NULL)
-		goto out;
+		return;
 
 	switch (type) {
 		__u32 teli;
@@ -489,8 +487,6 @@ static void ip6gre_err(struct sk_buff *s
 	else
 		t->err_count = 1;
 	t->err_time = jiffies;
-out:
-	rcu_read_unlock();
 }
 
 static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t,
@@ -528,7 +524,7 @@ static int ip6gre_rcv(struct sk_buff *sk
 	__be16 gre_proto;
 
 	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
-		goto drop_nolock;
+		goto drop;
 
 	ipv6h = ipv6_hdr(skb);
 	h = skb->data;
@@ -539,7 +535,7 @@ static int ip6gre_rcv(struct sk_buff *sk
 		   - We do not support routing headers.
 		 */
 		if (flags&(GRE_VERSION|GRE_ROUTING))
-			goto drop_nolock;
+			goto drop;
 
 		if (flags&GRE_CSUM) {
 			switch (skb->ip_summed) {
@@ -567,7 +563,6 @@ static int ip6gre_rcv(struct sk_buff *sk
 
 	gre_proto = *(__be16 *)(h + 2);
 
-	rcu_read_lock();
 	tunnel = ip6gre_tunnel_lookup(skb->dev,
 					  &ipv6h->saddr, &ipv6h->daddr, key,
 					  gre_proto);
@@ -646,14 +641,11 @@ static int ip6gre_rcv(struct sk_buff *sk
 
 		netif_rx(skb);
 
-		rcu_read_unlock();
 		return 0;
 	}
 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 
 drop:
-	rcu_read_unlock();
-drop_nolock:
 	kfree_skb(skb);
 	return 0;
 }
--- a/net/ipv4/ip_gre.c	2012-09-24 17:33:33.298715280 -0700
+++ b/net/ipv4/ip_gre.c	2012-09-24 18:11:52.383533752 -0700
@@ -557,37 +557,34 @@ static void ipgre_err(struct sk_buff *sk
 		break;
 	}
 
-	rcu_read_lock();
 	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
 				flags, key, p[1]);
 
 	if (t == NULL)
-		goto out;
+		return;
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 				 t->parms.link, 0, IPPROTO_GRE, 0);
-		goto out;
+		return;
 	}
 	if (type == ICMP_REDIRECT) {
 		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
 			      IPPROTO_GRE, 0);
-		goto out;
+		return;
 	}
 	if (t->parms.iph.daddr == 0 ||
 	    ipv4_is_multicast(t->parms.iph.daddr))
-		goto out;
+		return;
 
 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
-		goto out;
+		return;
 
 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 		t->err_count++;
 	else
 		t->err_count = 1;
 	t->err_time = jiffies;
-out:
-	rcu_read_unlock();
 }
 
 static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
@@ -625,7 +622,7 @@ static int ipgre_rcv(struct sk_buff *skb
 	__be16 gre_proto;
 
 	if (!pskb_may_pull(skb, 16))
-		goto drop_nolock;
+		goto drop;
 
 	iph = ip_hdr(skb);
 	h = skb->data;
@@ -636,7 +633,7 @@ static int ipgre_rcv(struct sk_buff *skb
 		   - We do not support routing headers.
 		 */
 		if (flags&(GRE_VERSION|GRE_ROUTING))
-			goto drop_nolock;
+			goto drop;
 
 		if (flags&GRE_CSUM) {
 			switch (skb->ip_summed) {
@@ -664,7 +661,6 @@ static int ipgre_rcv(struct sk_buff *skb
 
 	gre_proto = *(__be16 *)(h + 2);
 
-	rcu_read_lock();
 	tunnel = ipgre_tunnel_lookup(skb->dev,
 				     iph->saddr, iph->daddr, flags, key,
 				     gre_proto);
@@ -740,14 +736,11 @@ static int ipgre_rcv(struct sk_buff *skb
 
 		netif_rx(skb);
 
-		rcu_read_unlock();
 		return 0;
 	}
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 
 drop:
-	rcu_read_unlock();
-drop_nolock:
 	kfree_skb(skb);
 	return 0;
 }

^ permalink raw reply

* [PATCH net-next 1/4] gre: fix handling of key 0
From: Stephen Hemminger @ 2012-09-25  4:12 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <20120925041222.056704869@vyatta.com>

[-- Attachment #1: gre-key0.patch --]
[-- Type: text/plain, Size: 3885 bytes --]

GRE driver incorrectly uses zero as a flag value. Zero is a perfectly
valid value for key, and the tunnel should match packets with no key only
with tunnels created without key, and vice versa.

This is a slightly visible  change since previously it might be possible to
construct a working tunnel that sent key 0 and received only because
of the key wildcard of zero.  I.e the sender sent key of zero, but tunnel
was defined without key.

Note: using gre key 0 requires iproute2 utilities v3.2 or later.
The original utility code was broken as well.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/net/ipv4/ip_gre.c	2012-08-15 08:59:22.958704223 -0700
+++ b/net/ipv4/ip_gre.c	2012-09-12 09:40:04.420959235 -0700
@@ -214,11 +214,25 @@ static struct rtnl_link_stats64 *ipgre_g
 	return tot;
 }
 
+/* Does key in tunnel parameters match packet */
+static bool ipgre_key_match(const struct ip_tunnel_parm *p,
+			    __u32 flags, __be32 key)
+{
+	if (p->i_flags & GRE_KEY) {
+		if (flags & GRE_KEY)
+			return key == p->i_key;
+		else
+			return false;	/* key expected, none present */
+	} else
+		return !(flags & GRE_KEY);
+}
+
 /* Given src, dst and key, find appropriate for input tunnel. */
 
 static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
 					     __be32 remote, __be32 local,
-					     __be32 key, __be16 gre_proto)
+					     __u32 flags, __be32 key,
+					     __be16 gre_proto)
 {
 	struct net *net = dev_net(dev);
 	int link = dev->ifindex;
@@ -233,10 +247,12 @@ static struct ip_tunnel *ipgre_tunnel_lo
 	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
 		if (local != t->parms.iph.saddr ||
 		    remote != t->parms.iph.daddr ||
-		    key != t->parms.i_key ||
 		    !(t->dev->flags & IFF_UP))
 			continue;
 
+		if (!ipgre_key_match(&t->parms, flags, key))
+			continue;
+
 		if (t->dev->type != ARPHRD_IPGRE &&
 		    t->dev->type != dev_type)
 			continue;
@@ -257,10 +273,12 @@ static struct ip_tunnel *ipgre_tunnel_lo
 
 	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
 		if (remote != t->parms.iph.daddr ||
-		    key != t->parms.i_key ||
 		    !(t->dev->flags & IFF_UP))
 			continue;
 
+		if (!ipgre_key_match(&t->parms, flags, key))
+			continue;
+
 		if (t->dev->type != ARPHRD_IPGRE &&
 		    t->dev->type != dev_type)
 			continue;
@@ -283,10 +301,12 @@ static struct ip_tunnel *ipgre_tunnel_lo
 		if ((local != t->parms.iph.saddr &&
 		     (local != t->parms.iph.daddr ||
 		      !ipv4_is_multicast(local))) ||
-		    key != t->parms.i_key ||
 		    !(t->dev->flags & IFF_UP))
 			continue;
 
+		if (!ipgre_key_match(&t->parms, flags, key))
+			continue;
+
 		if (t->dev->type != ARPHRD_IPGRE &&
 		    t->dev->type != dev_type)
 			continue;
@@ -489,6 +509,7 @@ static void ipgre_err(struct sk_buff *sk
 	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
 	__be16 flags;
+	__be32 key = 0;
 
 	flags = p[0];
 	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
@@ -505,6 +526,9 @@ static void ipgre_err(struct sk_buff *sk
 	if (skb_headlen(skb) < grehlen)
 		return;
 
+	if (flags & GRE_KEY)
+		key = *(((__be32 *)p) + (grehlen / 4) - 1);
+
 	switch (type) {
 	default:
 	case ICMP_PARAMETERPROB:
@@ -535,9 +559,8 @@ static void ipgre_err(struct sk_buff *sk
 
 	rcu_read_lock();
 	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
-				flags & GRE_KEY ?
-				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
-				p[1]);
+				flags, key, p[1]);
+
 	if (t == NULL)
 		goto out;
 
@@ -642,9 +665,10 @@ static int ipgre_rcv(struct sk_buff *skb
 	gre_proto = *(__be16 *)(h + 2);
 
 	rcu_read_lock();
-	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
-					  iph->saddr, iph->daddr, key,
-					  gre_proto))) {
+	tunnel = ipgre_tunnel_lookup(skb->dev,
+				     iph->saddr, iph->daddr, flags, key,
+				     gre_proto);
+	if (tunnel) {
 		struct pcpu_tstats *tstats;
 
 		secpath_reset(skb);

^ permalink raw reply

* [PATCH] inet_diag: fix panic when unload inet_diag
From: Gao feng @ 2012-09-25  2:48 UTC (permalink / raw)
  To: davem; +Cc: stephen.hemminger, jengelh, eric.dumazet, kuznet, netdev,
	Gao feng

when inet_diag being compiled as module, inet_diag_handler_dump
set netlink_dump_control.dump to inet_diag_dump,so if module
inet_diag is unloaded,netlink will still try to call this function
in netlink_dump. this will cause kernel panic.

fix this by adding a reference of inet_diag module before
setting netlink_callback, and release this reference in
netlink_callback.done.

Thanks for all help from Stephen,Jan and Eric.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/ipv4/inet_diag.c |   46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 570e61f..e573090 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -903,6 +903,12 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc);
 }
 
+static int inet_diag_done(struct netlink_callback *cb)
+{
+	module_put(THIS_MODULE);
+	return 0;
+}
+
 static inline int inet_diag_type2proto(int type)
 {
 	switch (type) {
@@ -972,8 +978,26 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 		{
 			struct netlink_dump_control c = {
 				.dump = inet_diag_dump_compat,
+				.done = inet_diag_done,
 			};
-			return netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
+			int err;
+			/*
+			 * netlink_dump will call inet_diag_dump_compat,
+			 * so we need a reference of THIS_MODULE.
+			 */
+			if (!try_module_get(THIS_MODULE))
+				return -EPROTONOSUPPORT;
+
+			err = netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
+
+			if ((err != -EINTR) && (err != -ENOBUFS)) {
+				/*
+				 * netlink_callback set failed, release the
+				 * referenct of THIS_MODULE.
+				 */
+				module_put(THIS_MODULE);
+			}
+			return err;
 		}
 	}
 
@@ -1001,8 +1025,26 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 		{
 			struct netlink_dump_control c = {
 				.dump = inet_diag_dump,
+				.done = inet_diag_done,
 			};
-			return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+			int err;
+			/*
+			 * netlink_dump will call inet_diag_dump,
+			 * so we need a reference of THIS_MODULE.
+			 */
+			if (!try_module_get(THIS_MODULE))
+				return -EPROTONOSUPPORT;
+
+			err = netlink_dump_start(net->diag_nlsk, skb, h, &c);
+
+			if ((err != -EINTR) && (err != -ENOBUFS)) {
+				/*
+				 * netlink_callback set failed, release the
+				 * referenct of THIS_MODULE.
+				 */
+				module_put(THIS_MODULE);
+			}
+			return err;
 		}
 	}
 
-- 
1.7.7

^ permalink raw reply related

* IGMP snooping problem
From: Lin Ming @ 2012-09-25  2:59 UTC (permalink / raw)
  To: Herbert Xu; +Cc: networking

Hi,

I'm testing IGMP snooping on a router which has 4 LAN ports.

# brctl show
bridge name     bridge id                      STP enabled     interfaces
br0                  8000.00037fbef050       yes                   eth0

          eth1

          eth2

          eth3

          ath0

          ath1

          ath2

          ath3

ath0, 1, 2, 3 are WIFI.
IGMP snooping works well with this configuration.

The router supports a "switch mode", namely the 4 LAN ports connected
to a on-board switch.
In this mode, there is only a eth0, no eth1,2,3.
One benefit of this mode is that the traffic between LAN ports don't
need to go to CPU.
The LAN ports traffic will be handled by hardware switch directly.

# brctl show
bridge name     bridge id                      STP enabled     interfaces
br0                  8000.00037fbef050       yes                   eth0

          ath0

          ath1

          ath2

          ath3

In this mode, the IGMP snooping among the 4 LAN ports won't work.

Any idea how to resolve this problem?

Thanks,
Lin Ming

^ permalink raw reply

* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2012-09-25  2:34 UTC (permalink / raw)
  To: David Miller, netdev; +Cc: linux-next, linux-kernel, Eric Dumazet

[-- Attachment #1: Type: text/plain, Size: 590 bytes --]

Hi all,

Today's linux-next merge of the net-next tree got a conflict in
net/ipv4/raw.c between commit ab43ed8b7490 ("ipv4: raw: fix icmp_filter
()") from the net tree and commit 5640f7685831 ("net: use a per task frag
allocator") from the net-next tree.

They are basically the same patch (for this file) except the net-next
version adds two pr_err() calls. I used the net-next version and can carry
the fix as necessary (no action is required).

I do wonder if this change belongs in the net-next patch?
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH] inet_diag: make config INET_DIAG bool
From: Gao feng @ 2012-09-25  2:18 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Jan Engelhardt, Stephen Hemminger, netdev, davem, kuznet
In-Reply-To: <1348486346.26828.511.camel@edumazet-glaptop>

于 2012年09月24日 19:32, Eric Dumazet 写道:
> On Mon, 2012-09-24 at 18:17 +0800, Gao feng wrote:
>> 于 2012年09月24日 17:42, Eric Dumazet 写道:
>>> In fact I didnt fully understand the problem you try to address.
>>>
>>> If you want to prevent module being unloaded, you need to add proper
>>> module_get()/module_put()
>>>
>>> So I would add a "struct module *module;" in struct sock_diag_handler
>>> and use it appropriately.
>>
>> Yes, I try to add reference of the module,but I can't find a proper
>> location to call module_get and module_put.
>>
>> module_get should be called when userspace program use netlink to
>> send dump request to the kernel,and module_put should be called when
>> the dump is completed. I am right?
>>
>> BUT the userspace program may only call netlink_sendmsg without call
>> netlink_recvmsg.so the reference of the module will be incorrect.
> 
> check ->dump() and ->done() methods
> 

I miss that cb->done will be called when netlink sock being destructed.
so add a reference of the inet_diag module is doable.

I will send a v2 patch.

Thanks!

^ permalink raw reply

* [PATCH 2/2] Fix a typo in PTP_1588_CLOCK_PCH Kconfig help info.
From: Haicheng Li @ 2012-09-25  0:24 UTC (permalink / raw)
  To: David S. Miller, netdev
  Cc: Takahiroi Shimizu, linux-kernel@vger.kernel.org, haicheng.lee
In-Reply-To: <50600A49.7040902@linux.intel.com>

 From 5911413366d37aafcc19ddfc9c0f2db31855431e Mon Sep 17 00:00:00 2001
From: Haicheng Li <haicheng.li@linux.intel.com>
Date: Mon, 24 Sep 2012 15:55:27 +0800
Subject: [PATCH 2/2] Fix a typo in PTP_1588_CLOCK_PCH Kconfig help info.

Signed-off-by: Haicheng Li <haicheng.lee@gmail.com>
---
  drivers/ptp/Kconfig |    2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index ffdf712..82c4a26 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -87,6 +87,6 @@ config PTP_1588_CLOCK_PCH
  	  SO_TIMESTAMPING API.

  	  To compile this driver as a module, choose M here: the module
-	  will be called ptp_pch.
+	  will be called by pch_ptp.

  endmenu
-- 
1.7.1

^ permalink raw reply related

* [PATCH 1/2] Fix build error caused by broken PCH_PTP module dependency.
From: Haicheng Li @ 2012-09-25  0:23 UTC (permalink / raw)
  To: Haicheng Li, David S. Miller, netdev
  Cc: Takahiroi Shimizu, linux-kernel@vger.kernel.org, haicheng.lee
In-Reply-To: <506009A4.8000202@linux.intel.com>

 From 898e3214b3406c620571cedf704719784b0df049 Mon Sep 17 00:00:00 2001
From: Haicheng Li <haicheng.li@linux.intel.com>
Date: Mon, 24 Sep 2012 15:52:30 +0800
Subject: [PATCH 1/2] Fix build error caused by broken PCH_PTP module dependency.

The .config is:
         CONFIG_PCH_GBE=y
         CONFIG_PCH_PTP=y
         CONFIG_PTP_1588_CLOCK=m

The build error:

drivers/built-in.o: In function `pch_tx_timestamp':
.../pch_gbe_main.c:215: undefined reference to `pch_ch_event_read'
.../pch_gbe_main.c:225: undefined reference to `pch_tx_snap_read'
.../pch_gbe_main.c:231: undefined reference to `pch_ch_event_write'

.../pch_gbe_main.c:170: undefined reference to `pch_ch_event_read'
.../pch_gbe_main.c:175: undefined reference to `pch_src_uuid_lo_read'
.../pch_gbe_main.c:176: undefined reference to `pch_src_uuid_hi_read'
.../pch_gbe_main.c:190: undefined reference to `pch_ch_event_write'
.../pch_gbe_main.c:184: undefined reference to `pch_rx_snap_read'

.../pch_gbe_main.c:267: undefined reference to `pch_ch_control_write'
.../pch_gbe_main.c:271: undefined reference to `pch_ch_control_write'
.../pch_gbe_main.c:275: undefined reference to `pch_ch_control_write'
.../pch_gbe_main.c:281: undefined reference to `pch_ch_control_write'
.../pch_gbe_main.c:283: undefined reference to `pch_set_station_address'
.../pch_gbe_main.c:290: undefined reference to `pch_ch_event_write'

Signed-off-by: Haicheng Li <haicheng.lee@gmail.com>
---
  drivers/net/ethernet/oki-semi/pch_gbe/Kconfig |    2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig 
b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
index bce0164..6c8aed4 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
@@ -26,7 +26,7 @@ if PCH_GBE
  config PCH_PTP
  	bool "PCH PTP clock support"
  	default n
-	depends on PTP_1588_CLOCK_PCH
+	depends on PTP_1588_CLOCK_PCH = PCH_GBE
  	---help---
  	  Say Y here if you want to use Precision Time Protocol (PTP) in the
  	  driver. PTP is a method to precisely synchronize distributed clocks
-- 
1.7.1

^ permalink raw reply related

* [PATCH 10/26 v3] rdma/cm: Add support for AF_IB to rdma_resolve_addr
From: Hefty, Sean @ 2012-09-24 23:55 UTC (permalink / raw)
  To: linux-rdma (linux-rdma@vger.kernel.org), netdev@vger.kernel.org

Allow the user to specify the remote address using AF_IB format.
When AF_IB is used, the remote address simply needs to be recorded,
and no resolution using ARP is done.  The local address may still
need to be matched with a local IB device.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
resending with netdev copied

 drivers/infiniband/core/cma.c |  106 +++++++++++++++++++++++++++++++++++++++--
 1 files changed, 100 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index fa4ac4f..9d37c2f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -433,6 +433,61 @@ out:
 	return ret;
 }
 
+/*
+ * Select the source IB device and address to reach the destination IB address.
+ */
+static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
+{
+	struct cma_device *cma_dev, *cur_dev;
+	struct sockaddr_ib *addr;
+	union ib_gid gid, sgid, *dgid;
+	u16 pkey, index;
+	u8 port, p;
+	int i;
+
+	cma_dev = NULL;
+	addr = (struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr;
+	dgid = (union ib_gid *) &addr->sib_addr;
+	pkey = ntohs(addr->sib_pkey);
+
+	list_for_each_entry(cur_dev, &dev_list, list) {
+		if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
+			continue;
+
+		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
+				continue;
+
+			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
+				if (!memcmp(&gid, dgid, sizeof(gid))) {
+					cma_dev = cur_dev;
+					sgid = gid;
+					port = p;
+					goto found;
+				}
+
+				if (!cma_dev && (gid.global.subnet_prefix ==
+						 dgid->global.subnet_prefix)) {
+					cma_dev = cur_dev;
+					sgid = gid;
+					port = p;
+				}
+			}
+		}
+	}
+
+	if (!cma_dev)
+		return -ENODEV;
+
+found:
+	cma_attach_to_dev(id_priv, cma_dev);
+	id_priv->id.port_num = port;
+	addr = (struct sockaddr_ib *) &id_priv->id.route.addr.src_addr;
+	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
+	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
+	return 0;
+}
+
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
@@ -2091,14 +2146,48 @@ err:
 	return ret;
 }
 
+static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
+{
+	struct cma_work *work;
+	int ret;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	if (!id_priv->cma_dev) {
+		ret = cma_resolve_ib_dev(id_priv);
+		if (ret)
+			goto err;
+	}
+
+	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
+		&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
+
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler);
+	work->old_state = RDMA_CM_ADDR_QUERY;
+	work->new_state = RDMA_CM_ADDR_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+	queue_work(cma_wq, &work->work);
+	return 0;
+err:
+	kfree(work);
+	return ret;
+}
+
 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 			 struct sockaddr *dst_addr)
 {
 	if (!src_addr || !src_addr->sa_family) {
 		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
-		if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+		src_addr->sa_family = dst_addr->sa_family;
+		if (dst_addr->sa_family == AF_INET6) {
 			((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
 				((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+		} else if (dst_addr->sa_family == AF_IB) {
+			((struct sockaddr_ib *) src_addr)->sib_pkey =
+				((struct sockaddr_ib *) dst_addr)->sib_pkey;
 		}
 	}
 	return rdma_bind_addr(id, src_addr);
@@ -2125,12 +2214,17 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 
 	atomic_inc(&id_priv->refcount);
 	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
-	if (cma_any_addr(dst_addr))
+	if (cma_any_addr(dst_addr)) {
 		ret = cma_resolve_loopback(id_priv);
-	else
-		ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
-				      dst_addr, &id->route.addr.dev_addr,
-				      timeout_ms, addr_handler, id_priv);
+	} else {
+		if (dst_addr->sa_family == AF_IB) {
+			ret = cma_resolve_ib_addr(id_priv);
+		} else {
+			ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
+					      dst_addr, &id->route.addr.dev_addr,
+					      timeout_ms, addr_handler, id_priv);
+		}
+	}
 	if (ret)
 		goto err;
 

^ permalink raw reply related

* [PATCH 23/26 v3] rdma/ucm: Allow user space to pass AF_IB into resolve
From: Hefty, Sean @ 2012-09-24 23:55 UTC (permalink / raw)
  To: linux-rdma (linux-rdma@vger.kernel.org), netdev@vger.kernel.org

Allow user space applications to call resolve_addr using
AF_IB.  To support sockaddr_ib, we need to define a new
structure capable of handling the larger address size.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
resending with netdev copied

 drivers/infiniband/core/ucma.c |   30 +++++++++++++++++++++++++++++-
 include/rdma/rdma_user_cm.h    |   13 ++++++++++++-
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index d23bc1bf..ecbe404 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -594,6 +594,33 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
 	return ret;
 }
 
+static ssize_t ucma_resolve_addr(struct ucma_file *file,
+				 const char __user *inbuf,
+				 int in_len, int out_len)
+{
+	struct rdma_ucm_resolve_addr cmd;
+	struct sockaddr *src, *dst;
+	struct ucma_context *ctx;
+	int ret;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	src = (struct sockaddr *) &cmd.src_addr;
+	dst = (struct sockaddr *) &cmd.dst_addr;
+	if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) ||
+	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst)))
+		return -EINVAL;
+
+	ctx = ucma_get_ctx(file, cmd.id);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
+	ucma_put_ctx(ctx);
+	return ret;
+}
+
 static ssize_t ucma_resolve_route(struct ucma_file *file,
 				  const char __user *inbuf,
 				  int in_len, int out_len)
@@ -1438,7 +1465,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
 	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast,
 	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id,
 	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query,
-	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind
+	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind,
+	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr
 };
 
 static ssize_t ucma_write(struct file *filp, const char __user *buf,
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index cae3e27..b731b5e 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -63,7 +63,8 @@ enum {
 	RDMA_USER_CM_CMD_LEAVE_MCAST,
 	RDMA_USER_CM_CMD_MIGRATE_ID,
 	RDMA_USER_CM_CMD_QUERY,
-	RDMA_USER_CM_CMD_BIND
+	RDMA_USER_CM_CMD_BIND,
+	RDMA_USER_CM_CMD_RESOLVE_ADDR
 };
 
 /*
@@ -117,6 +118,16 @@ struct rdma_ucm_resolve_ip {
 	__u32 timeout_ms;
 };
 
+struct rdma_ucm_resolve_addr {
+	__u32 id;
+	__u32 timeout_ms;
+	__u16 src_size;
+	__u16 dst_size;
+	__u32 reserved;
+	struct sockaddr_storage src_addr;
+	struct sockaddr_storage dst_addr;
+};
+
 struct rdma_ucm_resolve_route {
 	__u32 id;
 	__u32 timeout_ms;

^ permalink raw reply related

* [PATCH 13/26 v3] rdma/cm: Expose private data when using AF_IB
From: Hefty, Sean @ 2012-09-24 23:55 UTC (permalink / raw)
  To: linux-rdma (linux-rdma@vger.kernel.org), netdev@vger.kernel.org

If the source or destination address is AF_IB, then do not
reserve a portion of the private data in the IB CM REQ or SIDR
REQ messages for the cma header.  Instead, all private data
should be exported to the user.  When AF_IB is used, the
rdma cm does not have sufficient information to fill in the
cma header.  Additionally, this will be necessary to support
any IB connection through the rdma cm interface,

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
resending with netdev copied

 drivers/infiniband/core/cma.c |   24 ++++++++++--------------
 1 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 4e205db..1264e97 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -907,14 +907,10 @@ static void cma_save_net_info(struct rdma_addr *addr,
 	}
 }
 
-static inline int cma_user_data_offset(enum rdma_port_space ps)
+static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
 {
-	switch (ps) {
-	case RDMA_PS_SDP:
-		return 0;
-	default:
-		return sizeof(struct cma_hdr);
-	}
+	return (cma_family(id_priv) == AF_IB || id_priv->id.ps == RDMA_PS_SDP) ?
+		0 : sizeof(struct cma_hdr);
 }
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
@@ -1310,7 +1306,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		return -ECONNABORTED;
 
 	memset(&event, 0, sizeof event);
-	offset = cma_user_data_offset(listen_id->id.ps);
+	offset = cma_user_data_offset(listen_id);
 	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
 	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
 		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
@@ -2637,7 +2633,7 @@ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
 			cma_hdr->port = src4->sin_port;
 			break;
 		}
-	} else {
+	} else if (cma_family(id_priv) == AF_INET6) {
 		struct sockaddr_in6 *src6, *dst6;
 
 		src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
@@ -2735,10 +2731,10 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
 {
 	struct ib_cm_sidr_req_param req;
 	struct ib_cm_id	*id;
-	int ret;
+	int offset, ret;
 
-	req.private_data_len = sizeof(struct cma_hdr) +
-			       conn_param->private_data_len;
+	offset = cma_user_data_offset(id_priv);
+	req.private_data_len = offset + conn_param->private_data_len;
 	if (req.private_data_len < conn_param->private_data_len)
 		return -EINVAL;
 
@@ -2747,7 +2743,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
 		return -ENOMEM;
 
 	if (conn_param->private_data && conn_param->private_data_len)
-		memcpy((void *) req.private_data + sizeof(struct cma_hdr),
+		memcpy((void *) req.private_data + offset,
 		       conn_param->private_data, conn_param->private_data_len);
 
 	ret = cma_format_hdr((void *) req.private_data, id_priv);
@@ -2787,7 +2783,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
 	int offset, ret;
 
 	memset(&req, 0, sizeof req);
-	offset = cma_user_data_offset(id_priv->id.ps);
+	offset = cma_user_data_offset(id_priv);
 	req.private_data_len = offset + conn_param->private_data_len;
 	if (req.private_data_len < conn_param->private_data_len)
 		return -EINVAL;

^ permalink raw reply related

* [PATCH 21/26 v3] rdma/ucm: Name changes to indicate only IP addresses supported
From: Hefty, Sean @ 2012-09-24 23:55 UTC (permalink / raw)
  To: linux-rdma (linux-rdma@vger.kernel.org), netdev@vger.kernel.org

Several commands into the RDMA CM from user space are
restricted to supporting addresses which fit into a sockaddr_in6
structure: bind address, resolve address, and join multicast.

With the addition of AF_IB, we need to support addresses
which are larger than sockaddr_in6.  This will be done by
adding new commands that exchange address information using
sockaddr_storage.  However, to support existing applications,
we maintain the current commands and structures, but rename
them to indicate that they only support IPv4 and v6 addresses.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
resending with netdev copied

 drivers/infiniband/core/ucma.c |   60 ++++++++++++++++++++--------------------
 include/rdma/rdma_user_cm.h    |   12 ++++----
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 199b076..6fc36ba 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -529,10 +529,10 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 	return ret;
 }
 
-static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
+static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
 			      int in_len, int out_len)
 {
-	struct rdma_ucm_bind_addr cmd;
+	struct rdma_ucm_bind_ip cmd;
 	struct ucma_context *ctx;
 	int ret;
 
@@ -548,11 +548,11 @@ static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
 	return ret;
 }
 
-static ssize_t ucma_resolve_addr(struct ucma_file *file,
-				 const char __user *inbuf,
-				 int in_len, int out_len)
+static ssize_t ucma_resolve_ip(struct ucma_file *file,
+			       const char __user *inbuf,
+			       int in_len, int out_len)
 {
-	struct rdma_ucm_resolve_addr cmd;
+	struct rdma_ucm_resolve_ip cmd;
 	struct ucma_context *ctx;
 	int ret;
 
@@ -1193,11 +1193,11 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
 	return ret;
 }
 
-static ssize_t ucma_join_multicast(struct ucma_file *file,
-				   const char __user *inbuf,
-				   int in_len, int out_len)
+static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
+				      const char __user *inbuf,
+				      int in_len, int out_len)
 {
-	struct rdma_ucm_join_mcast cmd;
+	struct rdma_ucm_join_ip_mcast cmd;
 	struct rdma_ucm_create_id_resp resp;
 	struct ucma_context *ctx;
 	struct ucma_multicast *mc;
@@ -1394,26 +1394,26 @@ file_put:
 static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
 				   const char __user *inbuf,
 				   int in_len, int out_len) = {
-	[RDMA_USER_CM_CMD_CREATE_ID]	= ucma_create_id,
-	[RDMA_USER_CM_CMD_DESTROY_ID]	= ucma_destroy_id,
-	[RDMA_USER_CM_CMD_BIND_ADDR]	= ucma_bind_addr,
-	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	= ucma_resolve_addr,
-	[RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route,
-	[RDMA_USER_CM_CMD_QUERY_ROUTE]	= ucma_query_route,
-	[RDMA_USER_CM_CMD_CONNECT]	= ucma_connect,
-	[RDMA_USER_CM_CMD_LISTEN]	= ucma_listen,
-	[RDMA_USER_CM_CMD_ACCEPT]	= ucma_accept,
-	[RDMA_USER_CM_CMD_REJECT]	= ucma_reject,
-	[RDMA_USER_CM_CMD_DISCONNECT]	= ucma_disconnect,
-	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	= ucma_init_qp_attr,
-	[RDMA_USER_CM_CMD_GET_EVENT]	= ucma_get_event,
-	[RDMA_USER_CM_CMD_GET_OPTION]	= NULL,
-	[RDMA_USER_CM_CMD_SET_OPTION]	= ucma_set_option,
-	[RDMA_USER_CM_CMD_NOTIFY]	= ucma_notify,
-	[RDMA_USER_CM_CMD_JOIN_MCAST]	= ucma_join_multicast,
-	[RDMA_USER_CM_CMD_LEAVE_MCAST]	= ucma_leave_multicast,
-	[RDMA_USER_CM_CMD_MIGRATE_ID]	= ucma_migrate_id,
-	[RDMA_USER_CM_CMD_QUERY]	= ucma_query
+	[RDMA_USER_CM_CMD_CREATE_ID] 	 = ucma_create_id,
+	[RDMA_USER_CM_CMD_DESTROY_ID]	 = ucma_destroy_id,
+	[RDMA_USER_CM_CMD_BIND_IP]	 = ucma_bind_ip,
+	[RDMA_USER_CM_CMD_RESOLVE_IP]	 = ucma_resolve_ip,
+	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
+	[RDMA_USER_CM_CMD_QUERY_ROUTE]	 = ucma_query_route,
+	[RDMA_USER_CM_CMD_CONNECT]	 = ucma_connect,
+	[RDMA_USER_CM_CMD_LISTEN]	 = ucma_listen,
+	[RDMA_USER_CM_CMD_ACCEPT]	 = ucma_accept,
+	[RDMA_USER_CM_CMD_REJECT]	 = ucma_reject,
+	[RDMA_USER_CM_CMD_DISCONNECT]	 = ucma_disconnect,
+	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	 = ucma_init_qp_attr,
+	[RDMA_USER_CM_CMD_GET_EVENT]	 = ucma_get_event,
+	[RDMA_USER_CM_CMD_GET_OPTION]	 = NULL,
+	[RDMA_USER_CM_CMD_SET_OPTION]	 = ucma_set_option,
+	[RDMA_USER_CM_CMD_NOTIFY]	 = ucma_notify,
+	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
+	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast,
+	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id,
+	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query
 };
 
 static ssize_t ucma_write(struct file *filp, const char __user *buf,
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index b0634d4..bfb2ca7 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -45,8 +45,8 @@
 enum {
 	RDMA_USER_CM_CMD_CREATE_ID,
 	RDMA_USER_CM_CMD_DESTROY_ID,
-	RDMA_USER_CM_CMD_BIND_ADDR,
-	RDMA_USER_CM_CMD_RESOLVE_ADDR,
+	RDMA_USER_CM_CMD_BIND_IP,
+	RDMA_USER_CM_CMD_RESOLVE_IP,
 	RDMA_USER_CM_CMD_RESOLVE_ROUTE,
 	RDMA_USER_CM_CMD_QUERY_ROUTE,
 	RDMA_USER_CM_CMD_CONNECT,
@@ -59,7 +59,7 @@ enum {
 	RDMA_USER_CM_CMD_GET_OPTION,
 	RDMA_USER_CM_CMD_SET_OPTION,
 	RDMA_USER_CM_CMD_NOTIFY,
-	RDMA_USER_CM_CMD_JOIN_MCAST,
+	RDMA_USER_CM_CMD_JOIN_IP_MCAST,
 	RDMA_USER_CM_CMD_LEAVE_MCAST,
 	RDMA_USER_CM_CMD_MIGRATE_ID,
 	RDMA_USER_CM_CMD_QUERY
@@ -96,13 +96,13 @@ struct rdma_ucm_destroy_id_resp {
 	__u32 events_reported;
 };
 
-struct rdma_ucm_bind_addr {
+struct rdma_ucm_bind_ip {
 	__u64 response;
 	struct sockaddr_in6 addr;
 	__u32 id;
 };
 
-struct rdma_ucm_resolve_addr {
+struct rdma_ucm_resolve_ip {
 	struct sockaddr_in6 src_addr;
 	struct sockaddr_in6 dst_addr;
 	__u32 id;
@@ -216,7 +216,7 @@ struct rdma_ucm_notify {
 	__u32 event;
 };
 
-struct rdma_ucm_join_mcast {
+struct rdma_ucm_join_ip_mcast {
 	__u64 response;		/* rdma_ucm_create_id_resp */
 	__u64 uid;
 	struct sockaddr_in6 addr;

^ permalink raw reply related

* [PATCH 17/26 v3] ib/sa: Export function to pack a path record into wire format
From: Hefty, Sean @ 2012-09-24 23:55 UTC (permalink / raw)
  To: linux-rdma (linux-rdma@vger.kernel.org), netdev@vger.kernel.org

Allow converting from struct ib_sa_path_rec to the IB defined
SA path record wire format.  This will be used to report path
data from the rdma cm into user space.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
resending with netdev copied

 drivers/infiniband/core/sa_query.c |    6 ++++++
 include/rdma/ib_sa.h               |    6 ++++++
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index fbbfa24..027c797 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -616,6 +616,12 @@ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
 }
 EXPORT_SYMBOL(ib_sa_unpack_path);
 
+void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+{
+	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
+}
+EXPORT_SYMBOL(ib_sa_pack_path);
+
 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
 				    int status,
 				    struct ib_sa_mad *mad)
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index d44a563..ade03ca 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -385,4 +385,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
  */
 void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
 
+/**
+ * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec
+ * to IB MAD wire format.
+ */
+void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute);
+
 #endif /* IB_SA_H */

^ permalink raw reply related

* [PATCH 20/26 v3] rdma/ucm: Add ability to query GID addresses
From: Hefty, Sean @ 2012-09-24 23:55 UTC (permalink / raw)
  To: linux-rdma (linux-rdma@vger.kernel.org), netdev@vger.kernel.org

Part of address resolution is mapping IP addresses to IB GIDs.
With the changes to support querying larger addresses and more
path records, also provide a way to query IB GIDs after
resolution completes.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
resending with netdev copied

 drivers/infiniband/core/ucma.c |   50 ++++++++++++++++++++++++++++++++++++++++
 include/rdma/rdma_user_cm.h    |    3 ++
 2 files changed, 52 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 9b60338..199b076 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -48,6 +48,7 @@
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_cm_ib.h>
 #include <rdma/ib_addr.h>
+#include <rdma/ib.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -799,6 +800,52 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
 	return ret;
 }
 
+static ssize_t ucma_query_gid(struct ucma_context *ctx,
+			      void __user *response, int out_len)
+{
+	struct rdma_ucm_query_addr_resp resp;
+	struct sockaddr_ib *addr;
+	int ret = 0;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
+
+	memset(&resp, 0, sizeof resp);
+
+	ucma_query_device_addr(ctx->cm_id, &resp);
+
+	addr = (struct sockaddr_ib *) &resp.src_addr;
+	resp.src_size = sizeof(*addr);
+	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
+		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
+	} else {
+		addr->sib_family = AF_IB;
+		addr->sib_pkey = (__force __be16) resp.pkey;
+		rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
+				   (union ib_gid *) &addr->sib_addr);
+		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
+						    &ctx->cm_id->route.addr.src_addr);
+	}
+
+	addr = (struct sockaddr_ib *) &resp.dst_addr;
+	resp.dst_size = sizeof(*addr);
+	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
+		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
+	} else {
+		addr->sib_family = AF_IB;
+		addr->sib_pkey = (__force __be16) resp.pkey;
+		rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
+				   (union ib_gid *) &addr->sib_addr);
+		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
+						    &ctx->cm_id->route.addr.dst_addr);
+	}
+
+	if (copy_to_user(response, &resp, sizeof(resp)))
+		ret = -EFAULT;
+
+	return ret;
+}
+
 static ssize_t ucma_query(struct ucma_file *file,
 			  const char __user *inbuf,
 			  int in_len, int out_len)
@@ -823,6 +870,9 @@ static ssize_t ucma_query(struct ucma_file *file,
 	case RDMA_USER_CM_QUERY_PATH:
 		ret = ucma_query_path(ctx, response, out_len);
 		break;
+	case RDMA_USER_CM_QUERY_GID:
+		ret = ucma_query_gid(ctx, response, out_len);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index effcb75..b0634d4 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -116,7 +116,8 @@ struct rdma_ucm_resolve_route {
 
 enum {
 	RDMA_USER_CM_QUERY_ADDR,
-	RDMA_USER_CM_QUERY_PATH
+	RDMA_USER_CM_QUERY_PATH,
+	RDMA_USER_CM_QUERY_GID
 };
 
 struct rdma_ucm_query {

^ permalink raw reply related

* [PATCH 3/26 v3] ib/addr: Add AF_IB support to ip_addr_size
From: Hefty, Sean @ 2012-09-24 23:55 UTC (permalink / raw)
  To: linux-rdma (linux-rdma@vger.kernel.org), netdev@vger.kernel.org

Add support for AF_IB to ip_addr_size, and rename the function
to account for the change.  Give the compiler more control over
whether the call should be inline or not by moving the definition
into the .c file, removing the static inline, and exporting it.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
resending with netdev copied

 drivers/infiniband/core/addr.c |   20 ++++++++++++++++++--
 drivers/infiniband/core/cma.c  |   12 ++++++------
 include/rdma/ib_addr.h         |    6 +-----
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 6ef660c..c7998b9 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -45,6 +45,7 @@
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <rdma/ib_addr.h>
+#include <rdma/ib.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("IB Address Translation");
@@ -70,6 +71,21 @@ static LIST_HEAD(req_list);
 static DECLARE_DELAYED_WORK(work, process_req);
 static struct workqueue_struct *addr_wq;
 
+int rdma_addr_size(struct sockaddr *addr)
+{
+	switch (addr->sa_family) {
+	case AF_INET:
+		return sizeof(struct sockaddr_in);
+	case AF_INET6:
+		return sizeof(struct sockaddr_in6);
+	case AF_IB:
+		return sizeof(struct sockaddr_ib);
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL(rdma_addr_size);
+
 void rdma_addr_register_client(struct rdma_addr_client *client)
 {
 	atomic_set(&client->refcount, 1);
@@ -371,12 +387,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 			goto err;
 		}
 
-		memcpy(src_in, src_addr, ip_addr_size(src_addr));
+		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 	} else {
 		src_in->sa_family = dst_addr->sa_family;
 	}
 
-	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
+	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
 	req->addr = addr;
 	req->callback = callback;
 	req->context = context;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e3d2837..af21aa4 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1575,7 +1575,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
 	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
 	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
-	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+	       rdma_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
 
 	cma_attach_to_dev(dev_id_priv, cma_dev);
 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
@@ -1979,7 +1979,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 		event.status = status;
 	} else {
 		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
-		       ip_addr_size(src_addr));
+		       rdma_addr_size(src_addr));
 		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
 	}
 
@@ -2069,7 +2069,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
-	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
+	memcpy(&id->route.addr.dst_addr, dst_addr, rdma_addr_size(dst_addr));
 	if (cma_any_addr(dst_addr))
 		ret = cma_resolve_loopback(id_priv);
 	else
@@ -2378,7 +2378,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 			goto err1;
 	}
 
-	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+	memcpy(&id->route.addr.src_addr, addr, rdma_addr_size(addr));
 	ret = cma_get_port(id_priv);
 	if (ret)
 		goto err2;
@@ -3149,7 +3149,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 	if (!mc)
 		return -ENOMEM;
 
-	memcpy(&mc->addr, addr, ip_addr_size(addr));
+	memcpy(&mc->addr, addr, rdma_addr_size(addr));
 	mc->context = context;
 	mc->id_priv = id_priv;
 
@@ -3194,7 +3194,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 	id_priv = container_of(id, struct rdma_id_private, id);
 	spin_lock_irq(&id_priv->lock);
 	list_for_each_entry(mc, &id_priv->mc_list, list) {
-		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
 			list_del(&mc->list);
 			spin_unlock_irq(&id_priv->lock);
 
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 9996539..f3ac0f2 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -102,11 +102,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr);
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 	      const unsigned char *dst_dev_addr);
 
-static inline int ip_addr_size(struct sockaddr *addr)
-{
-	return addr->sa_family == AF_INET6 ?
-	       sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
-}
+int rdma_addr_size(struct sockaddr *addr);
 
 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
 {

^ permalink raw reply related

* [PATCH 5/26 v3] rdma/cm: Allow user to specify AF_IB when binding
From: Hefty, Sean @ 2012-09-24 23:55 UTC (permalink / raw)
  To: linux-rdma (linux-rdma@vger.kernel.org), netdev@vger.kernel.org

Modify rdma_bind_addr to allow the user to specify AF_IB when
binding to a device.  AF_IB indicates that the user is not
mapping an IP address to the native IB addressing.  (The mapping
may have already been done, or is not needed.)

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
resending with netdev copied

 drivers/infiniband/core/cma.c |   34 ++++++++++++++++++++++++++++------
 1 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f569127..f09450e 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -353,6 +353,27 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
 	return -EAGAIN;
 }
 
+static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
+{
+	dev_addr->dev_type = ARPHRD_INFINIBAND;
+	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
+	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
+}
+
+static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+	int ret;
+
+	if (addr->sa_family != AF_IB) {
+		ret = rdma_translate_ip(addr, dev_addr);
+	} else {
+		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
+		ret = 0;
+	}
+
+	return ret;
+}
+
 static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -1131,8 +1152,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 		rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
 		ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
 	} else {
-		ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
-					&rt->addr.dev_addr);
+		ret = cma_translate_addr((struct sockaddr *) &rt->addr.src_addr,
+					 &rt->addr.dev_addr);
 		if (ret)
 			goto err;
 	}
@@ -1171,8 +1192,8 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 			  ip_ver, port, src, dst);
 
 	if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
-		ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
-					&id->route.addr.dev_addr);
+		ret = cma_translate_addr((struct sockaddr *) &id->route.addr.src_addr,
+					 &id->route.addr.dev_addr);
 		if (ret)
 			goto err;
 	}
@@ -2422,7 +2443,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 	struct rdma_id_private *id_priv;
 	int ret;
 
-	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
+	    addr->sa_family != AF_IB)
 		return -EAFNOSUPPORT;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
@@ -2434,7 +2456,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 		goto err1;
 
 	if (!cma_any_addr(addr)) {
-		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
+		ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
 		if (ret)
 			goto err1;
 

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox