* [PATCH 1/3] net/rds: Declare SO_RDS_TRANSPORT and RDS_TRANS_* constants in uapi/linux/rds.h
2015-05-29 21:28 [PATCH 0/3] net/rds: SOL_RDS socket option to explicitly select transport Sowmini Varadhan
@ 2015-05-29 21:28 ` Sowmini Varadhan
2015-05-29 21:28 ` [PATCH 2/3] net/rds: Add setsockopt support for SO_RDS_TRANSPORT Sowmini Varadhan
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Sowmini Varadhan @ 2015-05-29 21:28 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: chien.yen, davem, rds-devel, ajaykumar.hotchandani, igor.maximov,
Sowmini Varadhan
User space applications that desire to explicitly select the
underlying transport for a PF_RDS socket may do so by using the
SO_RDS_TRANSPORT socket option at the SOL_RDS level before bind().
The integer argument provided to the socket option would be one
of the RDS_TRANS_* values, e.g., RDS_TRANS_TCP. This commit exports
the constant values needed by such applications via <linux/rds.h>.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
include/uapi/linux/rds.h | 10 ++++++++++
net/rds/rds.h | 5 -----
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index 9195095..0f9265c 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -38,6 +38,8 @@
#define RDS_IB_ABI_VERSION 0x301
+#define SOL_RDS 276
+
/*
* setsockopt/getsockopt for SOL_RDS
*/
@@ -48,6 +50,14 @@
#define RDS_RECVERR 5
#define RDS_CONG_MONITOR 6
#define RDS_GET_MR_FOR_DEST 7
+#define SO_RDS_TRANSPORT 8
+
+/* supported values for SO_RDS_TRANSPORT */
+#define RDS_TRANS_IB 0
+#define RDS_TRANS_IWARP 1
+#define RDS_TRANS_TCP 2
+#define RDS_TRANS_COUNT 3
+#define RDS_TRANS_NONE (~0)
/*
* Control message types for SOL_RDS.
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0d41155..76db508 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -408,11 +408,6 @@ struct rds_notifier {
* should try hard not to block.
*/
-#define RDS_TRANS_IB 0
-#define RDS_TRANS_IWARP 1
-#define RDS_TRANS_TCP 2
-#define RDS_TRANS_COUNT 3
-
struct rds_transport {
char t_name[TRANSNAMSIZ];
struct list_head t_item;
--
1.7.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] net/rds: Add setsockopt support for SO_RDS_TRANSPORT
2015-05-29 21:28 [PATCH 0/3] net/rds: SOL_RDS socket option to explicitly select transport Sowmini Varadhan
2015-05-29 21:28 ` [PATCH 1/3] net/rds: Declare SO_RDS_TRANSPORT and RDS_TRANS_* constants in uapi/linux/rds.h Sowmini Varadhan
@ 2015-05-29 21:28 ` Sowmini Varadhan
2015-05-29 21:28 ` [PATCH 3/3] net/rds Add getsockopt " Sowmini Varadhan
2015-06-01 4:47 ` [PATCH 0/3] net/rds: SOL_RDS socket option to explicitly select transport David Miller
3 siblings, 0 replies; 5+ messages in thread
From: Sowmini Varadhan @ 2015-05-29 21:28 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: chien.yen, davem, rds-devel, ajaykumar.hotchandani, igor.maximov,
Sowmini Varadhan
An application may deterministically attach the underlying transport for
a PF_RDS socket by invoking setsockopt(2) with the SO_RDS_TRANSPORT
option at the SOL_RDS level. The integer argument to setsockopt must be
one of the RDS_TRANS_* transport types, e.g., RDS_TRANS_TCP. The option
must be specified before invoking bind(2) on the socket, and may only
be used once on the socket. An attempt to set the option on a bound
socket, or to invoke the option after a successful SO_RDS_TRANSPORT
attachment, will return EOPNOTSUPP.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
net/rds/af_rds.c | 27 +++++++++++++++++++++++++++
net/rds/bind.c | 4 ++++
net/rds/rds.h | 1 +
net/rds/transport.c | 21 +++++++++++++++++++++
4 files changed, 53 insertions(+), 0 deletions(-)
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 3d83641..0487744 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -270,6 +270,28 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
return ret;
}
+static int rds_set_transport(struct rds_sock *rs, char __user *optval,
+ int optlen)
+{
+ int t_type;
+
+ if (rs->rs_transport)
+ return -EOPNOTSUPP; /* previously attached to transport */
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+
+ if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type)))
+ return -EFAULT;
+
+ if (t_type < 0 || t_type >= RDS_TRANS_COUNT)
+ return -EINVAL;
+
+ rs->rs_transport = rds_trans_get(t_type);
+
+ return rs->rs_transport ? 0 : -ENOPROTOOPT;
+}
+
static int rds_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
@@ -300,6 +322,11 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
case RDS_CONG_MONITOR:
ret = rds_cong_monitor(rs, optval, optlen);
break;
+ case SO_RDS_TRANSPORT:
+ lock_sock(sock->sk);
+ ret = rds_set_transport(rs, optval, optlen);
+ release_sock(sock->sk);
+ break;
default:
ret = -ENOPROTOOPT;
}
diff --git a/net/rds/bind.c b/net/rds/bind.c
index a2e6562..4ebd29c 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -181,6 +181,10 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (ret)
goto out;
+ if (rs->rs_transport) { /* previously bound */
+ ret = 0;
+ goto out;
+ }
trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
if (!trans) {
ret = -EADDRNOTAVAIL;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 76db508..a33fb4a 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -798,6 +798,7 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr);
void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
+struct rds_transport *rds_trans_get(int t_type);
int rds_trans_init(void);
void rds_trans_exit(void);
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7f2ac4f..8b4a6cd 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -101,6 +101,27 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr)
return ret;
}
+struct rds_transport *rds_trans_get(int t_type)
+{
+ struct rds_transport *ret = NULL;
+ struct rds_transport *trans;
+ unsigned int i;
+
+ down_read(&rds_trans_sem);
+ for (i = 0; i < RDS_TRANS_COUNT; i++) {
+ trans = transports[i];
+
+ if (trans && trans->t_type == t_type &&
+ (!trans->t_owner || try_module_get(trans->t_owner))) {
+ ret = trans;
+ break;
+ }
+ }
+ up_read(&rds_trans_sem);
+
+ return ret;
+}
+
/*
* This returns the number of stats entries in the snapshot and only
* copies them using the iter if there is enough space for them. The
--
1.7.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/3] net/rds Add getsockopt support for SO_RDS_TRANSPORT
2015-05-29 21:28 [PATCH 0/3] net/rds: SOL_RDS socket option to explicitly select transport Sowmini Varadhan
2015-05-29 21:28 ` [PATCH 1/3] net/rds: Declare SO_RDS_TRANSPORT and RDS_TRANS_* constants in uapi/linux/rds.h Sowmini Varadhan
2015-05-29 21:28 ` [PATCH 2/3] net/rds: Add setsockopt support for SO_RDS_TRANSPORT Sowmini Varadhan
@ 2015-05-29 21:28 ` Sowmini Varadhan
2015-06-01 4:47 ` [PATCH 0/3] net/rds: SOL_RDS socket option to explicitly select transport David Miller
3 siblings, 0 replies; 5+ messages in thread
From: Sowmini Varadhan @ 2015-05-29 21:28 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: chien.yen, davem, rds-devel, ajaykumar.hotchandani, igor.maximov,
Sowmini Varadhan
The currently attached transport for a PF_RDS socket may be obtained
from user space by invoking getsockopt(2) using the SO_RDS_TRANSPORT
option at the SOL_RDS level. The integer optval returned will be one
of the RDS_TRANS_* constants defined in linux/rds.h.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
net/rds/af_rds.c | 14 ++++++++++++++
1 files changed, 14 insertions(+), 0 deletions(-)
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 0487744..2ad9032 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -339,6 +339,7 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
{
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
int ret = -ENOPROTOOPT, len;
+ int trans;
if (level != SOL_RDS)
goto out;
@@ -364,6 +365,19 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
else
ret = 0;
break;
+ case SO_RDS_TRANSPORT:
+ if (len < sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
+ trans = (rs->rs_transport ? rs->rs_transport->t_type :
+ RDS_TRANS_NONE); /* unbound */
+ if (put_user(trans, (int __user *)optval) ||
+ put_user(sizeof(int), optlen))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
default:
break;
}
--
1.7.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 0/3] net/rds: SOL_RDS socket option to explicitly select transport
2015-05-29 21:28 [PATCH 0/3] net/rds: SOL_RDS socket option to explicitly select transport Sowmini Varadhan
` (2 preceding siblings ...)
2015-05-29 21:28 ` [PATCH 3/3] net/rds Add getsockopt " Sowmini Varadhan
@ 2015-06-01 4:47 ` David Miller
3 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2015-06-01 4:47 UTC (permalink / raw)
To: sowmini.varadhan
Cc: netdev, linux-kernel, chien.yen, rds-devel, ajaykumar.hotchandani,
igor.maximov
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Fri, 29 May 2015 17:28:06 -0400
> Today the underlying transport (TCP or IB) for a PF_RDS socket is
> implicitly selected based on the local address used to bind(2) the
> PF_RDS socket. This results in some non-deterministic behavior when
> there are un-numbered and IPoIB interfaces sharing the same IP address.
> It also places the constraint that the IB interface must have an IP
> address (and thus, IPoIB) configured on it.
>
> The non-determinism may be avoided by providing the user-space application
> a socket option that allows it to explicitly select the transport
> prior to bind(2).
>
> Patch 1 of this series provides the constant definitions needed by
> the application via <linux/rds.h>.
>
> Patch 2 provides the setsockopt support, and Patch 3 provides the
> getsockopt support.
Seems reasonable, series applied to net-next, thanks.
^ permalink raw reply [flat|nested] 5+ messages in thread