netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] Implement IP_UNICAST_IF socket option.
@ 2012-02-02 21:36 Erich E. Hoover
  2012-02-02 21:57 ` Eric Dumazet
  0 siblings, 1 reply; 13+ messages in thread
From: Erich E. Hoover @ 2012-02-02 21:36 UTC (permalink / raw)
  To: Linux Netdev; +Cc: Erich E. Hoover


The IP_UNICAST_IF feature is needed by the Wine project.  This patch implements the feature by setting the outgoing interface in a similar fashion to that of IP_PKTINFO.

Signed-off-by: Erich E. Hoover <ehoover@mines.edu>
---
 include/linux/in.h      |    1 +
 include/net/inet_sock.h |    2 +
 include/net/ip.h        |    1 +
 net/ipv4/af_inet.c      |    2 +
 net/ipv4/ip_sockglue.c  |   68 +++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ping.c         |    2 +-
 net/ipv4/raw.c          |    2 +-
 net/ipv4/udp.c          |    2 +-
 8 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/include/linux/in.h b/include/linux/in.h
index 01129c0..89f6682 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -86,6 +86,7 @@ struct in_addr {
 
 #define IP_MINTTL       21
 #define IP_NODEFRAG     22
+#define IP_UNICAST_IF   23
 
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index e3e4051..ad517f5 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -132,6 +132,7 @@ struct rtable;
  * @tos - TOS
  * @mc_ttl - Multicasting TTL
  * @is_icsk - is this an inet_connection_sock?
+ * @outif_index - Outgoing device index
  * @mc_index - Multicast device index
  * @mc_list - Group array
  * @cork - info to build ip hdr on each ip frag while socket is corked
@@ -167,6 +168,7 @@ struct inet_sock {
 				transparent:1,
 				mc_all:1,
 				nodefrag:1;
+	int			outif_index;
 	int			mc_index;
 	__be32			mc_addr;
 	struct ip_mc_socklist __rcu	*mc_list;
diff --git a/include/net/ip.h b/include/net/ip.h
index 775009f..2c1a261 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -452,6 +452,7 @@ extern int ip_options_rcv_srr(struct sk_buff *skb);
 
 extern void	ipv4_pktinfo_prepare(struct sk_buff *skb);
 extern void	ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb);
+extern int	ip_default_ifindex(struct sock *sk);
 extern int	ip_cmsg_send(struct net *net,
 			     struct msghdr *msg, struct ipcm_cookie *ipc);
 extern int	ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f7b5670..a5855cd 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -375,6 +375,8 @@ lookup_protocol:
 	sk->sk_protocol	   = protocol;
 	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
 
+	inet->outif_index  = 0;
+
 	inet->uc_ttl	= -1;
 	inet->mc_loop	= 1;
 	inet->mc_ttl	= 1;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8aa87c1..d7b507d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -186,6 +186,18 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(ip_cmsg_recv);
 
+int ip_default_ifindex(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	int ifindex = sk->sk_bound_dev_if;
+
+	/* Set the outgoing interface from the value set by IP_UNICAST_IF */
+	if (inet->outif_index)
+		ifindex = inet->outif_index;
+
+	return ifindex;
+}
+
 int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
 {
 	int err;
@@ -628,6 +640,48 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			goto e_inval;
 		inet->mc_loop = !!val;
 		break;
+
+	case IP_UNICAST_IF:
+	{
+		struct net_device *dev = NULL;
+		__be32 iface;
+		int ifindex;
+
+		/*
+		 *	Check the arguments are allowable
+		 */
+
+		if (optlen != sizeof(__be32))
+			goto e_inval;
+
+		err = -EFAULT;
+		if (copy_from_user(&iface, optval, sizeof(__be32)))
+			break;
+
+		ifindex = ntohl(iface);
+		if (ifindex & ntohl(0xFF)) /* first octet must be zero */
+			goto e_inval;
+
+		if (ifindex == 0) {
+			inet->outif_index = 0;
+			err = 0;
+			break;
+		} else
+			dev = dev_get_by_index(sock_net(sk), ifindex);
+
+		err = -EADDRNOTAVAIL;
+		if (!dev)
+			break;
+		dev_put(dev);
+
+		err = -EINVAL;
+		if (sk->sk_bound_dev_if && ifindex != sk->sk_bound_dev_if)
+			break;
+
+		inet->outif_index = ifindex;
+		err = 0;
+		break;
+	}
 	case IP_MULTICAST_IF:
 	{
 		struct ip_mreqn mreq;
@@ -1178,6 +1232,20 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_MULTICAST_LOOP:
 		val = inet->mc_loop;
 		break;
+	case IP_UNICAST_IF:
+	{
+		__be32 iface;
+
+		len = sizeof(__be32);
+		iface = htonl(inet->outif_index);
+		release_sock(sk);
+
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &iface, len))
+			return -EFAULT;
+		return 0;
+	}
 	case IP_MULTICAST_IF:
 	{
 		struct in_addr addr;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index aea5a19..abeb454 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -510,7 +510,7 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	ipc.addr = inet->inet_saddr;
 	ipc.opt = NULL;
-	ipc.oif = sk->sk_bound_dev_if;
+	ipc.oif = ip_default_ifindex(sk);
 	ipc.tx_flags = 0;
 	err = sock_tx_timestamp(sk, &ipc.tx_flags);
 	if (err)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 3ccda5a..000d9fb 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -515,7 +515,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	ipc.addr = inet->inet_saddr;
 	ipc.opt = NULL;
 	ipc.tx_flags = 0;
-	ipc.oif = sk->sk_bound_dev_if;
+	ipc.oif = ip_default_ifindex(sk);
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5d075b5..651eb62 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -869,7 +869,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	}
 	ipc.addr = inet->inet_saddr;
 
-	ipc.oif = sk->sk_bound_dev_if;
+	ipc.oif = ip_default_ifindex(sk);
 	err = sock_tx_timestamp(sk, &ipc.tx_flags);
 	if (err)
 		return err;
-- 
1.7.5.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-02 21:36 [PATCH v2] Implement IP_UNICAST_IF socket option Erich E. Hoover
@ 2012-02-02 21:57 ` Eric Dumazet
  2012-02-02 22:29   ` Erich E. Hoover
  0 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2012-02-02 21:57 UTC (permalink / raw)
  To: Erich E. Hoover; +Cc: Linux Netdev

Le jeudi 02 février 2012 à 14:36 -0700, Erich E. Hoover a écrit :
> The IP_UNICAST_IF feature is needed by the Wine project.  This patch implements the feature by setting the outgoing interface in a similar fashion to that of IP_PKTINFO.
> 
> Signed-off-by: Erich E. Hoover <ehoover@mines.edu>
> ---
>  include/linux/in.h      |    1 +
>  include/net/inet_sock.h |    2 +
>  include/net/ip.h        |    1 +
>  net/ipv4/af_inet.c      |    2 +
>  net/ipv4/ip_sockglue.c  |   68 +++++++++++++++++++++++++++++++++++++++++++++++
>  net/ipv4/ping.c         |    2 +-
>  net/ipv4/raw.c          |    2 +-
>  net/ipv4/udp.c          |    2 +-
>  8 files changed, 77 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/in.h b/include/linux/in.h
> index 01129c0..89f6682 100644
> --- a/include/linux/in.h
> +++ b/include/linux/in.h
> @@ -86,6 +86,7 @@ struct in_addr {
>  
>  #define IP_MINTTL       21
>  #define IP_NODEFRAG     22
> +#define IP_UNICAST_IF   23
>  
>  /* IP_MTU_DISCOVER values */
>  #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index e3e4051..ad517f5 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -132,6 +132,7 @@ struct rtable;
>   * @tos - TOS
>   * @mc_ttl - Multicasting TTL
>   * @is_icsk - is this an inet_connection_sock?
> + * @outif_index - Outgoing device index
>   * @mc_index - Multicast device index
>   * @mc_list - Group array
>   * @cork - info to build ip hdr on each ip frag while socket is corked
> @@ -167,6 +168,7 @@ struct inet_sock {
>  				transparent:1,
>  				mc_all:1,
>  				nodefrag:1;
> +	int			outif_index;
>  	int			mc_index;
>  	__be32			mc_addr;
>  	struct ip_mc_socklist __rcu	*mc_list;
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 775009f..2c1a261 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -452,6 +452,7 @@ extern int ip_options_rcv_srr(struct sk_buff *skb);
>  
>  extern void	ipv4_pktinfo_prepare(struct sk_buff *skb);
>  extern void	ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb);
> +extern int	ip_default_ifindex(struct sock *sk);
>  extern int	ip_cmsg_send(struct net *net,
>  			     struct msghdr *msg, struct ipcm_cookie *ipc);
>  extern int	ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen);
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index f7b5670..a5855cd 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -375,6 +375,8 @@ lookup_protocol:
>  	sk->sk_protocol	   = protocol;
>  	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
>  
> +	inet->outif_index  = 0;
> +
>  	inet->uc_ttl	= -1;
>  	inet->mc_loop	= 1;
>  	inet->mc_ttl	= 1;
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index 8aa87c1..d7b507d 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -186,6 +186,18 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
>  }
>  EXPORT_SYMBOL(ip_cmsg_recv);
>  
> +int ip_default_ifindex(struct sock *sk)

const struct sock *sk

> +{
> +	struct inet_sock *inet = inet_sk(sk);
> +	int ifindex = sk->sk_bound_dev_if;
> +
> +	/* Set the outgoing interface from the value set by IP_UNICAST_IF */
> +	if (inet->outif_index)
> +		ifindex = inet->outif_index;
> +
> +	return ifindex;
> +}
> +
>  int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
>  {
>  	int err;
> @@ -628,6 +640,48 @@ static int do_ip_setsockopt(struct sock *sk, int level,
>  			goto e_inval;
>  		inet->mc_loop = !!val;
>  		break;
> +
> +	case IP_UNICAST_IF:
> +	{
> +		struct net_device *dev = NULL;
> +		__be32 iface;
> +		int ifindex;
> +
> +		/*
> +		 *	Check the arguments are allowable
> +		 */
> +
> +		if (optlen != sizeof(__be32))
> +			goto e_inval;
> +
> +		err = -EFAULT;
> +		if (copy_from_user(&iface, optval, sizeof(__be32)))
> +			break;
> +
> +		ifindex = ntohl(iface);
> +		if (ifindex & ntohl(0xFF)) /* first octet must be zero */


This looks wrong.

Maybe you wanted : if (ifindex & 0xFF000000) 

> +			goto e_inval;
> +
> +		if (ifindex == 0) {
> +			inet->outif_index = 0;
> +			err = 0;
> +			break;
> +		} else

you can avoid the else (because of the break;)

> +			dev = dev_get_by_index(sock_net(sk), ifindex);
> +
> +		err = -EADDRNOTAVAIL;
> +		if (!dev)
> +			break;
> +		dev_put(dev);
> +
> +		err = -EINVAL;
> +		if (sk->sk_bound_dev_if && ifindex != sk->sk_bound_dev_if)
> +			break;
> +
> +		inet->outif_index = ifindex;

what happens if later sk_bound_dev_if is changed ? Should we redo the
above tests ?

> +		err = 0;
> +		break;
> +	}
>  	case IP_MULTICAST_IF:
>  	{
>  		struct ip_mreqn mreq;
> @@ -1178,6 +1232,20 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
>  	case IP_MULTICAST_LOOP:
>  		val = inet->mc_loop;
>  		break;
> +	case IP_UNICAST_IF:
> +	{
> +		__be32 iface;
> +
> +		len = sizeof(__be32);
> +		iface = htonl(inet->outif_index);
> +		release_sock(sk);
> +
> +		if (put_user(len, optlen))
> +			return -EFAULT;
> +		if (copy_to_user(optval, &iface, len))
> +			return -EFAULT;
> +		return 0;

wow.... thats not pretty...

	what about :

	case IP_UNICASE_IF:
		val = htonl(inet->outif_index);
		break;

> +	}
>  	case IP_MULTICAST_IF:
>  	{
>  		struct in_addr addr;
> diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
> index aea5a19..abeb454 100644
> --- a/net/ipv4/ping.c
> +++ b/net/ipv4/ping.c
> @@ -510,7 +510,7 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  
>  	ipc.addr = inet->inet_saddr;
>  	ipc.opt = NULL;
> -	ipc.oif = sk->sk_bound_dev_if;
> +	ipc.oif = ip_default_ifindex(sk);
>  	ipc.tx_flags = 0;
>  	err = sock_tx_timestamp(sk, &ipc.tx_flags);
>  	if (err)
> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
> index 3ccda5a..000d9fb 100644
> --- a/net/ipv4/raw.c
> +++ b/net/ipv4/raw.c
> @@ -515,7 +515,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  	ipc.addr = inet->inet_saddr;
>  	ipc.opt = NULL;
>  	ipc.tx_flags = 0;
> -	ipc.oif = sk->sk_bound_dev_if;
> +	ipc.oif = ip_default_ifindex(sk);
>  
>  	if (msg->msg_controllen) {
>  		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 5d075b5..651eb62 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -869,7 +869,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  	}
>  	ipc.addr = inet->inet_saddr;
>  
> -	ipc.oif = sk->sk_bound_dev_if;
> +	ipc.oif = ip_default_ifindex(sk);
>  	err = sock_tx_timestamp(sk, &ipc.tx_flags);
>  	if (err)
>  		return err;


IPv6 not supported ?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-02 21:57 ` Eric Dumazet
@ 2012-02-02 22:29   ` Erich E. Hoover
  2012-02-02 22:45     ` Eric Dumazet
  2012-02-03  1:19     ` Xiaochun Lu
  0 siblings, 2 replies; 13+ messages in thread
From: Erich E. Hoover @ 2012-02-02 22:29 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Netdev

On Thu, Feb 2, 2012 at 2:57 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le jeudi 02 février 2012 à 14:36 -0700, Erich E. Hoover a écrit :
> > ...
> > +             if (sk->sk_bound_dev_if && ifindex != sk->sk_bound_dev_if)
> > +                     break;
>
> what happens if later sk_bound_dev_if is changed ? Should we redo the
> above tests ?

Would it be better if ip_default_ifindex performed the same comparison
and prioritized the sk->sk_bound_dev_if value?

> > ...
> > +     case IP_UNICAST_IF:
> > +     {
> > +             __be32 iface;
> > +
> > +             len = sizeof(__be32);
> > +             iface = htonl(inet->outif_index);
> > +             release_sock(sk);
> > +
> > +             if (put_user(len, optlen))
> > +                     return -EFAULT;
> > +             if (copy_to_user(optval, &iface, len))
> > +                     return -EFAULT;
> > +             return 0;
>
> wow.... thats not pretty...
>
>        what about :
>
>        case IP_UNICASE_IF:
>                val = htonl(inet->outif_index);
>                break;
>

The return value needs to be 4 bytes even if sizeof(int) != 4.

> > ...
> IPv6 not supported ?

Nope, at least according to the documentation the option is only for
IPv4 (IPPROTO_IP).

Erich Hoover
ehoover@mines.edu

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-02 22:29   ` Erich E. Hoover
@ 2012-02-02 22:45     ` Eric Dumazet
  2012-02-03  1:22       ` Erich E. Hoover
  2012-02-03  1:19     ` Xiaochun Lu
  1 sibling, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2012-02-02 22:45 UTC (permalink / raw)
  To: Erich E. Hoover; +Cc: Linux Netdev

Le jeudi 02 février 2012 à 15:29 -0700, Erich E. Hoover a écrit :
> On Thu, Feb 2, 2012 at 2:57 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Le jeudi 02 février 2012 à 14:36 -0700, Erich E. Hoover a écrit :
> > > ...
> > > +             if (sk->sk_bound_dev_if && ifindex != sk->sk_bound_dev_if)
> > > +                     break;
> >
> > what happens if later sk_bound_dev_if is changed ? Should we redo the
> > above tests ?
> 
> Would it be better if ip_default_ifindex performed the same comparison
> and prioritized the sk->sk_bound_dev_if value?
> 

I guess you should mimic other OS behavior. Tests are probably needed.

> > > ...
> > > +     case IP_UNICAST_IF:
> > > +     {
> > > +             __be32 iface;
> > > +
> > > +             len = sizeof(__be32);
> > > +             iface = htonl(inet->outif_index);
> > > +             release_sock(sk);
> > > +
> > > +             if (put_user(len, optlen))
> > > +                     return -EFAULT;
> > > +             if (copy_to_user(optval, &iface, len))
> > > +                     return -EFAULT;
> > > +             return 0;
> >
> > wow.... thats not pretty...
> >
> >        what about :
> >
> >        case IP_UNICASE_IF:
> >                val = htonl(inet->outif_index);
> >                break;
> >
> 
> The return value needs to be 4 bytes even if sizeof(int) != 4.
> 

On all arches supported by linux, sizeof(int) == 4

Maybe avoid sparse warning since val is not __be32

	val = (__force int)htonl(inet->outif_index);
	break;

> > > ...
> > IPv6 not supported ?
> 
> Nope, at least according to the documentation the option is only for
> IPv4 (IPPROTO_IP).

Probably because of the mix on interface index and IPv4 addr on "other
OS".

But if we implement interface index only, it could be used on IPv6
too... I dont know...

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-02 22:29   ` Erich E. Hoover
  2012-02-02 22:45     ` Eric Dumazet
@ 2012-02-03  1:19     ` Xiaochun Lu
  2012-02-03  1:38       ` Erich E. Hoover
  2012-02-03  1:50       ` David Miller
  1 sibling, 2 replies; 13+ messages in thread
From: Xiaochun Lu @ 2012-02-03  1:19 UTC (permalink / raw)
  To: Erich E. Hoover; +Cc: Eric Dumazet, Linux Netdev, shawn.lu

Hi, Erich E. Hoover:

Can you give more detail on why we need invent another binding to
device option in your patch log?  Why SO_BINDTODEVICE or IP_PKTINFO
can't server your need?

Thanks!

Shawn Lu

On 2/2/12, Erich E. Hoover <ehoover@mines.edu> wrote:
> On Thu, Feb 2, 2012 at 2:57 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> Le jeudi 02 février 2012 à 14:36 -0700, Erich E. Hoover a écrit :
>> > ...
>> > +             if (sk->sk_bound_dev_if && ifindex != sk->sk_bound_dev_if)
>> > +                     break;
>>
>> what happens if later sk_bound_dev_if is changed ? Should we redo the
>> above tests ?
>
> Would it be better if ip_default_ifindex performed the same comparison
> and prioritized the sk->sk_bound_dev_if value?
>
>> > ...
>> > +     case IP_UNICAST_IF:
>> > +     {
>> > +             __be32 iface;
>> > +
>> > +             len = sizeof(__be32);
>> > +             iface = htonl(inet->outif_index);
>> > +             release_sock(sk);
>> > +
>> > +             if (put_user(len, optlen))
>> > +                     return -EFAULT;
>> > +             if (copy_to_user(optval, &iface, len))
>> > +                     return -EFAULT;
>> > +             return 0;
>>
>> wow.... thats not pretty...
>>
>>        what about :
>>
>>        case IP_UNICASE_IF:
>>                val = htonl(inet->outif_index);
>>                break;
>>
>
> The return value needs to be 4 bytes even if sizeof(int) != 4.
>
>> > ...
>> IPv6 not supported ?
>
> Nope, at least according to the documentation the option is only for
> IPv4 (IPPROTO_IP).
>
> Erich Hoover
> ehoover@mines.edu
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-02 22:45     ` Eric Dumazet
@ 2012-02-03  1:22       ` Erich E. Hoover
  2012-02-03  1:51         ` David Miller
  2012-02-03 10:01         ` Eric Dumazet
  0 siblings, 2 replies; 13+ messages in thread
From: Erich E. Hoover @ 2012-02-03  1:22 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Netdev

On Thu, Feb 2, 2012 at 3:45 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le jeudi 02 février 2012 à 15:29 -0700, Erich E. Hoover a écrit :
> > ...
> > Would it be better if ip_default_ifindex performed the same comparison
> > and prioritized the sk->sk_bound_dev_if value?
> I guess you should mimic other OS behavior. Tests are probably needed.

This isn't the best example since the test machines only have one NIC
and the loopback adapter (the loopback adapter acts differently), but
I generated some test results on the Wine TestBot.

Where bind() and IP_UNICAST_IF match:
https://testbot.winehq.org/JobDetails.pl?Key=16802

Mismatch between bind() and IP_UNICAST_IF:
https://testbot.winehq.org/JobDetails.pl?Key=16803

In both cases the loopback adapter rejects IP_UNICAST_IF and the test
"succeeds" for the NIC (the packet comes back on the same interface).
In both cases IP_UNICAST_IF is set before bind(), so this would seem
to indicate that bind() overrides IP_UNICAST_IF even if the bind()
occurs afterward.

> > > IPv6 not supported ?
> >
> > Nope, at least according to the documentation the option is only for
> > IPv4 (IPPROTO_IP).
>
> Probably because of the mix on interface index and IPv4 addr on "other
> OS".
>
> But if we implement interface index only, it could be used on IPv6
> too... I dont know...

I though I should double check this, and apparently I missed that
there's an equivalent IPV6_UNICAST_IF on "other OS".  Should that be a
patch 2 or should it be included in this one?

Erich Hoover
ehoover@mines.edu

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-03  1:19     ` Xiaochun Lu
@ 2012-02-03  1:38       ` Erich E. Hoover
  2012-02-03  2:15         ` Xiaochun Lu
  2012-02-03  1:50       ` David Miller
  1 sibling, 1 reply; 13+ messages in thread
From: Erich E. Hoover @ 2012-02-03  1:38 UTC (permalink / raw)
  To: Xiaochun Lu; +Cc: Eric Dumazet, Linux Netdev, shawn.lu

On Thu, Feb 2, 2012 at 6:19 PM, Xiaochun Lu <xiaoclu@gmail.com> wrote:
> Hi, Erich E. Hoover:
>
> Can you give more detail on why we need invent another binding to
> device option in your patch log?  Why SO_BINDTODEVICE or IP_PKTINFO
> can't server your need?

Sure, how about adding something like the following?:
---
The IP_UNICAST_IF feature is needed by the Wine project.  This patch
implements the feature by setting the outgoing interface in a similar
fashion to that of IP_PKTINFO.  A separate option is needed to handle
this feature since the existing options do not provide all of the
characteristics required by IP_UNICAST_IF, a summary is provided
below.

SO_BINDTODEVICE:
* SO_BINDTODEVICE requires administrative privileges, IP_UNICAST_IF
does not.  From reading some old mailing list articles my
understanding is that SO_BINDTODEVICE requires administrative
privileges because it can override the administrator's routing
settings.
* The SO_BINDTODEVICE option restricts both outbound and inbound
traffic, IP_UNICAST_IF only impacts outbound traffic.

IP_PKTINFO:
* Since IP_PKTINFO and IP_UNICAST_IF are independent options,
implementing IP_UNICAST_IF with IP_PKTINFO will likely break some
applications.
* Implementing IP_UNICAST_IF on top of IP_PKTINFO significantly
complicates the Wine codebase and reduces the socket performance
(doing this requires a lot of extra communication between the "server"
and "user" layers).

bind():
* bind() does not work on broadcast packets, IP_UNICAST_IF is
specifically intended to work with broadcast packets.
---

Erich Hoover
ehoover@mines.edu

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-03  1:19     ` Xiaochun Lu
  2012-02-03  1:38       ` Erich E. Hoover
@ 2012-02-03  1:50       ` David Miller
  1 sibling, 0 replies; 13+ messages in thread
From: David Miller @ 2012-02-03  1:50 UTC (permalink / raw)
  To: xiaoclu; +Cc: ehoover, eric.dumazet, netdev, shawn.lu

From: Xiaochun Lu <xiaoclu@gmail.com>
Date: Thu, 2 Feb 2012 17:19:11 -0800

> Hi, Erich E. Hoover:
> 
> Can you give more detail on why we need invent another binding to
> device option in your patch log?  Why SO_BINDTODEVICE or IP_PKTINFO
> can't server your need?

Windows apps via wine will make use of this, and you can't just use
bind()'ing or IP_PKTINFO because the former doesn't work on broadcast
addreses and the latter might be used by the wine application at the
same time as IP_UNICAST_IF.

He explained this fully in his previous patch discussion, DO NOT repeat
this whole discussion it's a waste of time.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-03  1:22       ` Erich E. Hoover
@ 2012-02-03  1:51         ` David Miller
  2012-02-03 10:01         ` Eric Dumazet
  1 sibling, 0 replies; 13+ messages in thread
From: David Miller @ 2012-02-03  1:51 UTC (permalink / raw)
  To: ehoover; +Cc: eric.dumazet, netdev

From: "Erich E. Hoover" <ehoover@mines.edu>
Date: Thu, 2 Feb 2012 18:22:51 -0700

> I though I should double check this, and apparently I missed that
> there's an equivalent IPV6_UNICAST_IF on "other OS".  Should that be a
> patch 2 or should it be included in this one?

I'm ambivalent, you can do it either way as far as I'm concerned.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-03  1:38       ` Erich E. Hoover
@ 2012-02-03  2:15         ` Xiaochun Lu
  0 siblings, 0 replies; 13+ messages in thread
From: Xiaochun Lu @ 2012-02-03  2:15 UTC (permalink / raw)
  To: Erich E. Hoover; +Cc: Eric Dumazet, Linux Netdev, shawn.lu

Thanks for explanation.  The log is good to me

On 2/2/12, Erich E. Hoover <ehoover@mines.edu> wrote:
> On Thu, Feb 2, 2012 at 6:19 PM, Xiaochun Lu <xiaoclu@gmail.com> wrote:
>> Hi, Erich E. Hoover:
>>
>> Can you give more detail on why we need invent another binding to
>> device option in your patch log?  Why SO_BINDTODEVICE or IP_PKTINFO
>> can't server your need?
>
> Sure, how about adding something like the following?:
> ---
> The IP_UNICAST_IF feature is needed by the Wine project.  This patch
> implements the feature by setting the outgoing interface in a similar
> fashion to that of IP_PKTINFO.  A separate option is needed to handle
> this feature since the existing options do not provide all of the
> characteristics required by IP_UNICAST_IF, a summary is provided
> below.
>
> SO_BINDTODEVICE:
> * SO_BINDTODEVICE requires administrative privileges, IP_UNICAST_IF
> does not.  From reading some old mailing list articles my
> understanding is that SO_BINDTODEVICE requires administrative
> privileges because it can override the administrator's routing
> settings.
> * The SO_BINDTODEVICE option restricts both outbound and inbound
> traffic, IP_UNICAST_IF only impacts outbound traffic.
>
> IP_PKTINFO:
> * Since IP_PKTINFO and IP_UNICAST_IF are independent options,
> implementing IP_UNICAST_IF with IP_PKTINFO will likely break some
> applications.
> * Implementing IP_UNICAST_IF on top of IP_PKTINFO significantly
> complicates the Wine codebase and reduces the socket performance
> (doing this requires a lot of extra communication between the "server"
> and "user" layers).
>
> bind():
> * bind() does not work on broadcast packets, IP_UNICAST_IF is
> specifically intended to work with broadcast packets.
> ---
>
> Erich Hoover
> ehoover@mines.edu
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-03  1:22       ` Erich E. Hoover
  2012-02-03  1:51         ` David Miller
@ 2012-02-03 10:01         ` Eric Dumazet
  2012-02-03 15:21           ` Erich E. Hoover
  1 sibling, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2012-02-03 10:01 UTC (permalink / raw)
  To: Erich E. Hoover; +Cc: Linux Netdev

Le jeudi 02 février 2012 à 18:22 -0700, Erich E. Hoover a écrit :

> I though I should double check this, and apparently I missed that
> there's an equivalent IPV6_UNICAST_IF on "other OS".  Should that be a
> patch 2 or should it be included in this one?

Do as you prefer.

BTW, do we really want the htonl() thing, and force device indexes in
2^24 range ? This seems odd anyway.

If yes, following patch is needed :

diff --git a/net/core/dev.c b/net/core/dev.c
index f124947..ce7ebde 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5193,7 +5193,10 @@ static int dev_new_index(struct net *net)
 {
 	static int ifindex;
 	for (;;) {
-		if (++ifindex <= 0)
+		/* Because of IP_UNICAST_IF support, we must limit indexes
+		 * to 24 bits. Zero value is also reserved.
+		 */
+		if (++ifindex >= (1<<24))
 			ifindex = 1;
 		if (!__dev_get_by_index(net, ifindex))
 			return ifindex;

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-03 10:01         ` Eric Dumazet
@ 2012-02-03 15:21           ` Erich E. Hoover
  2012-02-03 15:43             ` Eric Dumazet
  0 siblings, 1 reply; 13+ messages in thread
From: Erich E. Hoover @ 2012-02-03 15:21 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Netdev

On Fri, Feb 3, 2012 at 3:01 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> ...
> BTW, do we really want the htonl() thing, and force device indexes in
> 2^24 range ? This seems odd anyway.
> ...

I can easily perform both of these steps in Wine.  The 2^24 limitation
in particular seems artificial, and especially silly to add to the
kernel if it will change the fundamental range of the indices.  With
regard to htonl(), David Miller indicated that the API should match
what other systems use (a 32-bit network byte order word), though he
may not have meant his comment that specifically.  I can see the
merits of doing it both ways and I'm not particularly attached to
either one.

Erich Hoover
ehoover@mines.edu

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Implement IP_UNICAST_IF socket option.
  2012-02-03 15:21           ` Erich E. Hoover
@ 2012-02-03 15:43             ` Eric Dumazet
  0 siblings, 0 replies; 13+ messages in thread
From: Eric Dumazet @ 2012-02-03 15:43 UTC (permalink / raw)
  To: Erich E. Hoover; +Cc: Linux Netdev

Le vendredi 03 février 2012 à 08:21 -0700, Erich E. Hoover a écrit :
> On Fri, Feb 3, 2012 at 3:01 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > ...
> > BTW, do we really want the htonl() thing, and force device indexes in
> > 2^24 range ? This seems odd anyway.
> > ...
> 
> I can easily perform both of these steps in Wine.  The 2^24 limitation
> in particular seems artificial, and especially silly to add to the
> kernel if it will change the fundamental range of the indices.  With
> regard to htonl(), David Miller indicated that the API should match
> what other systems use (a 32-bit network byte order word), though he
> may not have meant his comment that specifically.  I can see the
> merits of doing it both ways and I'm not particularly attached to
> either one.

I have no strong feeling, I only wanted to point out our current device
indexes range is [1 .. (2^31 - 1) ]

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-02-03 15:43 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-02 21:36 [PATCH v2] Implement IP_UNICAST_IF socket option Erich E. Hoover
2012-02-02 21:57 ` Eric Dumazet
2012-02-02 22:29   ` Erich E. Hoover
2012-02-02 22:45     ` Eric Dumazet
2012-02-03  1:22       ` Erich E. Hoover
2012-02-03  1:51         ` David Miller
2012-02-03 10:01         ` Eric Dumazet
2012-02-03 15:21           ` Erich E. Hoover
2012-02-03 15:43             ` Eric Dumazet
2012-02-03  1:19     ` Xiaochun Lu
2012-02-03  1:38       ` Erich E. Hoover
2012-02-03  2:15         ` Xiaochun Lu
2012-02-03  1:50       ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).