* [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index @ 2015-11-19 20:32 David Ahern 2015-11-19 20:32 ` [PATCH net-next 2/2] net: Add support for vrf-global TCP servers David Ahern 2015-11-22 17:23 ` [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Miller 0 siblings, 2 replies; 10+ messages in thread From: David Ahern @ 2015-11-19 20:32 UTC (permalink / raw) To: netdev; +Cc: David Ahern Add helper to lookup master index given a device index. Signed-off-by: David Ahern <dsa@cumulusnetworks.com> --- include/net/l3mdev.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 774d85b2d5d9..786226f8e77b 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -51,6 +51,24 @@ static inline int l3mdev_master_ifindex(struct net_device *dev) return ifindex; } +static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) +{ + struct net_device *dev; + int rc = 0; + + if (likely(ifindex)) { + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + rc = l3mdev_master_ifindex_rcu(dev); + + rcu_read_unlock(); + } + + return rc; +} + /* get index of an interface to use for FIB lookups. For devices * enslaved to an L3 master device FIB lookups are based on the * master index @@ -167,6 +185,11 @@ static inline int l3mdev_master_ifindex(struct net_device *dev) return 0; } +static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) +{ + return 0; +} + static inline int l3mdev_fib_oif_rcu(struct net_device *dev) { return dev ? dev->ifindex : 0; -- 1.9.1 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH net-next 2/2] net: Add support for vrf-global TCP servers 2015-11-19 20:32 [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Ahern @ 2015-11-19 20:32 ` David Ahern 2015-11-22 17:23 ` [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Miller 1 sibling, 0 replies; 10+ messages in thread From: David Ahern @ 2015-11-19 20:32 UTC (permalink / raw) To: netdev; +Cc: David Ahern Allow a process to have a "VRF-global" listen socket. In this case the listen socket is not bound to a VRF device but child sockets derived from it are bound to the VRF device the original packet was received on. This avoids the need to replicate services or create additional listen sockets as VRF devices are created (and the corresponding tear down as VRFs are deleted). Processes can learn the VRF association for the child socket using getsockopt and the SO_BINDTODEVICE option. Signed-off-by: David Ahern <dsa@cumulusnetworks.com> --- net/ipv4/syncookies.c | 6 ++++-- net/ipv4/tcp_input.c | 4 +++- net/ipv4/tcp_ipv4.c | 1 + net/ipv6/syncookies.c | 7 +++++-- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4cbe9f0a4281..85ff306a5930 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -18,6 +18,7 @@ #include <linux/export.h> #include <net/tcp.h> #include <net/route.h> +#include <net/l3mdev.h> extern int sysctl_tcp_syncookies; @@ -351,7 +352,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack.v64 = 0; treq->tfo_listener = false; - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if ? 
: + l3mdev_master_ifindex_by_index(sock_net(sk), skb->skb_iif); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -371,7 +373,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, + flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fdd88c3803a6..af884f237332 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -72,6 +72,7 @@ #include <net/dst.h> #include <net/tcp.h> #include <net/inet_common.h> +#include <net/l3mdev.h> #include <linux/ipsec.h> #include <asm/unaligned.h> #include <linux/errqueue.h> @@ -6187,7 +6188,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_openreq_init(req, &tmp_opt, skb, sk); /* Note: tcp_v6_init_req() might override ir_iif for link locals */ - inet_rsk(req)->ir_iif = sk->sk_bound_dev_if; + inet_rsk(req)->ir_iif = sk->sk_bound_dev_if ? 
: + l3mdev_master_ifindex_by_index(sock_net(sk), skb->skb_iif); af_ops->init_req(req, sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ba09016d1bfd..d3a10f9f763b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1275,6 +1275,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, ireq = inet_rsk(req); sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newsk->sk_bound_dev_if = ireq->ir_iif; newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bb8f2fa1c7fb..d33bec50c711 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <net/ipv6.h> #include <net/tcp.h> +#include <net/l3mdev.h> #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) @@ -193,7 +194,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->pktopts = skb; } - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if ? : + l3mdev_master_ifindex_by_index(sock_net(sk), skb->skb_iif); + /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) @@ -224,7 +227,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, np->opt, &final); fl6.saddr = ireq->ir_v6_loc_addr; - fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_oif = ireq->ir_iif; fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; -- 1.9.1 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index 2015-11-19 20:32 [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Ahern 2015-11-19 20:32 ` [PATCH net-next 2/2] net: Add support for vrf-global TCP servers David Ahern @ 2015-11-22 17:23 ` David Miller 2015-11-22 17:30 ` David Ahern 1 sibling, 1 reply; 10+ messages in thread From: David Miller @ 2015-11-22 17:23 UTC (permalink / raw) To: dsa; +Cc: netdev From: David Ahern <dsa@cumulusnetworks.com> Date: Thu, 19 Nov 2015 12:32:00 -0800 > Add helper to lookup master index given a device index. > > Signed-off-by: David Ahern <dsa@cumulusnetworks.com> I don't like where this is going. sk->sk_bound_dev_if is for device bindings which the user has explicitly asked for. We should never, therefore, automatically set it without the user's consent. I'm not applying these patches, sorry. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index 2015-11-22 17:23 ` [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Miller @ 2015-11-22 17:30 ` David Ahern 2015-11-22 18:17 ` David Miller 0 siblings, 1 reply; 10+ messages in thread From: David Ahern @ 2015-11-22 17:30 UTC (permalink / raw) To: David Miller; +Cc: netdev On 11/22/15 10:23 AM, David Miller wrote: > From: David Ahern <dsa@cumulusnetworks.com> > Date: Thu, 19 Nov 2015 12:32:00 -0800 > >> Add helper to lookup master index given a device index. >> >> Signed-off-by: David Ahern <dsa@cumulusnetworks.com> > > I don't like where this is going. > > sk->sk_bound_dev_if is for device bindings which the user has > explicitly asked for. > > We should never, therefore, automatically set it without the user's > consent. In this case the user is running a daemon (bgpd) where a single instance works across all VRFs. The listen socket is not bound to a device, so this does not override what the user asked for. Child sockets are then bound to the VRF device the connection originates over, so it narrows the scope of accepted connections to a single VRF. If you look at the change, e.g.,: ireq->ir_iif = sk->sk_bound_dev_if ? : l3mdev_master_ifindex_by_index(sock_net(sk), skb->skb_iif); It keeps user requested sk_bound_dev_if if it is set. If not, it applies the limited scope of a VRF device if the skb originated on a device enslaved to a VRF device. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index 2015-11-22 17:30 ` David Ahern @ 2015-11-22 18:17 ` David Miller 2015-11-23 4:02 ` David Ahern 0 siblings, 1 reply; 10+ messages in thread From: David Miller @ 2015-11-22 18:17 UTC (permalink / raw) To: dsa; +Cc: netdev From: David Ahern <dsa@cumulusnetworks.com> Date: Sun, 22 Nov 2015 10:30:32 -0700 > In this case ... I understand the problem you are trying to solve, but I am saying you can't use sk_bound_dev_if to solve it. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index 2015-11-22 18:17 ` David Miller @ 2015-11-23 4:02 ` David Ahern 2015-11-23 4:35 ` David Miller 0 siblings, 1 reply; 10+ messages in thread From: David Ahern @ 2015-11-23 4:02 UTC (permalink / raw) To: David Miller; +Cc: netdev On 11/22/15 11:17 AM, David Miller wrote: > From: David Ahern <dsa@cumulusnetworks.com> > Date: Sun, 22 Nov 2015 10:30:32 -0700 > >> In this case ... > > I understand the problem you are trying to solve, but I am saying > you can't use sk_bound_dev_if to use it. > I am confused by that response given that sk_bound_dev_if is one of the key principles for the VRF implementation. Applications wanting to communicate over interfaces in a VRF have to set sk_bound_dev_if. If sk_bound_dev_if is not set by the kernel when the child socket is created the TCP handshake will not complete. It is not something that can be deferred until after accept. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index 2015-11-23 4:02 ` David Ahern @ 2015-11-23 4:35 ` David Miller 2015-11-23 18:28 ` David Ahern 0 siblings, 1 reply; 10+ messages in thread From: David Miller @ 2015-11-23 4:35 UTC (permalink / raw) To: dsa; +Cc: netdev From: David Ahern <dsa@cumulusnetworks.com> Date: Sun, 22 Nov 2015 21:02:04 -0700 > I am confused by that response given that sk_bound_dev_if is one of > the key principles for the VRF implementation. Applications wanting to > communicate over interfaces in a VRF have to set sk_bound_dev_if. Yes, they have to set it explicitly. You are setting it for them in response to the connection creation, and that's what I object to. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index 2015-11-23 4:35 ` David Miller @ 2015-11-23 18:28 ` David Ahern 0 siblings, 0 replies; 10+ messages in thread From: David Ahern @ 2015-11-23 18:28 UTC (permalink / raw) To: David Miller; +Cc: netdev On 11/22/15 9:35 PM, David Miller wrote: > From: David Ahern <dsa@cumulusnetworks.com> > Date: Sun, 22 Nov 2015 21:02:04 -0700 > >> I am confused by that response given that sk_bound_dev_if is one of >> the key principles for the VRF implementation. Applications wanting to >> communicate over interfaces in a VRF have to set sk_bound_dev_if. > > Yes, they have to set it explicitly. > > You are setting it for them in response to the connection > creation, and that's what I object to. > The intent is to not require having N-listen sockets/threads/tasks to support N-vrfs for scalability reasons. Having a special DEVICE_ANY index adds complexity to socket lookups, so I dropped that idea long ago. Would guarding this behavior by a sysctl be acceptable? ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH net-next v2 0/2] net: Allow accepted sockets to be bound to l3mdev domain @ 2015-12-15 17:18 David Ahern 2015-12-15 17:18 ` [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Ahern 0 siblings, 1 reply; 10+ messages in thread From: David Ahern @ 2015-12-15 17:18 UTC (permalink / raw) To: netdev; +Cc: David Ahern Allow accepted sockets to derive their sk_bound_dev_if setting from the l3mdev domain in which the packets originated. This version adds a sysctl to control whether the setting is inherited, making the functionality similar to sk_mark and its sysctl_tcp_fwmark_accept setting. This effectively allows a process to have a "VRF-global" listen socket, with child sockets bound to the VRF device in which the packet originated. A similar behavior can be achieved using sk_mark, but a solution using marks is incomplete as it does not handle duplicate addresses in different L3 domains/VRFs. Allowing sockets to inherit the sk_bound_dev_if from the l3mdev domain provides a complete solution. v2 - Wrap the sk_bound_dev_if inheritance behavior in a new sysctl that defaults to disabled so a user has to opt in David Ahern (2): net: l3mdev: Add master device lookup by index net: Allow accepted sockets to be bound to l3mdev domain Documentation/networking/ip-sysctl.txt | 7 +++++++ include/net/inet_sock.h | 12 ++++++++++++ include/net/l3mdev.h | 23 +++++++++++++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/syncookies.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 1 + net/ipv6/syncookies.c | 4 ++-- 9 files changed, 58 insertions(+), 5 deletions(-) -- 1.9.1 ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index 2015-12-15 17:18 [PATCH net-next v2 0/2] net: Allow accepted sockets to be bound to l3mdev domain David Ahern @ 2015-12-15 17:18 ` David Ahern 0 siblings, 0 replies; 10+ messages in thread From: David Ahern @ 2015-12-15 17:18 UTC (permalink / raw) To: netdev; +Cc: David Ahern Add helper to lookup l3mdev master index given a device index. Signed-off-by: David Ahern <dsa@cumulusnetworks.com> --- include/net/l3mdev.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 774d85b2d5d9..786226f8e77b 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -51,6 +51,24 @@ static inline int l3mdev_master_ifindex(struct net_device *dev) return ifindex; } +static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) +{ + struct net_device *dev; + int rc = 0; + + if (likely(ifindex)) { + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + rc = l3mdev_master_ifindex_rcu(dev); + + rcu_read_unlock(); + } + + return rc; +} + /* get index of an interface to use for FIB lookups. For devices * enslaved to an L3 master device FIB lookups are based on the * master index @@ -167,6 +185,11 @@ static inline int l3mdev_master_ifindex(struct net_device *dev) return 0; } +static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) +{ + return 0; +} + static inline int l3mdev_fib_oif_rcu(struct net_device *dev) { return dev ? dev->ifindex : 0; -- 1.9.1 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH net-next v3 0/2] net: Allow accepted sockets to be bound to l3mdev domain @ 2015-12-16 21:20 David Ahern 2015-12-16 21:20 ` [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Ahern 0 siblings, 1 reply; 10+ messages in thread From: David Ahern @ 2015-12-16 21:20 UTC (permalink / raw) To: netdev; +Cc: David Ahern Allow accepted sockets to derive their sk_bound_dev_if setting from the l3mdev domain in which the packets originated. This version adds a sysctl to control whether the setting is inherited, making the functionality similar to sk_mark and its sysctl_tcp_fwmark_accept setting. This effectively allows a process to have a "VRF-global" listen socket, with child sockets bound to the VRF device in which the packet originated. A similar behavior can be achieved using sk_mark, but a solution using marks is incomplete as it does not handle duplicate addresses in different L3 domains/VRFs. Allowing sockets to inherit the sk_bound_dev_if from the l3mdev domain provides a complete solution. David Ahern (2): net: l3mdev: Add master device lookup by index net: Allow accepted sockets to be bound to l3mdev domain Documentation/networking/ip-sysctl.txt | 8 ++++++++ include/net/inet_sock.h | 14 ++++++++++++++ include/net/l3mdev.h | 23 +++++++++++++++++++++++ include/net/netns/ipv4.h | 3 +++ net/ipv4/syncookies.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 1 + net/ipv6/syncookies.c | 4 ++-- 9 files changed, 65 insertions(+), 5 deletions(-) -- 1.9.1 ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index 2015-12-16 21:20 [PATCH net-next v3 0/2] net: Allow accepted sockets to be bound to l3mdev domain David Ahern @ 2015-12-16 21:20 ` David Ahern 0 siblings, 0 replies; 10+ messages in thread From: David Ahern @ 2015-12-16 21:20 UTC (permalink / raw) To: netdev; +Cc: David Ahern Add helper to lookup l3mdev master index given a device index. Signed-off-by: David Ahern <dsa@cumulusnetworks.com> --- include/net/l3mdev.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 774d85b2d5d9..786226f8e77b 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -51,6 +51,24 @@ static inline int l3mdev_master_ifindex(struct net_device *dev) return ifindex; } +static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) +{ + struct net_device *dev; + int rc = 0; + + if (likely(ifindex)) { + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + rc = l3mdev_master_ifindex_rcu(dev); + + rcu_read_unlock(); + } + + return rc; +} + /* get index of an interface to use for FIB lookups. For devices * enslaved to an L3 master device FIB lookups are based on the * master index @@ -167,6 +185,11 @@ static inline int l3mdev_master_ifindex(struct net_device *dev) return 0; } +static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) +{ + return 0; +} + static inline int l3mdev_fib_oif_rcu(struct net_device *dev) { return dev ? dev->ifindex : 0; -- 1.9.1 ^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2015-12-16 21:20 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2015-11-19 20:32 [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Ahern 2015-11-19 20:32 ` [PATCH net-next 2/2] net: Add support for vrf-global TCP servers David Ahern 2015-11-22 17:23 ` [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Miller 2015-11-22 17:30 ` David Ahern 2015-11-22 18:17 ` David Miller 2015-11-23 4:02 ` David Ahern 2015-11-23 4:35 ` David Miller 2015-11-23 18:28 ` David Ahern -- strict thread matches above, loose matches on Subject: below -- 2015-12-15 17:18 [PATCH net-next v2 0/2] net: Allow accepted sockets to be bound to l3mdev domain David Ahern 2015-12-15 17:18 ` [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Ahern 2015-12-16 21:20 [PATCH net-next v3 0/2] net: Allow accepted sockets to be bound to l3mdev domain David Ahern 2015-12-16 21:20 ` [PATCH net-next 1/2] net: l3mdev: Add master device lookup by index David Ahern
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).