netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies
@ 2014-05-10  3:08 Lorenzo Colitti
  2014-05-10  3:08 ` [PATCH v2 2/3] net: Use fwmark reflection in PMTU discovery Lorenzo Colitti
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Lorenzo Colitti @ 2014-05-10  3:08 UTC (permalink / raw)
  To: netdev; +Cc: jpa, davem, ja, hannes, eric.dumazet, Lorenzo Colitti

Kernel-originated IP packets that have no user socket associated
with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.)
are emitted with a mark of zero. Add a sysctl to make them have
the same mark as the packet they are replying to.

This allows an administrator that wishes to do so to use
mark-based routing, firewalling, etc. for these replies by
marking the original packets inbound.

Tested using user-mode linux:
 - ICMP/ICMPv6 echo replies and errors.
 - TCP RST packets (IPv4 and IPv6).

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 Documentation/networking/ip-sysctl.txt | 14 ++++++++++++++
 include/net/ip.h                       |  3 +++
 include/net/ipv6.h                     |  3 +++
 include/net/netns/ipv4.h               |  2 ++
 include/net/netns/ipv6.h               |  1 +
 net/ipv4/icmp.c                        | 11 +++++++++--
 net/ipv4/ip_output.c                   |  3 ++-
 net/ipv4/sysctl_net_ipv4.c             |  7 +++++++
 net/ipv6/icmp.c                        |  6 ++++++
 net/ipv6/sysctl_net_ipv6.c             |  7 +++++++
 net/ipv6/tcp_ipv6.c                    |  1 +
 11 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ab42c95..83e51a5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -56,6 +56,13 @@ ip_forward_use_pmtu - BOOLEAN
 	0 - disabled
 	1 - enabled
 
+fwmark_reflect - BOOLEAN
+	Controls the fwmark of kernel-generated IPv4 reply packets that are not
+	associated with a socket for example, TCP RSTs or ICMP echo replies).
+	If unset, these packets have a fwmark of zero. If set, they have the
+	fwmark of the packet they are replying to.
+	Default: 0
+
 route/max_size - INTEGER
 	Maximum number of routes allowed in the kernel.  Increase
 	this when using large numbers of interfaces and/or routes.
@@ -1181,6 +1188,13 @@ conf/all/forwarding - BOOLEAN
 proxy_ndp - BOOLEAN
 	Do proxy ndp.
 
+fwmark_reflect - BOOLEAN
+	Controls the fwmark of kernel-generated IPv6 reply packets that are not
+	associated with a socket for example, TCP RSTs or ICMPv6 echo replies).
+	If unset, these packets have a fwmark of zero. If set, they have the
+	fwmark of the packet they are replying to.
+	Default: 0
+
 conf/interface/*:
 	Change special settings per interface.
 
diff --git a/include/net/ip.h b/include/net/ip.h
index 16146b6..a4b52c4 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -231,6 +231,9 @@ void ipfrag_init(void);
 
 void ip_static_sysctl_init(void);
 
+#define IP4_REPLY_MARK(net, mark) \
+	((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+
 static inline bool ip_is_fragment(const struct iphdr *iph)
 {
 	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 5b40ad2..ba810d0 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -113,6 +113,9 @@ struct frag_hdr {
 #define	IP6_MF		0x0001
 #define	IP6_OFFSET	0xFFF8
 
+#define IP6_REPLY_MARK(net, mark) \
+	((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
+
 #include <net/sock.h>
 
 /* sysctls */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 80f500a..8e1a9c0 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -72,6 +72,8 @@ struct netns_ipv4 {
 	int sysctl_ip_no_pmtu_disc;
 	int sysctl_ip_fwd_use_pmtu;
 
+	int sysctl_fwmark_reflect;
+
 	kgid_t sysctl_ping_group_range[2];
 
 	atomic_t dev_addr_genid;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 21edaf1..19d3446 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
 	int flowlabel_consistency;
 	int icmpv6_time;
 	int anycast_src_echo_reply;
+	int fwmark_reflect;
 };
 
 struct netns_ipv6 {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fe52666..79c3d94 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	struct sock *sk;
 	struct inet_sock *inet;
 	__be32 daddr, saddr;
+	u32 mark = IP4_REPLY_MARK(net, skb->mark);
 
 	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
 		return;
@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	icmp_param->data.icmph.checksum = 0;
 
 	inet->tos = ip_hdr(skb)->tos;
+	sk->sk_mark = mark;
 	daddr = ipc.addr = ip_hdr(skb)->saddr;
 	saddr = fib_compute_spec_dst(skb);
 	ipc.opt = NULL;
@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
+	fl4.flowi4_mark = mark;
 	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
 	fl4.flowi4_proto = IPPROTO_ICMP;
 	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 					struct flowi4 *fl4,
 					struct sk_buff *skb_in,
 					const struct iphdr *iph,
-					__be32 saddr, u8 tos,
+					__be32 saddr, u8 tos, u32 mark,
 					int type, int code,
 					struct icmp_bxm *param)
 {
@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	fl4->daddr = (param->replyopts.opt.opt.srr ?
 		      param->replyopts.opt.opt.faddr : iph->saddr);
 	fl4->saddr = saddr;
+	fl4->flowi4_mark = mark;
 	fl4->flowi4_tos = RT_TOS(tos);
 	fl4->flowi4_proto = IPPROTO_ICMP;
 	fl4->fl4_icmp_type = type;
@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	struct flowi4 fl4;
 	__be32 saddr;
 	u8  tos;
+	u32 mark;
 	struct net *net;
 	struct sock *sk;
 
@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
 					   IPTOS_PREC_INTERNETCONTROL) :
 					  iph->tos;
+	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
 	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
 		goto out_unlock;
@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param->skb	  = skb_in;
 	icmp_param->offset = skb_network_offset(skb_in);
 	inet_sk(sk)->tos = tos;
+	sk->sk_mark = mark;
 	ipc.addr = iph->saddr;
 	ipc.opt = &icmp_param->replyopts.opt;
 	ipc.tx_flags = 0;
 	ipc.ttl = 0;
 	ipc.tos = -1;
 
-	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
+	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
 			       type, code, icmp_param);
 	if (IS_ERR(rt))
 		goto out_unlock;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1cbeba5..dd1a0d6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1501,7 +1501,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 			daddr = replyopts.opt.opt.faddr;
 	}
 
-	flowi4_init_output(&fl4, arg->bound_dev_if, 0,
+	flowi4_init_output(&fl4, arg->bound_dev_if,
+			   IP4_REPLY_MARK(net, skb->mark),
 			   RT_TOS(arg->tos),
 			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
 			   ip_reply_arg_flowi_flags(arg),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 44eba05..e40a738 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -838,6 +838,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "fwmark_reflect",
+		.data		= &init_net.ipv4.sysctl_fwmark_reflect,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8d39527..f6c84a6 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	int len;
 	int hlimit;
 	int err = 0;
+	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 
 	if ((u8 *)hdr < skb->head ||
 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
@@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	fl6.daddr = hdr->saddr;
 	if (saddr)
 		fl6.saddr = *saddr;
+	fl6.flowi6_mark = mark;
 	fl6.flowi6_oif = iif;
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
@@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	sk = icmpv6_xmit_lock(net);
 	if (sk == NULL)
 		return;
+	sk->sk_mark = mark;
 	np = inet6_sk(sk);
 
 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	int err = 0;
 	int hlimit;
 	u8 tclass;
+	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 
 	saddr = &ipv6_hdr(skb)->daddr;
 
@@ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 		fl6.saddr = *saddr;
 	fl6.flowi6_oif = skb->dev->ifindex;
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+	fl6.flowi6_mark = mark;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
 	if (sk == NULL)
 		return;
+	sk->sk_mark = mark;
 	np = inet6_sk(sk);
 
 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7f405a1..058f3ec 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "fwmark_reflect",
+		.data		= &init_net.ipv6.sysctl.fwmark_reflect,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{ }
 };
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7fa6743..994572c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -802,6 +802,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 		fl6.flowi6_oif = inet6_iif(skb);
 	else
 		fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
 	fl6.fl6_dport = t1->dest;
 	fl6.fl6_sport = t1->source;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
-- 
1.9.1.423.g4596e3a

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/3] net: Use fwmark reflection in PMTU discovery.
  2014-05-10  3:08 [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies Lorenzo Colitti
@ 2014-05-10  3:08 ` Lorenzo Colitti
  2014-05-10  3:08 ` [PATCH v2 3/3] net: support marking accepting TCP sockets Lorenzo Colitti
  2014-05-13 16:52 ` [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies David Miller
  2 siblings, 0 replies; 5+ messages in thread
From: Lorenzo Colitti @ 2014-05-10  3:08 UTC (permalink / raw)
  To: netdev; +Cc: jpa, davem, ja, hannes, eric.dumazet, Lorenzo Colitti

Currently, routing lookups used for Path PMTU Discovery in
absence of a socket or on unmarked sockets use a mark of 0.
This causes PMTUD not to work when using routing based on
netfilter fwmark mangling and fwmark ip rules, such as:

  iptables -j MARK --set-mark 17
  ip rule add fwmark 17 lookup 100

This patch causes these route lookups to use the fwmark from the
received ICMP error when the fwmark_reflect sysctl is enabled.
This allows the administrator to make PMTUD work by configuring
appropriate fwmark rules to mark the inbound ICMP packets.

Black-box tested using user-mode linux by pointing different
fwmarks at routing tables egressing on different interfaces, and
using iptables mangling to mark packets inbound on each interface
with the interface's fwmark. ICMPv4 and ICMPv6 PMTU discovery
work as expected when mark reflection is enabled and fail when
it is disabled.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 Documentation/networking/ip-sysctl.txt | 8 ++++++--
 net/ipv4/route.c                       | 7 +++++++
 net/ipv6/route.c                       | 2 +-
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 83e51a5..03c4bde 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -60,7 +60,9 @@ fwmark_reflect - BOOLEAN
 	Controls the fwmark of kernel-generated IPv4 reply packets that are not
 	associated with a socket for example, TCP RSTs or ICMP echo replies).
 	If unset, these packets have a fwmark of zero. If set, they have the
-	fwmark of the packet they are replying to.
+	fwmark of the packet they are replying to. Similarly affects the fwmark
+	used by internal routing lookups triggered by incoming packets, such as
+	the ones used for Path MTU Discovery.
 	Default: 0
 
 route/max_size - INTEGER
@@ -1192,7 +1194,9 @@ fwmark_reflect - BOOLEAN
 	Controls the fwmark of kernel-generated IPv6 reply packets that are not
 	associated with a socket for example, TCP RSTs or ICMPv6 echo replies).
 	If unset, these packets have a fwmark of zero. If set, they have the
-	fwmark of the packet they are replying to.
+	fwmark of the packet they are replying to. Similarly affects the fwmark
+	used by internal routing lookups triggered by incoming packets, such as
+	the ones used for Path MTU Discovery.
 	Default: 0
 
 conf/interface/*:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index db1e0da..50e1e0f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -993,6 +993,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
 	struct flowi4 fl4;
 	struct rtable *rt;
 
+	if (!mark)
+		mark = IP4_REPLY_MARK(net, skb->mark);
+
 	__build_flow_key(&fl4, NULL, iph, oif,
 			 RT_TOS(iph->tos), protocol, mark, flow_flags);
 	rt = __ip_route_output_key(net, &fl4);
@@ -1010,6 +1013,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 	struct rtable *rt;
 
 	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+	if (!fl4.flowi4_mark)
+		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
+
 	rt = __ip_route_output_key(sock_net(sk), &fl4);
 	if (!IS_ERR(rt)) {
 		__ip_rt_update_pmtu(rt, &fl4, mtu);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4011617..63fbddb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1176,7 +1176,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = oif;
-	fl6.flowi6_mark = mark;
+	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
 	fl6.daddr = iph->daddr;
 	fl6.saddr = iph->saddr;
 	fl6.flowlabel = ip6_flowinfo(iph);
-- 
1.9.1.423.g4596e3a

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 3/3] net: support marking accepting TCP sockets
  2014-05-10  3:08 [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies Lorenzo Colitti
  2014-05-10  3:08 ` [PATCH v2 2/3] net: Use fwmark reflection in PMTU discovery Lorenzo Colitti
@ 2014-05-10  3:08 ` Lorenzo Colitti
  2014-05-13 16:52 ` [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies David Miller
  2 siblings, 0 replies; 5+ messages in thread
From: Lorenzo Colitti @ 2014-05-10  3:08 UTC (permalink / raw)
  To: netdev; +Cc: jpa, davem, ja, hannes, eric.dumazet, Lorenzo Colitti

When using mark-based routing, sockets returned from accept()
may need to be marked differently depending on the incoming
connection request.

This is the case, for example, if different socket marks identify
different networks: a listening socket may want to accept
connections from all networks, but each connection should be
marked with the network that the request came in on, so that
subsequent packets are sent on the correct network.

This patch adds a sysctl to mark TCP sockets based on the fwmark
of the incoming SYN packet. If enabled, and an unmarked socket
receives a SYN, then the SYN packet's fwmark is written to the
connection's inet_request_sock, and later written back to the
accepted socket when the connection is established.  If the
socket already has a nonzero mark, then the behaviour is the same
as it is today, i.e., the listening socket's fwmark is used.

Black-box tested using user-mode linux:

- IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the
  mark of the incoming SYN packet.
- The socket returned by accept() is marked with the mark of the
  incoming SYN packet.
- Tested with syncookies=1 and syncookies=2.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 Documentation/networking/ip-sysctl.txt | 10 ++++++++++
 include/net/inet_sock.h                |  9 +++++++++
 include/net/netns/ipv4.h               |  1 +
 net/ipv4/inet_connection_sock.c        |  6 ++++--
 net/ipv4/syncookies.c                  |  3 ++-
 net/ipv4/sysctl_net_ipv4.c             |  7 +++++++
 net/ipv4/tcp_ipv4.c                    |  1 +
 net/ipv6/inet6_connection_sock.c       |  2 +-
 net/ipv6/syncookies.c                  |  4 +++-
 net/ipv6/tcp_ipv6.c                    |  1 +
 10 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 03c4bde..6a6267a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -514,6 +514,16 @@ tcp_fastopen - INTEGER
 
 	See include/net/tcp.h and the code for more details.
 
+tcp_fwmark_accept - BOOLEAN
+	If set, incoming connections to listening sockets that do not have a
+	socket mark will set the mark of the accepting socket to the fwmark of
+	the incoming SYN packet. This will cause all packets on that connection
+	(starting from the first SYNACK) to be sent with that fwmark. The
+	listening socket's mark is unchanged. Listening sockets that already
+	have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
+	unaffected.
+	Default: 0
+
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
 	will be retransmitted. Should not be higher than 255. Default value
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1833c3f..adb0fbe 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -88,6 +88,7 @@ struct inet_request_sock {
 				acked	   : 1,
 				no_srccheck: 1;
 	kmemcheck_bitfield_end(flags);
+	u32                     ir_mark;
 	struct ip_options_rcu	*opt;
 	struct sk_buff		*pktopts;
 };
@@ -97,6 +98,14 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
 	return (struct inet_request_sock *)sk;
 }
 
+static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
+{
+	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+		return skb->mark;
+
+	return sk->sk_mark;
+}
+
 struct inet_cork {
 	unsigned int		flags;
 	__be32			addr;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 8e1a9c0..c701843 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -73,6 +73,7 @@ struct netns_ipv4 {
 	int sysctl_ip_fwd_use_pmtu;
 
 	int sysctl_fwmark_reflect;
+	int sysctl_tcp_fwmark_accept;
 
 	kgid_t sysctl_ping_group_range[2];
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0d1e2cb..5ae71ec 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -408,7 +408,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 	struct net *net = sock_net(sk);
 	int flags = inet_sk_flowi_flags(sk);
 
-	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+	flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol,
 			   flags,
@@ -445,7 +445,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 
 	rcu_read_lock();
 	opt = rcu_dereference(newinet->inet_opt);
-	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+	flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
@@ -680,6 +680,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
 		newsk->sk_write_space = sk_stream_write_space;
 
+		newsk->sk_mark = inet_rsk(req)->ir_mark;
+
 		newicsk->icsk_retransmits = 0;
 		newicsk->icsk_backoff	  = 0;
 		newicsk->icsk_probes_out  = 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f2ed13c..c86624b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->ir_rmt_port	= th->source;
 	ireq->ir_loc_addr	= ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr	= ip_hdr(skb)->saddr;
+	ireq->ir_mark		= inet_request_mark(sk, skb);
 	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
@@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	 * hasn't changed since we received the original syn, but I see
 	 * no easy way to do this.
 	 */
-	flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
+	flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
 			   inet_sk_flowi_flags(sk),
 			   (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e40a738..6480281 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -845,6 +845,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "tcp_fwmark_accept",
+		.data		= &init_net.ipv4.sysctl_tcp_fwmark_accept,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad166dc..6c1b6f6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1507,6 +1507,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->ir_rmt_addr = saddr;
 	ireq->no_srccheck = inet_sk(sk)->transparent;
 	ireq->opt = tcp_v4_save_options(skb);
+	ireq->ir_mark = inet_request_mark(sk, skb);
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index d4ade34..a245e5d 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 	final_p = fl6_update_dst(fl6, np->opt, &final);
 	fl6->saddr = ireq->ir_v6_loc_addr;
 	fl6->flowi6_oif = ireq->ir_iif;
-	fl6->flowi6_mark = sk->sk_mark;
+	fl6->flowi6_mark = ireq->ir_mark;
 	fl6->fl6_dport = ireq->ir_rmt_port;
 	fl6->fl6_sport = htons(ireq->ir_num);
 	security_req_classify_flow(req, flowi6_to_flowi(fl6));
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb53a5e7..a822b88 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
 		ireq->ir_iif = inet6_iif(skb);
 
+	ireq->ir_mark = inet_request_mark(sk, skb);
+
 	req->expires = 0UL;
 	req->num_retrans = 0;
 	ireq->ecn_ok		= ecn_ok;
@@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
 		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
-		fl6.flowi6_mark = sk->sk_mark;
+		fl6.flowi6_mark = ireq->ir_mark;
 		fl6.fl6_dport = ireq->ir_rmt_port;
 		fl6.fl6_sport = inet_sk(sk)->inet_sport;
 		security_req_classify_flow(req, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 994572c..37c8334 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1017,6 +1017,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		TCP_ECN_create_request(req, skb, sock_net(sk));
 
 	ireq->ir_iif = sk->sk_bound_dev_if;
+	ireq->ir_mark = inet_request_mark(sk, skb);
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-- 
1.9.1.423.g4596e3a

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies
  2014-05-10  3:08 [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies Lorenzo Colitti
  2014-05-10  3:08 ` [PATCH v2 2/3] net: Use fwmark reflection in PMTU discovery Lorenzo Colitti
  2014-05-10  3:08 ` [PATCH v2 3/3] net: support marking accepting TCP sockets Lorenzo Colitti
@ 2014-05-13 16:52 ` David Miller
  2014-05-13 17:44   ` Lorenzo Colitti
  2 siblings, 1 reply; 5+ messages in thread
From: David Miller @ 2014-05-13 16:52 UTC (permalink / raw)
  To: lorenzo; +Cc: netdev, jpa, ja, hannes, eric.dumazet


These patches do not apply cleanly to net-next.

Also, you did not provide a top level initial "[PATCH 0/N] " posting
giving a high level overview of what these changes do, and why we
should make these changes.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies
  2014-05-13 16:52 ` [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies David Miller
@ 2014-05-13 17:44   ` Lorenzo Colitti
  0 siblings, 0 replies; 5+ messages in thread
From: Lorenzo Colitti @ 2014-05-13 17:44 UTC (permalink / raw)
  To: David Miller
  Cc: netdev@vger.kernel.org, JP Abgrall, Julian Anastasov,
	Hannes Frederic Sowa, Eric Dumazet

On Wed, May 14, 2014 at 1:52 AM, David Miller <davem@davemloft.net> wrote:
> These patches do not apply cleanly to net-next.
>
> Also, you did not provide a top level initial "[PATCH 0/N] " posting
> giving a high level overview of what these changes do, and why we
> should make these changes.

Ack. I just sent out a new patchset which should apply cleanly and has
a cover letter (the first submission had one, but patch v2 didn't).

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2014-05-13 17:44 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-05-10  3:08 [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies Lorenzo Colitti
2014-05-10  3:08 ` [PATCH v2 2/3] net: Use fwmark reflection in PMTU discovery Lorenzo Colitti
2014-05-10  3:08 ` [PATCH v2 3/3] net: support marking accepting TCP sockets Lorenzo Colitti
2014-05-13 16:52 ` [PATCH v2 1/3] net: add a sysctl to reflect the fwmark on replies David Miller
2014-05-13 17:44   ` Lorenzo Colitti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).