Author: Harry Mason Author: Daniel Collins Description: Allow use of the 'socket' iptables match in OUTPUT, for the purpose of capturing and rerouting fragmentation-needed messages generated in response to a tproxy socket trying to send messages larger than permitted by the MTU of the egress interface chosen by rerouting. --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -115,16 +115,16 @@ xt_socket_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in) + const int ifindex) { switch (protocol) { case IPPROTO_TCP: return __inet_lookup(net, &tcp_hashinfo, saddr, sport, daddr, dport, - in->ifindex); + ifindex); case IPPROTO_UDP: return udp4_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); + ifindex); } return NULL; } @@ -183,10 +183,35 @@ } #endif - if (!sk) - sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol, + /* For input packets, sk is the destination socket, so if it is already + * defined there is no need to search again. + * + * For output packets, sk will be the source socket, but we are + * interested in the destination socket, so force a lookup. This + * supports locally generated ICMP errors for sockets with non-local + * addresses. + */ + if (!par->in || !sk) { + /* Check for sockets in the network namespace associated with + * the packets device, if it has one (i.e. is an incomming packet), + * else use the outgoing device made by the routing decision. + * + * Stolen from net/ipv4/icmp.c + */ + struct net *net = dev_net(skb->dev ?: skb_dst(skb)->dev); + + /* ifindex is used when looking up socket if any sockets are + * bound to a specific interface, we know the device on the + * input side, but resort to ignoring any such sockets on the + * output side. + */ + int ifindex = par->in ? par->in->ifindex : 0; + + sk = xt_socket_get_sock_v4(net, protocol, saddr, daddr, sport, dport, - par->in); + ifindex); + } + if (sk) { bool wildcard; bool transparent = true; @@ -417,7 +442,8 @@ .family = NFPROTO_IPV4, .match = socket_mt4_v0, .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN), + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, { @@ -428,7 +454,8 @@ .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN), + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, #ifdef XT_SOCKET_HAVE_IPV6 @@ -452,7 +479,8 @@ .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN), + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT), .me = THIS_MODULE, }, #ifdef XT_SOCKET_HAVE_IPV6