From: Edward Cree <ecree@solarflare.com>
To: <netdev@vger.kernel.org>, David Miller <davem@davemloft.net>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>,
<linux-net-drivers@solarflare.com>
Subject: [RFC PATCH net-next 7/8] net: ipv4: listified version of ip_rcv
Date: Tue, 19 Apr 2016 14:37:01 +0100 [thread overview]
Message-ID: <5716347D.3030808@solarflare.com> (raw)
In-Reply-To: <5716338E.4050003@solarflare.com>
This also involved adding a way to run a netfilter hook over a list of packets.
Rather than attempting to make netfilter know about lists (which would be
horrendous) we just let it call the regular okfn (in this case
ip_rcv_finish()) for any packets it steals, and have it give us back a list
of packets it's synchronously accepted (which normally NF_HOOK would
automatically call okfn() on, but we want to be able to potentially pass
the list to a listified version of okfn().)
There is potential for out-of-order receives if the netfilter hook ends up
synchronously stealing packets, as they will be processed before any accepts
earlier in the list. However, it was already possible for an asynchronous
accept to cause out-of-order receives, so hopefully I haven't broken
anything that wasn't broken already.
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
include/linux/netdevice.h | 3 ++
include/linux/netfilter.h | 27 +++++++++++++++++
include/net/ip.h | 2 ++
net/core/dev.c | 11 +++++--
net/ipv4/af_inet.c | 1 +
net/ipv4/ip_input.c | 75 ++++++++++++++++++++++++++++++++++++++++++-----
6 files changed, 110 insertions(+), 9 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 682d0ad..292f2d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2143,6 +2143,9 @@ struct packet_type {
struct net_device *,
struct packet_type *,
struct net_device *);
+ void (*list_func) (struct sk_buff_head *,
+ struct packet_type *,
+ struct net_device *);
bool (*id_match)(struct packet_type *ptype,
struct sock *sk);
void *af_packet_priv;
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 9230f9a..e18e91b 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -220,6 +220,24 @@ NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
return ret;
}
+static inline void
+NF_HOOK_LIST_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+		    struct sk_buff_head *list, struct sk_buff_head *sublist,
+		    struct net_device *in, struct net_device *out,
+		    int (*okfn)(struct net *, struct sock *, struct sk_buff *),
+		    int thresh)
+{
+	struct sk_buff *skb;
+
+	/* Drain list through the hook; sublist gets each skb that returned 1 */
+	__skb_queue_head_init(sublist);
+	for (skb = __skb_dequeue(list); skb; skb = __skb_dequeue(list)) {
+		if (nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn,
+				   thresh) == 1)
+			__skb_queue_tail(sublist, skb);
+	}
+}
+
static inline int
NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *in, struct net_device *out,
@@ -242,6 +260,15 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct
return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN);
}
+static inline void
+NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+ struct sk_buff_head *list, struct sk_buff_head *sublist,
+ struct net_device *in, struct net_device *out,
+ int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+{ /* list-taking wrapper: NF_HOOK_LIST_THRESH() with thresh fixed at INT_MIN */
+ NF_HOOK_LIST_THRESH(pf, hook, net, sk, list, sublist, in, out, okfn, INT_MIN);
+}
+
/* Call setsockopt() */
int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
unsigned int len);
diff --git a/include/net/ip.h b/include/net/ip.h
index 93725e5..c994c44 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -106,6 +106,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
struct ip_options_rcu *opt);
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
struct net_device *orig_dev);
+void ip_list_rcv(struct sk_buff_head *list, struct packet_type *pt,
+ struct net_device *orig_dev);
int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb);
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index db1d16a..da768e2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4230,8 +4230,15 @@ static inline void __netif_receive_skb_list_ptype(struct sk_buff_head *list,
{
struct sk_buff *skb;
- while ((skb = __skb_dequeue(list)) != NULL)
- pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ if (!pt_prev)
+ return;
+ if (skb_queue_empty(list))
+ return;
+ if (pt_prev->list_func != NULL)
+ pt_prev->list_func(list, pt_prev, orig_dev);
+ else
+ while ((skb = __skb_dequeue(list)) != NULL)
+ pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
static void __netif_receive_skb_list_core(struct sk_buff_head *list, bool pfmemalloc)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2e6e65f..1424147 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1757,6 +1757,7 @@ fs_initcall(ipv4_offload_init);
static struct packet_type ip_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IP),
.func = ip_rcv,
+ .list_func = ip_list_rcv,
};
static int __init inet_init(void)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e3d7827..e7d0d85 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -395,10 +395,9 @@ drop:
/*
* Main IP Receive routine.
*/
-int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
const struct iphdr *iph;
- struct net *net;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
@@ -408,7 +407,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
goto drop;
- net = dev_net(dev);
IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len);
skb = skb_share_check(skb, GFP_ATOMIC);
@@ -475,9 +473,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
- return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
- net, NULL, skb, dev, NULL,
- ip_rcv_finish);
+ return skb;
csum_error:
IP_INC_STATS_BH(net, IPSTATS_MIB_CSUMERRORS);
@@ -486,5 +482,70 @@ inhdr_error:
drop:
kfree_skb(skb);
out:
- return NET_RX_DROP;
+ return NULL;
+}
+
+/*
+ * IP receive entry point: single-skb path through ip_rcv_core() and the hook
+ */
+int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
+	   struct net_device *orig_dev)
+{
+	struct net *net = dev_net(dev);
+	struct sk_buff *checked = ip_rcv_core(skb, net);
+
+	if (checked)
+		return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+			       net, NULL, checked, dev, NULL,
+			       ip_rcv_finish);
+	return NET_RX_DROP;	/* ip_rcv_core() returned NULL */
+}
+
+static void ip_sublist_rcv(struct sk_buff_head *list, struct net_device *dev,
+			   struct net *net)
+{
+	struct sk_buff_head accepted;	/* skbs NF_HOOK_LIST() hands back to us */
+	struct sk_buff *pkt;
+
+	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
+		     list, &accepted, dev, NULL, ip_rcv_finish);
+	while ((pkt = __skb_dequeue(&accepted)))	/* call okfn on each of them */
+		ip_rcv_finish(net, NULL, pkt);
+}
+
+/* Receive a list of IP packets: each skb goes through ip_rcv_core(), and the
+ * ones it returns are batched per (dev, net) pair for ip_sublist_rcv(). */
+void ip_list_rcv(struct sk_buff_head *list, struct packet_type *pt,
+		 struct net_device *orig_dev)
+{
+	struct net_device *curr_dev = NULL;
+	struct net *curr_net = NULL;
+	struct sk_buff_head sublist;
+	struct sk_buff *skb;
+
+	__skb_queue_head_init(&sublist);
+	while ((skb = __skb_dequeue(list)) != NULL) {
+		struct net_device *dev = skb->dev;
+		struct net *net = dev_net(dev);
+
+		skb = ip_rcv_core(skb, net);
+		if (skb == NULL)
+			continue;
+
+		if (skb_queue_empty(&sublist)) {
+			curr_dev = dev;
+			curr_net = net;
+		} else if (curr_dev != dev || curr_net != net) {
+			/* dispatch old sublist on ITS dev/net, not this skb's */
+			ip_sublist_rcv(&sublist, curr_dev, curr_net);
+			/* start new sublist */
+			__skb_queue_head_init(&sublist);
+			curr_dev = dev;
+			curr_net = net;
+		}
+		/* add to current sublist */
+		__skb_queue_tail(&sublist, skb);
+	}
+	/* dispatch final sublist */
+	ip_sublist_rcv(&sublist, curr_dev, curr_net);
+}
next prev parent reply other threads:[~2016-04-19 13:37 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-04-19 13:33 [RFC PATCH net-next 0/8] Handle multiple received packets at each stage Edward Cree
2016-04-19 13:34 ` [RFC PATCH net-next 1/8] net: core: trivial netif_receive_skb_list() entry point Edward Cree
2016-04-19 13:35 ` [RFC PATCH net-next 2/8] sfc: batch up RX delivery on EF10 Edward Cree
2016-04-19 14:47 ` Eric Dumazet
2016-04-19 16:36 ` Edward Cree
2016-04-19 17:20 ` Eric Dumazet
2016-04-19 17:42 ` Edward Cree
2016-04-19 18:02 ` Eric Dumazet
2016-04-19 13:35 ` [RFC PATCH net-next 3/8] net: core: unwrap skb list receive slightly further Edward Cree
2016-04-19 13:35 ` [RFC PATCH net-next 4/8] net: core: Another step of skb receive list processing Edward Cree
2016-04-19 13:36 ` [RFC PATCH net-next 5/8] net: core: another layer of lists, around PF_MEMALLOC skb handling Edward Cree
2016-04-19 13:36 ` [RFC PATCH net-next 6/8] net: core: propagate SKB lists through packet_type lookup Edward Cree
2016-04-19 13:37 ` Edward Cree [this message]
2016-04-19 14:50 ` [RFC PATCH net-next 7/8] net: ipv4: listified version of ip_rcv Eric Dumazet
2016-04-19 15:46 ` Tom Herbert
2016-04-19 16:54 ` Eric Dumazet
2016-04-19 17:12 ` Edward Cree
2016-04-19 17:54 ` Eric Dumazet
2016-04-19 18:38 ` Tom Herbert
2016-04-19 16:50 ` Edward Cree
2016-04-19 18:06 ` Eric Dumazet
2016-04-21 17:24 ` Edward Cree
2016-04-19 13:37 ` [RFC PATCH net-next 8/8] net: ipv4: listify ip_rcv_finish Edward Cree
2016-04-19 19:11 ` [RFC PATCH net-next 0/8] Handle multiple received packets at each stage Jesper Dangaard Brouer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5716347D.3030808@solarflare.com \
--to=ecree@solarflare.com \
--cc=brouer@redhat.com \
--cc=davem@davemloft.net \
--cc=linux-net-drivers@solarflare.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.