* [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
@ 2010-04-07 3:38 Changli Gao
2010-04-07 11:48 ` Patrick McHardy
0 siblings, 1 reply; 11+ messages in thread
From: Changli Gao @ 2010-04-07 3:38 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel, xiaosuo
Use an idr instead of a list to speed up packet lookup by id.
The current implementations of nfnetlink_queue, ip_queue and ip6_queue
all use a list to store the queued packets. If the verdicts aren't
received in order, looking up the corresponding packets isn't efficient.
Since the ids are generated and maintained in the kernel, we can use an
idr to speed up the lookup. A side effect of this patch is fixing the
potential id overlap in nfnetlink_queue.
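The idr calls used by this patch follow the usual pre-allocate/insert
pattern of the 2.6.x idr API; as a minimal sketch (the lock and helper
names below are illustrative, not taken from the patch):

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <net/netfilter/nf_queue.h>

static struct idr example_idr;		/* idr_init(&example_idr) at init time */
static DEFINE_SPINLOCK(example_lock);

/* Insert an entry; the idr picks the id and hands it back. */
static int example_insert(struct nf_queue_entry *entry)
{
	int id;

	spin_lock_bh(&example_lock);
	if (!idr_pre_get(&example_idr, GFP_ATOMIC) ||	/* preallocate tree nodes */
	    idr_get_new(&example_idr, entry, &id) != 0) {
		spin_unlock_bh(&example_lock);
		return -ENOMEM;
	}
	spin_unlock_bh(&example_lock);
	return id;
}

/* Look an entry up by the id carried in the verdict and unlink it. */
static struct nf_queue_entry *example_dequeue(int id)
{
	struct nf_queue_entry *entry;

	spin_lock_bh(&example_lock);
	entry = idr_find(&example_idr, id);
	if (entry != NULL)
		idr_remove(&example_idr, id);
	spin_unlock_bh(&example_lock);
	return entry;
}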
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
include/net/netfilter/nf_queue.h | 1
net/ipv4/netfilter/ip_queue.c | 64 +++++++++++++++++++++------------------
net/ipv6/netfilter/ip6_queue.c | 64 +++++++++++++++++++++------------------
net/netfilter/nfnetlink_queue.c | 59 +++++++++++++++++------------------
4 files changed, 99 insertions(+), 89 deletions(-)
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 252fd10..c90db4a 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -3,7 +3,6 @@
/* Each queued (to userspace) skbuff has one of these. */
struct nf_queue_entry {
- struct list_head list;
struct sk_buff *skb;
unsigned int id;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e278704..24890dc 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -49,16 +49,9 @@ static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl __read_mostly;
-static LIST_HEAD(queue_list);
+static struct idr idr;
static DEFINE_MUTEX(ipqnl_mutex);
-static inline void
-__ipq_enqueue_entry(struct nf_queue_entry *entry)
-{
- list_add_tail(&entry->list, &queue_list);
- queue_total++;
-}
-
static inline int
__ipq_set_mode(unsigned char mode, unsigned int range)
{
@@ -99,19 +92,13 @@ __ipq_reset(void)
static struct nf_queue_entry *
ipq_find_dequeue_entry(unsigned long id)
{
- struct nf_queue_entry *entry = NULL, *i;
+ struct nf_queue_entry *entry;
write_lock_bh(&queue_lock);
- list_for_each_entry(i, &queue_list, list) {
- if ((unsigned long)i == id) {
- entry = i;
- break;
- }
- }
-
- if (entry) {
- list_del(&entry->list);
+ entry = idr_find(&idr, id);
+ if (entry != NULL) {
+ idr_remove(&idr, id);
queue_total--;
}
@@ -122,14 +109,16 @@ ipq_find_dequeue_entry(unsigned long id)
static void
__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
- struct nf_queue_entry *entry, *next;
+ struct nf_queue_entry *entry;
+ int id = 0;
- list_for_each_entry_safe(entry, next, &queue_list, list) {
+ while ((entry = idr_get_next(&idr, &id)) != NULL) {
if (!cmpfn || cmpfn(entry, data)) {
- list_del(&entry->list);
+ idr_remove(&idr, id);
queue_total--;
nf_reinject(entry, NF_DROP);
}
+ ++id;
}
}
@@ -142,7 +131,7 @@ ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
}
static struct sk_buff *
-ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
+ipq_build_packet_message(struct nf_queue_entry *entry, int *errp, int *pid)
{
sk_buff_data_t old_tail;
size_t size = 0;
@@ -151,8 +140,9 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
struct timeval tv;
+ int id;
- read_lock_bh(&queue_lock);
+ write_lock_bh(&queue_lock);
switch (copy_mode) {
case IPQ_COPY_META:
@@ -164,7 +154,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
(*errp = skb_checksum_help(entry->skb))) {
- read_unlock_bh(&queue_lock);
+ write_unlock_bh(&queue_lock);
return NULL;
}
if (copy_range == 0 || copy_range > entry->skb->len)
@@ -177,11 +167,18 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
default:
*errp = -EINVAL;
- read_unlock_bh(&queue_lock);
+ write_unlock_bh(&queue_lock);
return NULL;
}
- read_unlock_bh(&queue_lock);
+ if (!idr_pre_get(&idr, GFP_ATOMIC) ||
+ idr_get_new(&idr, entry, &id) != 0) {
+ *errp = -ENOMEM;
+ write_unlock_bh(&queue_lock);
+ return NULL;
+ }
+
+ write_unlock_bh(&queue_lock);
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
@@ -192,7 +189,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
- pmsg->packet_id = (unsigned long )entry;
+ pmsg->packet_id = (unsigned long )id;
pmsg->data_len = data_len;
tv = ktime_to_timeval(entry->skb->tstamp);
pmsg->timestamp_sec = tv.tv_sec;
@@ -222,11 +219,15 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
BUG();
nlh->nlmsg_len = skb->tail - old_tail;
+ *pid = id;
return skb;
nlmsg_failure:
*errp = -EINVAL;
printk(KERN_ERR "ip_queue: error creating packet message\n");
+ write_lock_bh(&queue_lock);
+ idr_remove(&idr, id);
+ write_unlock_bh(&queue_lock);
return NULL;
}
@@ -234,12 +235,13 @@ static int
ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
int status = -EINVAL;
+ int id;
struct sk_buff *nskb;
if (copy_mode == IPQ_COPY_NONE)
return -EAGAIN;
- nskb = ipq_build_packet_message(entry, &status);
+ nskb = ipq_build_packet_message(entry, &status, &id);
if (nskb == NULL)
return status;
@@ -265,7 +267,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
goto err_out_unlock;
}
- __ipq_enqueue_entry(entry);
+ queue_total++;
write_unlock_bh(&queue_lock);
return status;
@@ -274,6 +276,7 @@ err_out_free_nskb:
kfree_skb(nskb);
err_out_unlock:
+ idr_remove(&idr, id);
write_unlock_bh(&queue_lock);
return status;
}
@@ -575,6 +578,8 @@ static int __init ip_queue_init(void)
int status = -ENOMEM;
struct proc_dir_entry *proc __maybe_unused;
+ idr_init(&idr);
+
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
ipq_rcv_skb, NULL, THIS_MODULE);
@@ -623,6 +628,7 @@ static void __exit ip_queue_fini(void)
nf_unregister_queue_handlers(&nfqh);
ipq_flush(NULL, 0);
+ idr_destroy(&idr);
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(ipq_sysctl_header);
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 6a68a74..645eed2 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -50,16 +50,9 @@ static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl __read_mostly;
-static LIST_HEAD(queue_list);
+static struct idr idr;
static DEFINE_MUTEX(ipqnl_mutex);
-static inline void
-__ipq_enqueue_entry(struct nf_queue_entry *entry)
-{
- list_add_tail(&entry->list, &queue_list);
- queue_total++;
-}
-
static inline int
__ipq_set_mode(unsigned char mode, unsigned int range)
{
@@ -100,19 +93,13 @@ __ipq_reset(void)
static struct nf_queue_entry *
ipq_find_dequeue_entry(unsigned long id)
{
- struct nf_queue_entry *entry = NULL, *i;
+ struct nf_queue_entry *entry;
write_lock_bh(&queue_lock);
- list_for_each_entry(i, &queue_list, list) {
- if ((unsigned long)i == id) {
- entry = i;
- break;
- }
- }
-
- if (entry) {
- list_del(&entry->list);
+ entry = idr_find(&idr, id);
+ if (entry != NULL) {
+ idr_remove(&idr, id);
queue_total--;
}
@@ -123,14 +110,16 @@ ipq_find_dequeue_entry(unsigned long id)
static void
__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
- struct nf_queue_entry *entry, *next;
+ struct nf_queue_entry *entry;
+ int id = 0;
- list_for_each_entry_safe(entry, next, &queue_list, list) {
+ while ((entry = idr_get_next(&idr, &id)) != NULL) {
if (!cmpfn || cmpfn(entry, data)) {
- list_del(&entry->list);
+ idr_remove(&idr, id);
queue_total--;
nf_reinject(entry, NF_DROP);
}
+ ++id;
}
}
@@ -143,7 +132,7 @@ ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
}
static struct sk_buff *
-ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
+ipq_build_packet_message(struct nf_queue_entry *entry, int *errp, int *pid)
{
sk_buff_data_t old_tail;
size_t size = 0;
@@ -152,8 +141,9 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
struct timeval tv;
+ int id;
- read_lock_bh(&queue_lock);
+ write_lock_bh(&queue_lock);
switch (copy_mode) {
case IPQ_COPY_META:
@@ -165,7 +155,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
(*errp = skb_checksum_help(entry->skb))) {
- read_unlock_bh(&queue_lock);
+ write_unlock_bh(&queue_lock);
return NULL;
}
if (copy_range == 0 || copy_range > entry->skb->len)
@@ -178,11 +168,18 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
default:
*errp = -EINVAL;
- read_unlock_bh(&queue_lock);
+ write_unlock_bh(&queue_lock);
return NULL;
}
- read_unlock_bh(&queue_lock);
+ if (!idr_pre_get(&idr, GFP_ATOMIC) ||
+ idr_get_new(&idr, entry, &id) != 0) {
+ *errp = -ENOMEM;
+ write_unlock_bh(&queue_lock);
+ return NULL;
+ }
+
+ write_unlock_bh(&queue_lock);
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
@@ -193,7 +190,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
- pmsg->packet_id = (unsigned long )entry;
+ pmsg->packet_id = (unsigned long )id;
pmsg->data_len = data_len;
tv = ktime_to_timeval(entry->skb->tstamp);
pmsg->timestamp_sec = tv.tv_sec;
@@ -222,11 +219,15 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
BUG();
nlh->nlmsg_len = skb->tail - old_tail;
+ *pid = id;
return skb;
nlmsg_failure:
*errp = -EINVAL;
printk(KERN_ERR "ip6_queue: error creating packet message\n");
+ write_lock_bh(&queue_lock);
+ idr_remove(&idr, id);
+ write_unlock_bh(&queue_lock);
return NULL;
}
@@ -234,12 +235,13 @@ static int
ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
int status = -EINVAL;
+ int id;
struct sk_buff *nskb;
if (copy_mode == IPQ_COPY_NONE)
return -EAGAIN;
- nskb = ipq_build_packet_message(entry, &status);
+ nskb = ipq_build_packet_message(entry, &status, &id);
if (nskb == NULL)
return status;
@@ -265,7 +267,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
goto err_out_unlock;
}
- __ipq_enqueue_entry(entry);
+ queue_total++;
write_unlock_bh(&queue_lock);
return status;
@@ -274,6 +276,7 @@ err_out_free_nskb:
kfree_skb(nskb);
err_out_unlock:
+ idr_remove(&idr, id);
write_unlock_bh(&queue_lock);
return status;
}
@@ -576,6 +579,8 @@ static int __init ip6_queue_init(void)
int status = -ENOMEM;
struct proc_dir_entry *proc __maybe_unused;
+ idr_init(&idr);
+
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
ipq_rcv_skb, NULL, THIS_MODULE);
@@ -625,6 +630,7 @@ static void __exit ip6_queue_fini(void)
nf_unregister_queue_handlers(&nfqh);
ipq_flush(NULL, 0);
+ idr_destroy(&idr);
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(ipq_sysctl_header);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index e70a6ef..6a6fd07 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -50,14 +50,12 @@ struct nfqnl_instance {
unsigned int queue_dropped;
unsigned int queue_user_dropped;
- unsigned int id_sequence; /* 'sequence' of pkt ids */
-
u_int16_t queue_num; /* number of this queue */
u_int8_t copy_mode;
spinlock_t lock;
- struct list_head queue_list; /* packets in queue */
+ struct idr idr;
};
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
@@ -112,7 +110,7 @@ instance_create(u_int16_t queue_num, int pid)
inst->copy_range = 0xfffff;
inst->copy_mode = NFQNL_COPY_NONE;
spin_lock_init(&inst->lock);
- INIT_LIST_HEAD(&inst->queue_list);
+ idr_init(&inst->idr);
if (!try_module_get(THIS_MODULE)) {
err = -EAGAIN;
@@ -143,6 +141,7 @@ instance_destroy_rcu(struct rcu_head *head)
rcu);
nfqnl_flush(inst, NULL, 0);
+ idr_destroy(&inst->idr);
kfree(inst);
module_put(THIS_MODULE);
}
@@ -162,29 +161,16 @@ instance_destroy(struct nfqnl_instance *inst)
spin_unlock(&instances_lock);
}
-static inline void
-__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
-{
- list_add_tail(&entry->list, &queue->queue_list);
- queue->queue_total++;
-}
-
static struct nf_queue_entry *
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
{
- struct nf_queue_entry *entry = NULL, *i;
+ struct nf_queue_entry *entry;
spin_lock_bh(&queue->lock);
- list_for_each_entry(i, &queue->queue_list, list) {
- if (i->id == id) {
- entry = i;
- break;
- }
- }
-
- if (entry) {
- list_del(&entry->list);
+ entry = idr_find(&queue->idr, id);
+ if (entry != NULL) {
+ idr_remove(&queue->idr, id);
queue->queue_total--;
}
@@ -196,22 +182,24 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
static void
nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
{
- struct nf_queue_entry *entry, *next;
+ struct nf_queue_entry *entry;
+ int id = 0;
spin_lock_bh(&queue->lock);
- list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
+ while ((entry = idr_get_next(&queue->idr, &id)) != NULL) {
if (!cmpfn || cmpfn(entry, data)) {
- list_del(&entry->list);
+ idr_remove(&queue->idr, id);
queue->queue_total--;
nf_reinject(entry, NF_DROP);
}
+ ++id;
}
spin_unlock_bh(&queue->lock);
}
static struct sk_buff *
nfqnl_build_packet_message(struct nfqnl_instance *queue,
- struct nf_queue_entry *entry)
+ struct nf_queue_entry *entry, u32 *pid)
{
sk_buff_data_t old_tail;
size_t size;
@@ -223,6 +211,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
struct sk_buff *entskb = entry->skb;
struct net_device *indev;
struct net_device *outdev;
+ u32 id;
size = NLMSG_SPACE(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -262,7 +251,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
break;
}
- entry->id = queue->id_sequence++;
+ if (!idr_pre_get(&queue->idr, GFP_ATOMIC) ||
+ idr_get_new(&queue->idr, entry, &id) != 0) {
+ spin_unlock_bh(&queue->lock);
+ return NULL;
+ }
spin_unlock_bh(&queue->lock);
@@ -279,7 +272,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(queue->queue_num);
- pmsg.packet_id = htonl(entry->id);
+ pmsg.packet_id = htonl(id);
pmsg.hw_protocol = entskb->protocol;
pmsg.hook = entry->hook;
@@ -375,6 +368,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
}
nlh->nlmsg_len = skb->tail - old_tail;
+ *pid = id;
return skb;
nlmsg_failure:
@@ -383,6 +377,9 @@ nla_put_failure:
kfree_skb(skb);
if (net_ratelimit())
printk(KERN_ERR "nf_queue: error creating packet message\n");
+ spin_lock_bh(&queue->lock);
+ idr_remove(&queue->idr, id);
+ spin_unlock_bh(&queue->lock);
return NULL;
}
@@ -392,6 +389,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
struct sk_buff *nskb;
struct nfqnl_instance *queue;
int err;
+ u32 id;
/* rcu_read_lock()ed by nf_hook_slow() */
queue = instance_lookup(queuenum);
@@ -401,7 +399,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
if (queue->copy_mode == NFQNL_COPY_NONE)
goto err_out;
- nskb = nfqnl_build_packet_message(queue, entry);
+ nskb = nfqnl_build_packet_message(queue, entry, &id);
if (nskb == NULL)
goto err_out;
@@ -426,7 +424,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
goto err_out_unlock;
}
- __enqueue_entry(queue, entry);
+ queue->queue_total++;
spin_unlock_bh(&queue->lock);
return 0;
@@ -434,6 +432,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
err_out_free_nskb:
kfree_skb(nskb);
err_out_unlock:
+ idr_remove(&queue->idr, id);
spin_unlock_bh(&queue->lock);
err_out:
return -1;
@@ -867,7 +866,7 @@ static int seq_show(struct seq_file *s, void *v)
inst->peer_pid, inst->queue_total,
inst->copy_mode, inst->copy_range,
inst->queue_dropped, inst->queue_user_dropped,
- inst->id_sequence, 1);
+ 0, 1);
}
static const struct seq_operations nfqnl_seq_ops = {
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 3:38 [PATCH] netfilter: use idr instead of list to speed up packet lookup by id Changli Gao
@ 2010-04-07 11:48 ` Patrick McHardy
2010-04-07 12:25 ` Eric Dumazet
2010-04-07 13:30 ` Changli Gao
0 siblings, 2 replies; 11+ messages in thread
From: Patrick McHardy @ 2010-04-07 11:48 UTC (permalink / raw)
To: xiaosuo; +Cc: netfilter-devel
Changli Gao wrote:
> Use an idr instead of a list to speed up packet lookup by id.
>
> The current implementations of nfnetlink_queue, ip_queue and ip6_queue
> all use a list to store the queued packets. If the verdicts aren't
> received in order, looking up the corresponding packets isn't efficient.
> Since the ids are generated and maintained in the kernel, we can use an
> idr to speed up the lookup. A side effect of this patch is fixing the
> potential id overlap in nfnetlink_queue.
This doesn't compile:
net/netfilter/nfnetlink_queue.c:57: error: field 'idr' has incomplete type
net/netfilter/nfnetlink_queue.c: In function 'instance_create':
net/netfilter/nfnetlink_queue.c:112: error: implicit declaration of
function 'idr_init'
net/netfilter/nfnetlink_queue.c: In function 'instance_destroy_rcu':
net/netfilter/nfnetlink_queue.c:143: error: implicit declaration of
function 'idr_destroy'
net/netfilter/nfnetlink_queue.c: In function 'find_dequeue_entry':
net/netfilter/nfnetlink_queue.c:170: error: implicit declaration of
function 'idr_find'
net/netfilter/nfnetlink_queue.c:170: warning: assignment makes pointer
from integer without a cast
net/netfilter/nfnetlink_queue.c:172: error: implicit declaration of
function 'idr_remove'
net/netfilter/nfnetlink_queue.c: In function 'nfqnl_flush':
net/netfilter/nfnetlink_queue.c:188: error: implicit declaration of
function 'idr_get_next'
net/netfilter/nfnetlink_queue.c:188: warning: assignment makes pointer
from integer without a cast
net/netfilter/nfnetlink_queue.c: In function 'nfqnl_build_packet_message':
net/netfilter/nfnetlink_queue.c:253: error: implicit declaration of
function 'idr_pre_get'
net/netfilter/nfnetlink_queue.c:254: error: implicit declaration of
function 'idr_get_new'
I'm interested in how this affects performance for the vast majority
of users, which process messages in order. A simple hash table looks
like a better choice here since we know the maximum number of entries
in advance and also could have the user specify the desired hash size.
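Roughly, such a table might look like this (a sketch only; the hnode
member, the helper names and the table size are made up for illustration):

#include <linux/list.h>
#include <net/netfilter/nf_queue.h>

#define QUEUE_HASH_SIZE 256	/* illustrative; could come from userspace */

static struct hlist_head queue_hash[QUEUE_HASH_SIZE];

static inline struct hlist_head *queue_hash_bucket(unsigned int id)
{
	return &queue_hash[id & (QUEUE_HASH_SIZE - 1)];
}

/* nf_queue_entry would carry an hlist_node ("hnode") instead of the list_head. */
static void hash_enqueue(struct nf_queue_entry *entry)
{
	hlist_add_head(&entry->hnode, queue_hash_bucket(entry->id));
}

static struct nf_queue_entry *hash_find(unsigned int id)
{
	struct nf_queue_entry *entry;
	struct hlist_node *pos;

	hlist_for_each_entry(entry, pos, queue_hash_bucket(id), hnode)
		if (entry->id == id)
			return entry;
	return NULL;
}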
> @@ -223,6 +211,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
> struct sk_buff *entskb = entry->skb;
> struct net_device *indev;
> struct net_device *outdev;
> + u32 id;
>
> size = NLMSG_SPACE(sizeof(struct nfgenmsg))
> + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
> @@ -262,7 +251,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
> break;
> }
>
> - entry->id = queue->id_sequence++;
> + if (!idr_pre_get(&queue->idr, GFP_ATOMIC) ||
> + idr_get_new(&queue->idr, entry, &id) != 0) {
> + spin_unlock_bh(&queue->lock);
> + return NULL;
> + }
>
> spin_unlock_bh(&queue->lock);
You probably shouldn't be making the entry visible before the message
is successfully built and sent.
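One way to keep the id embedded in the message while still publishing
the entry only after the send has succeeded would be to reserve the id
with a sentinel first; a rough, untested sketch (QUEUE_ENTRY_RESERVED is
a made-up marker that the verdict and flush paths would then have to skip):

/* In nfqnl_build_packet_message(): reserve an id, but don't expose the entry yet. */
#define QUEUE_ENTRY_RESERVED ((void *)1UL)	/* hypothetical sentinel */

spin_lock_bh(&queue->lock);
if (!idr_pre_get(&queue->idr, GFP_ATOMIC) ||
    idr_get_new(&queue->idr, QUEUE_ENTRY_RESERVED, &id) != 0) {
	spin_unlock_bh(&queue->lock);
	return NULL;
}
spin_unlock_bh(&queue->lock);

/* ... build the netlink message using "id" as before ... */

/* In nfqnl_enqueue_packet(), after the message has been sent successfully: */
spin_lock_bh(&queue->lock);
idr_replace(&queue->idr, entry, id);	/* now the verdict path may find it */
queue->queue_total++;
spin_unlock_bh(&queue->lock);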
>
> @@ -279,7 +272,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
> nfmsg->version = NFNETLINK_V0;
> nfmsg->res_id = htons(queue->queue_num);
>
> - pmsg.packet_id = htonl(entry->id);
> + pmsg.packet_id = htonl(id);
> pmsg.hw_protocol = entskb->protocol;
> pmsg.hook = entry->hook;
>
> @@ -375,6 +368,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
> }
>
> nlh->nlmsg_len = skb->tail - old_tail;
> + *pid = id;
> return skb;
>
> nlmsg_failure:
> @@ -383,6 +377,9 @@ nla_put_failure:
> kfree_skb(skb);
> if (net_ratelimit())
> printk(KERN_ERR "nf_queue: error creating packet message\n");
> + spin_lock_bh(&queue->lock);
> + idr_remove(&queue->idr, id);
> + spin_unlock_bh(&queue->lock);
> return NULL;
> }
>
> @@ -392,6 +389,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
> struct sk_buff *nskb;
> struct nfqnl_instance *queue;
> int err;
> + u32 id;
>
> /* rcu_read_lock()ed by nf_hook_slow() */
> queue = instance_lookup(queuenum);
> @@ -401,7 +399,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
> if (queue->copy_mode == NFQNL_COPY_NONE)
> goto err_out;
>
> - nskb = nfqnl_build_packet_message(queue, entry);
> + nskb = nfqnl_build_packet_message(queue, entry, &id);
> if (nskb == NULL)
> goto err_out;
>
> @@ -426,7 +424,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
> goto err_out_unlock;
> }
>
> - __enqueue_entry(queue, entry);
> + queue->queue_total++;
>
> spin_unlock_bh(&queue->lock);
> return 0;
> @@ -434,6 +432,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
> err_out_free_nskb:
> kfree_skb(nskb);
> err_out_unlock:
> + idr_remove(&queue->idr, id);
> spin_unlock_bh(&queue->lock);
> err_out:
> return -1;
> @@ -867,7 +866,7 @@ static int seq_show(struct seq_file *s, void *v)
> inst->peer_pid, inst->queue_total,
> inst->copy_mode, inst->copy_range,
> inst->queue_dropped, inst->queue_user_dropped,
> - inst->id_sequence, 1);
> + 0, 1);
> }
>
> static const struct seq_operations nfqnl_seq_ops = {
>
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 11:48 ` Patrick McHardy
@ 2010-04-07 12:25 ` Eric Dumazet
2010-04-07 13:03 ` Patrick McHardy
2010-04-07 13:42 ` Changli Gao
2010-04-07 13:30 ` Changli Gao
1 sibling, 2 replies; 11+ messages in thread
From: Eric Dumazet @ 2010-04-07 12:25 UTC (permalink / raw)
To: Patrick McHardy; +Cc: xiaosuo, netfilter-devel
On Wednesday 07 April 2010 at 13:48 +0200, Patrick McHardy wrote:
> Changli Gao wrote:
> > Use an idr instead of a list to speed up packet lookup by id.
> >
> > The current implementations of nfnetlink_queue, ip_queue and ip6_queue
> > all use a list to store the queued packets. If the verdicts aren't
> > received in order, looking up the corresponding packets isn't efficient.
> > Since the ids are generated and maintained in the kernel, we can use an
> > idr to speed up the lookup. A side effect of this patch is fixing the
> > potential id overlap in nfnetlink_queue.
>
> I'm interested in how this affects performance for the vast majority
> of users, which process messages in order. A simple hash table looks
> like a better choice here since we know the maximum number of entries
> in advance and also could have the user specify the desired hash size.
Yes, a hash table would be good, but it costs 8 bytes per slot.
Changli, did you try an RB tree? It might better fit both your needs
and Patrick's concerns...
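For comparison, an rbtree of queue entries keyed by id might look
roughly like this (sketch only; the rb member of nf_queue_entry is
hypothetical):

#include <linux/rbtree.h>
#include <net/netfilter/nf_queue.h>

static struct rb_root queue_root = RB_ROOT;

static void rbtree_enqueue(struct nf_queue_entry *entry)
{
	struct rb_node **p = &queue_root.rb_node, *parent = NULL;
	struct nf_queue_entry *e;

	while (*p) {
		parent = *p;
		e = rb_entry(parent, struct nf_queue_entry, rb);
		if (entry->id < e->id)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(&entry->rb, parent, p);
	rb_insert_color(&entry->rb, &queue_root);
}

static struct nf_queue_entry *rbtree_find(unsigned int id)
{
	struct rb_node *n = queue_root.rb_node;
	struct nf_queue_entry *e;

	while (n) {
		e = rb_entry(n, struct nf_queue_entry, rb);
		if (id < e->id)
			n = n->rb_left;
		else if (id > e->id)
			n = n->rb_right;
		else
			return e;
	}
	return NULL;
}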
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 12:25 ` Eric Dumazet
@ 2010-04-07 13:03 ` Patrick McHardy
2010-04-07 13:42 ` Changli Gao
1 sibling, 0 replies; 11+ messages in thread
From: Patrick McHardy @ 2010-04-07 13:03 UTC (permalink / raw)
To: Eric Dumazet; +Cc: xiaosuo, netfilter-devel
Eric Dumazet wrote:
> On Wednesday 07 April 2010 at 13:48 +0200, Patrick McHardy wrote:
>> Changli Gao wrote:
>>> Use an idr instead of a list to speed up packet lookup by id.
>>>
>>> The current implementations of nfnetlink_queue, ip_queue and ip6_queue
>>> all use a list to store the queued packets. If the verdicts aren't
>>> received in order, looking up the corresponding packets isn't efficient.
>>> Since the ids are generated and maintained in the kernel, we can use an
>>> idr to speed up the lookup. A side effect of this patch is fixing the
>>> potential id overlap in nfnetlink_queue.
>> I'm interested in how this affects performance for the vast majority
>> of users, which process messages in order. A simple hash table looks
>> like a better choice here since we know the maximum number of entries
>> in advance and also could have the user specify the desired hash size.
>
> Yes, a hash table would be good, but it costs 8 bytes per slot.
>
> Changli, did you try an RB tree? It might better fit both your needs
> and Patrick's concerns...
RB-trees OTOH cost 8 bytes extra per element. If we were to use a hash,
I think the size should be configurable by userspace and default
to 1 (a simple list, as used currently). That way only those people
actually processing packets out of order have to pay the price.
But an RB-tree would be fine too, I guess.
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 11:48 ` Patrick McHardy
2010-04-07 12:25 ` Eric Dumazet
@ 2010-04-07 13:30 ` Changli Gao
2010-04-07 13:40 ` Patrick McHardy
1 sibling, 1 reply; 11+ messages in thread
From: Changli Gao @ 2010-04-07 13:30 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel
On Wed, Apr 7, 2010 at 7:48 PM, Patrick McHardy <kaber@trash.net> wrote:
> Changli Gao wrote:
>
> This doesn't compile:
>
> net/netfilter/nfnetlink_queue.c:57: error: field 'idr' has incomplete type
> net/netfilter/nfnetlink_queue.c: In function 'instance_create':
> net/netfilter/nfnetlink_queue.c:112: error: implicit declaration of
> function 'idr_init'
> net/netfilter/nfnetlink_queue.c: In function 'instance_destroy_rcu':
> net/netfilter/nfnetlink_queue.c:143: error: implicit declaration of
> function 'idr_destroy'
> net/netfilter/nfnetlink_queue.c: In function 'find_dequeue_entry':
> net/netfilter/nfnetlink_queue.c:170: error: implicit declaration of
> function 'idr_find'
> net/netfilter/nfnetlink_queue.c:170: warning: assignment makes pointer
> from integer without a cast
> net/netfilter/nfnetlink_queue.c:172: error: implicit declaration of
> function 'idr_remove'
> net/netfilter/nfnetlink_queue.c: In function 'nfqnl_flush':
> net/netfilter/nfnetlink_queue.c:188: error: implicit declaration of
> function 'idr_get_next'
> net/netfilter/nfnetlink_queue.c:188: warning: assignment makes pointer
> from integer without a cast
> net/netfilter/nfnetlink_queue.c: In function 'nfqnl_build_packet_message':
> net/netfilter/nfnetlink_queue.c:253: error: implicit declaration of
> function 'idr_pre_get'
> net/netfilter/nfnetlink_queue.c:254: error: implicit declaration of
> function 'idr_get_new'
I cooked this patch against linux-next, and I have noticed that net-next
hasn't been synced with linux-next yet.
>
> I'm interested in how this affects performance for the vast majority
> of users, which process messages in order. A simple hash table looks
> like a better choice here since we know the maximum number of entries
> in advance and also could have the user specify the desired hash size.
>
If there aren't many packets queued, the packet ids will stay around 0,
and the idr isn't much slower.
>
> You probably shouldn't be making the entry visible before the message
> is successfully built and sent.
I wanted to keep the changeset small. But it seems wrong.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 13:30 ` Changli Gao
@ 2010-04-07 13:40 ` Patrick McHardy
2010-04-07 14:07 ` Changli Gao
0 siblings, 1 reply; 11+ messages in thread
From: Patrick McHardy @ 2010-04-07 13:40 UTC (permalink / raw)
To: Changli Gao; +Cc: netfilter-devel
Changli Gao wrote:
> On Wed, Apr 7, 2010 at 7:48 PM, Patrick McHardy <kaber@trash.net> wrote:
>> Changli Gao wrote:
>>
>> I'm interested in how this affects performance for the vast majority
>> of users, which process messages in order. A simple hash table looks
>> like a better choice here since we know the maximum number of entries
>> in advance and also could have the user specify the desired hash size.
>>
>
> If there aren't many packets queued, the packet ids will stay around 0,
> and the idr isn't much slower.
Processing packets in order doesn't necessarily mean that there aren't
many packets queued. It's just that the current scheme is pretty much
optimal for this case, since the verdict will always refer to the first
entry in the list.
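(In other words, for in-order verdicts the existing lookup effectively
reduces to a constant-time check of the list head, roughly:)

/* In-order case, e.g. in nfnetlink_queue: the verdict id matches the
 * oldest queued entry, so the list walk stops at the first element. */
if (!list_empty(&queue->queue_list)) {
	entry = list_first_entry(&queue->queue_list, struct nf_queue_entry, list);
	if (entry->id == id) {
		list_del(&entry->list);
		queue->queue_total--;
	}
}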
>> You probably shouldn't be making the entry visible before the message
>> is successfully built and sent.
>
> I wanted to keep the changeset small. But it seems wrong.
>
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 12:25 ` Eric Dumazet
2010-04-07 13:03 ` Patrick McHardy
@ 2010-04-07 13:42 ` Changli Gao
1 sibling, 0 replies; 11+ messages in thread
From: Changli Gao @ 2010-04-07 13:42 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Patrick McHardy, netfilter-devel
On Wed, Apr 7, 2010 at 8:25 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> Yes, a hash table would be good, but it costs 8 bytes per slot.
>
> Changli, did you try an RB tree? It might better fit both your needs
> and Patrick's concerns...
I don't think an rbtree is better than an idr. An rbtree's lookup is
O(log2(n)), while an idr is a kind of radix tree whose depth is bounded
by the id width, so its lookups are effectively O(1).
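(As a rough illustration, assuming about 4096 packets in flight: an
rbtree lookup costs around log2(4096) = 12 comparisons, while the 2.6.x
idr, a radix tree with 5- or 6-bit fan-out per level, resolves such ids
in two or three node hops.)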
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 13:40 ` Patrick McHardy
@ 2010-04-07 14:07 ` Changli Gao
2010-04-07 14:16 ` Patrick McHardy
0 siblings, 1 reply; 11+ messages in thread
From: Changli Gao @ 2010-04-07 14:07 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel
On Wed, Apr 7, 2010 at 9:40 PM, Patrick McHardy <kaber@trash.net> wrote:
> Processing packets in order doesn't necessarily mean that there aren't
> many packets queued. It's just that the current scheme is pretty much
> optimal for this case, since the verdict will always refer to the first
> entry in the list.
>
How about keeping the old behavior by default, and replacing the single
list with an idr when an out-of-order verdict is received?
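Roughly like this (sketch only; the out_of_order flag and
slow_lookup_by_id() are made up for illustration):

/* Fast path: in-order verdicts keep hitting the list head. */
if (!queue->out_of_order && !list_empty(&queue->queue_list)) {
	entry = list_first_entry(&queue->queue_list, struct nf_queue_entry, list);
	if (likely(entry->id == id)) {
		list_del(&entry->list);
		return entry;
	}
	/* First miss: switch over; the queued entries would have to be
	 * migrated into the keyed structure at this point. */
	queue->out_of_order = true;
}
return slow_lookup_by_id(queue, id);	/* idr/hash/rbtree keyed lookup */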
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 14:07 ` Changli Gao
@ 2010-04-07 14:16 ` Patrick McHardy
2010-04-07 14:33 ` Changli Gao
0 siblings, 1 reply; 11+ messages in thread
From: Patrick McHardy @ 2010-04-07 14:16 UTC (permalink / raw)
To: Changli Gao; +Cc: netfilter-devel
Changli Gao wrote:
> On Wed, Apr 7, 2010 at 9:40 PM, Patrick McHardy <kaber@trash.net> wrote:
>> Processing packets in order doesn't necessarily mean that there aren't
>> many packets queued. It's just that the current scheme is pretty much
>> optimal for this case, since the verdict will always refer to the first
>> entry in the list.
>>
>
> How about keeping the old behavior by default, and replacing the single
> list with an idr when an out-of-order verdict is received?
You only know that during runtime, so this will get more complicated
than necessary. Why not simply use a hash table with a size specified
by userspace in the queue creation command? The default behaviour
would be a size of 1, which is equivalent to the currently used single
list.
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 14:16 ` Patrick McHardy
@ 2010-04-07 14:33 ` Changli Gao
2010-04-07 14:41 ` Patrick McHardy
0 siblings, 1 reply; 11+ messages in thread
From: Changli Gao @ 2010-04-07 14:33 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel
On Wed, Apr 7, 2010 at 10:16 PM, Patrick McHardy <kaber@trash.net> wrote:
>
> You only know that during runtime, so this will get more complicated
> than necessary. Why not simply use a hash table with a size specified
> by userspace in the queue creation command? The default behaviour
> would be a size of 1, which is equivalent to the currently used single
> list.
>
We'd better not expose too many internal implementation details to
userspace. If we do, we can't easily change the implementation later.
And letting the user choose the size of the hash table is much like the
original epoll(2) design; there would be a security concern, such as
excessive memory usage.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH] netfilter: use idr instead of list to speed up packet lookup by id
2010-04-07 14:33 ` Changli Gao
@ 2010-04-07 14:41 ` Patrick McHardy
0 siblings, 0 replies; 11+ messages in thread
From: Patrick McHardy @ 2010-04-07 14:41 UTC (permalink / raw)
To: Changli Gao; +Cc: netfilter-devel
Changli Gao wrote:
> On Wed, Apr 7, 2010 at 10:16 PM, Patrick McHardy <kaber@trash.net> wrote:
>> You only know that during runtime, so this will get more complicated
>> than necessary. Why not simply use a hash table with a size specified
>> by userspace in the queue creation command? The default behaviour
>> would be a size of 1, which is equivalent to the currently used single
>> list.
>>
>
> We'd better not expose too many internal implementation details to
> userspace. If we do, we can't easily change the implementation later.
> And letting the user choose the size of the hash table is much like the
> original epoll(2) design; there would be a security concern, such as
> excessive memory usage.
Userspace queueing is limited to root, so there's no concern about
memory usage. Regarding implementation details: alternatively, add
a flag to specify out-of-order handling and size the hash table
based on the maximum number of queue entries.
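E.g., roughly (NFQA_CFG_F_OUT_OF_ORDER and the hash_size field below are
hypothetical, not existing attributes):

#include <linux/log2.h>

/* At queue configuration time: */
if (flags & NFQA_CFG_F_OUT_OF_ORDER)	/* hypothetical config flag */
	queue->hash_size = roundup_pow_of_two(queue->queue_maxlen);
else
	queue->hash_size = 1;		/* degenerates to today's single list */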