From: Jamal Hadi <hadi@shell.cyberus.ca>
To: Stephen Hemminger <shemminger@osdl.org>
Cc: "David S. Miller" <davem@redhat.com>, netdev@oss.sgi.com
Subject: Re: [PATCH 2.5.69] Network packet type using RCU
Date: Wed, 14 May 2003 07:23:32 -0400 (EDT) [thread overview]
Message-ID: <20030514072006.D23367@shell.cyberus.ca> (raw)
In-Reply-To: <20030509142538.548cbd71.shemminger@osdl.org>
Apologies i was offline for a bit so dont know the history here.
I see a lot of move to rcu. There are benefits to using RCU in certain
cases (example occasional list edit with a lot more reads). Why does
there seem to be a massive migration it seems to RCU? Something wrong
with the linked lists?
cheers,
jamal
On Fri, 9 May 2003, Stephen Hemminger wrote:
> Convert network packet type list from using brlock to RCU based list.
> Some collataral changes to af_packet were necessary since it needs to
> unregister packet types without sleeping.
>
> Summary:
> * packet type converted from linked list to list_macro
> * writer lock replaced with spin lock, readers use RCU
> * add __dev_remove_pack for callers that can't sleep.
> * af_packet changes to handle and sleeping requirements, and possible
> races that could cause.
>
> Tested on 8-way SMP running 2.5.69 latest.
>
> Dave, please wait till Monday to apply this in case there are any
> comments.
>
> diff -urNp -X dontdiff linux-2.5/include/linux/netdevice.h linux-2.5-nbr/include/linux/netdevice.h
> --- linux-2.5/include/linux/netdevice.h 2003-04-14 13:32:21.000000000 -0700
> +++ linux-2.5-nbr/include/linux/netdevice.h 2003-05-02 15:07:06.000000000 -0700
> @@ -456,7 +456,7 @@ struct packet_type
> int (*func) (struct sk_buff *, struct net_device *,
> struct packet_type *);
> void *data; /* Private to the packet type */
> - struct packet_type *next;
> + struct list_head list;
> };
>
>
> @@ -472,6 +472,7 @@ extern int netdev_boot_setup_check(st
> extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
> extern void dev_add_pack(struct packet_type *pt);
> extern void dev_remove_pack(struct packet_type *pt);
> +extern void __dev_remove_pack(struct packet_type *pt);
> extern int dev_get(const char *name);
> extern struct net_device *dev_get_by_flags(unsigned short flags,
> unsigned short mask);
> diff -urNp -X dontdiff linux-2.5/net/core/dev.c linux-2.5-nbr/net/core/dev.c
> --- linux-2.5/net/core/dev.c 2003-05-05 09:41:03.000000000 -0700
> +++ linux-2.5-nbr/net/core/dev.c 2003-05-05 09:44:36.000000000 -0700
> @@ -90,7 +90,6 @@
> #include <linux/etherdevice.h>
> #include <linux/notifier.h>
> #include <linux/skbuff.h>
> -#include <linux/brlock.h>
> #include <net/sock.h>
> #include <linux/rtnetlink.h>
> #include <linux/proc_fs.h>
> @@ -170,8 +169,9 @@ const char *if_port_text[] = {
> * 86DD IPv6
> */
>
> -static struct packet_type *ptype_base[16]; /* 16 way hashed list */
> -static struct packet_type *ptype_all; /* Taps */
> +static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
> +static struct list_head ptype_base[16]; /* 16 way hashed list */
> +static struct list_head ptype_all; /* Taps */
>
> #ifdef OFFLINE_SAMPLE
> static void sample_queue(unsigned long dummy);
> @@ -239,14 +239,17 @@ int netdev_nit;
> * Add a protocol handler to the networking stack. The passed &packet_type
> * is linked into kernel lists and may not be freed until it has been
> * removed from the kernel lists.
> + *
> + * This call does not sleep therefore it can not
> + * guarantee all CPU's that are in middle of receiving packets
> + * will see the new packet type (until the next received packet).
> */
>
> void dev_add_pack(struct packet_type *pt)
> {
> int hash;
>
> - br_write_lock_bh(BR_NETPROTO_LOCK);
> -
> + spin_lock_bh(&ptype_lock);
> #ifdef CONFIG_NET_FASTROUTE
> /* Hack to detect packet socket */
> if (pt->data && (long)(pt->data) != 1) {
> @@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt
> #endif
> if (pt->type == htons(ETH_P_ALL)) {
> netdev_nit++;
> - pt->next = ptype_all;
> - ptype_all = pt;
> + list_add_rcu(&pt->list, &ptype_all);
> } else {
> hash = ntohs(pt->type) & 15;
> - pt->next = ptype_base[hash];
> - ptype_base[hash] = pt;
> + list_add_rcu(&pt->list, &ptype_base[hash]);
> }
> - br_write_unlock_bh(BR_NETPROTO_LOCK);
> + spin_unlock_bh(&ptype_lock);
> }
>
> extern void linkwatch_run_queue(void);
>
> +
> +
> /**
> - * dev_remove_pack - remove packet handler
> + * __dev_remove_pack - remove packet handler
> * @pt: packet type declaration
> *
> * Remove a protocol handler that was previously added to the kernel
> * protocol handlers by dev_add_pack(). The passed &packet_type is removed
> * from the kernel lists and can be freed or reused once this function
> - * returns.
> + * returns.
> + *
> + * The packet type might still be in use by receivers
> + * and must not be freed until after all the CPU's have gone
> + * through a quiescent state.
> */
> -void dev_remove_pack(struct packet_type *pt)
> +void __dev_remove_pack(struct packet_type *pt)
> {
> - struct packet_type **pt1;
> + struct list_head *head;
> + struct packet_type *pt1;
>
> - br_write_lock_bh(BR_NETPROTO_LOCK);
> + spin_lock_bh(&ptype_lock);
>
> if (pt->type == htons(ETH_P_ALL)) {
> netdev_nit--;
> - pt1 = &ptype_all;
> + head = &ptype_all;
> } else
> - pt1 = &ptype_base[ntohs(pt->type) & 15];
> + head = &ptype_base[ntohs(pt->type) & 15];
>
> - for (; *pt1; pt1 = &((*pt1)->next)) {
> - if (pt == *pt1) {
> - *pt1 = pt->next;
> + list_for_each_entry(pt1, head, list) {
> + if (pt == pt1) {
> #ifdef CONFIG_NET_FASTROUTE
> if (pt->data)
> netdev_fastroute_obstacles--;
> #endif
> + list_del_rcu(&pt->list);
> goto out;
> }
> }
> +
> printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
> out:
> - br_write_unlock_bh(BR_NETPROTO_LOCK);
> + spin_unlock_bh(&ptype_lock);
> +}
> +/**
> + * dev_remove_pack - remove packet handler
> + * @pt: packet type declaration
> + *
> + * Remove a protocol handler that was previously added to the kernel
> + * protocol handlers by dev_add_pack(). The passed &packet_type is removed
> + * from the kernel lists and can be freed or reused once this function
> + * returns.
> + *
> + * This call sleeps to guarantee that no CPU is looking at the packet
> + * type after return.
> + */
> +void dev_remove_pack(struct packet_type *pt)
> +{
> + __dev_remove_pack(pt);
> +
> + synchronize_net();
> }
>
> /******************************************************************************
> @@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff *
> struct packet_type *ptype;
> do_gettimeofday(&skb->stamp);
>
> - br_read_lock(BR_NETPROTO_LOCK);
> - for (ptype = ptype_all; ptype; ptype = ptype->next) {
> + rcu_read_lock();
> + list_for_each_entry_rcu(ptype, &ptype_all, list) {
> /* Never send packets back to the socket
> * they originated from - MvS (miquels@drinkel.ow.org)
> */
> @@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff *
> ptype->func(skb2, skb->dev, ptype);
> }
> }
> - br_read_unlock(BR_NETPROTO_LOCK);
> + rcu_read_unlock();
> }
>
> /* Calculate csum in the case, when packet is misrouted.
> @@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *sk
> skb->h.raw = skb->nh.raw = skb->data;
>
> pt_prev = NULL;
> - for (ptype = ptype_all; ptype; ptype = ptype->next) {
> + rcu_read_lock();
> + list_for_each_entry_rcu(ptype, &ptype_all, list) {
> if (!ptype->dev || ptype->dev == skb->dev) {
> if (pt_prev) {
> if (!pt_prev->data) {
> @@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *sk
>
> #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
> if (skb->dev->br_port) {
> - int ret;
> -
> ret = handle_bridge(skb, pt_prev);
> if (br_handle_frame_hook(skb) == 0)
> - return ret;
> + goto out;
>
> pt_prev = NULL;
> }
> #endif
>
> - for (ptype = ptype_base[ntohs(type) & 15]; ptype; ptype = ptype->next) {
> + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
> if (ptype->type == type &&
> (!ptype->dev || ptype->dev == skb->dev)) {
> if (pt_prev) {
> @@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *sk
> ret = NET_RX_DROP;
> }
>
> + out:
> + rcu_read_unlock();
> return ret;
> }
>
> @@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq
> unsigned long start_time = jiffies;
> int budget = netdev_max_backlog;
>
> - br_read_lock(BR_NETPROTO_LOCK);
> +
> + preempt_disable();
> local_irq_disable();
>
> while (!list_empty(&queue->poll_list)) {
> @@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq
> }
> out:
> local_irq_enable();
> - br_read_unlock(BR_NETPROTO_LOCK);
> + preempt_enable();
> return;
>
> softnet_break:
> @@ -1995,9 +2024,9 @@ int netdev_set_master(struct net_device
> dev_hold(master);
> }
>
> - br_write_lock_bh(BR_NETPROTO_LOCK);
> slave->master = master;
> - br_write_unlock_bh(BR_NETPROTO_LOCK);
> +
> + synchronize_net();
>
> if (old)
> dev_put(old);
> @@ -2661,8 +2690,8 @@ int netdev_finish_unregister(struct net_
> /* Synchronize with packet receive processing. */
> void synchronize_net(void)
> {
> - br_write_lock_bh(BR_NETPROTO_LOCK);
> - br_write_unlock_bh(BR_NETPROTO_LOCK);
> + might_sleep();
> + synchronize_kernel();
> }
>
> /**
> @@ -2846,6 +2875,10 @@ static int __init net_dev_init(void)
>
> subsystem_register(&net_subsys);
>
> + INIT_LIST_HEAD(&ptype_all);
> + for (i = 0; i < 16; i++)
> + INIT_LIST_HEAD(&ptype_base[i]);
> +
> #ifdef CONFIG_NET_DIVERT
> dv_init();
> #endif /* CONFIG_NET_DIVERT */
> diff -urNp -X dontdiff linux-2.5/net/netsyms.c linux-2.5-nbr/net/netsyms.c
> --- linux-2.5/net/netsyms.c 2003-05-05 09:41:03.000000000 -0700
> +++ linux-2.5-nbr/net/netsyms.c 2003-05-05 09:44:36.000000000 -0700
> @@ -577,6 +577,7 @@ EXPORT_SYMBOL(netif_rx);
> EXPORT_SYMBOL(netif_receive_skb);
> EXPORT_SYMBOL(dev_add_pack);
> EXPORT_SYMBOL(dev_remove_pack);
> +EXPORT_SYMBOL(__dev_remove_pack);
> EXPORT_SYMBOL(dev_get);
> EXPORT_SYMBOL(dev_alloc);
> EXPORT_SYMBOL(dev_alloc_name);
> diff -urNp -X dontdiff linux-2.5/net/packet/af_packet.c linux-2.5-nbr/net/packet/af_packet.c
> --- linux-2.5/net/packet/af_packet.c 2003-05-05 09:41:03.000000000 -0700
> +++ linux-2.5-nbr/net/packet/af_packet.c 2003-05-05 11:14:52.000000000 -0700
> @@ -774,6 +774,7 @@ static int packet_release(struct socket
> */
> dev_remove_pack(&po->prot_hook);
> po->running = 0;
> + po->num = 0;
> __sock_put(sk);
> }
>
> @@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *s
>
> spin_lock(&po->bind_lock);
> if (po->running) {
> - dev_remove_pack(&po->prot_hook);
> __sock_put(sk);
> po->running = 0;
> + po->num = 0;
> + spin_unlock(&po->bind_lock);
> + dev_remove_pack(&po->prot_hook);
> + spin_lock(&po->bind_lock);
> }
>
> po->num = protocol;
> @@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifi
> if (dev->ifindex == po->ifindex) {
> spin_lock(&po->bind_lock);
> if (po->running) {
> - dev_remove_pack(&po->prot_hook);
> + __dev_remove_pack(&po->prot_hook);
> __sock_put(sk);
> po->running = 0;
> sk->err = ENETDOWN;
> @@ -1618,9 +1622,14 @@ static int packet_set_ring(struct sock *
>
> /* Detach socket from network */
> spin_lock(&po->bind_lock);
> - if (po->running)
> - dev_remove_pack(&po->prot_hook);
> + if (po->running) {
> + __dev_remove_pack(&po->prot_hook);
> + po->num = 0;
> + po->running = 0;
> + }
> spin_unlock(&po->bind_lock);
> +
> + synchronize_net();
>
> err = -EBUSY;
> if (closing || atomic_read(&po->mapped) == 0) {
>
>
>
next prev parent reply other threads:[~2003-05-14 11:23 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-05-09 21:25 [PATCH 2.5.69] Network packet type using RCU Stephen Hemminger
2003-05-14 11:23 ` Jamal Hadi [this message]
2003-05-14 19:19 ` David S. Miller
2003-05-19 7:22 ` David S. Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20030514072006.D23367@shell.cyberus.ca \
--to=hadi@shell.cyberus.ca \
--cc=davem@redhat.com \
--cc=netdev@oss.sgi.com \
--cc=shemminger@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).