From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jamal Hadi Subject: Re: [PATCH 2.5.69] Network packet type using RCU Date: Wed, 14 May 2003 07:23:32 -0400 (EDT) Sender: netdev-bounce@oss.sgi.com Message-ID: <20030514072006.D23367@shell.cyberus.ca> References: <20030509142538.548cbd71.shemminger@osdl.org> Mime-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Cc: "David S. Miller" , netdev@oss.sgi.com Return-path: To: Stephen Hemminger In-Reply-To: <20030509142538.548cbd71.shemminger@osdl.org> Errors-to: netdev-bounce@oss.sgi.com List-Id: netdev.vger.kernel.org Apologies i was offline for a bit so dont know the history here. I see a lot of move to rcu. There are benefits to using RCU in certain cases (example occasional list edit with a lot more reads). Why does there seem to be a massive migration it seems to RCU? Something wrong with the linked lists? cheers, jamal On Fri, 9 May 2003, Stephen Hemminger wrote: > Convert network packet type list from using brlock to RCU based list. > Some collataral changes to af_packet were necessary since it needs to > unregister packet types without sleeping. > > Summary: > * packet type converted from linked list to list_macro > * writer lock replaced with spin lock, readers use RCU > * add __dev_remove_pack for callers that can't sleep. > * af_packet changes to handle and sleeping requirements, and possible > races that could cause. > > Tested on 8-way SMP running 2.5.69 latest. > > Dave, please wait till Monday to apply this in case there are any > comments. > > diff -urNp -X dontdiff linux-2.5/include/linux/netdevice.h linux-2.5-nbr/include/linux/netdevice.h > --- linux-2.5/include/linux/netdevice.h 2003-04-14 13:32:21.000000000 -0700 > +++ linux-2.5-nbr/include/linux/netdevice.h 2003-05-02 15:07:06.000000000 -0700 > @@ -456,7 +456,7 @@ struct packet_type > int (*func) (struct sk_buff *, struct net_device *, > struct packet_type *); > void *data; /* Private to the packet type */ > - struct packet_type *next; > + struct list_head list; > }; > > > @@ -472,6 +472,7 @@ extern int netdev_boot_setup_check(st > extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); > extern void dev_add_pack(struct packet_type *pt); > extern void dev_remove_pack(struct packet_type *pt); > +extern void __dev_remove_pack(struct packet_type *pt); > extern int dev_get(const char *name); > extern struct net_device *dev_get_by_flags(unsigned short flags, > unsigned short mask); > diff -urNp -X dontdiff linux-2.5/net/core/dev.c linux-2.5-nbr/net/core/dev.c > --- linux-2.5/net/core/dev.c 2003-05-05 09:41:03.000000000 -0700 > +++ linux-2.5-nbr/net/core/dev.c 2003-05-05 09:44:36.000000000 -0700 > @@ -90,7 +90,6 @@ > #include > #include > #include > -#include > #include > #include > #include > @@ -170,8 +169,9 @@ const char *if_port_text[] = { > * 86DD IPv6 > */ > > -static struct packet_type *ptype_base[16]; /* 16 way hashed list */ > -static struct packet_type *ptype_all; /* Taps */ > +static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED; > +static struct list_head ptype_base[16]; /* 16 way hashed list */ > +static struct list_head ptype_all; /* Taps */ > > #ifdef OFFLINE_SAMPLE > static void sample_queue(unsigned long dummy); > @@ -239,14 +239,17 @@ int netdev_nit; > * Add a protocol handler to the networking stack. The passed &packet_type > * is linked into kernel lists and may not be freed until it has been > * removed from the kernel lists. > + * > + * This call does not sleep therefore it can not > + * guarantee all CPU's that are in middle of receiving packets > + * will see the new packet type (until the next received packet). > */ > > void dev_add_pack(struct packet_type *pt) > { > int hash; > > - br_write_lock_bh(BR_NETPROTO_LOCK); > - > + spin_lock_bh(&ptype_lock); > #ifdef CONFIG_NET_FASTROUTE > /* Hack to detect packet socket */ > if (pt->data && (long)(pt->data) != 1) { > @@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt > #endif > if (pt->type == htons(ETH_P_ALL)) { > netdev_nit++; > - pt->next = ptype_all; > - ptype_all = pt; > + list_add_rcu(&pt->list, &ptype_all); > } else { > hash = ntohs(pt->type) & 15; > - pt->next = ptype_base[hash]; > - ptype_base[hash] = pt; > + list_add_rcu(&pt->list, &ptype_base[hash]); > } > - br_write_unlock_bh(BR_NETPROTO_LOCK); > + spin_unlock_bh(&ptype_lock); > } > > extern void linkwatch_run_queue(void); > > + > + > /** > - * dev_remove_pack - remove packet handler > + * __dev_remove_pack - remove packet handler > * @pt: packet type declaration > * > * Remove a protocol handler that was previously added to the kernel > * protocol handlers by dev_add_pack(). The passed &packet_type is removed > * from the kernel lists and can be freed or reused once this function > - * returns. > + * returns. > + * > + * The packet type might still be in use by receivers > + * and must not be freed until after all the CPU's have gone > + * through a quiescent state. > */ > -void dev_remove_pack(struct packet_type *pt) > +void __dev_remove_pack(struct packet_type *pt) > { > - struct packet_type **pt1; > + struct list_head *head; > + struct packet_type *pt1; > > - br_write_lock_bh(BR_NETPROTO_LOCK); > + spin_lock_bh(&ptype_lock); > > if (pt->type == htons(ETH_P_ALL)) { > netdev_nit--; > - pt1 = &ptype_all; > + head = &ptype_all; > } else > - pt1 = &ptype_base[ntohs(pt->type) & 15]; > + head = &ptype_base[ntohs(pt->type) & 15]; > > - for (; *pt1; pt1 = &((*pt1)->next)) { > - if (pt == *pt1) { > - *pt1 = pt->next; > + list_for_each_entry(pt1, head, list) { > + if (pt == pt1) { > #ifdef CONFIG_NET_FASTROUTE > if (pt->data) > netdev_fastroute_obstacles--; > #endif > + list_del_rcu(&pt->list); > goto out; > } > } > + > printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); > out: > - br_write_unlock_bh(BR_NETPROTO_LOCK); > + spin_unlock_bh(&ptype_lock); > +} > +/** > + * dev_remove_pack - remove packet handler > + * @pt: packet type declaration > + * > + * Remove a protocol handler that was previously added to the kernel > + * protocol handlers by dev_add_pack(). The passed &packet_type is removed > + * from the kernel lists and can be freed or reused once this function > + * returns. > + * > + * This call sleeps to guarantee that no CPU is looking at the packet > + * type after return. > + */ > +void dev_remove_pack(struct packet_type *pt) > +{ > + __dev_remove_pack(pt); > + > + synchronize_net(); > } > > /****************************************************************************** > @@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff * > struct packet_type *ptype; > do_gettimeofday(&skb->stamp); > > - br_read_lock(BR_NETPROTO_LOCK); > - for (ptype = ptype_all; ptype; ptype = ptype->next) { > + rcu_read_lock(); > + list_for_each_entry_rcu(ptype, &ptype_all, list) { > /* Never send packets back to the socket > * they originated from - MvS (miquels@drinkel.ow.org) > */ > @@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff * > ptype->func(skb2, skb->dev, ptype); > } > } > - br_read_unlock(BR_NETPROTO_LOCK); > + rcu_read_unlock(); > } > > /* Calculate csum in the case, when packet is misrouted. > @@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *sk > skb->h.raw = skb->nh.raw = skb->data; > > pt_prev = NULL; > - for (ptype = ptype_all; ptype; ptype = ptype->next) { > + rcu_read_lock(); > + list_for_each_entry_rcu(ptype, &ptype_all, list) { > if (!ptype->dev || ptype->dev == skb->dev) { > if (pt_prev) { > if (!pt_prev->data) { > @@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *sk > > #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) > if (skb->dev->br_port) { > - int ret; > - > ret = handle_bridge(skb, pt_prev); > if (br_handle_frame_hook(skb) == 0) > - return ret; > + goto out; > > pt_prev = NULL; > } > #endif > > - for (ptype = ptype_base[ntohs(type) & 15]; ptype; ptype = ptype->next) { > + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { > if (ptype->type == type && > (!ptype->dev || ptype->dev == skb->dev)) { > if (pt_prev) { > @@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *sk > ret = NET_RX_DROP; > } > > + out: > + rcu_read_unlock(); > return ret; > } > > @@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq > unsigned long start_time = jiffies; > int budget = netdev_max_backlog; > > - br_read_lock(BR_NETPROTO_LOCK); > + > + preempt_disable(); > local_irq_disable(); > > while (!list_empty(&queue->poll_list)) { > @@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq > } > out: > local_irq_enable(); > - br_read_unlock(BR_NETPROTO_LOCK); > + preempt_enable(); > return; > > softnet_break: > @@ -1995,9 +2024,9 @@ int netdev_set_master(struct net_device > dev_hold(master); > } > > - br_write_lock_bh(BR_NETPROTO_LOCK); > slave->master = master; > - br_write_unlock_bh(BR_NETPROTO_LOCK); > + > + synchronize_net(); > > if (old) > dev_put(old); > @@ -2661,8 +2690,8 @@ int netdev_finish_unregister(struct net_ > /* Synchronize with packet receive processing. */ > void synchronize_net(void) > { > - br_write_lock_bh(BR_NETPROTO_LOCK); > - br_write_unlock_bh(BR_NETPROTO_LOCK); > + might_sleep(); > + synchronize_kernel(); > } > > /** > @@ -2846,6 +2875,10 @@ static int __init net_dev_init(void) > > subsystem_register(&net_subsys); > > + INIT_LIST_HEAD(&ptype_all); > + for (i = 0; i < 16; i++) > + INIT_LIST_HEAD(&ptype_base[i]); > + > #ifdef CONFIG_NET_DIVERT > dv_init(); > #endif /* CONFIG_NET_DIVERT */ > diff -urNp -X dontdiff linux-2.5/net/netsyms.c linux-2.5-nbr/net/netsyms.c > --- linux-2.5/net/netsyms.c 2003-05-05 09:41:03.000000000 -0700 > +++ linux-2.5-nbr/net/netsyms.c 2003-05-05 09:44:36.000000000 -0700 > @@ -577,6 +577,7 @@ EXPORT_SYMBOL(netif_rx); > EXPORT_SYMBOL(netif_receive_skb); > EXPORT_SYMBOL(dev_add_pack); > EXPORT_SYMBOL(dev_remove_pack); > +EXPORT_SYMBOL(__dev_remove_pack); > EXPORT_SYMBOL(dev_get); > EXPORT_SYMBOL(dev_alloc); > EXPORT_SYMBOL(dev_alloc_name); > diff -urNp -X dontdiff linux-2.5/net/packet/af_packet.c linux-2.5-nbr/net/packet/af_packet.c > --- linux-2.5/net/packet/af_packet.c 2003-05-05 09:41:03.000000000 -0700 > +++ linux-2.5-nbr/net/packet/af_packet.c 2003-05-05 11:14:52.000000000 -0700 > @@ -774,6 +774,7 @@ static int packet_release(struct socket > */ > dev_remove_pack(&po->prot_hook); > po->running = 0; > + po->num = 0; > __sock_put(sk); > } > > @@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *s > > spin_lock(&po->bind_lock); > if (po->running) { > - dev_remove_pack(&po->prot_hook); > __sock_put(sk); > po->running = 0; > + po->num = 0; > + spin_unlock(&po->bind_lock); > + dev_remove_pack(&po->prot_hook); > + spin_lock(&po->bind_lock); > } > > po->num = protocol; > @@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifi > if (dev->ifindex == po->ifindex) { > spin_lock(&po->bind_lock); > if (po->running) { > - dev_remove_pack(&po->prot_hook); > + __dev_remove_pack(&po->prot_hook); > __sock_put(sk); > po->running = 0; > sk->err = ENETDOWN; > @@ -1618,9 +1622,14 @@ static int packet_set_ring(struct sock * > > /* Detach socket from network */ > spin_lock(&po->bind_lock); > - if (po->running) > - dev_remove_pack(&po->prot_hook); > + if (po->running) { > + __dev_remove_pack(&po->prot_hook); > + po->num = 0; > + po->running = 0; > + } > spin_unlock(&po->bind_lock); > + > + synchronize_net(); > > err = -EBUSY; > if (closing || atomic_read(&po->mapped) == 0) { > > >