netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jamal Hadi <hadi@shell.cyberus.ca>
To: Stephen Hemminger <shemminger@osdl.org>
Cc: "David S. Miller" <davem@redhat.com>, netdev@oss.sgi.com
Subject: Re: [PATCH 2.5.69] Network packet type using RCU
Date: Wed, 14 May 2003 07:23:32 -0400 (EDT)	[thread overview]
Message-ID: <20030514072006.D23367@shell.cyberus.ca> (raw)
In-Reply-To: <20030509142538.548cbd71.shemminger@osdl.org>


Apologies i was offline for a bit so dont know the history here.
I see a lot of move to rcu. There are benefits to using RCU in certain
cases (example occasional list edit with a lot more reads). Why does
there  seem to be a massive migration it seems to RCU? Something wrong
with the linked lists?

cheers,
jamal

On Fri, 9 May 2003, Stephen Hemminger wrote:

> Convert network packet type list from using brlock to RCU based list.
> Some collataral changes to af_packet were necessary since it needs to
> unregister packet types without sleeping.
>
> Summary:
> * packet type converted from linked list to list_macro
> * writer lock replaced with spin lock, readers use RCU
> * add __dev_remove_pack for callers that can't sleep.
> * af_packet changes to handle and sleeping requirements, and possible
>   races that could cause.
>
> Tested on 8-way SMP running 2.5.69 latest.
>
> Dave, please wait till Monday to apply this in case there are any
> comments.
>
> diff -urNp -X dontdiff linux-2.5/include/linux/netdevice.h linux-2.5-nbr/include/linux/netdevice.h
> --- linux-2.5/include/linux/netdevice.h	2003-04-14 13:32:21.000000000 -0700
> +++ linux-2.5-nbr/include/linux/netdevice.h	2003-05-02 15:07:06.000000000 -0700
> @@ -456,7 +456,7 @@ struct packet_type
>  	int			(*func) (struct sk_buff *, struct net_device *,
>  					 struct packet_type *);
>  	void			*data;	/* Private to the packet type		*/
> -	struct packet_type	*next;
> +	struct list_head	list;
>  };
>
>
> @@ -472,6 +472,7 @@ extern int 			netdev_boot_setup_check(st
>  extern struct net_device    *dev_getbyhwaddr(unsigned short type, char *hwaddr);
>  extern void		dev_add_pack(struct packet_type *pt);
>  extern void		dev_remove_pack(struct packet_type *pt);
> +extern void		__dev_remove_pack(struct packet_type *pt);
>  extern int		dev_get(const char *name);
>  extern struct net_device	*dev_get_by_flags(unsigned short flags,
>  						  unsigned short mask);
> diff -urNp -X dontdiff linux-2.5/net/core/dev.c linux-2.5-nbr/net/core/dev.c
> --- linux-2.5/net/core/dev.c	2003-05-05 09:41:03.000000000 -0700
> +++ linux-2.5-nbr/net/core/dev.c	2003-05-05 09:44:36.000000000 -0700
> @@ -90,7 +90,6 @@
>  #include <linux/etherdevice.h>
>  #include <linux/notifier.h>
>  #include <linux/skbuff.h>
> -#include <linux/brlock.h>
>  #include <net/sock.h>
>  #include <linux/rtnetlink.h>
>  #include <linux/proc_fs.h>
> @@ -170,8 +169,9 @@ const char *if_port_text[] = {
>   *		86DD	IPv6
>   */
>
> -static struct packet_type *ptype_base[16];	/* 16 way hashed list */
> -static struct packet_type *ptype_all;		/* Taps */
> +static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
> +static struct list_head ptype_base[16];	/* 16 way hashed list */
> +static struct list_head ptype_all;		/* Taps */
>
>  #ifdef OFFLINE_SAMPLE
>  static void sample_queue(unsigned long dummy);
> @@ -239,14 +239,17 @@ int netdev_nit;
>   *	Add a protocol handler to the networking stack. The passed &packet_type
>   *	is linked into kernel lists and may not be freed until it has been
>   *	removed from the kernel lists.
> + *
> + *	This call does not sleep therefore it can not
> + *	guarantee all CPU's that are in middle of receiving packets
> + *	will see the new packet type (until the next received packet).
>   */
>
>  void dev_add_pack(struct packet_type *pt)
>  {
>  	int hash;
>
> -	br_write_lock_bh(BR_NETPROTO_LOCK);
> -
> +	spin_lock_bh(&ptype_lock);
>  #ifdef CONFIG_NET_FASTROUTE
>  	/* Hack to detect packet socket */
>  	if (pt->data && (long)(pt->data) != 1) {
> @@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt
>  #endif
>  	if (pt->type == htons(ETH_P_ALL)) {
>  		netdev_nit++;
> -		pt->next  = ptype_all;
> -		ptype_all = pt;
> +		list_add_rcu(&pt->list, &ptype_all);
>  	} else {
>  		hash = ntohs(pt->type) & 15;
> -		pt->next = ptype_base[hash];
> -		ptype_base[hash] = pt;
> +		list_add_rcu(&pt->list, &ptype_base[hash]);
>  	}
> -	br_write_unlock_bh(BR_NETPROTO_LOCK);
> +	spin_unlock_bh(&ptype_lock);
>  }
>
>  extern void linkwatch_run_queue(void);
>
> +
> +
>  /**
> - *	dev_remove_pack	 - remove packet handler
> + *	__dev_remove_pack	 - remove packet handler
>   *	@pt: packet type declaration
>   *
>   *	Remove a protocol handler that was previously added to the kernel
>   *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
>   *	from the kernel lists and can be freed or reused once this function
> - *	returns.
> + *	returns.
> + *
> + *      The packet type might still be in use by receivers
> + *	and must not be freed until after all the CPU's have gone
> + *	through a quiescent state.
>   */
> -void dev_remove_pack(struct packet_type *pt)
> +void __dev_remove_pack(struct packet_type *pt)
>  {
> -	struct packet_type **pt1;
> +	struct list_head *head;
> +	struct packet_type *pt1;
>
> -	br_write_lock_bh(BR_NETPROTO_LOCK);
> +	spin_lock_bh(&ptype_lock);
>
>  	if (pt->type == htons(ETH_P_ALL)) {
>  		netdev_nit--;
> -		pt1 = &ptype_all;
> +		head = &ptype_all;
>  	} else
> -		pt1 = &ptype_base[ntohs(pt->type) & 15];
> +		head = &ptype_base[ntohs(pt->type) & 15];
>
> -	for (; *pt1; pt1 = &((*pt1)->next)) {
> -		if (pt == *pt1) {
> -			*pt1 = pt->next;
> +	list_for_each_entry(pt1, head, list) {
> +		if (pt == pt1) {
>  #ifdef CONFIG_NET_FASTROUTE
>  			if (pt->data)
>  				netdev_fastroute_obstacles--;
>  #endif
> +			list_del_rcu(&pt->list);
>  			goto out;
>  		}
>  	}
> +
>  	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
>  out:
> -	br_write_unlock_bh(BR_NETPROTO_LOCK);
> +	spin_unlock_bh(&ptype_lock);
> +}
> +/**
> + *	dev_remove_pack	 - remove packet handler
> + *	@pt: packet type declaration
> + *
> + *	Remove a protocol handler that was previously added to the kernel
> + *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
> + *	from the kernel lists and can be freed or reused once this function
> + *	returns.
> + *
> + *	This call sleeps to guarantee that no CPU is looking at the packet
> + *	type after return.
> + */
> +void dev_remove_pack(struct packet_type *pt)
> +{
> +	__dev_remove_pack(pt);
> +
> +	synchronize_net();
>  }
>
>  /******************************************************************************
> @@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff *
>  	struct packet_type *ptype;
>  	do_gettimeofday(&skb->stamp);
>
> -	br_read_lock(BR_NETPROTO_LOCK);
> -	for (ptype = ptype_all; ptype; ptype = ptype->next) {
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(ptype, &ptype_all, list) {
>  		/* Never send packets back to the socket
>  		 * they originated from - MvS (miquels@drinkel.ow.org)
>  		 */
> @@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff *
>  			ptype->func(skb2, skb->dev, ptype);
>  		}
>  	}
> -	br_read_unlock(BR_NETPROTO_LOCK);
> +	rcu_read_unlock();
>  }
>
>  /* Calculate csum in the case, when packet is misrouted.
> @@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *sk
>  	skb->h.raw = skb->nh.raw = skb->data;
>
>  	pt_prev = NULL;
> -	for (ptype = ptype_all; ptype; ptype = ptype->next) {
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(ptype, &ptype_all, list) {
>  		if (!ptype->dev || ptype->dev == skb->dev) {
>  			if (pt_prev) {
>  				if (!pt_prev->data) {
> @@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *sk
>
>  #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
>  	if (skb->dev->br_port) {
> -		int ret;
> -
>  		ret = handle_bridge(skb, pt_prev);
>  		if (br_handle_frame_hook(skb) == 0)
> -			return ret;
> +			goto out;
>
>  		pt_prev = NULL;
>  	}
>  #endif
>
> -	for (ptype = ptype_base[ntohs(type) & 15]; ptype; ptype = ptype->next) {
> +	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
>  		if (ptype->type == type &&
>  		    (!ptype->dev || ptype->dev == skb->dev)) {
>  			if (pt_prev) {
> @@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *sk
>  		ret = NET_RX_DROP;
>  	}
>
> + out:
> +	rcu_read_unlock();
>  	return ret;
>  }
>
> @@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq
>  	unsigned long start_time = jiffies;
>  	int budget = netdev_max_backlog;
>
> -	br_read_lock(BR_NETPROTO_LOCK);
> +
> +	preempt_disable();
>  	local_irq_disable();
>
>  	while (!list_empty(&queue->poll_list)) {
> @@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq
>  	}
>  out:
>  	local_irq_enable();
> -	br_read_unlock(BR_NETPROTO_LOCK);
> +	preempt_enable();
>  	return;
>
>  softnet_break:
> @@ -1995,9 +2024,9 @@ int netdev_set_master(struct net_device
>  		dev_hold(master);
>  	}
>
> -	br_write_lock_bh(BR_NETPROTO_LOCK);
>  	slave->master = master;
> -	br_write_unlock_bh(BR_NETPROTO_LOCK);
> +
> +	synchronize_net();
>
>  	if (old)
>  		dev_put(old);
> @@ -2661,8 +2690,8 @@ int netdev_finish_unregister(struct net_
>  /* Synchronize with packet receive processing. */
>  void synchronize_net(void)
>  {
> -	br_write_lock_bh(BR_NETPROTO_LOCK);
> -	br_write_unlock_bh(BR_NETPROTO_LOCK);
> +	might_sleep();
> +	synchronize_kernel();
>  }
>
>  /**
> @@ -2846,6 +2875,10 @@ static int __init net_dev_init(void)
>
>  	subsystem_register(&net_subsys);
>
> +	INIT_LIST_HEAD(&ptype_all);
> +	for (i = 0; i < 16; i++)
> +		INIT_LIST_HEAD(&ptype_base[i]);
> +
>  #ifdef CONFIG_NET_DIVERT
>  	dv_init();
>  #endif /* CONFIG_NET_DIVERT */
> diff -urNp -X dontdiff linux-2.5/net/netsyms.c linux-2.5-nbr/net/netsyms.c
> --- linux-2.5/net/netsyms.c	2003-05-05 09:41:03.000000000 -0700
> +++ linux-2.5-nbr/net/netsyms.c	2003-05-05 09:44:36.000000000 -0700
> @@ -577,6 +577,7 @@ EXPORT_SYMBOL(netif_rx);
>  EXPORT_SYMBOL(netif_receive_skb);
>  EXPORT_SYMBOL(dev_add_pack);
>  EXPORT_SYMBOL(dev_remove_pack);
> +EXPORT_SYMBOL(__dev_remove_pack);
>  EXPORT_SYMBOL(dev_get);
>  EXPORT_SYMBOL(dev_alloc);
>  EXPORT_SYMBOL(dev_alloc_name);
> diff -urNp -X dontdiff linux-2.5/net/packet/af_packet.c linux-2.5-nbr/net/packet/af_packet.c
> --- linux-2.5/net/packet/af_packet.c	2003-05-05 09:41:03.000000000 -0700
> +++ linux-2.5-nbr/net/packet/af_packet.c	2003-05-05 11:14:52.000000000 -0700
> @@ -774,6 +774,7 @@ static int packet_release(struct socket
>  		 */
>  		dev_remove_pack(&po->prot_hook);
>  		po->running = 0;
> +		po->num = 0;
>  		__sock_put(sk);
>  	}
>
> @@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *s
>
>  	spin_lock(&po->bind_lock);
>  	if (po->running) {
> -		dev_remove_pack(&po->prot_hook);
>  		__sock_put(sk);
>  		po->running = 0;
> +		po->num = 0;
> +		spin_unlock(&po->bind_lock);
> +		dev_remove_pack(&po->prot_hook);
> +		spin_lock(&po->bind_lock);
>  	}
>
>  	po->num = protocol;
> @@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifi
>  			if (dev->ifindex == po->ifindex) {
>  				spin_lock(&po->bind_lock);
>  				if (po->running) {
> -					dev_remove_pack(&po->prot_hook);
> +					__dev_remove_pack(&po->prot_hook);
>  					__sock_put(sk);
>  					po->running = 0;
>  					sk->err = ENETDOWN;
> @@ -1618,9 +1622,14 @@ static int packet_set_ring(struct sock *
>
>  	/* Detach socket from network */
>  	spin_lock(&po->bind_lock);
> -	if (po->running)
> -		dev_remove_pack(&po->prot_hook);
> +	if (po->running) {
> +		__dev_remove_pack(&po->prot_hook);
> +		po->num = 0;
> +		po->running = 0;
> +	}
>  	spin_unlock(&po->bind_lock);
> +
> +	synchronize_net();
>
>  	err = -EBUSY;
>  	if (closing || atomic_read(&po->mapped) == 0) {
>
>
>

  reply	other threads:[~2003-05-14 11:23 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-05-09 21:25 [PATCH 2.5.69] Network packet type using RCU Stephen Hemminger
2003-05-14 11:23 ` Jamal Hadi [this message]
2003-05-14 19:19   ` David S. Miller
2003-05-19  7:22   ` David S. Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20030514072006.D23367@shell.cyberus.ca \
    --to=hadi@shell.cyberus.ca \
    --cc=davem@redhat.com \
    --cc=netdev@oss.sgi.com \
    --cc=shemminger@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).