All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
To: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>,
	netdev@vger.kernel.org, davem@davemloft.net
Subject: Re: [PATCH] dropmon: add ability to detect when hardwaredropsrxpackets
Date: Thu, 14 May 2009 09:18:53 -0700	[thread overview]
Message-ID: <20090514161853.GI6744@linux.vnet.ibm.com> (raw)
In-Reply-To: <20090514123300.GA7166@hmsreliant.think-freely.org>

On Thu, May 14, 2009 at 08:33:00AM -0400, Neil Horman wrote:
> > 
> > The purpose of list_add_rcu() and list_del_rcu() is to be able to permit
> > concurrent readers to traverse the list while you are modifying that same
> > list.  You still need something to handle concurrent updates to the list.
> > The usual approach is to use locks, in this case, to hold trace_state_lock
> > across the addition, given that it is already held across the removal.
> > 
> > The reason that I expect holding the lock to be OK is that you cannot
> > add elements faster than you delete them long-term, so given that you
> > are willing to hold the lock over deletions, you are probably OK also
> > holding that same lock over additions.  But please let me know if that
> > is not the case!
> > 
> > 							Thanx, Paul
> > 
> 
> 
> Ahh, I did learn something then :).  That makes sense, ok. New patch attached.
> 
> Patch to add the ability to detect drops in hardware interfaces via dropwatch.
> Adds a tracepoint to net_rx_action to signal everytime a napi instance is
> polled.  The dropmon code then periodically checks to see if the rx_frames
> counter has changed, and if so, adds a drop notification to the netlink
> protocol, using the reserved all-0's vector to indicate the drop location was in
> hardware, rather than somewhere in the code.
>    
> change notes:
> 1) added use of trace_state_lock around list_add_rcu operation

Looks good from an RCU viewpoint!

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> 
>  include/linux/net_dropmon.h |    7 ++
>  include/trace/napi.h        |   11 ++++
>  net/core/dev.c              |    5 +
>  net/core/drop_monitor.c     |  115 ++++++++++++++++++++++++++++++++++++++++++--
>  net/core/net-traces.c       |    4 +
>  net/core/netpoll.c          |    2 
>  6 files changed, 139 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h
> index 0217fb8..9addc62 100644
> --- a/include/linux/net_dropmon.h
> +++ b/include/linux/net_dropmon.h
> @@ -8,6 +8,13 @@ struct net_dm_drop_point {
>  	__u32 count;
>  };
> 
> +#define is_drop_point_hw(x) do {\
> +	int ____i, ____j;\
> +	for (____i = 0; ____i < 8; i ____i++)\
> +		____j |= x[____i];\
> +	____j;\
> +} while (0)
> +
>  #define NET_DM_CFG_VERSION  0
>  #define NET_DM_CFG_ALERT_COUNT  1
>  #define NET_DM_CFG_ALERT_DELAY 2
> diff --git a/include/trace/napi.h b/include/trace/napi.h
> new file mode 100644
> index 0000000..7c39eb7
> --- /dev/null
> +++ b/include/trace/napi.h
> @@ -0,0 +1,11 @@
> +#ifndef _TRACE_NAPI_H_
> +#define _TRACE_NAPI_H_
> +
> +#include <linux/skbuff.h>
> +#include <linux/tracepoint.h>
> +
> +DECLARE_TRACE(napi_poll,
> +	TP_PROTO(struct napi_struct *napi),
> +	TP_ARGS(napi));
> +
> +#endif
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 637ea71..165979c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -126,6 +126,7 @@
>  #include <linux/in.h>
>  #include <linux/jhash.h>
>  #include <linux/random.h>
> +#include <trace/napi.h>
> 
>  #include "net-sysfs.h"
> 
> @@ -2763,8 +2764,10 @@ static void net_rx_action(struct softirq_action *h)
>  		 * accidently calling ->poll() when NAPI is not scheduled.
>  		 */
>  		work = 0;
> -		if (test_bit(NAPI_STATE_SCHED, &n->state))
> +		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
>  			work = n->poll(n, weight);
> +			trace_napi_poll(n);
> +		}
> 
>  		WARN_ON_ONCE(work > weight);
> 
> diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
> index 2797b71..f449971 100644
> --- a/net/core/drop_monitor.c
> +++ b/net/core/drop_monitor.c
> @@ -22,8 +22,10 @@
>  #include <linux/timer.h>
>  #include <linux/bitops.h>
>  #include <net/genetlink.h>
> +#include <net/netevent.h>
> 
>  #include <trace/skb.h>
> +#include <trace/napi.h>
> 
>  #include <asm/unaligned.h>
> 
> @@ -38,7 +40,8 @@ static void send_dm_alert(struct work_struct *unused);
>   * and the work handle that will send up
>   * netlink alerts
>   */
> -struct sock *dm_sock;
> +static int trace_state = TRACE_OFF;
> +static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED;
> 
>  struct per_cpu_dm_data {
>  	struct work_struct dm_alert_work;
> @@ -47,6 +50,13 @@ struct per_cpu_dm_data {
>  	struct timer_list send_timer;
>  };
> 
> +struct dm_hw_stat_delta {
> +	struct net_device *dev;
> +	struct list_head list;
> +	struct rcu_head rcu;
> +	unsigned long last_drop_val;
> +};
> +
>  static struct genl_family net_drop_monitor_family = {
>  	.id             = GENL_ID_GENERATE,
>  	.hdrsize        = 0,
> @@ -59,7 +69,8 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
> 
>  static int dm_hit_limit = 64;
>  static int dm_delay = 1;
> -
> +static unsigned long dm_hw_check_delta = 2*HZ;
> +static LIST_HEAD(hw_stats_list);
> 
>  static void reset_per_cpu_data(struct per_cpu_dm_data *data)
>  {
> @@ -115,7 +126,7 @@ static void sched_send_work(unsigned long unused)
>  	schedule_work(&data->dm_alert_work);
>  }
> 
> -static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
> +static void trace_drop_common(struct sk_buff *skb, void *location)
>  {
>  	struct net_dm_alert_msg *msg;
>  	struct nlmsghdr *nlh;
> @@ -159,26 +170,79 @@ out:
>  	return;
>  }
> 
> +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
> +{
> +	trace_drop_common(skb, location);
> +}
> +
> +static void trace_napi_poll_hit(struct napi_struct *napi)
> +{
> +	struct dm_hw_stat_delta *new_stat;
> +
> +	/*
> +	 * Ratelimit our check time to dm_hw_check_delta jiffies
> +	 */
> +	if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta))
> +		return;
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
> +		if ((new_stat->dev == napi->dev)  &&
> +		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
> +			trace_drop_common(NULL, NULL);
> +			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
> +			break;
> +		}
> +	}
> +	rcu_read_unlock();
> +}
> +
> +
> +static void free_dm_hw_stat(struct rcu_head *head)
> +{
> +	struct dm_hw_stat_delta *n;
> +	n = container_of(head, struct dm_hw_stat_delta, rcu);
> +	kfree(n);
> +}
> +
>  static int set_all_monitor_traces(int state)
>  {
>  	int rc = 0;
> +	struct dm_hw_stat_delta *new_stat = NULL;
> +
> +	spin_lock(&trace_state_lock);
> 
>  	switch (state) {
>  	case TRACE_ON:
>  		rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
> +		rc |= register_trace_napi_poll(trace_napi_poll_hit);
>  		break;
>  	case TRACE_OFF:
>  		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
> +		rc |= unregister_trace_napi_poll(trace_napi_poll_hit);
> 
>  		tracepoint_synchronize_unregister();
> +
> +		/*
> +		 * Clean the device list
> +		 */
> +		list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
> +			if (new_stat->dev == NULL) {
> +				list_del_rcu(&new_stat->list);
> +				call_rcu(&new_stat->rcu, free_dm_hw_stat);
> +			}
> +		}
>  		break;
>  	default:
>  		rc = 1;
>  		break;
>  	}
> 
> +	spin_unlock(&trace_state_lock);
> +
>  	if (rc)
>  		return -EINPROGRESS;
> +	trace_state = state;
>  	return rc;
>  }
> 
> @@ -204,6 +268,38 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
>  	return -ENOTSUPP;
>  }
> 
> +static int dropmon_net_event(struct notifier_block *ev_block,
> +			unsigned long event, void *ptr)
> +{
> +	struct net_device *dev = ptr;
> +	struct dm_hw_stat_delta *new_stat = NULL;
> +
> +	switch (event) {
> +	case NETDEV_REGISTER:
> +		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
> +
> +		if (!new_stat)
> +			goto out;
> +
> +		new_stat->dev = dev;
> +		INIT_RCU_HEAD(&new_stat->rcu);
> +		spin_lock(&trace_state_lock);
> +		list_add_rcu(&new_stat->list, &hw_stats_list);
> +		spin_unlock(&trace_state_lock);
> +		break;
> +	case NETDEV_UNREGISTER:
> +		rcu_read_lock();
> +		list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
> +			if (new_stat->dev == dev)
> +				new_stat->dev = NULL;
> +				break;
> +		}
> +		rcu_read_unlock();
> +		break;
> +	}
> +out:
> +	return NOTIFY_DONE;
> +}
> 
>  static struct genl_ops dropmon_ops[] = {
>  	{
> @@ -220,6 +316,10 @@ static struct genl_ops dropmon_ops[] = {
>  	},
>  };
> 
> +static struct notifier_block dropmon_net_notifier = {
> +	.notifier_call = dropmon_net_event
> +};
> +
>  static int __init init_net_drop_monitor(void)
>  {
>  	int cpu;
> @@ -243,12 +343,18 @@ static int __init init_net_drop_monitor(void)
>  		ret = genl_register_ops(&net_drop_monitor_family,
>  					&dropmon_ops[i]);
>  		if (ret) {
> -			printk(KERN_CRIT "failed to register operation %d\n",
> +			printk(KERN_CRIT "Failed to register operation %d\n",
>  				dropmon_ops[i].cmd);
>  			goto out_unreg;
>  		}
>  	}
> 
> +	rc = register_netdevice_notifier(&dropmon_net_notifier);
> +	if (rc < 0) {
> +		printk(KERN_CRIT "Failed to register netdevice notifier\n");
> +		goto out_unreg;
> +	}
> +
>  	rc = 0;
> 
>  	for_each_present_cpu(cpu) {
> @@ -259,6 +365,7 @@ static int __init init_net_drop_monitor(void)
>  		data->send_timer.data = cpu;
>  		data->send_timer.function = sched_send_work;
>  	}
> +
>  	goto out;
> 
>  out_unreg:
> diff --git a/net/core/net-traces.c b/net/core/net-traces.c
> index c8fb456..b07b25b 100644
> --- a/net/core/net-traces.c
> +++ b/net/core/net-traces.c
> @@ -20,6 +20,7 @@
>  #include <linux/netlink.h>
>  #include <linux/net_dropmon.h>
>  #include <trace/skb.h>
> +#include <trace/napi.h>
> 
>  #include <asm/unaligned.h>
>  #include <asm/bitops.h>
> @@ -27,3 +28,6 @@
> 
>  DEFINE_TRACE(kfree_skb);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
> +
> +DEFINE_TRACE(napi_poll);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
> diff --git a/net/core/netpoll.c b/net/core/netpoll.c
> index b5873bd..446d3ba 100644
> --- a/net/core/netpoll.c
> +++ b/net/core/netpoll.c
> @@ -24,6 +24,7 @@
>  #include <net/tcp.h>
>  #include <net/udp.h>
>  #include <asm/unaligned.h>
> +#include <trace/napi.h>
> 
>  /*
>   * We maintain a small pool of fully-sized skbs, to make sure the
> @@ -137,6 +138,7 @@ static int poll_one_napi(struct netpoll_info *npinfo,
>  	set_bit(NAPI_STATE_NPSVC, &napi->state);
> 
>  	work = napi->poll(napi, budget);
> +	trace_napi_poll(napi->dev);
> 
>  	clear_bit(NAPI_STATE_NPSVC, &napi->state);
>  	atomic_dec(&trapped);
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

      parent reply	other threads:[~2009-05-14 16:18 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-08 19:50 [PATCH] dropmon: add ability to detect when hardware drops rx packets Neil Horman
2009-05-09  6:30 ` Eric Dumazet
2009-05-09 18:07   ` Neil Horman
2009-05-12 16:30   ` Neil Horman
2009-05-13 18:23     ` [PATCH] dropmon: add ability to detect when hardware drops rxpackets Paul E. McKenney
2009-05-14  0:45       ` Neil Horman
2009-05-14  1:03         ` [PATCH] dropmon: add ability to detect when hardware dropsrxpackets Paul E. McKenney
2009-05-14 12:33           ` Neil Horman
2009-05-14 12:44             ` Jiri Pirko
2009-05-14 16:17               ` [PATCH] dropmon: add ability to detect when hardwaredropsrxpackets Paul E. McKenney
2009-05-14 17:29               ` [PATCH] dropmon: add ability to detect when hardware dropsrxpackets Neil Horman
2009-05-15  5:49                 ` Jarek Poplawski
2009-05-15 11:01                   ` Neil Horman
2009-05-15 11:12                     ` Jarek Poplawski
2009-05-15 11:15                       ` Neil Horman
2009-05-15 11:40                         ` Jarek Poplawski
2009-05-16  0:07                           ` Paul E. McKenney
2009-05-15  6:51                 ` Jiri Pirko
2009-05-15  7:37                   ` Eric Dumazet
2009-05-15 11:12                     ` Neil Horman
2009-05-15 10:59                   ` Neil Horman
2009-05-15 11:27                     ` Jiri Pirko
2009-05-15 16:07                       ` Neil Horman
2009-05-15 18:11                         ` Eric Dumazet
2009-05-15 18:23                           ` Stephen Hemminger
2009-05-15 19:53                             ` Neil Horman
2009-05-15 19:23                           ` Neil Horman
2009-05-16 12:40                           ` Neil Horman
2009-05-18 14:46                             ` Neil Horman
2009-05-21  7:17                               ` David Miller
2009-05-21 17:36                                 ` Neil Horman
2009-05-21 22:15                                   ` David Miller
2009-05-22  0:09                                     ` Neil Horman
2009-05-15 18:18                         ` Stephen Hemminger
2009-05-15 19:12                           ` Neil Horman
2009-05-14 16:18             ` Paul E. McKenney [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090514161853.GI6744@linux.vnet.ibm.com \
    --to=paulmck@linux.vnet.ibm.com \
    --cc=dada1@cosmosbay.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=nhorman@tuxdriver.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.