Netdev List

Netdev List
 help / color / mirror / Atom feed

* [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Cong Wang @ 2014-10-24 23:55 UTC (permalink / raw)
  To: netdev
  Cc: davem, Cong Wang, Wang Bo, John Fastabend, Eric Dumazet,
	Patrick McHardy, Terry Lam
In-Reply-To: <1414194959-28006-1-git-send-email-xiyou.wangcong@gmail.com>

In qdisc_create(), when ->init() exists and it fails, we should
call ->destroy() to clean up the potentially partially initialized
qdisc's. This will also make the ->init() implementation easier.

qdisc_create_dflt() already does that. Most callers of qdisc_create_dflt()
simply use noop_qdisc when it fails.

And, most of the ->destroy() implementations are already able to clean up
partially initialization, we don't need to worry.

The following ->init()'s need to catch up:
fq_codel_init(), hhf_init(), multiq_init(),  sfq_init(), mq_init(),
mqprio_init().

Reported-by: Wang Bo <wang.bo116@zte.com.cn>
Cc: Wang Bo <wang.bo116@zte.com.cn>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Terry Lam <vtlam@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 net/sched/sch_api.c      |  2 ++
 net/sched/sch_drr.c      |  1 -
 net/sched/sch_fq_codel.c |  6 +++---
 net/sched/sch_generic.c  |  1 +
 net/sched/sch_hhf.c      |  8 ++------
 net/sched/sch_mq.c       |  6 +-----
 net/sched/sch_mqprio.c   | 18 +++++-------------
 net/sched/sch_multiq.c   |  9 ++-------
 net/sched/sch_qfq.c      |  1 -
 net/sched/sch_sfq.c      |  7 ++++---
 10 files changed, 20 insertions(+), 39 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 76f402e..7c308c9 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -990,6 +990,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 		qdisc_list_add(sch);
 
 		return sch;
+	} else {
+		goto err_out4;
 	}
 err_out3:
 	dev_put(dev);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 3387060..6c69b88 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -121,7 +121,6 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 					    qdisc_root_sleeping_lock(sch),
 					    tca[TCA_RATE]);
 		if (err) {
-			qdisc_destroy(cl->qdisc);
 			kfree(cl);
 			return err;
 		}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index b9ca32e..05c725f 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -406,11 +406,11 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 					   sizeof(struct fq_codel_flow));
 		if (!q->flows)
 			return -ENOMEM;
+
 		q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32));
-		if (!q->backlogs) {
-			fq_codel_free(q->flows);
+		if (!q->backlogs)
 			return -ENOMEM;
-		}
+
 		for (i = 0; i < q->flows_cnt; i++) {
 			struct fq_codel_flow *flow = q->flows + i;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6efca30..d1e2ed6 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -622,6 +622,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	return ERR_PTR(err);
 }
 
+/* Callers don't need to clean up on failure, we do here. */
 struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 				const struct Qdisc_ops *ops,
 				unsigned int parentid)
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 15d3aab..8f94bb9 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -639,10 +639,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
 		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
 			q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN *
 						      sizeof(u32));
-			if (!q->hhf_arrays[i]) {
-				hhf_destroy(sch);
+			if (!q->hhf_arrays[i])
 				return -ENOMEM;
-			}
 		}
 		q->hhf_arrays_reset_timestamp = hhf_time_stamp();
 
@@ -650,10 +648,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
 		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
 			q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN /
 							  BITS_PER_BYTE);
-			if (!q->hhf_valid_bits[i]) {
-				hhf_destroy(sch);
+			if (!q->hhf_valid_bits[i])
 				return -ENOMEM;
-			}
 		}
 
 		/* Initialize Weighted DRR buckets. */
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3cbaec..8f009c9 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -61,17 +61,13 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
 					  TC_H_MAKE(TC_H_MAJ(sch->handle),
 						    TC_H_MIN(ntx + 1)));
 		if (qdisc == NULL)
-			goto err;
+			return -ENOMEM;
 		priv->qdiscs[ntx] = qdisc;
 		qdisc->flags |= TCQ_F_ONETXQUEUE;
 	}
 
 	sch->flags |= TCQ_F_MQROOT;
 	return 0;
-
-err:
-	mq_destroy(sch);
-	return -ENOMEM;
 }
 
 static void mq_attach(struct Qdisc *sch)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 3811a74..d172819 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -117,20 +117,16 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 	/* pre-allocate qdisc, attachment can't fail */
 	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
 			       GFP_KERNEL);
-	if (priv->qdiscs == NULL) {
-		err = -ENOMEM;
-		goto err;
-	}
+	if (priv->qdiscs == NULL)
+		return -ENOMEM;
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		dev_queue = netdev_get_tx_queue(dev, i);
 		qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
 					  TC_H_MAKE(TC_H_MAJ(sch->handle),
 						    TC_H_MIN(i + 1)));
-		if (qdisc == NULL) {
-			err = -ENOMEM;
-			goto err;
-		}
+		if (qdisc == NULL)
+			return -ENOMEM;
 		priv->qdiscs[i] = qdisc;
 		qdisc->flags |= TCQ_F_ONETXQUEUE;
 	}
@@ -143,7 +139,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 		priv->hw_owned = 1;
 		err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
 		if (err)
-			goto err;
+			return err;
 	} else {
 		netdev_set_num_tc(dev, qopt->num_tc);
 		for (i = 0; i < qopt->num_tc; i++)
@@ -157,10 +153,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 
 	sch->flags |= TCQ_F_MQROOT;
 	return 0;
-
-err:
-	mqprio_destroy(sch);
-	return err;
 }
 
 static void mqprio_attach(struct Qdisc *sch)
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 42dd218..31dd2d2 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -252,7 +252,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
-	int i, err;
+	int i;
 
 	q->queues = NULL;
 
@@ -267,12 +267,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 	for (i = 0; i < q->max_bands; i++)
 		q->queues[i] = &noop_qdisc;
 
-	err = multiq_tune(sch, opt);
-
-	if (err)
-		kfree(q->queues);
-
-	return err;
+	return multiq_tune(sch, opt);
 }
 
 static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3ec7e88..55ac6a4 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -522,7 +522,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	return 0;
 
 destroy_class:
-	qdisc_destroy(cl->qdisc);
 	kfree(cl);
 	return err;
 }
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b877140..7ad2879 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -761,11 +761,12 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
+	if (!q->ht)
+		return -ENOMEM;
 	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
-	if (!q->ht || !q->slots) {
-		sfq_destroy(sch);
+	if (!q->slots)
 		return -ENOMEM;
-	}
+
 	for (i = 0; i < q->divisor; i++)
 		q->ht[i] = SFQ_EMPTY_SLOT;
 
-- 
1.8.3.1

^ permalink raw reply related

* Re: [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Eric Dumazet @ 2014-10-25  0:14 UTC (permalink / raw)
  To: Cong Wang
  Cc: netdev, davem, Wang Bo, John Fastabend, Eric Dumazet,
	Patrick McHardy, Terry Lam
In-Reply-To: <1414194959-28006-2-git-send-email-xiyou.wangcong@gmail.com>

On Fri, 2014-10-24 at 16:55 -0700, Cong Wang wrote:
> In qdisc_create(), when ->init() exists and it fails, we should
> call ->destroy() to clean up the potentially partially initialized
> qdisc's. This will also make the ->init() implementation easier.
> 

Why is this patch needed ?

You are adding bugs, its not clear what bug you are fixing.

I really do not like the idea of ->init() relying on a ->destroy() to
cleanup a failed ->init().

This is not what most management functions do in our stack.

I very much prefer that a function returning an error has no side
effect, like if it hadnt be called at all.

> qdisc_create_dflt() already does that. Most callers of qdisc_create_dflt()
> simply use noop_qdisc when it fails.
> 
> And, most of the ->destroy() implementations are already able to clean up
> partially initialization, we don't need to worry.
> 
> The following ->init()'s need to catch up:
> fq_codel_init(), hhf_init(), multiq_init(),  sfq_init(), mq_init(),
> mqprio_init().
> 

...

>  
>  static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
> diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
> index 3ec7e88..55ac6a4 100644
> --- a/net/sched/sch_qfq.c
> +++ b/net/sched/sch_qfq.c
> @@ -522,7 +522,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
>  	return 0;
>  
>  destroy_class:
> -	qdisc_destroy(cl->qdisc);
>  	kfree(cl);
>  	return err;
>  }


Really ? I am calling this a leak of cl->qdisc.

^ permalink raw reply

* Re: [Patch net 1/2] sch_pie: schedule the timer after all init succeed
From: Eric Dumazet @ 2014-10-25  0:17 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, davem, Vijay Subramanian
In-Reply-To: <1414194959-28006-1-git-send-email-xiyou.wangcong@gmail.com>

On Fri, 2014-10-24 at 16:55 -0700, Cong Wang wrote:
> Cc: Vijay Subramanian <vijaynsu@cisco.com>
> Cc: David S. Miller <davem@davemloft.net>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> ---
>  net/sched/sch_pie.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Acked-by: Eric Dumazet <edumazet@google.com>

Not sure why you use "Cc: David S. Miller <davem@davemloft.net>",
given that all networking patches have "Signed-off-by: David S. Miller
<davem@davemloft.net>" ???

^ permalink raw reply

* Re: localed stuck in recent 3.18 git in copy_net_ns?
From: Jay Vosburgh @ 2014-10-25  0:20 UTC (permalink / raw)
  To: paulmck
  Cc: Yanko Kaneti, Josh Boyer, Eric W. Biederman, Cong Wang,
	Kevin Fenzi, netdev, Linux-Kernel@Vger. Kernel. Org, mroos, tj
In-Reply-To: <20141024230524.GA16023@linux.vnet.ibm.com>

Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:

>On Fri, Oct 24, 2014 at 03:59:31PM -0700, Paul E. McKenney wrote:
[...]
>> Hmmm...  It sure looks like we have some callbacks stuck here.  I clearly
>> need to take a hard look at the sleep/wakeup code.
>> 
>> Thank you for running this!!!
>
>Could you please try the following patch?  If no joy, could you please
>add rcu:rcu_nocb_wake to the list of ftrace events?

	I tried the patch, it did not change the behavior.

	I enabled the rcu:rcu_barrier and rcu:rcu_nocb_wake tracepoints
and ran it again (with this patch and the first patch from earlier
today); the trace output is a bit on the large side so I put it and the
dmesg log at:

http://people.canonical.com/~jvosburgh/nocb-wake-dmesg.txt

http://people.canonical.com/~jvosburgh/nocb-wake-trace.txt

	-J


>							Thanx, Paul
>
>------------------------------------------------------------------------
>
>rcu: Kick rcuo kthreads after their CPU goes offline
>
>If a no-CBs CPU were to post an RCU callback with interrupts disabled
>after it entered the idle loop for the last time, there might be no
>deferred wakeup for the corresponding rcuo kthreads.  This commit
>therefore adds a set of calls to do_nocb_deferred_wakeup() after the
>CPU has gone completely offline.
>
>Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
>
>diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
>index 84b41b3c6ebd..f6880052b917 100644
>--- a/kernel/rcu/tree.c
>+++ b/kernel/rcu/tree.c
>@@ -3493,8 +3493,10 @@ static int rcu_cpu_notify(struct notifier_block *self,
> 	case CPU_DEAD_FROZEN:
> 	case CPU_UP_CANCELED:
> 	case CPU_UP_CANCELED_FROZEN:
>-		for_each_rcu_flavor(rsp)
>+		for_each_rcu_flavor(rsp) {
> 			rcu_cleanup_dead_cpu(cpu, rsp);
>+			do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu));
>+		}
> 		break;
> 	default:
> 		break;
>

---
	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply

* Re: [Patch net 1/2] sch_pie: schedule the timer after all init succeed
From: Cong Wang @ 2014-10-25  0:28 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Cong Wang, netdev, David Miller, Vijay Subramanian
In-Reply-To: <1414196222.20845.48.camel@edumazet-glaptop2.roam.corp.google.com>

On Fri, Oct 24, 2014 at 5:17 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> Not sure why you use "Cc: David S. Miller <davem@davemloft.net>",
> given that all networking patches have "Signed-off-by: David S. Miller
> <davem@davemloft.net>" ???
>

I don't understand what you are talking about:

If you are suggesting DaveM is too obvious to Cc, well, Cc'ing him
doesn't harm anything. I know he is on netdev list, but just in case
vger.kernel.org is broken or etc...

If you are suggesting to Cc his another email address, please update
MAINTAINERS.

^ permalink raw reply

* Re: [PATCH net] net/sched: Fix use of wild pointer in mq_destroy() when qdisc_alloc fail
From: Patrick McHardy @ 2014-10-25  0:33 UTC (permalink / raw)
  To: Cong Wang; +Cc: John Fastabend, wang.bo116, David Miller, netdev, cui.yunfeng
In-Reply-To: <CAHA+R7PxvW6umBJMxGk0h=ht4RhoDRcrxr5WSTEUBa80TxU5GQ@mail.gmail.com>

On Fri, Oct 24, 2014 at 03:17:41PM -0700, Cong Wang wrote:
> On Fri, Oct 24, 2014 at 2:45 PM, Patrick McHardy <kaber@trash.net> wrote:
> >> >
> >> > I would argue that the qdisc_destroy() call in qdisc_create_dflt()
> >> > is wrong, it should instead free the qdisc and release the module
> >> > reference manually as done in qdisc_create().
> >> >
> >> > qdisc_destroy() should only be called for fully initialized qdiscs.
> >>
> >> Probably, but at least ->destroy() should be called, looking at
> >> those calling qdisc_watchdog_init(), they are supposed to call
> >> qdisc_watchdog_cancel() when >init() fails after that.
> >
> > In which cases does it actually fail after that? Usually this is
> > called once initialization is complete.
> 
> How about tbf_change() in tbf_init()? If tbf_change() fails,
> watchdog is still there if we don't call ->destroy(). Yes,
> I know the timer is started, the point is we do miss something
> clean up, even trivial.
> 
> tbf is not the only one who calls xxx_change() in xxx_init().

Then these are bugs. On failure we exit with the same state as we
entered, there's nothing new about that.

This in fact is *no* bug though since qdisc_watchdog_init() merely
initializes the timer and doesn't require cleanup.

> > Its simply symetrical, as everywhere else in the kernel. If a sub-init
> > funtion fails, it should clean up and return an error. We don't
> > destroy things we've never successfully initialized, they're supposed
> > to clean up after themselves.
> 
> Most (if not all) ->destroy() are able to clean partially initialized qdisc,
> I don't see why it  could be a problem here.
> 
> We don't have to keep with other kernel subsystem as long as it makes
> sense, net_sched subsystem is pretty much self-contained.

Its about having a sane API.

^ permalink raw reply

* Re: [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Cong Wang @ 2014-10-25  0:36 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Cong Wang, netdev, David Miller, Wang Bo, John Fastabend,
	Eric Dumazet, Patrick McHardy, Terry Lam
In-Reply-To: <1414196053.20845.45.camel@edumazet-glaptop2.roam.corp.google.com>

On Fri, Oct 24, 2014 at 5:14 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Fri, 2014-10-24 at 16:55 -0700, Cong Wang wrote:
>> In qdisc_create(), when ->init() exists and it fails, we should
>> call ->destroy() to clean up the potentially partially initialized
>> qdisc's. This will also make the ->init() implementation easier.
>>
>
> Why is this patch needed ?
>
> You are adding bugs, its not clear what bug you are fixing.


You missed what Wang Bo reported, we could double free
mq qdisc on failure path, one in mq_init() and one in qdisc_create_dflt().


>
> I really do not like the idea of ->init() relying on a ->destroy() to
> cleanup a failed ->init().
>
> This is not what most management functions do in our stack.
>
> I very much prefer that a function returning an error has no side
> effect, like if it hadnt be called at all.


If you would like to fix all qdisc_create_dflt() callers, yes, and good
luck with even more complex error handling there.

Read the diffstat, if we could fix a bug with less code, why not....

>
>> qdisc_create_dflt() already does that. Most callers of qdisc_create_dflt()
>> simply use noop_qdisc when it fails.
>>
>> And, most of the ->destroy() implementations are already able to clean up
>> partially initialization, we don't need to worry.
>>
>> The following ->init()'s need to catch up:
>> fq_codel_init(), hhf_init(), multiq_init(),  sfq_init(), mq_init(),
>> mqprio_init().
>>
>
> ...
>
>>
>>  static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
>> diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
>> index 3ec7e88..55ac6a4 100644
>> --- a/net/sched/sch_qfq.c
>> +++ b/net/sched/sch_qfq.c
>> @@ -522,7 +522,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
>>       return 0;
>>
>>  destroy_class:
>> -     qdisc_destroy(cl->qdisc);
>>       kfree(cl);
>>       return err;
>>  }
>
>
> Really ? I am calling this a leak of cl->qdisc.
>
>

How? I already added a comment on qdisc_create_dflt(), callers don't
need to clean up
on failure path.

^ permalink raw reply

* Re: [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Patrick McHardy @ 2014-10-25  0:36 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Cong Wang, netdev, davem, Wang Bo, John Fastabend, Eric Dumazet,
	Terry Lam
In-Reply-To: <1414196053.20845.45.camel@edumazet-glaptop2.roam.corp.google.com>

On Fri, Oct 24, 2014 at 05:14:13PM -0700, Eric Dumazet wrote:
> On Fri, 2014-10-24 at 16:55 -0700, Cong Wang wrote:
> > In qdisc_create(), when ->init() exists and it fails, we should
> > call ->destroy() to clean up the potentially partially initialized
> > qdisc's. This will also make the ->init() implementation easier.
> > 
> 
> Why is this patch needed ?
> 
> You are adding bugs, its not clear what bug you are fixing.
> 
> I really do not like the idea of ->init() relying on a ->destroy() to
> cleanup a failed ->init().
> 
> This is not what most management functions do in our stack.
> 
> I very much prefer that a function returning an error has no side
> effect, like if it hadnt be called at all.

Absolutely.

Again, the correct fix is to make qdisc_create_dflt() not call
qdisc_destroy() but clean up the qdisc manually as done in
qdisc_create().

^ permalink raw reply

* Re: [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Cong Wang @ 2014-10-25  0:38 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Eric Dumazet, Cong Wang, netdev, David Miller, Wang Bo,
	John Fastabend, Eric Dumazet, Terry Lam
In-Reply-To: <20141025003644.GB11289@acer.localdomain>

On Fri, Oct 24, 2014 at 5:36 PM, Patrick McHardy <kaber@trash.net> wrote:
>
> Again, the correct fix is to make qdisc_create_dflt() not call
> qdisc_destroy() but clean up the qdisc manually as done in
> qdisc_create().

I kindly wish you a good luck with fixing all callers of qdisc_create_dflt().
Go ahead. :)

^ permalink raw reply

* Re: [Patch net 1/2] sch_pie: schedule the timer after all init succeed
From: Eric Dumazet @ 2014-10-25  0:39 UTC (permalink / raw)
  To: Cong Wang; +Cc: Cong Wang, netdev, David Miller, Vijay Subramanian
In-Reply-To: <CAHA+R7PFSCACHyrU8fa4PVLOF1OtJiFiw4peLmFPzEu6-adaeQ@mail.gmail.com>

On Fri, 2014-10-24 at 17:28 -0700, Cong Wang wrote:
> On Fri, Oct 24, 2014 at 5:17 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> > Not sure why you use "Cc: David S. Miller <davem@davemloft.net>",
> > given that all networking patches have "Signed-off-by: David S. Miller
> > <davem@davemloft.net>" ???
> >
> 
> I don't understand what you are talking about:
> 
> If you are suggesting DaveM is too obvious to Cc, well, Cc'ing him
> doesn't harm anything. I know he is on netdev list, but just in case
> vger.kernel.org is broken or etc...
> 
> If you are suggesting to Cc his another email address, please update
> MAINTAINERS.


You send the patch to David Miller, as a recipient, so that he can read
it, and apply it.

You don't need to add one line in the changelog.

Why would it be needed ? Think about it.

^ permalink raw reply

* Re: [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Eric Dumazet @ 2014-10-25  0:40 UTC (permalink / raw)
  To: Cong Wang
  Cc: Cong Wang, netdev, David Miller, Wang Bo, John Fastabend,
	Eric Dumazet, Patrick McHardy, Terry Lam
In-Reply-To: <CAHA+R7N3jJCgOJjnDUxsTaH7ZCvPydDT60qkcqSPsEyFbp=UQg@mail.gmail.com>

On Fri, 2014-10-24 at 17:36 -0700, Cong Wang wrote:

> >>  static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
> >> diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
> >> index 3ec7e88..55ac6a4 100644
> >> --- a/net/sched/sch_qfq.c
> >> +++ b/net/sched/sch_qfq.c
> >> @@ -522,7 +522,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
> >>       return 0;
> >>
> >>  destroy_class:
> >> -     qdisc_destroy(cl->qdisc);
> >>       kfree(cl);
> >>       return err;
> >>  }
> >
> >
> > Really ? I am calling this a leak of cl->qdisc.
> >
> >
> 
> How? I already added a comment on qdisc_create_dflt(), callers don't
> need to clean up
> on failure path.

Seriously, can you read the function again ?

What happens if gen_new_estimator() fails ?

^ permalink raw reply

* Re: [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Eric Dumazet @ 2014-10-25  0:42 UTC (permalink / raw)
  To: Cong Wang
  Cc: Cong Wang, netdev, David Miller, Wang Bo, John Fastabend,
	Eric Dumazet, Patrick McHardy, Terry Lam
In-Reply-To: <CAHA+R7N3jJCgOJjnDUxsTaH7ZCvPydDT60qkcqSPsEyFbp=UQg@mail.gmail.com>

On Fri, 2014-10-24 at 17:36 -0700, Cong Wang wrote:

> You missed what Wang Bo reported, we could double free
> mq qdisc on failure path, one in mq_init() and one in qdisc_create_dflt().

So fix mq/mqprio ?

I am sorry, but sending patches on Friday afternoon should be forbidden,
too much beer lying around ;)

^ permalink raw reply

* Re: [Patch net 1/2] sch_pie: schedule the timer after all init succeed
From: Cong Wang @ 2014-10-25  0:42 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Cong Wang, netdev, David Miller, Vijay Subramanian
In-Reply-To: <1414197563.20845.50.camel@edumazet-glaptop2.roam.corp.google.com>

On Fri, Oct 24, 2014 at 5:39 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> You don't need to add one line in the changelog.
>
> Why would it be needed ? Think about it.


Simply because git-send-email reads Cc: so that I don't have to
specify in cmdline.
That's all.

OMG, first time to hear people complaining on Cc'ing too much! lol :)

^ permalink raw reply

* Re: [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Patrick McHardy @ 2014-10-25  0:44 UTC (permalink / raw)
  To: Cong Wang
  Cc: Eric Dumazet, Cong Wang, netdev, David Miller, Wang Bo,
	John Fastabend, Eric Dumazet, Terry Lam
In-Reply-To: <CAHA+R7M3y=Mj_L3yL0UM41mfGa+go+Kq4wYga7D6epY3uq_yWQ@mail.gmail.com>

On Fri, Oct 24, 2014 at 05:38:48PM -0700, Cong Wang wrote:
> On Fri, Oct 24, 2014 at 5:36 PM, Patrick McHardy <kaber@trash.net> wrote:
> >
> > Again, the correct fix is to make qdisc_create_dflt() not call
> > qdisc_destroy() but clean up the qdisc manually as done in
> > qdisc_create().
> 
> I kindly wish you a good luck with fixing all callers of qdisc_create_dflt().
> Go ahead. :)

Here you go:

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index fc04fe9..3a71e51 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -590,18 +590,21 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 	struct Qdisc *sch;
 
 	if (!try_module_get(ops->owner))
-		goto errout;
+		goto err1;
 
 	sch = qdisc_alloc(dev_queue, ops);
 	if (IS_ERR(sch))
-		goto errout;
+		goto err2;
 	sch->parent = parentid;
 
 	if (!ops->init || ops->init(sch, NULL) == 0)
 		return sch;
 
-	qdisc_destroy(sch);
-errout:
+	dev_put(sch->dev_queue->dev);
+	kfree((char *)sch - sch->padded);
+err2:
+	module_put(ops->owner);
+err1:
 	return NULL;
 }
 EXPORT_SYMBOL(qdisc_create_dflt);

^ permalink raw reply related

* Re: [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Cong Wang @ 2014-10-25  0:53 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Eric Dumazet, Cong Wang, netdev, David Miller, Wang Bo,
	John Fastabend, Eric Dumazet, Terry Lam
In-Reply-To: <20141025004403.GC11289@acer.localdomain>

On Fri, Oct 24, 2014 at 5:44 PM, Patrick McHardy <kaber@trash.net> wrote:
> On Fri, Oct 24, 2014 at 05:38:48PM -0700, Cong Wang wrote:
>> On Fri, Oct 24, 2014 at 5:36 PM, Patrick McHardy <kaber@trash.net> wrote:
>> >
>> > Again, the correct fix is to make qdisc_create_dflt() not call
>> > qdisc_destroy() but clean up the qdisc manually as done in
>> > qdisc_create().
>>
>> I kindly wish you a good luck with fixing all callers of qdisc_create_dflt().
>> Go ahead. :)
>
> Here you go:
>

...

Did you check all ->init() call ->destroy() on failure? Look at the
sch_pie I have fixed in 1/2.

Also check those xxx_init() calling xxx_change().

Really, we don't have to make all ->init() doing cleanup by itself.

^ permalink raw reply

* Re: [PATCH net] net/sched: Fix use of wild pointer in mq_destroy() when qdisc_alloc fail
From: Cong Wang @ 2014-10-25  0:57 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: John Fastabend, Wang Bo, David Miller, netdev, cui.yunfeng
In-Reply-To: <20141025003348.GA11289@acer.localdomain>

On Fri, Oct 24, 2014 at 5:33 PM, Patrick McHardy <kaber@trash.net> wrote:
>
> Its about having a sane API.

I don't see why calling ->destroy() on failure is not sane in qdisc case.
I never want to argue general case.

^ permalink raw reply

* Re: netfilter
From: Jeff Kirsher @ 2014-10-25  1:04 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: netdev@vger.kernel.org
In-Reply-To: <544A9DCF.3010409@mojatatu.com>

I assume you are looking for sponsers for 2015 since July 2014 has
already passed... :-)

On Fri, Oct 24, 2014 at 11:43 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
>



-- 
Cheers,
Jeff

^ permalink raw reply

* Re: [Patch net 2/2] net_sched: always call ->destroy when ->init() fails
From: Patrick McHardy @ 2014-10-25  1:04 UTC (permalink / raw)
  To: Cong Wang
  Cc: Eric Dumazet, Cong Wang, netdev, David Miller, Wang Bo,
	John Fastabend, Eric Dumazet, Terry Lam
In-Reply-To: <CAHA+R7NBr_K4fZ-nr2hi+isbxJt92D9o-q+n9xq5Y1rHv14F7Q@mail.gmail.com>

On Fri, Oct 24, 2014 at 05:53:33PM -0700, Cong Wang wrote:
> On Fri, Oct 24, 2014 at 5:44 PM, Patrick McHardy <kaber@trash.net> wrote:
> > On Fri, Oct 24, 2014 at 05:38:48PM -0700, Cong Wang wrote:
> >> On Fri, Oct 24, 2014 at 5:36 PM, Patrick McHardy <kaber@trash.net> wrote:
> >> >
> >> > Again, the correct fix is to make qdisc_create_dflt() not call
> >> > qdisc_destroy() but clean up the qdisc manually as done in
> >> > qdisc_create().
> >>
> >> I kindly wish you a good luck with fixing all callers of qdisc_create_dflt().
> >> Go ahead. :)
> >
> > Here you go:
> >
> 
> ...
> 
> Did you check all ->init() call ->destroy() on failure? Look at the
> sch_pie I have fixed in 1/2.

Why should they? They need to clean up internally, how they do it is
entirely up to them.

> Also check those xxx_init() calling xxx_change().

Please point to conrete bugs if you have any doubts. Real ones, not things
like qdisc_watchdog_init(). This is how the API to which the qdiscs have
been written has always worked.

And yes, I did check the qdisc error paths many times in the past.

> Really, we don't have to make all ->init() doing cleanup by itself.

Are you seriously suggesting that it would be better to have ->destroy()
check what parts were actually initialized and what needs to be cleaned
up instead of assuming a consistent state and have the only function that
actually knows the current state on error (->init()) do its own cleanup?

That's not even worth arguing about, its utterly and completely wrong.

^ permalink raw reply

* Re: [PATCH net] net/sched: Fix use of wild pointer in mq_destroy() when qdisc_alloc fail
From: Patrick McHardy @ 2014-10-25  1:33 UTC (permalink / raw)
  To: Cong Wang; +Cc: John Fastabend, Wang Bo, David Miller, netdev, cui.yunfeng
In-Reply-To: <CAHA+R7MW-kP0uO2+00+pbx1y=LQynLWy9diznccY69+8nZ5mwQ@mail.gmail.com>

On Fri, Oct 24, 2014 at 05:57:59PM -0700, Cong Wang wrote:
> On Fri, Oct 24, 2014 at 5:33 PM, Patrick McHardy <kaber@trash.net> wrote:
> >
> > Its about having a sane API.
> 
> I don't see why calling ->destroy() on failure is not sane in qdisc case.
> I never want to argue general case.

Because it makes things more complicated. You need to keep track of what
was actually initialized since you can't assume a consistent state in
->destroy() anymore. If ->init() fails, it knows where it failed,
->destroy() can't know that.

Look at htb_destroy() for an example. It starts with

	cancel_work_sync(&q->work);

Was that actually initialized and can be cancled? You don't know.
Next comes

	qdisc_watchdog_cancel(&q->watchdog);

Same here, if the error happened before it was initialized, crash.

These are just the first two lines. You get the problem.

^ permalink raw reply

* Re: localed stuck in recent 3.18 git in copy_net_ns?
From: Paul E. McKenney @ 2014-10-25  2:03 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Yanko Kaneti, Josh Boyer, Eric W. Biederman, Cong Wang,
	Kevin Fenzi, netdev, Linux-Kernel@Vger. Kernel. Org, mroos, tj
In-Reply-To: <10136.1414196448@famine>

On Fri, Oct 24, 2014 at 05:20:48PM -0700, Jay Vosburgh wrote:
> Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> 
> >On Fri, Oct 24, 2014 at 03:59:31PM -0700, Paul E. McKenney wrote:
> [...]
> >> Hmmm...  It sure looks like we have some callbacks stuck here.  I clearly
> >> need to take a hard look at the sleep/wakeup code.
> >> 
> >> Thank you for running this!!!
> >
> >Could you please try the following patch?  If no joy, could you please
> >add rcu:rcu_nocb_wake to the list of ftrace events?
> 
> 	I tried the patch, it did not change the behavior.
> 
> 	I enabled the rcu:rcu_barrier and rcu:rcu_nocb_wake tracepoints
> and ran it again (with this patch and the first patch from earlier
> today); the trace output is a bit on the large side so I put it and the
> dmesg log at:
> 
> http://people.canonical.com/~jvosburgh/nocb-wake-dmesg.txt
> 
> http://people.canonical.com/~jvosburgh/nocb-wake-trace.txt

Thank you again!

Very strange part of the trace.  The only sign of CPU 2 and 3 are:

    ovs-vswitchd-902   [000] ....   109.896840: rcu_barrier: rcu_sched Begin cpu -1 remaining 0 # 0
    ovs-vswitchd-902   [000] ....   109.896840: rcu_barrier: rcu_sched Check cpu -1 remaining 0 # 0
    ovs-vswitchd-902   [000] ....   109.896841: rcu_barrier: rcu_sched Inc1 cpu -1 remaining 0 # 1
    ovs-vswitchd-902   [000] ....   109.896841: rcu_barrier: rcu_sched OnlineNoCB cpu 0 remaining 1 # 1
    ovs-vswitchd-902   [000] d...   109.896841: rcu_nocb_wake: rcu_sched 0 WakeNot
    ovs-vswitchd-902   [000] ....   109.896841: rcu_barrier: rcu_sched OnlineNoCB cpu 1 remaining 2 # 1
    ovs-vswitchd-902   [000] d...   109.896841: rcu_nocb_wake: rcu_sched 1 WakeNot
    ovs-vswitchd-902   [000] ....   109.896842: rcu_barrier: rcu_sched OnlineNoCB cpu 2 remaining 3 # 1
    ovs-vswitchd-902   [000] d...   109.896842: rcu_nocb_wake: rcu_sched 2 WakeNotPoll
    ovs-vswitchd-902   [000] ....   109.896842: rcu_barrier: rcu_sched OnlineNoCB cpu 3 remaining 4 # 1
    ovs-vswitchd-902   [000] d...   109.896842: rcu_nocb_wake: rcu_sched 3 WakeNotPoll
    ovs-vswitchd-902   [000] ....   109.896843: rcu_barrier: rcu_sched Inc2 cpu -1 remaining 4 # 2

The pair of WakeNotPoll trace entries says that at that point, RCU believed
that the CPU 2's and CPU 3's rcuo kthreads did not exist.  :-/

More diagnostics in order...

							Thanx, Paul

^ permalink raw reply

* Re: [PATCH V3.18] rtlwifi: Add check for get_btc_status callback
From: Mike Galbraith @ 2014-10-25  2:57 UTC (permalink / raw)
  To: Murilo Opsfelder Araujo
  Cc: Larry Finger, linville, linux-wireless, troy_tan, netdev,
	Thadeu Cascardo
In-Reply-To: <544AA3EE.7080806@gmail.com>

On Fri, 2014-10-24 at 17:09 -0200, Murilo Opsfelder Araujo wrote: 
> On 10/24/2014 04:13 PM, Murilo Opsfelder Araujo wrote:
> > On 10/24/2014 02:39 PM, Larry Finger wrote:
> > [...]
> >>
> >> Please try the attached patch. It replaces the second one I sent you. I
> >> will probably redo it before submitting the final copy, but this should
> >> work.
> >>
> >> Larry
> >>
> > Hi, Larry.
> >
> > I've tried your patch on top of next-20141023 and it is still crashing
> > on my laptop:
> >
> > http://opsfelder.com/~murilo/lkml/next-20141023_plus_larry_patch_v2.jpg
> >
> It seems a get_btc_status() check was still missing:
> 
> diff --git a/drivers/net/wireless/rtlwifi/pci.c 
> b/drivers/net/wireless/rtlwifi/pci.c
> index a5a350a..ed3364d 100644
> --- a/drivers/net/wireless/rtlwifi/pci.c
> +++ b/drivers/net/wireless/rtlwifi/pci.c
> @@ -1796,7 +1796,8 @@ static int rtl_pci_start(struct ieee80211_hw *hw)
>          rtl_pci_reset_trx_ring(hw);
> 
>          rtlpci->driver_is_goingto_unload = false;
> -       if (rtlpriv->cfg->ops->get_btc_status()) {
> +       if (rtlpriv->cfg->ops->get_btc_status &&
> +           rtlpriv->cfg->ops->get_btc_status()) {
>                  rtlpriv->btcoexist.btc_ops->btc_init_variables(rtlpriv);
>                  rtlpriv->btcoexist.btc_ops->btc_init_hal_vars(rtlpriv);
>          }
> 
> With this minor change and Larry's latest fix_misc_desc patch, 
> next-20141023 booted normally and kernel panic disappeared.
> 
> Now, there is no wifi network available or found by wlan0.

I don't need your addition, but our end results are the same.

[  312.126598] cfg80211: Calling CRDA to update world regulatory domain
[  312.131076] cfg80211: World regulatory domain updated:
[  312.131107] cfg80211:  DFS Master region: unset                                                                                                                                                                                          
[  312.131112] cfg80211:   (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)                                                                                                                                
[  312.131123] cfg80211:   (2402000 KHz - 2472000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)                                                                                                                                              
[  312.131129] cfg80211:   (2457000 KHz - 2482000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)                                                                                                                                              
[  312.131136] cfg80211:   (2474000 KHz - 2494000 KHz @ 20000 KHz), (300 mBi, 2000 mBm), (N/A)                                                                                                                                              
[  312.131142] cfg80211:   (5170000 KHz - 5250000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)                                                                                                                                              
[  312.131149] cfg80211:   (5735000 KHz - 5835000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)                                                                                                                                              
[  312.210452] rtl8192se: FW Power Save off (module option)                                                                                                                                                                                 
[  312.210561] rtl8192se: Driver for Realtek RTL8192SE/RTL8191SE                                                                                                                                                                            
[  312.210561] Loading firmware rtlwifi/rtl8192sefw.bin                                                                                                                                                                                     
[  312.212496] ieee80211 phy0: Selected rate control algorithm 'rtl_rc'
[  312.758613] IPv6: ADDRCONF(NETDEV_UP): wlan0: link is not ready

Interface doesn't associate.

3.17.1
[  266.849622] cfg80211: Calling CRDA to update world regulatory domain
[  266.853632] cfg80211: World regulatory domain updated:
[  266.853661] cfg80211:  DFS Master region: unset
[  266.853667] cfg80211:   (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)
[  266.853680] cfg80211:   (2402000 KHz - 2472000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)
[  266.853686] cfg80211:   (2457000 KHz - 2482000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)
[  266.853691] cfg80211:   (2474000 KHz - 2494000 KHz @ 20000 KHz), (300 mBi, 2000 mBm), (N/A)
[  266.853697] cfg80211:   (5170000 KHz - 5250000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)
[  266.853704] cfg80211:   (5735000 KHz - 5835000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)
[  266.926560] rtl8192se: FW Power Save off (module option)
[  266.926661] rtl8192se: Driver for Realtek RTL8192SE/RTL8191SE
[  266.926661] Loading firmware rtlwifi/rtl8192sefw.bin
[  266.927054] ieee80211 phy0: Selected rate control algorithm 'rtl_rc'
[  267.437850] IPv6: ADDRCONF(NETDEV_UP): wlan0: link is not ready
[  268.389049] wlan0: authenticate with 20:4e:7f:86:09:40
[  268.410174] wlan0: send auth to 20:4e:7f:86:09:40 (try 1/3)
[  268.411974] wlan0: authenticated
[  268.416222] wlan0: associate with 20:4e:7f:86:09:40 (try 1/3)
[  268.420555] wlan0: RX AssocResp from 20:4e:7f:86:09:40 (capab=0x431 status=0 aid=1)
[  268.423403] wlan0: associated
[  268.423427] IPv6: ADDRCONF(NETDEV_CHANGE): wlan0: link becomes ready

^ permalink raw reply

* Re: localed stuck in recent 3.18 git in copy_net_ns?
From: Paul E. McKenney @ 2014-10-25  5:16 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Yanko Kaneti, Josh Boyer, Eric W. Biederman, Cong Wang,
	Kevin Fenzi, netdev, Linux-Kernel@Vger. Kernel. Org, mroos, tj
In-Reply-To: <11813.1414211613@famine>

On Fri, Oct 24, 2014 at 09:33:33PM -0700, Jay Vosburgh wrote:
> Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> 
> >On Fri, Oct 24, 2014 at 05:20:48PM -0700, Jay Vosburgh wrote:
> >> Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> >> 
> >> >On Fri, Oct 24, 2014 at 03:59:31PM -0700, Paul E. McKenney wrote:
> >> [...]
> >> >> Hmmm...  It sure looks like we have some callbacks stuck here.  I clearly
> >> >> need to take a hard look at the sleep/wakeup code.
> >> >> 
> >> >> Thank you for running this!!!
> >> >
> >> >Could you please try the following patch?  If no joy, could you please
> >> >add rcu:rcu_nocb_wake to the list of ftrace events?
> >> 
> >> 	I tried the patch, it did not change the behavior.
> >> 
> >> 	I enabled the rcu:rcu_barrier and rcu:rcu_nocb_wake tracepoints
> >> and ran it again (with this patch and the first patch from earlier
> >> today); the trace output is a bit on the large side so I put it and the
> >> dmesg log at:
> >> 
> >> http://people.canonical.com/~jvosburgh/nocb-wake-dmesg.txt
> >> 
> >> http://people.canonical.com/~jvosburgh/nocb-wake-trace.txt
> >
> >Thank you again!
> >
> >Very strange part of the trace.  The only sign of CPU 2 and 3 are:
> >
> >    ovs-vswitchd-902   [000] ....   109.896840: rcu_barrier: rcu_sched Begin cpu -1 remaining 0 # 0
> >    ovs-vswitchd-902   [000] ....   109.896840: rcu_barrier: rcu_sched Check cpu -1 remaining 0 # 0
> >    ovs-vswitchd-902   [000] ....   109.896841: rcu_barrier: rcu_sched Inc1 cpu -1 remaining 0 # 1
> >    ovs-vswitchd-902   [000] ....   109.896841: rcu_barrier: rcu_sched OnlineNoCB cpu 0 remaining 1 # 1
> >    ovs-vswitchd-902   [000] d...   109.896841: rcu_nocb_wake: rcu_sched 0 WakeNot
> >    ovs-vswitchd-902   [000] ....   109.896841: rcu_barrier: rcu_sched OnlineNoCB cpu 1 remaining 2 # 1
> >    ovs-vswitchd-902   [000] d...   109.896841: rcu_nocb_wake: rcu_sched 1 WakeNot
> >    ovs-vswitchd-902   [000] ....   109.896842: rcu_barrier: rcu_sched OnlineNoCB cpu 2 remaining 3 # 1
> >    ovs-vswitchd-902   [000] d...   109.896842: rcu_nocb_wake: rcu_sched 2 WakeNotPoll
> >    ovs-vswitchd-902   [000] ....   109.896842: rcu_barrier: rcu_sched OnlineNoCB cpu 3 remaining 4 # 1
> >    ovs-vswitchd-902   [000] d...   109.896842: rcu_nocb_wake: rcu_sched 3 WakeNotPoll
> >    ovs-vswitchd-902   [000] ....   109.896843: rcu_barrier: rcu_sched Inc2 cpu -1 remaining 4 # 2
> >
> >The pair of WakeNotPoll trace entries says that at that point, RCU believed
> >that the CPU 2's and CPU 3's rcuo kthreads did not exist.  :-/
> 
> 	On the test system I'm using, CPUs 2 and 3 really do not exist;
> it is a 2 CPU system (Intel Core 2 Duo E8400). I mentioned this in an
> earlier message, but perhaps you missed it in the flurry.

Or forgot it.  Either way, thank you for reminding me.

> 	Looking at the dmesg, the early boot messages seem to be
> confused as to how many CPUs there are, e.g.,
> 
> [    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
> [    0.000000] Hierarchical RCU implementation.
> [    0.000000]  RCU debugfs-based tracing is enabled.
> [    0.000000]  RCU dyntick-idle grace-period acceleration is enabled.
> [    0.000000]  RCU restricting CPUs from NR_CPUS=256 to nr_cpu_ids=4.
> [    0.000000] RCU: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4
> [    0.000000] NR_IRQS:16640 nr_irqs:456 0
> [    0.000000]  Offload RCU callbacks from all CPUs
> [    0.000000]  Offload RCU callbacks from CPUs: 0-3.
> 
> 	but later shows 2:
> 
> [    0.233703] x86: Booting SMP configuration:
> [    0.236003] .... node  #0, CPUs:      #1
> [    0.255528] x86: Booted up 1 node, 2 CPUs
> 
> 	In any event, the E8400 is a 2 core CPU with no hyperthreading.

Well, this might explain some of the difficulties.  If RCU decides to wait
on CPUs that don't exist, we will of course get a hang.  And rcu_barrier()
was definitely expecting four CPUs.

So what happens if you boot with maxcpus=2?  (Or build with
CONFIG_NR_CPUS=2.) I suspect that this might avoid the hang.  If so,
I might have some ideas for a real fix.

							Thanx, Paul

^ permalink raw reply

* [PATCH V2 net-next] Bluetooth: fix shadow warning in hci_disconnect()
From: Fabian Frederick @ 2014-10-25  8:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, Marcel Holtmann, Gustavo Padovan, Johan Hedberg,
	David S. Miller, linux-bluetooth, netdev

use clkoff_cp for hci_cp_read_clock_offset instead of cp
(already defined above).

Suggested-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
V2: use clkoff_cp instead of cpr (suggested by Marcel Holtmann)

 net/bluetooth/hci_conn.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index b9517bd..62f4ac6 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -141,10 +141,11 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason)
 	 */
 	if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER) {
 		struct hci_dev *hdev = conn->hdev;
-		struct hci_cp_read_clock_offset cp;
+		struct hci_cp_read_clock_offset clkoff_cp;
 
-		cp.handle = cpu_to_le16(conn->handle);
-		hci_send_cmd(hdev, HCI_OP_READ_CLOCK_OFFSET, sizeof(cp), &cp);
+		clkoff_cp.handle = cpu_to_le16(conn->handle);
+		hci_send_cmd(hdev, HCI_OP_READ_CLOCK_OFFSET, sizeof(clkoff_cp),
+			     &clkoff_cp);
 	}
 
 	conn->state = BT_DISCONN;
-- 
1.9.3

^ permalink raw reply related

* [PATCH] fixup! net: pxa168_eth: Provide phy_interface mode on platform_data
From: Sebastian Hesselbarth @ 2014-10-25 10:08 UTC (permalink / raw)
  To: Sebastian Hesselbarth, David S. Miller
  Cc: Florian Fainelli, Eric Miao, netdev, Antoine Tenart, linux-kernel,
	Haojian Zhuang, linux-arm-kernel
In-Reply-To: <1414002412-13615-3-git-send-email-sebastian.hesselbarth@gmail.com>

Do not add phy include to the board file but platform_data include
instead.

Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
---
David,

I should have compile tested this patch earlier. I did now on
pxa168_defconfig right after I received an 0-day kbuild error
on this patch.

This is the corresponding fix, can you please squash it in?

Thanks and sorry for the noise,

Sebastian

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Antoine Tenart <antoine.tenart@free-electrons.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Eric Miao <eric.y.miao@gmail.com>
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 arch/arm/mach-mmp/gplugd.c | 1 -
 include/linux/pxa168_eth.h | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-mmp/gplugd.c b/arch/arm/mach-mmp/gplugd.c
index 3b5794cd0357..22762a1f9f72 100644
--- a/arch/arm/mach-mmp/gplugd.c
+++ b/arch/arm/mach-mmp/gplugd.c
@@ -12,7 +12,6 @@
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 #include <linux/gpio-pxa.h>
-#include <linux/phy.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
diff --git a/include/linux/pxa168_eth.h b/include/linux/pxa168_eth.h
index 37c381120bc8..e1ab6e86cdb3 100644
--- a/include/linux/pxa168_eth.h
+++ b/include/linux/pxa168_eth.h
@@ -4,6 +4,8 @@
 #ifndef __LINUX_PXA168_ETH_H
 #define __LINUX_PXA168_ETH_H
 
+#include <linux/phy.h>
+
 struct pxa168_eth_platform_data {
 	int	port_number;
 	int	phy_addr;
-- 
1.9.1

^ permalink raw reply related

* Re: localed stuck in recent 3.18 git in copy_net_ns?
From: Yanko Kaneti @ 2014-10-25 12:09 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Josh Boyer, Eric W. Biederman, Cong Wang, Kevin Fenzi, netdev,
	Linux-Kernel@Vger. Kernel. Org, jay.vosburgh, mroos, tj
In-Reply-To: <20141024214927.GA4977@linux.vnet.ibm.com>

On Fri-10/24/14-2014 14:49, Paul E. McKenney wrote:
> On Sat, Oct 25, 2014 at 12:25:57AM +0300, Yanko Kaneti wrote:
> > On Fri-10/24/14-2014 11:32, Paul E. McKenney wrote:
> > > On Fri, Oct 24, 2014 at 08:35:26PM +0300, Yanko Kaneti wrote:
> > > > On Fri-10/24/14-2014 10:20, Paul E. McKenney wrote:
> 
> [ . . . ]
> 
> > > > > Well, if you are feeling aggressive, give the following patch a spin.
> > > > > I am doing sanity tests on it in the meantime.
> > > > 
> > > > Doesn't seem to make a difference here
> > > 
> > > OK, inspection isn't cutting it, so time for tracing.  Does the system
> > > respond to user input?  If so, please enable rcu:rcu_barrier ftrace before
> > > the problem occurs, then dump the trace buffer after the problem occurs.
> > 
> > Sorry for being unresposive here, but I know next to nothing about tracing
> > or most things about the kernel, so I have some cathing up to do.
> > 
> > In the meantime some layman observations while I tried to find what exactly
> > triggers the problem.
> > - Even in runlevel 1 I can reliably trigger the problem by starting libvirtd
> > - libvirtd seems to be very active in using all sorts of kernel facilities
> >   that are modules on fedora so it seems to cause many simultaneous kworker 
> >   calls to modprobe
> > - there are 8 kworker/u16 from 0 to 7
> > - one of these kworkers always deadlocks, while there appear to be two
> >   kworker/u16:6 - the seventh
> 
> Adding Tejun on CC in case this duplication of kworker/u16:6 is important.
> 
> >   6 vs 8 as in 6 rcuos where before they were always 8
> > 
> > Just observations from someone who still doesn't know what the u16
> > kworkers are..
> 
> Could you please run the following diagnostic patch?  This will help
> me see if I have managed to miswire the rcuo kthreads.  It should
> print some information at task-hang time.

So here the output with todays linux tip and the diagnostic patch.
This is the case with just starting libvird in runlevel 1.
Also a snapshot  of the kworker/u16 s

    6 ?        S      0:00  \_ [kworker/u16:0]
  553 ?        S      0:00  |   \_ [kworker/u16:0]
  554 ?        D      0:00  |       \_ /sbin/modprobe -q -- bridge
   78 ?        S      0:00  \_ [kworker/u16:1]
   92 ?        S      0:00  \_ [kworker/u16:2]
   93 ?        S      0:00  \_ [kworker/u16:3]
   94 ?        S      0:00  \_ [kworker/u16:4]
   95 ?        S      0:00  \_ [kworker/u16:5]
   96 ?        D      0:00  \_ [kworker/u16:6]
  105 ?        S      0:00  \_ [kworker/u16:7]
  108 ?        S      0:00  \_ [kworker/u16:8]


INFO: task kworker/u16:6:96 blocked for more than 120 seconds.
      Not tainted 3.18.0-rc1+ #16
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/u16:6   D ffff8800ca9ecec0 11552    96      2 0x00000000
Workqueue: netns cleanup_net
 ffff880221fff9c8 0000000000000096 ffff8800ca9ecec0 00000000001d5f00
 ffff880221ffffd8 00000000001d5f00 ffff880223260000 ffff8800ca9ecec0
 ffffffff82c44010 7fffffffffffffff ffffffff81ee3798 ffffffff81ee3790
Call Trace:
 [<ffffffff81866219>] schedule+0x29/0x70
 [<ffffffff8186b43c>] schedule_timeout+0x26c/0x410
 [<ffffffff81028bea>] ? native_sched_clock+0x2a/0xa0
 [<ffffffff8110748c>] ? mark_held_locks+0x7c/0xb0
 [<ffffffff8186c4c0>] ? _raw_spin_unlock_irq+0x30/0x50
 [<ffffffff8110761d>] ? trace_hardirqs_on_caller+0x15d/0x200
 [<ffffffff81867c4c>] wait_for_completion+0x10c/0x150
 [<ffffffff810e4dc0>] ? wake_up_state+0x20/0x20
 [<ffffffff81133627>] _rcu_barrier+0x677/0xcd0
 [<ffffffff81133cd5>] rcu_barrier+0x15/0x20
 [<ffffffff81720edf>] netdev_run_todo+0x6f/0x310
 [<ffffffff81715aa5>] ? rollback_registered_many+0x265/0x2e0
 [<ffffffff8172df4e>] rtnl_unlock+0xe/0x10
 [<ffffffff81717906>] default_device_exit_batch+0x156/0x180
 [<ffffffff810fd280>] ? abort_exclusive_wait+0xb0/0xb0
 [<ffffffff8170f9b3>] ops_exit_list.isra.1+0x53/0x60
 [<ffffffff81710560>] cleanup_net+0x100/0x1f0
 [<ffffffff810cc988>] process_one_work+0x218/0x850
 [<ffffffff810cc8ef>] ? process_one_work+0x17f/0x850
 [<ffffffff810cd0a7>] ? worker_thread+0xe7/0x4a0
 [<ffffffff810cd02b>] worker_thread+0x6b/0x4a0
 [<ffffffff810ccfc0>] ? process_one_work+0x850/0x850
 [<ffffffff810d337b>] kthread+0x10b/0x130
 [<ffffffff81028c69>] ? sched_clock+0x9/0x10
 [<ffffffff810d3270>] ? kthread_create_on_node+0x250/0x250
 [<ffffffff8186d1fc>] ret_from_fork+0x7c/0xb0
 [<ffffffff810d3270>] ? kthread_create_on_node+0x250/0x250
4 locks held by kworker/u16:6/96:
 #0:  ("%s""netns"){.+.+.+}, at: [<ffffffff810cc8ef>]
 #process_one_work+0x17f/0x850
 #1:  (net_cleanup_work){+.+.+.}, at: [<ffffffff810cc8ef>]
 #process_one_work+0x17f/0x850
 #2:  (net_mutex){+.+.+.}, at: [<ffffffff817104ec>] cleanup_net+0x8c/0x1f0
 #3:  (rcu_sched_state.barrier_mutex){+.+...}, at: [<ffffffff81133025>]
 #_rcu_barrier+0x75/0xcd0
rcu_show_nocb_setup(): rcu_sched nocb state:
  0: ffff8802267ced40 l:ffff8802267ced40 n:ffff8802269ced40 .G.
  1: ffff8802269ced40 l:ffff8802267ced40 n:          (null) ...
  2: ffff880226bced40 l:ffff880226bced40 n:ffff880226dced40 .G.
  3: ffff880226dced40 l:ffff880226bced40 n:          (null) N..
  4: ffff880226fced40 l:ffff880226fced40 n:ffff8802271ced40 .G.
  5: ffff8802271ced40 l:ffff880226fced40 n:          (null) ...
  6: ffff8802273ced40 l:ffff8802273ced40 n:ffff8802275ced40 N..
  7: ffff8802275ced40 l:ffff8802273ced40 n:          (null) N..
rcu_show_nocb_setup(): rcu_bh nocb state:
  0: ffff8802267ceac0 l:ffff8802267ceac0 n:ffff8802269ceac0 ...
  1: ffff8802269ceac0 l:ffff8802267ceac0 n:          (null) ...
  2: ffff880226bceac0 l:ffff880226bceac0 n:ffff880226dceac0 ...
  3: ffff880226dceac0 l:ffff880226bceac0 n:          (null) ...
  4: ffff880226fceac0 l:ffff880226fceac0 n:ffff8802271ceac0 ...
  5: ffff8802271ceac0 l:ffff880226fceac0 n:          (null) ...
  6: ffff8802273ceac0 l:ffff8802273ceac0 n:ffff8802275ceac0 ...
  7: ffff8802275ceac0 l:ffff8802273ceac0 n:          (null) ...
INFO: task modprobe:554 blocked for more than 120 seconds.
      Not tainted 3.18.0-rc1+ #16
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
modprobe        D ffff8800c85dcec0 12456   554    553 0x00000000
 ffff8802178afbf8 0000000000000096 ffff8800c85dcec0 00000000001d5f00
 ffff8802178affd8 00000000001d5f00 ffffffff81e1b580 ffff8800c85dcec0
 ffff8800c85dcec0 ffffffff81f90c08 0000000000000246 ffff8800c85dcec0
Call Trace:
 [<ffffffff818667c1>] schedule_preempt_disabled+0x31/0x80
 [<ffffffff81868013>] mutex_lock_nested+0x183/0x440
 [<ffffffff8171037f>] ? register_pernet_subsys+0x1f/0x50
 [<ffffffff8171037f>] ? register_pernet_subsys+0x1f/0x50
 [<ffffffffa0619000>] ? 0xffffffffa0619000
 [<ffffffff8171037f>] register_pernet_subsys+0x1f/0x50
 [<ffffffffa0619048>] br_init+0x48/0xd3 [bridge]
 [<ffffffff81002148>] do_one_initcall+0xd8/0x210
 [<ffffffff8115bc22>] load_module+0x20c2/0x2870
 [<ffffffff81156c00>] ? store_uevent+0x70/0x70
 [<ffffffff81281327>] ? kernel_read+0x57/0x90
 [<ffffffff8115c5b6>] SyS_finit_module+0xa6/0xe0
 [<ffffffff8186d2d5>] ? sysret_check+0x22/0x5d
 [<ffffffff8186d2a9>] system_call_fastpath+0x12/0x17
1 lock held by modprobe/554:
 #0:  (net_mutex){+.+.+.}, at: [<ffffffff8171037f>]
 #register_pernet_subsys+0x1f/0x50
rcu_show_nocb_setup(): rcu_sched nocb state:
  0: ffff8802267ced40 l:ffff8802267ced40 n:ffff8802269ced40 .G.
  1: ffff8802269ced40 l:ffff8802267ced40 n:          (null) ...
  2: ffff880226bced40 l:ffff880226bced40 n:ffff880226dced40 .G.
  3: ffff880226dced40 l:ffff880226bced40 n:          (null) N..
  4: ffff880226fced40 l:ffff880226fced40 n:ffff8802271ced40 .G.
  5: ffff8802271ced40 l:ffff880226fced40 n:          (null) ...
  6: ffff8802273ced40 l:ffff8802273ced40 n:ffff8802275ced40 N..
  7: ffff8802275ced40 l:ffff8802273ced40 n:          (null) N..
rcu_show_nocb_setup(): rcu_bh nocb state:
  0: ffff8802267ceac0 l:ffff8802267ceac0 n:ffff8802269ceac0 ...
  1: ffff8802269ceac0 l:ffff8802267ceac0 n:          (null) ...
  2: ffff880226bceac0 l:ffff880226bceac0 n:ffff880226dceac0 ...
  3: ffff880226dceac0 l:ffff880226bceac0 n:          (null) ...
  4: ffff880226fceac0 l:ffff880226fceac0 n:ffff8802271ceac0 ...
  5: ffff8802271ceac0 l:ffff880226fceac0 n:          (null) ...
  6: ffff8802273ceac0 l:ffff8802273ceac0 n:ffff8802275ceac0 ...
  7: ffff8802275ceac0 l:ffff8802273ceac0 n:          (null) ...


 
> 							Thanx, Paul
> 
> ------------------------------------------------------------------------
> 
> rcu: Dump no-CBs CPU state at task-hung time
> 
> Strictly diagnostic commit for rcu_barrier() hang.  Not for inclusion.
> 
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> index 0e5366200154..34048140577b 100644
> --- a/include/linux/rcutiny.h
> +++ b/include/linux/rcutiny.h
> @@ -157,4 +157,8 @@ static inline bool rcu_is_watching(void)
>  
>  #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
>  
> +static inline void rcu_show_nocb_setup(void)
> +{
> +}
> +
>  #endif /* __LINUX_RCUTINY_H */
> diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
> index 52953790dcca..0b813bdb971b 100644
> --- a/include/linux/rcutree.h
> +++ b/include/linux/rcutree.h
> @@ -97,4 +97,6 @@ extern int rcu_scheduler_active __read_mostly;
>  
>  bool rcu_is_watching(void);
>  
> +void rcu_show_nocb_setup(void);
> +
>  #endif /* __LINUX_RCUTREE_H */
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 06db12434d72..e6e4d0f6b063 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -118,6 +118,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
>  		" disables this message.\n");
>  	sched_show_task(t);
>  	debug_show_held_locks(t);
> +	rcu_show_nocb_setup();
>  
>  	touch_nmi_watchdog();
>  
> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> index 240fa9094f83..6b373e79ce0e 100644
> --- a/kernel/rcu/rcutorture.c
> +++ b/kernel/rcu/rcutorture.c
> @@ -1513,6 +1513,7 @@ rcu_torture_cleanup(void)
>  {
>  	int i;
>  
> +	rcu_show_nocb_setup();
>  	rcutorture_record_test_transition();
>  	if (torture_cleanup_begin()) {
>  		if (cur_ops->cb_barrier != NULL)
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index 927c17b081c7..285b3f6fb229 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -2699,6 +2699,31 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
>  
>  #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
>  
> +void rcu_show_nocb_setup(void)
> +{
> +#ifdef CONFIG_RCU_NOCB_CPU
> +	int cpu;
> +	struct rcu_data *rdp;
> +	struct rcu_state *rsp;
> +
> +	for_each_rcu_flavor(rsp) {
> +		pr_alert("rcu_show_nocb_setup(): %s nocb state:\n", rsp->name);
> +		for_each_possible_cpu(cpu) {
> +			if (!rcu_is_nocb_cpu(cpu))
> +				continue;
> +			rdp = per_cpu_ptr(rsp->rda, cpu);
> +			pr_alert("%3d: %p l:%p n:%p %c%c%c\n",
> +				 cpu,
> +				 rdp, rdp->nocb_leader, rdp->nocb_next_follower,
> +				 ".N"[!!rdp->nocb_head],
> +				 ".G"[!!rdp->nocb_gp_head],
> +				 ".F"[!!rdp->nocb_follower_head]);
> +		}
> +	}
> +#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
> +}
> +EXPORT_SYMBOL_GPL(rcu_show_nocb_setup);
> +
>  /*
>   * An adaptive-ticks CPU can potentially execute in kernel mode for an
>   * arbitrarily long period of time with the scheduling-clock tick turned
> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox