Netdev List
 help / color / mirror / Atom feed
* [PATCH 11/14] net: core: add new/replace rate estimator lock parameter
From: Vlad Buslov @ 2018-05-14 14:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jiri, pablo, kadlec, fw, ast, daniel,
	edumazet, vladbu, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-1-git-send-email-vladbu@mellanox.com>

Extend rate estimator new and replace APIs with additional spinlock
parameter used by lockless actions to protect rate_est pointer from
concurrent modification.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 include/net/gen_stats.h    |  2 ++
 net/core/gen_estimator.c   | 58 +++++++++++++++++++++++++++++++++++-----------
 net/netfilter/xt_RATEEST.c |  2 +-
 net/sched/act_api.c        |  2 +-
 net/sched/act_police.c     |  2 +-
 net/sched/sch_api.c        |  2 ++
 net/sched/sch_cbq.c        |  4 ++--
 net/sched/sch_drr.c        |  4 ++--
 net/sched/sch_hfsc.c       |  4 ++--
 net/sched/sch_htb.c        |  4 ++--
 net/sched/sch_qfq.c        |  4 ++--
 11 files changed, 61 insertions(+), 27 deletions(-)

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 0304ba2..d1ef63d 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -59,12 +59,14 @@ int gnet_stats_finish_copy(struct gnet_dump *d);
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct net_rate_estimator __rcu **rate_est,
+		      spinlock_t *rate_est_lock,
 		      spinlock_t *stats_lock,
 		      seqcount_t *running, struct nlattr *opt);
 void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
 			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct net_rate_estimator __rcu **ptr,
+			  spinlock_t *rate_est_lock,
 			  spinlock_t *stats_lock,
 			  seqcount_t *running, struct nlattr *opt);
 bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 98fd127..3512720 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -107,11 +107,43 @@ static void est_timer(struct timer_list *t)
 	mod_timer(&est->timer, est->next_jiffies);
 }
 
+static void __replace_estimator(struct net_rate_estimator __rcu **rate_est,
+				struct net_rate_estimator *new)
+{
+	struct net_rate_estimator *old = rcu_dereference_protected(*rate_est,
+								   1);
+
+	if (old) {
+		del_timer_sync(&old->timer);
+		new->avbps = old->avbps;
+		new->avpps = old->avpps;
+	}
+
+	new->next_jiffies = jiffies + ((HZ/4) << new->intvl_log);
+	timer_setup(&new->timer, est_timer, 0);
+	mod_timer(&new->timer, new->next_jiffies);
+
+	rcu_assign_pointer(*rate_est, new);
+
+	if (old)
+		kfree_rcu(old, rcu);
+}
+
+static void replace_estimator(struct net_rate_estimator __rcu **rate_est,
+			      struct net_rate_estimator *new,
+			      spinlock_t *rate_est_lock)
+{
+	spin_lock(rate_est_lock);
+	__replace_estimator(rate_est, new);
+	spin_unlock(rate_est_lock);
+}
+
 /**
  * gen_new_estimator - create a new rate estimator
  * @bstats: basic statistics
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
+ * @rate_est_lock: rate_est lock (might be NULL)
  * @stats_lock: statistics lock
  * @running: qdisc running seqcount
  * @opt: rate estimator configuration TLV
@@ -128,12 +160,13 @@ static void est_timer(struct timer_list *t)
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct net_rate_estimator __rcu **rate_est,
+		      spinlock_t *rate_est_lock,
 		      spinlock_t *stats_lock,
 		      seqcount_t *running,
 		      struct nlattr *opt)
 {
 	struct gnet_estimator *parm = nla_data(opt);
-	struct net_rate_estimator *old, *est;
+	struct net_rate_estimator *est;
 	struct gnet_stats_basic_packed b;
 	int intvl_log;
 
@@ -167,20 +200,15 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		local_bh_enable();
 	est->last_bytes = b.bytes;
 	est->last_packets = b.packets;
-	old = rcu_dereference_protected(*rate_est, 1);
-	if (old) {
-		del_timer_sync(&old->timer);
-		est->avbps = old->avbps;
-		est->avpps = old->avpps;
-	}
 
-	est->next_jiffies = jiffies + ((HZ/4) << intvl_log);
-	timer_setup(&est->timer, est_timer, 0);
-	mod_timer(&est->timer, est->next_jiffies);
+	if (rate_est_lock)
+		replace_estimator(rate_est, est, rate_est_lock);
+	else
+		/* If no spinlock argument provided,
+		 * then assume that caller is already synchronized.
+		 */
+		__replace_estimator(rate_est, est);
 
-	rcu_assign_pointer(*rate_est, est);
-	if (old)
-		kfree_rcu(old, rcu);
 	return 0;
 }
 EXPORT_SYMBOL(gen_new_estimator);
@@ -209,6 +237,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
  * @bstats: basic statistics
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
+ * @rate_est_lock: rate_est lock (might be NULL)
  * @stats_lock: statistics lock
  * @running: qdisc running seqcount (might be NULL)
  * @opt: rate estimator configuration TLV
@@ -221,10 +250,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
 			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct net_rate_estimator __rcu **rate_est,
+			  spinlock_t *rate_est_lock,
 			  spinlock_t *stats_lock,
 			  seqcount_t *running, struct nlattr *opt)
 {
-	return gen_new_estimator(bstats, cpu_bstats, rate_est,
+	return gen_new_estimator(bstats, cpu_bstats, rate_est, rate_est_lock,
 				 stats_lock, running, opt);
 }
 EXPORT_SYMBOL(gen_replace_estimator);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dec843c..8e79bd5 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -154,7 +154,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 	cfg.est.interval	= info->interval;
 	cfg.est.ewma_log	= info->ewma_log;
 
-	ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est,
+	ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est, NULL,
 				&est->lock, NULL, &cfg.opt);
 	if (ret < 0)
 		goto err2;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a5193dc..1dc092e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -409,7 +409,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 	p->tcfa_tm.firstuse = 0;
 	if (est) {
 		err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
-					&p->tcfa_rate_est,
+					&p->tcfa_rate_est, NULL,
 					&p->tcfa_lock, NULL, est);
 		if (err)
 			goto err4;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 86d9417..c480d68 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -133,7 +133,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 
 	if (est) {
 		err = gen_replace_estimator(&police->tcf_bstats, NULL,
-					    &police->tcf_rate_est,
+					    &police->tcf_rate_est, NULL,
 					    &police->tcf_lock,
 					    NULL, est);
 		if (err)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 106dae7e..de6a297 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1178,6 +1178,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
 					sch->cpu_bstats,
 					&sch->rate_est,
 					NULL,
+					NULL,
 					running,
 					tca[TCA_RATE]);
 		if (err) {
@@ -1253,6 +1254,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
 				      sch->cpu_bstats,
 				      &sch->rate_est,
 				      NULL,
+				      NULL,
 				      qdisc_root_sleeping_running(sch),
 				      tca[TCA_RATE]);
 	}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index f42025d..2a7ff53 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1503,7 +1503,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, NULL,
-						    &cl->rate_est,
+						    &cl->rate_est, NULL,
 						    NULL,
 						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
@@ -1605,7 +1605,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-					NULL,
+					NULL, NULL,
 					qdisc_root_sleeping_running(sch),
 					tca[TCA_RATE]);
 		if (err) {
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e0b0cf8..0896e23 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -95,7 +95,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl != NULL) {
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, NULL,
-						    &cl->rate_est,
+						    &cl->rate_est, NULL,
 						    NULL,
 						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
@@ -129,7 +129,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE]) {
 		err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
-					    NULL,
+					    NULL, NULL,
 					    qdisc_root_sleeping_running(sch),
 					    tca[TCA_RATE]);
 		if (err) {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ae9877..f341324 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -972,7 +972,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, NULL,
-						    &cl->rate_est,
+						    &cl->rate_est, NULL,
 						    NULL,
 						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
@@ -1042,7 +1042,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-					NULL,
+					NULL, NULL,
 					qdisc_root_sleeping_running(sch),
 					tca[TCA_RATE]);
 		if (err) {
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 2a4ab7c..acc0355 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1407,7 +1407,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if (htb_rate_est || tca[TCA_RATE]) {
 			err = gen_new_estimator(&cl->bstats, NULL,
 						&cl->rate_est,
-						NULL,
+						NULL, NULL,
 						qdisc_root_sleeping_running(sch),
 						tca[TCA_RATE] ? : &est.nla);
 			if (err) {
@@ -1473,7 +1473,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	} else {
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, NULL,
-						    &cl->rate_est,
+						    &cl->rate_est, NULL,
 						    NULL,
 						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index bb1a9c1..8026c1e 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -461,7 +461,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl != NULL) { /* modify existing class */
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, NULL,
-						    &cl->rate_est,
+						    &cl->rate_est, NULL,
 						    NULL,
 						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
@@ -488,7 +488,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL,
 					&cl->rate_est,
-					NULL,
+					NULL, NULL,
 					qdisc_root_sleeping_running(sch),
 					tca[TCA_RATE]);
 		if (err)
-- 
2.7.5

^ permalink raw reply related

* [PATCH 12/14] net: sched: retry action check-insert on concurrent modification
From: Vlad Buslov @ 2018-05-14 14:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jiri, pablo, kadlec, fw, ast, daniel,
	edumazet, vladbu, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-1-git-send-email-vladbu@mellanox.com>

Retry check-insert sequence in action init functions if action with same
index was inserted concurrently.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/act_bpf.c        | 8 +++++++-
 net/sched/act_connmark.c   | 8 +++++++-
 net/sched/act_csum.c       | 8 +++++++-
 net/sched/act_gact.c       | 8 +++++++-
 net/sched/act_ife.c        | 8 +++++++-
 net/sched/act_ipt.c        | 8 +++++++-
 net/sched/act_mirred.c     | 8 +++++++-
 net/sched/act_nat.c        | 8 +++++++-
 net/sched/act_pedit.c      | 8 +++++++-
 net/sched/act_police.c     | 9 ++++++++-
 net/sched/act_sample.c     | 8 +++++++-
 net/sched/act_simple.c     | 9 ++++++++-
 net/sched/act_skbedit.c    | 8 +++++++-
 net/sched/act_skbmod.c     | 8 +++++++-
 net/sched/act_tunnel_key.c | 9 ++++++++-
 net/sched/act_vlan.c       | 9 ++++++++-
 16 files changed, 116 insertions(+), 16 deletions(-)

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5554bf7..7e20fdc 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -299,10 +299,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
 
+replay:
 	if (!tcf_idr_check(tn, parm->index, act, bind)) {
 		ret = tcf_idr_create(tn, parm->index, est, act,
 				     &act_bpf_ops, bind, true);
-		if (ret < 0)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 
 		res = ACT_P_CREATED;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2a4c3da..6ff45af 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -118,10 +118,16 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
+replay:
 	if (!tcf_idr_check(tn, parm->index, a, bind)) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_connmark_ops, bind, false);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 
 		ci = to_connmark(*a);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 74f5dce..49d06c3 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -67,10 +67,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_CSUM_PARMS]);
 
+replay:
 	if (!tcf_idr_check(tn, parm->index, a, bind)) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_csum_ops, bind, true);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 9d7d000..2edefeb 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -91,10 +91,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	}
 #endif
 
+replay:
 	if (!tcf_idr_check(tn, parm->index, a, bind)) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_gact_ops, bind, true);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index b57c5ba..665790f 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -483,6 +483,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	if (!p)
 		return -ENOMEM;
 
+replay:
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind) {
 		kfree(p);
@@ -492,7 +493,12 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
 				     bind, true);
-		if (ret) {
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC) {
+			goto replay;
+		} else if (ret) {
 			kfree(p);
 			return ret;
 		}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 7c26ce1..946193e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -119,6 +119,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (tb[TCA_IPT_INDEX] != NULL)
 		index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
+replay:
 	exists = tcf_idr_check(tn, index, a, bind);
 	if (exists && bind)
 		return 0;
@@ -139,7 +140,12 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, index, est, a, ops, bind,
 				     false);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index b9b7b96..4c8bd26 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -94,6 +94,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
+replay:
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
@@ -129,7 +130,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		}
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_mirred_ops, bind, true);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 77badb2..a1a1885 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -57,10 +57,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_NAT_PARMS]);
 
+replay:
 	if (!tcf_idr_check(tn, parm->index, a, bind)) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_nat_ops, bind, false);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 8c39adc..e5e93e2 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -167,12 +167,18 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	if (IS_ERR(keys_ex))
 		return PTR_ERR(keys_ex);
 
+replay:
 	if (!tcf_idr_check(tn, parm->index, a, bind)) {
 		if (!parm->nkeys)
 			return -EINVAL;
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_pedit_ops, bind, false);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 		p = to_pedit(*a);
 		keys = kmalloc(ksize, GFP_KERNEL);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index c480d68..ced6b1f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -101,6 +101,8 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_POLICE_TBF]);
+
+replay:
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
@@ -108,7 +110,12 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, NULL, a,
 				     &act_police_ops, bind, false);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index d2b0394..7411805 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -59,6 +59,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SAMPLE_PARMS]);
 
+replay:
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
@@ -66,7 +67,12 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_sample_ops, bind, false);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 26eb153..a4b2aca 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -101,6 +101,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_DEF_PARMS]);
+
+replay:
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
@@ -116,7 +118,12 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_simp_ops, bind, false);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 
 		d = to_defact(*a);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index bb416b7..7750b77 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -117,6 +117,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
+replay:
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
@@ -129,7 +130,12 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_skbedit_ops, bind, false);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 
 		d = to_skbedit(*a);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index e1c2e1c..bbc5092 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -128,6 +128,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	if (parm->flags & SKBMOD_F_SWAPMAC)
 		lflags = SKBMOD_F_SWAPMAC;
 
+replay:
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
@@ -138,7 +139,12 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_skbmod_ops, bind, true);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 
 		ret = ACT_P_CREATED;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index d88c151..4367962 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -99,6 +99,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
+
+replay:
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
@@ -161,7 +163,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_tunnel_key_ops, bind, true);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 
 		ret = ACT_P_CREATED;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index f747fb6..adc4e6e 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -134,6 +134,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!tb[TCA_VLAN_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_VLAN_PARMS]);
+
+replay:
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
@@ -181,7 +183,12 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_vlan_ops, bind, true);
-		if (ret)
+		/* Action with specified index was created concurrently.
+		 * Check again.
+		 */
+		if (parm->index && ret == -ENOSPC)
+			goto replay;
+		else if (ret)
 			return ret;
 
 		ret = ACT_P_CREATED;
-- 
2.7.5

^ permalink raw reply related

* [PATCH 13/14] net: sched: use unique idr insert function in unlocked actions
From: Vlad Buslov @ 2018-05-14 14:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jiri, pablo, kadlec, fw, ast, daniel,
	edumazet, vladbu, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-1-git-send-email-vladbu@mellanox.com>

Substitute calls to action insert function with calls to action insert
unique function that warns if insertion overwrites index in idr.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/act_bpf.c        | 2 +-
 net/sched/act_connmark.c   | 2 +-
 net/sched/act_csum.c       | 2 +-
 net/sched/act_gact.c       | 2 +-
 net/sched/act_ife.c        | 2 +-
 net/sched/act_ipt.c        | 2 +-
 net/sched/act_mirred.c     | 2 +-
 net/sched/act_nat.c        | 2 +-
 net/sched/act_pedit.c      | 2 +-
 net/sched/act_police.c     | 2 +-
 net/sched/act_sample.c     | 2 +-
 net/sched/act_simple.c     | 2 +-
 net/sched/act_skbedit.c    | 2 +-
 net/sched/act_skbmod.c     | 2 +-
 net/sched/act_tunnel_key.c | 2 +-
 net/sched/act_vlan.c       | 2 +-
 16 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 7e20fdc..0bf4ecf 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -354,7 +354,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 	rcu_assign_pointer(prog->filter, cfg.filter);
 
 	if (res == ACT_P_CREATED) {
-		tcf_idr_insert(tn, *act);
+		tcf_idr_insert_unique(tn, *act);
 	} else {
 		/* make sure the program being replaced is no longer executing */
 		synchronize_rcu();
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 6ff45af..a4e9f21 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -135,7 +135,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 		ci->net = net;
 		ci->zone = parm->zone;
 
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 		ret = ACT_P_CREATED;
 	} else {
 		ci = to_connmark(*a);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 49d06c3..d9836d2 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -105,7 +105,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 		kfree_rcu(params_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 
 	return ret;
 }
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 2edefeb..79266590 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -128,7 +128,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	}
 #endif
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 	return ret;
 }
 
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 665790f..060144e 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -590,7 +590,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 		kfree_rcu(p_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 
 	return ret;
 }
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 946193e..ff8cf9d 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -188,7 +188,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	ipt->tcfi_hook  = hook;
 	spin_unlock_bh(&ipt->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 	return ret;
 
 err3:
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 4c8bd26..7ab8d0c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -157,7 +157,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 
 	if (ret == ACT_P_CREATED) {
 		list_add(&m->tcfm_list, &mirred_list);
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 	}
 
 	return ret;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index a1a1885..a15c4a9 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -89,7 +89,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 	spin_unlock_bh(&p->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 
 	return ret;
 }
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index e5e93e2..49034d4 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -220,7 +220,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 
 	spin_unlock_bh(&p->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 	return ret;
 }
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ced6b1f..eb4e458 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -194,7 +194,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		return ret;
 
 	police->tcfp_t_c = ktime_get_ns();
-	tcf_idr_insert(tn, *a);
+	tcf_idr_insert_unique(tn, *a);
 
 	return ret;
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 7411805..5a650d4 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -97,7 +97,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 	return ret;
 }
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index a4b2aca..13809e5 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -146,7 +146,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 	return ret;
 }
 
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 7750b77..bf87679 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -169,7 +169,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	spin_unlock_bh(&d->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 	return ret;
 }
 
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index bbc5092..dc36e6f 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -185,7 +185,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 		kfree_rcu(p_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 	return ret;
 }
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 4367962..16926c7 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -198,7 +198,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		kfree_rcu(params_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 
 	return ret;
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index adc4e6e..02fbf76 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -221,7 +221,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 		kfree_rcu(p_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
+		tcf_idr_insert_unique(tn, *a);
 	return ret;
 }
 
-- 
2.7.5

^ permalink raw reply related

* [PATCH 14/14] net: sched: implement delete for all actions
From: Vlad Buslov @ 2018-05-14 14:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jiri, pablo, kadlec, fw, ast, daniel,
	edumazet, vladbu, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-1-git-send-email-vladbu@mellanox.com>

Implement delete function that is required to delete actions without
holding rtnl lock. Use action API function that atomically deletes action
only if it is still in action idr. This implementation prevents concurrent
threads from deleting same action twice.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/act_bpf.c        |  8 ++++++++
 net/sched/act_connmark.c   |  8 ++++++++
 net/sched/act_csum.c       |  8 ++++++++
 net/sched/act_gact.c       |  8 ++++++++
 net/sched/act_ife.c        |  8 ++++++++
 net/sched/act_ipt.c        | 16 ++++++++++++++++
 net/sched/act_mirred.c     |  8 ++++++++
 net/sched/act_nat.c        |  8 ++++++++
 net/sched/act_pedit.c      |  8 ++++++++
 net/sched/act_police.c     |  8 ++++++++
 net/sched/act_sample.c     |  8 ++++++++
 net/sched/act_simple.c     |  8 ++++++++
 net/sched/act_skbedit.c    |  8 ++++++++
 net/sched/act_skbmod.c     |  8 ++++++++
 net/sched/act_tunnel_key.c |  8 ++++++++
 net/sched/act_vlan.c       |  8 ++++++++
 16 files changed, 136 insertions(+)

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 0bf4ecf..36f7f66 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -394,6 +394,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_bpf_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.kind		=	"bpf",
 	.type		=	TCA_ACT_BPF,
@@ -404,6 +411,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.init		=	tcf_bpf_init,
 	.walk		=	tcf_bpf_walker,
 	.lookup		=	tcf_bpf_search,
+	.delete		=	tcf_bpf_delete,
 	.size		=	sizeof(struct tcf_bpf),
 };
 
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index a4e9f21..346ede7 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -200,6 +200,13 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_connmark_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_connmark_ops = {
 	.kind		=	"connmark",
 	.type		=	TCA_ACT_CONNMARK,
@@ -209,6 +216,7 @@ static struct tc_action_ops act_connmark_ops = {
 	.init		=	tcf_connmark_init,
 	.walk		=	tcf_connmark_walker,
 	.lookup		=	tcf_connmark_search,
+	.delete		=	tcf_connmark_delete,
 	.size		=	sizeof(struct tcf_connmark_info),
 };
 
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d9836d2..b0a244e 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -655,6 +655,13 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_csum_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_csum_ops = {
 	.kind		= "csum",
 	.type		= TCA_ACT_CSUM,
@@ -665,6 +672,7 @@ static struct tc_action_ops act_csum_ops = {
 	.cleanup	= tcf_csum_cleanup,
 	.walk		= tcf_csum_walker,
 	.lookup		= tcf_csum_search,
+	.delete		= tcf_csum_delete,
 	.size		= sizeof(struct tcf_csum),
 };
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 79266590..f6ff668 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -238,6 +238,13 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
 	return sz;
 }
 
+static int tcf_gact_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_gact_ops = {
 	.kind		=	"gact",
 	.type		=	TCA_ACT_GACT,
@@ -249,6 +256,7 @@ static struct tc_action_ops act_gact_ops = {
 	.walk		=	tcf_gact_walker,
 	.lookup		=	tcf_gact_search,
 	.get_fill_size	=	tcf_gact_get_fill_size,
+	.delete		=	tcf_gact_delete,
 	.size		=	sizeof(struct tcf_gact),
 };
 
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 060144e..d627060 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -845,6 +845,13 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ife_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ife_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_ife_ops = {
 	.kind = "ife",
 	.type = TCA_ACT_IFE,
@@ -855,6 +862,7 @@ static struct tc_action_ops act_ife_ops = {
 	.init = tcf_ife_init,
 	.walk = tcf_ife_walker,
 	.lookup = tcf_ife_search,
+	.delete = tcf_ife_delete,
 	.size =	sizeof(struct tcf_ife_info),
 };
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index ff8cf9d..d7bad79 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -331,6 +331,13 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ipt_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_ipt_ops = {
 	.kind		=	"ipt",
 	.type		=	TCA_ACT_IPT,
@@ -341,6 +348,7 @@ static struct tc_action_ops act_ipt_ops = {
 	.init		=	tcf_ipt_init,
 	.walk		=	tcf_ipt_walker,
 	.lookup		=	tcf_ipt_search,
+	.delete		=	tcf_ipt_delete,
 	.size		=	sizeof(struct tcf_ipt),
 };
 
@@ -381,6 +389,13 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_xt_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_xt_ops = {
 	.kind		=	"xt",
 	.type		=	TCA_ACT_XT,
@@ -391,6 +406,7 @@ static struct tc_action_ops act_xt_ops = {
 	.init		=	tcf_xt_init,
 	.walk		=	tcf_xt_walker,
 	.lookup		=	tcf_xt_search,
+	.delete		=	tcf_xt_delete,
 	.size		=	sizeof(struct tcf_ipt),
 };
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 7ab8d0c..62ac34b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -327,6 +327,13 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
 	return rtnl_dereference(m->tcfm_dev);
 }
 
+static int tcf_mirred_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
 	.type		=	TCA_ACT_MIRRED,
@@ -340,6 +347,7 @@ static struct tc_action_ops act_mirred_ops = {
 	.lookup		=	tcf_mirred_search,
 	.size		=	sizeof(struct tcf_mirred),
 	.get_dev	=	tcf_mirred_get_dev,
+	.delete		=	tcf_mirred_delete,
 };
 
 static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index a15c4a9..acf4064 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -301,6 +301,13 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_nat_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_nat_ops = {
 	.kind		=	"nat",
 	.type		=	TCA_ACT_NAT,
@@ -310,6 +317,7 @@ static struct tc_action_ops act_nat_ops = {
 	.init		=	tcf_nat_init,
 	.walk		=	tcf_nat_walker,
 	.lookup		=	tcf_nat_search,
+	.delete		=	tcf_nat_delete,
 	.size		=	sizeof(struct tcf_nat),
 };
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 49034d4..4eb605d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -443,6 +443,13 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_pedit_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_pedit_ops = {
 	.kind		=	"pedit",
 	.type		=	TCA_ACT_PEDIT,
@@ -453,6 +460,7 @@ static struct tc_action_ops act_pedit_ops = {
 	.init		=	tcf_pedit_init,
 	.walk		=	tcf_pedit_walker,
 	.lookup		=	tcf_pedit_search,
+	.delete		=	tcf_pedit_delete,
 	.size		=	sizeof(struct tcf_pedit),
 };
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index eb4e458..b9827bb 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -319,6 +319,13 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_police_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, police_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 MODULE_AUTHOR("Alexey Kuznetsov");
 MODULE_DESCRIPTION("Policing actions");
 MODULE_LICENSE("GPL");
@@ -332,6 +339,7 @@ static struct tc_action_ops act_police_ops = {
 	.init		=	tcf_act_police_init,
 	.walk		=	tcf_act_police_walker,
 	.lookup		=	tcf_police_search,
+	.delete		=	tcf_police_delete,
 	.size		=	sizeof(struct tcf_police),
 };
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 5a650d4..5843ce6 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -224,6 +224,13 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_sample_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_sample_ops = {
 	.kind	  = "sample",
 	.type	  = TCA_ACT_SAMPLE,
@@ -234,6 +241,7 @@ static struct tc_action_ops act_sample_ops = {
 	.cleanup  = tcf_sample_cleanup,
 	.walk	  = tcf_sample_walker,
 	.lookup	  = tcf_sample_search,
+	.delete	  = tcf_sample_delete,
 	.size	  = sizeof(struct tcf_sample),
 };
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 13809e5..c9857b2 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -195,6 +195,13 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_simp_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_simp_ops = {
 	.kind		=	"simple",
 	.type		=	TCA_ACT_SIMP,
@@ -205,6 +212,7 @@ static struct tc_action_ops act_simp_ops = {
 	.init		=	tcf_simp_init,
 	.walk		=	tcf_simp_walker,
 	.lookup		=	tcf_simp_search,
+	.delete		=	tcf_simp_delete,
 	.size		=	sizeof(struct tcf_defact),
 };
 
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index bf87679..ee4c3f5 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -232,6 +232,13 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbedit_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_skbedit_ops = {
 	.kind		=	"skbedit",
 	.type		=	TCA_ACT_SKBEDIT,
@@ -241,6 +248,7 @@ static struct tc_action_ops act_skbedit_ops = {
 	.init		=	tcf_skbedit_init,
 	.walk		=	tcf_skbedit_walker,
 	.lookup		=	tcf_skbedit_search,
+	.delete		=	tcf_skbedit_delete,
 	.size		=	sizeof(struct tcf_skbedit),
 };
 
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index dc36e6f..a8b31e9 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -254,6 +254,13 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbmod_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_skbmod_ops = {
 	.kind		=	"skbmod",
 	.type		=	TCA_ACT_SKBMOD,
@@ -264,6 +271,7 @@ static struct tc_action_ops act_skbmod_ops = {
 	.cleanup	=	tcf_skbmod_cleanup,
 	.walk		=	tcf_skbmod_walker,
 	.lookup		=	tcf_skbmod_search,
+	.delete		=	tcf_skbmod_delete,
 	.size		=	sizeof(struct tcf_skbmod),
 };
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 16926c7..9175580 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -315,6 +315,13 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tunnel_key_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_tunnel_key_ops = {
 	.kind		=	"tunnel_key",
 	.type		=	TCA_ACT_TUNNEL_KEY,
@@ -325,6 +332,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
 	.cleanup	=	tunnel_key_release,
 	.walk		=	tunnel_key_walker,
 	.lookup		=	tunnel_key_search,
+	.delete		=	tunnel_key_delete,
 	.size		=	sizeof(struct tcf_tunnel_key),
 };
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 02fbf76..6116e54 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -290,6 +290,13 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_vlan_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+	return tcf_idr_find_delete(tn, index);
+}
+
 static struct tc_action_ops act_vlan_ops = {
 	.kind		=	"vlan",
 	.type		=	TCA_ACT_VLAN,
@@ -300,6 +307,7 @@ static struct tc_action_ops act_vlan_ops = {
 	.cleanup	=	tcf_vlan_cleanup,
 	.walk		=	tcf_vlan_walker,
 	.lookup		=	tcf_vlan_search,
+	.delete		=	tcf_vlan_delete,
 	.size		=	sizeof(struct tcf_vlan),
 };
 
-- 
2.7.5

^ permalink raw reply related

* [PATCH 06/14] net: sched: implement reference counted action release
From: Vlad Buslov @ 2018-05-14 14:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jiri, pablo, kadlec, fw, ast, daniel,
	edumazet, vladbu, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-1-git-send-email-vladbu@mellanox.com>

Implement helper function to delete action using new action ops delete
function implemented by each action for lockless execution.

Implement action put function that releases reference to action and frees
it if necessary. Refactor action deletion code to use new put function and
not to rely on rtnl lock. Remove rtnl lock assertions that are no longer
needed.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/act_api.c | 98 ++++++++++++++++++++++++++++++++++++++++++++---------
 net/sched/cls_api.c |  1 -
 2 files changed, 82 insertions(+), 17 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9459cce..d324a4c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -90,21 +90,39 @@ static void free_tcf(struct tc_action *p)
 	kfree(p);
 }
 
-static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
+static void tcf_action_cleanup(struct tc_action *p)
 {
-	spin_lock_bh(&idrinfo->lock);
-	idr_remove(&idrinfo->action_idr, p->tcfa_index);
-	spin_unlock_bh(&idrinfo->lock);
+	if (p->ops->cleanup)
+		p->ops->cleanup(p);
+
 	gen_kill_estimator(&p->tcfa_rate_est);
 	free_tcf(p);
 }
 
+static int __tcf_action_put(struct tc_action *p, bool bind)
+{
+	struct tcf_idrinfo *idrinfo = p->idrinfo;
+
+	if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
+		if (bind)
+			atomic_dec(&p->tcfa_bindcnt);
+		idr_remove(&idrinfo->action_idr, p->tcfa_index);
+		spin_unlock(&idrinfo->lock);
+
+		tcf_action_cleanup(p);
+		return 1;
+	}
+
+	if (bind)
+		atomic_dec(&p->tcfa_bindcnt);
+
+	return 0;
+}
+
 int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 {
 	int ret = 0;
 
-	ASSERT_RTNL();
-
 	/* Release with strict==1 and bind==0 is only called through act API
 	 * interface (classifiers always bind). Only case when action with
 	 * positive reference count and zero bind count can exist is when it was
@@ -118,18 +136,11 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 	 * are acceptable.
 	 */
 	if (p) {
-		if (bind)
-			atomic_dec(&p->tcfa_bindcnt);
-		else if (strict && atomic_read(&p->tcfa_bindcnt) > 0)
+		if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0)
 			return -EPERM;
 
-		if (atomic_read(&p->tcfa_bindcnt) <= 0 &&
-		    refcount_dec_and_test(&p->tcfa_refcnt)) {
-			if (p->ops->cleanup)
-				p->ops->cleanup(p);
-			tcf_idr_remove(p->idrinfo, p);
+		if (__tcf_action_put(p, bind))
 			ret = ACT_P_DELETED;
-		}
 	}
 
 	return ret;
@@ -576,6 +587,11 @@ int tcf_action_destroy(struct list_head *actions, int bind)
 	return ret;
 }
 
+static int tcf_action_put(struct tc_action *p)
+{
+	return __tcf_action_put(p, false);
+}
+
 int
 tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
@@ -975,6 +991,26 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
 	return ERR_PTR(err);
 }
 
+static int tcf_action_del_1(struct net *net, char *kind, u32 index,
+			    struct netlink_ext_ack *extack)
+{
+	const struct tc_action_ops *ops;
+	int err = -EINVAL;
+
+	ops = tc_lookup_action_n(kind);
+	if (!ops) {
+		NL_SET_ERR_MSG(extack, "Specified TC action not found");
+		goto err_out;
+	}
+
+	if (ops->delete)
+		err = ops->delete(net, index);
+
+	module_put(ops->owner);
+err_out:
+	return err;
+}
+
 static int tca_action_flush(struct net *net, struct nlattr *nla,
 			    struct nlmsghdr *n, u32 portid,
 			    struct netlink_ext_ack *extack)
@@ -1052,6 +1088,36 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	return err;
 }
 
+static int tcf_action_delete(struct net *net, struct list_head *actions,
+			     struct netlink_ext_ack *extack)
+{
+	int ret;
+	struct tc_action *a, *tmp;
+	char kind[IFNAMSIZ];
+	u32 act_index;
+
+	list_for_each_entry_safe(a, tmp, actions, list) {
+		const struct tc_action_ops *ops = a->ops;
+
+		/* Actions can be deleted concurrently
+		 * so we must save their type and id to search again
+		 * after reference is released.
+		 */
+		strncpy(kind, a->ops->kind, sizeof(kind) - 1);
+		act_index = a->tcfa_index;
+
+		list_del(&a->list);
+		if (tcf_action_put(a))
+			module_put(ops->owner);
+
+		/* now do the delete */
+		ret = tcf_action_del_1(net, kind, act_index, extack);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
 static int
 tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
@@ -1072,7 +1138,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 	}
 
 	/* now do the delete */
-	ret = tcf_action_destroy(actions, 0);
+	ret = tcf_action_delete(net, actions, extack);
 	if (ret < 0) {
 		NL_SET_ERR_MSG(extack, "Failed to delete TC action");
 		kfree_skb(skb);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bcba94b..00b88e5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1417,7 +1417,6 @@ void tcf_exts_destroy(struct tcf_exts *exts)
 #ifdef CONFIG_NET_CLS_ACT
 	LIST_HEAD(actions);
 
-	ASSERT_RTNL();
 	tcf_exts_to_list(exts, &actions);
 	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
 	kfree(exts->actions);
-- 
2.7.5

^ permalink raw reply related

* [PATCH 07/14] net: sched: use reference counting action init
From: Vlad Buslov @ 2018-05-14 14:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jiri, pablo, kadlec, fw, ast, daniel,
	edumazet, vladbu, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-1-git-send-email-vladbu@mellanox.com>

Change action API to assume that action init function always takes
reference to action, even when overwriting existing action. This is
necessary because action API continues to use action pointer after init
function is done. At this point action becomes accessible for concurrent
modifications so user must always hold reference to it.

Implement helper put list function to atomically release list of actions
after action API init code is done using them.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/act_api.c | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d324a4c..3f02cd1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -592,6 +592,18 @@ static int tcf_action_put(struct tc_action *p)
 	return __tcf_action_put(p, false);
 }
 
+static void tcf_action_put_lst(struct list_head *actions)
+{
+	struct tc_action *a, *tmp;
+
+	list_for_each_entry_safe(a, tmp, actions, list) {
+		const struct tc_action_ops *ops = a->ops;
+
+		if (tcf_action_put(a))
+			module_put(ops->owner);
+	}
+}
+
 int
 tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
@@ -799,17 +811,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	return ERR_PTR(err);
 }
 
-static void cleanup_a(struct list_head *actions, int ovr)
-{
-	struct tc_action *a;
-
-	if (!ovr)
-		return;
-
-	list_for_each_entry(a, actions, list)
-		refcount_dec(&a->tcfa_refcnt);
-}
-
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
 		    struct list_head *actions, size_t *attr_size, bool unlocked,
@@ -834,17 +835,10 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		}
 		act->order = i;
 		sz += tcf_action_fill_size(act);
-		if (ovr)
-			refcount_inc(&act->tcfa_refcnt);
 		list_add_tail(&act->list, actions);
 	}
 
 	*attr_size = tcf_action_full_attrs_size(sz);
-
-	/* Remove the temp refcnt which was necessary to protect against
-	 * destroying an existing action which was being replaced
-	 */
-	cleanup_a(actions, ovr);
 	return 0;
 
 err:
@@ -1196,8 +1190,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		return ret;
 	}
 err:
-	if (event != RTM_GETACTION)
-		tcf_action_destroy(&actions, 0);
+	tcf_action_put_lst(&actions);
 	return ret;
 }
 
@@ -1239,8 +1232,11 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 			      &attr_size, false, extack);
 	if (ret)
 		return ret;
+	ret = tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+	if (ovr)
+		tcf_action_put_lst(&actions);
 
-	return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+	return ret;
 }
 
 static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
-- 
2.7.5

^ permalink raw reply related

* [PATCH 03/14] net: sched: add 'delete' function to action ops
From: Vlad Buslov @ 2018-05-14 14:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jiri, pablo, kadlec, fw, ast, daniel,
	edumazet, vladbu, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-1-git-send-email-vladbu@mellanox.com>

Extend action ops with 'delete' function. Each action type to implement its
own delete function that doesn't depend on rtnl lock.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 include/net/act_api.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index e634014..73175a3 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -100,6 +100,7 @@ struct tc_action_ops {
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
+	int     (*delete)(struct net *net, u32 index);
 };
 
 struct tc_action_net {
-- 
2.7.5

^ permalink raw reply related

* [PATCH 05/14] net: sched: always take reference to action
From: Vlad Buslov @ 2018-05-14 14:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jiri, pablo, kadlec, fw, ast, daniel,
	edumazet, vladbu, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-1-git-send-email-vladbu@mellanox.com>

Without rtnl lock protection it is no longer safe to use pointer to tc
action without holding reference to it. (it can be destroyed concurrently)

Remove unsafe action idr lookup function. Instead of it, implement safe tcf
idr check function that atomically looks up action in idr and increments
its reference and bind counters.

Implement both action search and check using new safe function.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/act_api.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 1331beb..9459cce 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -284,44 +284,38 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(tcf_generic_walker);
 
-static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
+bool __tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
+		     int bind)
 {
-	struct tc_action *p = NULL;
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p;
 
 	spin_lock_bh(&idrinfo->lock);
 	p = idr_find(&idrinfo->action_idr, index);
+	if (p) {
+		refcount_inc(&p->tcfa_refcnt);
+		if (bind)
+			atomic_inc(&p->tcfa_bindcnt);
+	}
 	spin_unlock_bh(&idrinfo->lock);
 
-	return p;
+	if (p) {
+		*a = p;
+		return true;
+	}
+	return false;
 }
 
 int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 {
-	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
-
-	if (p) {
-		*a = p;
-		return 1;
-	}
-	return 0;
+	return __tcf_idr_check(tn, index, a, 0);
 }
 EXPORT_SYMBOL(tcf_idr_search);
 
 bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 		   int bind)
 {
-	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
-
-	if (index && p) {
-		if (bind)
-			atomic_inc(&p->tcfa_bindcnt);
-		refcount_inc(&p->tcfa_refcnt);
-		*a = p;
-		return true;
-	}
-	return false;
+	return __tcf_idr_check(tn, index, a, bind);
 }
 EXPORT_SYMBOL(tcf_idr_check);
 
-- 
2.7.5

^ permalink raw reply related

* [PATCH 09/14] net: sched: don't release reference on action overwrite
From: Vlad Buslov @ 2018-05-14 14:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jiri, pablo, kadlec, fw, ast, daniel,
	edumazet, vladbu, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-1-git-send-email-vladbu@mellanox.com>

Return from action init function with reference to action taken,
even when overwriting existing action.

Action init API initializes its fourth argument (pointer to pointer to
tc action) to either existing action with same index or newly created
action. In case of existing index(and bind argument is zero), init
function returns without incrementing action reference counter. Caller
of action init then proceeds working with action without actually
holding reference to it. This means that action could be deleted
concurrently. To prevent such scenario this patch changes action init
behavior to always take reference to action before returning
successfully.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/act_bpf.c        |  8 ++++----
 net/sched/act_connmark.c   |  5 +++--
 net/sched/act_csum.c       |  8 ++++----
 net/sched/act_gact.c       |  5 +++--
 net/sched/act_ife.c        | 12 +++++-------
 net/sched/act_ipt.c        |  5 +++--
 net/sched/act_mirred.c     |  5 ++---
 net/sched/act_nat.c        |  5 +++--
 net/sched/act_pedit.c      |  5 +++--
 net/sched/act_police.c     |  8 +++-----
 net/sched/act_sample.c     |  8 +++-----
 net/sched/act_simple.c     |  5 +++--
 net/sched/act_skbedit.c    |  5 +++--
 net/sched/act_skbmod.c     |  8 +++-----
 net/sched/act_tunnel_key.c |  8 +++-----
 net/sched/act_vlan.c       |  8 +++-----
 16 files changed, 51 insertions(+), 57 deletions(-)

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5d95c43..5554bf7 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -311,9 +311,10 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 		if (bind)
 			return 0;
 
-		tcf_idr_release(*act, bind);
-		if (!replace)
+		if (!replace) {
+			tcf_idr_release(*act, bind);
 			return -EEXIST;
+		}
 	}
 
 	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
@@ -356,8 +357,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	return res;
 out:
-	if (res == ACT_P_CREATED)
-		tcf_idr_release(*act, bind);
+	tcf_idr_release(*act, bind);
 
 	return ret;
 }
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index ff6444e..2a4c3da 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -135,9 +135,10 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 		ci = to_connmark(*a);
 		if (bind)
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 		/* replacing action and zone */
 		ci->tcf_action = parm->action;
 		ci->zone = parm->zone;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index a692ac1..74f5dce 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -76,9 +76,10 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 	}
 
 	p = to_tcf_csum(*a);
@@ -86,8 +87,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 	params_old = rtnl_dereference(p->params);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 0c536cd..9d7d000 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -100,9 +100,10 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 	}
 
 	gact = to_gact(*a);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index cb155cd..b57c5ba 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -497,12 +497,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			return ret;
 		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr) {
-			kfree(p);
-			return -EEXIST;
-		}
+		kfree(p);
+		return -EEXIST;
 	}
 
 	ife = to_ife(*a);
@@ -544,13 +542,13 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 				       NULL, NULL);
 		if (err) {
 metadata_parse_err:
-			if (exists)
-				tcf_idr_release(*a, bind);
 			if (ret == ACT_P_CREATED)
 				_tcf_ife_cleanup(*a);
 
 			if (exists)
 				spin_unlock_bh(&ife->tcf_lock);
+			tcf_idr_release(*a, bind);
+
 			kfree(p);
 			return err;
 		}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0acf784..7c26ce1 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -145,10 +145,11 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_idr_release(*a, bind);
 
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 	}
 	hook = nla_get_u32(tb[TCA_IPT_HOOK]);
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 607da4b..b9b7b96 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -132,10 +132,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 	m = to_mirred(*a);
 
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 2f2f045..77badb2 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -66,9 +66,10 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 	} else {
 		if (bind)
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 	}
 	p = to_tcf_nat(*a);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 117e486..8c39adc 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -185,9 +185,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	} else {
 		if (bind)
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 		p = to_pedit(*a);
 		if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
 			keys = kmalloc(ksize, GFP_KERNEL);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 2971ba3..86d9417 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -111,10 +111,9 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	police = to_police(*a);
@@ -195,8 +194,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 failure:
 	qdisc_put_rtab(P_tab);
 	qdisc_put_rtab(R_tab);
-	if (ret == ACT_P_CREATED)
-		tcf_idr_release(*a, bind);
+	tcf_idr_release(*a, bind);
 	return err;
 }
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9bbd8e9..d2b0394 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -69,10 +69,9 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 	s = to_sample(*a);
 
@@ -81,8 +80,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
 	psample_group = psample_group_get(net, s->psample_group_num);
 	if (!psample_group) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 	RCU_INIT_POINTER(s->psample_group, psample_group);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 162f091..26eb153 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -130,9 +130,10 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	} else {
 		d = to_defact(*a);
 
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 
 		reset_policy(d, defdata, parm);
 	}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 4b9d616..bb416b7 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -136,9 +136,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		ret = ACT_P_CREATED;
 	} else {
 		d = to_skbedit(*a);
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 	}
 
 	spin_lock_bh(&d->tcf_lock);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index c1f9eda..e1c2e1c 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -142,10 +142,9 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 			return ret;
 
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	d = to_skbmod(*a);
@@ -153,8 +152,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
 	if (unlikely(!p)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index e4f9718..d88c151 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -165,10 +165,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			return ret;
 
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	t = to_tunnel_key(*a);
@@ -176,8 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 6a949f5..f747fb6 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -185,10 +185,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			return ret;
 
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	v = to_vlan(*a);
@@ -196,8 +195,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
-- 
2.7.5

^ permalink raw reply related

* Re: [PATCH v1 iproute2-next 2/3] rdma: print driver resource attributes
From: Steve Wise @ 2018-05-14 14:51 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dsahern, stephen, netdev, linux-rdma
In-Reply-To: <20180513132447.GF10381@mtr-leonro.mtl.com>



On 5/13/2018 8:24 AM, Leon Romanovsky wrote:
> On Mon, May 07, 2018 at 08:53:16AM -0700, Steve Wise wrote:
>> This enhancement allows printing rdma device-specific state, if provided
>> by the kernel.  This is done in a generic manner, so rdma tool doesn't
> Double space between "." and "This".
>
>> need to know about the details of every type of rdma device.
>>
>> Driver attributes for a rdma resource are in the form of <key,
>> [print_type], value> tuples, where the key is a string and the value can
>> be any supported driver attribute.  The print_type attribute, if present,
> ditto

I'll fix these.

>> provides a print format to use vs the standard print format for the type.
>> For example, the default print type for a PROVIDER_S32 value is "%d ",
>> but "0x%x " if the print_type of PRINT_TYPE_HEX is included inthe tuple.
>>
>> Driver resources are only printed when the -dd flag is present.
>> If -p is present, then the output is formatted to not exceed 80 columns,
>> otherwise it is printed as a single row to be grep/awk friendly.
>>
>> Example output:
>>
>> # rdma resource show qp lqpn 1028 -dd -p
>> link cxgb4_0/- lqpn 1028 rqpn 0 type RC state RTS rq-psn 0 sq-psn 0 path-mig-state MIGRATED pid 0 comm [nvme_rdma]
>>     sqid 1028 flushed 0 memsize 123968 cidx 85 pidx 85 wq_pidx 106 flush_cidx 85 in_use 0
>>     size 386 flags 0x0 rqid 1029 memsize 16768 cidx 43 pidx 41 wq_pidx 171 msn 44 rqt_hwaddr 0x2a8a5d00
>>     rqt_size 256 in_use 128 size 130 idx 43 wr_id 0xffff881057c03408 idx 40 wr_id 0xffff881057c033f0
>>
>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>> ---
>>  rdma/rdma.c  |   7 ++-
>>  rdma/rdma.h  |  11 ++++
>>  rdma/res.c   |  30 +++------
>>  rdma/utils.c | 194 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  4 files changed, 221 insertions(+), 21 deletions(-)
>>
>> diff --git a/rdma/rdma.c b/rdma/rdma.c
>> index b43e538..0155627 100644
>> --- a/rdma/rdma.c
>> +++ b/rdma/rdma.c
>> @@ -132,6 +132,7 @@ int main(int argc, char **argv)
>>  	const char *batch_file = NULL;
>>  	bool pretty_output = false;
>>  	bool show_details = false;
>> +	bool show_driver_details = false;
> Reversed Christmas tree please.

Sure. 

>>  	bool json_output = false;
>>  	bool force = false;
>>  	char *filename;
>> @@ -152,7 +153,10 @@ int main(int argc, char **argv)
>>  			pretty_output = true;
>>  			break;
>>  		case 'd':
>> -			show_details = true;
>> +			if (show_details)
>> +				show_driver_details = true;
>> +			else
>> +				show_details = true;
>>  			break;
>>  		case 'j':
>>  			json_output = true;
>> @@ -180,6 +184,7 @@ int main(int argc, char **argv)
>>  	argv += optind;
>>
>>  	rd.show_details = show_details;
>> +	rd.show_driver_details = show_driver_details;
>>  	rd.json_output = json_output;
>>  	rd.pretty_output = pretty_output;
>>
>> diff --git a/rdma/rdma.h b/rdma/rdma.h
>> index 1908fc4..fcaf9e6 100644
>> --- a/rdma/rdma.h
>> +++ b/rdma/rdma.h
>> @@ -55,6 +55,7 @@ struct rd {
>>  	char **argv;
>>  	char *filename;
>>  	bool show_details;
>> +	bool show_driver_details;
>>  	struct list_head dev_map_list;
>>  	uint32_t dev_idx;
>>  	uint32_t port_idx;
>> @@ -115,4 +116,14 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
>>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
>>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
>>  int rd_attr_cb(const struct nlattr *attr, void *data);
>> +int rd_attr_check(const struct nlattr *attr, int *typep);
>> +
>> +/*
>> + * Print helpers
>> + */
>> +void print_driver_table(struct rd *rd, struct nlattr *tb);
>> +void newline(struct rd *rd);
>> +void newline_indent(struct rd *rd);
>> +#define MAX_LINE_LENGTH 80
>> +
>>  #endif /* _RDMA_TOOL_H_ */
>> diff --git a/rdma/res.c b/rdma/res.c
>> index 1a0aab6..074b992 100644
>> --- a/rdma/res.c
>> +++ b/rdma/res.c
>> @@ -439,10 +439,8 @@ static int res_qp_parse_cb(const struct nlmsghdr *nlh, void *data)
>>  		if (nla_line[RDMA_NLDEV_ATTR_RES_PID])
>>  			free(comm);
>>
>> -		if (rd->json_output)
>> -			jsonw_end_array(rd->jw);
>> -		else
>> -			pr_out("\n");
>> +		print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]);
>> +		newline(rd);
>>  	}
>>  	return MNL_CB_OK;
>>  }
>> @@ -678,10 +676,8 @@ static int res_cm_id_parse_cb(const struct nlmsghdr *nlh, void *data)
>>  		if (nla_line[RDMA_NLDEV_ATTR_RES_PID])
>>  			free(comm);
>>
>> -		if (rd->json_output)
>> -			jsonw_end_array(rd->jw);
>> -		else
>> -			pr_out("\n");
>> +		print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]);
>> +		newline(rd);
>>  	}
>>  	return MNL_CB_OK;
>>  }
>> @@ -804,10 +800,8 @@ static int res_cq_parse_cb(const struct nlmsghdr *nlh, void *data)
>>  		if (nla_line[RDMA_NLDEV_ATTR_RES_PID])
>>  			free(comm);
>>
>> -		if (rd->json_output)
>> -			jsonw_end_array(rd->jw);
>> -		else
>> -			pr_out("\n");
>> +		print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]);
>> +		newline(rd);
>>  	}
>>  	return MNL_CB_OK;
>>  }
>> @@ -919,10 +913,8 @@ static int res_mr_parse_cb(const struct nlmsghdr *nlh, void *data)
>>  		if (nla_line[RDMA_NLDEV_ATTR_RES_PID])
>>  			free(comm);
>>
>> -		if (rd->json_output)
>> -			jsonw_end_array(rd->jw);
>> -		else
>> -			pr_out("\n");
>> +		print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]);
>> +		newline(rd);
>>  	}
>>  	return MNL_CB_OK;
>>  }
>> @@ -1004,10 +996,8 @@ static int res_pd_parse_cb(const struct nlmsghdr *nlh, void *data)
>>  		if (nla_line[RDMA_NLDEV_ATTR_RES_PID])
>>  			free(comm);
>>
>> -		if (rd->json_output)
>> -			jsonw_end_array(rd->jw);
>> -		else
>> -			pr_out("\n");
>> +		print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]);
>> +		newline(rd);
>>  	}
>>  	return MNL_CB_OK;
>>  }
>> diff --git a/rdma/utils.c b/rdma/utils.c
>> index 49c967f..452fe92 100644
>> --- a/rdma/utils.c
>> +++ b/rdma/utils.c
>> @@ -11,6 +11,7 @@
>>
>>  #include "rdma.h"
>>  #include <ctype.h>
>> +#include <inttypes.h>
>>
>>  int rd_argc(struct rd *rd)
>>  {
>> @@ -393,8 +394,32 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
>>  	[RDMA_NLDEV_ATTR_RES_MRLEN] = MNL_TYPE_U64,
>>  	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= MNL_TYPE_U32,
>>  	[RDMA_NLDEV_ATTR_NDEV_NAME]		= MNL_TYPE_NUL_STRING,
>> +	[RDMA_NLDEV_ATTR_DRIVER] = MNL_TYPE_NESTED,
>> +	[RDMA_NLDEV_ATTR_DRIVER_ENTRY] = MNL_TYPE_NESTED,
>> +	[RDMA_NLDEV_ATTR_DRIVER_STRING] = MNL_TYPE_NUL_STRING,
>> +	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = MNL_TYPE_U8,
>> +	[RDMA_NLDEV_ATTR_DRIVER_S32] = MNL_TYPE_U32,
>> +	[RDMA_NLDEV_ATTR_DRIVER_U32] = MNL_TYPE_U32,
>> +	[RDMA_NLDEV_ATTR_DRIVER_S64] = MNL_TYPE_U64,
>> +	[RDMA_NLDEV_ATTR_DRIVER_U64] = MNL_TYPE_U64,
>>  };
>>
>> +int rd_attr_check(const struct nlattr *attr, int *typep)
>> +{
>> +	int type;
>> +
>> +	if (mnl_attr_type_valid(attr, RDMA_NLDEV_ATTR_MAX) < 0)
>> +		return MNL_CB_ERROR;
>> +
>> +	type = mnl_attr_get_type(attr);
>> +
>> +	if (mnl_attr_validate(attr, nldev_policy[type]) < 0)
>> +		return MNL_CB_ERROR;
>> +
>> +	*typep = nldev_policy[type];
>> +	return MNL_CB_OK;
>> +}
>> +
>>  int rd_attr_cb(const struct nlattr *attr, void *data)
>>  {
>>  	const struct nlattr **tb = data;
>> @@ -660,3 +685,172 @@ struct dev_map *dev_map_lookup(struct rd *rd, bool allow_port_index)
>>  	free(dev_name);
>>  	return dev_map;
>>  }
>> +
>> +#define nla_type(attr) ((attr)->nla_type & NLA_TYPE_MASK)
>> +
>> +void newline(struct rd *rd)
>> +{
>> +	if (rd->json_output)
>> +		jsonw_end_array(rd->jw);
>> +	else
>> +		pr_out("\n");
>> +}
>> +
>> +void newline_indent(struct rd *rd)
>> +{
>> +	newline(rd);
>> +	if (!rd->json_output)
>> +		pr_out("    ");
>> +}
>> +
>> +static int print_driver_string(struct rd *rd, const char *key_str,
>> +				 const char *val_str)
>> +{
>> +	if (rd->json_output) {
>> +		jsonw_string_field(rd->jw, key_str, val_str);
>> +		return 0;
>> +	} else {
>> +		return pr_out("%s %s ", key_str, val_str);
>> +	}
>> +}
>> +
>> +static int print_driver_s32(struct rd *rd, const char *key_str, int32_t val,
>> +			      enum rdma_nldev_print_type print_type)
>> +{
>> +	if (rd->json_output) {
>> +		jsonw_int_field(rd->jw, key_str, val);
>> +		return 0;
>> +	}
>> +	switch (print_type) {
>> +	case RDMA_NLDEV_PRINT_TYPE_UNSPEC:
>> +		return pr_out("%s %d ", key_str, val);
>> +	case RDMA_NLDEV_PRINT_TYPE_HEX:
>> +		return pr_out("%s 0x%x ", key_str, val);
>> +	default:
>> +		return -EINVAL;
>> +	}
>> +}
>> +
>> +static int print_driver_u32(struct rd *rd, const char *key_str, uint32_t val,
>> +			      enum rdma_nldev_print_type print_type)
>> +{
>> +	if (rd->json_output) {
>> +		jsonw_int_field(rd->jw, key_str, val);
>> +		return 0;
>> +	}
>> +	switch (print_type) {
>> +	case RDMA_NLDEV_PRINT_TYPE_UNSPEC:
>> +		return pr_out("%s %u ", key_str, val);
>> +	case RDMA_NLDEV_PRINT_TYPE_HEX:
>> +		return pr_out("%s 0x%x ", key_str, val);
>> +	default:
>> +		return -EINVAL;
>> +	}
>> +}
>> +
>> +static int print_driver_s64(struct rd *rd, const char *key_str, int64_t val,
>> +			      enum rdma_nldev_print_type print_type)
>> +{
>> +	if (rd->json_output) {
>> +		jsonw_int_field(rd->jw, key_str, val);
>> +		return 0;
>> +	}
>> +	switch (print_type) {
>> +	case RDMA_NLDEV_PRINT_TYPE_UNSPEC:
>> +		return pr_out("%s %" PRId64 " ", key_str, val);
>> +	case RDMA_NLDEV_PRINT_TYPE_HEX:
>> +		return pr_out("%s 0x%" PRIx64 " ", key_str, val);
>> +	default:
>> +		return -EINVAL;
>> +	}
>> +}
>> +
>> +static int print_driver_u64(struct rd *rd, const char *key_str, uint64_t val,
>> +			      enum rdma_nldev_print_type print_type)
>> +{
>> +	if (rd->json_output) {
>> +		jsonw_int_field(rd->jw, key_str, val);
>> +		return 0;
>> +	}
>> +	switch (print_type) {
>> +	case RDMA_NLDEV_PRINT_TYPE_UNSPEC:
>> +		return pr_out("%s %" PRIu64 " ", key_str, val);
>> +	case RDMA_NLDEV_PRINT_TYPE_HEX:
>> +		return pr_out("%s 0x%" PRIx64 " ", key_str, val);
>> +	default:
>> +		return -EINVAL;
>> +	}
>> +}
>> +
>> +static int print_driver_entry(struct rd *rd, struct nlattr *key_attr,
>> +				struct nlattr *val_attr,
>> +				enum rdma_nldev_print_type print_type)
>> +{
>> +	const char *key_str = mnl_attr_get_str(key_attr);
>> +	int attr_type = nla_type(val_attr);
>> +
>> +	switch (attr_type) {
>> +	case RDMA_NLDEV_ATTR_DRIVER_STRING:
>> +		return print_driver_string(rd, key_str,
>> +				mnl_attr_get_str(val_attr));
>> +	case RDMA_NLDEV_ATTR_DRIVER_S32:
>> +		return print_driver_s32(rd, key_str,
>> +				mnl_attr_get_u32(val_attr), print_type);
>> +	case RDMA_NLDEV_ATTR_DRIVER_U32:
>> +		return print_driver_u32(rd, key_str,
>> +				mnl_attr_get_u32(val_attr), print_type);
>> +	case RDMA_NLDEV_ATTR_DRIVER_S64:
>> +		return print_driver_s64(rd, key_str,
>> +				mnl_attr_get_u64(val_attr), print_type);
>> +	case RDMA_NLDEV_ATTR_DRIVER_U64:
>> +		return print_driver_u64(rd, key_str,
>> +				mnl_attr_get_u64(val_attr), print_type);
>> +	}
>> +	return -EINVAL;
>> +}
>> +
>> +void print_driver_table(struct rd *rd, struct nlattr *tb)
>> +{
>> +	int print_type = RDMA_NLDEV_PRINT_TYPE_UNSPEC;
>> +	struct nlattr *tb_entry, *key = NULL, *val;
>> +	int type, cc = 0;
>> +
>> +	if (!rd->show_driver_details || !tb)
>> +		return;
>> +
>> +	if (rd->pretty_output)
>> +		newline_indent(rd);
>> +
>> +	/*
>> +	 * Driver attrs are tuples of {key, [print-type], value}.
>> +	 * The key must be a string.  If print-type is present, it
>> +	 * defines an alternate printf format type vs the native format
>> +	 * for the attribute.  And the value can be any available
>> +	 * driver type.
>> +	 */
>> +	mnl_attr_for_each_nested(tb_entry, tb) {
>> +
>> +		if (cc > MAX_LINE_LENGTH) {
>> +			if (rd->pretty_output)
>> +				newline_indent(rd);
>> +			cc = 0;
>> +		}
>> +		if (rd_attr_check(tb_entry, &type) != MNL_CB_OK)
>> +			return;
>> +		if (!key) {
>> +			if (type != MNL_TYPE_NUL_STRING)
>> +				return;
>> +			key = tb_entry;
>> +		} else if (type == MNL_TYPE_U8) {
>> +			print_type = mnl_attr_get_u8(tb_entry);
>> +		} else {
>> +			val = tb_entry;
>> +			cc += print_driver_entry(rd, key, val, print_type);
> I stopped to read here, because of two problems:
> 1. print_driver_entry can return negative number, so unclear to me what
> will be the final result of "cc += ..".
> 2. The netlink design is to ignore unknown attributes and not return
> error. It allows to use new kernels with old applications.

Yes, this is a bug.  See below the check for 'cc < 0':

>> +			if (cc < 0)
>> +				return;

It's incorrect.  The return from print_driver_entry() should be checked
for error to allow ignoring unknown attrs, and then if no error, cc gets
incremented.  I'll fix this up.


Thanks for reviewing,

Steve.

>> +			print_type = RDMA_NLDEV_PRINT_TYPE_UNSPEC;
>> +			key = NULL;
>> +		}
>> +	}
>> +	return;
>> +}
>> --
>> 1.8.3.1
>>

^ permalink raw reply

* Re: [PATCH v3 0/1] multi-threading device shutdown
From: Greg KH @ 2018-05-14 15:03 UTC (permalink / raw)
  To: Pavel Tatashin
  Cc: steven.sistare, daniel.m.jordan, linux-kernel, jeffrey.t.kirsher,
	intel-wired-lan, netdev, alexander.duyck, tobin
In-Reply-To: <20180507155402.10086-1-pasha.tatashin@oracle.com>

On Mon, May 07, 2018 at 11:54:01AM -0400, Pavel Tatashin wrote:
> Changelog
> v2 - v3
> 	- Fixed warning from kbuild test.
> 	- Moved device_lock/device_unlock inside device_shutdown_tree().
> 
> v1 - v2
> 	- It turns out we cannot lock more than MAX_LOCK_DEPTH by a single
> 	  thread. (By default this value is 48), and is used to detect
> 	  deadlocks. So, I re-wrote the code to only lock one devices per
> 	  thread instead of pre-locking all devices by the main thread.
> 	- Addressed comments from Tobin C. Harding.
> 	- As suggested by Alexander Duyck removed ixgbe changes. It can be
> 	  done as a separate work scaling RTNL mutex.
> 
> Do a faster shutdown by calling dev->*->shutdown(dev) in parallel.
> device_shutdown() calls these functions for every single device but
> only using one thread.
> 
> Since, nothing else is running on the machine by the device_shutdown()
> s called, there is no reason not to utilize all the available CPU
> resources.

Ah, we can hope so.  I bet this is going to break something, so can we
have some way of turning it on/off dynamically for when it does?

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH net-next v9 1/7] sched: Add Common Applications Kept Enhanced (cake) qdisc
From: David Miller @ 2018-05-14 15:05 UTC (permalink / raw)
  To: toke; +Cc: netdev, cake
In-Reply-To: <87h8nawqnn.fsf@toke.dk>

From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Mon, 14 May 2018 11:08:28 +0200

> David Miller <davem@davemloft.net> writes:
> 
>> From: Toke Høiland-Jørgensen <toke@toke.dk>
>> Date: Tue, 08 May 2018 16:34:19 +0200
>>
>>> +struct cake_flow {
>>> +	/* this stuff is all needed per-flow at dequeue time */
>>> +	struct sk_buff	  *head;
>>> +	struct sk_buff	  *tail;
>>
>> Please do not invent your own SKB list handling mechanism.
> 
> We didn't invent it, we inherited it from fq_codel. I was actually about
> to fix that, but then I noticed it was still around in fq_codel, and so
> let it be. I can certainly fix it anyway, but, erm, why is it acceptable
> in fq_codel but not in cake? struct sk_buff_head is not that new, is it?

I guess one argument has to do with the amount of memory consumed by this
per-flow or per-queue information, right?  Because the skb queue head has
a qlen and a spinlock regardless of whether they are used or not.

Furthermore, if you use the __skb_insert() et al. helpers, even though it
won't use the lock it will adjust the qlen counter.  And that's useless
work since you have no use for the qlen value.

Taken together, it seems that what you and fq_codel are doing is not
such a bad idea after all.  So please leave it alone.

On-and-off again, I've looked into converting skbs to using list_head
but it's a non-trivial set of work.  All over the tree the different
layers use the next/prev pointers in different ways.  Some use it for
a doubly linked list.  Some use it for a singly linked list.  Some
encode state in the prev pointer.  You name it, it's out there.

I'll try to get back to that task because obviously it'll be useful to
have code like cake and fq_codel use common helpers instead of custom
stuff.

Thanks.

^ permalink raw reply

* Re: [PATCH 01/14] net: sched: use rcu for action cookie update
From: Jiri Pirko @ 2018-05-14 15:10 UTC (permalink / raw)
  To: Vlad Buslov
  Cc: netdev, davem, jhs, xiyou.wangcong, pablo, kadlec, fw, ast,
	daniel, edumazet, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-2-git-send-email-vladbu@mellanox.com>

Mon, May 14, 2018 at 04:27:02PM CEST, vladbu@mellanox.com wrote:
>Implement functions to atomically update and free action cookie
>using rcu mechanism.
>
>Signed-off-by: Vlad Buslov <vladbu@mellanox.com>

Signed-off-by: Jiri Pirko <jiri@mellanox.com>

^ permalink raw reply

* Re: [PATCH 02/14] net: sched: change type of reference and bind counters
From: Jiri Pirko @ 2018-05-14 15:11 UTC (permalink / raw)
  To: Vlad Buslov
  Cc: netdev, davem, jhs, xiyou.wangcong, pablo, kadlec, fw, ast,
	daniel, edumazet, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-3-git-send-email-vladbu@mellanox.com>

Mon, May 14, 2018 at 04:27:03PM CEST, vladbu@mellanox.com wrote:
>Change type of action reference counter to refcount_t.
>
>Change type of action bind counter to atomic_t.
>This type is used to allow decrementing bind counter without testing
>for 0 result.
>
>Signed-off-by: Vlad Buslov <vladbu@mellanox.com>

Signed-off-by: Jiri Pirko <jiri@mellanox.com>

^ permalink raw reply

* Re: [PATCH 03/14] net: sched: add 'delete' function to action ops
From: Jiri Pirko @ 2018-05-14 15:12 UTC (permalink / raw)
  To: Vlad Buslov
  Cc: netdev, davem, jhs, xiyou.wangcong, pablo, kadlec, fw, ast,
	daniel, edumazet, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-4-git-send-email-vladbu@mellanox.com>

Mon, May 14, 2018 at 04:27:04PM CEST, vladbu@mellanox.com wrote:
>Extend action ops with 'delete' function. Each action type to implement its
>own delete function that doesn't depend on rtnl lock.
>
>Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
>---
> include/net/act_api.h | 1 +
> 1 file changed, 1 insertion(+)
>
>diff --git a/include/net/act_api.h b/include/net/act_api.h
>index e634014..73175a3 100644
>--- a/include/net/act_api.h
>+++ b/include/net/act_api.h
>@@ -100,6 +100,7 @@ struct tc_action_ops {
> 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
> 	size_t  (*get_fill_size)(const struct tc_action *act);
> 	struct net_device *(*get_dev)(const struct tc_action *a);
>+	int     (*delete)(struct net *net, u32 index);

Probably better to squash this to patch 14.

^ permalink raw reply

* [PATCH] lib/rhashtable: reorder some inititalization sequences
From: Davidlohr Bueso @ 2018-05-14 15:13 UTC (permalink / raw)
  To: akpm, tgraf, herbert; +Cc: netdev, linux-kernel, dave, Davidlohr Bueso

rhashtable_init() allocates memory at the very end of the
call, once everything is setup; with the exception of the
nelems parameter. However, unless the user is doing something
bogus with params for which -EINVAL is returned, memory
allocation is the only operation that can trigger the call
to fail.

Thus move bucket_table_alloc() up such that we fail back to
the caller asap, instead of doing useless checks. This is
safe as the the table allocation isn't using the halfly
setup 'ht' structure and bucket_table_alloc() call chain only
ends up using the ht->nulls_base member in INIT_RHT_NULLS_HEAD.

Also move the locking initialization down to the end.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 lib/rhashtable.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 9427b5766134..68aadd6bff60 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1022,8 +1022,6 @@ int rhashtable_init(struct rhashtable *ht,
 	struct bucket_table *tbl;
 	size_t size;
 
-	size = HASH_DEFAULT_SIZE;
-
 	if ((!params->key_len && !params->obj_hashfn) ||
 	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
@@ -1032,10 +1030,17 @@ int rhashtable_init(struct rhashtable *ht,
 		return -EINVAL;
 
 	memset(ht, 0, sizeof(*ht));
-	mutex_init(&ht->mutex);
-	spin_lock_init(&ht->lock);
 	memcpy(&ht->p, params, sizeof(*params));
 
+	if (!params->nelem_hint)
+		size = HASH_DEFAULT_SIZE;
+	else
+		size = rounded_hashtable_size(&ht->p);
+
+	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+	if (tbl == NULL)
+		return -ENOMEM;
+
 	if (params->min_size)
 		ht->p.min_size = roundup_pow_of_two(params->min_size);
 
@@ -1050,9 +1055,6 @@ int rhashtable_init(struct rhashtable *ht,
 
 	ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
 
-	if (params->nelem_hint)
-		size = rounded_hashtable_size(&ht->p);
-
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
 	else
@@ -1068,10 +1070,8 @@ int rhashtable_init(struct rhashtable *ht,
 		}
 	}
 
-	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
-	if (tbl == NULL)
-		return -ENOMEM;
-
+	mutex_init(&ht->mutex);
+	spin_lock_init(&ht->lock);
 	atomic_set(&ht->nelems, 0);
 
 	RCU_INIT_POINTER(ht->tbl, tbl);
-- 
2.13.6

^ permalink raw reply related

* Re: [PATCH v1 iproute2-next 1/3] rdma: update rdma_netlink.h to get driver attrs
From: Steve Wise @ 2018-05-14 15:15 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dsahern, stephen, netdev, linux-rdma
In-Reply-To: <20180513131509.GE10381@mtr-leonro.mtl.com>



On 5/13/2018 8:15 AM, Leon Romanovsky wrote:
> On Mon, May 07, 2018 at 08:53:10AM -0700, Steve Wise wrote:
>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>> ---
>>  rdma/include/uapi/rdma/rdma_netlink.h | 37 ++++++++++++++++++++++++++++++++++-
>>  1 file changed, 36 insertions(+), 1 deletion(-)
> Please write in commit message something like: "Based on kernel commit
> ....", so we will be able to track changes.

Sure.

Thanks,

Steve.

^ permalink raw reply

* Re: [bpf-next PATCH 5/5] bpf, doc: howto use/run the BPF selftests
From: Silvan Jegen @ 2018-05-14 15:15 UTC (permalink / raw)
  To: brouer
  Cc: borkmann, alexei.starovoitov, netdev, quentin.monnet, linux-man,
	linux-doc
In-Reply-To: <152630535224.29210.11623921130132517306.stgit@firesoul>

Hi

Some typo fixes below.

On Mon, May 14, 2018 at 3:43 PM Jesper Dangaard Brouer <brouer@redhat.com>
wrote:
> I always forget howto run the BPF selftests. Thus, lets add that info
> to the QA document.

> Documentation was based on Cilium's documentation:
>   http://cilium.readthedocs.io/en/latest/bpf/#verifying-the-setup

> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ---
>   Documentation/bpf/bpf_devel_QA.rst |   29 +++++++++++++++++++++++++++++
>   1 file changed, 29 insertions(+)

> diff --git a/Documentation/bpf/bpf_devel_QA.rst
b/Documentation/bpf/bpf_devel_QA.rst
> index 2254bdeae990..0e7c1d946e83 100644
> --- a/Documentation/bpf/bpf_devel_QA.rst
> +++ b/Documentation/bpf/bpf_devel_QA.rst
> @@ -417,6 +417,33 @@ submitted by the BPF maintainers to the stable
maintainers.
>   Testing patches
>   ===============

> +Q: How to run BPF selftests
> +---------------------------
> +A: After you have booted into the newly compiled kernel, navigate to
> +the BPF selftests_ suite in order to test BPF functionality (current
> +working directory points to the root of the cloned git tree)::
> +
> +  $ cd tools/testing/selftests/bpf/
> +  $ make
> +
> +To run the verifier tests::
> +
> +  $ sudo ./test_verifier
> +
> +The verifier tests print out all the current checks being
> +performed. The summary at the end of running all tests will dump
> +information of test successes and failures::

Two colons at the end of the line. Don't think that was intended.


> +
> +  Summary: 418 PASSED, 0 FAILED
> +
> +In order to run through all BPF selftests, the following command is
> +needed::
> +
> +  $ sudo make run_tests
> +
> +See the kernels selftest `Documentation/dev-tools/kselftest.rst`_

s/kernels/kernel's/

I also think the underscore at the end of this line is misplaced (or it
should be a dash instead).

Cheers,

Silvan


> +document for further documentation.
> +
>   Q: Which BPF kernel selftests version should I run my kernel against?
>   ---------------------------------------------------------------------
>   A: If you run a kernel ``xyz``, then always run the BPF kernel selftests
> @@ -607,5 +634,7 @@ when:
>   .. _netdev FAQ: ../networking/netdev-FAQ.txt
>   .. _samples/bpf/: ../../samples/bpf/
>   .. _selftests: ../../tools/testing/selftests/bpf/
> +.. _Documentation/dev-tools/kselftest.rst:
> +   https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html

>   Happy BPF hacking!

> --
> To unsubscribe from this list: send the line "unsubscribe linux-man" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 04/14] net: sched: implement unlocked action init API
From: Jiri Pirko @ 2018-05-14 15:16 UTC (permalink / raw)
  To: Vlad Buslov
  Cc: netdev, davem, jhs, xiyou.wangcong, pablo, kadlec, fw, ast,
	daniel, edumazet, keescook, linux-kernel, netfilter-devel,
	coreteam, kliteyn
In-Reply-To: <1526308035-12484-5-git-send-email-vladbu@mellanox.com>

Mon, May 14, 2018 at 04:27:05PM CEST, vladbu@mellanox.com wrote:
>Add additional 'unlocked' argument to act API init functions.
>Argument is true when rtnl lock is not taken and false otherwise.
>It is required to implement actions that need to release rtnl lock before
>loading kernel module and reacquire if afterwards.
>
>Signed-off-by: Vlad Buslov <vladbu@mellanox.com>

[...]


>@@ -721,9 +722,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
> 	a_o = tc_lookup_action_n(act_name);
> 	if (a_o == NULL) {
> #ifdef CONFIG_MODULES
>-		rtnl_unlock();
>+		if (!unlocked)
>+			rtnl_unlock();
> 		request_module("act_%s", act_name);
>-		rtnl_lock();
>+		if (!unlocked)
>+			rtnl_lock();

Although I don't like this conditional locking scheme, I see no other
way to solve this :/ But I think would be better perhaps to rename
"unlocked" to something like "rtnl_held".

[...]

^ permalink raw reply

* net: ieee802154: 6lowpan: fix frag reassembly
From: Stefan Schmidt @ 2018-05-14 15:22 UTC (permalink / raw)
  To: stable
  Cc: Alexander Aring, David S. Miller, linux-wpan@vger.kernel.org,
	Network Development

Hello.


Please apply f18fa5de5ba7f1d6650951502bb96a6e4715a948

(net: ieee802154: 6lowpan: fix frag reassembly) to the 4.16.x stable tree.


Earlier trees are not needed as the problem was introduced in 4.16.


Normally net/ patches would come through DaveM, but he asked me for this one to submit it directly when i sent him the pull request.


First time stable request on my side here, let me know if I got something wrong.


regards

Stefan Schmidt

^ permalink raw reply

* [net-next 00/11][pull request] 40GbE Intel Wired LAN Driver Updates 2018-05-14
From: Jeff Kirsher @ 2018-05-14 15:27 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, nhorman, sassmann, jogreene

This series contains updates to virtchnl, i40e and i40evf.

Bruce cleans up whitespace and unnecessary parentheses in virtchnl.

Jake does a number of stat cleanups in the i40e driver, including
cleanup of code indentation, whitespace issues, remove duplicate stats,
fix grammar in code comment and general spring cleaning of the
statistics code.

Patryk fixes an issue where we recalculate vectors left and vectors
wanted but do not take into account the reduced number of queue pairs
per VSI.

Harshitha adds tx_busy stat to ethtool stats to track the number of
times we return NETDEV_TX_BUSY to the stack during transmit.

Paweł fixes a potential system crash when unloading the VF driver after
a hardware reset.

The following are changes since commit 289e1f4e9e4a09c73a1c0152bb93855ea351ccda:
  net: ipv4: ipconfig: fix unused variable
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 40GbE

Bruce Allan (1):
  virtchnl: Whitespace and parenthesis cleanup

Harshitha Ramamurthy (1):
  i40e: add tx_busy to ethtool stats

Jacob Keller (6):
  i40e: calculate ethtool stats size in a separate function
  i40e: remove duplicate pfc stats
  i40e: cleanup whitespace for some ethtool stat definitions
  i40evf: remove MAX_QUEUES and just use I40EVF_MAX_REQ_QUEUES
  i40e: cleanup wording in a header comment
  i40e: free the skb after clearing the bitlock

Jeff Kirsher (1):
  i40evf: Fix client header define

Patryk Małek (1):
  i40e: Fix recalculation of MSI-X vectors for VMDq

Paweł Jabłoński (1):
  i40evf: Fix a hardware reset support in VF driver

 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 45 ++++++++++---------
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 36 ++++++++++++---
 drivers/net/ethernet/intel/i40e/i40e_ptp.c    |  9 +++-
 drivers/net/ethernet/intel/i40evf/i40evf.h    |  1 -
 .../net/ethernet/intel/i40evf/i40evf_client.h |  6 +--
 .../net/ethernet/intel/i40evf/i40evf_main.c   |  8 ++--
 include/linux/avf/virtchnl.h                  |  4 +-
 7 files changed, 72 insertions(+), 37 deletions(-)

-- 
2.17.0

^ permalink raw reply

* [net-next 01/11] virtchnl: Whitespace and parenthesis cleanup
From: Jeff Kirsher @ 2018-05-14 15:27 UTC (permalink / raw)
  To: davem; +Cc: Bruce Allan, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180514152747.23154-1-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

Clean up existing instances of unnecessary parentheses in if
statement and change order of conditionals to make it easier to read

The opening /* should be followed by a single space and the closing */
should be preceded with a single space.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index b0a7f315bfbe..212b3822d180 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -485,7 +485,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key);
 struct virtchnl_rss_lut {
 	u16 vsi_id;
 	u16 lut_entries;
-	u8 lut[1];        /* RSS lookup table*/
+	u8 lut[1];        /* RSS lookup table */
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut);
@@ -819,7 +819,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 		return VIRTCHNL_ERR_PARAM;
 	}
 	/* few more checks */
-	if ((valid_len != msglen) || (err_msg_format))
+	if (err_msg_format || valid_len != msglen)
 		return VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH;
 
 	return 0;
-- 
2.17.0

^ permalink raw reply related

* [net-next 04/11] i40e: remove duplicate pfc stats
From: Jeff Kirsher @ 2018-05-14 15:27 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180514152747.23154-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The pfc related priority stats are already handled separately as these
stats are actually arrays of length I40E_MAX_USER_PRIORITY. Thus,
including them within i40e_gstrings_stats will just duplicate data.

Worse, the sizeof will be incorrect, as it will be the total size of the
stat arrays, which in this case is 8 * sizeof(u64), so we will only copy
the stat contents as if they were a u32.

Since we already correctly handle these stats else where, remove them
from the i40e_gstrings_stats.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index a62142f033d2..0f237397f52b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -103,10 +103,6 @@ static const struct i40e_stats i40e_gstrings_stats[] = {
 	I40E_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
 	I40E_PF_STAT("link_xon_tx", stats.link_xon_tx),
 	I40E_PF_STAT("link_xoff_tx", stats.link_xoff_tx),
-	I40E_PF_STAT("priority_xon_rx", stats.priority_xon_rx),
-	I40E_PF_STAT("priority_xoff_rx", stats.priority_xoff_rx),
-	I40E_PF_STAT("priority_xon_tx", stats.priority_xon_tx),
-	I40E_PF_STAT("priority_xoff_tx", stats.priority_xoff_tx),
 	I40E_PF_STAT("rx_size_64", stats.rx_size_64),
 	I40E_PF_STAT("rx_size_127", stats.rx_size_127),
 	I40E_PF_STAT("rx_size_255", stats.rx_size_255),
-- 
2.17.0

^ permalink raw reply related

* [net-next 03/11] i40e: calculate ethtool stats size in a separate function
From: Jeff Kirsher @ 2018-05-14 15:27 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180514152747.23154-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Use a separate function to calculate the number of stats for
a particular device. This helps reduce the clutter in
i40e_get_sset_count().

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 28 ++++++++++++-------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index fc6a5eef141c..a62142f033d2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1658,6 +1658,23 @@ static int i40e_set_ringparam(struct net_device *netdev,
 	return err;
 }
 
+static int i40e_get_stats_count(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+
+	if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1) {
+		if (pf->lan_veb != I40E_NO_VEB &&
+		    pf->flags & I40E_FLAG_VEB_STATS_ENABLED)
+			return I40E_PF_STATS_LEN(netdev) + I40E_VEB_STATS_TOTAL;
+		else
+			return I40E_PF_STATS_LEN(netdev);
+	} else {
+		return I40E_VSI_STATS_LEN(netdev);
+	}
+}
+
 static int i40e_get_sset_count(struct net_device *netdev, int sset)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
@@ -1668,16 +1685,7 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset)
 	case ETH_SS_TEST:
 		return I40E_TEST_LEN;
 	case ETH_SS_STATS:
-		if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1) {
-			int len = I40E_PF_STATS_LEN(netdev);
-
-			if ((pf->lan_veb != I40E_NO_VEB) &&
-			    (pf->flags & I40E_FLAG_VEB_STATS_ENABLED))
-				len += I40E_VEB_STATS_TOTAL;
-			return len;
-		} else {
-			return I40E_VSI_STATS_LEN(netdev);
-		}
+		return i40e_get_stats_count(netdev);
 	case ETH_SS_PRIV_FLAGS:
 		return I40E_PRIV_FLAGS_STR_LEN +
 			(pf->hw.pf_id == 0 ? I40E_GL_PRIV_FLAGS_STR_LEN : 0);
-- 
2.17.0

^ permalink raw reply related

* [net-next 02/11] i40evf: Fix client header define
From: Jeff Kirsher @ 2018-05-14 15:27 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, nhorman, sassmann, jogreene
In-Reply-To: <20180514152747.23154-1-jeffrey.t.kirsher@intel.com>

Fix up the VF client header define, since it is the same as the PF
client header.

Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_client.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.h b/drivers/net/ethernet/intel/i40evf/i40evf_client.h
index fc6592c3de9c..5585f362048a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_client.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_client.h
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
-#ifndef _I40E_CLIENT_H_
-#define _I40E_CLIENT_H_
+#ifndef _I40EVF_CLIENT_H_
+#define _I40EVF_CLIENT_H_
 
 #define I40EVF_CLIENT_STR_LENGTH 10
 
@@ -166,4 +166,4 @@ struct i40e_client {
 /* used by clients */
 int i40evf_register_client(struct i40e_client *client);
 int i40evf_unregister_client(struct i40e_client *client);
-#endif /* _I40E_CLIENT_H_ */
+#endif /* _I40EVF_CLIENT_H_ */
-- 
2.17.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox