* [PATCH net-next 01/12] net: sched: flower: don't check for rtnl on head dereference
From: Vlad Buslov @ 2019-02-14 7:47 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, Vlad Buslov
In-Reply-To: <20190214074712.17846-1-vladbu@mellanox.com>
Flower classifier only changes root pointer during init and destroy. Cls
API implements reference counting for tcf_proto, so there is no danger of
concurrent access to tp when it is being destroyed, even without protection
provided by rtnl lock.
Implement new function fl_head_dereference() to dereference tp->root
without checking for rtnl lock. Use it in all flower function that obtain
head pointer instead of rtnl_dereference().
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/cls_flower.c | 24 +++++++++++++++++-------
1 file changed, 17 insertions(+), 7 deletions(-)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 32fa3e20adc5..88d7af78ba7e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -433,10 +433,20 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
cls_flower.stats.lastused);
}
+static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
+{
+ /* Flower classifier only changes root pointer during init and destroy.
+ * Cls API implements reference counting for tcf_proto, so there is no
+ * danger of concurrent access to tp when it is being destroyed, even
+ * without protection provided by rtnl lock.
+ */
+ return rcu_dereference_protected(tp->root, 1);
+}
+
static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
bool async = tcf_exts_get_net(&f->exts);
bool last;
@@ -468,7 +478,7 @@ static void fl_destroy_sleepable(struct work_struct *work)
static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct fl_flow_mask *mask, *next_mask;
struct cls_fl_filter *f, *next;
@@ -486,7 +496,7 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
static void *fl_get(struct tcf_proto *tp, u32 handle)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
return idr_find(&head->handle_idr, handle);
}
@@ -1304,7 +1314,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
void **arg, bool ovr, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct cls_fl_filter *fold = *arg;
struct cls_fl_filter *fnew;
struct fl_flow_mask *mask;
@@ -1441,7 +1451,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
bool rtnl_held, struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct cls_fl_filter *f = arg;
rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
@@ -1454,7 +1464,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
bool rtnl_held)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct cls_fl_filter *f;
arg->count = arg->skip;
@@ -1473,7 +1483,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_head *head = fl_head_dereference(tp);
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
struct fl_flow_mask *mask;
--
2.13.6
^ permalink raw reply related
* [PATCH net-next 06/12] net: sched: flower: handle concurrent mask insertion
From: Vlad Buslov @ 2019-02-14 7:47 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, Vlad Buslov
In-Reply-To: <20190214074712.17846-1-vladbu@mellanox.com>
Without rtnl lock protection masks with same key can be inserted
concurrently. Insert temporary mask with reference count zero to masks
hashtable. This will cause any concurrent modifications to retry.
Wait for rcu grace period to complete after removing temporary mask from
masks hashtable to accommodate concurrent readers.
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Suggested-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/cls_flower.c | 41 ++++++++++++++++++++++++++++++++++-------
1 file changed, 34 insertions(+), 7 deletions(-)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b41b72e894a6..2b032303f8d5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1301,11 +1301,14 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
INIT_LIST_HEAD_RCU(&newmask->filters);
refcount_set(&newmask->refcnt, 1);
- err = rhashtable_insert_fast(&head->ht, &newmask->ht_node,
- mask_ht_params);
+ err = rhashtable_replace_fast(&head->ht, &mask->ht_node,
+ &newmask->ht_node, mask_ht_params);
if (err)
goto errout_destroy;
+ /* Wait until any potential concurrent users of mask are finished */
+ synchronize_rcu();
+
list_add_tail_rcu(&newmask->list, &head->masks);
return newmask;
@@ -1327,19 +1330,36 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
int ret = 0;
rcu_read_lock();
- fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params);
+
+ /* Insert mask as temporary node to prevent concurrent creation of mask
+ * with same key. Any concurrent lookups with same key will return
+ * EAGAIN because mask's refcnt is zero. It is safe to insert
+ * stack-allocated 'mask' to masks hash table because we call
+ * synchronize_rcu() before returning from this function (either in case
+ * of error or after replacing it with heap-allocated mask in
+ * fl_create_new_mask()).
+ */
+ fnew->mask = rhashtable_lookup_get_insert_fast(&head->ht,
+ &mask->ht_node,
+ mask_ht_params);
if (!fnew->mask) {
rcu_read_unlock();
- if (fold)
- return -EINVAL;
+ if (fold) {
+ ret = -EINVAL;
+ goto errout_cleanup;
+ }
newmask = fl_create_new_mask(head, mask);
- if (IS_ERR(newmask))
- return PTR_ERR(newmask);
+ if (IS_ERR(newmask)) {
+ ret = PTR_ERR(newmask);
+ goto errout_cleanup;
+ }
fnew->mask = newmask;
return 0;
+ } else if (IS_ERR(fnew->mask)) {
+ ret = PTR_ERR(fnew->mask);
} else if (fold && fold->mask != fnew->mask) {
ret = -EINVAL;
} else if (!refcount_inc_not_zero(&fnew->mask->refcnt)) {
@@ -1348,6 +1368,13 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
}
rcu_read_unlock();
return ret;
+
+errout_cleanup:
+ rhashtable_remove_fast(&head->ht, &mask->ht_node,
+ mask_ht_params);
+ /* Wait until any potential concurrent users of mask are finished */
+ synchronize_rcu();
+ return ret;
}
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
--
2.13.6
^ permalink raw reply related
* [PATCH net-next 11/12] net: sched: flower: track rtnl lock state
From: Vlad Buslov @ 2019-02-14 7:47 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, Vlad Buslov
In-Reply-To: <20190214074712.17846-1-vladbu@mellanox.com>
Use 'rtnl_held' flag to track if caller holds rtnl lock. Propagate the flag
to internal functions that need to know rtnl lock state. Take rtnl lock
before calling tcf APIs that require it (hw offload, bind filter, etc.).
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/cls_flower.c | 68 +++++++++++++++++++++++++++++++++++---------------
1 file changed, 48 insertions(+), 20 deletions(-)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 556f7a1c694a..8b53959ca716 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -374,11 +374,14 @@ static void fl_destroy_filter_work(struct work_struct *work)
}
static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
+ if (!rtnl_held)
+ rtnl_lock();
+
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
@@ -387,16 +390,22 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
spin_lock(&tp->lock);
tcf_block_offload_dec(block, &f->flags);
spin_unlock(&tp->lock);
+
+ if (!rtnl_held)
+ rtnl_unlock();
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
- struct cls_fl_filter *f,
+ struct cls_fl_filter *f, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(f->flags);
- int err;
+ int err = 0;
+
+ if (!rtnl_held)
+ rtnl_lock();
cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
if (!cls_flower.rule)
@@ -420,26 +429,37 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
kfree(cls_flower.rule);
if (err < 0) {
- fl_hw_destroy_filter(tp, f, NULL);
- return err;
+ fl_hw_destroy_filter(tp, f, true, NULL);
+ goto errout;
} else if (err > 0) {
f->in_hw_count = err;
+ err = 0;
spin_lock(&tp->lock);
tcf_block_offload_inc(block, &f->flags);
spin_unlock(&tp->lock);
}
- if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
- return -EINVAL;
+ if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) {
+ err = -EINVAL;
+ goto errout;
+ }
- return 0;
+errout:
+ if (!rtnl_held)
+ rtnl_unlock();
+
+ return err;
}
-static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
+ bool rtnl_held)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
+ if (!rtnl_held)
+ rtnl_lock();
+
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
@@ -450,6 +470,9 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
cls_flower.stats.pkts,
cls_flower.stats.lastused);
+
+ if (!rtnl_held)
+ rtnl_unlock();
}
static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
@@ -506,7 +529,8 @@ static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
}
static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
- bool *last, struct netlink_ext_ack *extack)
+ bool *last, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = fl_head_dereference(tp);
bool async = tcf_exts_get_net(&f->exts);
@@ -525,7 +549,7 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
(*last) = fl_mask_put(head, f->mask, async);
if (!tc_skip_hw(f->flags))
- fl_hw_destroy_filter(tp, f, extack);
+ fl_hw_destroy_filter(tp, f, rtnl_held, extack);
tcf_unbind_filter(tp, &f->res);
__fl_put(f);
} else {
@@ -557,7 +581,7 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
list_for_each_entry_safe(mask, next_mask, &head->masks, list) {
list_for_each_entry_safe(f, next, &mask->filters, list) {
- __fl_delete(tp, f, &last, extack);
+ __fl_delete(tp, f, &last, rtnl_held, extack);
if (last)
break;
}
@@ -1397,19 +1421,23 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr,
- struct fl_flow_tmplt *tmplt,
+ struct fl_flow_tmplt *tmplt, bool rtnl_held,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true,
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
extack);
if (err < 0)
return err;
if (tb[TCA_FLOWER_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+ if (!rtnl_held)
+ rtnl_lock();
tcf_bind_filter(tp, &f->res, base);
+ if (!rtnl_held)
+ rtnl_unlock();
}
err = fl_set_key(net, tb, &f->key, &mask->key, extack);
@@ -1488,7 +1516,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
- tp->chain->tmplt_priv, extack);
+ tp->chain->tmplt_priv, rtnl_held, extack);
if (err)
goto errout;
@@ -1497,7 +1525,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout;
if (!tc_skip_hw(fnew->flags)) {
- err = fl_hw_replace_filter(tp, fnew, extack);
+ err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
if (err)
goto errout_mask;
}
@@ -1541,7 +1569,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
fl_mask_put(head, fold->mask, true);
if (!tc_skip_hw(fold->flags))
- fl_hw_destroy_filter(tp, fold, NULL);
+ fl_hw_destroy_filter(tp, fold, rtnl_held, NULL);
tcf_unbind_filter(tp, &fold->res);
tcf_exts_get_net(&fold->exts);
/* Caller holds reference to fold, so refcnt is always > 0
@@ -1598,7 +1626,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
errout_hw:
spin_unlock(&tp->lock);
if (!tc_skip_hw(fnew->flags))
- fl_hw_destroy_filter(tp, fnew, NULL);
+ fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
errout_mask:
fl_mask_put(head, fnew->mask, true);
errout:
@@ -1622,7 +1650,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
bool last_on_mask;
int err = 0;
- err = __fl_delete(tp, f, &last_on_mask, extack);
+ err = __fl_delete(tp, f, &last_on_mask, rtnl_held, extack);
*last = list_empty(&head->masks);
__fl_put(f);
@@ -2262,7 +2290,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
spin_unlock(&tp->lock);
if (!skip_hw)
- fl_hw_update_stats(tp, f);
+ fl_hw_update_stats(tp, f, rtnl_held);
if (nla_put_u32(skb, TCA_FLOWER_IN_HW_COUNT, f->in_hw_count))
goto nla_put_failure;
--
2.13.6
^ permalink raw reply related
* [PATCH net-next 00/12] Refactor flower classifier to remove dependency on rtnl lock
From: Vlad Buslov @ 2019-02-14 7:47 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, Vlad Buslov
Currently, all netlink protocol handlers for updating rules, actions and
qdiscs are protected with single global rtnl lock which removes any
possibility for parallelism. This patch set is a third step to remove
rtnl lock dependency from TC rules update path.
Recently, new rtnl registration flag RTNL_FLAG_DOIT_UNLOCKED was added.
TC rule update handlers (RTM_NEWTFILTER, RTM_DELTFILTER, etc.) are
already registered with this flag and only take rtnl lock when qdisc or
classifier requires it. Classifiers can indicate that their ops
callbacks don't require caller to hold rtnl lock by setting the
TCF_PROTO_OPS_DOIT_UNLOCKED flag. The goal of this change is to refactor
flower classifier to support unlocked execution and register it with
unlocked flag.
This patch set implements following changes to make flower classifier
concurrency-safe:
- Implement reference counting for individual filters. Change fl_get to
take reference to filter. Implement tp->ops->put callback that was
introduced in cls API patch set to release reference to flower filter.
- Use tp->lock spinlock to protect internal classifier data structures
from concurrent modification.
- Handle concurrent tcf proto deletion by returning EAGAIN, which will
cause cls API to retry and create new proto instance or return error
to the user (depending on message type).
- Handle concurrent insertion of filter with same priority and handle by
returning EAGAIN, which will cause cls API to lookup filter again and
process it accordingly to netlink message flags.
- Extend flower mask with reference counting and protect masks list with
masks_lock spinlock.
- Prevent concurrent mask insertion by inserting temporary value to
masks hash table. This is necessary because mask initialization is a
sleeping operation and cannot be done while holding tp->lock.
Tcf hw offloads API is not changed by this patch set and still requires
caller to hold rtnl lock. Refactored flower classifier tracks rtnl lock
state by means of 'rtnl_held' flag provided by cls API and obtains the
lock before calling hw offloads.
With these changes flower classifier is safely registered with
TCF_PROTO_OPS_DOIT_UNLOCKED flag in last patch.
Github: [https://github.com/vbuslov/linux/tree/unlocked_flower_cong_1]
Vlad Buslov (12):
net: sched: flower: don't check for rtnl on head dereference
net: sched: flower: refactor fl_change
net: sched: flower: introduce reference counting for filters
net: sched: flower: track filter deletion with flag
net: sched: flower: add reference counter to flower mask
net: sched: flower: handle concurrent mask insertion
net: sched: flower: protect masks list with spinlock
net: sched: flower: handle concurrent filter insertion in fl_change
net: sched: flower: handle concurrent tcf proto deletion
net: sched: flower: protect flower classifier state with spinlock
net: sched: flower: track rtnl lock state
net: sched: flower: set unlocked flag for flower proto ops
net/sched/cls_flower.c | 424 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 321 insertions(+), 103 deletions(-)
--
2.13.6
^ permalink raw reply
* [PATCH net-next 05/12] net: sched: flower: add reference counter to flower mask
From: Vlad Buslov @ 2019-02-14 7:47 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, Vlad Buslov
In-Reply-To: <20190214074712.17846-1-vladbu@mellanox.com>
Extend fl_flow_mask structure with reference counter to allow parallel
modification without relying on rtnl lock. Use rcu read lock to safely
lookup mask and increment reference counter in order to accommodate
concurrent deletes.
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/cls_flower.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index fa5465f890e1..b41b72e894a6 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -76,6 +76,7 @@ struct fl_flow_mask {
struct list_head filters;
struct rcu_work rwork;
struct list_head list;
+ refcount_t refcnt;
};
struct fl_flow_tmplt {
@@ -320,6 +321,7 @@ static int fl_init(struct tcf_proto *tp)
static void fl_mask_free(struct fl_flow_mask *mask)
{
+ WARN_ON(!list_empty(&mask->filters));
rhashtable_destroy(&mask->ht);
kfree(mask);
}
@@ -335,7 +337,7 @@ static void fl_mask_free_work(struct work_struct *work)
static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
bool async)
{
- if (!list_empty(&mask->filters))
+ if (!refcount_dec_and_test(&mask->refcnt))
return false;
rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
@@ -1298,6 +1300,7 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
INIT_LIST_HEAD_RCU(&newmask->filters);
+ refcount_set(&newmask->refcnt, 1);
err = rhashtable_insert_fast(&head->ht, &newmask->ht_node,
mask_ht_params);
if (err)
@@ -1321,9 +1324,13 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
struct fl_flow_mask *mask)
{
struct fl_flow_mask *newmask;
+ int ret = 0;
+ rcu_read_lock();
fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params);
if (!fnew->mask) {
+ rcu_read_unlock();
+
if (fold)
return -EINVAL;
@@ -1332,11 +1339,15 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
return PTR_ERR(newmask);
fnew->mask = newmask;
+ return 0;
} else if (fold && fold->mask != fnew->mask) {
- return -EINVAL;
+ ret = -EINVAL;
+ } else if (!refcount_inc_not_zero(&fnew->mask->refcnt)) {
+ /* Mask was deleted concurrently, try again */
+ ret = -EAGAIN;
}
-
- return 0;
+ rcu_read_unlock();
+ return ret;
}
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
@@ -1473,6 +1484,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
list_replace_rcu(&fold->list, &fnew->list);
fold->deleted = true;
+ fl_mask_put(head, fold->mask, true);
if (!tc_skip_hw(fold->flags))
fl_hw_destroy_filter(tp, fold, NULL);
tcf_unbind_filter(tp, &fold->res);
@@ -1522,7 +1534,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (!tc_skip_hw(fnew->flags))
fl_hw_destroy_filter(tp, fnew, NULL);
errout_mask:
- fl_mask_put(head, fnew->mask, false);
+ fl_mask_put(head, fnew->mask, true);
errout:
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
--
2.13.6
^ permalink raw reply related
* [PATCH net-next 03/12] net: sched: flower: introduce reference counting for filters
From: Vlad Buslov @ 2019-02-14 7:47 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, Vlad Buslov
In-Reply-To: <20190214074712.17846-1-vladbu@mellanox.com>
Extend flower filters with reference counting in order to remove dependency
on rtnl lock in flower ops and allow to modify filters concurrently.
Reference to flower filter can be taken/released concurrently as soon as it
is marked as 'unlocked' by last patch in this series. Use atomic reference
counter type to make concurrent modifications safe.
Always take reference to flower filter while working with it:
- Modify fl_get() to take reference to filter.
- Implement tp->put() callback as fl_put() function to allow cls API to
release reference taken by fl_get().
- Modify fl_change() to assume that caller holds reference to fold and take
reference to fnew.
- Take reference to filter while using it in fl_walk().
Implement helper functions to get/put filter reference counter.
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/cls_flower.c | 95 ++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 81 insertions(+), 14 deletions(-)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 91596a6271f8..b216ed26f344 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/rhashtable.h>
#include <linux/workqueue.h>
+#include <linux/refcount.h>
#include <linux/if_ether.h>
#include <linux/in6.h>
@@ -104,6 +105,11 @@ struct cls_fl_filter {
u32 in_hw_count;
struct rcu_work rwork;
struct net_device *hw_dev;
+ /* Flower classifier is unlocked, which means that its reference counter
+ * can be changed concurrently without any kind of external
+ * synchronization. Use atomic reference counter to be concurrency-safe.
+ */
+ refcount_t refcnt;
};
static const struct rhashtable_params mask_ht_params = {
@@ -443,6 +449,47 @@ static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
return rcu_dereference_protected(tp->root, 1);
}
+static void __fl_put(struct cls_fl_filter *f)
+{
+ if (!refcount_dec_and_test(&f->refcnt))
+ return;
+
+ if (tcf_exts_get_net(&f->exts))
+ tcf_queue_work(&f->rwork, fl_destroy_filter_work);
+ else
+ __fl_destroy_filter(f);
+}
+
+static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
+{
+ struct cls_fl_filter *f;
+
+ rcu_read_lock();
+ f = idr_find(&head->handle_idr, handle);
+ if (f && !refcount_inc_not_zero(&f->refcnt))
+ f = NULL;
+ rcu_read_unlock();
+
+ return f;
+}
+
+static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
+ unsigned long *handle)
+{
+ struct cls_fl_head *head = fl_head_dereference(tp);
+ struct cls_fl_filter *f;
+
+ rcu_read_lock();
+ /* don't return filters that are being deleted */
+ while ((f = idr_get_next_ul(&head->handle_idr,
+ handle)) != NULL &&
+ !refcount_inc_not_zero(&f->refcnt))
+ ++(*handle);
+ rcu_read_unlock();
+
+ return f;
+}
+
static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
struct netlink_ext_ack *extack)
{
@@ -456,10 +503,7 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
if (!tc_skip_hw(f->flags))
fl_hw_destroy_filter(tp, f, extack);
tcf_unbind_filter(tp, &f->res);
- if (async)
- tcf_queue_work(&f->rwork, fl_destroy_filter_work);
- else
- __fl_destroy_filter(f);
+ __fl_put(f);
return last;
}
@@ -494,11 +538,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
tcf_queue_work(&head->rwork, fl_destroy_sleepable);
}
+static void fl_put(struct tcf_proto *tp, void *arg)
+{
+ struct cls_fl_filter *f = arg;
+
+ __fl_put(f);
+}
+
static void *fl_get(struct tcf_proto *tp, u32 handle)
{
struct cls_fl_head *head = fl_head_dereference(tp);
- return idr_find(&head->handle_idr, handle);
+ return __fl_get(head, handle);
}
static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -1321,12 +1372,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tb;
int err;
- if (!tca[TCA_OPTIONS])
- return -EINVAL;
+ if (!tca[TCA_OPTIONS]) {
+ err = -EINVAL;
+ goto errout_fold;
+ }
mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
- if (!mask)
- return -ENOBUFS;
+ if (!mask) {
+ err = -ENOBUFS;
+ goto errout_fold;
+ }
tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
if (!tb) {
@@ -1349,6 +1404,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
err = -ENOBUFS;
goto errout_tb;
}
+ refcount_set(&fnew->refcnt, 1);
err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
if (err < 0)
@@ -1381,6 +1437,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(fnew->flags))
fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+ refcount_inc(&fnew->refcnt);
if (fold) {
fnew->handle = handle;
@@ -1399,7 +1456,11 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
fl_hw_destroy_filter(tp, fold, NULL);
tcf_unbind_filter(tp, &fold->res);
tcf_exts_get_net(&fold->exts);
- tcf_queue_work(&fold->rwork, fl_destroy_filter_work);
+ /* Caller holds reference to fold, so refcnt is always > 0
+ * after this.
+ */
+ refcount_dec(&fold->refcnt);
+ __fl_put(fold);
} else {
if (__fl_lookup(fnew->mask, &fnew->mkey)) {
err = -EEXIST;
@@ -1448,6 +1509,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
kfree(tb);
errout_mask_alloc:
kfree(mask);
+errout_fold:
+ if (fold)
+ __fl_put(fold);
return err;
}
@@ -1461,24 +1525,26 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
f->mask->filter_ht_params);
__fl_delete(tp, f, extack);
*last = list_empty(&head->masks);
+ __fl_put(f);
+
return 0;
}
static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
bool rtnl_held)
{
- struct cls_fl_head *head = fl_head_dereference(tp);
struct cls_fl_filter *f;
arg->count = arg->skip;
- while ((f = idr_get_next_ul(&head->handle_idr,
- &arg->cookie)) != NULL) {
+ while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) {
if (arg->fn(tp, f, arg) < 0) {
+ __fl_put(f);
arg->stop = 1;
break;
}
- arg->cookie = f->handle + 1;
+ __fl_put(f);
+ arg->cookie++;
arg->count++;
}
}
@@ -2148,6 +2214,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.init = fl_init,
.destroy = fl_destroy,
.get = fl_get,
+ .put = fl_put,
.change = fl_change,
.delete = fl_delete,
.walk = fl_walk,
--
2.13.6
^ permalink raw reply related
* [PATCH net-next 12/12] net: sched: flower: set unlocked flag for flower proto ops
From: Vlad Buslov @ 2019-02-14 7:47 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, Vlad Buslov
In-Reply-To: <20190214074712.17846-1-vladbu@mellanox.com>
Set TCF_PROTO_OPS_DOIT_UNLOCKED for flower classifier to indicate that its
ops callbacks don't require caller to hold rtnl lock.
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/cls_flower.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 8b53959ca716..360cac828cad 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2362,6 +2362,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.tmplt_destroy = fl_tmplt_destroy,
.tmplt_dump = fl_tmplt_dump,
.owner = THIS_MODULE,
+ .flags = TCF_PROTO_OPS_DOIT_UNLOCKED,
};
static int __init cls_fl_init(void)
--
2.13.6
^ permalink raw reply related
* [PATCH net-next 08/12] net: sched: flower: handle concurrent filter insertion in fl_change
From: Vlad Buslov @ 2019-02-14 7:47 UTC (permalink / raw)
To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, Vlad Buslov
In-Reply-To: <20190214074712.17846-1-vladbu@mellanox.com>
Check if user specified a handle and another filter with the same handle
was inserted concurrently. Return EAGAIN to retry filter processing (in
case it is an overwrite request).
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/cls_flower.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index fc6371a9b0f9..114cb7876133 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1539,6 +1539,15 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
/* user specifies a handle and it doesn't exist */
err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
handle, GFP_ATOMIC);
+
+ /* Filter with specified handle was concurrently
+ * inserted after initial check in cls_api. This is not
+ * necessarily an error if NLM_F_EXCL is not set in
+ * message flags. Returning EAGAIN will cause cls_api to
+ * try to update concurrently inserted rule.
+ */
+ if (err == -ENOSPC)
+ err = -EAGAIN;
} else {
handle = 1;
err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
--
2.13.6
^ permalink raw reply related
* Re: [PATCH bpf-next v4 0/2] libbpf: adding AF_XDP support
From: Magnus Karlsson @ 2019-02-14 8:25 UTC (permalink / raw)
To: Jonathan Lemon
Cc: Magnus Karlsson, Björn Töpel, ast, Daniel Borkmann,
Network Development, Jakub Kicinski, Björn Töpel,
Zhang, Qi Z, Jesper Dangaard Brouer, xiaolong.ye
In-Reply-To: <3213D100-3861-4963-9490-EE445B731E63@gmail.com>
On Wed, Feb 13, 2019 at 9:49 PM Jonathan Lemon <jonathan.lemon@gmail.com> wrote:
>
> On 13 Feb 2019, at 3:32, Magnus Karlsson wrote:
>
> > On Mon, Feb 11, 2019 at 9:44 PM Jonathan Lemon
> > <jonathan.lemon@gmail.com> wrote:
> >>
> >> On 8 Feb 2019, at 5:05, Magnus Karlsson wrote:
> >>
> >>> This patch proposes to add AF_XDP support to libbpf. The main reason
> >>> for this is to facilitate writing applications that use AF_XDP by
> >>> offering higher-level APIs that hide many of the details of the
> >>> AF_XDP
> >>> uapi. This is in the same vein as libbpf facilitates XDP adoption by
> >>> offering easy-to-use higher level interfaces of XDP
> >>> functionality. Hopefully this will facilitate adoption of AF_XDP,
> >>> make
> >>> applications using it simpler and smaller, and finally also make it
> >>> possible for applications to benefit from optimizations in the
> >>> AF_XDP
> >>> user space access code. Previously, people just copied and pasted
> >>> the
> >>> code from the sample application into their application, which is
> >>> not
> >>> desirable.
> >>
> >> I like the idea of encapsulating the boilerplate logic in a library.
> >>
> >> I do think there is an important missing piece though - there should
> >> be
> >> some code which queries the netdev for how many queues are attached,
> >> and
> >> create the appropriate number of umem/AF_XDP sockets.
> >>
> >> I ran into this issue when testing the current AF_XDP code - on my
> >> test
> >> boxes, the mlx5 card has 55 channels (aka queues), so when the test
> >> program
> >> binds only to channel 0, nothing works as expected, since not all
> >> traffic
> >> is being intercepted. While obvious in hindsight, this took a while
> >> to
> >> track down.
> >
> > Yes, agreed. You are not the first one to stumble upon this problem
> > :-). Let me think a little bit on how to solve this in a good way. We
> > need this to be simple and intuitive, as you say.
>
> Has any investigation been done on using some variant of MPSC
> implementation
> as an intermediate form for AF_XDP? E.g.: something like LCRQ or the
> bulkQ
> in bpf devmap/cpumap. I'm aware that this would be slightly slower, as
> it
> would introduce a lock in the path, but I'd think that having DEVMAP,
> CPUMAP
> and XSKMAP all behave the same way would add more flexibility.
Not as far as I know. But adding the option of having a MPSC or even
MPMC queues has been on the todo list for a while, however, the
current focus of Björn and myself is to upstream the performance
improvements from the Linux Plumbers paper, improve ease-of-use, and
help Jesper et al. with the per-queue XDP program implementation
(which will increase both performance and ease-of-use). If anyone has
some spare cycles out there, please go ahead and give MPSC or MPMC
queues a try :-).
/Magnus
> Ideally, if the configuration matches the underlying hardware, then the
> implementation would reduce to the current setup (and allow ZC
> implementations),
> but a non-matching configuration would still work - as opposed to the
> current
> situation.
> --
> Jonathan
^ permalink raw reply
* Re: [PATCH 2/2] net: Replace dev_kfree_skb_any by dev_consume_skb_any
From: Sergei Shtylyov @ 2019-02-14 9:13 UTC (permalink / raw)
To: Huang Zijiang, linux-net-drivers
Cc: ecree, bkenward, davem, netdev, linux-kernel, wang.yi59
In-Reply-To: <1550126533-28462-1-git-send-email-huang.zijiang@zte.com.cn>
Hello!
On 14.02.2019 9:42, Huang Zijiang wrote:
> The skb should be freed by dev_consume_skb_any() efx_tx_tso_fallback()
^ in?
> when skb is still used. The skb is be replaced by segments, so the
^^ will?
> original skb should be consumed(not drop).
>
> Signed-off-by: Huang Zijiang <huang.zijiang@zte.com.cn>
[...]
MBR, Sergei
^ permalink raw reply
* Re: [RFC PATCH] net act_vlan: use correct len in skb_pull
From: Toshiaki Makita @ 2019-02-14 9:16 UTC (permalink / raw)
To: Zahari Doychev, netdev, bridge, nikolay, roopa, jhs, jiri,
xiyou.wangcong
Cc: johannes
In-Reply-To: <20190213195102.1917-1-zahari.doychev@linux.com>
On 2019/02/14 4:51, Zahari Doychev wrote:
> The bridge and VLAN code expects that skb->data points to the start of the
> VLAN header instead of the next (network) header. Currently after
> tcf_vlan_act() on ingress filter skb->data points to the next network
> header. In this case the Linux bridge does not forward correctly double
> tagged VLAN packets added using tc vlan action as the outer vlan tag from
> the skb is inserted at the wrong offset after the vlan tag in the payload.
> Making skb->data to point to the VLAN header in tcf_vlan_act() by using
> ETH_HLEN in skb_pull_rcsum() fixes the problem.
>
> The following commands were used for testing:
>
> ip link add name br0 type bridge vlan_filtering 1
> ip link set dev br0 up
>
> ip link set dev net0 up
> ip link set dev net0 master br0
>
> ip link set dev net1 up
> ip link set dev net1 master br0
>
> bridge vlan add dev net0 vid 100 master
> bridge vlan add dev br0 vid 100 self
> bridge vlan add dev net1 vid 100 master
>
> tc qdisc add dev net0 handle ffff: clsact
> tc qdisc add dev net1 handle ffff: clsact
>
> tc filter add dev net0 ingress pref 1 protocol all flower \
> action vlan push id 10 pipe action vlan push id 100
>
> tc filter add dev net0 egress pref 1 protocol 802.1q flower \
> vlan_id 100 vlan_ethtype 802.1q cvlan_id 10 \
> action vlan pop pipe action vlan pop
>
> Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
> ---
> net/sched/act_vlan.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
> index 93fdaf707313..308d7d89f925 100644
> --- a/net/sched/act_vlan.c
> +++ b/net/sched/act_vlan.c
> @@ -86,7 +86,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
>
> out:
> if (skb_at_tc_ingress(skb))
> - skb_pull_rcsum(skb, skb->mac_len);
> + skb_pull_rcsum(skb, ETH_HLEN);
As I said before, it would be safer to remember mac_len and use it later.
__u16 mac_len = skb->mac_len;
...
err = skb_vlan_push(...)
...
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, mac_len);
--
Toshiaki Makita
^ permalink raw reply
* Re: [PATCH 2/2] net: Replace dev_kfree_skb_any by dev_consume_skb_any
From: Bert Kenward @ 2019-02-14 9:34 UTC (permalink / raw)
To: Huang Zijiang, linux-net-drivers
Cc: ecree, davem, netdev, linux-kernel, wang.yi59
In-Reply-To: <1550126533-28462-1-git-send-email-huang.zijiang@zte.com.cn>
On 14/02/2019 06:42, Huang Zijiang wrote:
> The skb should be freed by dev_consume_skb_any() efx_tx_tso_fallback()
> when skb is still used. The skb is be replaced by segments, so the
> original skb should be consumed(not drop).
>
> Signed-off-by: Huang Zijiang <huang.zijiang@zte.com.cn>
Sergei's commit message fixups look good, but apart from that:
Acked-by: Bert Kenward <bkenward@solarflare.com>
> ---
> drivers/net/ethernet/sfc/tx.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
> index c3ad564..ed551f0 100644
> --- a/drivers/net/ethernet/sfc/tx.c
> +++ b/drivers/net/ethernet/sfc/tx.c
> @@ -471,7 +471,7 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue,
> if (IS_ERR(segments))
> return PTR_ERR(segments);
>
> - dev_kfree_skb_any(skb);
> + dev_consume_skb_any(skb);
> skb = segments;
>
> while (skb) {
>
^ permalink raw reply
* Re: [PATCH 2/4] mac80211: Free mpath object when rhashtable insertion fails
From: Herbert Xu @ 2019-02-14 9:41 UTC (permalink / raw)
To: Johannes Berg; +Cc: David Miller, linux-wireless, netdev, j, tgraf
In-Reply-To: <36adaef5c982ba10444d6f837b0d5c55ac953bdf.camel@sipsolutions.net>
On Wed, Feb 13, 2019 at 04:04:29PM +0100, Johannes Berg wrote:
> On Wed, 2019-02-13 at 22:39 +0800, Herbert Xu wrote:
> > + if (ret != -EEXIST)
> > return ERR_PTR(ret);
>
> Surely that should still be "if (ret && ret != -EEXIST)" otherwise you
> return for the success case too? or "else if"?
>
> I'd probably say we should combine all those ifs into something like this?
>
>
> if (ret == 0) {
> sdata->u.mesh.mesh_paths_generation++;
> return new_mpath;
> } else if (ret == -EEXIST) {
> kfree(new_mpath);
> return mpath;
> } else {
> kfree(new_mpath);
> return NULL;
> }
>
>
> Yes, that's a small change in behaviour as to when the generation
> counter is updated, but I'm pretty sure it really only needs updating
> when we inserted something, not if we found an old mpath.
You are right. Let me fix this and repost.
Thanks!
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* RE: [PATCH] can: flexcan: fix timeout when set small bitrate
From: Joakim Zhang @ 2019-02-14 9:56 UTC (permalink / raw)
To: mkl@pengutronix.de, linux-can@vger.kernel.org
Cc: wg@grandegger.com, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, dl-linux-imx
In-Reply-To: <20190131093509.12613-1-qiangqing.zhang@nxp.com>
Kindly Ping...
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
Best Regards,
Joakim Zhang
> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年1月31日 17:37
> To: mkl@pengutronix.de; linux-can@vger.kernel.org
> Cc: wg@grandegger.com; netdev@vger.kernel.org;
> linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>; Joakim
> Zhang <qiangqing.zhang@nxp.com>
> Subject: [PATCH] can: flexcan: fix timeout when set small bitrate
>
> Current we can meet timeout issue when setting a small bitrate like 10000 as
> follows on i.MX6UL EVK board (ipg clock = 66MHZ, per clock = 30MHZ):
> root@imx6ul7d:~# ip link set can0 up type can bitrate 10000 A link change
> request failed with some changes committed already.
> Interface can0 may have been left with an inconsistent configuration, please
> check.
> RTNETLINK answers: Connection timed out
>
> It is caused by calling of flexcan_chip_unfreeze() timeout.
>
> Originally the code is using usleep_range(10, 20) for unfreeze operation, but
> the patch (8badd65 can: flexcan: avoid calling usleep_range from interrupt
> context) changed it into udelay(10) which is only a half delay of before,
> there're also some other delay changes.
>
> After double to FLEXCAN_TIMEOUT_US to 100 can fix the issue.
>
> Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> ---
> drivers/net/can/flexcan.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index
> 2bca867bcfaa..1f2b4db7da88 100644
> --- a/drivers/net/can/flexcan.c
> +++ b/drivers/net/can/flexcan.c
> @@ -166,7 +166,7 @@
> #define FLEXCAN_MB_CNT_LENGTH(x) (((x) & 0xf) << 16)
> #define FLEXCAN_MB_CNT_TIMESTAMP(x) ((x) & 0xffff)
>
> -#define FLEXCAN_TIMEOUT_US (50)
> +#define FLEXCAN_TIMEOUT_US (100)
>
> /* FLEXCAN hardware feature flags
> *
> --
> 2.17.1
^ permalink raw reply
* RE: [PATCH V4] can: flexcan: implement can Runtime PM
From: Joakim Zhang @ 2019-02-14 9:57 UTC (permalink / raw)
To: mkl@pengutronix.de, linux-can@vger.kernel.org
Cc: wg@grandegger.com, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, dl-linux-imx, Aisheng Dong
In-Reply-To: <DB7PR04MB4618BFE57BD34A8B7A708E0FE6830@DB7PR04MB4618.eurprd04.prod.outlook.com>
Kindly Ping...
Best Regards,
Joakim Zhang
> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年1月17日 14:23
> To: mkl@pengutronix.de; linux-can@vger.kernel.org
> Cc: wg@grandegger.com; netdev@vger.kernel.org;
> linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>; Aisheng
> Dong <aisheng.dong@nxp.com>
> Subject: RE: [PATCH V4] can: flexcan: implement can Runtime PM
>
>
> Kindly Ping...
>
> Best Regards,
> Joakim Zhang
>
> > -----Original Message-----
> > From: Joakim Zhang
> > Sent: 2018年11月30日 16:53
> > To: mkl@pengutronix.de; linux-can@vger.kernel.org
> > Cc: wg@grandegger.com; netdev@vger.kernel.org;
> > linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>;
> > Aisheng DONG <aisheng.dong@nxp.com>; Joakim Zhang
> > <qiangqing.zhang@nxp.com>
> > Subject: [PATCH V4] can: flexcan: implement can Runtime PM
> >
> > From: Aisheng Dong <aisheng.dong@nxp.com>
> >
> > Flexcan will be disabled during suspend if no wakeup function required
> > and enabled after resume accordingly. During this period, we could
> > explicitly disable clocks.
> > Since PM is optional, the clock is enabled at probe to guarante the
> > clock is running when PM is not enabled in the kernel.
> >
> > Implement Runtime PM which will:
> > 1) Without CONFIG_PM, clock is running whether Flexcan up or down.
> > 2) With CONFIG_PM, clock enabled while Flexcan up and disabled when
> > Flexcan down.
> > 3) Disable clock when do system suspend and enable clock while system
> > resume.
> > 4) Make Power Domain framework be able to shutdown the corresponding
> > power domain of this device.
> >
> > Signed-off-by: Aisheng Dong <aisheng.dong@nxp.com>
> > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> > ---
> > ChangeLog:
> > V1->V2:
> > *rebased on patch "can: flexcan: add self wakeup support".
> > V2->V3:
> > *fix device fails to probe without CONFIG_PM.
> > V3->V4:
> > *runtime pm enable should ahead of registering device.
> > *disable device even if keeping the clocks on.
> > ---
> > drivers/net/can/flexcan.c | 111
> > +++++++++++++++++++++++++-------------
> > 1 file changed, 73 insertions(+), 38 deletions(-)
> >
> > diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
> > index
> > 0f36eafe3ac1..cad42f20cfe5 100644
> > --- a/drivers/net/can/flexcan.c
> > +++ b/drivers/net/can/flexcan.c
> > @@ -24,6 +24,7 @@
> > #include <linux/of.h>
> > #include <linux/of_device.h>
> > #include <linux/platform_device.h>
> > +#include <linux/pm_runtime.h>
> > #include <linux/regulator/consumer.h> #include <linux/regmap.h>
> >
> > @@ -277,6 +278,7 @@ struct flexcan_priv {
> > u32 reg_imask1_default;
> > u32 reg_imask2_default;
> >
> > + struct device *dev;
> > struct clk *clk_ipg;
> > struct clk *clk_per;
> > const struct flexcan_devtype_data *devtype_data; @@ -444,6 +446,27
> > @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv
> *priv)
> > priv->write(reg_ctrl, ®s->ctrl);
> > }
> >
> > +static int flexcan_clks_enable(const struct flexcan_priv *priv) {
> > + int err;
> > +
> > + err = clk_prepare_enable(priv->clk_ipg);
> > + if (err)
> > + return err;
> > +
> > + err = clk_prepare_enable(priv->clk_per);
> > + if (err)
> > + clk_disable_unprepare(priv->clk_ipg);
> > +
> > + return err;
> > +}
> > +
> > +static void flexcan_clks_disable(const struct flexcan_priv *priv) {
> > + clk_disable_unprepare(priv->clk_ipg);
> > + clk_disable_unprepare(priv->clk_per);
> > +}
> > +
> > static inline int flexcan_transceiver_enable(const struct flexcan_priv *priv)
> {
> > if (!priv->reg_xceiver)
> > @@ -570,19 +593,13 @@ static int flexcan_get_berr_counter(const struct
> > net_device *dev,
> > const struct flexcan_priv *priv = netdev_priv(dev);
> > int err;
> >
> > - err = clk_prepare_enable(priv->clk_ipg);
> > - if (err)
> > + err = pm_runtime_get_sync(priv->dev);
> > + if (err < 0)
> > return err;
> >
> > - err = clk_prepare_enable(priv->clk_per);
> > - if (err)
> > - goto out_disable_ipg;
> > -
> > err = __flexcan_get_berr_counter(dev, bec);
> >
> > - clk_disable_unprepare(priv->clk_per);
> > - out_disable_ipg:
> > - clk_disable_unprepare(priv->clk_ipg);
> > + pm_runtime_put(priv->dev);
> >
> > return err;
> > }
> > @@ -1215,17 +1232,13 @@ static int flexcan_open(struct net_device *dev)
> > struct flexcan_priv *priv = netdev_priv(dev);
> > int err;
> >
> > - err = clk_prepare_enable(priv->clk_ipg);
> > - if (err)
> > + err = pm_runtime_get_sync(priv->dev);
> > + if (err < 0)
> > return err;
> >
> > - err = clk_prepare_enable(priv->clk_per);
> > - if (err)
> > - goto out_disable_ipg;
> > -
> > err = open_candev(dev);
> > if (err)
> > - goto out_disable_per;
> > + goto out_disable_clks;
> >
> > err = request_irq(dev->irq, flexcan_irq, IRQF_SHARED, dev->name, dev);
> > if (err)
> > @@ -1288,10 +1301,8 @@ static int flexcan_open(struct net_device *dev)
> > free_irq(dev->irq, dev);
> > out_close:
> > close_candev(dev);
> > - out_disable_per:
> > - clk_disable_unprepare(priv->clk_per);
> > - out_disable_ipg:
> > - clk_disable_unprepare(priv->clk_ipg);
> > + out_disable_clks:
> > + pm_runtime_put(priv->dev);
> >
> > return err;
> > }
> > @@ -1306,10 +1317,9 @@ static int flexcan_close(struct net_device
> > *dev)
> >
> > can_rx_offload_del(&priv->offload);
> > free_irq(dev->irq, dev);
> > - clk_disable_unprepare(priv->clk_per);
> > - clk_disable_unprepare(priv->clk_ipg);
> >
> > close_candev(dev);
> > + pm_runtime_put(priv->dev);
> >
> > can_led_event(dev, CAN_LED_EVENT_STOP);
> >
> > @@ -1349,18 +1359,14 @@ static int register_flexcandev(struct
> > net_device
> > *dev)
> > struct flexcan_regs __iomem *regs = priv->regs;
> > u32 reg, err;
> >
> > - err = clk_prepare_enable(priv->clk_ipg);
> > + err = flexcan_clks_enable(priv);
> > if (err)
> > return err;
> >
> > - err = clk_prepare_enable(priv->clk_per);
> > - if (err)
> > - goto out_disable_ipg;
> > -
> > /* select "bus clock", chip must be disabled */
> > err = flexcan_chip_disable(priv);
> > if (err)
> > - goto out_disable_per;
> > + goto out_disable_clks;
> > reg = priv->read(®s->ctrl);
> > reg |= FLEXCAN_CTRL_CLK_SRC;
> > priv->write(reg, ®s->ctrl);
> > @@ -1389,14 +1395,13 @@ static int register_flexcandev(struct
> > net_device
> > *dev)
> >
> > err = register_candev(dev);
> >
> > - /* disable core and turn off clocks */
> > - out_chip_disable:
> > flexcan_chip_disable(priv);
> > - out_disable_per:
> > - clk_disable_unprepare(priv->clk_per);
> > - out_disable_ipg:
> > - clk_disable_unprepare(priv->clk_ipg);
> > + return 0;
> >
> > + out_chip_disable:
> > + flexcan_chip_disable(priv);
> > + out_disable_clks:
> > + flexcan_clks_disable(priv);
> > return err;
> > }
> >
> > @@ -1556,6 +1561,7 @@ static int flexcan_probe(struct platform_device
> > *pdev)
> > priv->write = flexcan_write_le;
> > }
> >
> > + priv->dev = &pdev->dev;
> > priv->can.clock.freq = clock_freq;
> > priv->can.bittiming_const = &flexcan_bittiming_const;
> > priv->can.do_set_mode = flexcan_set_mode; @@ -1569,6 +1575,10 @@
> > static int flexcan_probe(struct platform_device *pdev)
> > priv->devtype_data = devtype_data;
> > priv->reg_xceiver = reg_xceiver;
> >
> > + pm_runtime_get_noresume(&pdev->dev);
> > + pm_runtime_set_active(&pdev->dev);
> > + pm_runtime_enable(&pdev->dev);
> > +
> > err = register_flexcandev(dev);
> > if (err) {
> > dev_err(&pdev->dev, "registering netdev failed\n"); @@ -1586,6
> > +1596,7 @@ static int flexcan_probe(struct platform_device *pdev)
> > dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%d)\n",
> > priv->regs, dev->irq);
> >
> > + pm_runtime_put(&pdev->dev);
> > return 0;
> >
> > failed_register:
> > @@ -1598,6 +1609,7 @@ static int flexcan_remove(struct platform_device
> > *pdev)
> > struct net_device *dev = platform_get_drvdata(pdev);
> >
> > unregister_flexcandev(dev);
> > + pm_runtime_disable(&pdev->dev);
> > free_candev(dev);
> >
> > return 0;
> > @@ -1607,7 +1619,7 @@ static int __maybe_unused
> flexcan_suspend(struct
> > device *device) {
> > struct net_device *dev = dev_get_drvdata(device);
> > struct flexcan_priv *priv = netdev_priv(dev);
> > - int err;
> > + int err = 0;
> >
> > if (netif_running(dev)) {
> > /* if wakeup is enabled, enter stop mode @@ -1620,20 +1632,22
> @@
> > static int __maybe_unused flexcan_suspend(struct device *device)
> > err = flexcan_chip_disable(priv);
> > if (err)
> > return err;
> > +
> > + err = pm_runtime_force_suspend(device);
> > }
> > netif_stop_queue(dev);
> > netif_device_detach(dev);
> > }
> > priv->can.state = CAN_STATE_SLEEPING;
> >
> > - return 0;
> > + return err;
> > }
> >
> > static int __maybe_unused flexcan_resume(struct device *device) {
> > struct net_device *dev = dev_get_drvdata(device);
> > struct flexcan_priv *priv = netdev_priv(dev);
> > - int err;
> > + int err = 0;
> >
> > priv->can.state = CAN_STATE_ERROR_ACTIVE;
> > if (netif_running(dev)) {
> > @@ -1642,14 +1656,34 @@ static int __maybe_unused
> > flexcan_resume(struct device *device)
> > if (device_may_wakeup(device)) {
> > disable_irq_wake(dev->irq);
> > } else {
> > - err = flexcan_chip_enable(priv);
> > + err = pm_runtime_force_resume(device);
> > if (err)
> > return err;
> > +
> > + err = flexcan_chip_enable(priv);
> > }
> > }
> > + return err;
> > +}
> > +
> > +static int __maybe_unused flexcan_runtime_suspend(struct device
> > +*device) {
> > + struct net_device *dev = dev_get_drvdata(device);
> > + struct flexcan_priv *priv = netdev_priv(dev);
> > +
> > + flexcan_clks_disable(priv);
> > +
> > return 0;
> > }
> >
> > +static int __maybe_unused flexcan_runtime_resume(struct device
> > +*device) {
> > + struct net_device *dev = dev_get_drvdata(device);
> > + struct flexcan_priv *priv = netdev_priv(dev);
> > +
> > + return flexcan_clks_enable(priv);
> > +}
> > +
> > static int __maybe_unused flexcan_noirq_suspend(struct device *device)
> {
> > struct net_device *dev = dev_get_drvdata(device); @@ -1676,6 +1710,7
> > @@ static int __maybe_unused flexcan_noirq_resume(struct device
> > *device)
> >
> > static const struct dev_pm_ops flexcan_pm_ops = {
> > SET_SYSTEM_SLEEP_PM_OPS(flexcan_suspend, flexcan_resume)
> > + SET_RUNTIME_PM_OPS(flexcan_runtime_suspend,
> > flexcan_runtime_resume,
> > +NULL)
> > SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(flexcan_noirq_suspend,
> > flexcan_noirq_resume) };
> >
> > --
> > 2.17.1
^ permalink raw reply
* RE: [PATCH] can: flexcan: add TX support for variable payload size
From: Joakim Zhang @ 2019-02-14 9:57 UTC (permalink / raw)
To: mkl@pengutronix.de, linux-can@vger.kernel.org
Cc: wg@grandegger.com, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, dl-linux-imx
In-Reply-To: <DB7PR04MB46185CFC0AE2641366B0A51EE6830@DB7PR04MB4618.eurprd04.prod.outlook.com>
Kindly Ping...
Best Regards,
Joakim Zhang
> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年1月17日 14:26
> To: mkl@pengutronix.de; linux-can@vger.kernel.org
> Cc: wg@grandegger.com; netdev@vger.kernel.org;
> linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>
> Subject: RE: [PATCH] can: flexcan: add TX support for variable payload size
>
>
> Kindly Ping...
>
> Best Regards,
> Joakim Zhang
>
> > -----Original Message-----
> > From: Joakim Zhang
> > Sent: 2018年12月12日 14:47
> > To: mkl@pengutronix.de; linux-can@vger.kernel.org
> > Cc: wg@grandegger.com; netdev@vger.kernel.org;
> > linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>; Joakim
> > Zhang <qiangqing.zhang@nxp.com>
> > Subject: [PATCH] can: flexcan: add TX support for variable payload
> > size
> >
> > Now the FlexCAN driver always use last mailbox for TX, it will work
> > well when MB payload size is 8/16 bytes.
> > TX mailbox would change to 13 when MB payload size is 64 bytes to
> > support CANFD. So we may need to set iflag register to add support for
> > variable payload size.
> >
> > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> > ---
> > drivers/net/can/flexcan.c | 42
> > +++++++++++++++++++++++++++++----------
> > 1 file changed, 32 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
> > index
> > 0f36eafe3ac1..13fd085fcf84 100644
> > --- a/drivers/net/can/flexcan.c
> > +++ b/drivers/net/can/flexcan.c
> > @@ -141,7 +141,9 @@
> > #define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8
> > #define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP 0
> > #define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST
> > (FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP + 1)
> > -#define FLEXCAN_IFLAG_MB(x) BIT((x) & 0x1f)
> > +#define FLEXCAN_IFLAG1_MB_NUM 32
> > +#define FLEXCAN_IFLAG1_MB(x) BIT(x)
> > +#define FLEXCAN_IFLAG2_MB(x) BIT((x) & 0x1f)
> > #define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW BIT(7)
> > #define FLEXCAN_IFLAG_RX_FIFO_WARN BIT(6)
> > #define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE BIT(5)
> > @@ -822,9 +824,15 @@ static inline u64
> > flexcan_read_reg_iflag_rx(struct flexcan_priv *priv)
> > struct flexcan_regs __iomem *regs = priv->regs;
> > u32 iflag1, iflag2;
> >
> > - iflag2 = priv->read(®s->iflag2) & priv->reg_imask2_default &
> > - ~FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
> > - iflag1 = priv->read(®s->iflag1) & priv->reg_imask1_default;
> > + if (priv->tx_mb_idx >= FLEXCAN_IFLAG1_MB_NUM) {
> > + iflag2 = priv->read(®s->iflag2) & priv->reg_imask2_default &
> > + ~FLEXCAN_IFLAG2_MB(priv->tx_mb_idx);
> > + iflag1 = priv->read(®s->iflag1) & priv->reg_imask1_default;
> > + } else {
> > + iflag2 = priv->read(®s->iflag2) & priv->reg_imask2_default;
> > + iflag1 = priv->read(®s->iflag1) & priv->reg_imask1_default &
> > + ~FLEXCAN_IFLAG1_MB(priv->tx_mb_idx);
> > + }
> >
> > return (u64)iflag2 << 32 | iflag1;
> > }
> > @@ -836,7 +844,8 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
> > struct flexcan_priv *priv = netdev_priv(dev);
> > struct flexcan_regs __iomem *regs = priv->regs;
> > irqreturn_t handled = IRQ_NONE;
> > - u32 reg_iflag2, reg_esr;
> > + u32 reg_tx_iflag, tx_iflag_idx, reg_esr;
> > + void __iomem *reg_iflag;
> > enum can_state last_state = priv->can.state;
> >
> > /* reception interrupt */
> > @@ -870,10 +879,18 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
> > }
> > }
> >
> > - reg_iflag2 = priv->read(®s->iflag2);
> > + if (priv->tx_mb_idx >= FLEXCAN_IFLAG1_MB_NUM) {
> > + reg_tx_iflag = priv->read(®s->iflag2);
> > + tx_iflag_idx = FLEXCAN_IFLAG2_MB(priv->tx_mb_idx);
> > + reg_iflag = ®s->iflag2;
> > + } else {
> > + reg_tx_iflag = priv->read(®s->iflag1);
> > + tx_iflag_idx = FLEXCAN_IFLAG1_MB(priv->tx_mb_idx);
> > + reg_iflag = ®s->iflag1;
> > + }
> >
> > /* transmission complete interrupt */
> > - if (reg_iflag2 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) {
> > + if (reg_tx_iflag & tx_iflag_idx) {
> > u32 reg_ctrl = priv->read(&priv->tx_mb->can_ctrl);
> >
> > handled = IRQ_HANDLED;
> > @@ -885,7 +902,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
> > /* after sending a RTR frame MB is in RX mode */
> > priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
> > &priv->tx_mb->can_ctrl);
> > - priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), ®s->iflag2);
> > + priv->write(tx_iflag_idx, reg_iflag);
> > netif_wake_queue(dev);
> > }
> >
> > @@ -1244,8 +1261,13 @@ static int flexcan_open(struct net_device *dev)
> > priv->tx_mb_idx = priv->mb_count - 1;
> > priv->tx_mb = flexcan_get_mb(priv, priv->tx_mb_idx);
> >
> > - priv->reg_imask1_default = 0;
> > - priv->reg_imask2_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
> > + if (priv->tx_mb_idx >= FLEXCAN_IFLAG1_MB_NUM) {
> > + priv->reg_imask1_default = 0;
> > + priv->reg_imask2_default = FLEXCAN_IFLAG2_MB(priv->tx_mb_idx);
> > + } else {
> > + priv->reg_imask1_default = FLEXCAN_IFLAG1_MB(priv->tx_mb_idx);
> > + priv->reg_imask2_default = 0;
> > + }
> >
> > priv->offload.mailbox_read = flexcan_mailbox_read;
> >
> > --
> > 2.17.1
^ permalink raw reply
* RE: [PATCH 0/3] can: flexcan: add imx8qm support
From: Joakim Zhang @ 2019-02-14 9:58 UTC (permalink / raw)
To: mkl@pengutronix.de, linux-can@vger.kernel.org
Cc: wg@grandegger.com, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, dl-linux-imx
In-Reply-To: <DB7PR04MB46187B4AABF3607876DA30D4E6830@DB7PR04MB4618.eurprd04.prod.outlook.com>
Kindly Ping...
Best Regards,
Joakim Zhang
> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年1月17日 14:28
> To: mkl@pengutronix.de; linux-can@vger.kernel.org
> Cc: wg@grandegger.com; netdev@vger.kernel.org;
> linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>
> Subject: RE: [PATCH 0/3] can: flexcan: add imx8qm support
>
>
> Kindly Ping...
>
> Best Regards,
> Joakim Zhang
>
> > -----Original Message-----
> > From: Joakim Zhang
> > Sent: 2018年12月19日 16:39
> > To: mkl@pengutronix.de; linux-can@vger.kernel.org
> > Cc: wg@grandegger.com; netdev@vger.kernel.org;
> > linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>; Joakim
> > Zhang <qiangqing.zhang@nxp.com>
> > Subject: [PATCH 0/3] can: flexcan: add imx8qm support
> >
> > This patch set intends to add Flexcan support for i.MX8QM platform
> > which defaultly supports CAN FD protocol. Although BRS enabled by
> > system reset when the driver sets CAN FD mode, now this patch serial
> > does not support BRS in the driver. And then I will send patch to
> > enable BRS by changing the register to set bit timing.
> >
> > Dong Aisheng (3):
> > can: rx-offload: add CANFD support based on offload
> > can: flexcan: add CAN FD mode support
> > can: flexcan: add imx8qm support
> >
> > drivers/net/can/flexcan.c | 114
> ++++++++++++++++++++++++++++++---
> > drivers/net/can/rx-offload.c | 16 +++--
> > include/linux/can/rx-offload.h | 4 +-
> > 3 files changed, 117 insertions(+), 17 deletions(-)
> >
> > --
> > 2.17.1
^ permalink raw reply
* Re: [PATCH] net: phy: at803x: disable delay only for RGMII mode
From: Peter Ujfalusi @ 2019-02-14 10:49 UTC (permalink / raw)
To: Niklas Cassel, Marc Gonzalez
Cc: Andrew Lunn, Florian Fainelli, Vinod Koul, David S Miller,
linux-arm-msm, Bjorn Andersson, netdev, Nori, Sekhar
In-Reply-To: <20190213174034.GA6954@centauri.lan>
Hi Niklas,
On 13/02/2019 19.40, Niklas Cassel wrote:
> On Wed, Feb 13, 2019 at 02:40:18PM +0100, Marc Gonzalez wrote:
>> On 13/02/2019 14:29, Andrew Lunn wrote:
>>
>>>> So we have these modes:
>>>>
>>>> PHY_INTERFACE_MODE_RGMII: TX and RX delays disabled
>>>> PHY_INTERFACE_MODE_RGMII_ID: TX and RX delays enabled
>>>> PHY_INTERFACE_MODE_RGMII_RXID: RX delay enabled, TX delay disabled
>>>> PHY_INTERFACE_MODE_RGMII_TXID: TX delay enabled, RX delay disabled
>>>>
>>>> What I don't like with this patch, is that if we specify phy-mode
>>>> PHY_INTERFACE_MODE_RGMII_TXID, this patch will enable TX delay,
>>>> but RX delay will not be explicitly set.
>>>
>>> That is not the behaviour we want. It is best to assume the device is
>>> in a random state, and correctly enable/disable all delays as
>>> requested. Only leave the hardware alone if PHY_INTERFACE_MODE_NA is
>>> used.
>>
>> That's what my patch did:
>> https://www.spinics.net/lists/netdev/msg445053.html
>>
>> But see Florian's remarks:
>> https://www.spinics.net/lists/netdev/msg445133.html
>
> Hello Marc,
>
> I saw that comment from Florian. However that was way back in 2017.
> Maybe the phy-modes were not as well defined back then?
>
> Andrew recently suggested to fix the driver so that it conforms with the
> phy-modes, and fix any SoC that specified an incorrect phy-mode in DT
> and thus relied upon the broken behavior of the PHY driver:
> https://www.spinics.net/lists/netdev/msg445133.html
>
>
> So, I've rebased your old patch, see attachment.
> I suggest that Peter test it on am335x-evm.
with the patch + s/rgmii-txid/rgmii-id in the am335x-evmsk.dts ethernet
is working.
I don't have am335x-evm to test, but it has the same PHY as evmsk.
> am335x-evm appears to rely on the current broken behavior of the PHY
> driver, so we will probably need to fix the am335x-evm according to this:
> https://www.spinics.net/lists/netdev/msg445117.html
> and merge that as well.
>
>
> Andrew, Florian, do you both agree?
>
>
> Kind regards,
> Niklas
>
- Péter
Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
^ permalink raw reply
* Re: [PATCH net-next 1/9] Documentation: networking: switchdev: Update port parent ID section
From: Ido Schimmel @ 2019-02-14 10:58 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190213220638.1552-2-f.fainelli@gmail.com>
On Wed, Feb 13, 2019 at 02:06:30PM -0800, Florian Fainelli wrote:
> Update the section about switchdev drivers having to implement a
> switchdev_port_attr_get() function to return
> SWITCHDEV_ATTR_ID_PORT_PARENT_ID since that is no longer valid after
> commit bccb30254a4a ("net: Get rid of
> SWITCHDEV_ATTR_ID_PORT_PARENT_ID").
>
> Fixes: bccb30254a4a ("net: Get rid of SWITCHDEV_ATTR_ID_PORT_PARENT_ID")
> Acked-by: Jiri Pirko <jiri@mellanox.com>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
^ permalink raw reply
* Re: [PATCH net-next] net: ip6_gre: Give ERSPAN a fill_info link op of its own
From: Lorenzo Bianconi @ 2019-02-14 11:10 UTC (permalink / raw)
To: Petr Machata
Cc: netdev@vger.kernel.org, davem@davemloft.net, kuznet@ms2.inr.ac.ru,
yoshfuji@linux-ipv6.org, lucien.xin
In-Reply-To: <c14a9085e87ca9e36ba7f5feea46e5750a5baeeb.1550086179.git.petrm@mellanox.com>
> In commit c706863bc890 ("net: ip6_gre: always reports o_key to
> userspace"), ip6gre and ip6gretap tunnels started reporting a TUNNEL_KEY
> output flag even if one was not configured at the device.
>
> When an okey-less ip6gre or ip6gretap netdevice is created, it initially
> encapsulates the packets without okey. But any configuration change
> (even a non-change such as setting TOS to an already-configured value)
> then causes the okey flag from the reported configuration to be
> circulated back to actual configuration. From that point on, the device
> encapsulates packets with output key of 0.
>
> The intention was to implement this behavior for ERSPAN devices, not for
> all ip6gre devices. The ERSPAN netdevice should really have its own
> fill_info callback. Add one.
Hi Petr,
I was assuming erspan_ver is set just for erspan tunnels. In particular I guess
the issue is due to the default erspan_ver configuration done in
ip6gre_netlink_parms (commit 84581bdae9587).
What about adding a routine to set erspan_ver and moving it in
ip6erspan_newlink/ip6erspan_changelink? In this way erspan_ver will be
defined just for erspan tunnels.
Moreover do we have a similar issue for IFLA_GRE_ERSPAN_INDEX in
ip6gre_fill_info?
Something like:
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 65a4f96dc462..bb525abd860e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1719,6 +1719,24 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
}
+static void ip6erspan_set_version(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ parms->erspan_ver = 1;
+ if (data[IFLA_GRE_ERSPAN_VER])
+ parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+ if (parms->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX])
+ parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ } else if (parms->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR])
+ parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (data[IFLA_GRE_ERSPAN_HWID])
+ parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ }
+}
+
static void ip6gre_netlink_parms(struct nlattr *data[],
struct __ip6_tnl_parm *parms)
{
@@ -1767,20 +1785,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_COLLECT_METADATA])
parms->collect_md = true;
-
- parms->erspan_ver = 1;
- if (data[IFLA_GRE_ERSPAN_VER])
- parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
-
- if (parms->erspan_ver == 1) {
- if (data[IFLA_GRE_ERSPAN_INDEX])
- parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
- } else if (parms->erspan_ver == 2) {
- if (data[IFLA_GRE_ERSPAN_DIR])
- parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
- if (data[IFLA_GRE_ERSPAN_HWID])
- parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
- }
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -2203,6 +2207,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
int err;
ip6gre_netlink_parms(data, &nt->parms);
+ ip6erspan_set_version(data, &nt->parms);
ign = net_generic(net, ip6gre_net_id);
if (nt->parms.collect_md) {
@@ -2248,6 +2253,7 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
if (IS_ERR(t))
return PTR_ERR(t);
+ ip6erspan_set_version(data, &p);
ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
Does it fix reported issue?
Regards,
Lorenzo
>
> Fixes: c706863bc890 ("net: ip6_gre: always reports o_key to userspace")
> CC: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> Signed-off-by: Petr Machata <petrm@mellanox.com>
> ---
> net/ipv6/ip6_gre.c | 31 ++++++++++++++++++++++++-------
> 1 file changed, 24 insertions(+), 7 deletions(-)
>
> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
> index 65a4f96dc462..0a6087cffe54 100644
> --- a/net/ipv6/ip6_gre.c
> +++ b/net/ipv6/ip6_gre.c
> @@ -2094,15 +2094,13 @@ static size_t ip6gre_get_size(const struct net_device *dev)
> 0;
> }
>
> -static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
> +static int __ip6gre_fill_info(struct sk_buff *skb,
> + const struct net_device *dev,
> + __be16 base_o_flags)
> {
> struct ip6_tnl *t = netdev_priv(dev);
> struct __ip6_tnl_parm *p = &t->parms;
> - __be16 o_flags = p->o_flags;
> -
> - if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
> - !p->collect_md)
> - o_flags |= TUNNEL_KEY;
> + __be16 o_flags = p->o_flags | base_o_flags;
>
> if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
> nla_put_be16(skb, IFLA_GRE_IFLAGS,
> @@ -2155,6 +2153,11 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
> return -EMSGSIZE;
> }
>
> +static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
> +{
> + return __ip6gre_fill_info(skb, dev, 0);
> +}
> +
> static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
> [IFLA_GRE_LINK] = { .type = NLA_U32 },
> [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
> @@ -2256,6 +2259,20 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
> return 0;
> }
>
> +static int ip6erspan_fill_info(struct sk_buff *skb,
> + const struct net_device *dev)
> +{
> + struct ip6_tnl *t = netdev_priv(dev);
> + struct __ip6_tnl_parm *p = &t->parms;
> + __be16 base_o_flags = 0;
> +
> + if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
> + !p->collect_md)
> + base_o_flags |= TUNNEL_KEY;
> +
> + return __ip6gre_fill_info(skb, dev, base_o_flags);
> +}
> +
> static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
> .kind = "ip6gre",
> .maxtype = IFLA_GRE_MAX,
> @@ -2295,7 +2312,7 @@ static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
> .newlink = ip6erspan_newlink,
> .changelink = ip6erspan_changelink,
> .get_size = ip6gre_get_size,
> - .fill_info = ip6gre_fill_info,
> + .fill_info = ip6erspan_fill_info,
> .get_link_net = ip6_tnl_get_link_net,
> };
>
> --
> 2.4.11
>
^ permalink raw reply related
* Re: [RFC PATCH] net act_vlan: use correct len in skb_pull
From: Zahari Doychev @ 2019-02-14 11:54 UTC (permalink / raw)
To: Toshiaki Makita
Cc: netdev, bridge, nikolay, roopa, jhs, jiri, xiyou.wangcong,
johannes
In-Reply-To: <19f5bdf9-c83b-c9ec-83c3-1e9a88763b30@lab.ntt.co.jp>
On Thu, Feb 14, 2019 at 06:16:12PM +0900, Toshiaki Makita wrote:
> On 2019/02/14 4:51, Zahari Doychev wrote:
> > The bridge and VLAN code expects that skb->data points to the start of the
> > VLAN header instead of the next (network) header. Currently after
> > tcf_vlan_act() on ingress filter skb->data points to the next network
> > header. In this case the Linux bridge does not forward correctly double
> > tagged VLAN packets added using tc vlan action as the outer vlan tag from
> > the skb is inserted at the wrong offset after the vlan tag in the payload.
> > Making skb->data to point to the VLAN header in tcf_vlan_act() by using
> > ETH_HLEN in skb_pull_rcsum() fixes the problem.
> >
> > The following commands were used for testing:
> >
> > ip link add name br0 type bridge vlan_filtering 1
> > ip link set dev br0 up
> >
> > ip link set dev net0 up
> > ip link set dev net0 master br0
> >
> > ip link set dev net1 up
> > ip link set dev net1 master br0
> >
> > bridge vlan add dev net0 vid 100 master
> > bridge vlan add dev br0 vid 100 self
> > bridge vlan add dev net1 vid 100 master
> >
> > tc qdisc add dev net0 handle ffff: clsact
> > tc qdisc add dev net1 handle ffff: clsact
> >
> > tc filter add dev net0 ingress pref 1 protocol all flower \
> > action vlan push id 10 pipe action vlan push id 100
> >
> > tc filter add dev net0 egress pref 1 protocol 802.1q flower \
> > vlan_id 100 vlan_ethtype 802.1q cvlan_id 10 \
> > action vlan pop pipe action vlan pop
> >
> > Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
> > ---
> > net/sched/act_vlan.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
> > index 93fdaf707313..308d7d89f925 100644
> > --- a/net/sched/act_vlan.c
> > +++ b/net/sched/act_vlan.c
> > @@ -86,7 +86,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
> >
> > out:
> > if (skb_at_tc_ingress(skb))
> > - skb_pull_rcsum(skb, skb->mac_len);
> > + skb_pull_rcsum(skb, ETH_HLEN);
>
> As I said before, it would be safer to remember mac_len and use it later.
>
> __u16 mac_len = skb->mac_len;
> ...
> err = skb_vlan_push(...)
> ...
> if (skb_at_tc_ingress(skb))
> skb_pull_rcsum(skb, mac_len);
>
sorry, I misunderstood it. I will send an update.
Zahari
>
> --
> Toshiaki Makita
>
^ permalink raw reply
* Re: [PATCH net-next 7/9] net: bridge: Stop calling switchdev_port_attr_get()
From: Ido Schimmel @ 2019-02-14 11:20 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190213220638.1552-8-f.fainelli@gmail.com>
On Wed, Feb 13, 2019 at 02:06:36PM -0800, Florian Fainelli wrote:
> Now that all switchdev drivers have been converted to checking the
> bridge port flags during the prepare phase of the
> switchdev_port_attr_set() when the process
> SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, we can avoid calling
> switchdev_port_attr_get() with
> SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT.
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
> net/bridge/br_switchdev.c | 16 +++++++---------
> 1 file changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> index db9e8ab96d48..8f88f8a1a7fa 100644
> --- a/net/bridge/br_switchdev.c
> +++ b/net/bridge/br_switchdev.c
> @@ -64,29 +64,27 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
> {
> struct switchdev_attr attr = {
> .orig_dev = p->dev,
> - .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
> + .id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
> + .u.brport_flags = flags,
> };
> int err;
>
> if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD)
> return 0;
>
> - err = switchdev_port_attr_get(p->dev, &attr);
> - if (err == -EOPNOTSUPP)
> - return 0;
> - if (err)
> + err = switchdev_port_attr_set(p->dev, &attr);
> + if (err && err != -EOPNOTSUPP)
> return err;
>
> - /* Check if specific bridge flag attribute offload is supported */
> - if (!(attr.u.brport_flags_support & mask)) {
> + if (err == -EOPNOTSUPP) {
> br_warn(p->br, "bridge flag offload is not supported %u(%s)\n",
> (unsigned int)p->port_no, p->dev->name);
> - return -EOPNOTSUPP;
> + return err;
> }
I see that you return -EOPNOTSUPP from drivers in case of unsupported
flags. I believe this is problematic (I'll test soon). The same return
code is used by:
1. Switch drivers to indicate unsupported flags
2. switchdev code to indicate unsupported netdev (no switchdev ops)
I guess that with this patch any attempt to set bridge port flags on
veth/dummy device will result in an error.
>
> attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
> attr.flags = SWITCHDEV_F_DEFER;
> - attr.u.brport_flags = flags;
> +
> err = switchdev_port_attr_set(p->dev, &attr);
> if (err) {
> br_warn(p->br, "error setting offload flag on port %u(%s)\n",
> --
> 2.17.1
>
^ permalink raw reply
* Re: [PATCH net-next 8/9] net: Remove SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT
From: Ido Schimmel @ 2019-02-14 11:27 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190213220638.1552-9-f.fainelli@gmail.com>
On Wed, Feb 13, 2019 at 02:06:37PM -0800, Florian Fainelli wrote:
> Now that we have converted the bridge code and the drivers to check for
> bridge port(s) flags at the time we try to set them, there is no need
> for a get() -> set() sequence anymore and
> SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT therefore becomes unused.
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
^ permalink raw reply
* Re: [PATCH net-next 9/9] net: Get rid of switchdev_port_attr_get()
From: Ido Schimmel @ 2019-02-14 12:10 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190213220638.1552-10-f.fainelli@gmail.com>
On Wed, Feb 13, 2019 at 02:06:38PM -0800, Florian Fainelli wrote:
> With the bridge no longer calling switchdev_port_attr_get() to obtain
> the supported bridge port flags from a driver but instead trying to set
> the bridge port flags directly and relying on driver to reject
> unsupported configurations, we can effectively get rid of
> switchdev_port_attr_get() entirely since this was the only place where
> it was called.
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
> Documentation/networking/switchdev.txt | 5 ++---
> drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 7 -------
> drivers/net/ethernet/rocker/rocker_main.c | 7 -------
> drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 7 -------
> include/net/switchdev.h | 8 --------
> net/dsa/slave.c | 7 -------
> 6 files changed, 2 insertions(+), 39 deletions(-)
>
> diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
> index ea90243340a9..327afe754230 100644
> --- a/Documentation/networking/switchdev.txt
> +++ b/Documentation/networking/switchdev.txt
> @@ -233,9 +233,8 @@ the bridge's FDB. It's possible, but not optimal, to enable learning on the
> device port and on the bridge port, and disable learning_sync.
>
> To support learning and learning_sync port attributes, the driver implements
> -switchdev op switchdev_port_attr_get/set for
> -SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS. The driver should initialize the attributes
> -to the hardware defaults.
> +switchdev op switchdev_port_attr_set for SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS.
> +The driver should initialize the attributes to the hardware defaults.
Last sentence is not relevant anymore. learning_sync can also be dropped
^ permalink raw reply
* [rdma-rc PATCH 0/2] iw_cxgb4: Adjust the cq/qp mask
From: Raju Rangoju @ 2019-02-14 12:10 UTC (permalink / raw)
To: jgg, davem, linux-rdma; +Cc: netdev, swise, rajur
Export the LLD sge_host_page_size field to ULDs via
cxgb4_lld_info, so that iw_cxgb4 can adjust the cq/qp
mask based on no.of bar2 pages in a host page.
This series has both net(cxgb4) and rdma(iw_cxgb4) changes,
and I would request this merge via rdma repo.
I have made sure this series applies cleanly on both net-next
and rdma-for-rc and doesn't cause any merge conflicts.
Raju Rangoju (2):
cxgb4: export sge_host_page_size to ulds
iw_cxgb4: cq/qp mask depends on bar2 pages in a host page
drivers/infiniband/hw/cxgb4/device.c | 15 +++++++++++++--
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1 +
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 +
3 files changed, 15 insertions(+), 2 deletions(-)
--
2.13.0
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox