* [PATCH net-next 1/6] cls_api: modify the tc_indr_block_ing_cmd parameters.
From: wenxu @ 2019-07-31 8:12 UTC (permalink / raw)
To: pablo, fw, jakub.kicinski, jiri; +Cc: netfilter-devel, netdev
In-Reply-To: <1564560753-32603-1-git-send-email-wenxu@ucloud.cn>
From: wenxu <wenxu@ucloud.cn>
This patch make tc_indr_block_ing_cmd can't access struct
tc_indr_block_dev and tc_indr_block_cb.
Signed-off-by: wenxu <wenxu@ucloud.cn>
---
net/sched/cls_api.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3565d9a..2e3b58d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -677,26 +677,28 @@ static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
static int tcf_block_setup(struct tcf_block *block,
struct flow_block_offload *bo);
-static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
- struct tc_indr_block_cb *indr_block_cb,
+static void tc_indr_block_ing_cmd(struct net_device *dev,
+ struct tcf_block *block,
+ tc_indr_block_bind_cb_t *cb,
+ void *cb_priv,
enum flow_block_command command)
{
struct flow_block_offload bo = {
.command = command,
.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
- .net = dev_net(indr_dev->dev),
- .block_shared = tcf_block_non_null_shared(indr_dev->block),
+ .net = dev_net(dev),
+ .block_shared = tcf_block_non_null_shared(block),
};
INIT_LIST_HEAD(&bo.cb_list);
- if (!indr_dev->block)
+ if (!block)
return;
- bo.block = &indr_dev->block->flow_block;
+ bo.block = &block->flow_block;
- indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
- &bo);
- tcf_block_setup(indr_dev->block, &bo);
+ cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+ tcf_block_setup(block, &bo);
}
int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
@@ -715,7 +717,8 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
if (err)
goto err_dev_put;
- tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND);
+ tc_indr_block_ing_cmd(dev, indr_dev->block, cb, cb_priv,
+ FLOW_BLOCK_BIND);
return 0;
err_dev_put:
@@ -752,7 +755,8 @@ void __tc_indr_block_cb_unregister(struct net_device *dev,
return;
/* Send unbind message if required to free any block cbs. */
- tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND);
+ tc_indr_block_ing_cmd(dev, indr_dev->block, cb, indr_block_cb->cb_priv,
+ FLOW_BLOCK_UNBIND);
tc_indr_block_cb_del(indr_block_cb);
tc_indr_block_dev_put(indr_dev);
}
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next 3/6] cls_api: add flow_indr_block_call function
From: wenxu @ 2019-07-31 8:12 UTC (permalink / raw)
To: pablo, fw, jakub.kicinski, jiri; +Cc: netfilter-devel, netdev
In-Reply-To: <1564560753-32603-1-git-send-email-wenxu@ucloud.cn>
From: wenxu <wenxu@ucloud.cn>
This patch make indr_block_call don't access struct tc_indr_block_cb
and tc_indr_block_dev directly
Signed-off-by: wenxu <wenxu@ucloud.cn>
---
net/sched/cls_api.c | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f9643fa..617b098 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -783,13 +783,30 @@ void tc_indr_block_cb_unregister(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
+static void flow_indr_block_call(struct flow_block *flow_block,
+ struct net_device *dev,
+ struct flow_block_offload *bo,
+ enum flow_block_command command)
+{
+ struct tc_indr_block_cb *indr_block_cb;
+ struct tc_indr_block_dev *indr_dev;
+
+ indr_dev = tc_indr_block_dev_lookup(dev);
+ if (!indr_dev)
+ return;
+
+ indr_dev->flow_block = command == FLOW_BLOCK_BIND ? flow_block : NULL;
+
+ list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+ indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+ bo);
+}
+
static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
struct tcf_block_ext_info *ei,
enum flow_block_command command,
struct netlink_ext_ack *extack)
{
- struct tc_indr_block_cb *indr_block_cb;
- struct tc_indr_block_dev *indr_dev;
struct flow_block_offload bo = {
.command = command,
.binder_type = ei->binder_type,
@@ -800,17 +817,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
};
INIT_LIST_HEAD(&bo.cb_list);
- indr_dev = tc_indr_block_dev_lookup(dev);
- if (!indr_dev)
- return;
-
- indr_dev->flow_block = command == FLOW_BLOCK_BIND ?
- &block->flow_block : NULL;
-
- list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
- indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
- &bo);
-
+ flow_indr_block_call(&block->flow_block, dev, &bo, command);
tcf_block_setup(block, &bo);
}
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next 5/6] flow_offload: support get flow_block immediately
From: wenxu @ 2019-07-31 8:12 UTC (permalink / raw)
To: pablo, fw, jakub.kicinski, jiri; +Cc: netfilter-devel, netdev
In-Reply-To: <1564560753-32603-1-git-send-email-wenxu@ucloud.cn>
From: wenxu <wenxu@ucloud.cn>
The new flow-indr-block can't get the tcf_block
directly. It provide a callback list to find the flow_block immediately
when the device register and contain a ingress block.
Signed-off-by: wenxu <wenxu@ucloud.cn>
---
include/net/flow_offload.h | 17 +++++++++++++++++
net/core/flow_offload.c | 33 +++++++++++++++++++++++++++++++++
net/sched/cls_api.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 94 insertions(+)
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index c8d60a6..db04e3f 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -376,6 +376,23 @@ typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
void *cb_priv,
enum flow_block_command command);
+struct flow_indr_block_info {
+ struct flow_block *flow_block;
+ flow_indr_block_ing_cmd_t *ing_cmd_cb;
+};
+
+typedef bool flow_indr_get_default_block_t(struct net_device *dev,
+ struct flow_indr_block_info *info);
+
+struct flow_indr_get_block_entry {
+ flow_indr_get_default_block_t *get_block_cb;
+ struct list_head list;
+};
+
+void flow_indr_add_default_block_cb(struct flow_indr_get_block_entry *entry);
+
+void flow_indr_del_default_block_cb(struct flow_indr_get_block_entry *entry);
+
int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
flow_indr_block_bind_cb_t *cb,
void *cb_ident);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index a1fdfa4..8ff7a75b 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -282,6 +282,8 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
}
EXPORT_SYMBOL(flow_block_cb_setup_simple);
+static LIST_HEAD(get_default_block_cb_list);
+
static struct rhashtable indr_setup_block_ht;
struct flow_indr_block_cb {
@@ -313,6 +315,24 @@ struct flow_indr_block_dev {
flow_indr_setup_block_ht_params);
}
+static void flow_get_default_block(struct flow_indr_block_dev *indr_dev)
+{
+ struct flow_indr_get_block_entry *entry_cb;
+ struct flow_indr_block_info info;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(entry_cb, &get_default_block_cb_list, list) {
+ if (entry_cb->get_block_cb(indr_dev->dev, &info)) {
+ indr_dev->flow_block = info.flow_block;
+ indr_dev->ing_cmd_cb = info.ing_cmd_cb;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+}
+
static struct flow_indr_block_dev *
flow_indr_block_dev_get(struct net_device *dev)
{
@@ -328,6 +348,7 @@ struct flow_indr_block_dev {
INIT_LIST_HEAD(&indr_dev->cb_list);
indr_dev->dev = dev;
+ flow_get_default_block(indr_dev);
if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
flow_indr_setup_block_ht_params)) {
kfree(indr_dev);
@@ -492,6 +513,18 @@ void flow_indr_block_call(struct flow_block *flow_block,
}
EXPORT_SYMBOL_GPL(flow_indr_block_call);
+void flow_indr_add_default_block_cb(struct flow_indr_get_block_entry *entry)
+{
+ list_add_tail_rcu(&entry->list, &get_default_block_cb_list);
+}
+EXPORT_SYMBOL_GPL(flow_indr_add_default_block_cb);
+
+void flow_indr_del_default_block_cb(struct flow_indr_get_block_entry *entry)
+{
+ list_del_rcu(&entry->list);
+}
+EXPORT_SYMBOL_GPL(flow_indr_del_default_block_cb);
+
static int __init init_flow_indr_rhashtable(void)
{
return rhashtable_init(&indr_setup_block_ht,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bd5e591..8bf918c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -576,6 +576,43 @@ static void tc_indr_block_ing_cmd(struct net_device *dev,
tcf_block_setup(block, &bo);
}
+static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
+{
+ const struct Qdisc_class_ops *cops;
+ struct Qdisc *qdisc;
+
+ if (!dev_ingress_queue(dev))
+ return NULL;
+
+ qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
+ if (!qdisc)
+ return NULL;
+
+ cops = qdisc->ops->cl_ops;
+ if (!cops)
+ return NULL;
+
+ if (!cops->tcf_block)
+ return NULL;
+
+ return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
+}
+
+static bool tc_indr_get_default_block(struct net_device *dev,
+ struct flow_indr_block_info *info)
+{
+ struct tcf_block *block = tc_dev_ingress_block(dev);
+
+ if (block) {
+ info->flow_block = &block->flow_block;
+ info->ing_cmd_cb = tc_indr_block_ing_cmd;
+
+ return true;
+ }
+
+ return false;
+}
+
static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
struct tcf_block_ext_info *ei,
enum flow_block_command command,
@@ -3146,6 +3183,11 @@ static void __net_exit tcf_net_exit(struct net *net)
.size = sizeof(struct tcf_net),
};
+static struct flow_indr_get_block_entry get_block_entry = {
+ .get_block_cb = tc_indr_get_default_block,
+ .list = LIST_HEAD_INIT(get_block_entry.list),
+};
+
static int __init tc_filter_init(void)
{
int err;
@@ -3158,6 +3200,8 @@ static int __init tc_filter_init(void)
if (err)
goto err_register_pernet_subsys;
+ flow_indr_add_default_block_cb(&get_block_entry);
+
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
--
1.8.3.1
^ permalink raw reply related
* [PATCH] net/ethernet/qlogic/qed: force the string buffer NULL-terminated
From: Wang Xiayang @ 2019-07-31 8:15 UTC (permalink / raw)
Cc: aelior, GR-everest-linux-l2, netdev, Wang Xiayang
strncpy() does not ensure NULL-termination when the input string
size equals to the destination buffer size 30.
The output string is passed to qed_int_deassertion_aeu_bit()
which calls DP_INFO() and relies NULL-termination.
Use strlcpy instead. The other conditional branch above strncpy()
needs no fix as snprintf() ensures NULL-termination.
This issue is identified by a Coccinelle script.
Signed-off-by: Wang Xiayang <xywang.sjtu@sjtu.edu.cn>
---
drivers/net/ethernet/qlogic/qed/qed_int.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 4e8118a08654..9f5113639eaf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1093,7 +1093,7 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn,
snprintf(bit_name, 30,
p_aeu->bit_name, num);
else
- strncpy(bit_name,
+ strlcpy(bit_name,
p_aeu->bit_name, 30);
/* We now need to pass bitmask in its
--
2.11.0
^ permalink raw reply related
* [PATCH net-next v2 0/6] net: dsa: mv88e6xxx: add support for MV88E6220
From: Hubert Feurstein @ 2019-07-31 8:23 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Hubert Feurstein, Andrew Lunn, Vivien Didelot, Florian Fainelli,
David S. Miller, Rasmus Villemoes
This patch series adds support for the MV88E6220 chip to the mv88e6xxx driver.
The MV88E6220 is almost the same as MV88E6250 except that the ports 2-4 are
not routed to pins.
Furthermore, PTP support is added to the MV88E6250 family.
v2:
- insert all 6220 entries in correct numerical order
- introduce invalid_port_mask
- move ptp_cc_mult* to ptp_ops and restored original ptp_adjfine code
- added Andrews Reviewed-By to patch 2 and 4
Hubert Feurstein (6):
net: dsa: mv88e6xxx: add support for MV88E6220
dt-bindings: net: dsa: marvell: add 6220 model to the 6250 family
net: dsa: mv88e6xxx: introduce invalid_port_mask in mv88e6xxx_info
net: dsa: mv88e6xxx: setup message port is not supported in the 6250
familiy
net: dsa: mv88e6xxx: order ptp structs numerically ascending
net: dsa: mv88e6xxx: add PTP support for MV88E6250 family
drivers/net/dsa/mv88e6xxx/chip.c | 49 ++++++++++++--
drivers/net/dsa/mv88e6xxx/chip.h | 15 +++++
drivers/net/dsa/mv88e6xxx/ptp.c | 106 ++++++++++++++++++++++++-------
drivers/net/dsa/mv88e6xxx/ptp.h | 6 +-
4 files changed, 147 insertions(+), 29 deletions(-)
--
2.22.0
^ permalink raw reply
* [PATCH net-next v2 5/6] net: dsa: mv88e6xxx: order ptp structs numerically ascending
From: Hubert Feurstein @ 2019-07-31 8:23 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Hubert Feurstein, Andrew Lunn, Vivien Didelot, Florian Fainelli,
David S. Miller, Rasmus Villemoes
In-Reply-To: <20190731082351.3157-1-h.feurstein@gmail.com>
As it is done for all the other structs within this driver.
Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>
---
drivers/net/dsa/mv88e6xxx/ptp.c | 32 ++++++++++++++++----------------
drivers/net/dsa/mv88e6xxx/ptp.h | 4 ++--
2 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
index 768d256f7c9f..a1ff182c8737 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.c
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -310,6 +310,22 @@ static int mv88e6352_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
return 0;
}
+const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops = {
+ .clock_read = mv88e6165_ptp_clock_read,
+ .global_enable = mv88e6165_global_enable,
+ .global_disable = mv88e6165_global_disable,
+ .arr0_sts_reg = MV88E6165_PORT_PTP_ARR0_STS,
+ .arr1_sts_reg = MV88E6165_PORT_PTP_ARR1_STS,
+ .dep_sts_reg = MV88E6165_PORT_PTP_DEP_STS,
+ .rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ),
+};
+
const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = {
.clock_read = mv88e6352_ptp_clock_read,
.ptp_enable = mv88e6352_ptp_enable,
@@ -333,22 +349,6 @@ const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = {
(1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ),
};
-const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops = {
- .clock_read = mv88e6165_ptp_clock_read,
- .global_enable = mv88e6165_global_enable,
- .global_disable = mv88e6165_global_disable,
- .arr0_sts_reg = MV88E6165_PORT_PTP_ARR0_STS,
- .arr1_sts_reg = MV88E6165_PORT_PTP_ARR1_STS,
- .dep_sts_reg = MV88E6165_PORT_PTP_DEP_STS,
- .rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
- (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
- (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
- (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
- (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
- (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
- (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ),
-};
-
static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
{
struct mv88e6xxx_chip *chip = cc_to_chip(cc);
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
index 0a1f8de8f062..58cbd21d58f6 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.h
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -148,8 +148,8 @@ void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip, \
ptp_clock_info)
-extern const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops;
extern const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops;
+extern const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops;
#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
@@ -167,8 +167,8 @@ static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
{
}
-static const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = {};
static const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops = {};
+static const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = {};
#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 6/6] net: dsa: mv88e6xxx: add PTP support for MV88E6250 family
From: Hubert Feurstein @ 2019-07-31 8:23 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Hubert Feurstein, Richard Cochran, Andrew Lunn, Vivien Didelot,
Florian Fainelli, David S. Miller, Rasmus Villemoes
In-Reply-To: <20190731082351.3157-1-h.feurstein@gmail.com>
This adds PTP support for the MV88E6250 family.
Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>
---
drivers/net/dsa/mv88e6xxx/chip.c | 4 ++
drivers/net/dsa/mv88e6xxx/chip.h | 4 ++
drivers/net/dsa/mv88e6xxx/ptp.c | 79 +++++++++++++++++++++++++++-----
drivers/net/dsa/mv88e6xxx/ptp.h | 2 +
4 files changed, 78 insertions(+), 11 deletions(-)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 75cfa1f2060b..4a79e389eb8b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3522,6 +3522,8 @@ static const struct mv88e6xxx_ops mv88e6250_ops = {
.reset = mv88e6250_g1_reset,
.vtu_getnext = mv88e6250_g1_vtu_getnext,
.vtu_loadpurge = mv88e6250_g1_vtu_loadpurge,
+ .avb_ops = &mv88e6352_avb_ops,
+ .ptp_ops = &mv88e6250_ptp_ops,
.phylink_validate = mv88e6065_phylink_validate,
};
@@ -4318,6 +4320,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.atu_move_port_mask = 0xf,
.dual_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6250_ops,
},
@@ -4363,6 +4366,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.atu_move_port_mask = 0xf,
.dual_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
+ .ptp_support = true,
.ops = &mv88e6250_ops,
},
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index e7b88e9643b9..8c6d3c906197 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -539,6 +539,10 @@ struct mv88e6xxx_ptp_ops {
int arr1_sts_reg;
int dep_sts_reg;
u32 rx_filters;
+ u32 cc_shift;
+ u32 cc_mult;
+ u32 cc_mult_num;
+ u32 cc_mult_dem;
};
#define STATS_TYPE_PORT BIT(0)
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
index a1ff182c8737..073cbd0bb91b 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.c
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -15,11 +15,31 @@
#include "hwtstamp.h"
#include "ptp.h"
-/* Raw timestamps are in units of 8-ns clock periods. */
-#define CC_SHIFT 28
-#define CC_MULT (8 << CC_SHIFT)
-#define CC_MULT_NUM (1 << 9)
-#define CC_MULT_DEM 15625ULL
+#define MV88E6XXX_MAX_ADJ_PPB 1000000
+
+/* Family MV88E6250:
+ * Raw timestamps are in units of 10-ns clock periods.
+ *
+ * clkadj = scaled_ppm * 10*2^28 / (10^6 * 2^16)
+ * simplifies to
+ * clkadj = scaled_ppm * 2^7 / 5^5
+ */
+#define MV88E6250_CC_SHIFT 28
+#define MV88E6250_CC_MULT (10 << MV88E6250_CC_SHIFT)
+#define MV88E6250_CC_MULT_NUM (1 << 7)
+#define MV88E6250_CC_MULT_DEM 3125ULL
+
+/* Other families:
+ * Raw timestamps are in units of 8-ns clock periods.
+ *
+ * clkadj = scaled_ppm * 8*2^28 / (10^6 * 2^16)
+ * simplifies to
+ * clkadj = scaled_ppm * 2^9 / 5^6
+ */
+#define MV88E6XXX_CC_SHIFT 28
+#define MV88E6XXX_CC_MULT (8 << MV88E6XXX_CC_SHIFT)
+#define MV88E6XXX_CC_MULT_NUM (1 << 9)
+#define MV88E6XXX_CC_MULT_DEM 15625ULL
#define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100)
@@ -179,6 +199,7 @@ static void mv88e6352_tai_event_work(struct work_struct *ugly)
static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
{
struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+ const struct mv88e6xxx_ptp_ops *ptp_ops = chip->info->ops->ptp_ops;
int neg_adj = 0;
u32 diff, mult;
u64 adj;
@@ -187,10 +208,11 @@ static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
neg_adj = 1;
scaled_ppm = -scaled_ppm;
}
- mult = CC_MULT;
- adj = CC_MULT_NUM;
+
+ mult = ptp_ops->cc_mult;
+ adj = ptp_ops->cc_mult_num;
adj *= scaled_ppm;
- diff = div_u64(adj, CC_MULT_DEM);
+ diff = div_u64(adj, ptp_ops->cc_mult_dem);
mv88e6xxx_reg_lock(chip);
@@ -324,6 +346,37 @@ const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops = {
(1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
(1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
(1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ),
+ .cc_shift = MV88E6XXX_CC_SHIFT,
+ .cc_mult = MV88E6XXX_CC_MULT,
+ .cc_mult_num = MV88E6XXX_CC_MULT_NUM,
+ .cc_mult_dem = MV88E6XXX_CC_MULT_DEM,
+};
+
+const struct mv88e6xxx_ptp_ops mv88e6250_ptp_ops = {
+ .clock_read = mv88e6352_ptp_clock_read,
+ .ptp_enable = mv88e6352_ptp_enable,
+ .ptp_verify = mv88e6352_ptp_verify,
+ .event_work = mv88e6352_tai_event_work,
+ .port_enable = mv88e6352_hwtstamp_port_enable,
+ .port_disable = mv88e6352_hwtstamp_port_disable,
+ .n_ext_ts = 1,
+ .arr0_sts_reg = MV88E6XXX_PORT_PTP_ARR0_STS,
+ .arr1_sts_reg = MV88E6XXX_PORT_PTP_ARR1_STS,
+ .dep_sts_reg = MV88E6XXX_PORT_PTP_DEP_STS,
+ .rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ),
+ .cc_shift = MV88E6250_CC_SHIFT,
+ .cc_mult = MV88E6250_CC_MULT,
+ .cc_mult_num = MV88E6250_CC_MULT_NUM,
+ .cc_mult_dem = MV88E6250_CC_MULT_DEM,
};
const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = {
@@ -347,6 +400,10 @@ const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = {
(1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
(1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
(1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ),
+ .cc_shift = MV88E6XXX_CC_SHIFT,
+ .cc_mult = MV88E6XXX_CC_MULT,
+ .cc_mult_num = MV88E6XXX_CC_MULT_NUM,
+ .cc_mult_dem = MV88E6XXX_CC_MULT_DEM,
};
static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
@@ -384,8 +441,8 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc));
chip->tstamp_cc.read = mv88e6xxx_ptp_clock_read;
chip->tstamp_cc.mask = CYCLECOUNTER_MASK(32);
- chip->tstamp_cc.mult = CC_MULT;
- chip->tstamp_cc.shift = CC_SHIFT;
+ chip->tstamp_cc.mult = ptp_ops->cc_mult;
+ chip->tstamp_cc.shift = ptp_ops->cc_shift;
timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc,
ktime_to_ns(ktime_get_real()));
@@ -397,7 +454,6 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
chip->ptp_clock_info.owner = THIS_MODULE;
snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
"%s", dev_name(chip->dev));
- chip->ptp_clock_info.max_adj = 1000000;
chip->ptp_clock_info.n_ext_ts = ptp_ops->n_ext_ts;
chip->ptp_clock_info.n_per_out = 0;
@@ -413,6 +469,7 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
}
chip->ptp_clock_info.pin_config = chip->pin_config;
+ chip->ptp_clock_info.max_adj = MV88E6XXX_MAX_ADJ_PPB;
chip->ptp_clock_info.adjfine = mv88e6xxx_ptp_adjfine;
chip->ptp_clock_info.adjtime = mv88e6xxx_ptp_adjtime;
chip->ptp_clock_info.gettime64 = mv88e6xxx_ptp_gettime;
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
index 58cbd21d58f6..269d5d16a466 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.h
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -149,6 +149,7 @@ void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
ptp_clock_info)
extern const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops;
+extern const struct mv88e6xxx_ptp_ops mv88e6250_ptp_ops;
extern const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops;
#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
@@ -168,6 +169,7 @@ static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
}
static const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops = {};
+static const struct mv88e6xxx_ptp_ops mv88e6250_ptp_ops = {};
static const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = {};
#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 4/6] net: dsa: mv88e6xxx: setup message port is not supported in the 6250 familiy
From: Hubert Feurstein @ 2019-07-31 8:23 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Hubert Feurstein, Andrew Lunn, Vivien Didelot, Florian Fainelli,
David S. Miller, Rasmus Villemoes
In-Reply-To: <20190731082351.3157-1-h.feurstein@gmail.com>
The MV88E6250 family doesn't support the MV88E6XXX_PORT_CTL1_MESSAGE_PORT
bit.
Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
drivers/net/dsa/mv88e6xxx/chip.c | 34 +++++++++++++++++++++++++++++---
drivers/net/dsa/mv88e6xxx/chip.h | 1 +
2 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9fdcc21f0858..75cfa1f2060b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2252,9 +2252,11 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
return err;
}
- err = mv88e6xxx_setup_message_port(chip, port);
- if (err)
- return err;
+ if (chip->info->ops->port_setup_message_port) {
+ err = chip->info->ops->port_setup_message_port(chip, port);
+ if (err)
+ return err;
+ }
/* Port based VLAN map: give each port the same default address
* database, and allow bidirectional communication between the
@@ -2805,6 +2807,7 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6185_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6xxx_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -2839,6 +2842,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
.port_set_upstream_port = mv88e6095_port_set_upstream_port,
.port_link_state = mv88e6185_port_link_state,
.port_get_cmode = mv88e6185_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6xxx_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -2875,6 +2879,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6185_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6xxx_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -2909,6 +2914,7 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6185_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -2946,6 +2952,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
.port_set_pause = mv88e6185_port_set_pause,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6185_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6xxx_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -2990,6 +2997,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6390_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3030,6 +3038,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6185_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6xxx_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3063,6 +3072,7 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6185_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6xxx_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3104,6 +3114,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3145,6 +3156,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3187,6 +3199,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3228,6 +3241,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3266,6 +3280,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
.port_set_pause = mv88e6185_port_set_pause,
.port_link_state = mv88e6185_port_link_state,
.port_get_cmode = mv88e6185_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6xxx_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3308,6 +3323,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
.port_set_cmode = mv88e6390_port_set_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6390_g1_stats_snapshot,
.stats_set_histogram = mv88e6390_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3353,6 +3369,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
.port_set_cmode = mv88e6390x_port_set_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6390_g1_stats_snapshot,
.stats_set_histogram = mv88e6390_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3398,6 +3415,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
.port_set_cmode = mv88e6390_port_set_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6390_g1_stats_snapshot,
.stats_set_histogram = mv88e6390_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3445,6 +3463,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3530,6 +3549,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
.port_set_cmode = mv88e6390_port_set_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6390_g1_stats_snapshot,
.stats_set_histogram = mv88e6390_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3577,6 +3597,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3620,6 +3641,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3663,6 +3685,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6390_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3706,6 +3729,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3745,6 +3769,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3788,6 +3813,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6320_g1_stats_snapshot,
.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
@@ -3840,6 +3866,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
.port_set_cmode = mv88e6390_port_set_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6390_g1_stats_snapshot,
.stats_set_histogram = mv88e6390_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
@@ -3889,6 +3916,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
.port_link_state = mv88e6352_port_link_state,
.port_get_cmode = mv88e6352_port_get_cmode,
.port_set_cmode = mv88e6390x_port_set_cmode,
+ .port_setup_message_port = mv88e6xxx_setup_message_port,
.stats_snapshot = mv88e6390_g1_stats_snapshot,
.stats_set_histogram = mv88e6390_g1_stats_set_histogram,
.stats_get_sset_count = mv88e6320_stats_get_sset_count,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 359d258d7151..e7b88e9643b9 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -395,6 +395,7 @@ struct mv88e6xxx_ops {
u8 out);
int (*port_disable_learn_limit)(struct mv88e6xxx_chip *chip, int port);
int (*port_disable_pri_override)(struct mv88e6xxx_chip *chip, int port);
+ int (*port_setup_message_port)(struct mv88e6xxx_chip *chip, int port);
/* CMODE control what PHY mode the MAC will use, eg. SGMII, RGMII, etc.
* Some chips allow this to be configured on specific ports.
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 3/6] net: dsa: mv88e6xxx: introduce invalid_port_mask in mv88e6xxx_info
From: Hubert Feurstein @ 2019-07-31 8:23 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Hubert Feurstein, Andrew Lunn, Vivien Didelot, Florian Fainelli,
David S. Miller, Rasmus Villemoes
In-Reply-To: <20190731082351.3157-1-h.feurstein@gmail.com>
With this it is possible to mark certain chip ports as invalid. This is
required for example for the MV88E6220 (which is in general a MV88E6250
with 7 ports) but the ports 2-4 are not routed to pins.
If a user configures an invalid port, an error is returned.
Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>
---
drivers/net/dsa/mv88e6xxx/chip.c | 9 +++++++++
drivers/net/dsa/mv88e6xxx/chip.h | 10 ++++++++++
2 files changed, 19 insertions(+)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index c8c176da0f1c..9fdcc21f0858 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2469,6 +2469,14 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
/* Setup Switch Port Registers */
for (i = 0; i < mv88e6xxx_num_ports(chip); i++) {
+ /* Prevent the use of an invalid port. */
+ if (mv88e6xxx_is_invalid_port(chip, i) &&
+ !dsa_is_unused_port(ds, i)) {
+ dev_err(chip->dev, "port %d is invalid\n", i);
+ err = -EINVAL;
+ goto unlock;
+ }
+
if (dsa_is_unused_port(ds, i)) {
err = mv88e6xxx_port_set_state(chip, i,
BR_STATE_DISABLED);
@@ -4270,6 +4278,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
*/
.num_ports = 7,
.num_internal_phys = 2,
+ .invalid_port_mask = BIT(2) | BIT(3) | BIT(4),
.max_vid = 4095,
.port_base_addr = 0x08,
.phy_base_addr = 0x00,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 2cc508a1cc32..359d258d7151 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -106,6 +106,11 @@ struct mv88e6xxx_info {
unsigned int g2_irqs;
bool pvt;
+ /* Mark certain ports as invalid. This is required for example for the
+ * MV88E6220 (which is in general a MV88E6250 with 7 ports) but the
+ * ports 2-4 are not routet to pins.
+ */
+ unsigned int invalid_port_mask;
/* Multi-chip Addressing Mode.
* Some chips respond to only 2 registers of its own SMI device address
* when it is non-zero, and use indirect access to internal registers.
@@ -571,6 +576,11 @@ static inline unsigned int mv88e6xxx_num_gpio(struct mv88e6xxx_chip *chip)
return chip->info->num_gpio;
}
+static inline bool mv88e6xxx_is_invalid_port(struct mv88e6xxx_chip *chip, int port)
+{
+ return (chip->info->invalid_port_mask & BIT(port)) != 0;
+}
+
int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 1/6] net: dsa: mv88e6xxx: add support for MV88E6220
From: Hubert Feurstein @ 2019-07-31 8:23 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Hubert Feurstein, Andrew Lunn, Vivien Didelot, Florian Fainelli,
David S. Miller, Rasmus Villemoes
In-Reply-To: <20190731082351.3157-1-h.feurstein@gmail.com>
The MV88E6220 is almost the same as MV88E6250 except that the ports 2-4 are
not routed to pins. So the usable ports are 0, 1, 5 and 6.
Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>
---
drivers/net/dsa/mv88e6xxx/chip.c | 25 +++++++++++++++++++++++++
drivers/net/dsa/mv88e6xxx/chip.h | 3 ++-
drivers/net/dsa/mv88e6xxx/port.h | 1 +
3 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 6b17cd961d06..c8c176da0f1c 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -4259,6 +4259,31 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.ops = &mv88e6191_ops,
},
+ [MV88E6220] = {
+ .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6220,
+ .family = MV88E6XXX_FAMILY_6250,
+ .name = "Marvell 88E6220",
+ .num_databases = 64,
+
+ /* Ports 2-4 are not routed to pins
+ * => usable ports 0, 1, 5, 6
+ */
+ .num_ports = 7,
+ .num_internal_phys = 2,
+ .max_vid = 4095,
+ .port_base_addr = 0x08,
+ .phy_base_addr = 0x00,
+ .global1_addr = 0x0f,
+ .global2_addr = 0x07,
+ .age_time_coeff = 15000,
+ .g1_irqs = 9,
+ .g2_irqs = 10,
+ .atu_move_port_mask = 0xf,
+ .dual_chip = true,
+ .tag_protocol = DSA_TAG_PROTO_DSA,
+ .ops = &mv88e6250_ops,
+ },
+
[MV88E6240] = {
.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6240,
.family = MV88E6XXX_FAMILY_6352,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 4646e46d47f2..2cc508a1cc32 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -57,6 +57,7 @@ enum mv88e6xxx_model {
MV88E6190,
MV88E6190X,
MV88E6191,
+ MV88E6220,
MV88E6240,
MV88E6250,
MV88E6290,
@@ -77,7 +78,7 @@ enum mv88e6xxx_family {
MV88E6XXX_FAMILY_6097, /* 6046 6085 6096 6097 */
MV88E6XXX_FAMILY_6165, /* 6123 6161 6165 */
MV88E6XXX_FAMILY_6185, /* 6108 6121 6122 6131 6152 6155 6182 6185 */
- MV88E6XXX_FAMILY_6250, /* 6250 */
+ MV88E6XXX_FAMILY_6250, /* 6220 6250 */
MV88E6XXX_FAMILY_6320, /* 6320 6321 */
MV88E6XXX_FAMILY_6341, /* 6141 6341 */
MV88E6XXX_FAMILY_6351, /* 6171 6175 6350 6351 */
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index 8d5a6cd6fb19..ceec771f8bfc 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -117,6 +117,7 @@
#define MV88E6XXX_PORT_SWITCH_ID_PROD_6190 0x1900
#define MV88E6XXX_PORT_SWITCH_ID_PROD_6191 0x1910
#define MV88E6XXX_PORT_SWITCH_ID_PROD_6185 0x1a70
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6220 0x2200
#define MV88E6XXX_PORT_SWITCH_ID_PROD_6240 0x2400
#define MV88E6XXX_PORT_SWITCH_ID_PROD_6250 0x2500
#define MV88E6XXX_PORT_SWITCH_ID_PROD_6290 0x2900
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 2/6] dt-bindings: net: dsa: marvell: add 6220 model to the 6250 family
From: Hubert Feurstein @ 2019-07-31 8:23 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Hubert Feurstein, Andrew Lunn, Vivien Didelot, Florian Fainelli,
David S. Miller, Rasmus Villemoes
In-Reply-To: <20190731082351.3157-1-h.feurstein@gmail.com>
The MV88E6220 is part of the MV88E6250 family.
Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
Documentation/devicetree/bindings/net/dsa/marvell.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index 6f9538974bb9..30c11fea491b 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -22,7 +22,7 @@ which is at a different MDIO base address in different switch families.
- "marvell,mv88e6190" : Switch has base address 0x00. Use with models:
6190, 6190X, 6191, 6290, 6390, 6390X
- "marvell,mv88e6250" : Switch has base address 0x08 or 0x18. Use with model:
- 6250
+ 6220, 6250
Required properties:
- compatible : Should be one of "marvell,mv88e6085",
--
2.22.0
^ permalink raw reply related
* KASAN: invalid-free in tls_sk_proto_cleanup
From: syzbot @ 2019-07-31 8:29 UTC (permalink / raw)
To: aviadye, borisp, daniel, davejwatson, davem, jakub.kicinski,
john.fastabend, linux-kernel, netdev, syzkaller-bugs
Hello,
syzbot found the following crash on:
HEAD commit: fde50b96 Add linux-next specific files for 20190726
git tree: linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=15ea7f3fa00000
kernel config: https://syzkaller.appspot.com/x/.config?x=4b58274564b354c1
dashboard link: https://syzkaller.appspot.com/bug?extid=f5731e2256eb5130dbd6
compiler: gcc (GCC) 9.0.0 20181231 (experimental)
Unfortunately, I don't have any reproducer for this crash yet.
IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+f5731e2256eb5130dbd6@syzkaller.appspotmail.com
==================================================================
BUG: KASAN: double-free or invalid-free in tls_sk_proto_cleanup+0x216/0x3e0
net/tls/tls_main.c:300
CPU: 0 PID: 19107 Comm: syz-executor.4 Not tainted 5.3.0-rc1-next-20190726
#53
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x172/0x1f0 lib/dump_stack.c:113
print_address_description.cold+0xd4/0x306 mm/kasan/report.c:351
kasan_report_invalid_free+0x65/0xa0 mm/kasan/report.c:444
__kasan_slab_free+0x13a/0x150 mm/kasan/common.c:427
kasan_slab_free+0xe/0x10 mm/kasan/common.c:456
__cache_free mm/slab.c:3425 [inline]
kfree+0x10a/0x2c0 mm/slab.c:3756
tls_sk_proto_cleanup+0x216/0x3e0 net/tls/tls_main.c:300
tls_sk_proto_unhash+0x90/0x3f0 net/tls/tls_main.c:330
tcp_set_state+0x5b9/0x7d0 net/ipv4/tcp.c:2235
tcp_done+0xe2/0x320 net/ipv4/tcp.c:3824
tcp_reset+0x132/0x500 net/ipv4/tcp_input.c:4080
tcp_validate_incoming+0xa2d/0x1660 net/ipv4/tcp_input.c:5440
tcp_rcv_established+0x6b5/0x1e70 net/ipv4/tcp_input.c:5648
tcp_v6_do_rcv+0x41e/0x12c0 net/ipv6/tcp_ipv6.c:1356
sk_backlog_rcv include/net/sock.h:945 [inline]
__release_sock+0x129/0x390 net/core/sock.c:2418
release_sock+0x59/0x1c0 net/core/sock.c:2934
sk_stream_wait_memory+0x65a/0xfc0 net/core/stream.c:149
tls_sw_sendmsg+0x673/0x17b0 net/tls/tls_sw.c:1054
inet6_sendmsg+0x9e/0xe0 net/ipv6/af_inet6.c:576
sock_sendmsg_nosec net/socket.c:637 [inline]
sock_sendmsg+0xd7/0x130 net/socket.c:657
__sys_sendto+0x262/0x380 net/socket.c:1952
__do_sys_sendto net/socket.c:1964 [inline]
__se_sys_sendto net/socket.c:1960 [inline]
__x64_sys_sendto+0xe1/0x1a0 net/socket.c:1960
do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x459829
Code: fd b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
ff 0f 83 cb b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fcee5e02c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 0000000000459829
RDX: ffffffffffffffc1 RSI: 00000000200005c0 RDI: 0000000000000003
RBP: 000000000075bf20 R08: 0000000000000000 R09: 1201000000003618
R10: 0000000000000000 R11: 0000000000000246 R12: 00007fcee5e036d4
R13: 00000000004c77c1 R14: 00000000004dcf38 R15: 00000000ffffffff
Allocated by task 19107:
save_stack+0x23/0x90 mm/kasan/common.c:69
set_track mm/kasan/common.c:77 [inline]
__kasan_kmalloc mm/kasan/common.c:486 [inline]
__kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:459
kasan_kmalloc+0x9/0x10 mm/kasan/common.c:500
__do_kmalloc mm/slab.c:3655 [inline]
__kmalloc_track_caller+0x15f/0x760 mm/slab.c:3670
kmemdup+0x27/0x60 mm/util.c:120
kmemdup include/linux/string.h:477 [inline]
tls_set_sw_offload+0xb3a/0x1567 net/tls/tls_sw.c:2361
do_tls_setsockopt_conf net/tls/tls_main.c:581 [inline]
do_tls_setsockopt net/tls/tls_main.c:630 [inline]
tls_setsockopt+0x4d5/0x8d0 net/tls/tls_main.c:649
sock_common_setsockopt+0x94/0xd0 net/core/sock.c:3130
__sys_setsockopt+0x261/0x4c0 net/socket.c:2084
__do_sys_setsockopt net/socket.c:2100 [inline]
__se_sys_setsockopt net/socket.c:2097 [inline]
__x64_sys_setsockopt+0xbe/0x150 net/socket.c:2097
do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe
Freed by task 19107:
save_stack+0x23/0x90 mm/kasan/common.c:69
set_track mm/kasan/common.c:77 [inline]
__kasan_slab_free+0x102/0x150 mm/kasan/common.c:448
kasan_slab_free+0xe/0x10 mm/kasan/common.c:456
__cache_free mm/slab.c:3425 [inline]
kfree+0x10a/0x2c0 mm/slab.c:3756
tls_sk_proto_cleanup+0x216/0x3e0 net/tls/tls_main.c:300
tls_sk_proto_unhash+0x90/0x3f0 net/tls/tls_main.c:330
tcp_set_state+0x5b9/0x7d0 net/ipv4/tcp.c:2235
tcp_done+0xe2/0x320 net/ipv4/tcp.c:3824
tcp_reset+0x132/0x500 net/ipv4/tcp_input.c:4080
tcp_validate_incoming+0xa2d/0x1660 net/ipv4/tcp_input.c:5440
tcp_rcv_established+0x6b5/0x1e70 net/ipv4/tcp_input.c:5648
tcp_v6_do_rcv+0x41e/0x12c0 net/ipv6/tcp_ipv6.c:1356
sk_backlog_rcv include/net/sock.h:945 [inline]
__release_sock+0x129/0x390 net/core/sock.c:2418
release_sock+0x59/0x1c0 net/core/sock.c:2934
wait_on_pending_writer+0x20f/0x420 net/tls/tls_main.c:91
tls_sk_proto_cleanup+0x2c5/0x3e0 net/tls/tls_main.c:295
tls_sk_proto_unhash+0x90/0x3f0 net/tls/tls_main.c:330
tcp_set_state+0x5b9/0x7d0 net/ipv4/tcp.c:2235
tcp_done+0xe2/0x320 net/ipv4/tcp.c:3824
tcp_reset+0x132/0x500 net/ipv4/tcp_input.c:4080
tcp_validate_incoming+0xa2d/0x1660 net/ipv4/tcp_input.c:5440
tcp_rcv_established+0x6b5/0x1e70 net/ipv4/tcp_input.c:5648
tcp_v6_do_rcv+0x41e/0x12c0 net/ipv6/tcp_ipv6.c:1356
sk_backlog_rcv include/net/sock.h:945 [inline]
__release_sock+0x129/0x390 net/core/sock.c:2418
release_sock+0x59/0x1c0 net/core/sock.c:2934
sk_stream_wait_memory+0x65a/0xfc0 net/core/stream.c:149
tls_sw_sendmsg+0x673/0x17b0 net/tls/tls_sw.c:1054
inet6_sendmsg+0x9e/0xe0 net/ipv6/af_inet6.c:576
sock_sendmsg_nosec net/socket.c:637 [inline]
sock_sendmsg+0xd7/0x130 net/socket.c:657
__sys_sendto+0x262/0x380 net/socket.c:1952
__do_sys_sendto net/socket.c:1964 [inline]
__se_sys_sendto net/socket.c:1960 [inline]
__x64_sys_sendto+0xe1/0x1a0 net/socket.c:1960
do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe
The buggy address belongs to the object at ffff88808ff1c500
which belongs to the cache kmalloc-32 of size 32
The buggy address is located 0 bytes inside of
32-byte region [ffff88808ff1c500, ffff88808ff1c520)
The buggy address belongs to the page:
page:ffffea00023fc700 refcount:1 mapcount:0 mapping:ffff8880aa4001c0
index:0xffff88808ff1cfc1
flags: 0x1fffc0000000200(slab)
raw: 01fffc0000000200 ffffea0002a12308 ffffea0002912208 ffff8880aa4001c0
raw: ffff88808ff1cfc1 ffff88808ff1c000 0000000100000024 0000000000000000
page dumped because: kasan: bad access detected
Memory state around the buggy address:
ffff88808ff1c400: fb fb fb fb fc fc fc fc fb fb fb fb fc fc fc fc
ffff88808ff1c480: fb fb fb fb fc fc fc fc fb fb fb fb fc fc fc fc
> ffff88808ff1c500: fb fb fb fb fc fc fc fc fb fb fb fb fc fc fc fc
^
ffff88808ff1c580: fb fb fb fb fc fc fc fc fb fb fb fb fc fc fc fc
ffff88808ff1c600: fb fb fb fb fc fc fc fc fb fb fb fb fc fc fc fc
==================================================================
---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.
syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
^ permalink raw reply
* Re: [PATCH v2 bpf-next 02/12] libbpf: implement BPF CO-RE offset relocation algorithm
From: Song Liu @ 2019-07-31 8:29 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: Andrii Nakryiko, bpf, netdev@vger.kernel.org, Alexei Starovoitov,
daniel@iogearbox.net, Yonghong Song, Kernel Team
In-Reply-To: <CAEf4Bza3cAoZJE+24_MBiv-8yYtAaTkAez5xq1v12cLW1-RGcw@mail.gmail.com>
> On Jul 30, 2019, at 11:52 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Jul 30, 2019 at 10:19 PM Song Liu <songliubraving@fb.com> wrote:
>>
>>
>>
>>> On Jul 30, 2019, at 6:00 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>>>
>>> On Tue, Jul 30, 2019 at 5:39 PM Song Liu <songliubraving@fb.com> wrote:
>>>>
>>>>
>>>>
>>>>> On Jul 30, 2019, at 12:53 PM, Andrii Nakryiko <andriin@fb.com> wrote:
>>>>>
>>>>> This patch implements the core logic for BPF CO-RE offsets relocations.
>>>>> Every instruction that needs to be relocated has corresponding
>>>>> bpf_offset_reloc as part of BTF.ext. Relocations are performed by trying
>>>>> to match recorded "local" relocation spec against potentially many
>>>>> compatible "target" types, creating corresponding spec. Details of the
>>>>> algorithm are noted in corresponding comments in the code.
>>>>>
>>>>> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
[...]
>>>>
>>>
>>> I just picked the most succinct and non-repetitive form. It's
>>> immediately apparent which type it's implicitly converted to, so I
>>> felt there is no need to repeat it. Also, just (void *) is much
>>> shorter. :)
>>
>> _All_ other code in btf.c converts the pointer to the target type.
>
> Most in libbpf.c doesn't, though. Also, I try to preserve pointer
> constness for uses that don't modify BTF types (pretty much all of
> them in libbpf), so it becomes really verbose, despite extremely short
> variable names:
>
> const struct btf_member *m = (const struct btf_member *)(t + 1);
I don't think being verbose is a big problem here. Overusing
(void *) feels like a bigger problem.
>
> Add one or two levels of nestedness and you are wrapping this line.
>
>> In some cases, it is not apparent which type it is converted to,
>> for example:
>>
>> + m = (void *)(targ_type + 1);
>>
>> I would suggest we do implicit conversion whenever possible.
>
> Implicit conversion (`m = targ_type + 1;`) is a compilation error,
> that won't work.
I misused "implicit" here. I actually meant to say
m = ((const struct btf_member *)(t + 1);
^ permalink raw reply
* Re: [PATCH v5 12/29] compat_ioctl: move drivers to compat_ptr_ioctl
From: Cornelia Huck @ 2019-07-31 8:37 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Alexander Viro, devel, linux-input, kvm, Michael S . Tsirkin,
Greg Kroah-Hartman, linux-usb, netdev, linux-kernel,
Jarkko Sakkinen, virtualization, Jason Gunthorpe, linux-mtd,
Stefan Hajnoczi, Jiri Kosina, linux-fsdevel, ceph-devel,
linux-integrity, linux1394-devel, linux-stm32, linux-arm-kernel
In-Reply-To: <20190730195227.742215-1-arnd@arndb.de>
On Tue, 30 Jul 2019 21:50:28 +0200
Arnd Bergmann <arnd@arndb.de> wrote:
> Each of these drivers has a copy of the same trivial helper function to
> convert the pointer argument and then call the native ioctl handler.
>
> We now have a generic implementation of that, so use it.
>
> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Acked-by: Michael S. Tsirkin <mst@redhat.com>
> Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
> Reviewed-by: Jiri Kosina <jkosina@suse.cz>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
> drivers/vfio/vfio.c | 39 +++----------------------------
vfio changes:
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
^ permalink raw reply
* [PATCH V2 0/9] Fixes for metadata accelreation
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
Hi all:
This series try to fix several issues introduced by meta data
accelreation series. Please review.
Changes from V1:
- Try not use RCU to syncrhonize MMU notifier with vhost worker
- set dirty pages after no readers
- return -EAGAIN only when we find the range is overlapped with
metadata
Jason Wang (9):
vhost: don't set uaddr for invalid address
vhost: validate MMU notifier registration
vhost: fix vhost map leak
vhost: reset invalidate_count in vhost_set_vring_num_addr()
vhost: mark dirty pages during map uninit
vhost: don't do synchronize_rcu() in vhost_uninit_vq_maps()
vhost: do not use RCU to synchronize MMU notifier with worker
vhost: correctly set dirty pages in MMU notifiers callback
vhost: do not return -EAGIAN for non blocking invalidation too early
drivers/vhost/vhost.c | 232 +++++++++++++++++++++++++++---------------
drivers/vhost/vhost.h | 8 +-
2 files changed, 154 insertions(+), 86 deletions(-)
--
2.18.1
^ permalink raw reply
* [PATCH V2 1/9] vhost: don't set uaddr for invalid address
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-1-jasowang@redhat.com>
We should not setup uaddr for the invalid address, otherwise we may
try to pin or prefetch mapping of wrong pages.
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/vhost.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 0536f8526359..488380a581dc 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2082,7 +2082,8 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
}
#if VHOST_ARCH_CAN_ACCEL_UACCESS
- vhost_setup_vq_uaddr(vq);
+ if (r == 0)
+ vhost_setup_vq_uaddr(vq);
if (d->mm)
mmu_notifier_register(&d->mmu_notifier, d->mm);
--
2.18.1
^ permalink raw reply related
* [PATCH V2 2/9] vhost: validate MMU notifier registration
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-1-jasowang@redhat.com>
The return value of mmu_notifier_register() is not checked in
vhost_vring_set_num_addr(). This will cause an out of sync between mm
and MMU notifier thus a double free. To solve this, introduce a
boolean flag to track whether MMU notifier is registered and only do
unregistering when it was true.
Reported-and-tested-by:
syzbot+e58112d71f77113ddb7b@syzkaller.appspotmail.com
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/vhost.c | 19 +++++++++++++++----
drivers/vhost/vhost.h | 1 +
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 488380a581dc..17f6abea192e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -629,6 +629,7 @@ void vhost_dev_init(struct vhost_dev *dev,
dev->iov_limit = iov_limit;
dev->weight = weight;
dev->byte_weight = byte_weight;
+ dev->has_notifier = false;
init_llist_head(&dev->work_list);
init_waitqueue_head(&dev->wait);
INIT_LIST_HEAD(&dev->read_list);
@@ -730,6 +731,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
if (err)
goto err_mmu_notifier;
#endif
+ dev->has_notifier = true;
return 0;
@@ -959,7 +961,11 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
}
if (dev->mm) {
#if VHOST_ARCH_CAN_ACCEL_UACCESS
- mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
+ if (dev->has_notifier) {
+ mmu_notifier_unregister(&dev->mmu_notifier,
+ dev->mm);
+ dev->has_notifier = false;
+ }
#endif
mmput(dev->mm);
}
@@ -2064,8 +2070,10 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
/* Unregister MMU notifer to allow invalidation callback
* can access vq->uaddrs[] without holding a lock.
*/
- if (d->mm)
+ if (d->has_notifier) {
mmu_notifier_unregister(&d->mmu_notifier, d->mm);
+ d->has_notifier = false;
+ }
vhost_uninit_vq_maps(vq);
#endif
@@ -2085,8 +2093,11 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
if (r == 0)
vhost_setup_vq_uaddr(vq);
- if (d->mm)
- mmu_notifier_register(&d->mmu_notifier, d->mm);
+ if (d->mm) {
+ r = mmu_notifier_register(&d->mmu_notifier, d->mm);
+ if (!r)
+ d->has_notifier = true;
+ }
#endif
mutex_unlock(&vq->mutex);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 42a8c2a13ab1..a9a2a93857d2 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -214,6 +214,7 @@ struct vhost_dev {
int iov_limit;
int weight;
int byte_weight;
+ bool has_notifier;
};
bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
--
2.18.1
^ permalink raw reply related
* [PATCH V2 3/9] vhost: fix vhost map leak
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-1-jasowang@redhat.com>
We don't free map during vhost_map_unprefetch(). This means it could
be leaked. Fixing by free the map.
Reported-by: Michael S. Tsirkin <mst@redhat.com>
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/vhost.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 17f6abea192e..2a3154976277 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -302,9 +302,7 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
static void vhost_map_unprefetch(struct vhost_map *map)
{
kfree(map->pages);
- map->pages = NULL;
- map->npages = 0;
- map->addr = NULL;
+ kfree(map);
}
static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
--
2.18.1
^ permalink raw reply related
* [PATCH V2 4/9] vhost: reset invalidate_count in vhost_set_vring_num_addr()
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-1-jasowang@redhat.com>
The vhost_set_vring_num_addr() could be called in the middle of
invalidate_range_start() and invalidate_range_end(). If we don't reset
invalidate_count after the un-registering of MMU notifier, the
invalidate_cont will run out of sync (e.g never reach zero). This will
in fact disable the fast accessor path. Fixing by reset the count to
zero.
Reported-by: Michael S. Tsirkin <mst@redhat.com>
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/vhost.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2a3154976277..2a7217c33668 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2073,6 +2073,10 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
d->has_notifier = false;
}
+ /* reset invalidate_count in case we are in the middle of
+ * invalidate_start() and invalidate_end().
+ */
+ vq->invalidate_count = 0;
vhost_uninit_vq_maps(vq);
#endif
--
2.18.1
^ permalink raw reply related
* [PATCH V2 5/9] vhost: mark dirty pages during map uninit
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-1-jasowang@redhat.com>
We don't mark dirty pages if the map was teared down outside MMU
notifier. This will lead untracked dirty pages. Fixing by marking
dirty pages during map uninit.
Reported-by: Michael S. Tsirkin <mst@redhat.com>
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/vhost.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2a7217c33668..c12cdadb0855 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -305,6 +305,18 @@ static void vhost_map_unprefetch(struct vhost_map *map)
kfree(map);
}
+static void vhost_set_map_dirty(struct vhost_virtqueue *vq,
+ struct vhost_map *map, int index)
+{
+ struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+ int i;
+
+ if (uaddr->write) {
+ for (i = 0; i < map->npages; i++)
+ set_page_dirty(map->pages[i]);
+ }
+}
+
static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
{
struct vhost_map *map[VHOST_NUM_ADDRS];
@@ -314,8 +326,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
for (i = 0; i < VHOST_NUM_ADDRS; i++) {
map[i] = rcu_dereference_protected(vq->maps[i],
lockdep_is_held(&vq->mmu_lock));
- if (map[i])
+ if (map[i]) {
+ vhost_set_map_dirty(vq, map[i], i);
rcu_assign_pointer(vq->maps[i], NULL);
+ }
}
spin_unlock(&vq->mmu_lock);
@@ -353,7 +367,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
{
struct vhost_uaddr *uaddr = &vq->uaddrs[index];
struct vhost_map *map;
- int i;
if (!vhost_map_range_overlap(uaddr, start, end))
return;
@@ -364,10 +377,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
map = rcu_dereference_protected(vq->maps[index],
lockdep_is_held(&vq->mmu_lock));
if (map) {
- if (uaddr->write) {
- for (i = 0; i < map->npages; i++)
- set_page_dirty(map->pages[i]);
- }
+ vhost_set_map_dirty(vq, map, index);
rcu_assign_pointer(vq->maps[index], NULL);
}
spin_unlock(&vq->mmu_lock);
--
2.18.1
^ permalink raw reply related
* [PATCH V2 6/9] vhost: don't do synchronize_rcu() in vhost_uninit_vq_maps()
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-1-jasowang@redhat.com>
There's no need for RCU synchronization in vhost_uninit_vq_maps()
since we've already serialized with readers (memory accessors). This
also avoid the possible userspace DOS through ioctl() because of the
possible high latency caused by synchronize_rcu().
Reported-by: Michael S. Tsirkin <mst@redhat.com>
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/vhost.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c12cdadb0855..cfc11f9ed9c9 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -333,7 +333,9 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
}
spin_unlock(&vq->mmu_lock);
- synchronize_rcu();
+ /* No need for synchronize_rcu() or kfree_rcu() since we are
+ * serialized with memory accessors (e.g vq mutex held).
+ */
for (i = 0; i < VHOST_NUM_ADDRS; i++)
if (map[i])
--
2.18.1
^ permalink raw reply related
* [PATCH V2 7/9] vhost: do not use RCU to synchronize MMU notifier with worker
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-1-jasowang@redhat.com>
We used to use RCU to synchronize MMU notifier with worker. This leads
calling synchronize_rcu() in invalidate_range_start(). But on a busy
system, there would be many factors that may slow down the
synchronize_rcu() which makes it unsuitable to be called in MMU
notifier.
A solution is SRCU but its overhead is obvious with the expensive full
memory barrier. Another choice is to use seqlock, but it doesn't
provide a synchronization method between readers and writers. The last
choice is to use vq mutex, but it need to deal with the worst case
that MMU notifier must be blocked and wait for the finish of swap in.
So this patch switches use a counter to track whether or not the map
was used. The counter was increased when vq try to start or finish
uses the map. This means, when it was even, we're sure there's no
readers and MMU notifier is synchronized. When it was odd, it means
there's a reader we need to wait it to be even again then we are
synchronized. To avoid full memory barrier, store_release +
load_acquire on the counter is used.
Consider the read critical section is pretty small the synchronization
should be done very fast.
Note the patch lead about 3% PPS dropping.
Reported-by: Michael S. Tsirkin <mst@redhat.com>
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/vhost.c | 145 ++++++++++++++++++++++++++----------------
drivers/vhost/vhost.h | 7 +-
2 files changed, 94 insertions(+), 58 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index cfc11f9ed9c9..db2c81cb1e90 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -324,17 +324,16 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
spin_lock(&vq->mmu_lock);
for (i = 0; i < VHOST_NUM_ADDRS; i++) {
- map[i] = rcu_dereference_protected(vq->maps[i],
- lockdep_is_held(&vq->mmu_lock));
+ map[i] = vq->maps[i];
if (map[i]) {
vhost_set_map_dirty(vq, map[i], i);
- rcu_assign_pointer(vq->maps[i], NULL);
+ vq->maps[i] = NULL;
}
}
spin_unlock(&vq->mmu_lock);
- /* No need for synchronize_rcu() or kfree_rcu() since we are
- * serialized with memory accessors (e.g vq mutex held).
+ /* No need for synchronization since we are serialized with
+ * memory accessors (e.g vq mutex held).
*/
for (i = 0; i < VHOST_NUM_ADDRS; i++)
@@ -362,6 +361,44 @@ static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
}
+static void inline vhost_vq_access_map_begin(struct vhost_virtqueue *vq)
+{
+ int ref = READ_ONCE(vq->ref);
+
+ smp_store_release(&vq->ref, ref + 1);
+ /* Make sure ref counter is visible before accessing the map */
+ smp_load_acquire(&vq->ref);
+}
+
+static void inline vhost_vq_access_map_end(struct vhost_virtqueue *vq)
+{
+ int ref = READ_ONCE(vq->ref);
+
+ /* Make sure vq access is done before increasing ref counter */
+ smp_store_release(&vq->ref, ref + 1);
+}
+
+static void inline vhost_vq_sync_access(struct vhost_virtqueue *vq)
+{
+ int ref;
+
+ /* Make sure map change was done before checking ref counter */
+ smp_mb();
+
+ ref = READ_ONCE(vq->ref);
+ if (ref & 0x1) {
+ /* When ref change, we are sure no reader can see
+ * previous map */
+ while (READ_ONCE(vq->ref) == ref) {
+ set_current_state(TASK_RUNNING);
+ schedule();
+ }
+ }
+ /* Make sure ref counter was checked before any other
+ * operations that was dene on map. */
+ smp_mb();
+}
+
static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
int index,
unsigned long start,
@@ -376,16 +413,15 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
spin_lock(&vq->mmu_lock);
++vq->invalidate_count;
- map = rcu_dereference_protected(vq->maps[index],
- lockdep_is_held(&vq->mmu_lock));
+ map = vq->maps[index];
if (map) {
vhost_set_map_dirty(vq, map, index);
- rcu_assign_pointer(vq->maps[index], NULL);
+ vq->maps[index] = NULL;
}
spin_unlock(&vq->mmu_lock);
if (map) {
- synchronize_rcu();
+ vhost_vq_sync_access(vq);
vhost_map_unprefetch(map);
}
}
@@ -457,7 +493,7 @@ static void vhost_init_maps(struct vhost_dev *dev)
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
for (j = 0; j < VHOST_NUM_ADDRS; j++)
- RCU_INIT_POINTER(vq->maps[j], NULL);
+ vq->maps[j] = NULL;
}
}
#endif
@@ -655,6 +691,7 @@ void vhost_dev_init(struct vhost_dev *dev,
vq->indirect = NULL;
vq->heads = NULL;
vq->dev = dev;
+ vq->ref = 0;
mutex_init(&vq->mutex);
spin_lock_init(&vq->mmu_lock);
vhost_vq_reset(dev, vq);
@@ -921,7 +958,7 @@ static int vhost_map_prefetch(struct vhost_virtqueue *vq,
map->npages = npages;
map->pages = pages;
- rcu_assign_pointer(vq->maps[index], map);
+ vq->maps[index] = map;
/* No need for a synchronize_rcu(). This function should be
* called by dev->worker so we are serialized with all
* readers.
@@ -1216,18 +1253,18 @@ static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
struct vring_used *used;
if (!vq->iotlb) {
- rcu_read_lock();
+ vhost_vq_access_map_begin(vq);
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+ map = vq->maps[VHOST_ADDR_USED];
if (likely(map)) {
used = map->addr;
*((__virtio16 *)&used->ring[vq->num]) =
cpu_to_vhost16(vq, vq->avail_idx);
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1245,18 +1282,18 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
size_t size;
if (!vq->iotlb) {
- rcu_read_lock();
+ vhost_vq_access_map_begin(vq);
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+ map = vq->maps[VHOST_ADDR_USED];
if (likely(map)) {
used = map->addr;
size = count * sizeof(*head);
memcpy(used->ring + idx, head, size);
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1272,17 +1309,17 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
struct vring_used *used;
if (!vq->iotlb) {
- rcu_read_lock();
+ vhost_vq_access_map_begin(vq);
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+ map = vq->maps[VHOST_ADDR_USED];
if (likely(map)) {
used = map->addr;
used->flags = cpu_to_vhost16(vq, vq->used_flags);
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1298,17 +1335,17 @@ static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
struct vring_used *used;
if (!vq->iotlb) {
- rcu_read_lock();
+ vhost_vq_access_map_begin(vq);
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+ map = vq->maps[VHOST_ADDR_USED];
if (likely(map)) {
used = map->addr;
used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1362,17 +1399,17 @@ static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
struct vring_avail *avail;
if (!vq->iotlb) {
- rcu_read_lock();
+ vhost_vq_access_map_begin(vq);
- map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+ map = vq->maps[VHOST_ADDR_AVAIL];
if (likely(map)) {
avail = map->addr;
*idx = avail->idx;
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1387,17 +1424,17 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
struct vring_avail *avail;
if (!vq->iotlb) {
- rcu_read_lock();
+ vhost_vq_access_map_begin(vq);
- map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+ map = vq->maps[VHOST_ADDR_AVAIL];
if (likely(map)) {
avail = map->addr;
*head = avail->ring[idx & (vq->num - 1)];
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1413,17 +1450,17 @@ static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
struct vring_avail *avail;
if (!vq->iotlb) {
- rcu_read_lock();
+ vhost_vq_access_map_begin(vq);
- map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+ map = vq->maps[VHOST_ADDR_AVAIL];
if (likely(map)) {
avail = map->addr;
*flags = avail->flags;
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1438,15 +1475,15 @@ static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
struct vring_avail *avail;
if (!vq->iotlb) {
- rcu_read_lock();
- map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
+ vhost_vq_access_map_begin(vq);
+ map = vq->maps[VHOST_ADDR_AVAIL];
if (likely(map)) {
avail = map->addr;
*event = (__virtio16)avail->ring[vq->num];
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1461,17 +1498,17 @@ static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
struct vring_used *used;
if (!vq->iotlb) {
- rcu_read_lock();
+ vhost_vq_access_map_begin(vq);
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
+ map = vq->maps[VHOST_ADDR_USED];
if (likely(map)) {
used = map->addr;
*idx = used->idx;
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1486,17 +1523,17 @@ static inline int vhost_get_desc(struct vhost_virtqueue *vq,
struct vring_desc *d;
if (!vq->iotlb) {
- rcu_read_lock();
+ vhost_vq_access_map_begin(vq);
- map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
+ map = vq->maps[VHOST_ADDR_DESC];
if (likely(map)) {
d = map->addr;
*desc = *(d + idx);
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
return 0;
}
- rcu_read_unlock();
+ vhost_vq_access_map_end(vq);
}
#endif
@@ -1843,13 +1880,11 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
#if VHOST_ARCH_CAN_ACCEL_UACCESS
static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
{
- struct vhost_map __rcu *map;
+ struct vhost_map *map;
int i;
for (i = 0; i < VHOST_NUM_ADDRS; i++) {
- rcu_read_lock();
- map = rcu_dereference(vq->maps[i]);
- rcu_read_unlock();
+ map = vq->maps[i];
if (unlikely(!map))
vhost_map_prefetch(vq, i);
}
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index a9a2a93857d2..f9e9558a529d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -115,16 +115,17 @@ struct vhost_virtqueue {
#if VHOST_ARCH_CAN_ACCEL_UACCESS
/* Read by memory accessors, modified by meta data
* prefetching, MMU notifier and vring ioctl().
- * Synchonrized through mmu_lock (writers) and RCU (writers
- * and readers).
+ * Synchonrized through mmu_lock (writers) and ref counters,
+ * see vhost_vq_access_map_begin()/vhost_vq_access_map_end().
*/
- struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
+ struct vhost_map *maps[VHOST_NUM_ADDRS];
/* Read by MMU notifier, modified by vring ioctl(),
* synchronized through MMU notifier
* registering/unregistering.
*/
struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
#endif
+ int ref;
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
--
2.18.1
^ permalink raw reply related
* [PATCH V2 8/9] vhost: correctly set dirty pages in MMU notifiers callback
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-1-jasowang@redhat.com>
We need make sure there's no reference on the map before trying to
mark set dirty pages.
Reported-by: Michael S. Tsirkin <mst@redhat.com>
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/vhost.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index db2c81cb1e90..fc2da8a0c671 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -414,14 +414,13 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
++vq->invalidate_count;
map = vq->maps[index];
- if (map) {
- vhost_set_map_dirty(vq, map, index);
+ if (map)
vq->maps[index] = NULL;
- }
spin_unlock(&vq->mmu_lock);
if (map) {
vhost_vq_sync_access(vq);
+ vhost_set_map_dirty(vq, map, index);
vhost_map_unprefetch(map);
}
}
--
2.18.1
^ permalink raw reply related
* [PATCH V2 9/9] vhost: do not return -EAGIAN for non blocking invalidation too early
From: Jason Wang @ 2019-07-31 8:46 UTC (permalink / raw)
To: mst, jasowang, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-1-jasowang@redhat.com>
Instead of returning -EAGAIN unconditionally, we'd better do that only
we're sure the range is overlapped with the metadata area.
Reported-by: Jason Gunthorpe <jgg@ziepe.ca>
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/vhost.c | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index fc2da8a0c671..96c6aeb1871f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -399,16 +399,19 @@ static void inline vhost_vq_sync_access(struct vhost_virtqueue *vq)
smp_mb();
}
-static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
- int index,
- unsigned long start,
- unsigned long end)
+static int vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
+ int index,
+ unsigned long start,
+ unsigned long end,
+ bool blockable)
{
struct vhost_uaddr *uaddr = &vq->uaddrs[index];
struct vhost_map *map;
if (!vhost_map_range_overlap(uaddr, start, end))
- return;
+ return 0;
+ else if (!blockable)
+ return -EAGAIN;
spin_lock(&vq->mmu_lock);
++vq->invalidate_count;
@@ -423,6 +426,8 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
vhost_set_map_dirty(vq, map, index);
vhost_map_unprefetch(map);
}
+
+ return 0;
}
static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
@@ -443,18 +448,19 @@ static int vhost_invalidate_range_start(struct mmu_notifier *mn,
{
struct vhost_dev *dev = container_of(mn, struct vhost_dev,
mmu_notifier);
- int i, j;
-
- if (!mmu_notifier_range_blockable(range))
- return -EAGAIN;
+ bool blockable = mmu_notifier_range_blockable(range);
+ int i, j, ret;
for (i = 0; i < dev->nvqs; i++) {
struct vhost_virtqueue *vq = dev->vqs[i];
- for (j = 0; j < VHOST_NUM_ADDRS; j++)
- vhost_invalidate_vq_start(vq, j,
- range->start,
- range->end);
+ for (j = 0; j < VHOST_NUM_ADDRS; j++) {
+ ret = vhost_invalidate_vq_start(vq, j,
+ range->start,
+ range->end, blockable);
+ if (ret)
+ return ret;
+ }
}
return 0;
--
2.18.1
^ permalink raw reply related
* Re: [PATCH V2 7/9] vhost: do not use RCU to synchronize MMU notifier with worker
From: Jason Wang @ 2019-07-31 8:50 UTC (permalink / raw)
To: mst, kvm, virtualization, netdev; +Cc: linux-kernel, linux-mm, jgg
In-Reply-To: <20190731084655.7024-8-jasowang@redhat.com>
On 2019/7/31 下午4:46, Jason Wang wrote:
> We used to use RCU to synchronize MMU notifier with worker. This leads
> calling synchronize_rcu() in invalidate_range_start(). But on a busy
> system, there would be many factors that may slow down the
> synchronize_rcu() which makes it unsuitable to be called in MMU
> notifier.
>
> A solution is SRCU but its overhead is obvious with the expensive full
> memory barrier. Another choice is to use seqlock, but it doesn't
> provide a synchronization method between readers and writers. The last
> choice is to use vq mutex, but it need to deal with the worst case
> that MMU notifier must be blocked and wait for the finish of swap in.
>
> So this patch switches use a counter to track whether or not the map
> was used. The counter was increased when vq try to start or finish
> uses the map. This means, when it was even, we're sure there's no
> readers and MMU notifier is synchronized. When it was odd, it means
> there's a reader we need to wait it to be even again then we are
> synchronized. To avoid full memory barrier, store_release +
> load_acquire on the counter is used.
For reviewers, I try hard to avoid e.g smp_mb(), please double check
whether or not this trick work.
Thanks
>
> Consider the read critical section is pretty small the synchronization
> should be done very fast.
>
> Note the patch lead about 3% PPS dropping.
>
> Reported-by: Michael S. Tsirkin <mst@redhat.com>
> Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> drivers/vhost/vhost.c | 145 ++++++++++++++++++++++++++----------------
> drivers/vhost/vhost.h | 7 +-
> 2 files changed, 94 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index cfc11f9ed9c9..db2c81cb1e90 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -324,17 +324,16 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
>
> spin_lock(&vq->mmu_lock);
> for (i = 0; i < VHOST_NUM_ADDRS; i++) {
> - map[i] = rcu_dereference_protected(vq->maps[i],
> - lockdep_is_held(&vq->mmu_lock));
> + map[i] = vq->maps[i];
> if (map[i]) {
> vhost_set_map_dirty(vq, map[i], i);
> - rcu_assign_pointer(vq->maps[i], NULL);
> + vq->maps[i] = NULL;
> }
> }
> spin_unlock(&vq->mmu_lock);
>
> - /* No need for synchronize_rcu() or kfree_rcu() since we are
> - * serialized with memory accessors (e.g vq mutex held).
> + /* No need for synchronization since we are serialized with
> + * memory accessors (e.g vq mutex held).
> */
>
> for (i = 0; i < VHOST_NUM_ADDRS; i++)
> @@ -362,6 +361,44 @@ static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
> return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
> }
>
> +static void inline vhost_vq_access_map_begin(struct vhost_virtqueue *vq)
> +{
> + int ref = READ_ONCE(vq->ref);
> +
> + smp_store_release(&vq->ref, ref + 1);
> + /* Make sure ref counter is visible before accessing the map */
> + smp_load_acquire(&vq->ref);
> +}
> +
> +static void inline vhost_vq_access_map_end(struct vhost_virtqueue *vq)
> +{
> + int ref = READ_ONCE(vq->ref);
> +
> + /* Make sure vq access is done before increasing ref counter */
> + smp_store_release(&vq->ref, ref + 1);
> +}
> +
> +static void inline vhost_vq_sync_access(struct vhost_virtqueue *vq)
> +{
> + int ref;
> +
> + /* Make sure map change was done before checking ref counter */
> + smp_mb();
> +
> + ref = READ_ONCE(vq->ref);
> + if (ref & 0x1) {
> + /* When ref change, we are sure no reader can see
> + * previous map */
> + while (READ_ONCE(vq->ref) == ref) {
> + set_current_state(TASK_RUNNING);
> + schedule();
> + }
> + }
> + /* Make sure ref counter was checked before any other
> + * operations that was dene on map. */
> + smp_mb();
> +}
> +
> static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
> int index,
> unsigned long start,
> @@ -376,16 +413,15 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
> spin_lock(&vq->mmu_lock);
> ++vq->invalidate_count;
>
> - map = rcu_dereference_protected(vq->maps[index],
> - lockdep_is_held(&vq->mmu_lock));
> + map = vq->maps[index];
> if (map) {
> vhost_set_map_dirty(vq, map, index);
> - rcu_assign_pointer(vq->maps[index], NULL);
> + vq->maps[index] = NULL;
> }
> spin_unlock(&vq->mmu_lock);
>
> if (map) {
> - synchronize_rcu();
> + vhost_vq_sync_access(vq);
> vhost_map_unprefetch(map);
> }
> }
> @@ -457,7 +493,7 @@ static void vhost_init_maps(struct vhost_dev *dev)
> for (i = 0; i < dev->nvqs; ++i) {
> vq = dev->vqs[i];
> for (j = 0; j < VHOST_NUM_ADDRS; j++)
> - RCU_INIT_POINTER(vq->maps[j], NULL);
> + vq->maps[j] = NULL;
> }
> }
> #endif
> @@ -655,6 +691,7 @@ void vhost_dev_init(struct vhost_dev *dev,
> vq->indirect = NULL;
> vq->heads = NULL;
> vq->dev = dev;
> + vq->ref = 0;
> mutex_init(&vq->mutex);
> spin_lock_init(&vq->mmu_lock);
> vhost_vq_reset(dev, vq);
> @@ -921,7 +958,7 @@ static int vhost_map_prefetch(struct vhost_virtqueue *vq,
> map->npages = npages;
> map->pages = pages;
>
> - rcu_assign_pointer(vq->maps[index], map);
> + vq->maps[index] = map;
> /* No need for a synchronize_rcu(). This function should be
> * called by dev->worker so we are serialized with all
> * readers.
> @@ -1216,18 +1253,18 @@ static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
> struct vring_used *used;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> + vhost_vq_access_map_begin(vq);
>
> - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> + map = vq->maps[VHOST_ADDR_USED];
> if (likely(map)) {
> used = map->addr;
> *((__virtio16 *)&used->ring[vq->num]) =
> cpu_to_vhost16(vq, vq->avail_idx);
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
>
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1245,18 +1282,18 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
> size_t size;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> + vhost_vq_access_map_begin(vq);
>
> - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> + map = vq->maps[VHOST_ADDR_USED];
> if (likely(map)) {
> used = map->addr;
> size = count * sizeof(*head);
> memcpy(used->ring + idx, head, size);
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
>
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1272,17 +1309,17 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
> struct vring_used *used;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> + vhost_vq_access_map_begin(vq);
>
> - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> + map = vq->maps[VHOST_ADDR_USED];
> if (likely(map)) {
> used = map->addr;
> used->flags = cpu_to_vhost16(vq, vq->used_flags);
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
>
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1298,17 +1335,17 @@ static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
> struct vring_used *used;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> + vhost_vq_access_map_begin(vq);
>
> - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> + map = vq->maps[VHOST_ADDR_USED];
> if (likely(map)) {
> used = map->addr;
> used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
>
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1362,17 +1399,17 @@ static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
> struct vring_avail *avail;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> + vhost_vq_access_map_begin(vq);
>
> - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
> + map = vq->maps[VHOST_ADDR_AVAIL];
> if (likely(map)) {
> avail = map->addr;
> *idx = avail->idx;
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
>
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1387,17 +1424,17 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
> struct vring_avail *avail;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> + vhost_vq_access_map_begin(vq);
>
> - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
> + map = vq->maps[VHOST_ADDR_AVAIL];
> if (likely(map)) {
> avail = map->addr;
> *head = avail->ring[idx & (vq->num - 1)];
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
>
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1413,17 +1450,17 @@ static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
> struct vring_avail *avail;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> + vhost_vq_access_map_begin(vq);
>
> - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
> + map = vq->maps[VHOST_ADDR_AVAIL];
> if (likely(map)) {
> avail = map->addr;
> *flags = avail->flags;
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
>
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1438,15 +1475,15 @@ static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
> struct vring_avail *avail;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
> + vhost_vq_access_map_begin(vq);
> + map = vq->maps[VHOST_ADDR_AVAIL];
> if (likely(map)) {
> avail = map->addr;
> *event = (__virtio16)avail->ring[vq->num];
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1461,17 +1498,17 @@ static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
> struct vring_used *used;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> + vhost_vq_access_map_begin(vq);
>
> - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> + map = vq->maps[VHOST_ADDR_USED];
> if (likely(map)) {
> used = map->addr;
> *idx = used->idx;
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
>
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1486,17 +1523,17 @@ static inline int vhost_get_desc(struct vhost_virtqueue *vq,
> struct vring_desc *d;
>
> if (!vq->iotlb) {
> - rcu_read_lock();
> + vhost_vq_access_map_begin(vq);
>
> - map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
> + map = vq->maps[VHOST_ADDR_DESC];
> if (likely(map)) {
> d = map->addr;
> *desc = *(d + idx);
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> return 0;
> }
>
> - rcu_read_unlock();
> + vhost_vq_access_map_end(vq);
> }
> #endif
>
> @@ -1843,13 +1880,11 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
> #if VHOST_ARCH_CAN_ACCEL_UACCESS
> static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
> {
> - struct vhost_map __rcu *map;
> + struct vhost_map *map;
> int i;
>
> for (i = 0; i < VHOST_NUM_ADDRS; i++) {
> - rcu_read_lock();
> - map = rcu_dereference(vq->maps[i]);
> - rcu_read_unlock();
> + map = vq->maps[i];
> if (unlikely(!map))
> vhost_map_prefetch(vq, i);
> }
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index a9a2a93857d2..f9e9558a529d 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -115,16 +115,17 @@ struct vhost_virtqueue {
> #if VHOST_ARCH_CAN_ACCEL_UACCESS
> /* Read by memory accessors, modified by meta data
> * prefetching, MMU notifier and vring ioctl().
> - * Synchonrized through mmu_lock (writers) and RCU (writers
> - * and readers).
> + * Synchonrized through mmu_lock (writers) and ref counters,
> + * see vhost_vq_access_map_begin()/vhost_vq_access_map_end().
> */
> - struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
> + struct vhost_map *maps[VHOST_NUM_ADDRS];
> /* Read by MMU notifier, modified by vring ioctl(),
> * synchronized through MMU notifier
> * registering/unregistering.
> */
> struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
> #endif
> + int ref;
> const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
>
> struct file *kick;
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox