Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 7/7] net/mlx4_core: Support more than 64 VFs
From: Or Gerlitz @ 2014-11-13 12:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Matan Barak, Amir Vadai, Jack Morgenstein, Or Gerlitz
In-Reply-To: <1415882733-3084-1-git-send-email-ogerlitz@mellanox.com>

From: Matan Barak <matanb@mellanox.com>

We now allow up to 126 VFs. Note though that certain firmware
versions only allow up to 80 VFs. Moreover, old HCAs only support 64 VFs.
In these cases, we limit the maximum number of VFs to 64.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   |    5 ++++-
 drivers/net/ethernet/mellanox/mlx4/main.c |   24 ++++++++++++++++++++++++
 include/linux/mlx4/device.h               |    5 +++--
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index d2f594f..4251f81 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -143,7 +143,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[14] = "Ethernet protocol control support",
 		[15] = "Ethernet Backplane autoneg support",
 		[16] = "CONFIG DEV support",
-		[17] = "Asymmetric EQs support"
+		[17] = "Asymmetric EQs support",
+		[18] = "More than 80 VFs support"
 	};
 	int i;
 
@@ -860,6 +861,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL;
 	if (field32 & (1 << 20))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM;
+	if (field32 & (1 << 21))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS;
 
 	if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
 		for (i = 1; i <= dev_cap->num_ports; ++i) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index ebb2790..3044f9e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2373,6 +2373,24 @@ disable_sriov:
 	return dev_flags & ~MLX4_FLAG_MASTER;
 }
 
+enum {
+	MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
+};
+
+static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
+			      int *nvfs)
+{
+	int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
+	/* Checking for 64 VFs as a limitation of CX2 */
+	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
+	    requested_vfs >= 64) {
+		mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
+			 requested_vfs);
+		return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
+	}
+	return 0;
+}
+
 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 			 int total_vfs, int *nvfs, struct mlx4_priv *priv)
 {
@@ -2484,6 +2502,9 @@ slave_start:
 				goto err_fw;
 			}
 
+			if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
+				goto err_fw;
+
 			if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
 				u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
 								  existing_vfs);
@@ -2512,6 +2533,9 @@ slave_start:
 				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
 				goto err_fw;
 			}
+
+			if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
+				goto err_fw;
 		}
 	}
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1c560eb..cf09e65 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -95,7 +95,7 @@ enum {
 
 enum {
 	MLX4_MAX_NUM_PF		= 16,
-	MLX4_MAX_NUM_VF		= 64,
+	MLX4_MAX_NUM_VF		= 126,
 	MLX4_MAX_NUM_VF_P_PORT  = 64,
 	MLX4_MFUNC_MAX		= 80,
 	MLX4_MAX_EQ_NUM		= 1024,
@@ -190,7 +190,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL        = 1LL <<  14,
 	MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP	= 1LL <<  15,
 	MLX4_DEV_CAP_FLAG2_CONFIG_DEV		= 1LL <<  16,
-	MLX4_DEV_CAP_FLAG2_SYS_EQS		= 1LL <<  17
+	MLX4_DEV_CAP_FLAG2_SYS_EQS		= 1LL <<  17,
+	MLX4_DEV_CAP_FLAG2_80_VFS		= 1LL <<  18
 };
 
 enum {
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next] rhashtable: Drop gfp_flags arg in insert/remove functions
From: Thomas Graf @ 2014-11-13 12:45 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, ebiederm, eric.dumazet

Reallocation is only required for shrinking and expanding, both of which
rely on a mutex for synchronization, and callers of rhashtable_init() are
in non-atomic context. Therefore, there is no reason to continue passing
allocation hints through the API.

Instead, use GFP_KERNEL and add __GFP_NOWARN | __GFP_NORETRY to allow
for a silent fallback to vzalloc() without the OOM killer jumping in, as
pointed out by Eric Dumazet and Eric W. Biederman.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
---
 include/linux/rhashtable.h | 10 +++++-----
 lib/rhashtable.c           | 41 +++++++++++++++++------------------------
 net/netfilter/nft_hash.c   |  4 ++--
 net/netlink/af_netlink.c   |  4 ++--
 4 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index fb298e9d..942fa44 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -96,16 +96,16 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params);
 u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len);
 u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr);
 
-void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t);
-bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t);
+void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node);
+bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node);
 void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
-			     struct rhash_head __rcu **pprev, gfp_t flags);
+			     struct rhash_head __rcu **pprev);
 
 bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size);
 bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size);
 
-int rhashtable_expand(struct rhashtable *ht, gfp_t flags);
-int rhashtable_shrink(struct rhashtable *ht, gfp_t flags);
+int rhashtable_expand(struct rhashtable *ht);
+int rhashtable_shrink(struct rhashtable *ht);
 
 void *rhashtable_lookup(const struct rhashtable *ht, const void *key);
 void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash,
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 081be3b..088e7e5 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -107,13 +107,13 @@ static u32 head_hashfn(const struct rhashtable *ht,
 	return obj_hashfn(ht, rht_obj(ht, he), hsize);
 }
 
-static struct bucket_table *bucket_table_alloc(size_t nbuckets, gfp_t flags)
+static struct bucket_table *bucket_table_alloc(size_t nbuckets)
 {
 	struct bucket_table *tbl;
 	size_t size;
 
 	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
-	tbl = kzalloc(size, flags);
+	tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
 	if (tbl == NULL)
 		tbl = vzalloc(size);
 
@@ -200,7 +200,6 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
 /**
  * rhashtable_expand - Expand hash table while allowing concurrent lookups
  * @ht:		the hash table to expand
- * @flags:	allocation flags
  *
  * A secondary bucket array is allocated and the hash entries are migrated
  * while keeping them on both lists until the end of the RCU grace period.
@@ -211,7 +210,7 @@ static void hashtable_chain_unzip(const struct rhashtable *ht,
  * The caller must ensure that no concurrent table mutations take place.
  * It is however valid to have concurrent lookups if they are RCU protected.
  */
-int rhashtable_expand(struct rhashtable *ht, gfp_t flags)
+int rhashtable_expand(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 	struct rhash_head *he;
@@ -223,7 +222,7 @@ int rhashtable_expand(struct rhashtable *ht, gfp_t flags)
 	if (ht->p.max_shift && ht->shift >= ht->p.max_shift)
 		return 0;
 
-	new_tbl = bucket_table_alloc(old_tbl->size * 2, flags);
+	new_tbl = bucket_table_alloc(old_tbl->size * 2);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -281,7 +280,6 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
 /**
  * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
  * @ht:		the hash table to shrink
- * @flags:	allocation flags
  *
  * This function may only be called in a context where it is safe to call
  * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
@@ -289,7 +287,7 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
  * The caller must ensure that no concurrent table mutations take place.
  * It is however valid to have concurrent lookups if they are RCU protected.
  */
-int rhashtable_shrink(struct rhashtable *ht, gfp_t flags)
+int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht);
 	struct rhash_head __rcu **pprev;
@@ -300,7 +298,7 @@ int rhashtable_shrink(struct rhashtable *ht, gfp_t flags)
 	if (ht->shift <= ht->p.min_shift)
 		return 0;
 
-	ntbl = bucket_table_alloc(tbl->size / 2, flags);
+	ntbl = bucket_table_alloc(tbl->size / 2);
 	if (ntbl == NULL)
 		return -ENOMEM;
 
@@ -341,7 +339,6 @@ EXPORT_SYMBOL_GPL(rhashtable_shrink);
  * rhashtable_insert - insert object into hash hash table
  * @ht:		hash table
  * @obj:	pointer to hash head inside object
- * @flags:	allocation flags (table expansion)
  *
  * Will automatically grow the table via rhashtable_expand() if the the
  * grow_decision function specified at rhashtable_init() returns true.
@@ -349,8 +346,7 @@ EXPORT_SYMBOL_GPL(rhashtable_shrink);
  * The caller must ensure that no concurrent table mutations occur. It is
  * however valid to have concurrent lookups if they are RCU protected.
  */
-void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
-		       gfp_t flags)
+void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
 {
 	struct bucket_table *tbl = rht_dereference(ht->tbl, ht);
 	u32 hash;
@@ -363,7 +359,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	ht->nelems++;
 
 	if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
-		rhashtable_expand(ht, flags);
+		rhashtable_expand(ht);
 }
 EXPORT_SYMBOL_GPL(rhashtable_insert);
 
@@ -372,14 +368,13 @@ EXPORT_SYMBOL_GPL(rhashtable_insert);
  * @ht:		hash table
  * @obj:	pointer to hash head inside object
  * @pprev:	pointer to previous element
- * @flags:	allocation flags (table expansion)
  *
  * Identical to rhashtable_remove() but caller is alreayd aware of the element
  * in front of the element to be deleted. This is in particular useful for
  * deletion when combined with walking or lookup.
  */
 void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
-			     struct rhash_head __rcu **pprev, gfp_t flags)
+			     struct rhash_head __rcu **pprev)
 {
 	struct bucket_table *tbl = rht_dereference(ht->tbl, ht);
 
@@ -390,7 +385,7 @@ void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
 
 	if (ht->p.shrink_decision &&
 	    ht->p.shrink_decision(ht, tbl->size))
-		rhashtable_shrink(ht, flags);
+		rhashtable_shrink(ht);
 }
 EXPORT_SYMBOL_GPL(rhashtable_remove_pprev);
 
@@ -398,7 +393,6 @@ EXPORT_SYMBOL_GPL(rhashtable_remove_pprev);
  * rhashtable_remove - remove object from hash table
  * @ht:		hash table
  * @obj:	pointer to hash head inside object
- * @flags:	allocation flags (table expansion)
  *
  * Since the hash chain is single linked, the removal operation needs to
  * walk the bucket chain upon removal. The removal operation is thus
@@ -410,8 +404,7 @@ EXPORT_SYMBOL_GPL(rhashtable_remove_pprev);
  * The caller must ensure that no concurrent table mutations occur. It is
  * however valid to have concurrent lookups if they are RCU protected.
  */
-bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj,
-		       gfp_t flags)
+bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 {
 	struct bucket_table *tbl = rht_dereference(ht->tbl, ht);
 	struct rhash_head __rcu **pprev;
@@ -429,7 +422,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj,
 			continue;
 		}
 
-		rhashtable_remove_pprev(ht, he, pprev, flags);
+		rhashtable_remove_pprev(ht, he, pprev);
 		return true;
 	}
 
@@ -572,7 +565,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	if (params->nelem_hint)
 		size = rounded_hashtable_size(params);
 
-	tbl = bucket_table_alloc(size, GFP_KERNEL);
+	tbl = bucket_table_alloc(size);
 	if (tbl == NULL)
 		return -ENOMEM;
 
@@ -707,7 +700,7 @@ static int __init test_rhashtable(struct rhashtable *ht)
 		obj->ptr = TEST_PTR;
 		obj->value = i * 2;
 
-		rhashtable_insert(ht, &obj->node, GFP_KERNEL);
+		rhashtable_insert(ht, &obj->node);
 	}
 
 	rcu_read_lock();
@@ -718,7 +711,7 @@ static int __init test_rhashtable(struct rhashtable *ht)
 
 	for (i = 0; i < TEST_NEXPANDS; i++) {
 		pr_info("  Table expansion iteration %u...\n", i);
-		rhashtable_expand(ht, GFP_KERNEL);
+		rhashtable_expand(ht);
 
 		rcu_read_lock();
 		pr_info("  Verifying lookups...\n");
@@ -728,7 +721,7 @@ static int __init test_rhashtable(struct rhashtable *ht)
 
 	for (i = 0; i < TEST_NEXPANDS; i++) {
 		pr_info("  Table shrinkage iteration %u...\n", i);
-		rhashtable_shrink(ht, GFP_KERNEL);
+		rhashtable_shrink(ht);
 
 		rcu_read_lock();
 		pr_info("  Verifying lookups...\n");
@@ -743,7 +736,7 @@ static int __init test_rhashtable(struct rhashtable *ht)
 		obj = rhashtable_lookup(ht, &key);
 		BUG_ON(!obj);
 
-		rhashtable_remove(ht, &obj->node, GFP_KERNEL);
+		rhashtable_remove(ht, &obj->node);
 		kfree(obj);
 	}
 
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 8892b7b..2501187 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -65,7 +65,7 @@ static int nft_hash_insert(const struct nft_set *set,
 	if (set->flags & NFT_SET_MAP)
 		nft_data_copy(he->data, &elem->data);
 
-	rhashtable_insert(priv, &he->node, GFP_KERNEL);
+	rhashtable_insert(priv, &he->node);
 
 	return 0;
 }
@@ -88,7 +88,7 @@ static void nft_hash_remove(const struct nft_set *set,
 	pprev = elem->cookie;
 	he = rht_dereference((*pprev), priv);
 
-	rhashtable_remove_pprev(priv, he, pprev, GFP_KERNEL);
+	rhashtable_remove_pprev(priv, he, pprev);
 
 	synchronize_rcu();
 	kfree(he);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 580b794..756fc14 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1092,7 +1092,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
 
 	nlk_sk(sk)->portid = portid;
 	sock_hold(sk);
-	rhashtable_insert(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL);
+	rhashtable_insert(&table->hash, &nlk_sk(sk)->node);
 	err = 0;
 err:
 	mutex_unlock(&nl_sk_hash_lock);
@@ -1105,7 +1105,7 @@ static void netlink_remove(struct sock *sk)
 
 	mutex_lock(&nl_sk_hash_lock);
 	table = &nl_table[sk->sk_protocol];
-	if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL)) {
+	if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) {
 		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
 		__sock_put(sk);
 	}
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 5/7] net/mlx4_core: Add QUERY_FUNC firmware command
From: Or Gerlitz @ 2014-11-13 12:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Matan Barak, Amir Vadai, Jack Morgenstein, Or Gerlitz
In-Reply-To: <1415882733-3084-1-git-send-email-ogerlitz@mellanox.com>

From: Matan Barak <matanb@mellanox.com>

The QUERY_FUNC firmware command can be used to query the number of EQs,
reserved EQs, etc. for a specific function.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c |   56 +++++++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/fw.h |   11 ++++++
 2 files changed, 67 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index f1a6718..b3bbeb9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -177,6 +177,62 @@ int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
 	return err;
 }
 
+int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	u32 *outbox;
+	u8 in_modifier;
+	u8 field;
+	u16 field16;
+	int err;
+
+#define QUERY_FUNC_BUS_OFFSET			0x00
+#define QUERY_FUNC_DEVICE_OFFSET		0x01
+#define QUERY_FUNC_FUNCTION_OFFSET		0x01
+#define QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET	0x03
+#define QUERY_FUNC_RSVD_EQS_OFFSET		0x04
+#define QUERY_FUNC_MAX_EQ_OFFSET		0x06
+#define QUERY_FUNC_RSVD_UARS_OFFSET		0x0b
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
+
+	in_modifier = slave;
+	mlx4_dbg(dev, "%s for VF %d\n", __func__, in_modifier);
+
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0,
+			   MLX4_CMD_QUERY_FUNC,
+			   MLX4_CMD_TIME_CLASS_A,
+			   MLX4_CMD_NATIVE);
+	if (err)
+		goto out;
+
+	MLX4_GET(field, outbox, QUERY_FUNC_BUS_OFFSET);
+	func->bus = field & 0xf;
+	MLX4_GET(field, outbox, QUERY_FUNC_DEVICE_OFFSET);
+	func->device = field & 0xf1;
+	MLX4_GET(field, outbox, QUERY_FUNC_FUNCTION_OFFSET);
+	func->function = field & 0x7;
+	MLX4_GET(field, outbox, QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET);
+	func->physical_function = field & 0xf;
+	MLX4_GET(field16, outbox, QUERY_FUNC_RSVD_EQS_OFFSET);
+	func->rsvd_eqs = field16 & 0xffff;
+	MLX4_GET(field16, outbox, QUERY_FUNC_MAX_EQ_OFFSET);
+	func->max_eq = field16 & 0xffff;
+	MLX4_GET(field, outbox, QUERY_FUNC_RSVD_UARS_OFFSET);
+	func->rsvd_uars = field & 0x0f;
+
+	mlx4_dbg(dev, "Bus: %d, Device: %d, Function: %d, Physical function: %d, Max EQs: %d, Reserved EQs: %d, Reserved UARs: %d\n",
+		 func->bus, func->device, func->function, func->physical_function,
+		 func->max_eq, func->rsvd_eqs, func->rsvd_uars);
+
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
 int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_vhcr *vhcr,
 				struct mlx4_cmd_mailbox *inbox,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 694557e..48c11b5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -145,6 +145,16 @@ struct mlx4_func_cap {
 	u64	phys_port_id;
 };
 
+struct mlx4_func {
+	int	bus;
+	int	device;
+	int	function;
+	int	physical_function;
+	int	rsvd_eqs;
+	int	max_eq;
+	int	rsvd_uars;
+};
+
 struct mlx4_adapter {
 	char board_id[MLX4_BOARD_ID_LEN];
 	u8   inta_pin;
@@ -211,6 +221,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_cmd_mailbox *inbox,
 				struct mlx4_cmd_mailbox *outbox,
 				struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave);
 int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm);
 int mlx4_UNMAP_FA(struct mlx4_dev *dev);
 int mlx4_RUN_FW(struct mlx4_dev *dev);
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 6/7] net/mlx4_core: Flexible (asymmetric) allocation of EQs and MSI-X vectors for PF/VFs
From: Or Gerlitz @ 2014-11-13 12:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Matan Barak, Amir Vadai, Jack Morgenstein, Or Gerlitz
In-Reply-To: <1415882733-3084-1-git-send-email-ogerlitz@mellanox.com>

From: Matan Barak <matanb@mellanox.com>

Previously, the driver queried the firmware in order to get the number
of supported EQs. Under SRIOV, since this was done before the driver
notified the firmware how many VFs it actually needs, the firmware had
to take into account a worst case scenario and always allocated four EQs
per VF, where one was used for events while the others were used for completions.

Now, when the firmware supports the asymmetric allocation scheme, denoted
by exposing num_sys_eqs > 0 (which sets MLX4_DEV_CAP_FLAG2_SYS_EQS), we use
the QUERY_FUNC command to query the firmware before enabling SRIOV. Thus we
can get more EQs and MSI-X vectors per function.

Moreover, when running in the new firmware/driver mode, the limitation
that the number of EQs should be a power of two is lifted.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/infiniband/hw/mlx4/main.c            |    3 +-
 drivers/net/ethernet/mellanox/mlx4/eq.c      |    8 +-
 drivers/net/ethernet/mellanox/mlx4/fw.c      |   52 +++++++--
 drivers/net/ethernet/mellanox/mlx4/fw.h      |    2 +
 drivers/net/ethernet/mellanox/mlx4/main.c    |  144 ++++++++++++++++++++++----
 drivers/net/ethernet/mellanox/mlx4/profile.c |   19 +++-
 include/linux/mlx4/device.h                  |    4 +-
 7 files changed, 190 insertions(+), 42 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8b72cf3..0c33755 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1975,8 +1975,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 	    dev->caps.num_ports > dev->caps.comp_pool)
 		return;
 
-	eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
-					dev->caps.num_ports);
+	eq_per_port = dev->caps.comp_pool / dev->caps.num_ports;
 
 	/* Init eq table */
 	added_eqs = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 49290a4..d68b264 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1123,8 +1123,12 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
 		goto err_out_free;
 	}
 
-	err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
-			       dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0);
+	err = mlx4_bitmap_init(&priv->eq_table.bitmap,
+			       roundup_pow_of_two(dev->caps.num_eqs),
+			       dev->caps.num_eqs - 1,
+			       dev->caps.reserved_eqs,
+			       roundup_pow_of_two(dev->caps.num_eqs) -
+			       dev->caps.num_eqs);
 	if (err)
 		goto err_out_free;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index b3bbeb9..d2f594f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -142,7 +142,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[13] = "Large cache line (>64B) EQE stride support",
 		[14] = "Ethernet protocol control support",
 		[15] = "Ethernet Backplane autoneg support",
-		[16] = "CONFIG DEV support"
+		[16] = "CONFIG DEV support",
+		[17] = "Asymmetric EQs support"
 	};
 	int i;
 
@@ -200,7 +201,6 @@ int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave)
 	outbox = mailbox->buf;
 
 	in_modifier = slave;
-	mlx4_dbg(dev, "%s for VF %d\n", __func__, in_modifier);
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0,
 			   MLX4_CMD_QUERY_FUNC,
@@ -243,6 +243,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	u8	field, port;
 	u32	size, proxy_qp, qkey;
 	int	err = 0;
+	struct mlx4_func func;
 
 #define QUERY_FUNC_CAP_FLAGS_OFFSET		0x0
 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET		0x1
@@ -287,6 +288,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 #define QUERY_FUNC_CAP_VF_ENABLE_QP0		0x08
 
 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
+#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31)
 
 	if (vhcr->op_modifier == 1) {
 		struct mlx4_active_ports actv_ports =
@@ -365,11 +367,24 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		size = dev->caps.num_cqs;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP);
 
-		size = dev->caps.num_eqs;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
-
-		size = dev->caps.reserved_eqs;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) ||
+		    mlx4_QUERY_FUNC(dev, &func, slave)) {
+			size = vhcr->in_modifier &
+				QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ?
+				dev->caps.num_eqs :
+				rounddown_pow_of_two(dev->caps.num_eqs);
+			MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+			size = dev->caps.reserved_eqs;
+			MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+		} else {
+			size = vhcr->in_modifier &
+				QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ?
+				func.max_eq :
+				rounddown_pow_of_two(func.max_eq);
+			MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+			size = func.rsvd_eqs;
+			MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+		}
 
 		size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
@@ -399,14 +414,17 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
 	u8			field, op_modifier;
 	u32			size, qkey;
 	int			err = 0, quotas = 0;
+	u32                     in_modifier;
 
 	op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
+	in_modifier = op_modifier ? gen_or_port :
+		QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
 
-	err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier,
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, op_modifier,
 			   MLX4_CMD_QUERY_FUNC_CAP,
 			   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 	if (err)
@@ -578,6 +596,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET		0x21
 #define QUERY_DEV_CAP_RSVD_MRW_OFFSET		0x22
 #define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET	0x23
+#define QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET		0x26
 #define QUERY_DEV_CAP_MAX_AV_OFFSET		0x27
 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET		0x29
 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET		0x2b
@@ -678,6 +697,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev_cap->reserved_mrws = 1 << (field & 0xf);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET);
 	dev_cap->max_mtt_seg = 1 << (field & 0x3f);
+	MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET);
+	dev_cap->num_sys_eqs = size & 0xfff;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET);
 	dev_cap->max_requester_per_qp = 1 << (field & 0x3f);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET);
@@ -905,8 +926,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	 * we can't use any EQs whose doorbell falls on that page,
 	 * even if the EQ itself isn't reserved.
 	 */
-	dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4,
-				    dev_cap->reserved_eqs);
+	if (dev_cap->num_sys_eqs == 0)
+		dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4,
+					    dev_cap->reserved_eqs);
+	else
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS;
 
 	mlx4_dbg(dev, "Max ICM size %lld MB\n",
 		 (unsigned long long) dev_cap->max_icm_sz >> 20);
@@ -916,8 +940,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz);
 	mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
 		 dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz);
-	mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
-		 dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz);
+	mlx4_dbg(dev, "Num sys EQs: %d, max EQs: %d, reserved EQs: %d, entry size: %d\n",
+		 dev_cap->num_sys_eqs, dev_cap->max_eqs, dev_cap->reserved_eqs,
+		 dev_cap->eqc_entry_sz);
 	mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
 		 dev_cap->reserved_mrws, dev_cap->reserved_mtts);
 	mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
@@ -1463,6 +1488,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define	 INIT_HCA_AUXC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x50)
 #define	 INIT_HCA_EQC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x60)
 #define	 INIT_HCA_LOG_EQ_OFFSET		 (INIT_HCA_QPC_OFFSET + 0x67)
+#define	INIT_HCA_NUM_SYS_EQS_OFFSET	(INIT_HCA_QPC_OFFSET + 0x6a)
 #define	 INIT_HCA_RDMARC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x70)
 #define	 INIT_HCA_LOG_RD_OFFSET		 (INIT_HCA_QPC_OFFSET + 0x77)
 #define INIT_HCA_MCAST_OFFSET		 0x0c0
@@ -1566,6 +1592,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	MLX4_PUT(inbox, param->auxc_base,     INIT_HCA_AUXC_BASE_OFFSET);
 	MLX4_PUT(inbox, param->eqc_base,      INIT_HCA_EQC_BASE_OFFSET);
 	MLX4_PUT(inbox, param->log_num_eqs,   INIT_HCA_LOG_EQ_OFFSET);
+	MLX4_PUT(inbox, param->num_sys_eqs,   INIT_HCA_NUM_SYS_EQS_OFFSET);
 	MLX4_PUT(inbox, param->rdmarc_base,   INIT_HCA_RDMARC_BASE_OFFSET);
 	MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET);
 
@@ -1676,6 +1703,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	MLX4_GET(param->auxc_base,     outbox, INIT_HCA_AUXC_BASE_OFFSET);
 	MLX4_GET(param->eqc_base,      outbox, INIT_HCA_EQC_BASE_OFFSET);
 	MLX4_GET(param->log_num_eqs,   outbox, INIT_HCA_LOG_EQ_OFFSET);
+	MLX4_GET(param->num_sys_eqs,   outbox, INIT_HCA_NUM_SYS_EQS_OFFSET);
 	MLX4_GET(param->rdmarc_base,   outbox, INIT_HCA_RDMARC_BASE_OFFSET);
 	MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 48c11b5..475215e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -56,6 +56,7 @@ struct mlx4_dev_cap {
 	int max_mpts;
 	int reserved_eqs;
 	int max_eqs;
+	int num_sys_eqs;
 	int reserved_mtts;
 	int max_mrw_sz;
 	int reserved_mrws;
@@ -180,6 +181,7 @@ struct mlx4_init_hca_param {
 	u8  log_num_srqs;
 	u8  log_num_cqs;
 	u8  log_num_eqs;
+	u16 num_sys_eqs;
 	u8  log_rd_per_qp;
 	u8  log_mc_table_sz;
 	u8  log_mpt_sz;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 43047b2..ebb2790 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -197,6 +197,29 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev)
 		dev->caps.port_mask[i] = dev->caps.port_type[i];
 }
 
+enum {
+	MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
+};
+
+static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+{
+	int err = 0;
+	struct mlx4_func func;
+
+	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+		err = mlx4_QUERY_FUNC(dev, &func, 0);
+		if (err) {
+			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+			return err;
+		}
+		dev_cap->max_eqs = func.max_eq;
+		dev_cap->reserved_eqs = func.rsvd_eqs;
+		dev_cap->reserved_uars = func.rsvd_uars;
+		err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
+	}
+	return err;
+}
+
 static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
 {
 	struct mlx4_caps *dev_cap = &dev->caps;
@@ -261,7 +284,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	}
 
 	dev->caps.num_ports	     = dev_cap->num_ports;
-	dev->phys_caps.num_phys_eqs  = MLX4_MAX_EQ_NUM;
+	dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
+	dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
+				      dev->caps.num_sys_eqs :
+				      MLX4_MAX_EQ_NUM;
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
 		dev->caps.vl_cap[i]	    = dev_cap->max_vl[i];
 		dev->caps.ib_mtu_cap[i]	    = dev_cap->ib_mtu[i];
@@ -1130,8 +1156,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 	if (err)
 		goto err_srq;
 
-	num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
-		  dev->caps.num_eqs;
+	num_eqs = dev->phys_caps.num_phys_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
 				  cmpt_base +
 				  ((u64) (MLX4_CMPT_TYPE_EQ *
@@ -1193,8 +1218,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 	}
 
 
-	num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
-		   dev->caps.num_eqs;
+	num_eqs = dev->phys_caps.num_phys_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
 				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
 				  num_eqs, num_eqs, 0, 0);
@@ -1719,6 +1743,19 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 			mlx4_err(dev, "INIT_HCA command failed, aborting\n");
 			goto err_free_icm;
 		}
+
+		if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+			err = mlx4_query_func(dev, &dev_cap);
+			if (err < 0) {
+				mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
+				goto err_stop_fw;
+			} else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
+				dev->caps.num_eqs = dev_cap.max_eqs;
+				dev->caps.reserved_eqs = dev_cap.reserved_eqs;
+				dev->caps.reserved_uars = dev_cap.reserved_uars;
+			}
+		}
+
 		/*
 		 * If TS is supported by FW
 		 * read HCA frequency by QUERY_HCA command
@@ -2085,12 +2122,11 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct msix_entry *entries;
-	int nreq = min_t(int, dev->caps.num_ports *
-			 min_t(int, num_online_cpus() + 1,
-			       MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX);
 	int i;
 
 	if (msi_x) {
+		int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ;
+
 		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
 			     nreq);
 
@@ -2345,6 +2381,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 	int err;
 	int port;
 	int i;
+	struct mlx4_dev_cap *dev_cap = NULL;
 	int existing_vfs = 0;
 
 	dev = &priv->dev;
@@ -2381,15 +2418,6 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 			}
 		}
 
-		if (total_vfs) {
-			existing_vfs = pci_num_vf(pdev);
-			dev->flags = MLX4_FLAG_MASTER;
-			dev->flags = mlx4_enable_sriov(dev, pdev, total_vfs,
-						       existing_vfs);
-			if (!SRIOV_VALID_STATE(dev->flags))
-				goto err_sriov;
-		}
-
 		atomic_set(&priv->opreq_count, 0);
 		INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
 
@@ -2403,6 +2431,12 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 			mlx4_err(dev, "Failed to reset HCA, aborting\n");
 			goto err_sriov;
 		}
+
+		if (total_vfs) {
+			existing_vfs = pci_num_vf(pdev);
+			dev->flags = MLX4_FLAG_MASTER;
+			dev->num_vfs = total_vfs;
+		}
 	}
 
 slave_start:
@@ -2416,9 +2450,10 @@ slave_start:
 	 * before posting commands. Also, init num_slaves before calling
 	 * mlx4_init_hca */
 	if (mlx4_is_mfunc(dev)) {
-		if (mlx4_is_master(dev))
+		if (mlx4_is_master(dev)) {
 			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
-		else {
+
+		} else {
 			dev->num_slaves = 0;
 			err = mlx4_multi_func_init(dev);
 			if (err) {
@@ -2434,6 +2469,52 @@ slave_start:
 		goto err_mfunc;
 	}
 
+	if (mlx4_is_master(dev)) {
+		if (!dev_cap) {
+			dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
+
+			if (!dev_cap) {
+				err = -ENOMEM;
+				goto err_fw;
+			}
+
+			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
+			if (err) {
+				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+				goto err_fw;
+			}
+
+			if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
+				u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
+								  existing_vfs);
+
+				mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
+				dev->flags = dev_flags;
+				if (!SRIOV_VALID_STATE(dev->flags)) {
+					mlx4_err(dev, "Invalid SRIOV state\n");
+					goto err_sriov;
+				}
+				err = mlx4_reset(dev);
+				if (err) {
+					mlx4_err(dev, "Failed to reset HCA, aborting.\n");
+					goto err_sriov;
+				}
+				goto slave_start;
+			}
+		} else {
+			/* Legacy mode FW requires SRIOV to be enabled before
+			 * doing QUERY_DEV_CAP, since max_eq's value is different if
+			 * SRIOV is enabled.
+			 */
+			memset(dev_cap, 0, sizeof(*dev_cap));
+			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
+			if (err) {
+				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+				goto err_fw;
+			}
+		}
+	}
+
 	err = mlx4_init_hca(dev);
 	if (err) {
 		if (err == -EACCES) {
@@ -2457,6 +2538,30 @@ slave_start:
 			goto err_fw;
 	}
 
+	if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
+		u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs);
+
+		if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
+			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
+			dev->flags = dev_flags;
+			err = mlx4_cmd_init(dev);
+			if (err) {
+				/* Only VHCR is cleaned up, so could still
+				 * send FW commands
+				 */
+				mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
+				goto err_close;
+			}
+		} else {
+			dev->flags = dev_flags;
+		}
+
+		if (!SRIOV_VALID_STATE(dev->flags)) {
+			mlx4_err(dev, "Invalid SRIOV state\n");
+			goto err_close;
+		}
+	}
+
 	/* check if the device is functioning at its maximum possible speed.
 	 * No return code for this call, just warn the user in case of PCI
 	 * express device capabilities are under-satisfied by the bus.
@@ -2631,6 +2736,7 @@ err_sriov:
 	if (!mlx4_is_slave(dev))
 		mlx4_free_ownership(dev);
 
+	kfree(dev_cap);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c
index 14089d9..2bf437a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/profile.c
+++ b/drivers/net/ethernet/mellanox/mlx4/profile.c
@@ -126,8 +126,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 	profile[MLX4_RES_AUXC].num    = request->num_qp;
 	profile[MLX4_RES_SRQ].num     = request->num_srq;
 	profile[MLX4_RES_CQ].num      = request->num_cq;
-	profile[MLX4_RES_EQ].num      = mlx4_is_mfunc(dev) ?
-					dev->phys_caps.num_phys_eqs :
+	profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ? dev->phys_caps.num_phys_eqs :
 					min_t(unsigned, dev_cap->max_eqs, MAX_MSIX);
 	profile[MLX4_RES_DMPT].num    = request->num_mpt;
 	profile[MLX4_RES_CMPT].num    = MLX4_NUM_CMPTS;
@@ -216,10 +215,18 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 			init_hca->log_num_cqs = profile[i].log_num;
 			break;
 		case MLX4_RES_EQ:
-			dev->caps.num_eqs     = roundup_pow_of_two(min_t(unsigned, dev_cap->max_eqs,
-									 MAX_MSIX));
-			init_hca->eqc_base    = profile[i].start;
-			init_hca->log_num_eqs = ilog2(dev->caps.num_eqs);
+			if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+				init_hca->log_num_eqs = 0x1f;
+				init_hca->eqc_base    = profile[i].start;
+				init_hca->num_sys_eqs = dev_cap->num_sys_eqs;
+			} else {
+				dev->caps.num_eqs     = roundup_pow_of_two(
+								min_t(unsigned,
+								      dev_cap->max_eqs,
+								      MAX_MSIX));
+				init_hca->eqc_base    = profile[i].start;
+				init_hca->log_num_eqs = ilog2(dev->caps.num_eqs);
+			}
 			break;
 		case MLX4_RES_DMPT:
 			dev->caps.num_mpts	= profile[i].num;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 3d9bff0..1c560eb 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -189,7 +189,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_EQE_STRIDE		= 1LL <<  13,
 	MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL        = 1LL <<  14,
 	MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP	= 1LL <<  15,
-	MLX4_DEV_CAP_FLAG2_CONFIG_DEV		= 1LL <<  16
+	MLX4_DEV_CAP_FLAG2_CONFIG_DEV		= 1LL <<  16,
+	MLX4_DEV_CAP_FLAG2_SYS_EQS		= 1LL <<  17
 };
 
 enum {
@@ -443,6 +444,7 @@ struct mlx4_caps {
 	int			num_cqs;
 	int			max_cqes;
 	int			reserved_cqs;
+	int			num_sys_eqs;
 	int			num_eqs;
 	int			reserved_eqs;
 	int			num_comp_vectors;
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 3/7] net/mlx4_core: Refactor mlx4_cmd_init and mlx4_cmd_cleanup
From: Or Gerlitz @ 2014-11-13 12:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Matan Barak, Amir Vadai, Jack Morgenstein, Or Gerlitz
In-Reply-To: <1415882733-3084-1-git-send-email-ogerlitz@mellanox.com>

From: Matan Barak <matanb@mellanox.com>

Refactor mlx4_cmd_init and mlx4_cmd_cleanup such that partial init
and cleanup are possible. After this refactoring, calling mlx4_cmd_init
several times is safe.

This is necessary in the VF init flow: when mlx4_init_hca returns -EACCES,
we need to issue cleanup and re-attempt the call with the slave flag.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c  |   76 +++++++++++++++++------------
 drivers/net/ethernet/mellanox/mlx4/main.c |    6 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |   11 ++++-
 3 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 3c05e58..5c93d14 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2117,50 +2117,52 @@ err_vhcr:
 int mlx4_cmd_init(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
+	int flags = 0;
+
+	if (!priv->cmd.initialized) {
+		mutex_init(&priv->cmd.hcr_mutex);
+		mutex_init(&priv->cmd.slave_cmd_mutex);
+		sema_init(&priv->cmd.poll_sem, 1);
+		priv->cmd.use_events = 0;
+		priv->cmd.toggle     = 1;
+		priv->cmd.initialized = 1;
+		flags |= MLX4_CMD_CLEANUP_STRUCT;
+	}
 
-	mutex_init(&priv->cmd.hcr_mutex);
-	mutex_init(&priv->cmd.slave_cmd_mutex);
-	sema_init(&priv->cmd.poll_sem, 1);
-	priv->cmd.use_events = 0;
-	priv->cmd.toggle     = 1;
-
-	priv->cmd.hcr = NULL;
-	priv->mfunc.vhcr = NULL;
-
-	if (!mlx4_is_slave(dev)) {
+	if (!mlx4_is_slave(dev) && !priv->cmd.hcr) {
 		priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) +
 					MLX4_HCR_BASE, MLX4_HCR_SIZE);
 		if (!priv->cmd.hcr) {
 			mlx4_err(dev, "Couldn't map command register\n");
-			return -ENOMEM;
+			goto err;
 		}
+		flags |= MLX4_CMD_CLEANUP_HCR;
 	}
 
-	if (mlx4_is_mfunc(dev)) {
+	if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) {
 		priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
 						      &priv->mfunc.vhcr_dma,
 						      GFP_KERNEL);
 		if (!priv->mfunc.vhcr)
-			goto err_hcr;
+			goto err;
+
+		flags |= MLX4_CMD_CLEANUP_VHCR;
 	}
 
-	priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev,
-					 MLX4_MAILBOX_SIZE,
-					 MLX4_MAILBOX_SIZE, 0);
-	if (!priv->cmd.pool)
-		goto err_vhcr;
+	if (!priv->cmd.pool) {
+		priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev,
+						 MLX4_MAILBOX_SIZE,
+						 MLX4_MAILBOX_SIZE, 0);
+		if (!priv->cmd.pool)
+			goto err;
 
-	return 0;
+		flags |= MLX4_CMD_CLEANUP_POOL;
+	}
 
-err_vhcr:
-	if (mlx4_is_mfunc(dev))
-		dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
-				  priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
-	priv->mfunc.vhcr = NULL;
+	return 0;
 
-err_hcr:
-	if (!mlx4_is_slave(dev))
-		iounmap(priv->cmd.hcr);
+err:
+	mlx4_cmd_cleanup(dev, flags);
 	return -ENOMEM;
 }
 
@@ -2184,18 +2186,28 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
 	iounmap(priv->mfunc.comm);
 }
 
-void mlx4_cmd_cleanup(struct mlx4_dev *dev)
+void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 
-	pci_pool_destroy(priv->cmd.pool);
+	if (priv->cmd.pool && (cleanup_mask & MLX4_CMD_CLEANUP_POOL)) {
+		pci_pool_destroy(priv->cmd.pool);
+		priv->cmd.pool = NULL;
+	}
 
-	if (!mlx4_is_slave(dev))
+	if (!mlx4_is_slave(dev) && priv->cmd.hcr &&
+	    (cleanup_mask & MLX4_CMD_CLEANUP_HCR)) {
 		iounmap(priv->cmd.hcr);
-	if (mlx4_is_mfunc(dev))
+		priv->cmd.hcr = NULL;
+	}
+	if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr &&
+	    (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) {
 		dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
 				  priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
-	priv->mfunc.vhcr = NULL;
+		priv->mfunc.vhcr = NULL;
+	}
+	if (priv->cmd.initialized && (cleanup_mask & MLX4_CMD_CLEANUP_STRUCT))
+		priv->cmd.initialized = 0;
 }
 
 /*
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 71f1f3b..b2732eb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2396,7 +2396,7 @@ slave_start:
 		if (err == -EACCES) {
 			/* Not primary Physical function
 			 * Running in slave mode */
-			mlx4_cmd_cleanup(dev);
+			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
 			dev->flags |= MLX4_FLAG_SLAVE;
 			dev->flags &= ~MLX4_FLAG_MASTER;
 			goto slave_start;
@@ -2561,7 +2561,7 @@ err_mfunc:
 		mlx4_multi_func_cleanup(dev);
 
 err_cmd:
-	mlx4_cmd_cleanup(dev);
+	mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
 
 err_sriov:
 	if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs)
@@ -2805,7 +2805,7 @@ static void mlx4_unload_one(struct pci_dev *pdev)
 	mlx4_close_hca(dev);
 	if (mlx4_is_slave(dev))
 		mlx4_multi_func_cleanup(dev);
-	mlx4_cmd_cleanup(dev);
+	mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
 
 	if (dev->flags & MLX4_FLAG_MSI_X)
 		pci_disable_msix(pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index f8fc7bd..f48e7c3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -606,6 +606,7 @@ struct mlx4_cmd {
 	u8			use_events;
 	u8			toggle;
 	u8			comm_toggle;
+	u8			initialized;
 };
 
 enum {
@@ -1126,8 +1127,16 @@ int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave,
 
 int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
 
+enum {
+	MLX4_CMD_CLEANUP_STRUCT = 1UL << 0,
+	MLX4_CMD_CLEANUP_POOL	= 1UL << 1,
+	MLX4_CMD_CLEANUP_HCR	= 1UL << 2,
+	MLX4_CMD_CLEANUP_VHCR	= 1UL << 3,
+	MLX4_CMD_CLEANUP_ALL	= (MLX4_CMD_CLEANUP_VHCR << 1) - 1
+};
+
 int mlx4_cmd_init(struct mlx4_dev *dev);
-void mlx4_cmd_cleanup(struct mlx4_dev *dev);
+void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask);
 int mlx4_multi_func_init(struct mlx4_dev *dev);
 void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
 void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 1/7] net/mlx4_core: Fix wrong reading of reserved_eqs
From: Or Gerlitz @ 2014-11-13 12:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Matan Barak, Amir Vadai, Jack Morgenstein, Or Gerlitz
In-Reply-To: <1415882733-3084-1-git-send-email-ogerlitz@mellanox.com>

From: Matan Barak <matanb@mellanox.com>

We mistakenly read the reserved_eqs field as a standard
numeric value rather than a log2 value.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index d6dba77..9051b36 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -611,7 +611,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
 	dev_cap->max_mpts = 1 << (field & 0x3f);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
-	dev_cap->reserved_eqs = field & 0xf;
+	dev_cap->reserved_eqs = 1 << (field & 0xf);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
 	dev_cap->max_eqs = 1 << (field & 0xf);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 0/7] mlx4: Flexible (asymmetric) allocation of EQs and MSI-X vectors
From: Or Gerlitz @ 2014-11-13 12:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Matan Barak, Amir Vadai, Jack Morgenstein, Or Gerlitz

Hi Dave,

This series from Matan Barak is built as follows:

The first two patches fix small bugs w.r.t. the firmware spec. Next
are two patches which do more refactoring of the init/fini flow
and a patch that adds support for the QUERY_FUNC firmware command;
these are all pre-steps for the major patch of the series. In this
patch (#6) we change the order of talking/querying the firmware
and enabling SRIOV. This allows us to remove the worst-case assumption
w.r.t. the number of available MSI-X vectors and EQs per function.

The last patch builds on this ordering change to enable
support for more than 64 VFs over firmware that allows for that.

Or.

Matan Barak (7):
  net/mlx4_core: Fix wrong reading of reserved_eqs
  net/mlx4_core: Use correct variable type for mlx4_slave_cap
  net/mlx4_core: Refactor mlx4_cmd_init and mlx4_cmd_cleanup
  net/mlx4_core: Refactor mlx4_load_one
  net/mlx4_core: Add QUERY_FUNC firmware command
  net/mlx4_core: Flexible (asymmetric) allocation of EQs and MSI-X vectors for PF/VFs
  net/mlx4_core: Support more than 64 VFs

 drivers/infiniband/hw/mlx4/main.c            |    3 +-
 drivers/net/ethernet/mellanox/mlx4/cmd.c     |   76 ++++---
 drivers/net/ethernet/mellanox/mlx4/eq.c      |    8 +-
 drivers/net/ethernet/mellanox/mlx4/fw.c      |  113 +++++++++-
 drivers/net/ethernet/mellanox/mlx4/fw.h      |   15 ++-
 drivers/net/ethernet/mellanox/mlx4/main.c    |  307 +++++++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx4/mlx4.h    |   11 +-
 drivers/net/ethernet/mellanox/mlx4/profile.c |   19 +-
 include/linux/mlx4/device.h                  |    7 +-
 9 files changed, 440 insertions(+), 119 deletions(-)

^ permalink raw reply

* [PATCH net-next 2/7] net/mlx4_core: Use correct variable type for mlx4_slave_cap
From: Or Gerlitz @ 2014-11-13 12:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Matan Barak, Amir Vadai, Jack Morgenstein, Or Gerlitz
In-Reply-To: <1415882733-3084-1-git-send-email-ogerlitz@mellanox.com>

From: Matan Barak <matanb@mellanox.com>

We've used an incorrect type for the loop counter and the
mlx4_QUERY_FUNC_CAP function. The current input modifier
is either a port or a boolean.
Since the number of ports is always a positive value < 255,
we should use u8 instead of an integer with casting.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   |    2 +-
 drivers/net/ethernet/mellanox/mlx4/fw.h   |    2 +-
 drivers/net/ethernet/mellanox/mlx4/main.c |    4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 9051b36..f1a6718 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -335,7 +335,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	return err;
 }
 
-int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
 			struct mlx4_func_cap *func_cap)
 {
 	struct mlx4_cmd_mailbox *mailbox;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 9b835ae..694557e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -204,7 +204,7 @@ struct mlx4_set_ib_param {
 };
 
 int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
-int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
 			struct mlx4_func_cap *func_cap);
 int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_vhcr *vhcr,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 2f6ba42..71f1f3b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -631,7 +631,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 	struct mlx4_dev_cap	   dev_cap;
 	struct mlx4_func_cap	   func_cap;
 	struct mlx4_init_hca_param hca_param;
-	int			   i;
+	u8			   i;
 
 	memset(&hca_param, 0, sizeof(hca_param));
 	err = mlx4_QUERY_HCA(dev, &hca_param);
@@ -732,7 +732,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 	}
 
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
-		err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
+		err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap);
 		if (err) {
 			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
 				 i, err);
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 4/7] net/mlx4_core: Refactor mlx4_load_one
From: Or Gerlitz @ 2014-11-13 12:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Matan Barak, Amir Vadai, Jack Morgenstein, Or Gerlitz
In-Reply-To: <1415882733-3084-1-git-send-email-ogerlitz@mellanox.com>

From: Matan Barak <matanb@mellanox.com>

Refactor mlx4_load_one, as a preparation step for a new and
more complicated load function. The goal is to support both
newer firmware that requires init_hca to be done before
enable_sriov and legacy firmware that requires things to
be done the other way around.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/main.c |  141 ++++++++++++++++++++---------
 1 files changed, 99 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index b2732eb..43047b2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1473,6 +1473,12 @@ static void mlx4_close_hca(struct mlx4_dev *dev)
 	else {
 		mlx4_CLOSE_HCA(dev, 0);
 		mlx4_free_icms(dev);
+	}
+}
+
+static void mlx4_close_fw(struct mlx4_dev *dev)
+{
+	if (!mlx4_is_slave(dev)) {
 		mlx4_UNMAP_FA(dev);
 		mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
 	}
@@ -1619,17 +1625,10 @@ static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
 		 == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
 }
 
-static int mlx4_init_hca(struct mlx4_dev *dev)
+static int mlx4_init_fw(struct mlx4_dev *dev)
 {
-	struct mlx4_priv	  *priv = mlx4_priv(dev);
-	struct mlx4_adapter	   adapter;
-	struct mlx4_dev_cap	   dev_cap;
 	struct mlx4_mod_stat_cfg   mlx4_cfg;
-	struct mlx4_profile	   profile;
-	struct mlx4_init_hca_param init_hca;
-	u64 icm_size;
-	int err;
-	struct mlx4_config_dev_params params;
+	int err = 0;
 
 	if (!mlx4_is_slave(dev)) {
 		err = mlx4_QUERY_FW(dev);
@@ -1652,7 +1651,23 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
 		if (err)
 			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
+	}
+
+	return err;
+}
+
+static int mlx4_init_hca(struct mlx4_dev *dev)
+{
+	struct mlx4_priv	  *priv = mlx4_priv(dev);
+	struct mlx4_adapter	   adapter;
+	struct mlx4_dev_cap	   dev_cap;
+	struct mlx4_profile	   profile;
+	struct mlx4_init_hca_param init_hca;
+	u64 icm_size;
+	struct mlx4_config_dev_params params;
+	int err;
 
+	if (!mlx4_is_slave(dev)) {
 		err = mlx4_dev_cap(dev, &dev_cap);
 		if (err) {
 			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
@@ -2275,6 +2290,53 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
 	iounmap(owner);
 }
 
+#define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV)	==\
+				  !!((flags) & MLX4_FLAG_MASTER))
+
+static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
+			     u8 total_vfs, int existing_vfs)
+{
+	u64 dev_flags = dev->flags;
+
+	dev->dev_vfs = kzalloc(
+			total_vfs * sizeof(*dev->dev_vfs),
+			GFP_KERNEL);
+	if (NULL == dev->dev_vfs) {
+		mlx4_err(dev, "Failed to allocate memory for VFs\n");
+		goto disable_sriov;
+	} else if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
+		int err = 0;
+
+		atomic_inc(&pf_loading);
+		if (existing_vfs) {
+			if (existing_vfs != total_vfs)
+				mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
+					 existing_vfs, total_vfs);
+		} else {
+			mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
+			err = pci_enable_sriov(pdev, total_vfs);
+		}
+		if (err) {
+			mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
+				 err);
+			atomic_dec(&pf_loading);
+			goto disable_sriov;
+		} else {
+			mlx4_warn(dev, "Running in master mode\n");
+			dev_flags |= MLX4_FLAG_SRIOV |
+				MLX4_FLAG_MASTER;
+			dev_flags &= ~MLX4_FLAG_SLAVE;
+			dev->num_vfs = total_vfs;
+		}
+	}
+	return dev_flags;
+
+disable_sriov:
+	dev->num_vfs = 0;
+	kfree(dev->dev_vfs);
+	return dev_flags & ~MLX4_FLAG_MASTER;
+}
+
 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 			 int total_vfs, int *nvfs, struct mlx4_priv *priv)
 {
@@ -2320,37 +2382,12 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 		}
 
 		if (total_vfs) {
-			mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n",
-				  total_vfs);
-			dev->dev_vfs = kzalloc(
-				total_vfs * sizeof(*dev->dev_vfs),
-				GFP_KERNEL);
-			if (NULL == dev->dev_vfs) {
-				mlx4_err(dev, "Failed to allocate memory for VFs\n");
-				err = -ENOMEM;
-				goto err_free_own;
-			} else {
-				atomic_inc(&pf_loading);
-				existing_vfs = pci_num_vf(pdev);
-				if (existing_vfs) {
-					err = 0;
-					if (existing_vfs != total_vfs)
-						mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
-							 existing_vfs, total_vfs);
-				} else {
-					err = pci_enable_sriov(pdev, total_vfs);
-				}
-				if (err) {
-					mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
-						 err);
-					atomic_dec(&pf_loading);
-				} else {
-					mlx4_warn(dev, "Running in master mode\n");
-					dev->flags |= MLX4_FLAG_SRIOV |
-						MLX4_FLAG_MASTER;
-					dev->num_vfs = total_vfs;
-				}
-			}
+			existing_vfs = pci_num_vf(pdev);
+			dev->flags = MLX4_FLAG_MASTER;
+			dev->flags = mlx4_enable_sriov(dev, pdev, total_vfs,
+						       existing_vfs);
+			if (!SRIOV_VALID_STATE(dev->flags))
+				goto err_sriov;
 		}
 
 		atomic_set(&priv->opreq_count, 0);
@@ -2391,17 +2428,33 @@ slave_start:
 		}
 	}
 
+	err = mlx4_init_fw(dev);
+	if (err) {
+		mlx4_err(dev, "Failed to init fw, aborting.\n");
+		goto err_mfunc;
+	}
+
 	err = mlx4_init_hca(dev);
 	if (err) {
 		if (err == -EACCES) {
 			/* Not primary Physical function
 			 * Running in slave mode */
 			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
+			/* We're not a PF */
+			if (dev->flags & MLX4_FLAG_SRIOV) {
+				if (!existing_vfs)
+					pci_disable_sriov(pdev);
+				if (mlx4_is_master(dev))
+					atomic_dec(&pf_loading);
+				dev->flags &= ~MLX4_FLAG_SRIOV;
+			}
+			if (!mlx4_is_slave(dev))
+				mlx4_free_ownership(dev);
 			dev->flags |= MLX4_FLAG_SLAVE;
 			dev->flags &= ~MLX4_FLAG_MASTER;
 			goto slave_start;
 		} else
-			goto err_mfunc;
+			goto err_fw;
 	}
 
 	/* check if the device is functioning at its maximum possible speed.
@@ -2556,6 +2609,9 @@ err_master_mfunc:
 err_close:
 	mlx4_close_hca(dev);
 
+err_fw:
+	mlx4_close_fw(dev);
+
 err_mfunc:
 	if (mlx4_is_slave(dev))
 		mlx4_multi_func_cleanup(dev);
@@ -2572,7 +2628,6 @@ err_sriov:
 
 	kfree(priv->dev.dev_vfs);
 
-err_free_own:
 	if (!mlx4_is_slave(dev))
 		mlx4_free_ownership(dev);
 
@@ -2803,6 +2858,7 @@ static void mlx4_unload_one(struct pci_dev *pdev)
 	if (mlx4_is_master(dev))
 		mlx4_multi_func_cleanup(dev);
 	mlx4_close_hca(dev);
+	mlx4_close_fw(dev);
 	if (mlx4_is_slave(dev))
 		mlx4_multi_func_cleanup(dev);
 	mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
@@ -2812,6 +2868,7 @@ static void mlx4_unload_one(struct pci_dev *pdev)
 	if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
 		mlx4_warn(dev, "Disabling SR-IOV\n");
 		pci_disable_sriov(pdev);
+		dev->flags &= ~MLX4_FLAG_SRIOV;
 		dev->num_vfs = 0;
 	}
 
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH v4 4/8] net: can: c_can: Add syscon/regmap RAMINIT mechanism
From: Marc Kleine-Budde @ 2014-11-13 12:44 UTC (permalink / raw)
  To: Roger Quadros, wg
  Cc: wsa, tony, tglx, mugunthanvnm, george.cherian, balbi, nsekhar, nm,
	sergei.shtylyov, linux-omap, linux-can, netdev
In-Reply-To: <1415371762-29885-5-git-send-email-rogerq@ti.com>

[-- Attachment #1: Type: text/plain, Size: 8054 bytes --]

On 11/07/2014 03:49 PM, Roger Quadros wrote:
> Some TI SoCs like DRA7 have a RAMINIT register specification
> different from the other AMxx SoCs and as expected by the
> existing driver.
> 
> To add more insanity, this register is shared with other
> IPs like DSS, PCIe and PWM.
> 
> Provides a more generic mechanism to specify the RAMINIT
> register location and START/DONE bit position and use the
> syscon/regmap framework to access the register.
> 
> Signed-off-by: Roger Quadros <rogerq@ti.com>
> ---
>  .../devicetree/bindings/net/can/c_can.txt          |   3 +
>  drivers/net/can/c_can/c_can.h                      |  11 +-
>  drivers/net/can/c_can/c_can_platform.c             | 112 ++++++++++++++-------
>  3 files changed, 86 insertions(+), 40 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/net/can/c_can.txt b/Documentation/devicetree/bindings/net/can/c_can.txt
> index 8f1ae81..a3ca3ee 100644
> --- a/Documentation/devicetree/bindings/net/can/c_can.txt
> +++ b/Documentation/devicetree/bindings/net/can/c_can.txt
> @@ -12,6 +12,9 @@ Required properties:
>  Optional properties:
>  - ti,hwmods		: Must be "d_can<n>" or "c_can<n>", n being the
>  			  instance number
> +- syscon-raminit	: Handle to system control region that contains the
> +			  RAMINIT register, register offset to the RAMINIT
> +			  register and the CAN instance number (0 offset).
>  
>  Note: "ti,hwmods" field is used to fetch the base address and irq
>  resources from TI, omap hwmod data base during device registration.
> diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
> index 3c305a1..0e17c7b 100644
> --- a/drivers/net/can/c_can/c_can.h
> +++ b/drivers/net/can/c_can/c_can.h
> @@ -179,6 +179,14 @@ struct c_can_driver_data {
>  	bool raminit_pulse;	/* If set, sets and clears START bit (pulse) */
>  };
>  
> +/* Out of band RAMINIT register access via syscon regmap */
> +struct c_can_raminit {
> +	struct regmap *syscon;	/* for raminit ctrl. reg. access */
> +	unsigned int reg;	/* register index within syscon */
> +	u8 start_bit;
> +	u8 done_bit;
> +};
> +
>  /* c_can private data structure */
>  struct c_can_priv {
>  	struct can_priv can;	/* must be the first member */
> @@ -196,8 +204,7 @@ struct c_can_priv {
>  	const u16 *regs;
>  	void *priv;		/* for board-specific data */
>  	enum c_can_dev_id type;
> -	u32 __iomem *raminit_ctrlreg;
> -	int instance;
> +	struct c_can_raminit raminit_sys;	/* RAMINIT via syscon regmap */
>  	void (*raminit) (const struct c_can_priv *priv, bool enable);
>  	u32 comm_rcv_high;
>  	u32 rxmasked;
> diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
> index 20deb67..3776483 100644
> --- a/drivers/net/can/c_can/c_can_platform.c
> +++ b/drivers/net/can/c_can/c_can_platform.c
> @@ -32,14 +32,13 @@
>  #include <linux/clk.h>
>  #include <linux/of.h>
>  #include <linux/of_device.h>
> +#include <linux/mfd/syscon.h>
> +#include <linux/regmap.h>
>  
>  #include <linux/can/dev.h>
>  
>  #include "c_can.h"
>  
> -#define CAN_RAMINIT_START_MASK(i)	(0x001 << (i))
> -#define CAN_RAMINIT_DONE_MASK(i)	(0x100 << (i))
> -#define CAN_RAMINIT_ALL_MASK(i)		(0x101 << (i))
>  #define DCAN_RAM_INIT_BIT		(1 << 3)
>  static DEFINE_SPINLOCK(raminit_lock);
>  /*
> @@ -72,47 +71,61 @@ static void c_can_plat_write_reg_aligned_to_32bit(const struct c_can_priv *priv,
>  	writew(val, priv->base + 2 * priv->regs[index]);
>  }
>  
> -static void c_can_hw_raminit_wait_ti(const struct c_can_priv *priv, u32 mask,
> -				  u32 val)
> +static void c_can_hw_raminit_wait_syscon(const struct c_can_priv *priv,
> +					 u32 mask, u32 val)
>  {
>  	int timeout = 0;
> +	const struct c_can_raminit *raminit = &priv->raminit_sys;
> +	u32 ctrl;
> +
>  	/* We look only at the bits of our instance. */
>  	val &= mask;
> -	while ((readl(priv->raminit_ctrlreg) & mask) != val) {
> +	do {
>  		udelay(1);
>  		timeout++;
>  
> +		regmap_read(raminit->syscon, raminit->reg, &ctrl);
>  		if (timeout == 1000) {
>  			dev_err(&priv->dev->dev, "%s: time out\n", __func__);
>  			break;
>  		}
> -	}
> +	} while ((ctrl & mask) != val);
>  }
>  
> -static void c_can_hw_raminit_ti(const struct c_can_priv *priv, bool enable)
> +static void c_can_hw_raminit_syscon(const struct c_can_priv *priv, bool enable)
>  {
> -	u32 mask = CAN_RAMINIT_ALL_MASK(priv->instance);
> +	u32 mask;
>  	u32 ctrl;
> +	const struct c_can_raminit *raminit = &priv->raminit_sys;
> +	u8 start_bit, done_bit;
> +
> +	start_bit = raminit->start_bit;
> +	done_bit = raminit->done_bit;
>  
>  	spin_lock(&raminit_lock);
>  
> -	ctrl = readl(priv->raminit_ctrlreg);
> +	mask = 1 << start_bit | 1 << done_bit;
> +	regmap_read(raminit->syscon, raminit->reg, &ctrl);
> +
>  	/* We clear the done and start bit first. The start bit is
>  	 * looking at the 0 -> transition, but is not self clearing;
>  	 * And we clear the init done bit as well.
> +	 * NOTE: DONE must be written with 1 to clear it.
>  	 */
> -	ctrl &= ~CAN_RAMINIT_START_MASK(priv->instance);
> -	ctrl |= CAN_RAMINIT_DONE_MASK(priv->instance);
> -	writel(ctrl, priv->raminit_ctrlreg);
> -	ctrl &= ~CAN_RAMINIT_DONE_MASK(priv->instance);
> -	c_can_hw_raminit_wait_ti(priv, mask, ctrl);
> +	ctrl &= ~(1 << start_bit);
> +	ctrl |= 1 << done_bit;
> +	regmap_write(raminit->syscon, raminit->reg, ctrl);
> +
> +	ctrl &= ~(1 << done_bit);
> +	c_can_hw_raminit_wait_syscon(priv, mask, ctrl);
>  
>  	if (enable) {
>  		/* Set start bit and wait for the done bit. */
> -		ctrl |= CAN_RAMINIT_START_MASK(priv->instance);
> -		writel(ctrl, priv->raminit_ctrlreg);
> -		ctrl |= CAN_RAMINIT_DONE_MASK(priv->instance);
> -		c_can_hw_raminit_wait_ti(priv, mask, ctrl);
> +		ctrl |= 1 << start_bit;
> +		regmap_write(raminit->syscon, raminit->reg, ctrl);
> +
> +		ctrl |= 1 << done_bit;
> +		c_can_hw_raminit_wait_syscon(priv, mask, ctrl);
>  	}
>  	spin_unlock(&raminit_lock);
>  }
> @@ -206,10 +219,11 @@ static int c_can_plat_probe(struct platform_device *pdev)
>  	struct net_device *dev;
>  	struct c_can_priv *priv;
>  	const struct of_device_id *match;
> -	struct resource *mem, *res;
> +	struct resource *mem;
>  	int irq;
>  	struct clk *clk;
>  	const struct c_can_driver_data *drvdata;
> +	struct device_node *np = pdev->dev.of_node;
>  
>  	match = of_match_device(c_can_of_table, &pdev->dev);
>  	if (match) {
> @@ -278,27 +292,49 @@ static int c_can_plat_probe(struct platform_device *pdev)
>  		priv->read_reg32 = d_can_plat_read_reg32;
>  		priv->write_reg32 = d_can_plat_write_reg32;
>  
> -		if (pdev->dev.of_node)
> -			priv->instance = of_alias_get_id(pdev->dev.of_node, "d_can");
> -		else
> -			priv->instance = pdev->id;
> -
> -		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> -		/* Not all D_CAN modules have a separate register for the D_CAN
> -		 * RAM initialization. Use default RAM init bit in D_CAN module
> -		 * if not specified in DT.
> +		/* Check if we need custom RAMINIT via syscon. Mostly for TI
> +		 * platforms. Only supported with DT boot.
>  		 */
> -		if (!res) {
> +		if (np && of_property_read_bool(np, "syscon-raminit")) {
> +			u32 id;
> +			struct c_can_raminit *raminit = &priv->raminit_sys;
> +
> +			ret = -EINVAL;
> +			raminit->syscon = syscon_regmap_lookup_by_phandle(np,
> +									  "syscon-raminit");

You should return PTR_ERR() here, as it might be -EPROBE_DEFER

> +			if (IS_ERR(raminit->syscon)) {
> +				dev_err(&pdev->dev,
> +					"couldn't get syscon regmap for RAMINIT\n");
> +				goto exit_free_device;
> +			}

...and maybe remove this error message completely.

regards,
Marc
-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH v4 4/8] net: can: c_can: Add syscon/regmap RAMINIT mechanism
From: Roger Quadros @ 2014-11-13 12:09 UTC (permalink / raw)
  To: Marc Kleine-Budde, wg
  Cc: wsa, tony, tglx, mugunthanvnm, george.cherian, balbi, nsekhar, nm,
	sergei.shtylyov, linux-omap, linux-can, netdev
In-Reply-To: <54649153.409@pengutronix.de>

On 11/13/2014 01:09 PM, Marc Kleine-Budde wrote:
> On 11/07/2014 03:49 PM, Roger Quadros wrote:
>> Some TI SoCs like DRA7 have a RAMINIT register specification
>> different from the other AMxx SoCs and as expected by the
>> existing driver.
>>
>> To add more insanity, this register is shared with other
>> IPs like DSS, PCIe and PWM.
>>
>> Provides a more generic mechanism to specify the RAMINIT
>> register location and START/DONE bit position and use the
>> syscon/regmap framework to access the register.
> 
> What about the existing device trees that don't have the syscon-raminit
> phandle? We can either keep the existing init routines or create regmap
> in the platform driver and use the new ones.

There is only one user
arch/arm/boot/dts/am33xx.dtsi

The can nodes are disabled there and no other board file is enabling that node.
So there is no breakage as such and not worth the hassle to maintain the old routine.

I will be sending the corresponding dts changes today which Tony will take as we don't see
any DT binding changes.

cheers,
-roger

^ permalink raw reply

* Re: [PATCH net-next 1/1] ipvlan: Initial check-in of the IPVLAN driver.
From: Pavel Emelyanov @ 2014-11-13 11:07 UTC (permalink / raw)
  To: Mahesh Bandewar
  Cc: netdev, Eric Dumazet, Maciej Zenczykowski, Laurent Chavey,
	Tim Hockin, David Miller, Brandon Philips
In-Reply-To: <CAF2d9jgRFZzjtEkwVqo5Jw1rzbAS_9NC8LiOtO6xHgWqkVM2Zg@mail.gmail.com>

>>> +static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
>>> +                        struct nlattr *tb[], struct nlattr *data[])
>>> +{
>>> +     struct ipvl_dev *ipvlan = netdev_priv(dev);
>>> +     struct ipvl_port *port;
>>> +     struct net_device *phy_dev;
>>> +     int err;
>>> +
>>> +     ipvlan_dbg(3, "%s[%d]: Entering...\n", __func__, __LINE__);
>>> +     if (!tb[IFLA_LINK]) {
>>> +             ipvlan_dbg(3, "%s[%d]: Returning -EINVAL...\n",
>>> +                        __func__, __LINE__);
>>> +             return -EINVAL;
>>> +     }
>>> +
>>> +     phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
>>> +     if (phy_dev == NULL) {
>>> +             ipvlan_dbg(3, "%s[%d]: Returning -ENODEV...\n",
>>> +                        __func__, __LINE__);
>>> +             return -ENODEV;
>>> +     }
>>> +
>>> +     /* TODO will someone try creating ipvlan-dev on an ipvlan-virtual dev?*/
>>> +     if (!ipvlan_dev_master(phy_dev)) {
>>> +             err = ipvlan_port_create(phy_dev);
>>> +             if (err < 0) {
>>> +                     ipvlan_dbg(3, "%s[%d]: Returning error (%d)...\n",
>>> +                                __func__, __LINE__, err);
>>> +                     return err;
>>> +             }
>>> +     }
>>> +
>>> +     port = ipvlan_port_get_rtnl(phy_dev);
>>> +     /* Get the mode if specified. */
>>> +     if (data && data[IFLA_IPVLAN_MODE])
>>> +             port->mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
>>
>> Should the invalid value be checked here? There are places
>> where we BUG() in mode being "unknown".
>>
> Assuming the calls come over netlink, the ".validate" will be called
> before ".newlink", so that would be unnecessary, isn't it?

Yes, you're right. I've missed the validate callback.

>>> +             break;
>>> +
>>
>>> +static int ipvlan_addr4_event(struct notifier_block *unused,
>>> +                           unsigned long event, void *ptr)
>>> +{
>>> +     struct in_ifaddr *if4 = (struct in_ifaddr *)ptr;
>>> +     struct net_device *dev = (struct net_device *)if4->ifa_dev->dev;
>>> +     struct ipvl_dev *ipvlan = netdev_priv(dev);
>>> +     struct in_addr ip4_addr;
>>> +
>>> +     ipvlan_dbg(3, "%s[%d]: Entering...\n", __func__, __LINE__);
>>> +     if (!ipvlan_dev_slave(dev))
>>> +             return NOTIFY_DONE;
>>> +
>>> +     if (!ipvlan || !ipvlan->port)
>>> +             return NOTIFY_DONE;
>>> +
>>> +     switch (event) {
>>> +     case NETDEV_UP:
>>
>> Can it be (in the future) somehow restricted so that net-namespace wouldn't
>> be able to assign arbitrary IP address here? One of the reasons for using
>> such devices is to enforce the container to use the IP address given from
>> the host.
>>
> Probably this could be a config (sysfs?) entry which would lock up the
> config coming from ns when set. So code could look like -
>           case NETDEV_UP:
>                          if (!restrict_ns_config) {
>                             ...
>                          }
>                          break;

Maybe introduce some "lock" call for ipvlan device after which no new IP addresses
can be assigned? And the configuration would look like

1. create ipvlan
2. move to proper net namespace
3. add addresses
4. lock

?

Thanks,
Pavel

^ permalink raw reply

* Re: [PATCH nf] netfilter: conntrack: fix race in __nf_conntrack_confirm against get_next_corpse
From: Pablo Neira Ayuso @ 2014-11-13 12:08 UTC (permalink / raw)
  To: Jörg Marx
  Cc: Jesper Dangaard Brouer, programme110, netfilter-devel,
	Florian Westphal, netdev, Patrick McHardy
In-Reply-To: <54633D1B.5090404@secunet.com>

On Wed, Nov 12, 2014 at 11:57:31AM +0100, Jörg Marx wrote:
> On 12.11.2014 08:35, Jesper Dangaard Brouer wrote:
> 
> Hi,
> 
> I wrote the patch in 2010, so find some arguments below:
> 
> >>> > > +	nf_ct_del_from_dying_or_unconfirmed_list(ct);
> >>> > >  
> >>> > >  	if (unlikely(nf_ct_is_dying(ct))) {
> >>> > > +		nf_ct_add_to_dying_list(ct);
> >>> > >  		nf_conntrack_double_unlock(hash, reply_hash);
> >>> > >  		local_bh_enable();
> >>> > >  		return NF_ACCEPT;
> >> > 
> >> > Not directly related to your patch, but I don't find a good reason why
> >> > we're accepting this packet.
> >> > 
> >> > If the conntrack from the unconfirmed list is dying, then the object
> >> > will be released by when the packet leaves the stack to its
> >> > destination. With stateful filtering depending in place, the follow up
> >> > packet in the reply direction will likely be considered invalid (if
> >> > tcp tracking is on). Fortunately for us, the origin will likely
> >> > retransmit the syn again, so the ct will be setup accordingly.
> >> > 
> >> > So, why should we allow this to go through?
> > True, it also seems strange to me that we accept this packet.
> 
> The race was triggered in a scenario when we tested high-load IPsec
> tunnels and flushed the conntrack hashes from userspace.
> 
> For me there is little difference in choosing DROP or ACCEPT as verdict.
> The packet/skb belongs to a formerly allowed connection, most likely
> this connection is still allowed (but the conntrack hash entry is about
> to be removed due to userspace is flushing the conntrack table).

__nf_conntrack_confirm() is only called for the first packet that we
see in a flow. If you just invoked the flush command once (which
should be the common case), then this is likely to be the first packet
of the flow (unless you already called flush anytime soon in the
past).

> To minimize the impact (lost packets -> retransmit) I decided to allow
> the skb in flight, so there is no lost packet at this place.

I understand your original motivation was to be conservative.

> When the connection is not allowed anymore (but was allowed up to now,
> because the hash entry exists), the impact is one last packet 'slipping
> through'.

The general policy in conntrack is to not drop packets, but in this
case we'll leave things in inconsistent state (ie. we will likely
receive a reply packet in response to the original packet that has no
conntrack yet).

Thanks.

^ permalink raw reply

* Re: [PATCH] can: Fix bug in suspend/resume
From: Marc Kleine-Budde @ 2014-11-13 12:03 UTC (permalink / raw)
  To: Lothar Waßmann, Appana Durga Kedareswara Rao
  Cc: wg@grandegger.com, Michal Simek, Soren Brinkmann,
	grant.likely@linaro.org, robh+dt@kernel.org,
	netdev@vger.kernel.org, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org, linux-can@vger.kernel.org
In-Reply-To: <20141113123218.79850236@ipc1.ka-ro>

[-- Attachment #1: Type: text/plain, Size: 847 bytes --]

On 11/13/2014 12:32 PM, Lothar Waßmann wrote:
>>>>  static int __maybe_unused xcan_suspend(struct device *dev)  {
>>>> -    struct platform_device *pdev = dev_get_drvdata(dev);
>>>> +    struct platform_device *pdev = container_of(dev,
>>>> +                    struct platform_device, dev);
>>>>      struct net_device *ndev = platform_get_drvdata(pdev);
>>>>      struct xcan_priv *priv = netdev_priv(ndev);
>>
> Why not simply:
> 	struct net_device *ndev = dev_get_drvdata(dev);
> 
> There is no need for a struct platform_device* at all.

ACK

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* [PATCH] FOU: Fix no return statement warning for !CONFIG_NET_FOU_IP_TUNNELS
From: Thomas Graf @ 2014-11-13 11:48 UTC (permalink / raw)
  To: netdev; +Cc: therbert

net/ipv4/fou.c: In function ‘ip_tunnel_encap_del_fou_ops’:
net/ipv4/fou.c:861:1: warning: no return statement in function returning non-void [-Wreturn-type]

Fixes: a8c5f90fb5 ("ip_tunnel: Ops registration for secondary encap (fou, gue)")
Signed-off-by: Thomas Graf <tgraf@suug.ch>
---
 net/ipv4/fou.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index fe09077..b0b436b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -856,7 +856,7 @@ static int ip_tunnel_encap_add_fou_ops(void)
 	return 0;
 }
 
-static int ip_tunnel_encap_del_fou_ops(void)
+static void ip_tunnel_encap_del_fou_ops(void)
 {
 }
 
-- 
1.9.3

^ permalink raw reply related

* Re: arm64 allmodconfig failures in nft_reject_bridge.c
From: Pablo Neira Ayuso @ 2014-11-13 11:43 UTC (permalink / raw)
  To: Mark Brown
  Cc: linaro-kernel, kernel-build-reports, netdev, bridge,
	Patrick McHardy, Stephen Hemminger, coreteam, netfilter-devel,
	Jozsef Kadlecsik, David S. Miller, Guenter Roeck
In-Reply-To: <20141113113320.GF3815@sirena.org.uk>

On Thu, Nov 13, 2014 at 11:33:20AM +0000, Mark Brown wrote:
> Since about -rc3 we've been seeing build failures in Linus' tree on
> arm64 allmodconfig due to:
> 
> > 	arm64-allmodconfig
> > ../net/bridge/netfilter/nft_reject_bridge.c:240:3: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration]
> 
> By the time I looked into this it was fixed in -next by c1207c049b204b0
> (netfilter: nft_reject_bridge: Fix powerpc build error) from Guenter but
> that doesn't seem to have made it into -rc4 so I just wanted to check
> that this fix was intended to go to Linus before v3.18?

I can see this in David's tree:

http://git.kernel.org/cgit/linux/kernel/git/davem/net.git/commit/?id=c1207c049b204b0a96535dc5416aee331b51e0e1

I think this was reported when -rc4 was already out, so you'll see
this by -rc5.

^ permalink raw reply

* Re: [GIT PULL nf] Second Round of IPVS Fixes for v3.18
From: Pablo Neira Ayuso @ 2014-11-13 11:38 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
	Julian Anastasov
In-Reply-To: <1415758920-32578-1-git-send-email-horms@verge.net.au>

On Wed, Nov 12, 2014 at 11:21:59AM +0900, Simon Horman wrote:
> Hi Pablo,
> 
> please consider this fix for v3.18.
> 
> It fixes handling of skb->sk which may cause incorrect handling
> of connections from a local process.
> 
> This problem was introduced in its current form by 8052ba292559f907e
> ("ipvs: support ipv4 in ipv6 and ipv6 in ipv4 tunnel forwarding") in
> v3.18-rc1.

Pulled, thanks Simon.

> I believe it also exists in a different form in older kernels.
> No fix for that is available at this time.

AFAIK -stable also accepts backports if there's a clear relation
between this original patch in mainstream and the backported version.

^ permalink raw reply

* Re: arm64 allmodconfig failures in nft_reject_bridge.c
From: Mark Brown @ 2014-11-13 11:33 UTC (permalink / raw)
  To: David S. Miller, Guenter Roeck, Pablo Neira Ayuso,
	Patrick McHardy, Jozsef Kadlecsik, Stephen Hemminger
  Cc: linaro-kernel, kernel-build-reports, netfilter-devel, coreteam,
	bridge, netdev
In-Reply-To: <20141112233738.GB3815@sirena.org.uk>

[-- Attachment #1: Type: text/plain, Size: 541 bytes --]

Since about -rc3 we've been seeing build failures in Linus' tree on
arm64 allmodconfig due to:

> 	arm64-allmodconfig
> ../net/bridge/netfilter/nft_reject_bridge.c:240:3: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration]

By the time I looked into this it was fixed in -next by c1207c049b204b0
(netfilter: nft_reject_bridge: Fix powerpc build error) from Guenter but
that doesn't seem to have made it into -rc4 so I just wanted to check
that this fix was intended to go to Linus before v3.18?

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 473 bytes --]

^ permalink raw reply

* Re: [PATCH] can: Fix bug in suspend/resume
From: Lothar Waßmann @ 2014-11-13 11:32 UTC (permalink / raw)
  To: Appana Durga Kedareswara Rao
  Cc: Marc Kleine-Budde, wg@grandegger.com, Michal Simek,
	Soren Brinkmann, grant.likely@linaro.org, robh+dt@kernel.org,
	netdev@vger.kernel.org, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org, linux-can@vger.kernel.org
In-Reply-To: <9455da2b86c74d5aafe9bf118edec7bb@BN1AFFO11FD045.protection.gbl>

Hi,

Appana Durga Kedareswara Rao wrote:
> Hi Marc,
> 
> -----Original Message-----
> From: Marc Kleine-Budde [mailto:mkl@pengutronix.de]
> Sent: Thursday, November 13, 2014 4:16 PM
> To: Appana Durga Kedareswara Rao; wg@grandegger.com; Michal Simek; Soren Brinkmann; grant.likely@linaro.org; robh+dt@kernel.org
> Cc: linux-can@vger.kernel.org; netdev@vger.kernel.org; linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; devicetree@vger.kernel.org; Appana Durga Kedareswara Rao
> Subject: Re: [PATCH] can: Fix bug in suspend/resume
> 
> On 11/13/2014 11:15 AM, Marc Kleine-Budde wrote:
> > On 11/13/2014 07:58 AM, Kedareswara rao Appana wrote:
> >> When accessing the priv structure use container_of instead of dev_get_drvdata.
> >
> > Why?
> 
> The drvdata here is the struct net_device, not the platform device.
> Please state this in the commit message.
> 
> If I understand the code correctly, you can make use of the existing helper function to_platform_device():
> 
> http://lxr.free-electrons.com/source/include/linux/platform_device.h#L42
> 
> Thanks for the suggestion.
>  Will use this macro(to_platform_device)  .
> 
> >
> >> Enable the clocks in the suspend before accessing the registers of the CAN.
> >>
> >> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
> >> ---
> >>  drivers/net/can/xilinx_can.c |   20 ++++++++++++++++++--
> >>  1 files changed, 18 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/net/can/xilinx_can.c
> >> b/drivers/net/can/xilinx_can.c index 5e8b560..63ef645 100644
> >> --- a/drivers/net/can/xilinx_can.c
> >> +++ b/drivers/net/can/xilinx_can.c
> >> @@ -972,15 +972,30 @@ static const struct net_device_ops xcan_netdev_ops = {
> >>   */
> >>  static int __maybe_unused xcan_suspend(struct device *dev)  {
> >> -    struct platform_device *pdev = dev_get_drvdata(dev);
> >> +    struct platform_device *pdev = container_of(dev,
> >> +                    struct platform_device, dev);
> >>      struct net_device *ndev = platform_get_drvdata(pdev);
> >>      struct xcan_priv *priv = netdev_priv(ndev);
>
Why not simply:
	struct net_device *ndev = dev_get_drvdata(dev);

There is no need for a struct platform_device* at all.


Lothar Waßmann
-- 
___________________________________________________________

Ka-Ro electronics GmbH | Pascalstraße 22 | D - 52076 Aachen
Phone: +49 2408 1402-0 | Fax: +49 2408 1402-10
Geschäftsführer: Matthias Kaussen
Handelsregistereintrag: Amtsgericht Aachen, HRB 4996

www.karo-electronics.de | info@karo-electronics.de
___________________________________________________________

^ permalink raw reply

* Re: [ethtool][PATCH] Fix build with musl by using more common typedefs
From: Paul Barker @ 2014-11-13 11:22 UTC (permalink / raw)
  To: Ben Hutchings, netdev; +Cc: Paul Barker, John Spencer
In-Reply-To: <1414323849-5739-2-git-send-email-paul@paulbarker.me.uk>

On 26 October 2014 11:44, Paul Barker <paul@paulbarker.me.uk> wrote:
> When using musl as the standard C library, type names such as '__int32_t' are
> not defined. Instead we must use the more commonly defined type names such as
> 'int32_t', which are defined in <stdint.h>.
>
> Signed-off-by: John Spencer <maillist-linux@barfooze.de>
> Signed-off-by: Paul Barker <paul@paulbarker.me.uk>
> ---
>  internal.h | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/internal.h b/internal.h
> index a9dfae0..262a39f 100644
> --- a/internal.h
> +++ b/internal.h
> @@ -7,6 +7,7 @@
>  #include "ethtool-config.h"
>  #endif
>  #include <stdio.h>
> +#include <stdint.h>
>  #include <stdlib.h>
>  #include <string.h>
>  #include <sys/types.h>
> @@ -17,16 +18,16 @@
>
>  /* ethtool.h expects these to be defined by <linux/types.h> */
>  #ifndef HAVE_BE_TYPES
> -typedef __uint16_t __be16;
> -typedef __uint32_t __be32;
> +typedef uint16_t __be16;
> +typedef uint32_t __be32;
>  typedef unsigned long long __be64;
>  #endif
>
>  typedef unsigned long long u64;
> -typedef __uint32_t u32;
> -typedef __uint16_t u16;
> -typedef __uint8_t u8;
> -typedef __int32_t s32;
> +typedef uint32_t u32;
> +typedef uint16_t u16;
> +typedef uint8_t u8;
> +typedef int32_t s32;
>
>  #include "ethtool-copy.h"
>  #include "net_tstamp-copy.h"
> --
> 2.1.2
>

Ping. Could you let me know if this patch is acceptable or not?

Cheers,

-- 
Paul Barker

Email: paul@paulbarker.me.uk
http://www.paulbarker.me.uk

^ permalink raw reply

* Re: [PATCH v4 4/8] net: can: c_can: Add syscon/regmap RAMINIT mechanism
From: Marc Kleine-Budde @ 2014-11-13 11:09 UTC (permalink / raw)
  To: Roger Quadros, wg
  Cc: wsa, tony, tglx, mugunthanvnm, george.cherian, balbi, nsekhar, nm,
	sergei.shtylyov, linux-omap, linux-can, netdev
In-Reply-To: <1415371762-29885-5-git-send-email-rogerq@ti.com>

[-- Attachment #1: Type: text/plain, Size: 940 bytes --]

On 11/07/2014 03:49 PM, Roger Quadros wrote:
> Some TI SoCs like DRA7 have a RAMINIT register specification
> different from the other AMxx SoCs and as expected by the
> existing driver.
> 
> To add more insanity, this register is shared with other
> IPs like DSS, PCIe and PWM.
> 
> Provides a more generic mechanism to specify the RAMINIT
> register location and START/DONE bit position and use the
> syscon/regmap framework to access the register.

What about the existing device trees that don't have the syscon-raminit
phandle? We can either keep the existing init routines or create regmap
in the platform driver and use the new ones.

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* RE: [PATCH] can: Fix bug in suspend/resume
From: Appana Durga Kedareswara Rao @ 2014-11-13 11:01 UTC (permalink / raw)
  To: Marc Kleine-Budde, wg@grandegger.com, Michal Simek,
	Soren Brinkmann, grant.likely@linaro.org, robh+dt@kernel.org
  Cc: linux-can@vger.kernel.org, netdev@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, devicetree@vger.kernel.org
In-Reply-To: <54648BF4.60007@pengutronix.de>

Hi Marc,

-----Original Message-----
From: Appana Durga Kedareswara Rao
Sent: Thursday, November 13, 2014 4:28 PM
To: 'Marc Kleine-Budde'; wg@grandegger.com; Michal Simek; Soren Brinkmann; grant.likely@linaro.org; robh+dt@kernel.org
Cc: linux-can@vger.kernel.org; netdev@vger.kernel.org; linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; devicetree@vger.kernel.org
Subject: RE: [PATCH] can: Fix bug in suspend/resume

Hi Marc,

-----Original Message-----
From: Marc Kleine-Budde [mailto:mkl@pengutronix.de]
Sent: Thursday, November 13, 2014 4:16 PM
To: Appana Durga Kedareswara Rao; wg@grandegger.com; Michal Simek; Soren Brinkmann; grant.likely@linaro.org; robh+dt@kernel.org
Cc: linux-can@vger.kernel.org; netdev@vger.kernel.org; linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; devicetree@vger.kernel.org; Appana Durga Kedareswara Rao
Subject: Re: [PATCH] can: Fix bug in suspend/resume

On 11/13/2014 11:15 AM, Marc Kleine-Budde wrote:
> On 11/13/2014 07:58 AM, Kedareswara rao Appana wrote:
>> When accessing the priv structure use container_of instead of dev_get_drvdata.
>
> Why?

The drvdata here is the struct net_device, not the platform device.
Please state this in the commit message.

If I understand the code correctly, you can make use of the existing helper function to_platform_device():

http://lxr.free-electrons.com/source/include/linux/platform_device.h#L42

Thanks for the suggestion.
 Will use this macro(to_platform_device)  .

>
>> Enable the clocks in the suspend before accessing the registers of the CAN.
>>
>> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
>> ---
>>  drivers/net/can/xilinx_can.c |   20 ++++++++++++++++++--
>>  1 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/can/xilinx_can.c
>> b/drivers/net/can/xilinx_can.c index 5e8b560..63ef645 100644
>> --- a/drivers/net/can/xilinx_can.c
>> +++ b/drivers/net/can/xilinx_can.c
>> @@ -972,15 +972,30 @@ static const struct net_device_ops xcan_netdev_ops = {
>>   */
>>  static int __maybe_unused xcan_suspend(struct device *dev)  {
>> -    struct platform_device *pdev = dev_get_drvdata(dev);
>> +    struct platform_device *pdev = container_of(dev,
>> +                    struct platform_device, dev);
>>      struct net_device *ndev = platform_get_drvdata(pdev);
>>      struct xcan_priv *priv = netdev_priv(ndev);
>> +    int ret;
>>
>>      if (netif_running(ndev)) {
>>              netif_stop_queue(ndev);
>>              netif_device_detach(ndev);
>>      }
>>
>> +    ret = clk_prepare_enable(priv->can_clk);
>> +    if (ret) {
>> +            dev_err(dev, "unable to enable device clock\n");
>> +            return ret;
>> +    }
>> +
>> +    ret = clk_prepare_enable(priv->bus_clk);
>> +    if (ret) {
>> +            dev_err(dev, "unable to enable bus clock\n");
>> +            clk_disable_unprepare(priv->can_clk);
>> +            return ret;
>> +    }
>
> Now you have clock imbalance. Per suspend/resume cycle the clocks are
> enabled twice, but disabled only once.
>

The clocks are disabled and unprepared at the end of the probe.
In the resume the driver is doing a register write.

Sorry, that was a mistake: it is not in the resume but in the suspend that I am doing a register write.

In order to do that register write, I have to enable and prepare the clocks again.


Regards,
Kedar.


Regards,
Kedar.

>> +
>>      priv->write_reg(priv, XCAN_MSR_OFFSET, XCAN_MSR_SLEEP_MASK);
>>      priv->can.state = CAN_STATE_SLEEPING;
>>
>> @@ -999,7 +1014,8 @@ static int __maybe_unused xcan_suspend(struct device *dev)
>>   */
>>  static int __maybe_unused xcan_resume(struct device *dev)  {
>> -    struct platform_device *pdev = dev_get_drvdata(dev);
>> +    struct platform_device *pdev = container_of(dev,
>> +                    struct platform_device, dev);
>>      struct net_device *ndev = platform_get_drvdata(pdev);
>>      struct xcan_priv *priv = netdev_priv(ndev);
>>      int ret;

Marc

--
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |



This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.


^ permalink raw reply

* Re: [PATCH v4 3/8] net: can: c_can: Add RAMINIT register information to driver data
From: Marc Kleine-Budde @ 2014-11-13 10:59 UTC (permalink / raw)
  To: Roger Quadros, wg
  Cc: wsa, tony, tglx, mugunthanvnm, george.cherian, balbi, nsekhar, nm,
	sergei.shtylyov, linux-omap, linux-can, netdev
In-Reply-To: <1415371762-29885-4-git-send-email-rogerq@ti.com>

[-- Attachment #1: Type: text/plain, Size: 1724 bytes --]

On 11/07/2014 03:49 PM, Roger Quadros wrote:
> Some platforms (e.g. TI) need special RAMINIT register handling.
> Provide a way to store RAMINIT register description in driver data.
> 
> Signed-off-by: Roger Quadros <rogerq@ti.com>
> ---
>  drivers/net/can/c_can/c_can.h          | 6 ++++++
>  drivers/net/can/c_can/c_can_platform.c | 1 +
>  2 files changed, 7 insertions(+)
> 
> diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
> index 26c975d..3c305a1 100644
> --- a/drivers/net/can/c_can/c_can.h
> +++ b/drivers/net/can/c_can/c_can.h
> @@ -171,6 +171,12 @@ enum c_can_dev_id {
>  
>  struct c_can_driver_data {
>  	enum c_can_dev_id id;
> +
> +	/* RAMINIT register description. Optional. */
> +	u8 num_can;		/* Number of CAN instances on the SoC */
> +	u8 *raminit_start_bits;	/* Array of START bit positions */
> +	u8 *raminit_done_bits;	/* Array of DONE bit positions */
> +	bool raminit_pulse;	/* If set, sets and clears START bit (pulse) */
>  };
>  
>  /* c_can private data structure */
> diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
> index 1546c2b..20deb67 100644
> --- a/drivers/net/can/c_can/c_can_platform.c
> +++ b/drivers/net/can/c_can/c_can_platform.c
> @@ -250,6 +250,7 @@ static int c_can_plat_probe(struct platform_device *pdev)
>  	}
>  
>  	priv = netdev_priv(dev);
> +

Dropped this hunk while applying.

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* RE: [PATCH] can: Fix bug in suspend/resume
From: Appana Durga Kedareswara Rao @ 2014-11-13 10:58 UTC (permalink / raw)
  To: Marc Kleine-Budde, wg-5Yr1BZd7O62+XT7JhA+gdA@public.gmane.org,
	Michal Simek, Soren Brinkmann,
	grant.likely-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org,
	robh+dt-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
  Cc: linux-can-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	devicetree-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <54648BF4.60007-bIcnvbaLZ9MEGnE8C9+IrQ@public.gmane.org>

Hi Marc,

-----Original Message-----
From: Marc Kleine-Budde [mailto:mkl@pengutronix.de]
Sent: Thursday, November 13, 2014 4:16 PM
To: Appana Durga Kedareswara Rao; wg@grandegger.com; Michal Simek; Soren Brinkmann; grant.likely@linaro.org; robh+dt@kernel.org
Cc: linux-can@vger.kernel.org; netdev@vger.kernel.org; linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; devicetree@vger.kernel.org; Appana Durga Kedareswara Rao
Subject: Re: [PATCH] can: Fix bug in suspend/resume

On 11/13/2014 11:15 AM, Marc Kleine-Budde wrote:
> On 11/13/2014 07:58 AM, Kedareswara rao Appana wrote:
>> When accessing the priv structure use container_of instead of dev_get_drvdata.
>
> Why?

The drvdata here is the struct net_device, not the platform device.
Please state this in the commit message.

If I understand the code correctly, you can make use of the existing helper function to_platform_device():

http://lxr.free-electrons.com/source/include/linux/platform_device.h#L42

Thanks for the suggestion.
I will use this macro (to_platform_device).

>
>> Enable the clocks in the suspend before accessing the registers of the CAN.
>>
>> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
>> ---
>>  drivers/net/can/xilinx_can.c |   20 ++++++++++++++++++--
>>  1 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/can/xilinx_can.c
>> b/drivers/net/can/xilinx_can.c index 5e8b560..63ef645 100644
>> --- a/drivers/net/can/xilinx_can.c
>> +++ b/drivers/net/can/xilinx_can.c
>> @@ -972,15 +972,30 @@ static const struct net_device_ops xcan_netdev_ops = {
>>   */
>>  static int __maybe_unused xcan_suspend(struct device *dev)  {
>> -    struct platform_device *pdev = dev_get_drvdata(dev);
>> +    struct platform_device *pdev = container_of(dev,
>> +                    struct platform_device, dev);
>>      struct net_device *ndev = platform_get_drvdata(pdev);
>>      struct xcan_priv *priv = netdev_priv(ndev);
>> +    int ret;
>>
>>      if (netif_running(ndev)) {
>>              netif_stop_queue(ndev);
>>              netif_device_detach(ndev);
>>      }
>>
>> +    ret = clk_prepare_enable(priv->can_clk);
>> +    if (ret) {
>> +            dev_err(dev, "unable to enable device clock\n");
>> +            return ret;
>> +    }
>> +
>> +    ret = clk_prepare_enable(priv->bus_clk);
>> +    if (ret) {
>> +            dev_err(dev, "unable to enable bus clock\n");
>> +            clk_disable_unprepare(priv->can_clk);
>> +            return ret;
>> +    }
>
> Now you have clock imbalance. Per suspend/resume cycle the clocks are
> enabled twice, but disabled only once.
>

The clocks are disabled and unprepared at the end of the probe.
In the suspend the driver is doing a register write.
In order to do that register write, I have to enable and prepare the clocks again.

Regards,
Kedar.

>> +
>>      priv->write_reg(priv, XCAN_MSR_OFFSET, XCAN_MSR_SLEEP_MASK);
>>      priv->can.state = CAN_STATE_SLEEPING;
>>
>> @@ -999,7 +1014,8 @@ static int __maybe_unused xcan_suspend(struct device *dev)
>>   */
>>  static int __maybe_unused xcan_resume(struct device *dev)  {
>> -    struct platform_device *pdev = dev_get_drvdata(dev);
>> +    struct platform_device *pdev = container_of(dev,
>> +                    struct platform_device, dev);
>>      struct net_device *ndev = platform_get_drvdata(pdev);
>>      struct xcan_priv *priv = netdev_priv(ndev);
>>      int ret;

Marc

--
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |



This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.


^ permalink raw reply

* Re: [PATCH v4 2/8] net: can: c_can: Introduce c_can_driver_data structure
From: Marc Kleine-Budde @ 2014-11-13 10:57 UTC (permalink / raw)
  To: Roger Quadros, wg
  Cc: wsa, tony, tglx, mugunthanvnm, george.cherian, balbi, nsekhar, nm,
	sergei.shtylyov, linux-omap, linux-can, netdev
In-Reply-To: <1415371762-29885-3-git-send-email-rogerq@ti.com>

[-- Attachment #1: Type: text/plain, Size: 1593 bytes --]

On 11/07/2014 03:49 PM, Roger Quadros wrote:
> We want to have more data than just can_dev_id to be present
> in the driver data e.g. TI platforms need RAMINIT register
> description. Introduce the c_can_driver_data structure and move
> the can_dev_id into it.
> 
> Tidy up the way it is used on probe().
> 
> Signed-off-by: Roger Quadros <rogerq@ti.com>

[...]

> @@ -198,21 +206,19 @@ static int c_can_plat_probe(struct platform_device *pdev)
>  	struct net_device *dev;
>  	struct c_can_priv *priv;
>  	const struct of_device_id *match;
> -	const struct platform_device_id *id;
>  	struct resource *mem, *res;
>  	int irq;
>  	struct clk *clk;
> -
> -	if (pdev->dev.of_node) {
> -		match = of_match_device(c_can_of_table, &pdev->dev);
> -		if (!match) {
> -			dev_err(&pdev->dev, "Failed to find matching dt id\n");
> -			ret = -EINVAL;
> -			goto exit;
> -		}
> -		id = match->data;
> +	const struct c_can_driver_data *drvdata;
> +
> +	match = of_match_device(c_can_of_table, &pdev->dev);
> +	if (match) {
> +		drvdata = match->data;
> +	} else if (pdev->id_entry->driver_data) {
> +		drvdata = (struct c_can_driver_data *)
> +			   pdev->id_entry->driver_data;
                           ^^^^^^^^^^^^^^
I've changed this to platform_get_device_id() while applying.

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox