netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
@ 2011-03-07 10:17 Yevgeny Petrilin
  2011-03-07 21:07 ` Roland Dreier
  0 siblings, 1 reply; 10+ messages in thread
From: Yevgeny Petrilin @ 2011-03-07 10:17 UTC (permalink / raw)
  To: davem; +Cc: netdev, yevgenyp, eli

From: Eli Cohen <eli@mellanox.co.il>

Using blue flame can improve latency by allowing the HW to more efficiently
access the WQE. This patch presents two functions that are used to allocate or
release HW resources for using blue flame; the caller needs to supply a struct
mlx4_bf object when allocating resources. Consumers that make use of this API
should post doorbells to the UAR object pointed to by the initialized struct
mlx4_bf.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
---
 drivers/net/mlx4/main.c     |   31 ++++++++++++++
 drivers/net/mlx4/mlx4.h     |    3 +
 drivers/net/mlx4/pd.c       |   94 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mlx4/device.h |   13 ++++++
 include/linux/mlx4/qp.h     |    1 +
 5 files changed, 142 insertions(+), 0 deletions(-)

diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index c8e2761..34581d2 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -39,6 +39,7 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
+#include <linux/io-mapping.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
@@ -721,8 +722,31 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
 	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
 }
 
+static int map_bf_area(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	resource_size_t bf_start;
+	resource_size_t bf_len;
+	int err = 0;
+
+	bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT);
+	bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT);
+	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
+	if (!priv->bf_mapping)
+		err = -ENOMEM;
+
+	return err;
+}
+
+static void unmap_bf_area(struct mlx4_dev *dev)
+{
+	if (mlx4_priv(dev)->bf_mapping)
+		io_mapping_free(mlx4_priv(dev)->bf_mapping);
+}
+
 static void mlx4_close_hca(struct mlx4_dev *dev)
 {
+	unmap_bf_area(dev);
 	mlx4_CLOSE_HCA(dev, 0);
 	mlx4_free_icms(dev);
 	mlx4_UNMAP_FA(dev);
@@ -775,6 +799,9 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 		goto err_stop_fw;
 	}
 
+	if (map_bf_area(dev))
+		mlx4_dbg(dev, "Kernel support for blue flame is not available for kernels < 2.6.28\n");
+
 	init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
 
 	err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
@@ -805,6 +832,7 @@ err_free_icm:
 	mlx4_free_icms(dev);
 
 err_stop_fw:
+	unmap_bf_area(dev);
 	mlx4_UNMAP_FA(dev);
 	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
 
@@ -1196,6 +1224,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	pci_read_config_byte(pdev, PCI_REVISION_ID, &dev->rev_id);
 
+	INIT_LIST_HEAD(&priv->bf_list);
+	mutex_init(&priv->bf_mutex);
+
 	/*
 	 * Now reset the HCA before we touch the PCI capabilities or
 	 * attempt a firmware command, since a boot ROM may have left
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index a50923a..008bf95 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -353,6 +353,9 @@ struct mlx4_priv {
 	struct mutex		port_mutex;
 	struct mlx4_msix_ctl	msix_ctl;
 	struct mlx4_steer	*steer;
+	struct list_head	bf_list;
+	struct mutex		bf_mutex;
+	struct io_mapping	*bf_mapping;
 };
 
 static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index c4988d6..5210a0f 100644
--- a/drivers/net/mlx4/pd.c
+++ b/drivers/net/mlx4/pd.c
@@ -32,6 +32,7 @@
  */
 
 #include <linux/errno.h>
+#include <linux/io-mapping.h>
 
 #include <asm/page.h>
 
@@ -77,6 +78,7 @@ int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
 		return -ENOMEM;
 
 	uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
+	uar->map = NULL;
 
 	return 0;
 }
@@ -88,6 +90,98 @@ void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
 }
 EXPORT_SYMBOL_GPL(mlx4_uar_free);
 
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_uar *uar;
+	int err = 0;
+	int idx;
+
+	if (!priv->bf_mapping)
+		return -ENOMEM;
+
+	mutex_lock(&priv->bf_mutex);
+	if (!list_empty(&priv->bf_list))
+		uar = list_entry(priv->bf_list.next, struct mlx4_uar, bf_list);
+	else {
+		uar = kmalloc(sizeof *uar, GFP_KERNEL);
+		if (!uar) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = mlx4_uar_alloc(dev, uar);
+		if (err)
+			goto free_kmalloc;
+
+		uar->map = ioremap(uar->pfn << PAGE_SHIFT, PAGE_SIZE);
+		if (!uar->map) {
+			err = -ENOMEM;
+			goto free_uar;
+		}
+
+		uar->bf_map = io_mapping_map_wc(priv->bf_mapping, uar->index << PAGE_SHIFT);
+		if (!uar->bf_map) {
+			err = -ENOMEM;
+			goto unamp_uar;
+		}
+		uar->free_bf_bmap = 0;
+		list_add(&uar->bf_list, &priv->bf_list);
+	}
+
+	bf->uar = uar;
+	idx = ffz(uar->free_bf_bmap);
+	uar->free_bf_bmap |= 1 << idx;
+	bf->uar = uar;
+	bf->offset = 0;
+	bf->buf_size = dev->caps.bf_reg_size / 2;
+	bf->reg = uar->bf_map + idx * dev->caps.bf_reg_size;
+	if (uar->free_bf_bmap == (1 << dev->caps.bf_regs_per_page) - 1)
+		list_del_init(&uar->bf_list);
+
+	goto out;
+
+unamp_uar:
+	bf->uar = NULL;
+	iounmap(uar->map);
+
+free_uar:
+	mlx4_uar_free(dev, uar);
+
+free_kmalloc:
+	kfree(uar);
+
+out:
+	mutex_unlock(&priv->bf_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_alloc);
+
+void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int idx;
+
+	if (!bf->uar || !bf->uar->bf_map)
+		return;
+
+	mutex_lock(&priv->bf_mutex);
+	idx = (bf->reg - bf->uar->bf_map) / dev->caps.bf_reg_size;
+	bf->uar->free_bf_bmap &= ~(1 << idx);
+	if (!bf->uar->free_bf_bmap) {
+		if (!list_empty(&bf->uar->bf_list))
+			list_del(&bf->uar->bf_list);
+
+		io_mapping_unmap(bf->uar->bf_map);
+		iounmap(bf->uar->map);
+		mlx4_uar_free(dev, bf->uar);
+		kfree(bf->uar);
+	} else if (list_empty(&bf->uar->bf_list))
+		list_add(&bf->uar->bf_list, &priv->bf_list);
+
+	mutex_unlock(&priv->bf_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_free);
+
 int mlx4_init_uar_table(struct mlx4_dev *dev)
 {
 	if (dev->caps.num_uars <= 128) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index f1cb31b..229cd65 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -351,6 +351,17 @@ struct mlx4_fmr {
 struct mlx4_uar {
 	unsigned long		pfn;
 	int			index;
+	struct list_head	bf_list;
+	unsigned		free_bf_bmap;
+	void __iomem	       *map;
+	void __iomem	       *bf_map;
+};
+
+struct mlx4_bf {
+	unsigned long		offset;
+	int			buf_size;
+	struct mlx4_uar	       *uar;
+	void __iomem	       *reg;
 };
 
 struct mlx4_cq {
@@ -478,6 +489,8 @@ void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
 void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf);
+void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf);
 
 int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
 		  struct mlx4_mtt *mtt);
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 0eeb2a1..9e9eb21 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -303,6 +303,7 @@ struct mlx4_wqe_data_seg {
 
 enum {
 	MLX4_INLINE_ALIGN	= 64,
+	MLX4_INLINE_SEG		= 1 << 31,
 };
 
 struct mlx4_wqe_inline_seg {
-- 
1.6.0.2





^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
  2011-03-07 10:17 Yevgeny Petrilin
@ 2011-03-07 21:07 ` Roland Dreier
  2011-03-07 21:27   ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Roland Dreier @ 2011-03-07 21:07 UTC (permalink / raw)
  To: Yevgeny Petrilin; +Cc: davem, netdev, eli

On Mon, Mar 7, 2011 at 2:17 AM, Yevgeny Petrilin
<yevgenyp@mellanox.co.il> wrote:
> +       if (map_bf_area(dev))
> +               mlx4_dbg(dev, "Kernel support for blue flame is not available for kernels < 2.6.28\n");

This seems like a really bad error message.  Can map_bf_area() actually fail?

 - R.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
  2011-03-07 21:07 ` Roland Dreier
@ 2011-03-07 21:27   ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2011-03-07 21:27 UTC (permalink / raw)
  To: roland; +Cc: yevgenyp, netdev, eli

From: Roland Dreier <roland@purestorage.com>
Date: Mon, 7 Mar 2011 13:07:48 -0800

> On Mon, Mar 7, 2011 at 2:17 AM, Yevgeny Petrilin
> <yevgenyp@mellanox.co.il> wrote:
>> +       if (map_bf_area(dev))
>> +               mlx4_dbg(dev, "Kernel support for blue flame is not available for kernels < 2.6.28\n");
> 
> This seems like a really bad error message.  Can map_bf_area() actually fail?

Indeed, referencing specific old kernel versions in the upstream
driver makes no sense at all.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: FW: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
       [not found] <CFE9BFE80FFE4D4892AA5D31387E310F02F3ED@mtldag01.mtl.com>
@ 2011-03-07 21:36 ` Eli Cohen
  2011-03-07 21:40   ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Eli Cohen @ 2011-03-07 21:36 UTC (permalink / raw)
  To: roland, davem; +Cc: davem, netdev

> 
> On Mon, Mar 7, 2011 at 2:17 AM, Yevgeny Petrilin
> <yevgenyp@mellanox.co.il> wrote:
> > +       if (map_bf_area(dev))
> > +               mlx4_dbg(dev, "Kernel support for blue flame is not available for kernels < 2.6.28\n");
> 
> This seems like a really bad error message.  Can map_bf_area() actually fail?

I agree that this message is inappropriate here; it originated
from the OFED patches, which support older kernels too.
As for the question of whether a message is justified here at all, I think the
answer is yes because of this:

+static int map_bf_area(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       resource_size_t bf_start;
+       resource_size_t bf_len;
+       int err = 0;
+
+       bf_start = pci_resource_start(dev->pdev, 2) +
(dev->caps.num_uars << PAGE_SHIFT);
+       bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars
<< PAGE_SHIFT);
+       priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
+       if (!priv->bf_mapping)
+               err = -ENOMEM;

Specifically, some archs may not support write combining.
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
  2011-03-07 21:36 ` FW: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers Eli Cohen
@ 2011-03-07 21:40   ` David Miller
  2011-03-07 21:48     ` Eli Cohen
  0 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2011-03-07 21:40 UTC (permalink / raw)
  To: eli; +Cc: roland, netdev

From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Mon, 7 Mar 2011 23:36:48 +0200

> Specifically, some archs may not support write combining.

They should just create a non-write-combining mapping if they
don't support it.

It could still fail due to resource constraints, but not because
of the reason you're stating.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
  2011-03-07 21:40   ` David Miller
@ 2011-03-07 21:48     ` Eli Cohen
  2011-03-07 21:49       ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Eli Cohen @ 2011-03-07 21:48 UTC (permalink / raw)
  To: David Miller; +Cc: roland, netdev

On Mon, Mar 07, 2011 at 01:40:01PM -0800, David Miller wrote:
> From: Eli Cohen <eli@dev.mellanox.co.il>
> Date: Mon, 7 Mar 2011 23:36:48 +0200
> 
> > Specifically, some archs may not support write combining.
> 
> They should just create a non-write-combining mapping if they
> don't support it.
> 

I wouldn't expect that since the caller function could be misled to
believe it has a write combining capable area.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
  2011-03-07 21:48     ` Eli Cohen
@ 2011-03-07 21:49       ` David Miller
  2011-03-07 21:50         ` David Miller
  2011-03-07 21:58         ` Eli Cohen
  0 siblings, 2 replies; 10+ messages in thread
From: David Miller @ 2011-03-07 21:49 UTC (permalink / raw)
  To: eli; +Cc: roland, netdev

From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Mon, 7 Mar 2011 23:48:12 +0200

> On Mon, Mar 07, 2011 at 01:40:01PM -0800, David Miller wrote:
>> From: Eli Cohen <eli@dev.mellanox.co.il>
>> Date: Mon, 7 Mar 2011 23:36:48 +0200
>> 
>> > Specifically, some archs may not support write combining.
>> 
>> They should just create a non-write-combining mapping if they
>> don't support it.
>> 
> 
> I wouldn't expect that since the caller function could be mislead to
> believe it has a write combining capable area.

It's a performance optimization, if you don't get write combining you'll
get more strict ordering, rather than less.

It cannot cause a problem.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
  2011-03-07 21:49       ` David Miller
@ 2011-03-07 21:50         ` David Miller
  2011-03-07 21:58         ` Eli Cohen
  1 sibling, 0 replies; 10+ messages in thread
From: David Miller @ 2011-03-07 21:50 UTC (permalink / raw)
  To: eli; +Cc: roland, netdev

From: David Miller <davem@davemloft.net>
Date: Mon, 07 Mar 2011 13:49:31 -0800 (PST)

> From: Eli Cohen <eli@dev.mellanox.co.il>
> Date: Mon, 7 Mar 2011 23:48:12 +0200
> 
>> On Mon, Mar 07, 2011 at 01:40:01PM -0800, David Miller wrote:
>>> From: Eli Cohen <eli@dev.mellanox.co.il>
>>> Date: Mon, 7 Mar 2011 23:36:48 +0200
>>> 
>>> > Specifically, some archs may not support write combining.
>>> 
>>> They should just create a non-write-combining mapping if they
>>> don't support it.
>>> 
>> 
>> I wouldn't expect that since the caller function could be mislead to
>> believe it has a write combining capable area.
> 
> It's a performance optimization, if you don't get write combining you'll
> get more strict ordering, rather than less.
> 
> It cannot cause problem.

BTW, if we did as you suggest, fail if we don't support write combining,
then half the drivers in the tree would fail to probe on sparc64.

Every other driver expects it to succeed, with either write-combining
or more strict ordering semantics.  Never to fail simply because
write-combining isn't supported.

It's a request, not a requirement.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
  2011-03-07 21:49       ` David Miller
  2011-03-07 21:50         ` David Miller
@ 2011-03-07 21:58         ` Eli Cohen
  2011-03-07 22:09           ` David Miller
  1 sibling, 1 reply; 10+ messages in thread
From: Eli Cohen @ 2011-03-07 21:58 UTC (permalink / raw)
  To: David Miller; +Cc: roland, netdev

On Mon, Mar 07, 2011 at 01:49:31PM -0800, David Miller wrote:
> 
> It's a performance optimization, if you don't get write combining you'll
> get more strict ordering, rather than less.
> 
> It cannot cause problem.

I agree, but the function could still fail and the caller's logic
could attempt to call ioremap or take other action. For example, in
the case of blue flame, it is better performance-wise to avoid using
this feature if write combining is not available.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers
  2011-03-07 21:58         ` Eli Cohen
@ 2011-03-07 22:09           ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2011-03-07 22:09 UTC (permalink / raw)
  To: eli; +Cc: roland, netdev

From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Mon, 7 Mar 2011 23:58:03 +0200

> On Mon, Mar 07, 2011 at 01:49:31PM -0800, David Miller wrote:
>> 
>> It's a performance optimization, if you don't get write combining you'll
>> get more strict ordering, rather than less.
>> 
>> It cannot cause problem.
> 
> I agree, but the function could still fail and the caller's logic
> could attempt to call ioreamp or take other action. For example, in
> the case of blue flame, it is better performance-wise to avoid using
> this feature if write combining is not available.

It could, but the less complicated the interfaces the better.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-03-07 22:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <CFE9BFE80FFE4D4892AA5D31387E310F02F3ED@mtldag01.mtl.com>
2011-03-07 21:36 ` FW: [PATCH 13/17] mlx4: Add blue flame support for kernel consumers Eli Cohen
2011-03-07 21:40   ` David Miller
2011-03-07 21:48     ` Eli Cohen
2011-03-07 21:49       ` David Miller
2011-03-07 21:50         ` David Miller
2011-03-07 21:58         ` Eli Cohen
2011-03-07 22:09           ` David Miller
2011-03-07 10:17 Yevgeny Petrilin
2011-03-07 21:07 ` Roland Dreier
2011-03-07 21:27   ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).