From: Bryce Lelbach <blelbach-eiRV2mVQLkiVc3sceRu5cw@public.gmane.org>
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Patch: Support for Xeon Phi
Date: Fri, 24 May 2013 12:02:53 -0500 [thread overview]
Message-ID: <20130524170253.GC2591@pyxis.br.cox.net> (raw)
[-- Attachment #1.1: Type: text/plain, Size: 652 bytes --]
The attached patch modifies the kernel InfiniBand drivers to support the Xeon Phi
co-processor.
This patch is a modified version of a patch from Intel's MPSS framework
(specifically, from the "KNC_gold_update_1-2.1.4982-15-rhel-6.3" package). The
modified version applies to a 3.7.8 kernel (I am about to try it on a 3.8
kernel). To the best of my knowledge, newer RHEL kernels already ship with
this patch.
--
Bryce Adelstein-Lelbach aka wash
STE||AR Group, Center for Computation and Technology, LSU
--
225-317-3866 - iPhone
225-578-6182 - Work (no voicemail)
--
stellar.cct.lsu.edu
boost-spirit.com
llvm.linuxfoundation.org
cppnow.org
--
[-- Attachment #1.2: linux_3.7.8_ib_xeon_phi.patch --]
[-- Type: text/x-diff, Size: 10917 bytes --]
diff -u -r -N linux-source-3.7/drivers/infiniband/core/sysfs.c linux-source-3.7-xeon-phi/drivers/infiniband/core/sysfs.c
--- linux-source-3.7/drivers/infiniband/core/sysfs.c 2013-02-14 12:57:59.000000000 -0600
+++ linux-source-3.7-xeon-phi/drivers/infiniband/core/sysfs.c 2013-04-16 15:34:29.954382402 -0500
@@ -610,6 +610,7 @@
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+ case RDMA_NODE_MIC: return sprintf(buf, "%d: MIC\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
}
}
diff -u -r -N linux-source-3.7/drivers/infiniband/core/umem.c linux-source-3.7-xeon-phi/drivers/infiniband/core/umem.c
--- linux-source-3.7/drivers/infiniband/core/umem.c 2013-02-14 12:57:59.000000000 -0600
+++ linux-source-3.7-xeon-phi/drivers/infiniband/core/umem.c 2013-04-16 15:37:23.996479997 -0500
@@ -101,7 +101,6 @@
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->context = context;
umem->length = size;
umem->offset = addr & ~PAGE_MASK;
umem->page_size = PAGE_SIZE;
@@ -216,7 +215,6 @@
return ret < 0 ? ERR_PTR(ret) : umem;
}
-EXPORT_SYMBOL(ib_umem_get);
static void ib_umem_account(struct work_struct *work)
{
@@ -230,10 +228,10 @@
}
/**
- * ib_umem_release - release memory pinned with ib_umem_get
+ * ib_release_umem - release memory pinned with ib_umem_get
* @umem: umem struct to release
*/
-void ib_umem_release(struct ib_umem *umem)
+void ib_release_umem(struct ib_umem *umem)
{
struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
@@ -274,9 +272,8 @@
mmput(mm);
kfree(umem);
}
-EXPORT_SYMBOL(ib_umem_release);
-int ib_umem_page_count(struct ib_umem *umem)
+int ib_page_count_umem(struct ib_umem *umem)
{
struct ib_umem_chunk *chunk;
int shift;
@@ -292,4 +289,40 @@
return n;
}
+
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+ size_t size, int access, int dmasync)
+{
+ struct ib_umem_ops *ops = context->umem_ops; /* NULL: default ops */
+ struct ib_umem *umem;
+
+ umem = (ops && ops->get) ? /* context's get hook if set, else built-in */
+ ops->get(context, addr, size, access, dmasync) :
+ ib_get_umem(context, addr, size, access, dmasync);
+
+ if (!IS_ERR(umem))
+ umem->context = context; /* read back by release/page_count */
+
+ return umem;
+}
+EXPORT_SYMBOL(ib_umem_get);
+
+void ib_umem_release(struct ib_umem *umem)
+{
+ struct ib_umem_ops *ops = umem->context->umem_ops;
+
+ if (ops && ops->release) /* per-context hook wins over the default */
+ ops->release(umem);
+ else
+ ib_release_umem(umem); /* renamed original ib_umem_release */
+}
+EXPORT_SYMBOL(ib_umem_release);
+
+int ib_umem_page_count(struct ib_umem *umem)
+{
+ struct ib_umem_ops *ops = umem->context->umem_ops;
+
+ return (ops && ops->page_count) ? /* hook if set, else default */
+ ops->page_count(umem) : ib_page_count_umem(umem);
+}
EXPORT_SYMBOL(ib_umem_page_count);
diff -u -r -N linux-source-3.7/drivers/infiniband/core/uverbs_cmd.c linux-source-3.7-xeon-phi/drivers/infiniband/core/uverbs_cmd.c
--- linux-source-3.7/drivers/infiniband/core/uverbs_cmd.c 2013-02-14 12:57:59.000000000 -0600
+++ linux-source-3.7-xeon-phi/drivers/infiniband/core/uverbs_cmd.c 2013-04-16 15:37:29.688406735 -0500
@@ -54,8 +54,24 @@
static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
+static int uverbs_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
+{
+ return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
+}
+
+static int uverbs_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
+{
+ return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+}
+
+static struct ib_udata_ops uverbs_copy = { /* installed by INIT_UDATA */
+ .copy_from = uverbs_copy_from_udata, /* reads udata->inbuf */
+ .copy_to = uverbs_copy_to_udata /* writes udata->outbuf */
+};
+
#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
do { \
+ (udata)->ops = &uverbs_copy; \
(udata)->inbuf = (void __user *) (ibuf); \
(udata)->outbuf = (void __user *) (obuf); \
(udata)->inlen = (ilen); \
@@ -321,6 +337,7 @@
goto err;
}
+ ucontext->umem_ops = NULL;
ucontext->device = ibdev;
INIT_LIST_HEAD(&ucontext->pd_list);
INIT_LIST_HEAD(&ucontext->mr_list);
diff -u -r -N linux-source-3.7/drivers/infiniband/core/verbs.c linux-source-3.7-xeon-phi/drivers/infiniband/core/verbs.c
--- linux-source-3.7/drivers/infiniband/core/verbs.c 2013-02-14 12:57:59.000000000 -0600
+++ linux-source-3.7-xeon-phi/drivers/infiniband/core/verbs.c 2013-04-16 15:37:35.676341755 -0500
@@ -114,6 +114,8 @@
return RDMA_TRANSPORT_IB;
case RDMA_NODE_RNIC:
return RDMA_TRANSPORT_IWARP;
+ case RDMA_NODE_MIC:
+ return RDMA_TRANSPORT_SCIF;
default:
BUG();
return 0;
diff -u -r -N linux-source-3.7/drivers/infiniband/hw/mthca/mthca_memfree.c linux-source-3.7-xeon-phi/drivers/infiniband/hw/mthca/mthca_memfree.c
--- linux-source-3.7/drivers/infiniband/hw/mthca/mthca_memfree.c 2013-02-14 12:57:59.000000000 -0600
+++ linux-source-3.7-xeon-phi/drivers/infiniband/hw/mthca/mthca_memfree.c 2013-04-16 15:50:22.647903759 -0500
@@ -39,6 +39,12 @@
#include <asm/page.h>
+/* Must use the ib_umem routines to support the IB proxy server. */
+#define MTHCA_IB_UMEM
+#ifdef MTHCA_IB_UMEM
+#include <rdma/ib_umem.h>
+#endif
+
#include "mthca_memfree.h"
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -56,7 +62,11 @@
struct mutex mutex;
struct {
u64 uvirt;
+#ifdef MTHCA_IB_UMEM
+ struct ib_umem *umem;
+#else
struct scatterlist mem;
+#endif
int refcount;
} page[0];
};
@@ -446,7 +456,12 @@
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index, u64 uaddr)
{
+#ifdef MTHCA_IB_UMEM
+ struct mthca_ucontext *context;
+ struct ib_umem_chunk *chunk;
+#else
struct page *pages[1];
+#endif
int ret = 0;
int i;
@@ -472,6 +487,22 @@
goto out;
}
+#ifdef MTHCA_IB_UMEM
+ context = container_of(uar, struct mthca_ucontext, uar);
+
+ db_tab->page[i].umem = ib_umem_get(&context->ibucontext,
+ uaddr & PAGE_MASK, PAGE_SIZE, 0, 0);
+ if (IS_ERR(db_tab->page[i].umem)) {
+ ret = PTR_ERR(db_tab->page[i].umem);
+ goto out;
+ }
+
+ chunk = list_entry(db_tab->page[i].umem->chunk_list.next,
+ struct ib_umem_chunk, list);
+
+ ret = mthca_MAP_ICM_page(dev, sg_dma_address(&chunk->page_list[0]),
+ mthca_uarc_virt(dev, uar, i));
+#else
ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
pages, NULL);
if (ret < 0)
@@ -488,9 +519,14 @@
ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
mthca_uarc_virt(dev, uar, i));
+#endif
if (ret) {
+#ifdef MTHCA_IB_UMEM
+ ib_umem_release(db_tab->page[i].umem);
+#else
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
put_page(sg_page(&db_tab->page[i].mem));
+#endif
goto out;
}
@@ -505,17 +541,29 @@
void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index)
{
+#ifdef MTHCA_IB_UMEM
+ int i;
+#endif
if (!mthca_is_memfree(dev))
return;
+ mutex_lock(&db_tab->mutex);
+
+#ifdef MTHCA_IB_UMEM
+ i = index / MTHCA_DB_REC_PER_PAGE;
+ if (!--db_tab->page[i].refcount) {
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
+ ib_umem_release(db_tab->page[i].umem);
+ db_tab->page[i].uvirt = 0;
+ }
+#else
/*
* To make our bookkeeping simpler, we don't unmap DB
* pages until we clean up the whole db table.
*/
- mutex_lock(&db_tab->mutex);
-
--db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
+#endif
mutex_unlock(&db_tab->mutex);
}
@@ -538,7 +586,11 @@
for (i = 0; i < npages; ++i) {
db_tab->page[i].refcount = 0;
db_tab->page[i].uvirt = 0;
+#ifdef MTHCA_IB_UMEM
+ db_tab->page[i].umem = NULL;
+#else
sg_init_table(&db_tab->page[i].mem, 1);
+#endif
}
return db_tab;
@@ -555,8 +607,12 @@
for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
+#ifdef MTHCA_IB_UMEM
+ ib_umem_release(db_tab->page[i].umem);
+#else
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
put_page(sg_page(&db_tab->page[i].mem));
+#endif
}
}
diff -u -r -N linux-source-3.7/include/rdma/ib_verbs.h linux-source-3.7-xeon-phi/include/rdma/ib_verbs.h
--- linux-source-3.7/include/rdma/ib_verbs.h 2013-02-14 12:57:59.000000000 -0600
+++ linux-source-3.7-xeon-phi/include/rdma/ib_verbs.h 2013-04-16 15:32:44.519537838 -0500
@@ -67,12 +67,14 @@
RDMA_NODE_IB_CA = 1,
RDMA_NODE_IB_SWITCH,
RDMA_NODE_IB_ROUTER,
- RDMA_NODE_RNIC
+ RDMA_NODE_RNIC,
+ RDMA_NODE_MIC
};
enum rdma_transport_type {
RDMA_TRANSPORT_IB,
- RDMA_TRANSPORT_IWARP
+ RDMA_TRANSPORT_IWARP,
+ RDMA_TRANSPORT_SCIF
};
enum rdma_transport_type
@@ -82,6 +84,7 @@
IB_LINK_LAYER_UNSPECIFIED,
IB_LINK_LAYER_INFINIBAND,
IB_LINK_LAYER_ETHERNET,
+ IB_LINK_LAYER_SCIF
};
enum ib_device_cap_flags {
@@ -877,7 +880,18 @@
u8 page_shift;
};
+struct ib_ucontext;
+struct ib_umem_ops { /* optional per-ucontext umem hooks */
+ struct ib_umem *(*get)(struct ib_ucontext *context,
+ unsigned long addr, size_t size,
+ int access, int dmasync); /* tried by ib_umem_get() */
+ void (*release)(struct ib_umem *umem); /* tried by ib_umem_release() */
+ int (*page_count)(struct ib_umem *umem); /* tried by ib_umem_page_count() */
+};
+
struct ib_ucontext {
+ struct ib_umem_ops *umem_ops; /* set to NULL for default ops */
+ void *umem_private_data;
struct ib_device *device;
struct list_head pd_list;
struct list_head mr_list;
@@ -901,11 +915,20 @@
int live;
};
+struct ib_udata;
+struct ib_udata_ops { /* copy callbacks reached through ib_udata.ops */
+ int (*copy_from)(void *dest, struct ib_udata *udata,
+ size_t len); /* called by ib_copy_from_udata() */
+ int (*copy_to)(struct ib_udata *udata, void *src,
+ size_t len); /* called by ib_copy_to_udata() */
+};
+
struct ib_udata {
- void __user *inbuf;
- void __user *outbuf;
- size_t inlen;
- size_t outlen;
+ struct ib_udata_ops *ops;
+ void __user *inbuf;
+ void __user *outbuf;
+ size_t inlen;
+ size_t outlen;
};
struct ib_pd {
@@ -1281,12 +1304,12 @@
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
- return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
+ return udata->ops->copy_from(dest, udata, len);
}
static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
{
- return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+ return udata->ops->copy_to(udata, src, len);
}
/**
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 490 bytes --]
next reply other threads:[~2013-05-24 17:02 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-05-24 17:02 Bryce Lelbach [this message]
[not found] ` <20130524170253.GC2591-3v4vfGR2gegKq2+8A78TPA@public.gmane.org>
2013-05-24 18:16 ` Patch: Support for Xeon Phi Bart Van Assche
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130524170253.GC2591@pyxis.br.cox.net \
--to=blelbach-eirv2mvqlkivc3sceru5cw@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.