* [PATCH 03/16] ehca: userspace support
From: Heiko J Schick @ 2006-05-15 17:41 UTC (permalink / raw)
To: openib-general, Christoph Raisch, Hoang-Nam Nguyen, Marcus Eder,
schihei, linux-kernel, linuxppc-dev
Signed-off-by: Heiko J Schick <schickhj@de.ibm.com>
drivers/infiniband/hw/ehca/ehca_uverbs.c | 391 +++++++++++++++++++++++++++++++
1 file changed, 391 insertions(+)
--- linux-2.6.17-rc2-orig/drivers/infiniband/hw/ehca/ehca_uverbs.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.17-rc2/drivers/infiniband/hw/ehca/ehca_uverbs.c 2006-05-12 12:31:52.000000000 +0200
@@ -0,0 +1,391 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * userspace support verbs
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#undef DEB_PREFIX
+#define DEB_PREFIX "uver"
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+/*
+ * ehca_alloc_ucontext - allocate a zeroed ehca user context
+ * @device: ib device the context is created for (only used for tracing)
+ * @udata:  not used by this function
+ *
+ * Returns a pointer to the ib_ucontext embedded in the new ehca_ucontext,
+ * or ERR_PTR(-ENOMEM) if the allocation fails.  The context is released
+ * by ehca_dealloc_ucontext().
+ */
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+					struct ib_udata *udata)
+{
+	struct ehca_ucontext *ctx;
+
+	EHCA_CHECK_ADR_P(device);
+	EDEB_EN(7, "device=%p name=%s", device, device->name);
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx) {
+		EDEB_EX(7, "device=%p ucontext=%p", device, ctx);
+		return &ctx->ib_ucontext;
+	}
+
+	EDEB_ERR(4, "Out of memory device=%p", device);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * ehca_dealloc_ucontext - free a context created by ehca_alloc_ucontext()
+ * @context: ib_ucontext embedded in a struct ehca_ucontext
+ *
+ * Always returns 0 once @context has passed EHCA_CHECK_ADR().
+ */
+int ehca_dealloc_ucontext(struct ib_ucontext *context)
+{
+	struct ehca_ucontext *my_context;
+
+	EHCA_CHECK_ADR(context);
+	EDEB_EN(7, "ucontext=%p", context);
+	my_context = container_of(context, struct ehca_ucontext, ib_ucontext);
+	kfree(my_context);
+	/* exit trace: only the pointer value is printed, the object is gone */
+	EDEB_EX(7, "ucontext=%p", context);
+	return 0;
+}
+
+/*
+ * ehca_nopage - fault handler for CQ/QP queue memory mapped to userspace
+ * @vma:     faulting mapping; its page offset selects the queue (see below)
+ * @address: faulting user virtual address
+ * @type:    not used by this handler
+ *
+ * The file offset (vm_pgoff << PAGE_SHIFT) is decoded as:
+ *   bits 63..32  idr handle of the CQ or QP
+ *   bits 31..28  queue type:    1 = CQ, 2 = QP
+ *   bits 27..24  resource type: 2 = CQ queue or QP receive queue,
+ *                               3 = QP send queue
+ *
+ * Returns the page backing @address with an additional reference taken,
+ * or NOPAGE_SIGBUS if the handle is unknown, the caller is not the owning
+ * process, or the queue/resource type is not handled here.
+ *
+ * NOTE(review): the CQ/QP is used after the idr lock has been dropped and
+ * no reference is taken here -- confirm that object lifetime is guaranteed
+ * elsewhere.
+ */
+struct page *ehca_nopage(struct vm_area_struct *vma,
+			 unsigned long address, int *type)
+{
+	struct page *mypage = NULL;
+	u64 fileoffset = (u64)vma->vm_pgoff << PAGE_SHIFT;
+	u32 idr_handle = fileoffset >> 32;
+	u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+
+	EDEB_EN(7, "vm_start=%lx vm_end=%lx vm_page_prot=%lx vm_fileoff=%lx "
+		"address=%lx",
+		vma->vm_start, vma->vm_end, vma->vm_page_prot, fileoffset,
+		address);
+
+	if (q_type == 1) { /* CQ */
+		struct ehca_cq *cq = NULL;
+		u64 offset;
+		void *vaddr = NULL;
+
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* the handle comes from userspace: check it before any use */
+		if (!cq) {
+			EDEB_ERR(4, "cq is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (cq->ownpid != cur_pid) {
+			EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {
+			EDEB(6, "cq=%p cq queuearea", cq);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
+			EDEB(6, "offset=%lx vaddr=%p", offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+	} else if (q_type == 2) { /* QP */
+		struct ehca_qp *qp = NULL;
+		struct ehca_pd *pd = NULL;
+		u64 offset;
+		void *vaddr = NULL;
+
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* the handle comes from userspace: check it before any use */
+		if (!qp) {
+			EDEB_ERR(4, "qp is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		/* make sure this mmap really belongs to the authorized user */
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {
+			EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {	/* rqueue */
+			EDEB(6, "qp=%p qp rqueuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
+			EDEB(6, "offset=%lx vaddr=%p", offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		} else if (rsrc_type == 3) {	/* squeue */
+			EDEB(6, "qp=%p qp squeuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
+			EDEB(6, "offset=%lx vaddr=%p", offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+	}
+
+	/* unknown queue or resource type: nothing was looked up */
+	if (!mypage) {
+		EDEB_ERR(4, "Invalid page adr==NULL ret=NOPAGE_SIGBUS");
+		return NOPAGE_SIGBUS;
+	}
+	get_page(mypage);
+	EDEB_EX(7, "page adr=%p", mypage);
+	return mypage;
+}
+
+/*
+ * VM operations installed on queue mappings by ehca_mmap() and
+ * ehca_mmap_nopage(); pages are supplied on fault by ehca_nopage().
+ */
+static struct vm_operations_struct ehcau_vm_ops = {
+ .nopage = ehca_nopage,
+};
+
+/*
+ * ehca_mmap - map a CQ/QP resource into the calling process
+ * @context: user context the mapping request was issued on
+ * @vma:     mapping to populate; its page offset selects the resource
+ *
+ * The file offset (vm_pgoff << PAGE_SHIFT) is decoded as:
+ *   bits 63..32  idr handle of the CQ or QP
+ *   bits 31..28  queue type:    1 = CQ, 2 = QP
+ *   bits 27..24  resource type: 1 = galpa fw handle (one EHCA_PAGESIZE
+ *                                   page, remapped uncached),
+ *                               2 = CQ queue or QP receive queue,
+ *                               3 = QP send queue
+ * Queue mappings (types 2 and 3) are populated lazily through
+ * ehcau_vm_ops, i.e. by ehca_nopage().
+ *
+ * Returns 0 on success, -EINVAL for an unknown handle, a foreign context,
+ * a bad queue/resource type or a wrong mapping size, and -ENOMEM if the
+ * caller is not the owning process or remap_pfn_range() fails.
+ *
+ * NOTE(review): the CQ/QP is used after the idr lock has been dropped and
+ * no reference is taken here -- confirm that object lifetime is guaranteed
+ * elsewhere.
+ */
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	u64 fileoffset = (u64)vma->vm_pgoff << PAGE_SHIFT;
+	u32 idr_handle = fileoffset >> 32;
+	u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	int ret = -EFAULT; /* assume the worst */
+	u64 vsize = 0; /* must be calculated/set below */
+	u64 physical = 0; /* must be calculated/set below */
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+
+	EDEB_EN(7, "vm_start=%lx vm_end=%lx vm_page_prot=%lx vm_fileoff=%lx",
+		vma->vm_start, vma->vm_end, vma->vm_page_prot, fileoffset);
+
+	if (q_type == 1) { /* CQ */
+		struct ehca_cq *cq;
+
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* the handle comes from userspace: check it before any use */
+		if (!cq) {
+			ret = -EINVAL;
+			goto mmap_exit0;
+		}
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (cq->ownpid != cur_pid) {
+			EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			ret = -ENOMEM; /* errno kept as callers have seen it */
+			goto mmap_exit0;
+		}
+		if (!cq->ib_cq.uobject ||
+		    cq->ib_cq.uobject->context != context) {
+			ret = -EINVAL;
+			goto mmap_exit0;
+		}
+
+		if (rsrc_type == 1) { /* galpa fw handle */
+			EDEB(6, "cq=%p cq triggerarea", cq);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				EDEB_ERR(4, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				ret = -EINVAL;
+				goto mmap_exit0;
+			}
+
+			physical = cq->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			EDEB(6, "vsize=%lx physical=%lx", vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				EDEB_ERR(4, "remap_pfn_range() failed ret=%x",
+					 ret);
+				ret = -ENOMEM;
+			}
+		} else if (rsrc_type == 2) { /* cq queue_addr */
+			EDEB(6, "cq=%p cq q_addr", cq);
+			/* pages are faulted in by ehca_nopage() */
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			ret = 0;
+		} else {
+			EDEB_ERR(4, "bad resource type %x", rsrc_type);
+			ret = -EINVAL;
+		}
+	} else if (q_type == 2) { /* QP */
+		struct ehca_qp *qp = NULL;
+		struct ehca_pd *pd = NULL;
+
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* the handle comes from userspace: check it before any use */
+		if (!qp) {
+			ret = -EINVAL;
+			goto mmap_exit0;
+		}
+
+		/* make sure this mmap really belongs to the authorized user */
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {
+			EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			ret = -ENOMEM; /* errno kept as callers have seen it */
+			goto mmap_exit0;
+		}
+		if (!qp->ib_qp.uobject ||
+		    qp->ib_qp.uobject->context != context) {
+			/* uobject may be NULL here, do not chase it blindly */
+			EDEB(6, "qp=%p, uobject=%p, context=%p",
+			     qp, qp->ib_qp.uobject,
+			     qp->ib_qp.uobject ?
+			     qp->ib_qp.uobject->context : NULL);
+			ret = -EINVAL;
+			goto mmap_exit0;
+		}
+
+		if (rsrc_type == 1) { /* galpa fw handle */
+			EDEB(6, "qp=%p qp triggerarea", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				EDEB_ERR(4, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				ret = -EINVAL;
+				goto mmap_exit0;
+			}
+
+			physical = qp->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			EDEB(6, "vsize=%lx physical=%lx", vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				EDEB_ERR(4, "remap_pfn_range() failed ret=%x",
+					 ret);
+				ret = -ENOMEM;
+			}
+		} else if (rsrc_type == 2) { /* qp rqueue_addr */
+			EDEB(6, "qp=%p qp rqueue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			ret = 0;
+		} else if (rsrc_type == 3) { /* qp squeue_addr */
+			EDEB(6, "qp=%p qp squeue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			ret = 0;
+		} else {
+			EDEB_ERR(4, "bad resource type %x", rsrc_type);
+			ret = -EINVAL;
+		}
+	} else {
+		EDEB_ERR(4, "bad queue type %x", q_type);
+		ret = -EINVAL;
+	}
+
+mmap_exit0:
+	EDEB_EX(7, "ret=%x", ret);
+	return ret;
+}
+
+/*
+ * ehca_mmap_nopage - create an anonymous shared mapping in the current
+ *                    process that is backed by ehca_nopage()
+ * @foffset: file offset stored in the mapping; ehca_nopage() decodes it
+ *           to find the queue
+ * @length:  length of the mapping in bytes
+ * @mapped:  out: user address of the mapping, NULL on failure
+ * @vma:     out: vma of the mapping, NULL on failure
+ *
+ * Returns 0 on success or a negative errno.  do_mmap() reports failure as
+ * an error value, not as 0, so its result is checked with IS_ERR().  The
+ * vma lookup and update are done while mmap_sem is still held for writing.
+ */
+int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
+		     struct vm_area_struct **vma)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret = 0;
+
+	EDEB_EN(7, "foffset=%lx length=%lx", foffset, length);
+	*mapped = NULL;
+	*vma = NULL;
+
+	down_write(&mm->mmap_sem);
+	addr = do_mmap(NULL, 0, length, PROT_WRITE,
+		       MAP_SHARED | MAP_ANONYMOUS, foffset);
+	if (!addr || IS_ERR((void *)addr)) {
+		EDEB_ERR(4, "couldn't create mmap length=%lx", length);
+		ret = addr ? PTR_ERR((void *)addr) : -ENOMEM;
+		goto out_unlock;
+	}
+
+	*vma = find_vma(mm, addr);
+	if (!*vma) {
+		EDEB_ERR(4, "couldn't find queue vma queue=%p", (void *)addr);
+		/* do not leave a mapping behind that nobody can fault in */
+		do_munmap(mm, addr, length);
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	(*vma)->vm_flags |= VM_RESERVED;
+	(*vma)->vm_ops = &ehcau_vm_ops;
+	*mapped = (void *)addr;
+
+out_unlock:
+	up_write(&mm->mmap_sem);
+	EDEB_EX(7, "mapped=%p", *mapped);
+	return ret;
+}
+
+/*
+ * ehca_mmap_register - map one EHCA_PAGESIZE page at @physical, uncached,
+ *                      into the current process
+ * @physical: physical address to map
+ * @mapped:   out: user address of the mapping
+ * @vma:      out: vma of the mapping
+ *
+ * Returns 0 on success, the error of ehca_mmap_nopage() or -EINVAL if no
+ * mapping could be created or it has the wrong size, and -ENOMEM if
+ * remap_pfn_range() fails.
+ *
+ * NOTE(review): when a later step fails, the mapping created by
+ * ehca_mmap_nopage() is left in place -- confirm whether callers unmap
+ * *mapped on error.
+ */
+int ehca_mmap_register(u64 physical, void **mapped,
+		       struct vm_area_struct **vma)
+{
+	int ret;
+	unsigned long vsize;
+
+	/* lets a failed mapping be detected even if *vma is never written */
+	*vma = NULL;
+
+	/* ehca hw supports only 4k page */
+	ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
+	if (ret) {
+		EDEB_ERR(4, "couldn't mmap physical adr ret=%x", ret);
+		return ret;
+	}
+	if (!*vma) {
+		EDEB_ERR(4, "no vma for physical=%lx", physical);
+		return -EINVAL;
+	}
+
+	(*vma)->vm_flags |= VM_RESERVED;
+	vsize = (*vma)->vm_end - (*vma)->vm_start;
+	if (vsize != EHCA_PAGESIZE) {
+		EDEB_ERR(4, "invalid vsize=%lx",
+			 (*vma)->vm_end - (*vma)->vm_start);
+		return -EINVAL;
+	}
+
+	(*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
+	(*vma)->vm_flags |= VM_IO | VM_RESERVED;
+
+	EDEB(6, "vsize=%lx physical=%lx", vsize, physical);
+	ret = remap_pfn_range((*vma), (*vma)->vm_start,
+			      physical >> PAGE_SHIFT, vsize,
+			      (*vma)->vm_page_prot);
+	if (ret) {
+		EDEB_ERR(4, "remap_pfn_range() failed ret=%x", ret);
+		ret = -ENOMEM;
+	}
+	return ret;
+}
+
+/*
+ * ehca_munmap - unmap [addr, addr + len) from the current process
+ *
+ * Takes mmap_sem for writing around do_munmap() and returns its result.
+ * Returns 0 without doing anything when the current task has no mm.
+ */
+int ehca_munmap(unsigned long addr, size_t len)
+{
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	if (!mm)
+		return 0;
+
+	down_write(&mm->mmap_sem);
+	ret = do_munmap(mm, addr, len);
+	up_write(&mm->mmap_sem);
+
+	return ret;
+}
^ permalink raw reply
* [PATCH 02/16] ehca: structure definitions
From: Heiko J Schick @ 2006-05-15 17:41 UTC (permalink / raw)
To: openib-general, Christoph Raisch, Hoang-Nam Nguyen, Marcus Eder,
schihei, linux-kernel, linuxppc-dev
Signed-off-by: Heiko J Schick <schickhj@de.ibm.com>
drivers/infiniband/hw/ehca/ehca_classes.h | 350 ++++++++++++++++++++++
drivers/infiniband/hw/ehca/ehca_classes_pSeries.h | 251 +++++++++++++++
2 files changed, 601 insertions(+)
--- linux-2.6.17-rc2-orig/drivers/infiniband/hw/ehca/ehca_classes.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.17-rc2/drivers/infiniband/hw/ehca/ehca_classes.h 2006-05-12 12:48:21.000000000 +0200
@@ -0,0 +1,350 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Struct definition for eHCA internal structures
+ *
+ * Authors: Heiko J Schick <schickhj@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_H__
+#define __EHCA_CLASSES_H__
+
+#include "ehca_classes.h"
+#include "ipz_pt_fn.h"
+
+struct ehca_module;
+struct ehca_qp;
+struct ehca_cq;
+struct ehca_eq;
+struct ehca_mr;
+struct ehca_mw;
+struct ehca_pd;
+struct ehca_av;
+
+#ifdef CONFIG_PPC64
+#include "ehca_classes_pSeries.h"
+#endif
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ehca_irq.h"
+
+/*
+ * Driver-wide state: a list head with its spinlock, a timer, one kmem
+ * cache per object type (PD, CQ, QP, AV, MR, MW) and the platform part.
+ * NOTE(review): presumably shca_list links the struct ehca_shca instances
+ * and shca_lock protects it -- confirm in ehca_main.c.
+ */
+struct ehca_module {
+ struct list_head shca_list;
+ spinlock_t shca_lock;
+ struct timer_list timer;
+ kmem_cache_t *cache_pd;
+ kmem_cache_t *cache_cq;
+ kmem_cache_t *cache_qp;
+ kmem_cache_t *cache_av;
+ kmem_cache_t *cache_mr;
+ kmem_cache_t *cache_mw;
+ struct ehca_pfmodule pf;
+};
+
+/*
+ * Event queue: the ipz queue with its handle and galpas, plus the work
+ * item, tasklet and spinlock used with it.
+ * NOTE(review): "ist" is presumably the interrupt source token -- confirm.
+ */
+struct ehca_eq {
+ u32 length;
+ struct ipz_queue ipz_queue;
+ struct ipz_eq_handle ipz_eq_handle;
+ struct work_struct work;
+ struct h_galpas galpas;
+ int is_initialized;
+ struct ehca_pfeq pf;
+ spinlock_t spinlock;
+ struct tasklet_struct interrupt_task;
+ u32 ist;
+};
+
+/*
+ * Per-port state; struct ehca_shca holds two of these.
+ * NOTE(review): "aqp1" presumably refers to the port's special QP1 and
+ * its CQ -- confirm.
+ */
+struct ehca_sport {
+ struct ib_cq *ibcq_aqp1;
+ struct ib_qp *ibqp_aqp1;
+ enum ib_rate rate;
+ enum ib_port_state port_state;
+};
+
+/*
+ * One HCA instance.  Embeds the ib_device and carries the adapter handle,
+ * two port slots, two event queues (eq, neq) and the platform part.
+ * NOTE(review): maxmr/pd look like driver-internal MR and PD -- confirm.
+ */
+struct ehca_shca {
+ struct ib_device ib_device;
+ struct ibmebus_dev *ibmebus_dev;
+ u8 num_ports;
+ int hw_level;
+ struct list_head shca_list;
+ struct ipz_adapter_handle ipz_hca_handle;
+ struct ehca_sport sport[2];
+ struct ehca_eq eq;
+ struct ehca_eq neq;
+ struct ehca_mr *maxmr;
+ struct ehca_pd *pd;
+ struct ehca_pfshca pf;
+ struct h_galpas galpas;
+};
+
+/*
+ * Protection domain.  ownpid is compared against current->tgid by
+ * ehca_nopage() and ehca_mmap() before a QP of this PD is mapped.
+ */
+struct ehca_pd {
+ struct ib_pd ib_pd;
+ struct ipz_pd fw_pd;
+ struct ehca_pfpd pf;
+ u32 ownpid;
+};
+
+/*
+ * Queue pair.  ipz_squeue/ipz_rqueue are the send/receive queues whose
+ * pages ehca_nopage() hands out; galpas.user.fw_handle is the address
+ * ehca_mmap() remaps for resource type 1.
+ * NOTE(review): uspace_* presumably hold the user addresses of those
+ * mappings -- confirm in ehca_qp.c.
+ */
+struct ehca_qp {
+ struct ib_qp ib_qp;
+ u32 qp_type;
+ struct ipz_queue ipz_squeue;
+ struct ipz_queue ipz_rqueue;
+ struct h_galpas galpas;
+ u32 qkey;
+ u32 real_qp_num;
+ u32 token;
+ spinlock_t spinlock_s;
+ spinlock_t spinlock_r;
+ u32 sq_max_inline_data_size;
+ struct ipz_qp_handle ipz_qp_handle;
+ struct ehca_pfqp pf;
+ struct ib_qp_init_attr init_attr;
+ u64 uspace_squeue;
+ u64 uspace_rqueue;
+ u64 uspace_fwh;
+ struct ehca_cq *send_cq;
+ struct ehca_cq *recv_cq;
+ unsigned int sqerr_purgeflag;
+ struct hlist_node list_entries;
+};
+
+/* number of buckets in ehca_cq.qp_hashtab; must be power of 2 */
+#define QP_HASHTAB_LEN 8
+
+/*
+ * Completion queue.  ipz_queue is the queue whose pages ehca_nopage()
+ * hands out; galpas.user.fw_handle is the address ehca_mmap() remaps for
+ * resource type 1; ownpid is compared against current->tgid by both
+ * before mapping.
+ * NOTE(review): qp_hashtab presumably indexes the QPs assigned through
+ * ehca_cq_assign_qp() -- confirm in ehca_cq.c.
+ */
+struct ehca_cq {
+ struct ib_cq ib_cq;
+ struct ipz_queue ipz_queue;
+ struct h_galpas galpas;
+ spinlock_t spinlock;
+ u32 cq_number;
+ u32 token;
+ u32 nr_of_entries;
+ struct ipz_cq_handle ipz_cq_handle;
+ struct ehca_pfcq pf;
+ spinlock_t cb_lock;
+ u64 uspace_queue;
+ u64 uspace_fwh;
+ struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
+ struct list_head entry;
+ u32 nr_callbacks;
+ spinlock_t task_lock;
+ u32 ownpid;
+};
+
+/* flag values stored in ehca_mr.flags */
+enum ehca_mr_flag {
+ EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */
+ EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */
+};
+
+/*
+ * Memory region, used for both MRs and FMRs: the leading union holds
+ * either the ib_mr or the ib_fmr.
+ */
+struct ehca_mr {
+ union {
+ struct ib_mr ib_mr; /* must always be first in ehca_mr */
+ struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
+ } ib;
+ spinlock_t mrlock;
+
+ enum ehca_mr_flag flags;
+ u32 num_pages; /* number of MR pages */
+ u32 num_4k; /* number of 4k "page" portions to form MR */
+ int acl; /* ACL (stored here for usage in reregister) */
+ u64 *start; /* virtual start address (stored here for */
+ /* usage in reregister) */
+ u64 size; /* size (stored here for usage in reregister) */
+ u32 fmr_page_size; /* page size for FMR */
+ u32 fmr_max_pages; /* max pages for FMR */
+ u32 fmr_max_maps; /* max outstanding maps for FMR */
+ u32 fmr_map_cnt; /* map counter for FMR */
+ /* fw specific data */
+ struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */
+ struct h_galpas galpas;
+ /* data for userspace bridge */
+ u32 nr_of_pages;
+ void *pagearray;
+
+ struct ehca_pfmr pf; /* platform specific part of MR */
+};
+
+/* Memory window: the ib_mw plus its lock, h-call handle and galpas. */
+struct ehca_mw {
+ struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */
+ spinlock_t mwlock;
+
+ u8 never_bound; /* indication MW was never bound */
+ struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */
+ struct h_galpas galpas;
+
+ struct ehca_pfmw pf; /* platform specific part of MW */
+};
+
+enum ehca_mr_pgi_type {
+ EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr,
+ * ehca_rereg_phys_mr,
+ * ehca_reg_internal_maxmr */
+ EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */
+ EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */
+};
+
+/*
+ * Page iteration state for MR registration.  "type" selects which of the
+ * three sections below (PHYS, USER, FMR) is in use.
+ * NOTE(review): the *_cnt / next_* members look like running cursors into
+ * the page source -- confirm in ehca_mrmw.c.
+ */
+struct ehca_mr_pginfo {
+ enum ehca_mr_pgi_type type;
+ u64 num_pages;
+ u64 page_cnt;
+ u64 num_4k; /* number of 4k "page" portions */
+ u64 page_4k_cnt; /* counter for 4k "page" portions */
+ u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */
+
+ /* type EHCA_MR_PGI_PHYS section */
+ int num_phys_buf;
+ struct ib_phys_buf *phys_buf_array;
+ u64 next_buf;
+
+ /* type EHCA_MR_PGI_USER section */
+ struct ib_umem *region;
+ struct ib_umem_chunk *next_chunk;
+ u64 next_nmap;
+
+ /* type EHCA_MR_PGI_FMR section */
+ u64 *page_list;
+ u64 next_listelem;
+ /* next_4k also used within EHCA_MR_PGI_FMR */
+};
+
+/* output parameters for MR/FMR hipz calls */
+struct ehca_mr_hipzout_parms {
+ struct ipz_mrmw_handle handle;
+ u32 lkey;
+ u32 rkey;
+ u64 len;
+ u64 vaddr;
+ u32 acl;
+};
+
+/* output parameters for MW hipz calls */
+struct ehca_mw_hipzout_parms {
+ struct ipz_mrmw_handle handle;
+ u32 rkey;
+};
+
+/* Address handle: the ib_ah plus the ehca UD address vector. */
+struct ehca_av {
+ struct ib_ah ib_ah;
+ struct ehca_ud_av av;
+};
+
+/*
+ * User context; currently only wraps the ib_ucontext.  Allocated by
+ * ehca_alloc_ucontext() and freed by ehca_dealloc_ucontext(), which
+ * recovers it from the embedded member with container_of().
+ */
+struct ehca_ucontext {
+ struct ib_ucontext ib_ucontext;
+};
+
+struct ehca_module *ehca_module_new(void);
+
+int ehca_module_delete(struct ehca_module *me);
+
+int ehca_eq_ctor(struct ehca_eq *eq);
+
+int ehca_eq_dtor(struct ehca_eq *eq);
+
+struct ehca_shca *ehca_shca_new(void);
+
+int ehca_shca_delete(struct ehca_shca *me);
+
+struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
+
+extern spinlock_t ehca_qp_idr_lock;
+extern spinlock_t ehca_cq_idr_lock;
+extern struct idr ehca_qp_idr;
+extern struct idr ehca_cq_idr;
+
+/*
+ * Description of one queue, embedded in the create CQ/QP responses below.
+ * NOTE(review): presumably copied to userspace, which makes the layout
+ * ABI -- confirm before changing any member.
+ */
+struct ipzu_queue_resp {
+ u64 queue; /* points to first queue entry */
+ u32 qe_size; /* queue entry size */
+ u32 act_nr_of_sg;
+ u32 queue_length; /* queue length allocated in bytes */
+ u32 pagesize;
+ u32 toggle_state;
+ u32 dummy; /* padding for 8 byte alignment */
+};
+
+/*
+ * Create-CQ response: CQ number, token, queue description and galpas.
+ * NOTE(review): presumably returned to userspace -- confirm in ehca_cq.c.
+ */
+struct ehca_create_cq_resp {
+ u32 cq_number;
+ u32 token;
+ struct ipzu_queue_resp ipz_queue;
+ struct h_galpas galpas;
+};
+
+/*
+ * Create-QP response: QP identification, send and receive queue
+ * descriptions and galpas.
+ * NOTE(review): presumably returned to userspace -- confirm in ehca_qp.c.
+ */
+struct ehca_create_qp_resp {
+ u32 qp_num;
+ u32 token;
+ u32 qp_type;
+ u32 qkey;
+ /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
+ u32 real_qp_num;
+ u32 dummy; /* padding for 8 byte alignment */
+ struct ipzu_queue_resp ipz_squeue;
+ struct ipzu_queue_resp ipz_rqueue;
+ struct h_galpas galpas;
+};
+
+/*
+ * Parameters for allocating a CQ.
+ * NOTE(review): nr_cqe looks like the requested size and the act_*
+ * members like values reported back -- confirm against the hcall wrapper.
+ */
+struct ehca_alloc_cq_parms {
+ u32 nr_cqe;
+ u32 act_nr_of_entries;
+ u32 act_pages;
+ struct ipz_eq_handle eq_handle;
+};
+
+/*
+ * Parameters for allocating a QP.
+ * NOTE(review): the act_* and nr_*_pages members look like values
+ * reported back by the allocation -- confirm against the hcall wrapper.
+ */
+struct ehca_alloc_qp_parms {
+ int servicetype;
+ int sigtype;
+ int daqp_ctrl;
+ int max_send_sge;
+ int max_recv_sge;
+ int ud_av_l_key_ctl;
+
+ u16 act_nr_send_wqes;
+ u16 act_nr_recv_wqes;
+ u8 act_nr_recv_sges;
+ u8 act_nr_send_sges;
+
+ u32 nr_rq_pages;
+ u32 nr_sq_pages;
+
+ struct ipz_eq_handle ipz_eq_handle;
+ struct ipz_pd pd;
+};
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
+struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
+
+#endif
--- linux-2.6.17-rc2-orig/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.17-rc2/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h 2006-04-28 14:20:07.000000000 +0200
@@ -0,0 +1,251 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * pSeries interface definitions
+ *
+ * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_PSERIES_H__
+#define __EHCA_CLASSES_PSERIES_H__
+
+#include "hcp_phyp.h"
+#include "ipz_pt_fn.h"
+
+
+/*
+ * Platform specific ("pf") parts embedded as member "pf" in the generic
+ * ehca structures of ehca_classes.h.  Several are empty on pSeries.
+ */
+struct ehca_pfmodule {
+};
+
+struct ehca_pfshca {
+};
+
+/* QP: one ipz_qpt each for the send and the receive queue */
+struct ehca_pfqp {
+ struct ipz_qpt sqpt;
+ struct ipz_qpt rqpt;
+};
+
+struct ehca_pfcq {
+ struct ipz_qpt qpt;
+ u32 cqnr;
+};
+
+struct ehca_pfeq {
+ struct ipz_qpt qpt;
+ struct h_galpa galpa;
+ u32 eqnr;
+};
+
+struct ehca_pfpd {
+};
+
+struct ehca_pfmr {
+};
+
+struct ehca_pfmw {
+};
+
+/*
+ * Resource handles.  Each is a 64-bit value in its own struct type, so
+ * handles of different resource kinds cannot be mixed up by accident.
+ */
+struct ipz_adapter_handle {
+ u64 handle;
+};
+
+struct ipz_cq_handle {
+ u64 handle;
+};
+
+struct ipz_eq_handle {
+ u64 handle;
+};
+
+struct ipz_qp_handle {
+ u64 handle;
+};
+struct ipz_mrmw_handle {
+ u64 handle;
+};
+
+/* protection domain value (32 bit, unlike the 64-bit handles above) */
+struct ipz_pd {
+ u32 value;
+};
+
+/*
+ * Modify-QP control block.  The trailing comments give the index of each
+ * member in 32-bit words: u64 members take two words, the 16-byte GID
+ * unions four, and reserved_70_127 pads the block to 128 words.
+ * Members with an "_al" suffix repeat the path attributes.
+ * NOTE(review): "_al" presumably means the alternate path -- confirm.
+ */
+struct hcp_modify_qp_control_block {
+ u32 qkey; /* 00 */
+ u32 rdd; /* reliable datagram domain */
+ u32 send_psn; /* 02 */
+ u32 receive_psn; /* 03 */
+ u32 prim_phys_port; /* 04 */
+ u32 alt_phys_port; /* 05 */
+ u32 prim_p_key_idx; /* 06 */
+ u32 alt_p_key_idx; /* 07 */
+ u32 rdma_atomic_ctrl; /* 08 */
+ u32 qp_state; /* 09 */
+ u32 reserved_10; /* 10 */
+ u32 rdma_nr_atomic_resp_res; /* 11 */
+ u32 path_migration_state; /* 12 */
+ u32 rdma_atomic_outst_dest_qp; /* 13 */
+ u32 dest_qp_nr; /* 14 */
+ u32 min_rnr_nak_timer_field; /* 15 */
+ u32 service_level; /* 16 */
+ u32 send_grh_flag; /* 17 */
+ u32 retry_count; /* 18 */
+ u32 timeout; /* 19 */
+ u32 path_mtu; /* 20 */
+ u32 max_static_rate; /* 21 */
+ u32 dlid; /* 22 */
+ u32 rnr_retry_count; /* 23 */
+ u32 source_path_bits; /* 24 */
+ u32 traffic_class; /* 25 */
+ u32 hop_limit; /* 26 */
+ u32 source_gid_idx; /* 27 */
+ u32 flow_label; /* 28 */
+ u32 reserved_29; /* 29 */
+ union { /* 30 */
+ u64 dw[2];
+ u8 byte[16];
+ } dest_gid;
+ u32 service_level_al; /* 34 */
+ u32 send_grh_flag_al; /* 35 */
+ u32 retry_count_al; /* 36 */
+ u32 timeout_al; /* 37 */
+ u32 max_static_rate_al; /* 38 */
+ u32 dlid_al; /* 39 */
+ u32 rnr_retry_count_al; /* 40 */
+ u32 source_path_bits_al; /* 41 */
+ u32 traffic_class_al; /* 42 */
+ u32 hop_limit_al; /* 43 */
+ u32 source_gid_idx_al; /* 44 */
+ u32 flow_label_al; /* 45 */
+ u32 reserved_46; /* 46 */
+ u32 reserved_47; /* 47 */
+ union { /* 48 */
+ u64 dw[2];
+ u8 byte[16];
+ } dest_gid_al;
+ u32 max_nr_outst_send_wr; /* 52 */
+ u32 max_nr_outst_recv_wr; /* 53 */
+ u32 disable_ete_credit_check; /* 54 */
+ u32 qp_number; /* 55 */
+ u64 send_queue_handle; /* 56 */
+ u64 recv_queue_handle; /* 58 */
+ u32 actual_nr_sges_in_sq_wqe; /* 60 */
+ u32 actual_nr_sges_in_rq_wqe; /* 61 */
+ u32 qp_enable; /* 62 */
+ u32 curr_srq_limit; /* 63 */
+ u64 qp_aff_asyn_ev_log_reg; /* 64 */
+ u64 shared_rq_hndl; /* 66 */
+ u64 trigg_doorbell_qp_hndl; /* 68 */
+ u32 reserved_70_127[58]; /* 70 */
+};
+
+/*
+ * Bit masks for struct hcp_modify_qp_control_block, built with
+ * EHCA_BMASK_IBM(from, to).
+ * NOTE(review): MQPCB_MASK_<field> (a single bit each) looks like the
+ * selector marking <field> as valid in a modify-QP request, and
+ * MQPCB_<field> like the bit range used inside the 32-bit member --
+ * confirm against the hcall interface in hcp_if.c.
+ */
+#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0)
+#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2)
+#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3)
+#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4)
+#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5)
+#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6)
+#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7)
+#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8)
+#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9)
+#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11)
+#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12)
+#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13)
+#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14)
+#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15)
+#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16)
+#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17)
+#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18)
+#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19)
+#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20)
+#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21)
+#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22)
+#define MQPCB_DLID EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23)
+#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24)
+#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25)
+#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26)
+#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27)
+#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28)
+#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30)
+#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31)
+#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31)
+#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32)
+#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33)
+#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34)
+#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31)
+#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35)
+#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36)
+#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37)
+#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38)
+#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39)
+#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40)
+#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41)
+#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42)
+#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44)
+#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45)
+#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46)
+#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47)
+#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31)
+#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31)
+#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48)
+#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31)
+/*
+ * NOTE(review): "SQR" here vs. the member name curr_srq_limit in struct
+ * hcp_modify_qp_control_block -- likely a transposition of SRQ; a rename
+ * would have to cover every user of these macros.
+ */
+#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49)
+#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31)
+#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50)
+#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51)
+
+#endif /* __EHCA_CLASSES_PSERIES_H__ */
^ permalink raw reply
* [PATCH 01/16] ehca: module infrastructure
From: Heiko J Schick @ 2006-05-15 17:41 UTC (permalink / raw)
To: openib-general, Christoph Raisch, Hoang-Nam Nguyen, Marcus Eder,
schihei, linux-kernel, linuxppc-dev
Signed-off-by: Heiko J Schick <schickhj@de.ibm.com>
drivers/infiniband/hw/ehca/ehca_main.c | 966 +++++++++++++++++++++++++++++++++
1 file changed, 966 insertions(+)
--- linux-2.6.17-rc2-orig/drivers/infiniband/hw/ehca/ehca_main.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.17-rc2/drivers/infiniband/hw/ehca/ehca_main.c 2006-05-15 19:17:26.000000000 +0200
@@ -0,0 +1,966 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * module start stop, hca detection
+ *
+ * Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define DEB_PREFIX "shca"
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
+MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
+MODULE_VERSION("SVNEHCA_0006");
+
+/* Completion pool; created in ehca_module_init(), freed in ehca_module_exit(). */
+struct ehca_comp_pool* ehca_pool;
+
+/*
+ * Module parameters and their defaults. They are registered with
+ * permission 0 below, i.e. they are meant to be given at load time.
+ */
+int ehca_open_aqp1     = 0;	/* 1: create AQP1 for each port in probe */
+int ehca_debug_level   = -1;	/* -1: keep the compiled-in trace mask */
+int ehca_hw_level      = 0;	/* 0: sense hardware level from firmware */
+int ehca_nr_ports      = 2;	/* 1: restrict the adapter to one port */
+int ehca_use_hp_mr     = 0;
+int ehca_port_act_time = 30;
+int ehca_poll_all_eqs  = 1;	/* 1: start the periodic EQ poll timer */
+int ehca_static_rate   = -1;
+
+module_param_named(open_aqp1,     ehca_open_aqp1,     int, 0);
+module_param_named(debug_level,   ehca_debug_level,   int, 0);
+module_param_named(hw_level,      ehca_hw_level,      int, 0);
+module_param_named(nr_ports,      ehca_nr_ports,      int, 0);
+module_param_named(use_hp_mr,     ehca_use_hp_mr,     int, 0);
+module_param_named(port_act_time, ehca_port_act_time, int, 0);
+module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
+module_param_named(static_rate,   ehca_static_rate,   int, 0);
+
+MODULE_PARM_DESC(open_aqp1,
+		 "AQP1 on startup (0: no (default), 1: yes)");
+/* Fixed typo in the user-visible text: "0: node" -> "0: none". */
+MODULE_PARM_DESC(debug_level,
+		 "debug level"
+		 " (0: none, 6: only errors (default), 9: all)");
+MODULE_PARM_DESC(hw_level,
+		 "hardware level"
+		 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+MODULE_PARM_DESC(nr_ports,
+		 "number of connected ports (default: 2)");
+MODULE_PARM_DESC(use_hp_mr,
+		 "high performance MRs (0: no (default), 1: yes)");
+MODULE_PARM_DESC(port_act_time,
+		 "time to wait for port activation (default: 30 sec)");
+MODULE_PARM_DESC(poll_all_eqs,
+		 "polls all event queues periodically"
+		 " (0: no, 1: yes (default))");
+MODULE_PARM_DESC(static_rate,
+		 "set permanent static rate (default: disabled)");
+
+/*
+ * This external trace mask controls what will end up in the kernel
+ * ring buffer. An entry of 6 means that messages of levels 0 to 5
+ * are stored. All entries default to 6 except the last two, which
+ * default to 0.
+ *
+ * ehca_init_trace() overwrites every entry when the debug_level module
+ * parameter is given, and the debug_mask driver attribute allows
+ * changing single entries at run time (one decimal digit per entry).
+ * NOTE(review): which entry belongs to which trace source is defined
+ * outside this file.
+ */
+u8 ehca_edeb_mask[EHCA_EDEB_TRACE_MASK_SIZE]={6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 0, 0};
+
+/*
+ * IDR tables for QPs and CQs and the spinlocks declared alongside
+ * them. The locks are initialised in ehca_module_init(); the tables
+ * are destroyed in ehca_module_exit().
+ */
+spinlock_t ehca_qp_idr_lock;
+spinlock_t ehca_cq_idr_lock;
+DEFINE_IDR(ehca_qp_idr);
+DEFINE_IDR(ehca_cq_idr);
+
+/*
+ * Driver-wide state: slab caches, the list of probed adapters with
+ * its lock, and the event queue poll timer.
+ */
+struct ehca_module ehca_module;
+
+/*
+ * ehca_init_trace() - apply the debug_level module parameter.
+ *
+ * When debug_level was given (any value other than -1) every entry of
+ * ehca_edeb_mask is set to that level; otherwise the compiled-in
+ * defaults stay in place.
+ */
+void ehca_init_trace(void)
+{
+	EDEB_EN(7, "");
+
+	/* the mask is a byte array, so a byte-wise fill is sufficient */
+	if (ehca_debug_level != -1)
+		memset(ehca_edeb_mask, ehca_debug_level,
+		       sizeof(ehca_edeb_mask));
+
+	EDEB_EX(7, "");
+}
+
+/*
+ * ehca_create_slab_caches() - create the slab caches for PD, CQ, QP,
+ * AV, MW and MR objects and store them in @ehca_module.
+ *
+ * Returns 0 on success. If any cache cannot be created, the caches
+ * created so far are destroyed again and -ENOMEM is returned.
+ */
+int ehca_create_slab_caches(struct ehca_module *ehca_module)
+{
+	/* every failure in this function is an allocation failure */
+	int ret = -ENOMEM;
+
+	EDEB_EN(7, "");
+
+	ehca_module->cache_pd = kmem_cache_create("ehca_cache_pd",
+						  sizeof(struct ehca_pd),
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+	if (!ehca_module->cache_pd) {
+		EDEB_ERR(4, "Cannot create PD SLAB cache.");
+		goto out;
+	}
+
+	ehca_module->cache_cq = kmem_cache_create("ehca_cache_cq",
+						  sizeof(struct ehca_cq),
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+	if (!ehca_module->cache_cq) {
+		EDEB_ERR(4, "Cannot create CQ SLAB cache.");
+		goto undo_pd;
+	}
+
+	ehca_module->cache_qp = kmem_cache_create("ehca_cache_qp",
+						  sizeof(struct ehca_qp),
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+	if (!ehca_module->cache_qp) {
+		EDEB_ERR(4, "Cannot create QP SLAB cache.");
+		goto undo_cq;
+	}
+
+	ehca_module->cache_av = kmem_cache_create("ehca_cache_av",
+						  sizeof(struct ehca_av),
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+	if (!ehca_module->cache_av) {
+		EDEB_ERR(4, "Cannot create AV SLAB cache.");
+		goto undo_qp;
+	}
+
+	ehca_module->cache_mw = kmem_cache_create("ehca_cache_mw",
+						  sizeof(struct ehca_mw),
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+	if (!ehca_module->cache_mw) {
+		EDEB_ERR(4, "Cannot create MW SLAB cache.");
+		goto undo_av;
+	}
+
+	ehca_module->cache_mr = kmem_cache_create("ehca_cache_mr",
+						  sizeof(struct ehca_mr),
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+	if (!ehca_module->cache_mr) {
+		EDEB_ERR(4, "Cannot create MR SLAB cache.");
+		goto undo_mw;
+	}
+
+	/* all six caches exist */
+	ret = 0;
+	goto out;
+
+	/* unwind in reverse order of creation */
+undo_mw:
+	kmem_cache_destroy(ehca_module->cache_mw);
+undo_av:
+	kmem_cache_destroy(ehca_module->cache_av);
+undo_qp:
+	kmem_cache_destroy(ehca_module->cache_qp);
+undo_cq:
+	kmem_cache_destroy(ehca_module->cache_cq);
+undo_pd:
+	kmem_cache_destroy(ehca_module->cache_pd);
+out:
+	EDEB_EX(7, "ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * ehca_destroy_slab_caches() - destroy all slab caches held in
+ * @ehca_module.
+ *
+ * Every cache is destroyed even if an earlier one fails; each failure
+ * is logged. Returns 0 if all caches were destroyed, otherwise the
+ * first error reported by kmem_cache_destroy(). (The function used to
+ * return 0 unconditionally, so the caller's "!= 0" check never fired.)
+ */
+int ehca_destroy_slab_caches(struct ehca_module *ehca_module)
+{
+	int ret;
+	int first_err = 0;
+
+	EDEB_EN(7, "");
+
+	ret = kmem_cache_destroy(ehca_module->cache_pd);
+	if (ret) {
+		EDEB_ERR(4, "Cannot destroy PD SLAB cache. ret=%x", ret);
+		first_err = ret;
+	}
+
+	ret = kmem_cache_destroy(ehca_module->cache_cq);
+	if (ret) {
+		EDEB_ERR(4, "Cannot destroy CQ SLAB cache. ret=%x", ret);
+		if (!first_err)
+			first_err = ret;
+	}
+
+	ret = kmem_cache_destroy(ehca_module->cache_qp);
+	if (ret) {
+		EDEB_ERR(4, "Cannot destroy QP SLAB cache. ret=%x", ret);
+		if (!first_err)
+			first_err = ret;
+	}
+
+	ret = kmem_cache_destroy(ehca_module->cache_av);
+	if (ret) {
+		EDEB_ERR(4, "Cannot destroy AV SLAB cache. ret=%x", ret);
+		if (!first_err)
+			first_err = ret;
+	}
+
+	ret = kmem_cache_destroy(ehca_module->cache_mw);
+	if (ret) {
+		EDEB_ERR(4, "Cannot destroy MW SLAB cache. ret=%x", ret);
+		if (!first_err)
+			first_err = ret;
+	}
+
+	ret = kmem_cache_destroy(ehca_module->cache_mr);
+	if (ret) {
+		EDEB_ERR(4, "Cannot destroy MR SLAB cache. ret=%x", ret);
+		if (!first_err)
+			first_err = ret;
+	}
+
+	EDEB_EX(7, "ret=%x", first_err);
+
+	return first_err;
+}
+
+/* Bit fields of hipz_query_hca.hw_ver (IBM bit numbering). */
+#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39)
+#define EHCA_REVID EHCA_BMASK_IBM(40,63)
+
+/*
+ * ehca_sense_attributes() - query the adapter and fill in the port
+ * count, hardware level and port rates of @shca.
+ *
+ * Returns 0 on success, -ENOMEM if the query buffer cannot be
+ * allocated, -EPERM if the firmware query fails.
+ *
+ * NOTE(review): the hardware level is only derived when hw_level=0
+ * (autosensing); a non-zero hw_level parameter is not copied into
+ * shca->hw_level here - confirm that this is intended.
+ */
+int ehca_sense_attributes(struct ehca_shca *shca)
+{
+	struct hipz_query_hca *rblock;
+	u64 h_ret;
+	int ret;
+
+	EDEB_EN(7, "shca=%p", shca);
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		EDEB_ERR(4, "Cannot allocate rblock memory.");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
+	if (h_ret != H_SUCCESS) {
+		EDEB_ERR(4, "Cannot query device properties. h_ret=%lx", h_ret);
+		ret = -EPERM;
+		goto out_free;
+	}
+
+	/* nr_ports=1 overrides whatever the firmware reports */
+	shca->num_ports = (ehca_nr_ports == 1) ? 1 : (u8)rblock->num_ports;
+
+	EDEB(6, " ... found %x ports", rblock->num_ports);
+
+	if (ehca_hw_level == 0) {
+		u32 hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
+		u32 revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
+
+		EDEB(6, " ... hardware version=%x:%x",
+		     hcaaver, revid);
+
+		/* version 1, revisions 0..2 map directly to levels 0..2 */
+		if ((hcaaver == 1) && (revid <= 2))
+			shca->hw_level = revid;
+	}
+	EDEB(6, " ... hardware level=%x", shca->hw_level);
+
+	shca->sport[0].rate = IB_RATE_30_GBPS;
+	shca->sport[1].rate = IB_RATE_30_GBPS;
+
+	ret = 0;
+
+out_free:
+	kfree(rblock);
+
+out:
+	EDEB_EX(7, "ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * init_node_guid() - read the node GUID from firmware and store it in
+ * shca->ib_device.node_guid.
+ *
+ * Returns 0 on success, -ENOMEM if the query buffer cannot be
+ * allocated, -EINVAL if the firmware query fails.
+ */
+static int init_node_guid(struct ehca_shca* shca)
+{
+	struct hipz_query_hca *rblock;
+	int ret = 0;
+
+	EDEB_EN(7, "");
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (rblock) {
+		if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) ==
+		    H_SUCCESS) {
+			memcpy(&shca->ib_device.node_guid,
+			       &rblock->node_guid, (sizeof(u64)));
+		} else {
+			EDEB_ERR(4, "Can't query device properties");
+			ret = -EINVAL;
+		}
+		kfree(rblock);
+	} else {
+		EDEB_ERR(4, "Can't allocate rblock memory.");
+		ret = -ENOMEM;
+	}
+
+	EDEB_EX(7, "node_guid=%lx ret=%x", shca->ib_device.node_guid, ret);
+
+	return ret;
+}
+
+/*
+ * ehca_register_device() - fill in the ib_device embedded in @shca
+ * (node GUID, name template, supported user verbs, verb entry points)
+ * and register it with the InfiniBand core.
+ *
+ * Returns 0 on success, otherwise the error from init_node_guid() or
+ * ib_register_device().
+ */
+int ehca_register_device(struct ehca_shca *shca)
+{
+	struct ib_device *ibdev = &shca->ib_device;
+	int ret;
+
+	EDEB_EN(7, "shca=%p", shca);
+
+	ret = init_node_guid(shca);
+	if (ret)
+		return ret;
+
+	strlcpy(ibdev->name, "ehca%d", IB_DEVICE_NAME_MAX);
+	ibdev->owner = THIS_MODULE;
+
+	ibdev->uverbs_abi_ver = 5;
+	ibdev->uverbs_cmd_mask =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+
+	ibdev->node_type = RDMA_NODE_IB_CA;
+	ibdev->phys_port_cnt = shca->num_ports;
+	ibdev->dma_device = &shca->ibmebus_dev->ofdev.dev;
+
+	/*
+	 * Verb entry points. modify_device, modify_ah, peek_cq,
+	 * req_ncomp_notif and process_mad are not provided.
+	 */
+
+	/* device and port */
+	ibdev->query_device = ehca_query_device;
+	ibdev->query_port = ehca_query_port;
+	ibdev->query_gid = ehca_query_gid;
+	ibdev->query_pkey = ehca_query_pkey;
+	ibdev->modify_port = ehca_modify_port;
+
+	/* user context and protection domain */
+	ibdev->alloc_ucontext = ehca_alloc_ucontext;
+	ibdev->dealloc_ucontext = ehca_dealloc_ucontext;
+	ibdev->alloc_pd = ehca_alloc_pd;
+	ibdev->dealloc_pd = ehca_dealloc_pd;
+
+	/* address handle */
+	ibdev->create_ah = ehca_create_ah;
+	ibdev->query_ah = ehca_query_ah;
+	ibdev->destroy_ah = ehca_destroy_ah;
+
+	/* queue pair */
+	ibdev->create_qp = ehca_create_qp;
+	ibdev->modify_qp = ehca_modify_qp;
+	ibdev->query_qp = ehca_query_qp;
+	ibdev->destroy_qp = ehca_destroy_qp;
+	ibdev->post_send = ehca_post_send;
+	ibdev->post_recv = ehca_post_recv;
+
+	/* completion queue */
+	ibdev->create_cq = ehca_create_cq;
+	ibdev->destroy_cq = ehca_destroy_cq;
+	ibdev->resize_cq = ehca_resize_cq;
+	ibdev->poll_cq = ehca_poll_cq;
+	ibdev->req_notify_cq = ehca_req_notify_cq;
+
+	/* memory region, memory window, fast memory region */
+	ibdev->get_dma_mr = ehca_get_dma_mr;
+	ibdev->reg_phys_mr = ehca_reg_phys_mr;
+	ibdev->reg_user_mr = ehca_reg_user_mr;
+	ibdev->query_mr = ehca_query_mr;
+	ibdev->dereg_mr = ehca_dereg_mr;
+	ibdev->rereg_phys_mr = ehca_rereg_phys_mr;
+	ibdev->alloc_mw = ehca_alloc_mw;
+	ibdev->bind_mw = ehca_bind_mw;
+	ibdev->dealloc_mw = ehca_dealloc_mw;
+	ibdev->alloc_fmr = ehca_alloc_fmr;
+	ibdev->map_phys_fmr = ehca_map_phys_fmr;
+	ibdev->unmap_fmr = ehca_unmap_fmr;
+	ibdev->dealloc_fmr = ehca_dealloc_fmr;
+
+	/* multicast and userspace mapping */
+	ibdev->attach_mcast = ehca_attach_mcast;
+	ibdev->detach_mcast = ehca_detach_mcast;
+	ibdev->mmap = ehca_mmap;
+
+	ret = ib_register_device(ibdev);
+
+	EDEB_EX(7, "ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * ehca_create_aqp1() - create the AQP1 completion queue and GSI queue
+ * pair for @port (1-based) of @shca and remember both in the port's
+ * ehca_sport.
+ *
+ * Returns 0 on success, -EPERM if a CQ or QP is already recorded for
+ * the port, or the error from ib_create_cq()/ib_create_qp().
+ *
+ * On failure after the CQ was created, the CQ is destroyed and
+ * sport->ibcq_aqp1 is reset to NULL. Previously the stale pointer was
+ * left behind, which made later calls report "already created" and
+ * exposed ehca_destroy_aqp1() to a destroyed CQ.
+ */
+static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+{
+	struct ehca_sport *sport;
+	struct ib_cq *ibcq;
+	struct ib_qp *ibqp;
+	struct ib_qp_init_attr qp_init_attr;
+	int ret = 0;
+
+	EDEB_EN(7, "shca=%p port=%x", shca, port);
+
+	sport = &shca->sport[port - 1];
+
+	if (sport->ibcq_aqp1) {
+		EDEB_ERR(4, "AQP1 CQ is already created.");
+		return -EPERM;
+	}
+
+	ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10);
+	if (IS_ERR(ibcq)) {
+		EDEB_ERR(4, "Cannot create AQP1 CQ.");
+		return PTR_ERR(ibcq);
+	}
+	sport->ibcq_aqp1 = ibcq;
+
+	if (sport->ibqp_aqp1) {
+		EDEB_ERR(4, "AQP1 QP is already created.");
+		ret = -EPERM;
+		goto create_aqp1;
+	}
+
+	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
+	qp_init_attr.send_cq = ibcq;
+	qp_init_attr.recv_cq = ibcq;
+	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_init_attr.cap.max_send_wr = 100;
+	qp_init_attr.cap.max_recv_wr = 100;
+	qp_init_attr.cap.max_send_sge = 2;
+	qp_init_attr.cap.max_recv_sge = 1;
+	qp_init_attr.qp_type = IB_QPT_GSI;
+	qp_init_attr.port_num = port;
+	qp_init_attr.qp_context = NULL;
+	qp_init_attr.event_handler = NULL;
+	qp_init_attr.srq = NULL;
+
+	ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
+	if (IS_ERR(ibqp)) {
+		EDEB_ERR(4, "Cannot create AQP1 QP.");
+		ret = PTR_ERR(ibqp);
+		goto create_aqp1;
+	}
+	sport->ibqp_aqp1 = ibqp;
+
+	EDEB_EX(7, "ret=%x", ret);
+
+	return ret;
+
+create_aqp1:
+	ib_destroy_cq(sport->ibcq_aqp1);
+	/* do not leave a pointer to the destroyed CQ behind */
+	sport->ibcq_aqp1 = NULL;
+
+	EDEB_EX(7, "ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * ehca_destroy_aqp1() - destroy the AQP1 queue pair and then the AQP1
+ * completion queue of @sport.
+ *
+ * Returns 0 on success or the first error from ib_destroy_qp() /
+ * ib_destroy_cq(). If the QP cannot be destroyed the CQ is left
+ * alone.
+ *
+ * Each handle is reset to NULL once its object is gone, because
+ * ehca_create_aqp1() treats a non-NULL handle as "already created".
+ */
+static int ehca_destroy_aqp1(struct ehca_sport *sport)
+{
+	int ret = 0;
+
+	EDEB_EN(7, "sport=%p", sport);
+
+	ret = ib_destroy_qp(sport->ibqp_aqp1);
+	if (ret) {
+		EDEB_ERR(4, "Cannot destroy AQP1 QP. ret=%x", ret);
+		goto destroy_aqp1;
+	}
+	sport->ibqp_aqp1 = NULL;
+
+	ret = ib_destroy_cq(sport->ibcq_aqp1);
+	if (ret)
+		EDEB_ERR(4, "Cannot destroy AQP1 CQ. ret=%x", ret);
+	else
+		sport->ibcq_aqp1 = NULL;
+
+destroy_aqp1:
+	EDEB_EX(7, "ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * ehca_show_debug_mask() - sysfs "show" routine of the debug_mask
+ * driver attribute.
+ *
+ * Writes every entry of ehca_edeb_mask in decimal, without separators,
+ * followed by a newline, and returns the number of characters
+ * produced.
+ */
+static ssize_t ehca_show_debug_mask(struct device_driver *ddp, char *buf)
+{
+	int len = 0;
+	int idx;
+
+	for (idx = 0; idx < EHCA_EDEB_TRACE_MASK_SIZE; idx++)
+		len += snprintf(buf + len, PAGE_SIZE - len, "%d",
+				ehca_edeb_mask[idx]);
+
+	len += snprintf(buf + len, PAGE_SIZE - len, "\n");
+
+	return len;
+}
+
+/*
+ * ehca_store_debug_mask() - sysfs "store" routine of the debug_mask
+ * driver attribute.
+ *
+ * The character at position i of @buf sets ehca_edeb_mask[i] when it
+ * is a decimal digit; any other character (e.g. the trailing newline)
+ * leaves that entry unchanged. Input beyond the size of the mask is
+ * ignored. Always consumes the whole input and returns @count.
+ *
+ * Only the first @count bytes of @buf are examined. The previous
+ * version read buf[i] before checking the length, used "count >= i"
+ * (one byte too many) and relied on "char" being unsigned to reject
+ * characters below '0'.
+ */
+static ssize_t ehca_store_debug_mask(struct device_driver *ddp,
+				     const char *buf, size_t count)
+{
+	size_t i;
+
+	for (i = 0; (i < count) && (i < EHCA_EDEB_TRACE_MASK_SIZE); i++) {
+		if ((buf[i] >= '0') && (buf[i] <= '9'))
+			ehca_edeb_mask[i] = buf[i] - '0';
+	}
+
+	return count;
+}
+/* Driver attribute "debug_mask": readable and writable by the owner only. */
+DRIVER_ATTR(debug_mask, S_IRUSR | S_IWUSR,
+ ehca_show_debug_mask, ehca_store_debug_mask);
+
+/*
+ * ehca_create_driver_sysfs() - add the debug_mask attribute file to
+ * the driver embedded in @drv.
+ * NOTE(review): the result of driver_create_file() is not checked.
+ */
+void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
+{
+ driver_create_file(&drv->driver, &driver_attr_debug_mask);
+}
+
+/*
+ * ehca_remove_driver_sysfs() - remove the debug_mask attribute file
+ * from the driver embedded in @drv.
+ */
+void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
+{
+ driver_remove_file(&drv->driver, &driver_attr_debug_mask);
+}
+
+/*
+ * EHCA_RESOURCE_ATTR(name) - define a read-only device attribute
+ * "name" plus its show routine ehca_show_<name>().
+ *
+ * The show routine queries the adapter (hipz_h_query_hca) on every
+ * read and prints field "name" of the returned hipz_query_hca block
+ * as a decimal integer. For num_ports it prints 1 when the nr_ports
+ * module parameter is 1.
+ *
+ * Failures are reported to the reader as -ENOMEM (no query buffer) or
+ * -EINVAL (firmware query failed) instead of a silent empty read, and
+ * the output is bounded by PAGE_SIZE, matching ehca_show_debug_mask().
+ */
+#define EHCA_RESOURCE_ATTR(name) \
+static ssize_t ehca_show_##name(struct device *dev, \
+				struct device_attribute *attr, \
+				char *buf) \
+{ \
+	struct ehca_shca *shca; \
+	struct hipz_query_hca *rblock; \
+	int data; \
+ \
+	shca = dev->driver_data; \
+ \
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); \
+	if (!rblock) { \
+		EDEB_ERR(4, "Can't allocate rblock memory."); \
+		return -ENOMEM; \
+	} \
+ \
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
+		EDEB_ERR(4, "Can't query device properties"); \
+		kfree(rblock); \
+		return -EINVAL; \
+	} \
+ \
+	data = rblock->name; \
+	kfree(rblock); \
+ \
+	if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \
+		return snprintf(buf, PAGE_SIZE, "1\n"); \
+	else \
+		return snprintf(buf, PAGE_SIZE, "%d\n", data); \
+ \
+} \
+static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
+
+EHCA_RESOURCE_ATTR(num_ports);
+EHCA_RESOURCE_ATTR(hw_ver);
+EHCA_RESOURCE_ATTR(max_eq);
+EHCA_RESOURCE_ATTR(cur_eq);
+EHCA_RESOURCE_ATTR(max_cq);
+EHCA_RESOURCE_ATTR(cur_cq);
+EHCA_RESOURCE_ATTR(max_qp);
+EHCA_RESOURCE_ATTR(cur_qp);
+EHCA_RESOURCE_ATTR(max_mr);
+EHCA_RESOURCE_ATTR(cur_mr);
+EHCA_RESOURCE_ATTR(max_mw);
+EHCA_RESOURCE_ATTR(cur_mw);
+EHCA_RESOURCE_ATTR(max_pd);
+EHCA_RESOURCE_ATTR(max_ah);
+
+/*
+ * ehca_show_adapter_handle() - sysfs "show" routine of the read-only
+ * adapter_handle device attribute; prints the adapter handle stored in
+ * the device's ehca_shca in hexadecimal.
+ */
+static ssize_t ehca_show_adapter_handle(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	const struct ehca_shca *adapter = dev->driver_data;
+
+	return sprintf(buf, "%lx\n", adapter->ipz_hca_handle.handle);
+}
+static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+
+
+
+/*
+ * ehca_create_device_sysfs() - create the adapter_handle attribute and
+ * all resource attributes for the device embedded in @dev.
+ */
+void ehca_create_device_sysfs(struct ibmebus_dev *dev)
+{
+	/* attributes in the order in which they are created */
+	static struct device_attribute * const attrs[] = {
+		&dev_attr_adapter_handle,
+		&dev_attr_num_ports,
+		&dev_attr_hw_ver,
+		&dev_attr_max_eq,
+		&dev_attr_cur_eq,
+		&dev_attr_max_cq,
+		&dev_attr_cur_cq,
+		&dev_attr_max_qp,
+		&dev_attr_cur_qp,
+		&dev_attr_max_mr,
+		&dev_attr_cur_mr,
+		&dev_attr_max_mw,
+		&dev_attr_cur_mw,
+		&dev_attr_max_pd,
+		&dev_attr_max_ah,
+	};
+	unsigned int n;
+
+	for (n = 0; n < sizeof(attrs) / sizeof(attrs[0]); n++)
+		device_create_file(&dev->ofdev.dev, attrs[n]);
+}
+
+/*
+ * ehca_remove_device_sysfs() - remove the adapter_handle attribute and
+ * all resource attributes from the device embedded in @dev.
+ */
+void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
+{
+	/* attributes in the order in which they are removed */
+	static struct device_attribute * const attrs[] = {
+		&dev_attr_adapter_handle,
+		&dev_attr_num_ports,
+		&dev_attr_hw_ver,
+		&dev_attr_max_eq,
+		&dev_attr_cur_eq,
+		&dev_attr_max_cq,
+		&dev_attr_cur_cq,
+		&dev_attr_max_qp,
+		&dev_attr_cur_qp,
+		&dev_attr_max_mr,
+		&dev_attr_cur_mr,
+		&dev_attr_max_mw,
+		&dev_attr_cur_mw,
+		&dev_attr_max_pd,
+		&dev_attr_max_ah,
+	};
+	unsigned int n;
+
+	for (n = 0; n < sizeof(attrs) / sizeof(attrs[0]); n++)
+		device_remove_file(&dev->ofdev.dev, attrs[n]);
+}
+
+/*
+ * ehca_probe() - bring up one eHCA adapter found on the ibmebus.
+ *
+ * Reads the "ibm,hca-handle" property, allocates the ehca_shca,
+ * senses the adapter attributes, creates EQ and NEQ, the internal PD
+ * and max MR, registers the IB device, optionally creates AQP1 for
+ * port 1 and 2 (open_aqp1=1), creates the sysfs attributes and
+ * finally adds the adapter to ehca_module.shca_list.
+ *
+ * Returns 0 on success. On failure everything set up so far is torn
+ * down in reverse order and the error code of the step that failed is
+ * returned. (Previously the unwind steps overwrote that code and the
+ * function always returned -EINVAL.)
+ */
+static int __devinit ehca_probe(struct ibmebus_dev *dev,
+				const struct of_device_id *id)
+{
+	struct ehca_shca *shca;
+	u64 *handle;
+	struct ib_pd *ibpd;
+	int ret = 0;	/* error of the failing setup step */
+	int rc;		/* status of single unwind steps, logged only */
+
+	EDEB_EN(7, "name=%s", dev->name);
+
+	handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
+	if (!handle) {
+		EDEB_ERR(4, "Cannot get eHCA handle for adapter: %s.",
+			 dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	if (!(*handle)) {
+		EDEB_ERR(4, "Wrong eHCA handle for adapter: %s.",
+			 dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
+	if (shca == NULL) {
+		EDEB_ERR(4, "Cannot allocate shca memory.");
+		return -ENOMEM;
+	}
+
+	shca->ibmebus_dev = dev;
+	shca->ipz_hca_handle.handle = *handle;
+	dev->ofdev.dev.driver_data = shca;
+
+	ret = ehca_sense_attributes(shca);
+	if (ret < 0) {
+		EDEB_ERR(4, "Cannot sense eHCA attributes.");
+		goto probe1;
+	}
+
+	/* create event queues */
+	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+	if (ret) {
+		EDEB_ERR(4, "Cannot create EQ.");
+		goto probe1;
+	}
+
+	ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
+	if (ret) {
+		EDEB_ERR(4, "Cannot create NEQ.");
+		goto probe2;
+	}
+
+	/* create internal protection domain */
+	ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL);
+	if (IS_ERR(ibpd)) {
+		EDEB_ERR(4, "Cannot create internal PD.");
+		ret = PTR_ERR(ibpd);
+		goto probe3;
+	}
+
+	shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
+	shca->pd->ib_pd.device = &shca->ib_device;
+
+	/* create internal max MR */
+	ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
+	if (ret) {
+		EDEB_ERR(4, "Cannot create internal MR. ret=%x", ret);
+		goto probe4;
+	}
+
+	ret = ehca_register_device(shca);
+	if (ret) {
+		EDEB_ERR(4, "Cannot register Infiniband device.");
+		goto probe5;
+	}
+
+	/* create AQP1 for port 1 */
+	if (ehca_open_aqp1 == 1) {
+		shca->sport[0].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 1);
+		if (ret) {
+			EDEB_ERR(4, "Cannot create AQP1 for port 1.");
+			goto probe6;
+		}
+	}
+
+	/* create AQP1 for port 2 */
+	if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
+		shca->sport[1].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 2);
+		if (ret) {
+			EDEB_ERR(4, "Cannot create AQP1 for port 2.");
+			goto probe7;
+		}
+	}
+
+	ehca_create_device_sysfs(dev);
+
+	spin_lock(&ehca_module.shca_lock);
+	list_add(&shca->shca_list, &ehca_module.shca_list);
+	spin_unlock(&ehca_module.shca_lock);
+
+	EDEB_EX(7, "ret=%x", ret);
+
+	return 0;
+
+	/*
+	 * Error unwinding. Failures of the unwind steps are only logged;
+	 * "ret" keeps the error that made the probe fail.
+	 */
+probe7:
+	rc = ehca_destroy_aqp1(&shca->sport[0]);
+	if (rc)
+		EDEB_ERR(4, "Cannot destroy AQP1 for port 1. ret=%x", rc);
+
+probe6:
+	ib_unregister_device(&shca->ib_device);
+
+probe5:
+	rc = ehca_dereg_internal_maxmr(shca);
+	if (rc)
+		EDEB_ERR(4, "Cannot destroy internal MR. ret=%x", rc);
+
+probe4:
+	rc = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (rc != 0)
+		EDEB_ERR(4, "Cannot destroy internal PD. ret=%x", rc);
+
+probe3:
+	rc = ehca_destroy_eq(shca, &shca->neq);
+	if (rc != 0)
+		EDEB_ERR(4, "Cannot destroy NEQ. ret=%x", rc);
+
+probe2:
+	rc = ehca_destroy_eq(shca, &shca->eq);
+	if (rc != 0)
+		EDEB_ERR(4, "Cannot destroy EQ. ret=%x", rc);
+
+probe1:
+	ib_dealloc_device(&shca->ib_device);
+
+	EDEB_EX(4, "ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * ehca_remove() - tear down one adapter set up by ehca_probe().
+ *
+ * The adapter is taken off ehca_module.shca_list first, so that it is
+ * no longer reachable through the list while its resources are
+ * released, and - above all - before ib_dealloc_device() gives up the
+ * ehca_shca that contains the list node. (The list entry used to be
+ * removed after ib_dealloc_device(), i.e. through a structure that
+ * had already been released.)
+ *
+ * Failures of single teardown steps are logged and the teardown goes
+ * on. Returns the status of the last step that can fail (destroying
+ * the NEQ), as before.
+ */
+static int __devexit ehca_remove(struct ibmebus_dev *dev)
+{
+	struct ehca_shca *shca = dev->ofdev.dev.driver_data;
+	int ret;
+
+	EDEB_EN(7, "shca=%p", shca);
+
+	spin_lock(&ehca_module.shca_lock);
+	list_del(&shca->shca_list);
+	spin_unlock(&ehca_module.shca_lock);
+
+	ehca_remove_device_sysfs(dev);
+
+	if (ehca_open_aqp1 == 1) {
+		int i;
+
+		for (i = 0; i < shca->num_ports; i++) {
+			ret = ehca_destroy_aqp1(&shca->sport[i]);
+			/* ports are numbered from 1 in messages */
+			if (ret != 0)
+				EDEB_ERR(4, "Cannot destroy AQP1 for port %x."
+					 " ret=%x", i + 1, ret);
+		}
+	}
+
+	ib_unregister_device(&shca->ib_device);
+
+	ret = ehca_dereg_internal_maxmr(shca);
+	if (ret)
+		EDEB_ERR(4, "Cannot destroy internal MR. ret=%x", ret);
+
+	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (ret)
+		EDEB_ERR(4, "Cannot destroy internal PD. ret=%x", ret);
+
+	ret = ehca_destroy_eq(shca, &shca->eq);
+	if (ret)
+		EDEB_ERR(4, "Cannot destroy EQ. ret=%x", ret);
+
+	ret = ehca_destroy_eq(shca, &shca->neq);
+	if (ret)
+		EDEB_ERR(4, "Cannot destroy NEQ. ret=%x", ret);
+
+	/* must be last: shca must not be touched afterwards */
+	ib_dealloc_device(&shca->ib_device);
+
+	EDEB_EX(7, "ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * Device match table: one entry with name "lhca" and compatible
+ * "IBM,lhca", followed by an empty terminating entry.
+ */
+static struct of_device_id ehca_device_table[] =
+{
+ {
+ .name = "lhca",
+ .compatible = "IBM,lhca",
+ },
+ {},
+};
+
+/*
+ * ibmebus driver description: ties the match table above to the
+ * probe and remove routines. Registered in ehca_module_init() and
+ * unregistered in ehca_module_exit().
+ */
+static struct ibmebus_driver ehca_driver = {
+ .name = "ehca",
+ .id_table = ehca_device_table,
+ .probe = ehca_probe,
+ .remove = ehca_remove,
+};
+
+/*
+ * ehca_module_init() - module entry point.
+ *
+ * Initialises the IDR tables, locks and adapter list, applies the
+ * trace level, creates the completion pool and the slab caches,
+ * registers the ibmebus driver together with its sysfs attribute and,
+ * if poll_all_eqs=1, starts the timer that runs ehca_poll_eqs() one
+ * second from now.
+ *
+ * Returns 0 on success, -EINVAL if the completion pool or the driver
+ * registration fails, -ENOMEM if the slab caches cannot be created.
+ */
+int __init ehca_module_init(void)
+{
+	int ret;
+
+	printk(KERN_INFO "eHCA Infiniband Device Driver "
+	       "(Rel.: SVNEHCA_0006)\n");
+	EDEB_EN(7, "");
+
+	idr_init(&ehca_qp_idr);
+	idr_init(&ehca_cq_idr);
+	spin_lock_init(&ehca_qp_idr_lock);
+	spin_lock_init(&ehca_cq_idr_lock);
+
+	INIT_LIST_HEAD(&ehca_module.shca_list);
+	spin_lock_init(&ehca_module.shca_lock);
+
+	ehca_init_trace();
+
+	ehca_pool = ehca_create_comp_pool();
+	if (!ehca_pool) {
+		EDEB_ERR(4, "Cannot create comp pool.");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = ehca_create_slab_caches(&ehca_module);
+	if (ret) {
+		EDEB_ERR(4, "Cannot create SLAB caches");
+		ret = -ENOMEM;
+		goto undo_comp_pool;
+	}
+
+	ret = ibmebus_register_driver(&ehca_driver);
+	if (ret) {
+		EDEB_ERR(4, "Cannot register eHCA device driver");
+		ret = -EINVAL;
+		goto undo_slab_caches;
+	}
+
+	ehca_create_driver_sysfs(&ehca_driver);
+
+	if (ehca_poll_all_eqs == 1) {
+		/* first run of ehca_poll_eqs() in one second */
+		init_timer(&ehca_module.timer);
+		ehca_module.timer.function = ehca_poll_eqs;
+		ehca_module.timer.data = (unsigned long)(void*)&ehca_module;
+		ehca_module.timer.expires = jiffies + HZ;
+		add_timer(&ehca_module.timer);
+
+		EDEB_EX(7, "ret=%x", ret);
+
+		return 0;
+	}
+
+	/* polling disabled: warn and finish without a timer */
+	EDEB_ERR(4, "WARNING!!!");
+	EDEB_ERR(4, "It is possible to lose interrupts.");
+
+	return 0;
+
+undo_slab_caches:
+	ehca_destroy_slab_caches(&ehca_module);
+
+undo_comp_pool:
+	ehca_destroy_comp_pool(ehca_pool);
+
+out:
+	EDEB_EX(7, "ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * ehca_module_exit() - module exit point; undoes ehca_module_init().
+ *
+ * Stops the EQ poll timer (only started when poll_all_eqs=1), removes
+ * the driver sysfs attribute, unregisters the ibmebus driver and
+ * releases the slab caches, the completion pool and both IDR tables.
+ */
+void __exit ehca_module_exit(void)
+{
+	EDEB_EN(7, "");
+
+	if (ehca_poll_all_eqs == 1)
+		del_timer_sync(&ehca_module.timer);
+
+	ehca_remove_driver_sysfs(&ehca_driver);
+	ibmebus_unregister_driver(&ehca_driver);
+
+	if (ehca_destroy_slab_caches(&ehca_module))
+		EDEB_ERR(4, "Cannot destroy SLAB caches");
+
+	ehca_destroy_comp_pool(ehca_pool);
+
+	idr_destroy(&ehca_cq_idr);
+	idr_destroy(&ehca_qp_idr);
+
+	EDEB_EX(7, "");
+}
+
+module_init(ehca_module_init);
+module_exit(ehca_module_exit);
^ permalink raw reply
* [PATCH 00/16] ehca: IBM eHCA InfiniBand Device Driver
From: Heiko J Schick @ 2006-05-15 17:41 UTC (permalink / raw)
To: openib-general, Christoph Raisch, Hoang-Nam Nguyen, Marcus Eder,
schihei, linux-kernel, linuxppc-dev
Hello,
many thanks for your comments. They are very helpful for us. All
17 patches have to be applied, otherwise the driver won't compile.
We would appreciate any comments and feedback.
Signed-off-by: Heiko J Schick <schickhj@de.ibm.com>
Changelog-by: Heiko J Schick <schickhj@de.ibm.com>
Changelog:
Differences to PatchSet http://openib.org/pipermail/openib-general/2006-April/020584.html
Differences to PatchSet http://openib.org/pipermail/openib-general/2006-March/018144.html
Differences to PatchSet http://openib.org/pipermail/openib-general/2006-March/017412.html
- Linux kernel coding style
- Reduce number of parameters passed to firmware interface wrappers
- Remove ehca_kernel.h
- Remove implementation of plpar_hcall_7arg_7ret() and plpar_hcall_9arg_9ret(),
which are now included in kernel code
- Remove simulation stub
drivers/infiniband/hw/ehca/Kconfig | 6
drivers/infiniband/hw/ehca/Makefile | 16
drivers/infiniband/hw/ehca/ehca_av.c | 306 ++
drivers/infiniband/hw/ehca/ehca_classes.h | 350 +++
drivers/infiniband/hw/ehca/ehca_classes_pSeries.h | 251 ++
drivers/infiniband/hw/ehca/ehca_cq.c | 431 +++
drivers/infiniband/hw/ehca/ehca_eq.c | 222 +
drivers/infiniband/hw/ehca/ehca_hca.c | 282 ++
drivers/infiniband/hw/ehca/ehca_irq.c | 710 ++++++
drivers/infiniband/hw/ehca/ehca_irq.h | 77
drivers/infiniband/hw/ehca/ehca_iverbs.h | 181 +
drivers/infiniband/hw/ehca/ehca_main.c | 966 ++++++++
drivers/infiniband/hw/ehca/ehca_mcast.c | 194 +
drivers/infiniband/hw/ehca/ehca_mrmw.c | 2474 ++++++++++++++++++++++
drivers/infiniband/hw/ehca/ehca_mrmw.h | 143 +
drivers/infiniband/hw/ehca/ehca_pd.c | 118 +
drivers/infiniband/hw/ehca/ehca_qes.h | 274 ++
drivers/infiniband/hw/ehca/ehca_qp.c | 1565 +++++++++++++
drivers/infiniband/hw/ehca/ehca_reqs.c | 683 ++++++
drivers/infiniband/hw/ehca/ehca_sqp.c | 123 +
drivers/infiniband/hw/ehca/ehca_tools.h | 411 +++
drivers/infiniband/hw/ehca/ehca_uverbs.c | 391 +++
drivers/infiniband/hw/ehca/hcp_if.c | 1476 +++++++++++++
drivers/infiniband/hw/ehca/hcp_if.h | 330 ++
drivers/infiniband/hw/ehca/hcp_phyp.c | 92
drivers/infiniband/hw/ehca/hcp_phyp.h | 95
drivers/infiniband/hw/ehca/hipz_fns.h | 68
drivers/infiniband/hw/ehca/hipz_fns_core.h | 122 +
drivers/infiniband/hw/ehca/hipz_hw.h | 395 +++
drivers/infiniband/hw/ehca/ipz_pt_fn.c | 177 +
drivers/infiniband/hw/ehca/ipz_pt_fn.h | 254 ++
31 files changed, 13183 insertions(+)
^ permalink raw reply
* [PATCH] Export PowerPC atomic operations to userspace
From: Brent Cook @ 2006-05-15 16:54 UTC (permalink / raw)
To: linuxppc-embedded
The atomic operations in asm/atomic.h are really useful from userspace too.
Other architectures (i386, x86_64, mips) export these to userspace, but the
powerpc versions are guarded by __KERNEL__ for some reason. Can we remove
these if there is no good reason to guard them?
Signed-off-by: Brent Cook <bcook@bpointsys.com>
Index: linux-2.6-bps/include/asm-powerpc/atomic.h
===================================================================
--- linux-2.6-bps/include/asm-powerpc/atomic.h (revision 77)
+++ linux-2.6-bps/include/asm-powerpc/atomic.h (working copy)
@@ -7,7 +7,6 @@
typedef struct { volatile int counter; } atomic_t;
-#ifdef __KERNEL__
#include <linux/compiler.h>
#include <asm/synch.h>
#include <asm/asm-compat.h>
@@ -414,5 +413,4 @@
#endif /* __powerpc64__ */
#include <asm-generic/atomic.h>
-#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_ATOMIC_H_ */
^ permalink raw reply
* MPC8248 goes berserk when printing messages on the SMC console
From: Laurent Pinchart @ 2006-05-15 16:28 UTC (permalink / raw)
To: linuxppc-embedded
Hi everybody,
I'm trying to convince an MPC8248 board to boot Linux, and I'm having problems
with the SMC console. I posted an e-mail on linuxppc-embedded regarding
BDI2000 problems earlier today. The problem hasn't been solved, but I've been
able to work around it by adding an infinite loop in the kernel code at the
point where I want to break, halting the execution when the loop has been
reached, and using gdb from there.
The SMC console works fine in U-Boot. 115200 8N1, no problem there.
When booting a Linux 2.6.16 kernel, the processor doesn't print anything on
the console and seems to hang. Further analysis with a BDI2000 reveals the
following.
- The CPU is stuck in cpm_uart_console_write, checking for
while ((bdp->cbd_sc & BD_SC_READY) != 0);
At that point, most of the external and internal memory read as 0 (SDRAM,
flash, ...). A few memory locations contain values different than 0, and I
suspect those to come directly from the cache.
- If I break right before setting the BD_SC_READY flag and run using stepi,
random characters are sent on the serial port. Measuring the baudrate using
an oscilloscope confirms that it has been correctly configured. Only the data
is wrong.
- If I remove the lines
bdp->cbd_sc |= BD_SC_READY;
the kernel runs until it panics when mounting the root partition.
- If I disable the SMC transmitter (by not setting SMCMR_TEN in SMCMR), the
kernel hangs when wrapping around to the first buffer descriptor, waiting for
it to be ready, which of course never happens.
This seems to confirm that the CPM does something very harmful to the CPU. I'm
completely lost here, and would appreciate any help.
Best regards,
Laurent Pinchart
^ permalink raw reply
* Re: [PATCH] Fix pSeries identification in prom_init.c
From: Michael Neuling @ 2006-05-15 16:17 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev list, Paul Mackerras, segher
In-Reply-To: <1147672282.21291.92.camel@localhost.localdomain>
> > The OF trampoline code prom_init.c still needs to identify IBM
> > pSeries (PAPR) machines in order to run some platform specific code
> > on them like instanciating the TCE tables. The code doing that
> > detection was changed recently in 2.6.17 early stages but was done
> > slightly incorrectly. It should be testing for an exact match of
> > "chrp" and it currently tests for anything that begins with
> > "chrp". That means it will incorrectly match with platforms using
> > Maple-like device-trees and have open firmware. This fixes it by
> > using strcmp instead of strncmp to match what the actual platform
> > detection code does.
>
> Michael, I noticed you changed strcmp to strncmp, any reason why you
> did that ?
To be safe if we are returned a non terminated string. I'd not realised
the case you've mentioned.
How much should we trust firmware? With strcmp, should we explicitly
terminate the string first (I removed one of these originally)? Patch
below, compiled not run.
-
The OF trampoline code prom_init.c still needs to identify IBM pSeries
(PAPR) machines in order to run some platform specific code on them like
instantiating the TCE tables. The code doing that detection was changed
recently in 2.6.17 early stages but was done slightly incorrectly. It
should be testing for an exact match of "chrp" and it currently tests
for anything that begins with "chrp". That means it will incorrectly
match with platforms using Maple-like device-trees and have open
firmware. This fixes it by using strcmp instead of strncmp to match what
the actual platform detection code does.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
arch/powerpc/kernel/prom_init.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletion(-)
Index: linux-2.6-powerpc/arch/powerpc/kernel/prom_init.c
===================================================================
--- linux-2.6-powerpc.orig/arch/powerpc/kernel/prom_init.c
+++ linux-2.6-powerpc/arch/powerpc/kernel/prom_init.c
@@ -1636,7 +1636,8 @@ static int __init prom_find_machine_type
compat, sizeof(compat)-1);
if (len <= 0)
return PLATFORM_GENERIC;
- if (strncmp(compat, RELOC("chrp"), 4))
+ compat[len] = 0;
+ if (strcmp(compat, RELOC("chrp")))
return PLATFORM_GENERIC;
/* Default to pSeries. We need to know if we are running LPAR */
^ permalink raw reply
* [dtc][PATCH] Fix ftdump data walking
From: Jimi Xenidis @ 2006-05-15 15:47 UTC (permalink / raw)
To: linuxppc-dev
This patch fixes a pointer addition bug in ftdump and handles an
endianness issue.
Signed-off-by: Jimi Xenidis <jimix@watson.ibm.com>
---
diff --git a/ftdump.c b/ftdump.c
index 4d4354b..7e643c8 100644
--- a/ftdump.c
+++ b/ftdump.c
@@ -67,7 +67,8 @@ static void print_data(const void *data,
} else if ((len % 4) == 0) {
printf(" = <");
for (i = 0; i < len; i += 4)
- printf("%08x%s", *((uint32_t *)data + i),
+ printf("%08x%s",
+ be32_to_cpu(*((uint32_t *)((ulong)data + i))),
i < (len - 4) ? " " : "");
printf(">");
} else {
^ permalink raw reply related
* [PATCH] Create /proc/rtas,/proc/ppc64/rtas if RTAS exists.
From: mostrows @ 2006-05-15 12:51 UTC (permalink / raw)
To: paulus, benh, linuxppc-dev
Use the existence of the RTAS device tree node to determine whether
/proc/rtas and /proc/ppc64/rtas are to be created. Using machine type
is not reliable (i.e. Maple-like machines may have RTAS).
--
Signed-off-by: Michal Ostrowski <mostrows@watson.ibm.com>
---
arch/powerpc/kernel/proc_ppc64.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
d8054b07bbf12e0178ec5ad7ce6a750a3af53064
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
index 3c2cf66..2b87f82 100644
--- a/arch/powerpc/kernel/proc_ppc64.c
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -52,7 +52,7 @@ static int __init proc_ppc64_create(void
if (!root)
return 1;
- if (!machine_is(pseries) && !machine_is(cell))
+ if (!of_find_node_by_path("/rtas"))
return 0;
if (!proc_mkdir("rtas", root))
--
1.1.4.g0b63-dirty
^ permalink raw reply related
* Re: [PATCH 5/6] Have ia64 use add_active_range() and free_area_init_nodes
From: Mel Gorman @ 2006-05-15 12:27 UTC (permalink / raw)
To: Andrew Morton
Cc: davej, tony.luck, linuxppc-dev, ak, bob.picco, linux-kernel,
linux-mm
In-Reply-To: <20060514203158.216a966e.akpm@osdl.org>
On (14/05/06 20:31), Andrew Morton didst pronounce:
> Mel Gorman <mel@csn.ul.ie> wrote:
> >
> > Size zones and holes in an architecture independent manner for ia64.
> >
>
> This one makes my ia64 die very early in boot. The trace is pretty useless.
>
> config at http://www.zip.com.au/~akpm/linux/patches/stuff/config-ia64
>
> <log snipped>
Curses. When I tried to reproduce this, the machine booted with my default
config but died before initialising the console with your config. The machine
is far away so I can't see the screen or restart the machine remotely so
I can only assume it is dying for the same reasons yours did.
> Note the misaligned pfns.
>
> Andy's (misspelled) CONFIG_UNALIGNED_ZONE_BOUNDRIES patch didn't actually
> include an update to any Kconfig files. But hacking that in by hand didn't
> help.
It would not have helped in this case because the zone boundaries would still
be in the wrong place for ia64. Below is a patch that aligns the zones on
all architectures that use CONFIG_ARCH_POPULATES_NODE_MAP . That is currently
i386, x86_64, powerpc, ppc and ia64. It does *not* align pgdat->node_start_pfn
but I don't believe that it is necessary.
I can't test it on ia64 until I get someone to restart the machine. The patch
compiles and is currently boot-testing on a range of other machines. I hope
to know within 5-6 hours if everything is ok.
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc4-mm4-clean/mm/page_alloc.c linux-2.6.17-rc4-mm4-ia64_force_alignment/mm/page_alloc.c
--- linux-2.6.17-rc4-mm4-clean/mm/page_alloc.c 2006-05-15 10:37:55.000000000 +0100
+++ linux-2.6.17-rc4-mm4-ia64_force_alignment/mm/page_alloc.c 2006-05-15 13:10:42.000000000 +0100
@@ -2640,14 +2640,20 @@ void __init free_area_init_nodes(unsigne
{
unsigned long nid;
int zone_index;
+ unsigned long lowest_pfn = find_min_pfn_with_active_regions();
+
+ lowest_pfn = zone_boundary_align_pfn(lowest_pfn);
+ arch_max_dma_pfn = zone_boundary_align_pfn(arch_max_dma_pfn);
+ arch_max_dma32_pfn = zone_boundary_align_pfn(arch_max_dma32_pfn);
+ arch_max_low_pfn = zone_boundary_align_pfn(arch_max_low_pfn);
+ arch_max_high_pfn = zone_boundary_align_pfn(arch_max_high_pfn);
/* Record where the zone boundaries are */
memset(arch_zone_lowest_possible_pfn, 0,
sizeof(arch_zone_lowest_possible_pfn));
memset(arch_zone_highest_possible_pfn, 0,
sizeof(arch_zone_highest_possible_pfn));
- arch_zone_lowest_possible_pfn[ZONE_DMA] =
- find_min_pfn_with_active_regions();
+ arch_zone_lowest_possible_pfn[ZONE_DMA] = lowest_pfn;
arch_zone_highest_possible_pfn[ZONE_DMA] = arch_max_dma_pfn;
arch_zone_highest_possible_pfn[ZONE_DMA32] = arch_max_dma32_pfn;
arch_zone_highest_possible_pfn[ZONE_NORMAL] = arch_max_low_pfn;
^ permalink raw reply
* Re: windfarm for PM72/PM73/RM31
From: Étienne Bersac @ 2006-05-15 11:39 UTC (permalink / raw)
To: Robin H. Johnson; +Cc: linuxppc-dev
In-Reply-To: <20060515035151.GB25086@curie-int.vc.shawcable.net>
Ok,
Let me wish you a good recovery :)
Étienne.
^ permalink raw reply
* Re: [PATCH 5/6] Have ia64 use add_active_range() and free_area_init_nodes
From: Andy Whitcroft @ 2006-05-15 11:02 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: akpm, nickpiggin, tony.luck, davej, mel, linux-kernel, bob.picco,
ak, linuxppc-dev, linux-mm
In-Reply-To: <20060515192918.c3e2e895.kamezawa.hiroyu@jp.fujitsu.com>
KAMEZAWA Hiroyuki wrote:
> On Mon, 15 May 2006 11:19:27 +0100
> Andy Whitcroft <apw@shadowen.org> wrote:
>
>
>>Nick Piggin wrote:
>>
>>>Andy Whitcroft wrote:
>>>
>>>
>>>>Interesting. You are correct there was no config component, at the time
>>>>I didn't have direct evidence that any architecture needed it, only that
>>>>we had an unchecked requirement on zones, a requirement that had only
>>>>recently arrived with the changes to free buddy detection. I note that
>>>
>>>
>>>Recently arrived? Over a year ago with the no-buddy-bitmap patches,
>>>right? Just checking because I that's what I'm assuming broke it...
>>
>>Yep, sorry I forget I was out of the game for 6 months! And yes that
>>was when the requirements were altered.
>>
>
> When no-bitmap-buddy patches was included,
>
> 1. bad_range() is not covered by CONFIG_VM_DEBUG. It always worked.
> ==
> static int bad_range(struct zone *zone, struct page *page)
> {
> if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
> return 1;
> if (page_to_pfn(page) < zone->zone_start_pfn)
> return 1;
> ==
> And , this code
> ==
> buddy = __page_find_buddy(page, page_idx, order);
>
> if (bad_range(zone, buddy))
> break;
> ==
>
> checked whether buddy is in zone and guarantees it to have page struct.
>
>
> But clean-up/speed-up codes vanished these checks. (I don't know when this occurs)
> Sorry for misses these things.
Heh, sorry to make it sound like it was you who was responsible.
-apw
^ permalink raw reply
* booting linux on IBM750 evaluation board
From: sharath kumar @ 2006-05-15 10:41 UTC (permalink / raw)
To: linuxppc-embedded
HI
I am working on an IBM750 evaluation board from IBM. It
has a Marvel chipset.
I only have pibs running on this board.
I want to boot Linux on this board.
Can anyone tell me the procedure?
Thanks and Regards
__________________________________________________
Do You Yahoo!?
Tired of spam? Yahoo! Mail has the best spam protection around
http://mail.yahoo.com
^ permalink raw reply
* Re: [PATCH 5/6] Have ia64 use add_active_range() and free_area_init_nodes
From: KAMEZAWA Hiroyuki @ 2006-05-15 10:47 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: akpm, nickpiggin, tony.luck, davej, mel, linux-kernel, bob.picco,
ak, linuxppc-dev, linux-mm
In-Reply-To: <20060515192918.c3e2e895.kamezawa.hiroyu@jp.fujitsu.com>
On Mon, 15 May 2006 19:29:18 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> On Mon, 15 May 2006 11:19:27 +0100
> Andy Whitcroft <apw@shadowen.org> wrote:
>
> > Nick Piggin wrote:
> > > Andy Whitcroft wrote:
> > >
> > >> Interesting. You are correct there was no config component, at the time
> > >> I didn't have direct evidence that any architecture needed it, only that
> > >> we had an unchecked requirement on zones, a requirement that had only
> > >> recently arrived with the changes to free buddy detection. I note that
> > >
> > >
> > > Recently arrived? Over a year ago with the no-buddy-bitmap patches,
> > > right? Just checking because I that's what I'm assuming broke it...
> >
> > Yep, sorry I forget I was out of the game for 6 months! And yes that
> > was when the requirements were altered.
> >
> When no-bitmap-buddy patches was included,
>
> 1. bad_range() is not covered by CONFIG_VM_DEBUG. It always worked.
> ==
> static int bad_range(struct zone *zone, struct page *page)
> {
> if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
> return 1;
> if (page_to_pfn(page) < zone->zone_start_pfn)
> return 1;
> ==
> And , this code
> ==
> buddy = __page_find_buddy(page, page_idx, order);
>
> if (bad_range(zone, buddy))
> break;
> ==
>
> checked whether buddy is in zone and guarantees it to have page struct.
>
>
> But clean-up/speed-up codes vanished these checks. (I don't know when this occurs)
> Sorry for misses these things.
>
One more point
When the above no-bitmap patches were included, the only user of non-aligned
zones was ia64, I think. Because ia64 used a virtual mem_map, page_to_pfn(page)
with CONFIG_DISCONTIGMEM doesn't access the page struct itself.
#define page_to_pfn(page) (page - vmemmap)
So, it didn't panic. ia64/vmemmap was safe.
If other archs had used non-aligned zones + CONFIG_DISCONTIGMEM,
the non-aligned-zones problem would have come out earlier.
-Kame
^ permalink raw reply
* Re: [PATCH 5/6] Have ia64 use add_active_range() and free_area_init_nodes
From: KAMEZAWA Hiroyuki @ 2006-05-15 10:29 UTC (permalink / raw)
To: Andy Whitcroft
Cc: akpm, nickpiggin, tony.luck, davej, mel, linux-kernel, bob.picco,
ak, linuxppc-dev, linux-mm
In-Reply-To: <446855AF.1090100@shadowen.org>
On Mon, 15 May 2006 11:19:27 +0100
Andy Whitcroft <apw@shadowen.org> wrote:
> Nick Piggin wrote:
> > Andy Whitcroft wrote:
> >
> >> Interesting. You are correct there was no config component, at the time
> >> I didn't have direct evidence that any architecture needed it, only that
> >> we had an unchecked requirement on zones, a requirement that had only
> >> recently arrived with the changes to free buddy detection. I note that
> >
> >
> > Recently arrived? Over a year ago with the no-buddy-bitmap patches,
> > right? Just checking because I that's what I'm assuming broke it...
>
> Yep, sorry I forget I was out of the game for 6 months! And yes that
> was when the requirements were altered.
>
When the no-bitmap-buddy patches were included,
1. bad_range() is not covered by CONFIG_VM_DEBUG. It always worked.
==
static int bad_range(struct zone *zone, struct page *page)
{
if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
return 1;
if (page_to_pfn(page) < zone->zone_start_pfn)
return 1;
==
And , this code
==
buddy = __page_find_buddy(page, page_idx, order);
if (bad_range(zone, buddy))
break;
==
checked whether the buddy is in the zone and guaranteed that it has a page struct.
But the clean-up/speed-up code removed these checks. (I don't know when this occurred.)
Sorry for missing these things.
-Kame
^ permalink raw reply
* Re: [PATCH 5/6] Have ia64 use add_active_range() and free_area_init_nodes
From: Andy Whitcroft @ 2006-05-15 10:19 UTC (permalink / raw)
To: Nick Piggin
Cc: Andrew Morton, davej, tony.luck, linuxppc-dev, Mel Gorman,
linux-kernel, bob.picco, ak, linux-mm
In-Reply-To: <44685123.7040501@yahoo.com.au>
Nick Piggin wrote:
> Andy Whitcroft wrote:
>
>> Interesting. You are correct there was no config component, at the time
>> I didn't have direct evidence that any architecture needed it, only that
>> we had an unchecked requirement on zones, a requirement that had only
>> recently arrived with the changes to free buddy detection. I note that
>
>
> Recently arrived? Over a year ago with the no-buddy-bitmap patches,
> right? Just checking because I that's what I'm assuming broke it...
Yep, sorry I forget I was out of the game for 6 months! And yes that
was when the requirements were altered.
-apw
^ permalink raw reply
* Page fault when debugging a 2.6.16 kernel on MPC8248
From: Laurent Pinchart @ 2006-05-15 10:10 UTC (permalink / raw)
To: linuxppc-embedded
Hello everybody,
I'm trying to bring up Linux on a custom MPC8248 board.
I've been able to setup U-Boot correctly up to the point where I boot the
Linux kernel using bootm. The kernel doesn't print anything on the serial
console and hangs.
Thanks to a BDI2000, I've been able to get the following backtrace.
#0 cpm_uart_console_write (co=0xd,
s=0xc0203e4b "x version 2.6.16-dirty (laurent@pclaurent) (gcc version
3.4.1) #422 Thu May 11 13:30:44 CEST 2006\n<6>Technotrade S.A.",
count=1) at drivers/serial/cpm_uart/cpm_uart_core.c:1023
#1 0xc0014144 in __call_console_drivers (start=3, end=105) at
kernel/printk.c:368
#2 0xc00144f4 in release_console_sem () at kernel/printk.c:430
#3 0xc0014ce4 in register_console (console=0xc01c2e60) at kernel/printk.c:946
#4 0xc01f7308 in cpm_uart_console_init () at
drivers/serial/cpm_uart/cpm_uart_core.c:1149
#5 0xc01f673c in console_init () at drivers/char/tty_io.c:3181
#6 0xc01e85b8 in start_kernel () at init/main.c:495
#7 0x00000000 in ?? ()
At that point, all the memory read with the BDI2000 (internal memory, SDRAM,
SRAM, flash, ...) reads as 0.
Knowing that something was wrong with the memory, I tried to execute the
kernel step by step. Unfortunately, after breaking in start_kernel, the first
gdb "next" command to step over the call to printk(KERN_NOTICE) hung. Hitting
Ctrl-C in gdb returned the following backtrace.
#0 0xc0005f4c in __delay ()
#1 0xc0013b18 in panic (fmt=0x487ab0 <Address 0x487ab0 out of bounds>) at
include/asm/delay.h:42
#2 0xc001679c in do_exit (code=11) at kernel/exit.c:809
#3 0x2400c022 in ?? ()
#4 0xc0004b8c in die (str=0x3ff <Address 0x3ff out of bounds>, fp=0xc01e7c60,
err=11) at arch/ppc/kernel/traps.c:101
#5 0xc000bf3c in bad_page_fault (regs=0xc01e7c60, address=0, sig=11) at
arch/ppc/mm/fault.c:336
#6 0xc00046d0 in handle_page_fault ()
#7 0xc00046d0 in handle_page_fault ()
I'm lost here. Why does Linux page fault when executed step by step (or
rather using "next") but not when running normally? I suspect that this
could be related to the memory reading as 0 in cpm_uart_console_write.
Thanks in advance for all the help you can provide.
Best regards,
Laurent Pinchart
^ permalink raw reply
* Re: how to mount /dev/ram0 to /
From: tony @ 2006-05-15 9:45 UTC (permalink / raw)
To: linuxppc-embedded@ozlabs.org
I rebuild the buzybox,and added some tools(like dos2unix,stty,unix2dos,dc,loadkmp) ,then it works(mount /dev/ram0 automatically),maybe it's something about the tools above.

thank Wolfgang Denk and all..

                tony
                hangtoo@163.com
                    2006-05-15
^ permalink raw reply
* Re: [PATCH 5/6] Have ia64 use add_active_range() and free_area_init_nodes
From: Nick Piggin @ 2006-05-15 10:00 UTC (permalink / raw)
To: Andy Whitcroft
Cc: Andrew Morton, davej, tony.luck, linuxppc-dev, Mel Gorman,
linux-kernel, bob.picco, ak, linux-mm
In-Reply-To: <44683A09.2060404@shadowen.org>
Andy Whitcroft wrote:
> Interesting. You are correct there was no config component, at the time
> I didn't have direct evidence that any architecture needed it, only that
> we had an unchecked requirement on zones, a requirement that had only
> recently arrived with the changes to free buddy detection. I note that
Recently arrived? Over a year ago with the no-buddy-bitmap patches,
right? Just checking because that's what I'm assuming broke it...
> MAX_ORDER is 17 for ia64 so that probabally accounts for the
> missalignment. It is clear that the reporting is slightly over-zelous
> as I am reporting zero-sized zones. I'll get that fixed and patch to
> you. I'll also have a look at the patch as added to -mm and try and get
> the rest of the spelling sorted :-/.
>
> I'll go see if we currently have a machine to test this config on.
--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com
^ permalink raw reply
* Re: [PATCH] Fix pSeries identification in prom_init.c
From: segher @ 2006-05-15 9:51 UTC (permalink / raw)
To: benh; +Cc: linuxppc-dev, paulus
In-Reply-To: <1147671963.21291.91.camel@localhost.localdomain>
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>
> Paul, if you are happy with that, please send upsteam ASAP.
>
> Index: linux-work/arch/powerpc/kernel/prom_init.c
> =================================================================== ---
> linux-work.orig/arch/powerpc/kernel/prom_init.c 2006-05-02
> 10:51:33.000000000 +1000 +++
> linux-work/arch/powerpc/kernel/prom_init.c 2006-05-15 15:37:03.000000000
> +1000 @@ -1636,7 +1636,7 @@
> compat, sizeof(compat)-1);
> if (len <= 0)
> return PLATFORM_GENERIC;
> - if (strncmp(compat, RELOC("chrp"), 4))
> + if (strcmp(compat, RELOC("chrp")))
> return PLATFORM_GENERIC;
>
> /* Default to pSeries. We need to know if we are running LPAR */
Confirmed it works correctly on affected systems. Thanks Ben.
Signed-off-by: Segher Boessenkool <segher@kernel.crashing.org>
^ permalink raw reply
* Re: [PATCH] powerpc: Fix ide-pmac sysfs entry
From: Benjamin Herrenschmidt @ 2006-05-15 8:52 UTC (permalink / raw)
To: Gabriel Paubert
Cc: Andrew Morton, Pete Popov, B.Zolnierkiewicz, linuxppc-dev, paulus,
Alan Cox
In-Reply-To: <20060515083847.GA5229@iram.es>
> Actually I have one of these, and regularly use it and update
> the kernel to the latest git once or twice per month, but I
> typically have two batteries and have not swapped in the
> media bay under Linux for a long time.
>
> If you are interested, I could test the patch when I find
> some time (not today: my 8 year old son went to the hospital
> for an emergency last Thursday, he is cured now and should
> come out today).
Any test is welcome, glad to know your son is well !
Cheers,
Ben.
^ permalink raw reply
* Re: [PATCH] powerpc: Fix ide-pmac sysfs entry
From: Gabriel Paubert @ 2006-05-15 8:38 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: Andrew Morton, Pete Popov, B.Zolnierkiewicz, linuxppc-dev, paulus,
Alan Cox
In-Reply-To: <1147677830.21291.114.camel@localhost.localdomain>
On Mon, May 15, 2006 at 05:23:50PM +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2006-05-15 at 00:08 -0700, Andrew Morton wrote:
> > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> > >
> > > It looks like the generic ide code now wants ide_init_hwif_ports() to
> > > set the parent struct device into the ide_hw structure (new field ?).
> > > Without this, the mac ide code can cause the ide probing code to explode
> > > in flames in sysfs registration due to what looks like a stale pointer
> > > in there (happens when removing/re-inserting one of the hotswap media
> > > bays on some laptops).
> > >
> >
> > You don't sound very confident.
>
> Well, the problem is fixed with my change, I'd like to have Bart
> confirmation that's its the right approach. It looks like it is but that
> code is fairly intricated :)
>
> > afaict things went bad in October last year. hw_regs_t.dev was added here:
> >
> > http://www.kernel.org/git/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=26a940e21752e0de8f068f77dad606a7d1986937
> >
> > but that only converted one driver to initialse ->dev.
> >
> > How come things didn't break then? Did some later change cause the
> > problems?
>
> Well, for one, very few drivers cause ide_unregister nor rely on those
> old mecanism... it seems that the way I initialize things at boot works
> fine, it's when I unregister/re-register via the hotswap bay that things
> go bunk.
>
> It's possible that it got broken back then and not noticed since ... or
> another change actually made _use_ of that field... not sure, Bart might
> confirm. The Mac laptops with those hotswap bays are fairly old and I
> haven't tested for some time. I just did after getting user reports, and
> users of old machines like that tend to use stable distro kernels which
> tend to be fairly old...
Actually I have one of these, and regularly use it and update
the kernel to the latest git once or twice per month, but I
typically have two batteries and have not swapped in the
media bay under Linux for a long time.
If you are interested, I could test the patch when I find
some time (not today: my 8 year old son went to the hospital
for an emergency last Thursday, he is cured now and should
come out today).
Regards,
Gabriel
^ permalink raw reply
* Re: [PATCH 5/6] Have ia64 use add_active_range() and free_area_init_nodes
From: Andy Whitcroft @ 2006-05-15 8:21 UTC (permalink / raw)
To: Andrew Morton
Cc: davej, tony.luck, linuxppc-dev, Mel Gorman, linux-kernel,
bob.picco, ak, linux-mm
In-Reply-To: <20060514203158.216a966e.akpm@osdl.org>
Andrew Morton wrote:
> Mel Gorman <mel@csn.ul.ie> wrote:
>
>>Size zones and holes in an architecture independent manner for ia64.
>>
>
>
> This one makes my ia64 die very early in boot. The trace is pretty useless.
>
> config at http://www.zip.com.au/~akpm/linux/patches/stuff/config-ia64
>
> EFI v1.10 by INTEL: SALsystab=0x3fe4c8c0 ACPI=0x3ff84000 ACPI 2.0=0x3ff83000 MP0
> Early serial console at I/O port 0x2f8 (options '9600n8')
> SAL 3.1: Intel Corp SR870BN4 vers0
> SAL Platform features: BusLock IRQ_Redirection
> SAL: AP wakeup using external interrupt vector 0xf0
> No logical to physical processor mapping available
> iosapic_system_init: Disabling PC-AT compatible 8259 interrupts
> ACPI: Local APIC address c0000000fee00000
> PLATFORM int CPEI (0x3): GSI 22 (level, low) -> CPU 0 (0xc618) vector 30
> register_intr: changing vector 39 from IO-SAPIC-edge to IO-SAPIC-level
> 4 CPUs available, 4 CPUs total
> MCA related initialization done
> node 0 zone DMA missaligned start pfn, enable UNALIGNED_ZONE_BOUNDRIES
> node 0 zone DMA32 missaligned start pfn, enable UNALIGNED_ZONE_BOUNDRIES
> node 0 zone Normal missaligned start pfn, enable UNALIGNED_ZONE_BOUNDRIES
> node 0 zone HighMem missaligned start pfn, enable UNALIGNED_ZONE_BOUNDRIES
> SMP: Allowing 4 CPUs, 0 hotplug CPUs
> Built 1 zonelists
> Kernel command line: BOOT_IMAGE=scsi0:\EFI\redhat\vmlinuz-2.6.17-rc4-mm1 root=/o
> PID hash table entries: 4096 (order: 12, 32768 bytes)
> Console: colour VGA+ 80x25
> Dentry cache hash table entries: 131072 (order: 6, 1048576 bytes)
> Inode-cache hash table entries: 65536 (order: 5, 524288 bytes)
> Placing software IO TLB between 0x4a30000 - 0x8a30000
> Unable to handle kernel NULL pointer dereference (address 0000000000000008)
> swapper[0]: Oops 8813272891392 [1]
> Modules linked in:
>
> Pid: 0, CPU 0, comm: swapper
> psr : 00001010084a6010 ifs : 800000000000060f ip : [<a0000001000e6750>] Notd
> ip is at __free_pages_ok+0x190/0x3c0
> unat: 0000000000000000 pfs : 000000000000060f rsc : 0000000000000003
> rnat: 0000000000ffffff bsps: 00000000000002f9 pr : 80000000afb5956b
> ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c8a70433f
> csd : 0930ffff00090000 ssd : 0930ffff00090000
> b0 : a0000001000e6660 b6 : e00000003fe52940 b7 : a000000100790120
> f6 : 1003e6db6db6db6db6db7 f7 : 1003e000000000006dec0
> f8 : 1003e000000000000fb80 f9 : 1003e000000000006e080
> f10 : 1003e000000000000fb40 f11 : 1003e000000000006dec0
> r1 : a000000100af2db0 r2 : 0000000000000001 r3 : 0000000000000000
> r8 : a0000001008f3d38 r9 : 0000000000004000 r10 : 0000000000370400
> r11 : 0000000000004000 r12 : a0000001007b7e10 r13 : a0000001007b0000
> r14 : 0000000000000001 r15 : 0000000100000001 r16 : 0000000100000001
> r17 : 0000000100000001 r18 : 0000000000001041 r19 : 0000000000000000
> r20 : e00000000149df00 r21 : 0000000100000000 r22 : 0000000055555155
> r23 : 00000000ffffffff r24 : e00000000149df08 r25 : 1555555555555155
> r26 : 0000000000000032 r27 : 0000000000000000 r28 : 0000000000000008
> r29 : 0000000000001041 r30 : 0000000000001041 r31 : 0000000000000001
> Unable to handle kernel NULL pointer dereference (address 0000000000000000)
> swapper[0]: Oops 8813272891392 [2]
> Modules linked in:
>
> Pid: 0, CPU 0, comm: swapper
> psr : 0000101008022018 ifs : 8000000000000287 ip : [<a0000001001236c0>] Notd
> ip is at kmem_cache_alloc+0x40/0x100
> unat: 0000000000000000 pfs : 0000000000000712 rsc : 0000000000000003
> rnat: 0000000000000000 bsps: 0000000000000000 pr : 80000000afb59967
> ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c8a70033f
> csd : 0930ffff00090000 ssd : 0930ffff00090000
> b0 : a00000010003e450 b6 : a000000100001b50 b7 : a00000010003f320
> f6 : 1003e9e3779b97f4a7c16 f7 : 0ffdb8000000000000000
> f8 : 1003e000000000000007f f9 : 1003e0000000000000379
> f10 : 1003e6db6db6db6db6db7 f11 : 1003e000000000000007f
> r1 : a000000100af2db0 r2 : 0000000000000000 r3 : 0000000000000000
> r8 : 0000000000000000 r9 : 0000000000000000 r10 : a0000001007b0f24
> r11 : 0000000000000000 r12 : a0000001007b7280 r13 : a0000001007b0000
> r14 : 0000000000000000 r15 : 0000000000000000 r16 : a0000001007b7310
> r17 : 0000000000000000 r18 : a0000001007b7478 r19 : 0000000000000000
> r20 : 0000000000000000 r21 : 0000000000000018 r22 : 0000000000000000
> r23 : 0000000000000000 r24 : 0000000000000000 r25 : a0000001007b7308
> r26 : 000000007fffffff r27 : a000000100825520 r28 : a0000001008f3c40
> r29 : a000000100816ca8 r30 : 0000000000000018 r31 : 0000000000000018
>
>
>
> (gdb) l *0xa0000001000e6750
> 0xa0000001000e6750 is in __free_pages_ok (mm.h:324).
> 319 extern void FASTCALL(__page_cache_release(struct page *));
> 320
> 321 static inline int page_count(struct page *page)
> 322 {
> 323 if (unlikely(PageCompound(page)))
> 324 page = (struct page *)page_private(page);
> 325 return atomic_read(&page->_count);
> 326 }
> 327
> 328 static inline void get_page(struct page *page)
>
>
> Note the misaligned pfns.
>
> Andy's (misspelled) CONFIG_UNALIGNED_ZONE_BOUNDRIES patch didn't actually
> include an update to any Kconfig files. But hacking that in by hand didn't
> help.
Interesting. You are correct there was no config component, at the time
I didn't have direct evidence that any architecture needed it, only that
we had an unchecked requirement on zones, a requirement that had only
recently arrived with the changes to free buddy detection. I note that
MAX_ORDER is 17 for ia64 so that probably accounts for the
misalignment. It is clear that the reporting is slightly over-zealous
as I am reporting zero-sized zones. I'll get that fixed and patch to
you. I'll also have a look at the patch as added to -mm and try and get
the rest of the spelling sorted :-/.
I'll go see if we currently have a machine to test this config on.
-apw
^ permalink raw reply
* Re: [PATCH] powerpc: Fix ide-pmac sysfs entry
From: Benjamin Herrenschmidt @ 2006-05-15 7:23 UTC (permalink / raw)
To: Andrew Morton
Cc: B.Zolnierkiewicz, linuxppc-dev, paulus, Pete Popov, Alan Cox
In-Reply-To: <20060515000810.487b834e.akpm@osdl.org>
On Mon, 2006-05-15 at 00:08 -0700, Andrew Morton wrote:
> Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> >
> > It looks like the generic ide code now wants ide_init_hwif_ports() to
> > set the parent struct device into the ide_hw structure (new field ?).
> > Without this, the mac ide code can cause the ide probing code to explode
> > in flames in sysfs registration due to what looks like a stale pointer
> > in there (happens when removing/re-inserting one of the hotswap media
> > bays on some laptops).
> >
>
> You don't sound very confident.
Well, the problem is fixed with my change, I'd like to have Bart's
confirmation that it's the right approach. It looks like it is but that
code is fairly intricate :)
> afaict things went bad in October last year. hw_regs_t.dev was added here:
>
> http://www.kernel.org/git/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=26a940e21752e0de8f068f77dad606a7d1986937
>
> but that only converted one driver to initialse ->dev.
>
> How come things didn't break then? Did some later change cause the
> problems?
Well, for one, very few drivers cause ide_unregister or rely on those
old mechanisms... it seems that the way I initialize things at boot works
fine, it's when I unregister/re-register via the hotswap bay that things
go bunk.
It's possible that it got broken back then and not noticed since ... or
another change actually made _use_ of that field... not sure, Bart might
confirm. The Mac laptops with those hotswap bays are fairly old and I
haven't tested for some time. I just did after getting user reports, and
users of old machines like that tend to use stable distro kernels which
tend to be fairly old...
Ben.
^ permalink raw reply
* Re: [PATCH] powerpc: Fix ide-pmac sysfs entry
From: Andrew Morton @ 2006-05-15 7:08 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: B.Zolnierkiewicz, linuxppc-dev, paulus, Pete Popov, Alan Cox
In-Reply-To: <1147676318.21291.105.camel@localhost.localdomain>
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
>
> It looks like the generic ide code now wants ide_init_hwif_ports() to
> set the parent struct device into the ide_hw structure (new field ?).
> Without this, the mac ide code can cause the ide probing code to explode
> in flames in sysfs registration due to what looks like a stale pointer
> in there (happens when removing/re-inserting one of the hotswap media
> bays on some laptops).
>
You don't sound very confident.
> ---
>
> Andew: That's a 2.6.17 candidate
>
> Index: linux-work/drivers/ide/ppc/pmac.c
> ===================================================================
> --- linux-work.orig/drivers/ide/ppc/pmac.c 2006-04-19 15:04:47.000000000 +1000
> +++ linux-work/drivers/ide/ppc/pmac.c 2006-05-15 16:43:20.000000000 +1000
> @@ -553,6 +553,8 @@
>
> if (irq != NULL)
> *irq = pmac_ide[ix].irq;
> +
> + hw->dev = &pmac_ide[ix].mdev->ofdev.dev;
> }
>
> #define PMAC_IDE_REG(x) ((void __iomem *)(IDE_DATA_REG+(x)))
>
afaict things went bad in October last year. hw_regs_t.dev was added here:
http://www.kernel.org/git/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=26a940e21752e0de8f068f77dad606a7d1986937
but that only converted one driver to initialise ->dev.
How come things didn't break then? Did some later change cause the
problems?
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox