* [PATCH v3 05/37] s390/mm: provide memory management functions for protected KVM guests
From: Christian Borntraeger @ 2020-02-20 10:39 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank
Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
Vasily Gorbik, Andrea Arcangeli, linux-mm
In-Reply-To: <20200220104020.5343-1-borntraeger@de.ibm.com>
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
This provides the basic ultravisor calls and page table handling to cope
with secure guests:
- provide arch_make_page_accessible
- make pages accessible after unmapping of secure guests
- provide the ultravisor commands convert to/from secure
- provide the ultravisor commands pin/unpin shared
- provide callbacks to make pages secure (inaccessible)
- we check for the expected pin count to only make pages secure if the
host is not accessing them
- we fence hugetlbfs for secure pages
Co-developed-by: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
Signed-off-by: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
[borntraeger@de.ibm.com: patch merging, splitting, fixing]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
arch/s390/include/asm/gmap.h | 4 +
arch/s390/include/asm/mmu.h | 2 +
arch/s390/include/asm/mmu_context.h | 1 +
arch/s390/include/asm/page.h | 5 +
arch/s390/include/asm/pgtable.h | 35 ++++-
arch/s390/include/asm/uv.h | 31 ++++
arch/s390/kernel/uv.c | 227 ++++++++++++++++++++++++++++
7 files changed, 300 insertions(+), 5 deletions(-)
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 37f96b6f0e61..3c4926aa78f4 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -9,6 +9,7 @@
#ifndef _ASM_S390_GMAP_H
#define _ASM_S390_GMAP_H
+#include <linux/radix-tree.h>
#include <linux/refcount.h>
/* Generic bits for GMAP notification on DAT table entry changes. */
@@ -31,6 +32,7 @@
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @pfault_enabled: defines if pfaults are applicable for the guest
+ * @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
* @pt_list: list of all page tables used in the shadow guest address space
@@ -54,6 +56,8 @@ struct gmap {
unsigned long asce_end;
void *private;
bool pfault_enabled;
+ /* only set for protected virtual machines */
+ unsigned long guest_handle;
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bcfb6371086f..e21b618ad432 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,6 +16,8 @@ typedef struct {
unsigned long asce;
unsigned long asce_limit;
unsigned long vdso_base;
+ /* The mmu context belongs to a secure guest. */
+ atomic_t is_protected;
/*
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 8d04e6f3f796..afa836014076 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
+ atomic_set(&mm->context.is_protected, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
mm->context.compat_mm = test_thread_flag(TIF_31BIT);
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 85e944f04c70..4ebcf891ff3c 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -153,6 +153,11 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
+#if IS_ENABLED(CONFIG_PGSTE)
+int arch_make_page_accessible(struct page *page);
+#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+#endif
+
#endif /* !__ASSEMBLY__ */
#define __PAGE_OFFSET 0x0UL
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 137a3920ca36..cc7a1adacb94 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -19,6 +19,7 @@
#include <linux/atomic.h>
#include <asm/bug.h>
#include <asm/page.h>
+#include <asm/uv.h>
extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
@@ -520,6 +521,15 @@ static inline int mm_has_pgste(struct mm_struct *mm)
return 0;
}
+static inline int mm_is_protected(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(atomic_read(&mm->context.is_protected)))
+ return 1;
+#endif
+ return 0;
+}
+
static inline int mm_alloc_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -1061,7 +1071,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -1073,7 +1088,12 @@ void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(vma->vm_mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
/*
@@ -1088,12 +1108,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep, int full)
{
+ pte_t res;
+
if (full) {
- pte_t pte = *ptep;
+ res = *ptep;
*ptep = __pte(_PAGE_INVALID);
- return pte;
+ } else {
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index cad643b05d19..7956868340c1 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -15,6 +15,7 @@
#include <linux/errno.h>
#include <linux/bug.h>
#include <asm/page.h>
+#include <asm/gmap.h>
#define UVC_RC_EXECUTED 0x0001
#define UVC_RC_INV_CMD 0x0002
@@ -24,6 +25,10 @@
#define UVC_CMD_QUI 0x0001
#define UVC_CMD_INIT_UV 0x000f
+#define UVC_CMD_CONV_TO_SEC_STOR 0x0200
+#define UVC_CMD_CONV_FROM_SEC_STOR 0x0201
+#define UVC_CMD_PIN_PAGE_SHARED 0x0341
+#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
@@ -31,8 +36,12 @@
enum uv_cmds_inst {
BIT_UVC_CMD_QUI = 0,
BIT_UVC_CMD_INIT_UV = 1,
+ BIT_UVC_CMD_CONV_TO_SEC_STOR = 6,
+ BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7,
BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+ BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
+ BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
};
struct uv_cb_header {
@@ -69,6 +78,19 @@ struct uv_cb_init {
u64 reserved28[4];
} __packed __aligned(8);
+struct uv_cb_cts {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 gaddr;
+} __packed __aligned(8);
+
+struct uv_cb_cfs {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 paddr;
+} __packed __aligned(8);
+
struct uv_cb_share {
struct uv_cb_header header;
u64 reserved08[3];
@@ -171,12 +193,21 @@ static inline int is_prot_virt_host(void)
return prot_virt_host;
}
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int uv_convert_from_secure(unsigned long paddr);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+
void setup_uv(void);
void adjust_to_uv_max(unsigned long *vmax);
#else
#define is_prot_virt_host() 0
static inline void setup_uv(void) {}
static inline void adjust_to_uv_max(unsigned long *vmax) {}
+
+static inline int uv_convert_from_secure(unsigned long paddr)
+{
+ return 0;
+}
#endif
#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || \
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 1ddc42154ef6..4539003dac9d 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -12,6 +12,8 @@
#include <linux/sizes.h>
#include <linux/bitmap.h>
#include <linux/memblock.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>
@@ -97,4 +99,229 @@ void adjust_to_uv_max(unsigned long *vmax)
{
*vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
}
+
+/*
+ * Requests the Ultravisor to pin the page in the shared state. This will
+ * cause an intercept when the guest attempts to unshare the pinned page.
+ */
+static int uv_pin_shared(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_PIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr,
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Requests the Ultravisor to encrypt a guest page and make it
+ * accessible to the host for paging (export).
+ *
+ * @paddr: Absolute host address of page to be exported
+ */
+int uv_convert_from_secure(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Calculate the expected ref_count for a page that would otherwise have no
+ * further pins. This was cribbed from similar functions in other places in
+ * the kernel, but with some slight modifications. We know that a secure
+ * page can not be a huge page for example.
+ */
+static int expected_page_refs(struct page *page)
+{
+ int res;
+
+ res = page_mapcount(page);
+ if (PageSwapCache(page)) {
+ res++;
+ } else if (page_mapping(page)) {
+ res++;
+ if (page_has_private(page))
+ res++;
+ }
+ return res;
+}
+
+static int make_secure_pte(pte_t *ptep, unsigned long addr,
+ struct page *exp_page, struct uv_cb_header *uvcb)
+{
+ pte_t entry = READ_ONCE(*ptep);
+ struct page *page;
+ int expected, rc = 0;
+
+ if (!pte_present(entry))
+ return -ENXIO;
+ if (pte_val(entry) & _PAGE_INVALID)
+ return -ENXIO;
+
+ page = pte_page(entry);
+ if (page != exp_page)
+ return -ENXIO;
+ if (PageWriteback(page))
+ return -EAGAIN;
+ expected = expected_page_refs(page);
+ if (!page_ref_freeze(page, expected))
+ return -EBUSY;
+ set_bit(PG_arch_1, &page->flags);
+ rc = uv_call(0, (u64)uvcb);
+ page_ref_unfreeze(page, expected);
+ /* Return -ENXIO if the page was not mapped, -EINVAL otherwise */
+ if (rc)
+ rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
+ return rc;
+}
+
+/*
+ * Requests the Ultravisor to make a page accessible to a guest.
+ * If it's brought in the first time, it will be cleared. If
+ * it has been exported before, it will be decrypted and integrity
+ * checked.
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+ struct vm_area_struct *vma;
+ bool local_drain = false;
+ spinlock_t *ptelock;
+ unsigned long uaddr;
+ struct page *page;
+ pte_t *ptep;
+ int rc;
+
+again:
+ rc = -EFAULT;
+ down_read(&gmap->mm->mmap_sem);
+
+ uaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(uaddr))
+ goto out;
+ vma = find_vma(gmap->mm, uaddr);
+ if (!vma)
+ goto out;
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode. If
+ * userspace is playing dirty tricks with mapping huge pages later
+ * on this will result in a segmentation fault.
+ */
+ if (is_vm_hugetlb_page(vma))
+ goto out;
+
+ rc = -ENXIO;
+ page = follow_page(vma, uaddr, FOLL_WRITE);
+ if (IS_ERR_OR_NULL(page))
+ goto out;
+
+ lock_page(page);
+ ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ rc = make_secure_pte(ptep, uaddr, page, uvcb);
+ pte_unmap_unlock(ptep, ptelock);
+ unlock_page(page);
+out:
+ up_read(&gmap->mm->mmap_sem);
+
+ if (rc == -EAGAIN) {
+ wait_on_page_writeback(page);
+ } else if (rc == -EBUSY) {
+ /*
+ * If we have tried a local drain and the page refcount
+ * still does not match our expected safe value, try with a
+ * system wide drain. This is needed if the pagevecs holding
+ * the page are on a different CPU.
+ */
+ if (local_drain) {
+ lru_add_drain_all();
+ /* We give up here, and let the caller try again */
+ return -EAGAIN;
+ }
+ /*
+ * We are here if the page refcount does not match the
+ * expected safe value. The main culprits are usually
+ * pagevecs. With lru_add_drain() we drain the pagevecs
+ * on the local CPU so that hopefully the refcount will
+ * reach the expected safe value.
+ */
+ lru_add_drain();
+ local_drain = true;
+ /* And now we try again immediately after draining */
+ goto again;
+ } else if (rc == -ENXIO) {
+ if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
+ return -EFAULT;
+ return -EAGAIN;
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_make_secure);
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = gmap->guest_handle,
+ .gaddr = gaddr,
+ };
+
+ return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+
+/*
+ * To be called with the page locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the page concurrently. Having 2
+ * parallel make_page_accessible is fine, as the UV calls will become a
+ * no-op if the page is already exported.
+ */
+int arch_make_page_accessible(struct page *page)
+{
+ int rc = 0;
+
+ /* Hugepage cannot be protected, so nothing to do */
+ if (PageHuge(page))
+ return 0;
+
+ /*
+ * PG_arch_1 is used in 3 places:
+ * 1. for kernel page tables during early boot
+ * 2. for storage keys of huge pages and KVM
+ * 3. As an indication that this page might be secure. This can
+ * overindicate, e.g. we set the bit before calling
+ * convert_to_secure.
+ * As secure pages are never huge, all 3 variants can co-exist.
+ */
+ if (!test_bit(PG_arch_1, &page->flags))
+ return 0;
+
+ rc = uv_pin_shared(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ rc = uv_convert_from_secure(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+
#endif
--
2.25.0
^ permalink raw reply related
* [PATCH v3 01/37] mm:gup/writeback: add callbacks for inaccessible pages
From: Christian Borntraeger @ 2020-02-20 10:39 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank, Andrew Morton
Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
Vasily Gorbik, Andrea Arcangeli, linux-mm, Will Deacon
In-Reply-To: <20200220104020.5343-1-borntraeger@de.ibm.com>
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
With the introduction of protected KVM guests on s390 there is now a
concept of inaccessible pages. These pages need to be made accessible
before the host can access them.
While cpu accesses will trigger a fault that can be resolved, I/O
accesses will just fail. We need to add a callback into architecture
code for places that will do I/O, namely when writeback is started or
when a page reference is taken.
This is not only to enable paging, file backing etc, it is also
necessary to protect the host against a malicious user space. For
example a bad QEMU could simply start direct I/O on such protected
memory. We do not want userspace to be able to trigger I/O errors and
thus the logic is "whenever somebody accesses that page (gup) or
does I/O, make sure that this page can be accessed". When the guest
tries to access that page we will wait in the page fault handler for
writeback to have finished and for the page_ref to be the expected
value.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
include/linux/gfp.h | 6 ++++++
mm/gup.c | 15 ++++++++++++---
mm/page-writeback.c | 5 +++++
3 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e5b817cb86e7..be2754841369 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -485,6 +485,12 @@ static inline void arch_free_page(struct page *page, int order) { }
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
+#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+static inline int arch_make_page_accessible(struct page *page)
+{
+ return 0;
+}
+#endif
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
diff --git a/mm/gup.c b/mm/gup.c
index 1b521e0ac1de..354bcfbd844b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -193,6 +193,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
struct page *page;
spinlock_t *ptl;
pte_t *ptep, pte;
+ int ret;
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
@@ -250,8 +251,6 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
if (is_zero_pfn(pte_pfn(pte))) {
page = pte_page(pte);
} else {
- int ret;
-
ret = follow_pfn_pte(vma, address, ptep, flags);
page = ERR_PTR(ret);
goto out;
@@ -259,7 +258,6 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
}
if (flags & FOLL_SPLIT && PageTransCompound(page)) {
- int ret;
get_page(page);
pte_unmap_unlock(ptep, ptl);
lock_page(page);
@@ -276,6 +274,12 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
page = ERR_PTR(-ENOMEM);
goto out;
}
+ ret = arch_make_page_accessible(page);
+ if (ret) {
+ put_page(page);
+ page = ERR_PTR(ret);
+ goto out;
+ }
}
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
@@ -1919,6 +1923,11 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
VM_BUG_ON_PAGE(compound_head(page) != head, page);
+ ret = arch_make_page_accessible(page);
+ if (ret) {
+ put_page(head);
+ goto pte_unmap;
+ }
SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2caf780a42e7..558d7063c117 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2807,6 +2807,11 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
}
unlock_page_memcg(page);
+ /*
+ * If writeback has been triggered on a page that cannot be made
+ * accessible, it is too late.
+ */
+ WARN_ON(arch_make_page_accessible(page));
return ret;
}
--
2.25.0
^ permalink raw reply related
* [PATCH v3 06/37] s390/mm: add (non)secure page access exceptions handlers
From: Christian Borntraeger @ 2020-02-20 10:39 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank
Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
Vasily Gorbik, Andrea Arcangeli, linux-mm, Janosch Frank
In-Reply-To: <20200220104020.5343-1-borntraeger@de.ibm.com>
From: Vasily Gorbik <gor@linux.ibm.com>
Add exception handlers performing transparent transition of non-secure
pages to secure (import) upon guest access and secure pages to
non-secure (export) upon hypervisor access.
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
[frankja@linux.ibm.com: adding checks for failures]
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
[imbrenda@linux.ibm.com: adding a check for gmap fault]
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
[borntraeger@de.ibm.com: patch merging, splitting, fixing]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
arch/s390/kernel/pgm_check.S | 4 +-
arch/s390/mm/fault.c | 78 ++++++++++++++++++++++++++++++++++++
2 files changed, 80 insertions(+), 2 deletions(-)
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index eee3a482195a..2c27907a5ffc 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -78,8 +78,8 @@ PGM_CHECK(do_dat_exception) /* 39 */
PGM_CHECK(do_dat_exception) /* 3a */
PGM_CHECK(do_dat_exception) /* 3b */
PGM_CHECK_DEFAULT /* 3c */
-PGM_CHECK_DEFAULT /* 3d */
-PGM_CHECK_DEFAULT /* 3e */
+PGM_CHECK(do_secure_storage_access) /* 3d */
+PGM_CHECK(do_non_secure_storage_access) /* 3e */
PGM_CHECK_DEFAULT /* 3f */
PGM_CHECK(monitor_event_exception) /* 40 */
PGM_CHECK_DEFAULT /* 41 */
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 7b0bb475c166..7bd86ebc882f 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -38,6 +38,7 @@
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
+#include <asm/uv.h>
#include "../kernel/entry.h"
#define __FAIL_ADDR_MASK -4096L
@@ -816,3 +817,80 @@ static int __init pfault_irq_init(void)
early_initcall(pfault_irq_init);
#endif /* CONFIG_PFAULT */
+
+#if IS_ENABLED(CONFIG_PGSTE)
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ struct page *page;
+ int rc;
+
+ switch (get_fault_type(regs)) {
+ case USER_FAULT:
+ mm = current->mm;
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, addr);
+ if (!vma) {
+ up_read(&mm->mmap_sem);
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ break;
+ }
+ page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
+ if (IS_ERR_OR_NULL(page)) {
+ up_read(&mm->mmap_sem);
+ break;
+ }
+ if (arch_make_page_accessible(page))
+ send_sig(SIGSEGV, current, 0);
+ put_page(page);
+ up_read(&mm->mmap_sem);
+ break;
+ case KERNEL_FAULT:
+ page = phys_to_page(addr);
+ if (unlikely(!try_get_page(page)))
+ break;
+ rc = arch_make_page_accessible(page);
+ put_page(page);
+ if (rc)
+ BUG();
+ break;
+ case VDSO_FAULT:
+ /* fallthrough */
+ case GMAP_FAULT:
+ /* fallthrough */
+ default:
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ }
+}
+NOKPROBE_SYMBOL(do_secure_storage_access);
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+ if (get_fault_type(regs) != GMAP_FAULT) {
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+}
+NOKPROBE_SYMBOL(do_non_secure_storage_access);
+
+#else
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+#endif
--
2.25.0
^ permalink raw reply related
* [PATCH v3 37/37] KVM: s390: protvirt: introduce and enable KVM_CAP_S390_PROTECTED
From: Christian Borntraeger @ 2020-02-20 10:40 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank
Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
Vasily Gorbik
In-Reply-To: <20200220104020.5343-1-borntraeger@de.ibm.com>
Now that everything is in place, we can announce the feature.
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
arch/s390/kvm/kvm-s390.c | 3 +++
include/uapi/linux/kvm.h | 1 +
2 files changed, 4 insertions(+)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d79ccd34b5cb..e5f823840c29 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -574,6 +574,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_BPB:
r = test_facility(82);
break;
+ case KVM_CAP_S390_PROTECTED:
+ r = is_prot_virt_host();
+ break;
default:
r = 0;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 60efbbc86209..2551de8bec57 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1015,6 +1015,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_NISV_TO_USER 177
#define KVM_CAP_ARM_INJECT_EXT_DABT 178
#define KVM_CAP_S390_VCPU_RESETS 179
+#define KVM_CAP_S390_PROTECTED 180
#ifdef KVM_CAP_IRQ_ROUTING
--
2.25.0
^ permalink raw reply related
* [PATCH v3 34/37] DOCUMENTATION: Protected virtual machine introduction and IPL
From: Christian Borntraeger @ 2020-02-20 10:40 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank
Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
Vasily Gorbik, Janosch Frank
In-Reply-To: <20200220104020.5343-1-borntraeger@de.ibm.com>
From: Janosch Frank <frankja@linux.ibm.com>
Add documentation about protected KVM guests and description of changes
that are necessary to move a KVM VM into Protected Virtualization mode.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
[borntraeger@de.ibm.com: fixing and conversion to rst]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
Documentation/virt/kvm/index.rst | 2 +
Documentation/virt/kvm/s390-pv-boot.rst | 83 +++++++++++++++++
Documentation/virt/kvm/s390-pv.rst | 116 ++++++++++++++++++++++++
MAINTAINERS | 1 +
4 files changed, 202 insertions(+)
create mode 100644 Documentation/virt/kvm/s390-pv-boot.rst
create mode 100644 Documentation/virt/kvm/s390-pv.rst
diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst
index 774deaebf7fa..dcc252634cf9 100644
--- a/Documentation/virt/kvm/index.rst
+++ b/Documentation/virt/kvm/index.rst
@@ -18,6 +18,8 @@ KVM
nested-vmx
ppc-pv
s390-diag
+ s390-pv
+ s390-pv-boot
timekeeping
vcpu-requests
diff --git a/Documentation/virt/kvm/s390-pv-boot.rst b/Documentation/virt/kvm/s390-pv-boot.rst
new file mode 100644
index 000000000000..b762df206ab7
--- /dev/null
+++ b/Documentation/virt/kvm/s390-pv-boot.rst
@@ -0,0 +1,83 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+s390 (IBM Z) Boot/IPL of Protected VMs
+======================================
+
+Summary
+-------
+The memory of Protected Virtual Machines (PVMs) is not accessible to
+I/O or the hypervisor. In those cases where the hypervisor needs to
+access the memory of a PVM, that memory must be made accessible.
+Memory made accessible to the hypervisor will be encrypted. See
+:doc:`s390-pv` for details.
+
+On IPL (boot) a small plaintext bootloader is started, which provides
+information about the encrypted components and necessary metadata to
+KVM to decrypt the protected virtual machine.
+
+Based on this data, KVM will make the protected virtual machine known
+to the Ultravisor (UV) and instruct it to secure the memory of the PVM,
+decrypt the components and verify the data and address list hashes, to
+ensure integrity. Afterwards KVM can run the PVM via the SIE
+instruction which the UV will intercept and execute on KVM's behalf.
+
+As the guest image is just like an opaque kernel image that does the
+switch into PV mode itself, the user can load encrypted guest
+executables and data via every available method (network, dasd, scsi,
+direct kernel, ...) without the need to change the boot process.
+
+
+Diag308
+-------
+This diagnose instruction is the basic mechanism to handle IPL and
+related operations for virtual machines. The VM can set and retrieve
+IPL information blocks, that specify the IPL method/devices and
+request VM memory and subsystem resets, as well as IPLs.
+
+For PVMs this concept has been extended with new subcodes:
+
+* Subcode 8: Set an IPL Information Block of type 5 (information block
+  for PVMs)
+* Subcode 9: Store the saved block in guest memory
+* Subcode 10: Move into Protected Virtualization mode
+
+The new PV load-device-specific-parameters field specifies all data
+that is necessary to move into PV mode.
+
+* PV Header origin
+* PV Header length
+* List of Components composed of
+ * AES-XTS Tweak prefix
+ * Origin
+ * Size
+
+The PV header contains the keys and hashes, which the UV will use to
+decrypt and verify the PV, as well as control flags and a start PSW.
+
+The components are for instance an encrypted kernel, kernel parameters
+and initrd. The components are decrypted by the UV.
+
+After the initial import of the encrypted data, all defined pages will
+contain the guest content. All non-specified pages will start out as
+zero pages on first access.
+
+
+When running in protected virtualization mode, some subcodes will result in
+exceptions or return error codes.
+
+Subcodes 4 and 7, which specify operations that do not clear the guest
+memory, will result in specification exceptions. This is because the
+UV will clear all memory when a secure VM is removed, and therefore
+non-clearing IPL subcodes are not allowed.
+
+Subcodes 8, 9, 10 will result in specification exceptions.
+Re-IPL into a protected mode is only possible via a detour into
+non-protected mode.
+
+Keys
+----
+Every CEC will have a unique public key to enable tooling to build
+encrypted images.
+See `s390-tools <https://github.com/ibm-s390-tools/s390-tools/>`_
+for the tooling.
diff --git a/Documentation/virt/kvm/s390-pv.rst b/Documentation/virt/kvm/s390-pv.rst
new file mode 100644
index 000000000000..27fe03eaeaad
--- /dev/null
+++ b/Documentation/virt/kvm/s390-pv.rst
@@ -0,0 +1,116 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+s390 (IBM Z) Ultravisor and Protected VMs
+=========================================
+
+Summary
+-------
+Protected virtual machines (PVM) are KVM VMs that do not allow KVM to
+access VM state like guest memory or guest registers. Instead, the
+PVMs are mostly managed by a new entity called Ultravisor (UV). The UV
+provides an API that can be used by PVMs and KVM to request management
+actions.
+
+Each guest starts in the non-protected mode and then may make a
+request to transition into protected mode. On transition, KVM
+registers the guest and its VCPUs with the Ultravisor and prepares
+everything for running it.
+
+The Ultravisor will secure and decrypt the guest's boot memory
+(i.e. kernel/initrd). It will safeguard state changes like VCPU
+starts/stops and injected interrupts while the guest is running.
+
+As access to the guest's state, such as the SIE state description, is
+normally needed to be able to run a VM, some changes have been made in
+the behavior of the SIE instruction. A new format 4 state description
+has been introduced, where some fields have different meanings for a
+PVM. SIE exits are minimized as much as possible to improve speed and
+reduce exposed guest state.
+
+
+Interrupt injection
+-------------------
+Interrupt injection is safeguarded by the Ultravisor. As KVM doesn't
+have access to the VCPUs' lowcores, injection is handled via the
+format 4 state description.
+
+Machine check, external, IO and restart interruptions each can be
+injected on SIE entry via a bit in the interrupt injection control
+field (offset 0x54). If the guest cpu is not enabled for the interrupt
+at the time of injection, a validity interception is recognized. The
+format 4 state description contains fields in the interception data
+block where data associated with the interrupt can be transported.
+
+Program and Service Call exceptions have another layer of
+safeguarding; they can only be injected for instructions that have
+been intercepted into KVM. The exceptions need to be a valid outcome
+of an instruction emulation by KVM, e.g. we can never inject an
+addressing exception as they are reported by SIE since KVM has no
+access to the guest memory.
+
+
+Mask notification interceptions
+-------------------------------
+In order to be notified when a PVM enables a certain class of
+interrupt, KVM cannot intercept lctl(g) and lpsw(e) anymore. As a
+replacement, two new interception codes have been introduced: One
+indicating that the contents of CRs 0, 6, or 14 have been changed,
+indicating different interruption subclasses; and one indicating that
+PSW bit 13 has been changed, indicating that a machine check
+intervention was requested and those are now enabled.
+
+Instruction emulation
+---------------------
+With the format 4 state description for PVMs, the SIE instruction already
+interprets more instructions than it does with format 2. It is not able
+to interpret every instruction, but needs to hand some tasks to KVM;
+therefore, the SIE and the ultravisor safeguard emulation inputs and outputs.
+
+The control structures associated with SIE provide the Secure
+Instruction Data Area (SIDA), the Interception Parameters (IP) and the
+Secure Interception General Register Save Area. Guest GRs and most of
+the instruction data, such as I/O data structures, are filtered.
+Instruction data is copied to and from the Secure Instruction Data
+Area (SIDA) when needed. Guest GRs are put into / retrieved from the
+Secure Interception General Register Save Area.
+
+Only GR values needed to emulate an instruction will be copied into this
+save area and the real register numbers will be hidden.
+
+The Interception Parameters state description field still contains the
+bytes of the instruction text, but with pre-set register values
+instead of the actual ones. I.e. each instruction always uses the same
+instruction text, in order not to leak guest instruction text.
+This also implies that the register content that a guest had in r<n>
+may be in r<m> from the hypervisor's point of view.
+
+The Secure Instruction Data Area contains instruction storage
+data. Instruction data, i.e. data being referenced by an instruction
+like the SCCB for sclp, is moved via the SIDA. When an instruction is
+intercepted, the SIE will only allow data and program interrupts for
+this instruction to be moved to the guest via the two data areas
+discussed before. Other data is either ignored or results in validity
+interceptions.
+
+
+Instruction emulation interceptions
+-----------------------------------
+There are two types of SIE secure instruction intercepts: the normal
+and the notification type. Normal secure instruction intercepts will
+make the guest pending for instruction completion of the intercepted
+instruction type, i.e. on SIE entry it is attempted to complete
+emulation of the instruction with the data provided by KVM. That might
+be a program exception or instruction completion.
+
+The notification type intercepts inform KVM about guest environment
+changes due to guest instruction interpretation. Such an interception
+is recognized, for example, for the store prefix instruction to provide
+the new lowcore location. On SIE reentry, any KVM data in the data areas
+is ignored and execution continues as if the guest instruction had
+completed. For that reason KVM is not allowed to inject a program
+interrupt.
+
+Links
+-----
+`KVM Forum 2019 presentation <https://static.sched.com/hosted_files/kvmforum2019/3b/ibm_protected_vms_s390x.pdf>`_
diff --git a/MAINTAINERS b/MAINTAINERS
index a0d86490c2c6..97a70647c93a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9209,6 +9209,7 @@ L: kvm@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
S: Supported
+F: Documentation/virt/kvm/s390*
F: arch/s390/include/uapi/asm/kvm*
F: arch/s390/include/asm/gmap.h
F: arch/s390/include/asm/kvm*
--
2.25.0
^ permalink raw reply related
* [PATCH v3 00/37] KVM: s390: Add support for protected VMs
From: Christian Borntraeger @ 2020-02-20 10:39 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank, Andrew Morton
Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
Vasily Gorbik, Andrea Arcangeli, linux-mm
mm people: This series contains a "pretty small" common code memory
management change that will allow paging, guest backing with files etc
almost just like normal VMs. It should be a no-op for all architectures
not opting in. And it should be usable for others that also try to get
notified on "the pages are in the process of being used for things like
I/O". This time I included error handling and an ACK from Will Deacon.
mm-related patches CCed on linux-mm, the complete list can be found on
the KVM and linux-s390 list.
Andrew, any chance to either take " mm:gup/writeback: add callbacks for
inaccessible pages" or ACK so that I can take it?
Overview
--------
Protected VMs (PVM) are KVM VMs, where KVM can't access the VM's state
like guest memory and guest registers anymore. Instead the PVMs are
mostly managed by a new entity called Ultravisor (UV), which provides
an API, so KVM and the PV can request management actions.
PVMs are encrypted at rest and protected from hypervisor access while
running. They switch from a normal operation into protected mode, so
we can still use the standard boot process to load an encrypted blob
and then move it into protected mode.
Rebooting is only possible by passing through the unprotected/normal
mode and switching to protected again.
All patches are in the protvirtv4 branch of the korg s390 kvm git
https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git/log/?h=protvirtv5
Claudio presented the technology at his presentation at KVM Forum
2019.
https://static.sched.com/hosted_files/kvmforum2019/3b/ibm_protected_vms_s390x.pdf
v2 -> v3
- rebase against v5.6-rc2
- move some checks into the callers
- typo fixes
- extend UV query size
- do a tlb flush when entering/exiting protected mode
- more comments
- change interface to PV_ENABLE/DISABLE instead of vcpu/vm
create/destroy
- lockdep checks for *is_protected calls
- locking improvements
- move facility 161 to qemu
- checkpatch fixes
- merged error handling in mm patch
- removed vcpu pv commands
- use mp_state for setting the IPL PSW
v1 -> v2
- rebase on top of kvm/master
- pipe through rc and rrc. This might have created some churn here and
there
- turn off sclp masking when rebooting into "unsecure"
- memory management simplification
- prefix page handling now via intercept 112
- io interrupt intervention request fix (do not use GISA)
- api.txt conversion to rst
- sample patches on top of mm/gup/writeback
- tons of review feedback
- kvm_uv debug feature fixes and unifications
- ultravisor information for /sys/firmware
-
RFCv2 -> v1 (you can diff the protvirtv2 and the protvirtv3 branch)
- tons of review feedback integrated (see mail thread)
- memory management now complete and working
- Documentation patches merged
- interrupt patches merged
- CONFIG_KVM_S390_PROTECTED_VIRTUALIZATION_HOST removed
- SIDA interface integrated into memop
- for merged patches I removed reviews that were not in all patches
Christian Borntraeger (5):
KVM: s390/mm: Make pages accessible before destroying the guest
KVM: s390: protvirt: Add SCLP interrupt handling
KVM: s390: protvirt: do not inject interrupts after start
KVM: s390: rstify new ioctls in api.rst
KVM: s390: protvirt: introduce and enable KVM_CAP_S390_PROTECTED
Claudio Imbrenda (3):
mm:gup/writeback: add callbacks for inaccessible pages
s390/mm: provide memory management functions for protected KVM guests
KVM: s390/mm: handle guest unpin events
Janosch Frank (24):
KVM: s390: protvirt: Add UV debug trace
KVM: s390: add new variants of UV CALL
KVM: s390: protvirt: Add initial vm and cpu lifecycle handling
KVM: s390: protvirt: Add KVM api documentation
KVM: s390: protvirt: Secure memory is not mergeable
KVM: s390: protvirt: Handle SE notification interceptions
KVM: s390: protvirt: Instruction emulation
KVM: s390: protvirt: Handle spec exception loops
KVM: s390: protvirt: Add new gprs location handling
KVM: S390: protvirt: Introduce instruction data area bounce buffer
KVM: s390: protvirt: handle secure guest prefix pages
KVM: s390: protvirt: Write sthyi data to instruction data area
KVM: s390: protvirt: STSI handling
KVM: s390: protvirt: disallow one_reg
KVM: s390: protvirt: Do only reset registers that are accessible
KVM: s390: protvirt: Only sync fmt4 registers
KVM: s390: protvirt: Add program exception injection
KVM: s390: protvirt: UV calls in support of diag308 0, 1
KVM: s390: protvirt: Report CPU state to Ultravisor
KVM: s390: protvirt: Support cmd 5 operation state
KVM: s390: protvirt: Mask PSW interrupt bits for interception 104 and
112
KVM: s390: protvirt: Add UV cpu reset calls
DOCUMENTATION: Protected virtual machine introduction and IPL
s390: protvirt: Add sysfs firmware interface for Ultravisor
information
Michael Mueller (1):
KVM: s390: protvirt: Implement interrupt injection
Ulrich Weigand (1):
KVM: s390/interrupt: do not pin adapter interrupt pages
Vasily Gorbik (3):
s390/protvirt: introduce host side setup
s390/protvirt: add ultravisor initialization
s390/mm: add (non)secure page access exceptions handlers
.../admin-guide/kernel-parameters.txt | 5 +
Documentation/virt/kvm/api.rst | 91 +++-
Documentation/virt/kvm/devices/s390_flic.rst | 11 +-
Documentation/virt/kvm/index.rst | 2 +
Documentation/virt/kvm/s390-pv-boot.rst | 83 +++
Documentation/virt/kvm/s390-pv.rst | 116 ++++
MAINTAINERS | 1 +
arch/s390/boot/Makefile | 2 +-
arch/s390/boot/uv.c | 21 +-
arch/s390/include/asm/gmap.h | 6 +
arch/s390/include/asm/kvm_host.h | 113 +++-
arch/s390/include/asm/mmu.h | 2 +
arch/s390/include/asm/mmu_context.h | 1 +
arch/s390/include/asm/page.h | 5 +
arch/s390/include/asm/pgtable.h | 35 +-
arch/s390/include/asm/uv.h | 252 ++++++++-
arch/s390/kernel/Makefile | 1 +
arch/s390/kernel/pgm_check.S | 4 +-
arch/s390/kernel/setup.c | 9 +-
arch/s390/kernel/uv.c | 413 ++++++++++++++
arch/s390/kvm/Makefile | 2 +-
arch/s390/kvm/diag.c | 4 +
arch/s390/kvm/intercept.c | 115 +++-
arch/s390/kvm/interrupt.c | 399 ++++++++------
arch/s390/kvm/kvm-s390.c | 509 +++++++++++++++---
arch/s390/kvm/kvm-s390.h | 51 +-
arch/s390/kvm/priv.c | 11 +-
arch/s390/kvm/pv.c | 286 ++++++++++
arch/s390/mm/fault.c | 78 +++
arch/s390/mm/gmap.c | 65 ++-
include/linux/gfp.h | 6 +
include/uapi/linux/kvm.h | 43 +-
mm/gup.c | 15 +-
mm/page-writeback.c | 5 +
34 files changed, 2442 insertions(+), 320 deletions(-)
create mode 100644 Documentation/virt/kvm/s390-pv-boot.rst
create mode 100644 Documentation/virt/kvm/s390-pv.rst
create mode 100644 arch/s390/kernel/uv.c
create mode 100644 arch/s390/kvm/pv.c
--
2.25.0
^ permalink raw reply
* Re: [PATCH v3 05/17] s390x: protvirt: Support unpack facility
From: Cornelia Huck @ 2020-02-20 10:39 UTC (permalink / raw)
To: Janosch Frank; +Cc: qemu-s390x, mihajlov, qemu-devel, david
In-Reply-To: <20200214151636.8764-6-frankja@linux.ibm.com>
On Fri, 14 Feb 2020 10:16:24 -0500
Janosch Frank <frankja@linux.ibm.com> wrote:
> When a guest has saved a ipib of type 5 and call diagnose308 with
s/call/calls/
> subcode 10, we have to setup the protected processing environment via
> Ultravisor calls. The calls are done by KVM and are exposed via an API.
>
> The following steps are necessary:
> 1. Create a VM (register it with the Ultravisor)
> 2. Create secure CPUs for all of our current cpus
> 3. Forward the secure header to the Ultravisor (has all information on
> how to decrypt the image and VM information)
> 4. Protect image pages from the host and decrypt them
> 5. Verify the image integrity
>
> Only after step 5 a protected VM is allowed to run.
>
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> [Changes
> to machine]
> ---
> hw/s390x/Makefile.objs | 1 +
> hw/s390x/ipl.c | 32 ++++++
> hw/s390x/ipl.h | 2 +
> hw/s390x/pv.c | 154 ++++++++++++++++++++++++++++
> hw/s390x/pv.h | 38 +++++++
> hw/s390x/s390-virtio-ccw.c | 79 ++++++++++++++
> include/hw/s390x/s390-virtio-ccw.h | 1 +
> target/s390x/cpu.c | 4 +
> target/s390x/cpu.h | 1 +
> target/s390x/cpu_features_def.inc.h | 1 +
> 10 files changed, 313 insertions(+)
> create mode 100644 hw/s390x/pv.c
> create mode 100644 hw/s390x/pv.h
(...)
> diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
> new file mode 100644
> index 0000000000..5b6a26cba9
> --- /dev/null
> +++ b/hw/s390x/pv.c
> @@ -0,0 +1,154 @@
> +/*
> + * Secure execution functions
> + *
> + * Copyright IBM Corp. 2019
Update the year?
> + * Author(s):
> + * Janosch Frank <frankja@linux.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
(...)
> +void s390_pv_vm_destroy(void)
> +{
> + s390_pv_cmd_exit(KVM_PV_VM_DESTROY, NULL);
Why does this exit()? Should Never Happen?
> +}
> +
> +int s390_pv_vcpu_create(CPUState *cs)
> +{
> + int rc;
> +
> + rc = s390_pv_cmd_vcpu(cs, KVM_PV_VCPU_CREATE, NULL);
> + if (!rc) {
> + S390_CPU(cs)->env.pv = true;
> + }
> +
> + return rc;
> +}
> +
> +void s390_pv_vcpu_destroy(CPUState *cs)
> +{
> + s390_pv_cmd_vcpu_exit(cs, KVM_PV_VCPU_DESTROY, NULL);
ditto
> + S390_CPU(cs)->env.pv = false;
> +}
(...)
> +void s390_pv_perf_clear_reset(void)
> +{
> + s390_pv_cmd_exit(KVM_PV_VM_PREP_RESET, NULL);
And here. Or is that because the machine should not be left around in
an undefined state?
> +}
> +
> +int s390_pv_verify(void)
> +{
> + return s390_pv_cmd(KVM_PV_VM_VERIFY, NULL);
> +}
> +
> +void s390_pv_unshare(void)
> +{
> + s390_pv_cmd_exit(KVM_PV_VM_UNSHARE_ALL, NULL);
> +}
> diff --git a/hw/s390x/pv.h b/hw/s390x/pv.h
> new file mode 100644
> index 0000000000..7d20bdd12e
> --- /dev/null
> +++ b/hw/s390x/pv.h
> @@ -0,0 +1,38 @@
> +/*
> + * Protected Virtualization header
> + *
> + * Copyright IBM Corp. 2019
Year++
> + * Author(s):
> + * Janosch Frank <frankja@linux.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
> +
> +#ifndef HW_S390_PV_H
> +#define HW_S390_PV_H
> +
> +#ifdef CONFIG_KVM
> +int s390_pv_vm_create(void);
> +void s390_pv_vm_destroy(void);
> +void s390_pv_vcpu_destroy(CPUState *cs);
> +int s390_pv_vcpu_create(CPUState *cs);
> +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
> +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak);
> +void s390_pv_perf_clear_reset(void);
> +int s390_pv_verify(void);
> +void s390_pv_unshare(void);
> +#else
> +int s390_pv_vm_create(void) { return 0; }
I'm wondering why you return 0 here (and below). These functions should
not be called for !KVM, but just to help catch logic errors, use -EINVAL
or so?
> +void s390_pv_vm_destroy(void) {}
> +void s390_pv_vcpu_destroy(CPUState *cs) {}
> +int s390_pv_vcpu_create(CPUState *cs) { return 0; }
> +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; }
> +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0: }
> +void s390_pv_perf_clear_reset(void) {}
> +int s390_pv_verify(void) { return 0; }
> +void s390_pv_unshare(void) {}
> +#endif
> +
> +#endif /* HW_S390_PV_H */
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index e759eb5f83..5fa4372083 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -41,6 +41,7 @@
> #include "hw/qdev-properties.h"
> #include "hw/s390x/tod.h"
> #include "sysemu/sysemu.h"
> +#include "hw/s390x/pv.h"
>
> S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
> {
> @@ -240,9 +241,11 @@ static void s390_create_sclpconsole(const char *type, Chardev *chardev)
> static void ccw_init(MachineState *machine)
> {
> int ret;
> + S390CcwMachineState *ms = S390_CCW_MACHINE(machine);
> VirtualCssBus *css_bus;
> DeviceState *dev;
>
> + ms->pv = false;
I'm wondering why you need to init this to false - isn't it already
zeroed out?
> s390_sclp_init();
> /* init memory + setup max page size. Required for the CPU model */
> s390_memory_init(machine->ram_size);
> @@ -318,10 +321,58 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg)
> s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
> }
>
> +static int s390_machine_pv_secure(S390CcwMachineState *ms)
> +{
> + CPUState *t;
> + int rc;
> +
> + /* Create SE VM */
> + rc = s390_pv_vm_create();
> + if (rc) {
> + return rc;
> + }
> +
> + CPU_FOREACH(t) {
> + rc = s390_pv_vcpu_create(t);
> + if (rc) {
> + return rc;
No need to undo something on error?
> + }
> + }
> +
> + ms->pv = true;
> +
> + /* Set SE header and unpack */
> + rc = s390_ipl_prepare_pv_header();
> + if (rc) {
> + return rc;
Also here.
> + }
> +
> + /* Decrypt image */
> + rc = s390_ipl_pv_unpack();
> + if (rc) {
> + return rc;
And here.
> + }
> +
> + /* Verify integrity */
> + rc = s390_pv_verify();
> + return rc;
And here.
> +}
(...)
> diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
> index 8da1905485..1dbd84b9d7 100644
> --- a/target/s390x/cpu.c
> +++ b/target/s390x/cpu.c
> @@ -37,6 +37,8 @@
> #include "sysemu/hw_accel.h"
> #include "hw/qdev-properties.h"
> #ifndef CONFIG_USER_ONLY
> +#include "hw/s390x/s390-virtio-ccw.h"
> +#include "hw/s390x/pv.h"
> #include "hw/boards.h"
> #include "sysemu/arch_init.h"
> #include "sysemu/sysemu.h"
> @@ -191,6 +193,7 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp)
>
> #if !defined(CONFIG_USER_ONLY)
> MachineState *ms = MACHINE(qdev_get_machine());
> + S390CcwMachineState *ccw = S390_CCW_MACHINE(ms);
I find the variable name a bit confusing... maybe ccw_ms?
> unsigned int max_cpus = ms->smp.max_cpus;
> if (cpu->env.core_id >= max_cpus) {
> error_setg(&err, "Unable to add CPU with core-id: %" PRIu32
> @@ -205,6 +208,7 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp)
> goto out;
> }
>
> + cpu->env.pv = ccw->pv;
So, if you add a cpu, it will inherit the pv state of the machine...
doesn't it need any setup?
> /* sync cs->cpu_index and env->core_id. The latter is needed for TCG. */
> cs->cpu_index = cpu->env.core_id;
> #endif
(...)
^ permalink raw reply
* Re: [PATCH v2 RESEND] mm/sparsemem: pfn_to_page is not valid yet on SPARSEMEM
From: Michal Hocko @ 2020-02-20 10:41 UTC (permalink / raw)
To: Baoquan He
Cc: linux-kernel, linux-mm, akpm, richardw.yang, david, osalvador,
dan.j.williams
In-Reply-To: <20200219030454.4844-1-bhe@redhat.com>
On Wed 19-02-20 11:04:54, Baoquan He wrote:
> From: Wei Yang <richardw.yang@linux.intel.com>
>
> When we use SPARSEMEM instead of SPARSEMEM_VMEMMAP, pfn_to_page()
> doesn't work before sparse_init_one_section() is called. This leads to a
> crash when hotplugging memory:
>
> [ 41.839170] BUG: unable to handle page fault for address: 0000000006400000
> [ 41.840663] #PF: supervisor write access in kernel mode
> [ 41.841822] #PF: error_code(0x0002) - not-present page
> [ 41.842970] PGD 0 P4D 0
> [ 41.843538] Oops: 0002 [#1] SMP PTI
> [ 41.844125] CPU: 3 PID: 221 Comm: kworker/u16:1 Tainted: G W 5.5.0-next-20200205+ #343
> [ 41.845659] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> [ 41.846977] Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> [ 41.847904] RIP: 0010:__memset+0x24/0x30
> [ 41.848660] Code: cc cc cc cc cc cc 0f 1f 44 00 00 49 89 f9 48 89 d1 83 e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 <f3> 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 f3
> [ 41.851836] RSP: 0018:ffffb43ac0373c80 EFLAGS: 00010a87
> [ 41.852686] RAX: ffffffffffffffff RBX: ffff8a1518800000 RCX: 0000000000050000
> [ 41.853824] RDX: 0000000000000000 RSI: 00000000000000ff RDI: 0000000006400000
> [ 41.854967] RBP: 0000000000140000 R08: 0000000000100000 R09: 0000000006400000
> [ 41.856107] R10: 0000000000000000 R11: 0000000000000002 R12: 0000000000000000
> [ 41.857255] R13: 0000000000000028 R14: 0000000000000000 R15: ffff8a153ffd9280
> [ 41.858414] FS: 0000000000000000(0000) GS:ffff8a153ab00000(0000) knlGS:0000000000000000
> [ 41.859703] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 41.860627] CR2: 0000000006400000 CR3: 0000000136fca000 CR4: 00000000000006e0
> [ 41.861716] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 41.862680] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 41.863628] Call Trace:
> [ 41.863983] sparse_add_section+0x1c9/0x26a
> [ 41.864570] __add_pages+0xbf/0x150
> [ 41.865057] add_pages+0x12/0x60
> [ 41.865489] add_memory_resource+0xc8/0x210
> [ 41.866017] ? wake_up_q+0xa0/0xa0
> [ 41.866416] __add_memory+0x62/0xb0
> [ 41.866825] acpi_memory_device_add+0x13f/0x300
> [ 41.867410] acpi_bus_attach+0xf6/0x200
> [ 41.867890] acpi_bus_scan+0x43/0x90
> [ 41.868448] acpi_device_hotplug+0x275/0x3d0
> [ 41.868972] acpi_hotplug_work_fn+0x1a/0x30
> [ 41.869473] process_one_work+0x1a7/0x370
> [ 41.869953] worker_thread+0x30/0x380
> [ 41.870396] ? flush_rcu_work+0x30/0x30
> [ 41.870846] kthread+0x112/0x130
> [ 41.871236] ? kthread_create_on_node+0x60/0x60
> [ 41.871770] ret_from_fork+0x35/0x40
>
> We should use memmap as it did.
>
> Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug")
> Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
> Acked-by: David Hildenbrand <david@redhat.com>
> Reviewed-by: Baoquan He <bhe@redhat.com>
> CC: Dan Williams <dan.j.williams@intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
> ---
> v1->v2:
> Add trace printed by triggered bug to log.
>
> v1 can be found here:
> http://lkml.kernel.org/r/20200206125343.9070-1-richardw.yang@linux.intel.com
>
> mm/sparse.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index c184b69460b7..596b2a45b100 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -876,7 +876,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
> * Poison uninitialized struct pages in order to catch invalid flags
> * combinations.
> */
> - page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
> + page_init_poison(memmap, sizeof(struct page) * nr_pages);
>
> ms = __nr_to_section(section_nr);
> set_section_nid(section_nr, nid);
> --
> 2.17.2
>
--
Michal Hocko
SUSE Labs
^ permalink raw reply
* Re: [dpdk-dev] [PATCH] examples/ipsec-secgw: extend inline session to non AES-GCM
From: Ananyev, Konstantin @ 2020-02-20 10:41 UTC (permalink / raw)
To: Akhil Goyal, Iremonger, Bernard
Cc: dev@dpdk.org, stable@dpdk.org, Ankur Dwivedi,
Narayana Prasad Raju Athreya, Anoob Joseph
In-Reply-To: <VE1PR04MB66398C838477113CC1FCD453E6130@VE1PR04MB6639.eurprd04.prod.outlook.com>
Hi Akhil,
> > > This patch extends creation of inline session to all the algorithms.
> > > Previously the inline session was enabled only for AES-GCM cipher.
> > >
> > > Fixes: 3a690d5a65e2 ("examples/ipsec-secgw: fix first packet with inline
> > > crypto")
> > >
> > > Signed-off-by: Ankur Dwivedi <adwivedi@marvell.com>
> > > ---
> > > examples/ipsec-secgw/sa.c | 26 ++++++++++++--------------
> > > 1 file changed, 12 insertions(+), 14 deletions(-)
> >
> > Acked-by: Anoob Joseph <anoobj@marvell.com>
> Acked-by: Akhil Goyal <akhil.goyal@nxp.com>
>
> Hi Konstantin,
>
> I am about to merge this patch. Do you have any issues on this.
No objections, LGTM.
Konstantin
^ permalink raw reply
* Re: [PATCH v2] util/async: make bh_aio_poll() O(1)
From: Paolo Bonzini @ 2020-02-20 10:40 UTC (permalink / raw)
To: Stefan Hajnoczi, qemu-devel; +Cc: Kevin Wolf, Fam Zheng, qemu-block, Max Reitz
In-Reply-To: <20200219175348.1161536-1-stefanha@redhat.com>
On 19/02/20 18:53, Stefan Hajnoczi wrote:
> The ctx->first_bh list contains all created BHs, including those that
> are not scheduled. The list is iterated by the event loop and therefore
> has O(n) time complexity with respect to the number of created BHs.
>
> Rewrite BHs so that only scheduled or deleted BHs are enqueued.
> Only BHs that actually require action will be iterated.
>
> One semantic change is required: qemu_bh_delete() enqueues the BH and
> therefore invokes aio_notify(). The
> tests/test-aio.c:test_source_bh_delete_from_cb() test case assumed that
> g_main_context_iteration(NULL, false) returns false after
> qemu_bh_delete() but it now returns true for one iteration. Fix up the
> test case.
>
> This patch makes aio_compute_timeout() and aio_bh_poll() drop from a CPU
> profile reported by perf-top(1). Previously they combined to 9% CPU
> utilization when AioContext polling is commented out and the guest has 2
> virtio-blk,num-queues=1 and 99 virtio-blk,num-queues=32 devices.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> v2:
> * Use QSLIST for BHs and QSIMPLEQ for BHListSlices [Paolo]
> (Note that I replaced bh = atomic_rcu_read(&first_bh) with
> QSLIST_FOREACH(&bh_list) so there is no memory ordering but I think
> this is safe.)
I have sent an implementation of QSLIST_FOREACH_RCU. I doubt you'd see
any problem in practice, since RCU accesses only need a compiler
barrier, but it's cleaner.
With that changed,
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Any further optimization of aio_compute_timeout() and aio_ctx_check()
can be done on top.
Paolo
> ---
> include/block/aio.h | 20 +++-
> tests/test-aio.c | 3 +-
> util/async.c | 236 +++++++++++++++++++++++++-------------------
> 3 files changed, 157 insertions(+), 102 deletions(-)
>
> diff --git a/include/block/aio.h b/include/block/aio.h
> index 7ba9bd7874..1a2ce9ca26 100644
> --- a/include/block/aio.h
> +++ b/include/block/aio.h
> @@ -51,6 +51,19 @@ struct ThreadPool;
> struct LinuxAioState;
> struct LuringState;
>
> +/*
> + * Each aio_bh_poll() call carves off a slice of the BH list, so that newly
> + * scheduled BHs are not processed until the next aio_bh_poll() call. All
> + * active aio_bh_poll() calls chain their slices together in a list, so that
> + * nested aio_bh_poll() calls process all scheduled bottom halves.
> + */
> +typedef QSLIST_HEAD(, QEMUBH) BHList;
> +typedef struct BHListSlice BHListSlice;
> +struct BHListSlice {
> + BHList bh_list;
> + QSIMPLEQ_ENTRY(BHListSlice) next;
> +};
> +
> struct AioContext {
> GSource source;
>
> @@ -91,8 +104,11 @@ struct AioContext {
> */
> QemuLockCnt list_lock;
>
> - /* Anchor of the list of Bottom Halves belonging to the context */
> - struct QEMUBH *first_bh;
> + /* Bottom Halves pending aio_bh_poll() processing */
> + BHList bh_list;
> +
> + /* Chained BH list slices for each nested aio_bh_poll() call */
> + QSIMPLEQ_HEAD(, BHListSlice) bh_slice_list;
>
> /* Used by aio_notify.
> *
> diff --git a/tests/test-aio.c b/tests/test-aio.c
> index 86fb73b3d5..8a46078463 100644
> --- a/tests/test-aio.c
> +++ b/tests/test-aio.c
> @@ -615,7 +615,8 @@ static void test_source_bh_delete_from_cb(void)
> g_assert_cmpint(data1.n, ==, data1.max);
> g_assert(data1.bh == NULL);
>
> - g_assert(!g_main_context_iteration(NULL, false));
> + assert(g_main_context_iteration(NULL, false));
> + assert(!g_main_context_iteration(NULL, false));
> }
>
> static void test_source_bh_delete_from_cb_many(void)
> diff --git a/util/async.c b/util/async.c
> index c192a24a61..2c8499cc29 100644
> --- a/util/async.c
> +++ b/util/async.c
> @@ -36,16 +36,76 @@
> /***********************************************************/
> /* bottom halves (can be seen as timers which expire ASAP) */
>
> +/* QEMUBH::flags values */
> +enum {
> + /* Already enqueued and waiting for aio_bh_poll() */
> + BH_PENDING = (1 << 0),
> +
> + /* Invoke the callback */
> + BH_SCHEDULED = (1 << 1),
> +
> + /* Delete without invoking callback */
> + BH_DELETED = (1 << 2),
> +
> + /* Delete after invoking callback */
> + BH_ONESHOT = (1 << 3),
> +
> + /* Schedule periodically when the event loop is idle */
> + BH_IDLE = (1 << 4),
> +};
> +
> struct QEMUBH {
> AioContext *ctx;
> QEMUBHFunc *cb;
> void *opaque;
> - QEMUBH *next;
> - bool scheduled;
> - bool idle;
> - bool deleted;
> + QSLIST_ENTRY(QEMUBH) next;
> + unsigned flags;
> };
>
> +/* Called concurrently from any thread */
> +static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
> +{
> + AioContext *ctx = bh->ctx;
> + unsigned old_flags;
> +
> + /*
> + * The memory barrier implicit in atomic_fetch_or makes sure that:
> + * 1. idle & any writes needed by the callback are done before the
> + * locations are read in the aio_bh_poll.
> + * 2. ctx is loaded before the callback has a chance to execute and bh
> + * could be freed.
> + */
> + old_flags = atomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
> + if (!(old_flags & BH_PENDING)) {
> + QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
> + }
> +
> + aio_notify(ctx);
> +}
> +
> +/* Only called from aio_bh_poll() and aio_ctx_finalize() */
> +static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
> +{
> + QEMUBH *bh = QSLIST_FIRST(head);
> +
> + if (!bh) {
> + return NULL;
> + }
> +
> + QSLIST_REMOVE_HEAD(head, next);
> +
> + /*
> + * The atomic_and is paired with aio_bh_enqueue(). The implicit memory
> + * barrier ensures that the callback sees all writes done by the scheduling
> + * thread. It also ensures that the scheduling thread sees the cleared
> + * flag before bh->cb has run, and thus will call aio_notify again if
> + * necessary.
> + */
> + *flags = atomic_fetch_and(&bh->flags,
> + ~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
> + return bh;
> +}
> +
> void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
> {
> QEMUBH *bh;
> @@ -55,15 +115,7 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
> .cb = cb,
> .opaque = opaque,
> };
> - qemu_lockcnt_lock(&ctx->list_lock);
> - bh->next = ctx->first_bh;
> - bh->scheduled = 1;
> - bh->deleted = 1;
> - /* Make sure that the members are ready before putting bh into list */
> - smp_wmb();
> - ctx->first_bh = bh;
> - qemu_lockcnt_unlock(&ctx->list_lock);
> - aio_notify(ctx);
> + aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
> }
>
> QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
> @@ -75,12 +127,6 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
> .cb = cb,
> .opaque = opaque,
> };
> - qemu_lockcnt_lock(&ctx->list_lock);
> - bh->next = ctx->first_bh;
> - /* Make sure that the members are ready before putting bh into list */
> - smp_wmb();
> - ctx->first_bh = bh;
> - qemu_lockcnt_unlock(&ctx->list_lock);
> return bh;
> }
>
> @@ -89,91 +135,56 @@ void aio_bh_call(QEMUBH *bh)
> bh->cb(bh->opaque);
> }
>
> -/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
> - * The count in ctx->list_lock is incremented before the call, and is
> - * not affected by the call.
> - */
> +/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
> int aio_bh_poll(AioContext *ctx)
> {
> - QEMUBH *bh, **bhp, *next;
> - int ret;
> - bool deleted = false;
> -
> - ret = 0;
> - for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
> - next = atomic_rcu_read(&bh->next);
> - /* The atomic_xchg is paired with the one in qemu_bh_schedule. The
> - * implicit memory barrier ensures that the callback sees all writes
> - * done by the scheduling thread. It also ensures that the scheduling
> - * thread sees the zero before bh->cb has run, and thus will call
> - * aio_notify again if necessary.
> - */
> - if (atomic_xchg(&bh->scheduled, 0)) {
> + BHListSlice slice;
> + BHListSlice *s;
> + int ret = 0;
> +
> + QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
> + QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
> +
> + while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
> + QEMUBH *bh;
> + unsigned flags;
> +
> + bh = aio_bh_dequeue(&s->bh_list, &flags);
> + if (!bh) {
> + QSIMPLEQ_REMOVE_HEAD(&ctx->bh_slice_list, next);
> + continue;
> + }
> +
> + if ((flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
> /* Idle BHs don't count as progress */
> - if (!bh->idle) {
> + if (!(flags & BH_IDLE)) {
> ret = 1;
> }
> - bh->idle = 0;
> aio_bh_call(bh);
> }
> - if (bh->deleted) {
> - deleted = true;
> + if (flags & (BH_DELETED | BH_ONESHOT)) {
> + g_free(bh);
> }
> }
>
> - /* remove deleted bhs */
> - if (!deleted) {
> - return ret;
> - }
> -
> - if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
> - bhp = &ctx->first_bh;
> - while (*bhp) {
> - bh = *bhp;
> - if (bh->deleted && !bh->scheduled) {
> - *bhp = bh->next;
> - g_free(bh);
> - } else {
> - bhp = &bh->next;
> - }
> - }
> - qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
> - }
> return ret;
> }
>
> void qemu_bh_schedule_idle(QEMUBH *bh)
> {
> - bh->idle = 1;
> - /* Make sure that idle & any writes needed by the callback are done
> - * before the locations are read in the aio_bh_poll.
> - */
> - atomic_mb_set(&bh->scheduled, 1);
> + aio_bh_enqueue(bh, BH_SCHEDULED | BH_IDLE);
> }
>
> void qemu_bh_schedule(QEMUBH *bh)
> {
> - AioContext *ctx;
> -
> - ctx = bh->ctx;
> - bh->idle = 0;
> - /* The memory barrier implicit in atomic_xchg makes sure that:
> - * 1. idle & any writes needed by the callback are done before the
> - * locations are read in the aio_bh_poll.
> - * 2. ctx is loaded before scheduled is set and the callback has a chance
> - * to execute.
> - */
> - if (atomic_xchg(&bh->scheduled, 1) == 0) {
> - aio_notify(ctx);
> - }
> + aio_bh_enqueue(bh, BH_SCHEDULED);
> }
>
> -
> /* This func is async.
> */
> void qemu_bh_cancel(QEMUBH *bh)
> {
> - atomic_mb_set(&bh->scheduled, 0);
> + atomic_and(&bh->flags, ~BH_SCHEDULED);
> }
>
> /* This func is async.The bottom half will do the delete action at the finial
> @@ -181,21 +192,16 @@ void qemu_bh_cancel(QEMUBH *bh)
> */
> void qemu_bh_delete(QEMUBH *bh)
> {
> - bh->scheduled = 0;
> - bh->deleted = 1;
> + aio_bh_enqueue(bh, BH_DELETED);
> }
>
> -int64_t
> -aio_compute_timeout(AioContext *ctx)
> +static int64_t aio_compute_bh_timeout(BHList *head, int timeout)
> {
> - int64_t deadline;
> - int timeout = -1;
> QEMUBH *bh;
>
> - for (bh = atomic_rcu_read(&ctx->first_bh); bh;
> - bh = atomic_rcu_read(&bh->next)) {
> - if (bh->scheduled) {
> - if (bh->idle) {
> + QSLIST_FOREACH(bh, head, next) {
> + if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
> + if (bh->flags & BH_IDLE) {
> /* idle bottom halves will be polled at least
> * every 10ms */
> timeout = 10000000;
> @@ -207,6 +213,28 @@ aio_compute_timeout(AioContext *ctx)
> }
> }
>
> + return timeout;
> +}
> +
> +int64_t
> +aio_compute_timeout(AioContext *ctx)
> +{
> + BHListSlice *s;
> + int64_t deadline;
> + int timeout = -1;
> +
> + timeout = aio_compute_bh_timeout(&ctx->bh_list, timeout);
> + if (timeout == 0) {
> + return 0;
> + }
> +
> + QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
> + timeout = aio_compute_bh_timeout(&s->bh_list, timeout);
> + if (timeout == 0) {
> + return 0;
> + }
> + }
> +
> deadline = timerlistgroup_deadline_ns(&ctx->tlg);
> if (deadline == 0) {
> return 0;
> @@ -237,15 +265,24 @@ aio_ctx_check(GSource *source)
> {
> AioContext *ctx = (AioContext *) source;
> QEMUBH *bh;
> + BHListSlice *s;
>
> atomic_and(&ctx->notify_me, ~1);
> aio_notify_accept(ctx);
>
> - for (bh = ctx->first_bh; bh; bh = bh->next) {
> - if (bh->scheduled) {
> + QSLIST_FOREACH(bh, &ctx->bh_list, next) {
> + if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
> return true;
> }
> }
> +
> + QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
> + QSLIST_FOREACH(bh, &s->bh_list, next) {
> + if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
> + return true;
> + }
> + }
> + }
> return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
> }
>
> @@ -265,6 +302,8 @@ static void
> aio_ctx_finalize(GSource *source)
> {
> AioContext *ctx = (AioContext *) source;
> + QEMUBH *bh;
> + unsigned flags;
>
> thread_pool_free(ctx->thread_pool);
>
> @@ -287,18 +326,15 @@ aio_ctx_finalize(GSource *source)
> assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
> qemu_bh_delete(ctx->co_schedule_bh);
>
> - qemu_lockcnt_lock(&ctx->list_lock);
> - assert(!qemu_lockcnt_count(&ctx->list_lock));
> - while (ctx->first_bh) {
> - QEMUBH *next = ctx->first_bh->next;
> + /* There must be no aio_bh_poll() calls going on */
> + assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
>
> + while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
> /* qemu_bh_delete() must have been called on BHs in this AioContext */
> - assert(ctx->first_bh->deleted);
> + assert(flags & BH_DELETED);
>
> - g_free(ctx->first_bh);
> - ctx->first_bh = next;
> + g_free(bh);
> }
> - qemu_lockcnt_unlock(&ctx->list_lock);
>
> aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
> event_notifier_cleanup(&ctx->notifier);
> @@ -445,6 +481,8 @@ AioContext *aio_context_new(Error **errp)
> AioContext *ctx;
>
> ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
> + QSLIST_INIT(&ctx->bh_list);
> + QSIMPLEQ_INIT(&ctx->bh_slice_list);
> aio_context_setup(ctx);
>
> ret = event_notifier_init(&ctx->notifier, false);
>
^ permalink raw reply
* Re: [PATCH v4 5/6] driver core: Rename deferred_probe_timeout and make it global
From: Rafael J. Wysocki @ 2020-02-20 10:42 UTC (permalink / raw)
To: John Stultz
Cc: lkml, Rob Herring, Rafael J. Wysocki, Kevin Hilman, Ulf Hansson,
Pavel Machek, Len Brown, Todd Kjos, Bjorn Andersson,
Liam Girdwood, Mark Brown, Thierry Reding, Linus Walleij,
Greg Kroah-Hartman, Linux PM
In-Reply-To: <20200220050440.45878-6-john.stultz@linaro.org>
On Thu, Feb 20, 2020 at 6:05 AM John Stultz <john.stultz@linaro.org> wrote:
>
> Since other subsystems (like regulator) have similar arbitrary
> timeouts for how long they try to resolve driver dependencies,
> rename deferred_probe_timeout to driver_deferred_probe_timeout
> and set it as global, so it can be shared.
Fair enough.
> Cc: Rob Herring <robh@kernel.org>
> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
> Cc: Kevin Hilman <khilman@kernel.org>
> Cc: Ulf Hansson <ulf.hansson@linaro.org>
> Cc: Pavel Machek <pavel@ucw.cz>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Todd Kjos <tkjos@google.com>
> Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
> Cc: Liam Girdwood <lgirdwood@gmail.com>
> Cc: Mark Brown <broonie@kernel.org>
> Cc: Thierry Reding <treding@nvidia.com>
> Cc: Linus Walleij <linus.walleij@linaro.org>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: linux-pm@vger.kernel.org
> Signed-off-by: John Stultz <john.stultz@linaro.org>
> Change-Id: I92ee3b392004ecc9217c5337b54eda48c2d7f3ee
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
> v4:
> * Split out into its own patch as suggested by Mark
> * Renamed deferred_probe_timeout as suggested by Greg
> ---
> drivers/base/dd.c | 18 ++++++++++--------
> include/linux/device/driver.h | 1 +
> 2 files changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 408e4da081da..39f1ce6d4f1c 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -229,17 +229,19 @@ DEFINE_SHOW_ATTRIBUTE(deferred_devs);
> * In the case of modules, set the default probe timeout to
> * 30 seconds to give userland some time to load needed modules
> */
> -static int deferred_probe_timeout = 30;
> +int driver_deferred_probe_timeout = 30;
> #else
> /* In the case of !modules, no probe timeout needed */
> -static int deferred_probe_timeout = -1;
> +int driver_deferred_probe_timeout = -1;
> #endif
> +EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout);
> +
> static int __init deferred_probe_timeout_setup(char *str)
> {
> int timeout;
>
> if (!kstrtoint(str, 10, &timeout))
> - deferred_probe_timeout = timeout;
> + driver_deferred_probe_timeout = timeout;
> return 1;
> }
> __setup("deferred_probe_timeout=", deferred_probe_timeout_setup);
> @@ -259,10 +261,10 @@ __setup("deferred_probe_timeout=", deferred_probe_timeout_setup);
> */
> int driver_deferred_probe_check_state(struct device *dev)
> {
> - if (!initcalls_done || deferred_probe_timeout > 0)
> + if (!initcalls_done || driver_deferred_probe_timeout > 0)
> return -EPROBE_DEFER;
>
> - if (!deferred_probe_timeout) {
> + if (!driver_deferred_probe_timeout) {
> dev_WARN(dev, "deferred probe timeout, ignoring dependency");
> return -ETIMEDOUT;
> }
> @@ -276,7 +278,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work)
> {
> struct device_private *private, *p;
>
> - deferred_probe_timeout = 0;
> + driver_deferred_probe_timeout = 0;
> driver_deferred_probe_trigger();
> flush_work(&deferred_probe_work);
>
> @@ -310,9 +312,9 @@ static int deferred_probe_initcall(void)
> driver_deferred_probe_trigger();
> flush_work(&deferred_probe_work);
>
> - if (deferred_probe_timeout > 0) {
> + if (driver_deferred_probe_timeout > 0) {
> schedule_delayed_work(&deferred_probe_timeout_work,
> - deferred_probe_timeout * HZ);
> + driver_deferred_probe_timeout * HZ);
> }
> return 0;
> }
> diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
> index 5242afabfaba..ee7ba5b5417e 100644
> --- a/include/linux/device/driver.h
> +++ b/include/linux/device/driver.h
> @@ -236,6 +236,7 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev)
> }
> #endif
>
> +extern int driver_deferred_probe_timeout;
> void driver_deferred_probe_add(struct device *dev);
> int driver_deferred_probe_check_state(struct device *dev);
> void driver_init(void);
> --
> 2.17.1
>
^ permalink raw reply
* Re: [dpdk-dev] [PATCH] examples/ipsec-secgw: extend inline session to non AES-GCM
From: Akhil Goyal @ 2020-02-20 10:42 UTC (permalink / raw)
To: Ananyev, Konstantin, Iremonger, Bernard
Cc: dev@dpdk.org, stable@dpdk.org, Ankur Dwivedi,
Narayana Prasad Raju Athreya, Anoob Joseph
In-Reply-To: <SN6PR11MB2558380B223583D9E473111B9A130@SN6PR11MB2558.namprd11.prod.outlook.com>
>
> Hi Akhil,
>
> > > > This patch extends creation of inline session to all the algorithms.
> > > > Previously the inline session was enabled only for AES-GCM cipher.
> > > >
> > > > Fixes: 3a690d5a65e2 ("examples/ipsec-secgw: fix first packet with inline
> > > > crypto")
> > > >
> > > > Signed-off-by: Ankur Dwivedi <adwivedi@marvell.com>
> > > > ---
> > > > examples/ipsec-secgw/sa.c | 26 ++++++++++++--------------
> > > > 1 file changed, 12 insertions(+), 14 deletions(-)
> > >
> > > Acked-by: Anoob Joseph <anoobj@marvell.com>
> > Acked-by: Akhil Goyal <akhil.goyal@nxp.com>
> >
> > Hi Konstantin,
> >
> > I am about to merge this patch. Do you have any issues on this.
>
> No objections, LGTM.
> Konstantin
>
Applied to dpdk-next-crypto
Thanks.
^ permalink raw reply
* Re: [dpdk-dev] [PATCH 4/4] ci: reorganise Travis jobs
From: Thomas Monjalon @ 2020-02-20 10:42 UTC (permalink / raw)
To: David Marchand, Aaron Conole; +Cc: dev, Michael Santana, Bruce Richardson
In-Reply-To: <f7tpneauuhz.fsf@dhcp-25.97.bos.redhat.com>
19/02/2020 22:39, Aaron Conole:
> David Marchand <david.marchand@redhat.com> writes:
>
> > Let's prune the jobs list to limit the amount of time spent by the robot
> > in Travis.
> >
> > Since meson enables automatically the relevant components, there is not
> > much gain in testing with extra_packages vs required_packages only.
> >
> > For a given arch/compiler/env combination, compilation is first tested
> > in all jobs that run tests or build the docs or run the ABI checks.
> > In the same context, for jobs that accumulates running tests, building
> > the docs etc..., those steps are independent and can be split to save
> > some cpu on Travis.
> >
> > With this, we go down from 21 to 15 jobs.
> >
> > Note: this patch requires a flush of the existing caches in Travis.
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
>
> In general, I think the idea with required vs. extra was to have a build
> that did the minimum required, and one that did all the packages (to
> allow a minimum vs. full DPDK).
>
> At least, that's from
> http://mails.dpdk.org/archives/dev/2019-January/124007.html
I think the benefit of a minimum build is to have a quick report
and to be easy to set up.
> Not sure if that's still something anyone cares about.
Given that Travis knows how to satisfy the dependencies,
and that we must wait for all jobs to finish,
I don't see any benefit of a minimal setup.
^ permalink raw reply
* Re: [PATCH] thinkpad_acpi: Add sysfs entry for lcdshadow feature
From: Andy Shevchenko @ 2020-02-20 10:42 UTC (permalink / raw)
To: Nitin Joshi, Mat King, Jani Nikula, Daniel Thompson, Jingoo Han,
Rajat Jain
Cc: Henrique de Moraes Holschuh, Darren Hart, Andy Shevchenko,
Thinkpad-acpi devel ML, Platform Driver, mpearson, Nitin Joshi,
Benjamin Berg, Linux Kernel Mailing List, dri-devel,
Greg Kroah-Hartman
In-Reply-To: <20200220074637.7578-1-njoshi1@lenovo.com>
On Thu, Feb 20, 2020 at 9:48 AM Nitin Joshi <nitjoshi@gmail.com> wrote:
>
> This feature is supported on some Thinkpad products like T490s, Thinkpad
> X1 yoga 4th Gen etc . The lcdshadow feature can be enabled and disabled
> when user press "Fn" + "D" key. Currently, no user feedback is given for
> this action. Adding as sysfs entry allows userspace to show an On Screen
> Display whenever the setting changes.
>
> Summary of changes is mentioned below :
>
> - Added TP_HKEY_EV_LCDSHADOW_CHANGED for consistency inside the driver
> - Added unmapped LCDSHADOW to keymap
> - Added lcdshadow_get function to read value using ACPI
> - Added lcdshadow_refresh function to re-read value and send notification
> - Added sysfs group creation to tpaci_lcdshadow_init
> - Added lcdshadow_exit to remove sysfs group again
> - Implemented lcdshadow_enable_show/lcdshadow_enable_store
> - Added handler to tpacpi_driver_event to update refresh lcdshadow
> - Explicitly call tpacpi_driver_event for extended keyset
Adding custom PrivacyGuard support to this driver was my mistake.
There is a discussion [1] on how to do this in a generic way to cover other
possible users.
I am Cc'ing the people from that discussion.
[1]: https://lore.kernel.org/dri-devel/CAL_quvRknSSVvXN3q_Se0hrziw2oTNS3ENNoeHYhvciCRq9Yww@mail.gmail.com/
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* Re: [PATCH] rcu_queue: add QSLIST functions
From: no-reply @ 2020-02-20 10:43 UTC (permalink / raw)
To: pbonzini; +Cc: qemu-devel, stefanha
In-Reply-To: <20200220103828.24525-1-pbonzini@redhat.com>
Patchew URL: https://patchew.org/QEMU/20200220103828.24525-1-pbonzini@redhat.com/
Hi,
This series seems to have some coding style problems. See output below for
more information:
Subject: [PATCH] rcu_queue: add QSLIST functions
Message-id: 20200220103828.24525-1-pbonzini@redhat.com
Type: series
=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===
Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
- [tag update] patchew/20200220041118.23264-1-alxndr@bu.edu -> patchew/20200220041118.23264-1-alxndr@bu.edu
* [new tag] patchew/20200220103828.24525-1-pbonzini@redhat.com -> patchew/20200220103828.24525-1-pbonzini@redhat.com
Switched to a new branch 'test'
909114b rcu_queue: add QSLIST functions
=== OUTPUT BEGIN ===
WARNING: Block comments use a leading /* on a separate line
#26: FILE: include/qemu/queue.h:217:
+} while (/*CONSTCOND*/0)
WARNING: Block comments use a leading /* on a separate line
#61: FILE: include/qemu/rcu_queue.h:278:
+} while (/*CONSTCOND*/0)
WARNING: Block comments use a leading /* on a separate line
#66: FILE: include/qemu/rcu_queue.h:283:
+} while (/*CONSTCOND*/0)
WARNING: Block comments use a leading /* on a separate line
#70: FILE: include/qemu/rcu_queue.h:287:
+} while (/*CONSTCOND*/0)
WARNING: Block comments use a leading /* on a separate line
#83: FILE: include/qemu/rcu_queue.h:300:
+} while (/*CONSTCOND*/0)
ERROR: code indent should never use tabs
#141: FILE: tests/test-rcu-list.c:155:
+^I QSLIST_REMOVE_RCU(&Q_list_head, el, list_element, f)$
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#153:
new file mode 100644
total: 1 errors, 6 warnings, 119 lines checked
Commit 909114b5ff89 (rcu_queue: add QSLIST functions) has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===
Test command exited with code: 1
The full log is available at
http://patchew.org/logs/20200220103828.24525-1-pbonzini@redhat.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com
^ permalink raw reply
* Re: [dpdk-dev] [PATCH 1/4] ci: remove unnecessary dependency on Linux headers
From: Thomas Monjalon @ 2020-02-20 10:44 UTC (permalink / raw)
To: David Marchand; +Cc: aconole, dev, Michael Santana
In-Reply-To: <20200219194131.29417-2-david.marchand@redhat.com>
19/02/2020 20:41, David Marchand:
> Following removal of kmod compilation, we don't need to install
> linux-headers anymore.
>
> Fixes: ea860973592b ("ci: remove redundant configs disabling kmods")
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
^ permalink raw reply
* Re: [cip-dev] [PATCH 4.19.y-cip 05/23] usb: typec: Find the ports by also matching against the device node
From: Marian-Cristian Rotariu @ 2020-02-20 10:44 UTC (permalink / raw)
To: Pavel Machek; +Cc: cip-dev@lists.cip-project.org
In-Reply-To: <20200219075630.GC31996@amd>
> -----Original Message-----
> From: Pavel Machek <pavel@denx.de>
> Sent: 19 February 2020 07:57
> To: Marian-Cristian Rotariu <marian-cristian.rotariu.rb@bp.renesas.com>
> Cc: cip-dev@lists.cip-project.org
> Subject: Re: [cip-dev] [PATCH 4.19.y-cip 05/23] usb: typec: Find the ports by
> also matching against the device node
>
> Hi!
>
> > From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
> >
> > commit 6a0bbcf96b2273f110a14d11a5952527c5921191 upstream.
> >
> > When the connections are defined in firmware, struct device_connection
> > will have the fwnode member pointing to the device node (struct
> > fwnode_handle) of the requested device, and the endpoint will not be
> > used at all in that case.
>
> > static void *typec_port_match(struct device_connection *con, int ep,
> > void *data) {
> > - return class_find_device(typec_class, NULL, con->endpoint[ep],
> > - __typec_port_match);
> ...
> > + dev = class_find_device(typec_class, NULL, con->endpoint[ep],
> > + typec_port_name_match);
> > +
> > + return dev ? dev : ERR_PTR(-EPROBE_DEFER);
> > }
>
> So... this adds handling in fwnode != NULL (expected, okay with me), but it
> also changes behaviour in fwnode == NULL case: return value changed from
> NULL to ERR_PTR(-EPROBE_DEFER). Are all callers ready to handle the
> changed situation in -cip? Could we get some explanation why it is
> necessary/good idea?
This is part of the Type-C Alternate Modes. This callback is used only by the
typec_altmode_register_notifier() that should be called by the upper driver
that uses the USB Type-C port in the Alternate Mode. In this way it can receive
notifications from it.
In its initial form, this function explores the graph child nodes and finds the
adequate USB port controller device. But, this device might not be ready/probed
yet. Hence, the EPROBE_DEFER addition of this patch.
Strangely enough, no one is using this notification chain so far, not even in
mainline. I guess Type-C is still in its infancy.
_______________________________________________
cip-dev mailing list
cip-dev@lists.cip-project.org
https://lists.cip-project.org/mailman/listinfo/cip-dev
^ permalink raw reply
* Re: [PATCH v2] util/async: make bh_aio_poll() O(1)
From: Paolo Bonzini @ 2020-02-20 10:43 UTC (permalink / raw)
To: Stefan Hajnoczi, qemu-devel; +Cc: Kevin Wolf, Fam Zheng, qemu-block, Max Reitz
In-Reply-To: <20200219175348.1161536-1-stefanha@redhat.com>
On 19/02/20 18:53, Stefan Hajnoczi wrote:
> +}
> +
> +/* Only called from aio_bh_poll() and aio_ctx_finalize() */
> +static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
> +{
> + QEMUBH *bh = QSLIST_FIRST(head);
I forgot, this should also become QSLIST_FIRST_RCU.
Paolo
^ permalink raw reply
* Re: [dpdk-dev] [PATCH] net/ice: fix queue bind MSI-X interrupt error
From: Ferruh Yigit @ 2020-02-20 10:45 UTC (permalink / raw)
To: Zhu, TaoX, Yang, Qiming, Lu, Wenzhuo
Cc: dev@dpdk.org, Xing, Beilei, Zhang, Qi Z, Ye, Xiaolong,
stable@dpdk.org
In-Reply-To: <60652C6914E08D41B9AA1580751B3CA9015DAD46@SHSMSX103.ccr.corp.intel.com>
On 2/20/2020 9:43 AM, Zhu, TaoX wrote:
> Hi Yigit, Ferruh
>
> The original author was not found. The meaning of these bits is as follows:
> ITR Index of the interrupt cause:
> 00b - ITR0 ; 01b - ITR1; 10b - ITR2; 11b - NoITR
>
> I think the reason why the original author wrote this strange code is to highlight that these bits are important in this function,
> he wanted to make it clear that he used ITR0 . I think it's possible, so I keep it.
That is OK, thanks for the clarification.
>
>
> BR,
> Zhu, Tao
>
>
>> -----Original Message-----
>> From: Yigit, Ferruh
>> Sent: Thursday, February 20, 2020 5:21 PM
>> To: Zhu, TaoX <taox.zhu@intel.com>; Yang, Qiming
>> <qiming.yang@intel.com>; Lu, Wenzhuo <wenzhuo.lu@intel.com>
>> Cc: dev@dpdk.org; Xing, Beilei <beilei.xing@intel.com>; Zhang, Qi Z
>> <qi.z.zhang@intel.com>; Ye, Xiaolong <xiaolong.ye@intel.com>;
>> stable@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH] net/ice: fix queue bind MSI-X interrupt
>> error
>>
>> On 2/19/2020 10:17 AM, taox.zhu@intel.com wrote:
>>> From: Zhu Tao <taox.zhu@intel.com>
>>>
>>> To bind a queue to an MSI-X interrupt, need to set some register.
>>> The register consists of many parts, each of which has several bits;
>>> therefore, the shift operator '<<' was used; so the operator '<' in
>>> the code should be '<<'.
>>>
>>> Old code adds 1 on even MSI-X interrupt vector index used by queue,
>>> resulting in interrupt mapping error.
>>>
>>> Fixes: 65dfc889d8 ("net/ice: support Rx queue interruption")
>>> Cc: stable@dpdk.org
>>>
>>> Signed-off-by: Zhu Tao <taox.zhu@intel.com>
>>> ---
>>> drivers/net/ice/ice_ethdev.c | 4 ++--
>>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/net/ice/ice_ethdev.c
>>> b/drivers/net/ice/ice_ethdev.c index 8e9369e0a..85ef83e92 100644
>>> --- a/drivers/net/ice/ice_ethdev.c
>>> +++ b/drivers/net/ice/ice_ethdev.c
>>> @@ -2605,9 +2605,9 @@ __vsi_queues_bind_intr(struct ice_vsi *vsi,
>> uint16_t msix_vect,
>>> for (i = 0; i < nb_queue; i++) {
>>> /*do actual bind*/
>>> val = (msix_vect & QINT_RQCTL_MSIX_INDX_M) |
>>> - (0 < QINT_RQCTL_ITR_INDX_S) |
>> QINT_RQCTL_CAUSE_ENA_M;
>>> + (0 << QINT_RQCTL_ITR_INDX_S) |
>> QINT_RQCTL_CAUSE_ENA_M;
>>> val_tx = (msix_vect & QINT_TQCTL_MSIX_INDX_M) |
>>> - (0 < QINT_TQCTL_ITR_INDX_S) |
>> QINT_TQCTL_CAUSE_ENA_M;
>>> + (0 << QINT_TQCTL_ITR_INDX_S) |
>> QINT_TQCTL_CAUSE_ENA_M;
>>
>> Hi Tao,
>>
>> Out of curiosity, what is the point of left shifting "0"?
^ permalink raw reply
* [igt-dev] ✗ Fi.CI.BAT: failure for series starting with [i-g-t,1/3] igt/kms_frontbuffer_tracking: Skip over IGT_DRAW_BLT when there's no BLT
From: Patchwork @ 2020-02-20 10:45 UTC (permalink / raw)
To: Chris Wilson; +Cc: igt-dev
In-Reply-To: <20200220094134.1560435-1-chris@chris-wilson.co.uk>
== Series Details ==
Series: series starting with [i-g-t,1/3] igt/kms_frontbuffer_tracking: Skip over IGT_DRAW_BLT when there's no BLT
URL : https://patchwork.freedesktop.org/series/73698/
State : failure
== Summary ==
CI Bug Log - changes from IGT_5453 -> IGTPW_4191
====================================================
Summary
-------
**FAILURE**
Serious unknown changes coming with IGTPW_4191 absolutely need to be
verified manually.
If you think the reported changes have nothing to do with the changes
introduced in IGTPW_4191, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/index.html
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in IGTPW_4191:
### IGT changes ###
#### Possible regressions ####
* igt@i915_module_load@reload:
- fi-icl-u2: [PASS][1] -> [DMESG-WARN][2] +1 similar issue
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-icl-u2/igt@i915_module_load@reload.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-icl-u2/igt@i915_module_load@reload.html
* igt@i915_selftest@live_gt_mocs:
- fi-bwr-2160: [PASS][3] -> [INCOMPLETE][4]
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-bwr-2160/igt@i915_selftest@live_gt_mocs.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-bwr-2160/igt@i915_selftest@live_gt_mocs.html
Known issues
------------
Here are the changes found in IGTPW_4191 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@gem_close_race@basic-threads:
- fi-byt-j1900: [PASS][5] -> [INCOMPLETE][6] ([i915#45])
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-byt-j1900/igt@gem_close_race@basic-threads.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-byt-j1900/igt@gem_close_race@basic-threads.html
- fi-byt-n2820: [PASS][7] -> [INCOMPLETE][8] ([i915#45])
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-byt-n2820/igt@gem_close_race@basic-threads.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-byt-n2820/igt@gem_close_race@basic-threads.html
* igt@i915_selftest@live_gem_contexts:
- fi-cfl-guc: [PASS][9] -> [INCOMPLETE][10] ([CI#80] / [fdo#106070] / [i915#424])
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-cfl-guc/igt@i915_selftest@live_gem_contexts.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-cfl-guc/igt@i915_selftest@live_gem_contexts.html
- fi-cml-s: [PASS][11] -> [DMESG-FAIL][12] ([i915#877])
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-cml-s/igt@i915_selftest@live_gem_contexts.html
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-cml-s/igt@i915_selftest@live_gem_contexts.html
* igt@kms_flip@basic-flip-vs-wf_vblank:
- fi-bsw-n3050: [PASS][13] -> [FAIL][14] ([i915#34])
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-bsw-n3050/igt@kms_flip@basic-flip-vs-wf_vblank.html
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-bsw-n3050/igt@kms_flip@basic-flip-vs-wf_vblank.html
* igt@kms_frontbuffer_tracking@basic:
- fi-hsw-4770: [PASS][15] -> [FAIL][16] ([i915#49])
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-hsw-4770/igt@kms_frontbuffer_tracking@basic.html
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-hsw-4770/igt@kms_frontbuffer_tracking@basic.html
#### Possible fixes ####
* igt@i915_selftest@live_gtt:
- fi-bxt-dsi: [TIMEOUT][17] ([fdo#112271]) -> [PASS][18]
[17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-bxt-dsi/igt@i915_selftest@live_gtt.html
[18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-bxt-dsi/igt@i915_selftest@live_gtt.html
* igt@kms_chamelium@common-hpd-after-suspend:
- fi-kbl-7500u: [FAIL][19] ([CI#81]) -> [PASS][20] +1 similar issue
[19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-kbl-7500u/igt@kms_chamelium@common-hpd-after-suspend.html
[20]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-kbl-7500u/igt@kms_chamelium@common-hpd-after-suspend.html
* igt@kms_chamelium@dp-crc-fast:
- fi-kbl-7500u: [FAIL][21] ([CI#81] / [fdo#109635]) -> [PASS][22] +2 similar issues
[21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html
[22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html
* igt@kms_chamelium@dp-edid-read:
- fi-kbl-7500u: [FAIL][23] ([CI#81] / [fdo#109635] / [i915#976]) -> [PASS][24]
[23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-kbl-7500u/igt@kms_chamelium@dp-edid-read.html
[24]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-kbl-7500u/igt@kms_chamelium@dp-edid-read.html
- fi-icl-u2: [FAIL][25] ([CI#81] / [fdo#109635]) -> [PASS][26] +2 similar issues
[25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-icl-u2/igt@kms_chamelium@dp-edid-read.html
[26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-icl-u2/igt@kms_chamelium@dp-edid-read.html
* igt@kms_chamelium@hdmi-hpd-fast:
- fi-icl-u2: [FAIL][27] ([CI#81]) -> [PASS][28] +2 similar issues
[27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html
[28]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html
* igt@prime_vgem@basic-fence-flip:
- fi-kbl-7500u: [SKIP][29] ([fdo#109271]) -> [PASS][30] +23 similar issues
[29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-kbl-7500u/igt@prime_vgem@basic-fence-flip.html
[30]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-kbl-7500u/igt@prime_vgem@basic-fence-flip.html
#### Warnings ####
* igt@kms_chamelium@dp-hpd-fast:
- fi-icl-u2: [FAIL][31] ([CI#81] / [fdo#109635]) -> [DMESG-WARN][32] ([i915#289])
[31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-icl-u2/igt@kms_chamelium@dp-hpd-fast.html
[32]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-icl-u2/igt@kms_chamelium@dp-hpd-fast.html
* igt@kms_chamelium@hdmi-hpd-fast:
- fi-kbl-7500u: [FAIL][33] ([CI#81]) -> [FAIL][34] ([fdo#111407])
[33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
[34]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
* igt@kms_chamelium@vga-hpd-fast:
- fi-icl-u2: [FAIL][35] ([CI#81]) -> [SKIP][36] ([fdo#109309]) +1 similar issue
[35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-icl-u2/igt@kms_chamelium@vga-hpd-fast.html
[36]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-icl-u2/igt@kms_chamelium@vga-hpd-fast.html
- fi-kbl-7500u: [FAIL][37] ([CI#81]) -> [SKIP][38] ([fdo#109271]) +1 similar issue
[37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5453/fi-kbl-7500u/igt@kms_chamelium@vga-hpd-fast.html
[38]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/fi-kbl-7500u/igt@kms_chamelium@vga-hpd-fast.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[CI#80]: https://gitlab.freedesktop.org/gfx-ci/i915-infra/issues/80
[CI#81]: https://gitlab.freedesktop.org/gfx-ci/i915-infra/issues/81
[fdo#106070]: https://bugs.freedesktop.org/show_bug.cgi?id=106070
[fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
[fdo#109309]: https://bugs.freedesktop.org/show_bug.cgi?id=109309
[fdo#109635]: https://bugs.freedesktop.org/show_bug.cgi?id=109635
[fdo#111407]: https://bugs.freedesktop.org/show_bug.cgi?id=111407
[fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271
[i915#1233]: https://gitlab.freedesktop.org/drm/intel/issues/1233
[i915#289]: https://gitlab.freedesktop.org/drm/intel/issues/289
[i915#34]: https://gitlab.freedesktop.org/drm/intel/issues/34
[i915#424]: https://gitlab.freedesktop.org/drm/intel/issues/424
[i915#45]: https://gitlab.freedesktop.org/drm/intel/issues/45
[i915#49]: https://gitlab.freedesktop.org/drm/intel/issues/49
[i915#877]: https://gitlab.freedesktop.org/drm/intel/issues/877
[i915#976]: https://gitlab.freedesktop.org/drm/intel/issues/976
Participating hosts (51 -> 47)
------------------------------
Additional (2): fi-skl-6770hq fi-kbl-7560u
Missing (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus
Build changes
-------------
* CI: CI-20190529 -> None
* IGT: IGT_5453 -> IGTPW_4191
CI-20190529: 20190529
CI_DRM_7971: 9df74cf797a04e60dac4d9f5742481bbfeead8ea @ git://anongit.freedesktop.org/gfx-ci/linux
IGTPW_4191: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/index.html
IGT_5453: cae9a5881ed2c5be2c2518a255740b612a927f9a @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4191/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply
* Re: [PATCH v4 3/3] clocksource: Add Low Power STM32 timers driver
From: Benjamin GAIGNARD @ 2020-02-20 10:45 UTC (permalink / raw)
To: Daniel Lezcano, lee.jones@linaro.org, robh+dt@kernel.org,
mark.rutland@arm.com, mcoquelin.stm32@gmail.com, Alexandre TORGUE,
tglx@linutronix.de, Fabrice GASNIER
Cc: devicetree@vger.kernel.org,
linux-stm32@st-md-mailman.stormreply.com,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org, Benjamin Gaignard,
Pascal PAILLET-LME
In-Reply-To: <687ab83c-6381-57aa-3bc1-3628e27644b5@linaro.org>
On 2/20/20 11:36 AM, Daniel Lezcano wrote:
> On 17/02/2020 14:45, Benjamin Gaignard wrote:
>> From: Benjamin Gaignard <benjamin.gaignard@linaro.org>
>>
>> Implement clock event driver using low power STM32 timers.
>> Low power timer counters running even when CPUs are stopped.
>> It could be used as clock event broadcaster to wake up CPUs but not like
>> a clocksource because each it rise an interrupt the counter restart from 0.
>>
>> Low power timers have a 16 bits counter and a prescaler which allow to
>> divide the clock per power of 2 to up 128 to target a 32KHz rate.
>>
>> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
>> Signed-off-by: Pascal Paillet <p.paillet@st.com>
>> ---
>> version 4:
>> - move defines in mfd/stm32-lptimer.h
>> - change compatiblename
>> - reword commit message
>> - make driver Kconfig depends of MFD_STM32_LPTIMER
>> - remove useless include
>> - remove rate and clk fields from the private structure
>> - to add comments about the registers sequence in stm32_clkevent_lp_set_timer
>> - rework probe function and use devm_request_irq()
>> - do not allow module to be removed
>> - make sure that wakeup interrupt is set
>>
>> drivers/clocksource/Kconfig | 7 ++
>> drivers/clocksource/Makefile | 1 +
>> drivers/clocksource/timer-stm32-lp.c | 213 +++++++++++++++++++++++++++++++++++
>> 3 files changed, 221 insertions(+)
>> create mode 100644 drivers/clocksource/timer-stm32-lp.c
>>
>> diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
>> index cc909e465823..9fc2b513db6f 100644
>> --- a/drivers/clocksource/Kconfig
>> +++ b/drivers/clocksource/Kconfig
>> @@ -292,6 +292,13 @@ config CLKSRC_STM32
>> select CLKSRC_MMIO
>> select TIMER_OF
>>
>> +config CLKSRC_STM32_LP
>> + bool "Low power clocksource for STM32 SoCs"
>> + depends on MFD_STM32_LPTIMER || COMPILE_TEST
>> + help
>> + This option enables support for STM32 low power clockevent available
>> + on STM32 SoCs
>> +
>> config CLKSRC_MPS2
>> bool "Clocksource for MPS2 SoCs" if COMPILE_TEST
>> depends on GENERIC_SCHED_CLOCK
>> diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
>> index 713686faa549..c00fffbd4769 100644
>> --- a/drivers/clocksource/Makefile
>> +++ b/drivers/clocksource/Makefile
>> @@ -44,6 +44,7 @@ obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o
>> obj-$(CONFIG_CADENCE_TTC_TIMER) += timer-cadence-ttc.o
>> obj-$(CONFIG_CLKSRC_EFM32) += timer-efm32.o
>> obj-$(CONFIG_CLKSRC_STM32) += timer-stm32.o
>> +obj-$(CONFIG_CLKSRC_STM32_LP) += timer-stm32-lp.o
>> obj-$(CONFIG_CLKSRC_EXYNOS_MCT) += exynos_mct.o
>> obj-$(CONFIG_CLKSRC_LPC32XX) += timer-lpc32xx.o
>> obj-$(CONFIG_CLKSRC_MPS2) += mps2-timer.o
>> diff --git a/drivers/clocksource/timer-stm32-lp.c b/drivers/clocksource/timer-stm32-lp.c
>> new file mode 100644
>> index 000000000000..50eecdb88216
>> --- /dev/null
>> +++ b/drivers/clocksource/timer-stm32-lp.c
>> @@ -0,0 +1,213 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) STMicroelectronics 2019 - All Rights Reserved
>> + * Authors: Benjamin Gaignard <benjamin.gaignard@st.com> for STMicroelectronics.
>> + * Pascal Paillet <p.paillet@st.com> for STMicroelectronics.
>> + */
>> +
>> +#include <linux/clk.h>
>> +#include <linux/clockchips.h>
>> +#include <linux/interrupt.h>
>> +#include <linux/mfd/stm32-lptimer.h>
>> +#include <linux/module.h>
>> +#include <linux/of_address.h>
>> +#include <linux/of_irq.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/pm_wakeirq.h>
>> +
>> +#define CFGR_PSC_OFFSET 9
>> +#define STM32_LP_RATING 400
>> +#define STM32_TARGET_CLKRATE (32000 * HZ)
>> +#define STM32_LP_MAX_PSC 7
>> +
>> +struct stm32_lp_private {
>> + struct regmap *reg;
>> + struct clock_event_device clkevt;
>> + unsigned long period;
>> +};
>> +
>> +static struct stm32_lp_private*
>> +to_priv(struct clock_event_device *clkevt)
>> +{
>> + return container_of(clkevt, struct stm32_lp_private, clkevt);
>> +}
>> +
>> +static int stm32_clkevent_lp_shutdown(struct clock_event_device *clkevt)
>> +{
>> + struct stm32_lp_private *priv = to_priv(clkevt);
>> +
>> + regmap_write(priv->reg, STM32_LPTIM_CR, 0);
>> + regmap_write(priv->reg, STM32_LPTIM_IER, 0);
>> + /* clear pending flags */
>> + regmap_write(priv->reg, STM32_LPTIM_ICR, STM32_LPTIM_ARRMCF);
>> +
>> + return 0;
>> +}
>> +
>> +static int stm32_clkevent_lp_set_timer(unsigned long evt,
>> + struct clock_event_device *clkevt,
>> + int is_periodic)
>> +{
>> + struct stm32_lp_private *priv = to_priv(clkevt);
>> +
>> + /* disable LPTIMER to be able to write into IER register*/
>> + regmap_write(priv->reg, STM32_LPTIM_CR, 0);
>> + /* enable ARR interrupt */
>> + regmap_write(priv->reg, STM32_LPTIM_IER, STM32_LPTIM_ARRMIE);
>> + /* enable LPTIMER to be able to write into ARR register */
>> + regmap_write(priv->reg, STM32_LPTIM_CR, STM32_LPTIM_ENABLE);
>> + /* set next event counter */
>> + regmap_write(priv->reg, STM32_LPTIM_ARR, evt);
>> +
>> + /* start counter */
>> + if (is_periodic)
>> + regmap_write(priv->reg, STM32_LPTIM_CR,
>> + STM32_LPTIM_CNTSTRT | STM32_LPTIM_ENABLE);
>> + else
>> + regmap_write(priv->reg, STM32_LPTIM_CR,
>> + STM32_LPTIM_SNGSTRT | STM32_LPTIM_ENABLE);
> The regmap config in stm32-lptimer is not defined with the fast_io flag
> (on purpose or not?) that means we can potentially deadlock here as the
> lock is a mutex.
>
> Isn't it detected with the lock validation scheme?
It wasn't a problem with other parts of the mfd and I haven't noticed
any issues, so I guess it is ok.
>
>> + return 0;
>> +}
>> +static int stm32_clkevent_lp_remove(struct platform_device *pdev)
>> +{
>> + return -EBUSY; /* cannot unregister clockevent */
>> +}
> Won't be the mfd into an inconsistent state here? The other subsystems
> will be removed but this one will prevent to unload the module leading
> to a situation where the mfd is partially removed but still there
> without a possible recovery, no?
We can't enable the timer part of the mfd at the same time as the
other features.
It has to be exclusive, and that excludes the problem you describe above.
>
>> +static const struct of_device_id stm32_clkevent_lp_of_match[] = {
>> + { .compatible = "st,stm32-lptimer-timer", },
>> + {},
>> +};
>> +MODULE_DEVICE_TABLE(of, stm32_clkevent_lp_of_match);
>> +
>> +static struct platform_driver stm32_clkevent_lp_driver = {
>> + .probe = stm32_clkevent_lp_probe,
>> + .remove = stm32_clkevent_lp_remove,
>> + .driver = {
>> + .name = "stm32-lptimer-timer",
>> + .of_match_table = of_match_ptr(stm32_clkevent_lp_of_match),
>> + },
>> +};
>
^ permalink raw reply
* Re: [PATCH v4 3/3] clocksource: Add Low Power STM32 timers driver
From: Benjamin GAIGNARD @ 2020-02-20 10:45 UTC (permalink / raw)
To: Daniel Lezcano, lee.jones@linaro.org, robh+dt@kernel.org,
mark.rutland@arm.com, mcoquelin.stm32@gmail.com, Alexandre TORGUE,
tglx@linutronix.de, Fabrice GASNIER
Cc: devicetree@vger.kernel.org, linux-kernel@vger.kernel.org,
Pascal PAILLET-LME, Benjamin Gaignard,
linux-stm32@st-md-mailman.stormreply.com,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <687ab83c-6381-57aa-3bc1-3628e27644b5@linaro.org>
On 2/20/20 11:36 AM, Daniel Lezcano wrote:
> On 17/02/2020 14:45, Benjamin Gaignard wrote:
>> From: Benjamin Gaignard <benjamin.gaignard@linaro.org>
>>
>> Implement clock event driver using low power STM32 timers.
>> Low power timer counters running even when CPUs are stopped.
>> It could be used as clock event broadcaster to wake up CPUs but not like
>> a clocksource because each it rise an interrupt the counter restart from 0.
>>
>> Low power timers have a 16 bits counter and a prescaler which allow to
>> divide the clock per power of 2 to up 128 to target a 32KHz rate.
>>
>> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
>> Signed-off-by: Pascal Paillet <p.paillet@st.com>
>> ---
>> version 4:
>> - move defines in mfd/stm32-lptimer.h
>> - change compatiblename
>> - reword commit message
>> - make driver Kconfig depends of MFD_STM32_LPTIMER
>> - remove useless include
>> - remove rate and clk fields from the private structure
>> - to add comments about the registers sequence in stm32_clkevent_lp_set_timer
>> - rework probe function and use devm_request_irq()
>> - do not allow module to be removed
>> - make sure that wakeup interrupt is set
>>
>> drivers/clocksource/Kconfig | 7 ++
>> drivers/clocksource/Makefile | 1 +
>> drivers/clocksource/timer-stm32-lp.c | 213 +++++++++++++++++++++++++++++++++++
>> 3 files changed, 221 insertions(+)
>> create mode 100644 drivers/clocksource/timer-stm32-lp.c
>>
>> diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
>> index cc909e465823..9fc2b513db6f 100644
>> --- a/drivers/clocksource/Kconfig
>> +++ b/drivers/clocksource/Kconfig
>> @@ -292,6 +292,13 @@ config CLKSRC_STM32
>> select CLKSRC_MMIO
>> select TIMER_OF
>>
>> +config CLKSRC_STM32_LP
>> + bool "Low power clocksource for STM32 SoCs"
>> + depends on MFD_STM32_LPTIMER || COMPILE_TEST
>> + help
>> + This option enables support for STM32 low power clockevent available
>> + on STM32 SoCs
>> +
>> config CLKSRC_MPS2
>> bool "Clocksource for MPS2 SoCs" if COMPILE_TEST
>> depends on GENERIC_SCHED_CLOCK
>> diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
>> index 713686faa549..c00fffbd4769 100644
>> --- a/drivers/clocksource/Makefile
>> +++ b/drivers/clocksource/Makefile
>> @@ -44,6 +44,7 @@ obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o
>> obj-$(CONFIG_CADENCE_TTC_TIMER) += timer-cadence-ttc.o
>> obj-$(CONFIG_CLKSRC_EFM32) += timer-efm32.o
>> obj-$(CONFIG_CLKSRC_STM32) += timer-stm32.o
>> +obj-$(CONFIG_CLKSRC_STM32_LP) += timer-stm32-lp.o
>> obj-$(CONFIG_CLKSRC_EXYNOS_MCT) += exynos_mct.o
>> obj-$(CONFIG_CLKSRC_LPC32XX) += timer-lpc32xx.o
>> obj-$(CONFIG_CLKSRC_MPS2) += mps2-timer.o
>> diff --git a/drivers/clocksource/timer-stm32-lp.c b/drivers/clocksource/timer-stm32-lp.c
>> new file mode 100644
>> index 000000000000..50eecdb88216
>> --- /dev/null
>> +++ b/drivers/clocksource/timer-stm32-lp.c
>> @@ -0,0 +1,213 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) STMicroelectronics 2019 - All Rights Reserved
>> + * Authors: Benjamin Gaignard <benjamin.gaignard@st.com> for STMicroelectronics.
>> + * Pascal Paillet <p.paillet@st.com> for STMicroelectronics.
>> + */
>> +
>> +#include <linux/clk.h>
>> +#include <linux/clockchips.h>
>> +#include <linux/interrupt.h>
>> +#include <linux/mfd/stm32-lptimer.h>
>> +#include <linux/module.h>
>> +#include <linux/of_address.h>
>> +#include <linux/of_irq.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/pm_wakeirq.h>
>> +
>> +#define CFGR_PSC_OFFSET 9
>> +#define STM32_LP_RATING 400
>> +#define STM32_TARGET_CLKRATE (32000 * HZ)
>> +#define STM32_LP_MAX_PSC 7
>> +
>> +struct stm32_lp_private {
>> + struct regmap *reg;
>> + struct clock_event_device clkevt;
>> + unsigned long period;
>> +};
>> +
>> +static struct stm32_lp_private*
>> +to_priv(struct clock_event_device *clkevt)
>> +{
>> + return container_of(clkevt, struct stm32_lp_private, clkevt);
>> +}
>> +
>> +static int stm32_clkevent_lp_shutdown(struct clock_event_device *clkevt)
>> +{
>> + struct stm32_lp_private *priv = to_priv(clkevt);
>> +
>> + regmap_write(priv->reg, STM32_LPTIM_CR, 0);
>> + regmap_write(priv->reg, STM32_LPTIM_IER, 0);
>> + /* clear pending flags */
>> + regmap_write(priv->reg, STM32_LPTIM_ICR, STM32_LPTIM_ARRMCF);
>> +
>> + return 0;
>> +}
>> +
>> +static int stm32_clkevent_lp_set_timer(unsigned long evt,
>> + struct clock_event_device *clkevt,
>> + int is_periodic)
>> +{
>> + struct stm32_lp_private *priv = to_priv(clkevt);
>> +
>> + /* disable LPTIMER to be able to write into IER register*/
>> + regmap_write(priv->reg, STM32_LPTIM_CR, 0);
>> + /* enable ARR interrupt */
>> + regmap_write(priv->reg, STM32_LPTIM_IER, STM32_LPTIM_ARRMIE);
>> + /* enable LPTIMER to be able to write into ARR register */
>> + regmap_write(priv->reg, STM32_LPTIM_CR, STM32_LPTIM_ENABLE);
>> + /* set next event counter */
>> + regmap_write(priv->reg, STM32_LPTIM_ARR, evt);
>> +
>> + /* start counter */
>> + if (is_periodic)
>> + regmap_write(priv->reg, STM32_LPTIM_CR,
>> + STM32_LPTIM_CNTSTRT | STM32_LPTIM_ENABLE);
>> + else
>> + regmap_write(priv->reg, STM32_LPTIM_CR,
>> + STM32_LPTIM_SNGSTRT | STM32_LPTIM_ENABLE);
> The regmap config in stm32-lptimer is not defined with the fast_io flag
> (on purpose or not?) that means we can potentially deadlock here as the
> lock is a mutex.
>
> Isn't it detected with the lock validation scheme?
It wasn't a problem with other parts of the mfd and I haven't noticed
any issues, so I guess it is ok.
>
>> + return 0;
>> +}
>> +static int stm32_clkevent_lp_remove(struct platform_device *pdev)
>> +{
>> + return -EBUSY; /* cannot unregister clockevent */
>> +}
> Won't be the mfd into an inconsistent state here? The other subsystems
> will be removed but this one will prevent to unload the module leading
> to a situation where the mfd is partially removed but still there
> without a possible recovery, no?
We can't enable the timer part of the mfd at the same time as the
other features.
It has to be exclusive, and that excludes the problem you describe above.
>
>> +static const struct of_device_id stm32_clkevent_lp_of_match[] = {
>> + { .compatible = "st,stm32-lptimer-timer", },
>> + {},
>> +};
>> +MODULE_DEVICE_TABLE(of, stm32_clkevent_lp_of_match);
>> +
>> +static struct platform_driver stm32_clkevent_lp_driver = {
>> + .probe = stm32_clkevent_lp_probe,
>> + .remove = stm32_clkevent_lp_remove,
>> + .driver = {
>> + .name = "stm32-lptimer-timer",
>> + .of_match_table = of_match_ptr(stm32_clkevent_lp_of_match),
>> + },
>> +};
>
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply
* Re: [PATCH v2 6/7] misc: bcm-vk: add Broadcom VK driver
From: Dan Carpenter @ 2020-02-20 10:43 UTC (permalink / raw)
To: Scott Branden
Cc: Luis Chamberlain, Greg Kroah-Hartman, David Brown, Alexander Viro,
Shuah Khan, bjorn.andersson, Shuah Khan, Arnd Bergmann,
Rafael J . Wysocki, linux-kernel, linux-arm-msm, linux-fsdevel,
BCM Kernel Feedback, Olof Johansson, Andrew Morton,
Colin Ian King, Kees Cook, Takashi Iwai, linux-kselftest,
Andy Gross, Desmond Yan, James Hu
In-Reply-To: <20200220004825.23372-7-scott.branden@broadcom.com>
I'm not sure what git tree you're building on top of. This patchset
doesn't apply to linux-next which is the only thing I tested...
On Wed, Feb 19, 2020 at 04:48:24PM -0800, Scott Branden wrote:
> Add Broadcom VK driver offload engine.
> This driver interfaces to the VK PCIe offload engine to perform
> should offload functions as video transcoding on multiple streams
> in parallel. VK device is booted from files loaded using
> request_firmware_into_buf mechanism. After booted card status is updated
> and messages can then be sent to the card.
> Such messages contain scatter gather list of addresses
> to pull data from the host to perform operations on.
>
> Signed-off-by: Scott Branden <scott.branden@broadcom.com>
> Signed-off-by: Desmond Yan <desmond.yan@broadcom.com>
> Signed-off-by: James Hu <james.hu@broadcom.com>
> ---
> drivers/misc/Kconfig | 1 +
> drivers/misc/Makefile | 1 +
> drivers/misc/bcm-vk/Kconfig | 42 +
> drivers/misc/bcm-vk/Makefile | 11 +
> drivers/misc/bcm-vk/bcm_vk.h | 357 ++++++++
> drivers/misc/bcm-vk/bcm_vk_dev.c | 1197 ++++++++++++++++++++++++++
> drivers/misc/bcm-vk/bcm_vk_msg.c | 1359 ++++++++++++++++++++++++++++++
> drivers/misc/bcm-vk/bcm_vk_msg.h | 210 +++++
> drivers/misc/bcm-vk/bcm_vk_sg.c | 273 ++++++
> drivers/misc/bcm-vk/bcm_vk_sg.h | 60 ++
> drivers/misc/bcm-vk/bcm_vk_tty.c | 327 +++++++
> 11 files changed, 3838 insertions(+)
> create mode 100644 drivers/misc/bcm-vk/Kconfig
> create mode 100644 drivers/misc/bcm-vk/Makefile
> create mode 100644 drivers/misc/bcm-vk/bcm_vk.h
> create mode 100644 drivers/misc/bcm-vk/bcm_vk_dev.c
> create mode 100644 drivers/misc/bcm-vk/bcm_vk_msg.c
> create mode 100644 drivers/misc/bcm-vk/bcm_vk_msg.h
> create mode 100644 drivers/misc/bcm-vk/bcm_vk_sg.c
> create mode 100644 drivers/misc/bcm-vk/bcm_vk_sg.h
> create mode 100644 drivers/misc/bcm-vk/bcm_vk_tty.c
>
> diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> index 7f0d48f406e3..39c74ed6a846 100644
> --- a/drivers/misc/Kconfig
> +++ b/drivers/misc/Kconfig
> @@ -478,6 +478,7 @@ source "drivers/misc/genwqe/Kconfig"
> source "drivers/misc/echo/Kconfig"
> source "drivers/misc/cxl/Kconfig"
> source "drivers/misc/ocxl/Kconfig"
> +source "drivers/misc/bcm-vk/Kconfig"
> source "drivers/misc/cardreader/Kconfig"
> source "drivers/misc/habanalabs/Kconfig"
> endmenu
> diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
> index c1860d35dc7e..d9a3b2791560 100644
> --- a/drivers/misc/Makefile
> +++ b/drivers/misc/Makefile
> @@ -53,6 +53,7 @@ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o
> obj-$(CONFIG_CXL_BASE) += cxl/
> obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o
> obj-$(CONFIG_OCXL) += ocxl/
> +obj-$(CONFIG_BCM_VK) += bcm-vk/
> obj-y += cardreader/
> obj-$(CONFIG_PVPANIC) += pvpanic.o
> obj-$(CONFIG_HABANA_AI) += habanalabs/
> diff --git a/drivers/misc/bcm-vk/Kconfig b/drivers/misc/bcm-vk/Kconfig
> new file mode 100644
> index 000000000000..c75dfb89a38d
> --- /dev/null
> +++ b/drivers/misc/bcm-vk/Kconfig
> @@ -0,0 +1,42 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +#
> +# Broadcom VK device
> +#
> +config BCM_VK
> + tristate "Support for Broadcom VK Accelerators"
> + depends on PCI_MSI
> + default m
> + help
> + Select this option to enable support for Broadcom
> + VK Accelerators. VK is used for performing
> + specific video offload processing. This driver enables
> + userspace programs to access these accelerators via /dev/bcm-vk.N
> + devices.
> +
> + If unsure, say N.
> +
> +if BCM_VK
> +
> +config BCM_VK_H2VK_VERIFY_AND_RETRY
> + bool "Host To VK Verifiy Data and Retry"
> + help
> + Turn on to verify the data passed down to VK is good,
> + and if not, do a retry until it succeeds.
> + This is a debug/workaround on FPGA PCIe timing issues
> + but may be found useful for debugging other PCIe hardware issues.
> + Small performance loss by enabling this debug config.
> + For properly operating PCIe hardware no need to enable this.
> +
> + If unsure, say N.
> +
> +config BCM_VK_QSTATS
> + bool "VK Queue Statistics"
> + help
> + Turn on to enable Queue Statistics.
> + These are useful for debugging purposes.
> + Some performance loss by enabling this debug config.
> + For properly operating PCIe hardware no need to enable this.
> +
> + If unsure, say N.
> +
> +endif
> diff --git a/drivers/misc/bcm-vk/Makefile b/drivers/misc/bcm-vk/Makefile
> new file mode 100644
> index 000000000000..05cb213ee826
> --- /dev/null
> +++ b/drivers/misc/bcm-vk/Makefile
> @@ -0,0 +1,11 @@
> +# SPDX-License-Identifier: GPL-2.0
> +#
> +# Makefile for Broadcom VK driver
> +#
> +
> +obj-$(CONFIG_BCM_VK) += bcm_vk.o
> +bcm_vk-objs := \
> + bcm_vk_dev.o \
> + bcm_vk_msg.o \
> + bcm_vk_sg.o \
> + bcm_vk_tty.o
> diff --git a/drivers/misc/bcm-vk/bcm_vk.h b/drivers/misc/bcm-vk/bcm_vk.h
> new file mode 100644
> index 000000000000..203ca741bf88
> --- /dev/null
> +++ b/drivers/misc/bcm-vk/bcm_vk.h
> @@ -0,0 +1,357 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright 2018-2020 Broadcom.
> + */
> +
> +#ifndef BCM_VK_H
> +#define BCM_VK_H
> +
> +#include <linux/atomic.h>
> +#include <linux/pci.h>
> +#include <linux/irq.h>
> +#include <linux/miscdevice.h>
> +#include <linux/mutex.h>
> +#include <linux/sched/signal.h>
> +#include <linux/tty.h>
> +#include <linux/uaccess.h>
> +#include <linux/version.h>
> +#include <uapi/linux/misc/bcm_vk.h>
> +
> +#include "bcm_vk_msg.h"
> +
> +#define DRV_MODULE_NAME "bcm-vk"
> +
> +/*
> + * Load Image is completed in two stages:
> + *
> + * 1) When the VK device boot-up, M7 CPU runs and executes the BootROM.
> + * The Secure Boot Loader (SBL) as part of the BootROM will run
> + * to open up ITCM for host to push BOOT1 image.
> + * SBL will authenticate the image before jumping to BOOT1 image.
> + *
> + * 2) Because BOOT1 image is a secured image, we also called it the
> + * Secure Boot Image (SBI). At second stage, SBI will initialize DDR
> + * and wait for host to push BOOT2 image to DDR.
> + * SBI will authenticate the image before jumping to BOOT2 image.
> + *
> + */
> +/* Location of registers of interest in BAR0 */
> +
> +/* Request register for Secure Boot Loader (SBL) download */
> +#define BAR_CODEPUSH_SBL 0x400
> +/* Start of ITCM */
> +#define CODEPUSH_BOOT1_ENTRY 0x00400000
> +#define CODEPUSH_MASK 0xFFFFF000
> +#define CODEPUSH_BOOTSTART BIT(0)
> +
> +/* Boot Status register */
> +#define BAR_BOOT_STATUS 0x404
> +
> +#define SRAM_OPEN BIT(16)
> +#define DDR_OPEN BIT(17)
> +
> +/* Firmware loader progress status definitions */
> +#define FW_LOADER_ACK_SEND_MORE_DATA BIT(18)
> +#define FW_LOADER_ACK_IN_PROGRESS BIT(19)
> +#define FW_LOADER_ACK_RCVD_ALL_DATA BIT(20)
> +
> +/* definitions for boot status register */
> +#define BOOT_STATE_MASK 0xFFF3FFFF
> +#define BROM_STATUS_NOT_RUN 0x2
> +#define BROM_NOT_RUN (SRAM_OPEN | BROM_STATUS_NOT_RUN)
> +#define BROM_STATUS_COMPLETE 0x6
> +#define BROM_RUNNING (SRAM_OPEN | BROM_STATUS_COMPLETE)
> +#define BOOT1_STATUS_COMPLETE 0x6
> +#define BOOT1_RUNNING (DDR_OPEN | BOOT1_STATUS_COMPLETE)
> +#define BOOT2_STATUS_COMPLETE 0x6
> +#define BOOT2_RUNNING (FW_LOADER_ACK_RCVD_ALL_DATA | \
> + BOOT2_STATUS_COMPLETE)
> +
> +/* Boot request for Secure Boot Image (SBI) */
> +#define BAR_CODEPUSH_SBI 0x408
> +/* 64M mapped to BAR2 */
> +#define CODEPUSH_BOOT2_ENTRY 0x60000000
> +
> +#define BAR_CARD_STATUS 0x410
> +/* CARD_STATUS definitions */
> +#define CARD_STATUS_TTYVK0_READY BIT(0)
> +#define CARD_STATUS_TTYVK1_READY BIT(1)
> +
> +#define BAR_METADATA_VERSION 0x440
> +#define BAR_OS_UPTIME 0x444
> +#define BAR_CHIP_ID 0x448
> +
> +#define BAR_CARD_TEMPERATURE 0x45C
> +/* defines for all temperature sensor */
> +#define BCM_VK_TEMP_FIELD_MASK 0xFF
> +#define BCM_VK_CPU_TEMP_SHIFT 0
> +#define BCM_VK_DDR0_TEMP_SHIFT 8
> +#define BCM_VK_DDR1_TEMP_SHIFT 16
> +
> +#define BAR_CARD_VOLTAGE 0x460
> +/* defines for voltage rail conversion */
> +#define BCM_VK_VOLT_RAIL_MASK 0xFFFF
> +#define BCM_VK_3P3_VOLT_REG_SHIFT 16
> +
> +#define BAR_CARD_ERR_LOG 0x464
> +/* Error log register bit definition - register for error alerts */
> +#define ERR_LOG_UECC BIT(0)
> +#define ERR_LOG_SSIM_BUSY BIT(1)
> +#define ERR_LOG_AFBC_BUSY BIT(2)
> +#define ERR_LOG_HIGH_TEMP_ERR BIT(3)
> +#define ERR_LOG_WDOG_TIMEOUT BIT(4)
> +#define ERR_LOG_SYS_FAULT BIT(5)
> +#define ERR_LOG_MEM_ALLOC_FAIL BIT(8)
> +#define ERR_LOG_LOW_TEMP_WARN BIT(9)
> +#define ERR_LOG_ECC BIT(10)
> +/* Alert bit definitions detectd on host */
> +#define ERR_LOG_HOST_HB_FAIL BIT(14)
> +#define ERR_LOG_HOST_PCIE_DWN BIT(15)
> +
> +#define BAR_CARD_ERR_MEM 0x468
> +/* defines for mem err, all fields have same width */
> +#define BCM_VK_MEM_ERR_FIELD_MASK 0xFF
> +#define BCM_VK_ECC_MEM_ERR_SHIFT 0
> +#define BCM_VK_UECC_MEM_ERR_SHIFT 8
> +/* threshold of event occurrence and logs start to come out */
> +#define BCM_VK_ECC_THRESHOLD 10
> +#define BCM_VK_UECC_THRESHOLD 1
> +
> +#define BAR_CARD_PWR_AND_THRE 0x46C
> +/* defines for power and temp threshold, all fields have same width */
> +#define BCM_VK_PWR_AND_THRE_FIELD_MASK 0xFF
> +#define BCM_VK_LOW_TEMP_THRE_SHIFT 0
> +#define BCM_VK_HIGH_TEMP_THRE_SHIFT 8
> +#define BCM_VK_PWR_STATE_SHIFT 16
> +
> +#define BAR_CARD_STATIC_INFO 0x470
> +
> +#define BAR_BOOTSRC_SELECT 0xC78
> +/* BOOTSRC definitions */
> +#define BOOTSRC_SOFT_ENABLE BIT(14)
> +
> +/* Card OS Firmware version size */
> +#define BAR_FIRMWARE_TAG_SIZE 50
> +#define FIRMWARE_STATUS_PRE_INIT_DONE 0x1F
> +
> +/* VK MSG_ID defines */
> +#define VK_MSG_ID_BITMAP_SIZE 4096
> +#define VK_MSG_ID_BITMAP_MASK (VK_MSG_ID_BITMAP_SIZE - 1)
> +#define VK_MSG_ID_OVERFLOW 0xFFFF
> +
> +/* VK device supports a maximum of 3 bars */
> +#define MAX_BAR 3
> +
> +/* default number of msg blk for inband SGL */
> +#define BCM_VK_DEF_IB_SGL_BLK_LEN 16
> +#define BCM_VK_IB_SGL_BLK_MAX 24
> +
> +enum pci_barno {
> + BAR_0 = 0,
> + BAR_1,
> + BAR_2
> +};
> +
> +#define BCM_VK_NUM_TTY 2
> +
> +struct bcm_vk_tty {
> + struct tty_port port;
> + uint32_t to_offset; /* bar offset to use */
> + uint32_t to_size; /* to VK buffer size */
> + uint32_t wr; /* write offset shadow */
> + uint32_t from_offset; /* bar offset to use */
> + uint32_t from_size; /* from VK buffer size */
> + uint32_t rd; /* read offset shadow */
> +};
> +
> +/* VK device max power state, supports 3, full, reduced and low */
> +#define MAX_OPP 3
> +#define MAX_CARD_INFO_TAG_SIZE 64
> +
> +struct bcm_vk_card_info {
> + uint32_t version;
> + char os_tag[MAX_CARD_INFO_TAG_SIZE];
> + char cmpt_tag[MAX_CARD_INFO_TAG_SIZE];
> + uint32_t cpu_freq_mhz;
> + uint32_t cpu_scale[MAX_OPP];
> + uint32_t ddr_freq_mhz;
> + uint32_t ddr_size_MB;
> + uint32_t video_core_freq_mhz;
> +};
> +
> +/*
> + * Control structure of logging messages from the card. This
> + * buffer is for logmsg that comes from vk
> + */
> +struct bcm_vk_peer_log {
> + uint32_t rd_idx;
> + uint32_t wr_idx;
> + uint32_t buf_size;
> + uint32_t mask;
> + char data[0];
> +};
> +/* max size per line of peer log */
> +#define BCM_VK_PEER_LOG_LINE_MAX 256
> +
> +struct bcm_vk_hb_ctrl {
> + struct timer_list timer;
> + uint32_t last_uptime;
> + uint32_t lost_cnt;
> +};
> +
> +struct bcm_vk_alert {
> + uint16_t flags;
> + uint16_t notfs;
> +};
> +
> +/* some alert counters that the driver will keep track */
> +struct bcm_vk_alert_cnts {
> + uint16_t ecc;
> + uint16_t uecc;
> +};
> +
> +struct bcm_vk {
> + struct pci_dev *pdev;
> + void __iomem *bar[MAX_BAR];
> + int num_irqs;
> +
> + struct bcm_vk_card_info card_info;
> +
> + /* mutex to protect the ioctls */
> + struct mutex mutex;
> + struct miscdevice miscdev;
> + int misc_devid; /* dev id allocated */
> +
> + struct tty_driver *tty_drv;
> + struct timer_list serial_timer;
> + struct bcm_vk_tty tty[BCM_VK_NUM_TTY];
> +
> + /* Reference-counting to handle file operations */
> + struct kref kref;
> +
> + spinlock_t msg_id_lock;
> + uint16_t msg_id;
> + DECLARE_BITMAP(bmap, VK_MSG_ID_BITMAP_SIZE);
> + spinlock_t ctx_lock;
> + struct bcm_vk_ctx ctx[VK_CMPT_CTX_MAX];
> + struct bcm_vk_ht_entry pid_ht[VK_PID_HT_SZ];
> + struct task_struct *reset_ppid; /* process that issue reset */
> +
> + atomic_t msgq_inited; /* indicate if info has been synced with vk */
> + struct bcm_vk_msg_chan h2vk_msg_chan;
> + struct bcm_vk_msg_chan vk2h_msg_chan;
These two names are almost the same except the letters are jumbled up.
Please, figure out better names because the human mind sees them as
exactly the same at first glance.
https://www.mnn.com/lifestyle/arts-culture/stories/why-your-brain-can-read-jumbled-letters
> +
> + struct workqueue_struct *wq_thread;
> + struct work_struct wq_work; /* work queue for deferred job */
> + unsigned long wq_offload[1]; /* various flags on wq requested */
> + void *tdma_vaddr; /* test dma segment virtual addr */
> + dma_addr_t tdma_addr; /* test dma segment bus addr */
> +
> + struct notifier_block panic_nb;
> + uint32_t ib_sgl_size; /* size allocated for inband sgl insertion */
> +
> + /* heart beat mechanism control structure */
> + struct bcm_vk_hb_ctrl hb_ctrl;
> + /* house-keeping variable of error logs */
> + spinlock_t host_alert_lock; /* protection to access host_alert struct */
> + struct bcm_vk_alert host_alert;
> + struct bcm_vk_alert peer_alert; /* bits set by the card */
> + struct bcm_vk_alert_cnts alert_cnts;
> +
> + /* offset of the peer log control in BAR2 */
> + uint32_t peerlog_off;
> +};
> +
> +/* wq offload work items bits definitions */
> +enum bcm_vk_wq_offload_flags {
> + BCM_VK_WQ_DWNLD_PEND = 0,
> + BCM_VK_WQ_DWNLD_AUTO = 1,
> + BCM_VK_WQ_NOTF_PEND = 2,
> +};
> +
> +/* a macro to get an individual field with mask and shift */
> +#define BCM_VK_EXTRACT_FIELD(_field, _reg, _mask, _shift) \
> + (_field = (((_reg) >> (_shift)) & (_mask)))
> +
> +/* structure that is used to faciliate displaying of register content */
> +struct bcm_vk_entry {
> + const uint32_t mask;
> + const uint32_t exp_val;
> + const char *str;
> +};
> +
> +/* alerts that could be generated from peer */
> +#define BCM_VK_PEER_ERR_NUM 9
> +extern struct bcm_vk_entry const bcm_vk_peer_err[BCM_VK_PEER_ERR_NUM];
> +/* alerts detected by the host */
> +#define BCM_VK_HOST_ERR_NUM 2
> +extern struct bcm_vk_entry const bcm_vk_host_err[BCM_VK_HOST_ERR_NUM];
> +
> +/*
> + * check if PCIe interface is down on read. Use it when it is
> + * certain that _val should never be all ones.
> + */
> +#define BCM_VK_INTF_IS_DOWN(val) ((val) == 0xFFFFFFFF)
> +
> +static inline u32 vkread32(struct bcm_vk *vk,
> + enum pci_barno bar,
> + uint64_t offset)
> +{
> + u32 value;
> +
> + value = ioread32(vk->bar[bar] + offset);
> + return value;
return ioread32(vk->bar[bar] + offset);
> +}
> +
> +static inline void vkwrite32(struct bcm_vk *vk,
> + u32 value,
> + enum pci_barno bar,
> + uint64_t offset)
> +{
> + iowrite32(value, vk->bar[bar] + offset);
> +}
> +
> +static inline u8 vkread8(struct bcm_vk *vk,
> + enum pci_barno bar,
> + uint64_t offset)
> +{
> + u8 value;
> +
> + value = ioread8(vk->bar[bar] + offset);
> + return value;
return ioread8(vk->bar[bar] + offset);
> +}
> +
> +static inline void vkwrite8(struct bcm_vk *vk,
> + u8 value,
> + enum pci_barno bar,
> + uint64_t offset)
> +{
> + iowrite8(value, vk->bar[bar] + offset);
> +}
> +
> +int bcm_vk_open(struct inode *inode, struct file *p_file);
> +ssize_t bcm_vk_read(struct file *p_file, char __user *buf, size_t count,
> + loff_t *f_pos);
> +ssize_t bcm_vk_write(struct file *p_file, const char __user *buf,
> + size_t count, loff_t *f_pos);
> +int bcm_vk_release(struct inode *inode, struct file *p_file);
> +void bcm_vk_release_data(struct kref *kref);
> +irqreturn_t bcm_vk_msgq_irqhandler(int irq, void *dev_id);
> +irqreturn_t bcm_vk_notf_irqhandler(int irq, void *dev_id);
> +int bcm_vk_msg_init(struct bcm_vk *vk);
> +void bcm_vk_msg_remove(struct bcm_vk *vk);
> +int bcm_vk_sync_msgq(struct bcm_vk *vk, bool force_sync);
> +bool bcm_vk_msgq_marker_valid(struct bcm_vk *vk);
> +void bcm_vk_blk_drv_access(struct bcm_vk *vk);
> +int bcm_vk_send_shutdown_msg(struct bcm_vk *vk, uint32_t shut_type, pid_t pid);
> +void bcm_vk_trigger_reset(struct bcm_vk *vk);
> +void bcm_h2vk_doorbell(struct bcm_vk *vk, uint32_t q_num, uint32_t db_val);
> +int bcm_vk_auto_load_all_images(struct bcm_vk *vk);
> +int bcm_vk_tty_init(struct bcm_vk *vk, char *name);
> +void bcm_vk_tty_exit(struct bcm_vk *vk);
> +void bcm_vk_hb_init(struct bcm_vk *vk);
> +void bcm_vk_hb_deinit(struct bcm_vk *vk);
> +void bcm_vk_handle_notf(struct bcm_vk *vk);
> +bool bcm_vk_drv_access_ok(struct bcm_vk *vk);
> +
> +#endif
> diff --git a/drivers/misc/bcm-vk/bcm_vk_dev.c b/drivers/misc/bcm-vk/bcm_vk_dev.c
> new file mode 100644
> index 000000000000..d8f59e898ca8
> --- /dev/null
> +++ b/drivers/misc/bcm-vk/bcm_vk_dev.c
> @@ -0,0 +1,1197 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2018-2020 Broadcom.
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/firmware.h>
> +#include <linux/fs.h>
> +#include <linux/idr.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/pci_regs.h>
> +#include <uapi/linux/misc/bcm_vk.h>
> +
> +#include "bcm_vk.h"
> +
> +#define PCI_DEVICE_ID_VALKYRIE 0x5E87
> +#define PCI_DEVICE_ID_VIPER 0x5E88
> +
> +static DEFINE_IDA(bcm_vk_ida);
> +
> +struct load_image_tab {
> + const uint32_t image_type;
> + const char *image_name;
Indented too far.
> +};
> +
> +enum soc_idx {
> + VALKYRIE = 0,
> + VIPER
> +};
> +
> +#define NUM_BOOT_STAGES 2
> +const struct load_image_tab image_tab[][NUM_BOOT_STAGES] = {
> + [VALKYRIE] = {
> + {VK_IMAGE_TYPE_BOOT1, VK_BOOT1_DEF_VALKYRIE_FILENAME},
> + {VK_IMAGE_TYPE_BOOT2, VK_BOOT2_DEF_VALKYRIE_FILENAME}
> + },
> + [VIPER] = {
> + {VK_IMAGE_TYPE_BOOT1, VK_BOOT1_DEF_VIPER_FILENAME},
> + {VK_IMAGE_TYPE_BOOT2, VK_BOOT2_DEF_VIPER_FILENAME}
> + }
> +};
> +
> +/* Location of memory base addresses of interest in BAR1 */
> +/* Load Boot1 to start of ITCM */
> +#define BAR1_CODEPUSH_BASE_BOOT1 0x100000
> +
> +/* Allow minimum 1s for Load Image timeout responses */
> +#define LOAD_IMAGE_TIMEOUT_MS (1 * MSEC_PER_SEC)
> +
> +/* Image startup timeouts */
> +#define BOOT1_STARTUP_TIMEOUT_MS (5 * MSEC_PER_SEC)
> +#define BOOT2_STARTUP_TIMEOUT_MS (10 * MSEC_PER_SEC)
> +
> +/* 1ms wait for checking the transfer complete status */
> +#define TXFR_COMPLETE_TIMEOUT_MS 1
> +
> +/* MSIX usages */
> +#define VK_MSIX_MSGQ_MAX 3
> +#define VK_MSIX_NOTF_MAX 1
> +#define VK_MSIX_IRQ_MAX (VK_MSIX_MSGQ_MAX + VK_MSIX_NOTF_MAX)
> +
> +/* Number of bits set in DMA mask*/
> +#define BCM_VK_DMA_BITS 64
> +
> +/* deinit time for the card os after receiving doorbell */
> +#define BCM_VK_DEINIT_TIME_MS (2 * MSEC_PER_SEC)
> +
> +/*
> + * module parameters
> + */
> +static bool auto_load = true;
> +module_param(auto_load, bool, 0444);
> +MODULE_PARM_DESC(auto_load,
> + "Load images automatically at PCIe probe time.\n");
> +static uint nr_scratch_pages = VK_BAR1_SCRATCH_DEF_NR_PAGES;
> +module_param(nr_scratch_pages, uint, 0444);
> +MODULE_PARM_DESC(nr_scratch_pages,
> + "Number of pre allocated DMAable coherent pages.\n");
> +static uint nr_ib_sgl_blk = BCM_VK_DEF_IB_SGL_BLK_LEN;
> +module_param(nr_ib_sgl_blk, uint, 0444);
> +MODULE_PARM_DESC(nr_ib_sgl_blk,
> + "Number of in-band msg blks for short SGL.\n");
> +
> +/*
> + * alerts that could be generated from peer
> + */
> +struct bcm_vk_entry const bcm_vk_peer_err[BCM_VK_PEER_ERR_NUM] = {
> + {ERR_LOG_UECC, ERR_LOG_UECC, "uecc"},
> + {ERR_LOG_SSIM_BUSY, ERR_LOG_SSIM_BUSY, "ssim_busy"},
> + {ERR_LOG_AFBC_BUSY, ERR_LOG_AFBC_BUSY, "afbc_busy"},
> + {ERR_LOG_HIGH_TEMP_ERR, ERR_LOG_HIGH_TEMP_ERR, "high_temp"},
> + {ERR_LOG_WDOG_TIMEOUT, ERR_LOG_WDOG_TIMEOUT, "wdog_timeout"},
> + {ERR_LOG_SYS_FAULT, ERR_LOG_SYS_FAULT, "sys_fault"},
> + {ERR_LOG_MEM_ALLOC_FAIL, ERR_LOG_MEM_ALLOC_FAIL, "malloc_fail warn"},
> + {ERR_LOG_LOW_TEMP_WARN, ERR_LOG_LOW_TEMP_WARN, "low_temp warn"},
> + {ERR_LOG_ECC, ERR_LOG_ECC, "ecc"},
> +};
> +/* alerts detected by the host */
> +struct bcm_vk_entry const bcm_vk_host_err[BCM_VK_HOST_ERR_NUM] = {
> + {ERR_LOG_HOST_PCIE_DWN, ERR_LOG_HOST_PCIE_DWN, "PCIe_down"},
> + {ERR_LOG_HOST_HB_FAIL, ERR_LOG_HOST_HB_FAIL, "hb_fail"},
> +};
> +
> +irqreturn_t bcm_vk_notf_irqhandler(int irq, void *dev_id)
> +{
> + struct bcm_vk *vk = dev_id;
> +
> + if (!bcm_vk_drv_access_ok(vk)) {
> + dev_err(&vk->pdev->dev,
> + "Interrupt %d received when msgq not inited\n", irq);
> + goto skip_schedule_work;
> + }
> +
> + /* if notification is not pending, set bit and schedule work */
> + if (test_and_set_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload) == 0)
> + queue_work(vk->wq_thread, &vk->wq_work);
> +
> +skip_schedule_work:
> + return IRQ_HANDLED;
> +}
> +
> +static void bcm_vk_log_notf(struct bcm_vk *vk,
> + struct bcm_vk_alert *alert,
> + struct bcm_vk_entry const *entry_tab,
> + const uint32_t table_size)
> +{
> + uint32_t i;
> + uint32_t masked_val, latched_val;
> + struct bcm_vk_entry const *entry;
> + uint32_t reg;
> + uint16_t ecc_mem_err, uecc_mem_err;
> + struct device *dev = &vk->pdev->dev;
> +
> + for (i = 0; i < table_size; i++) {
> + entry = &entry_tab[i];
> + masked_val = entry->mask & alert->notfs;
> + latched_val = entry->mask & alert->flags;
> +
> + if (masked_val == ERR_LOG_UECC) {
> + /*
> + * if there is difference between stored cnt and it
> + * is greater than threshold, log it.
> + */
> + reg = vkread32(vk, BAR_0, BAR_CARD_ERR_MEM);
> + BCM_VK_EXTRACT_FIELD(uecc_mem_err, reg,
> + BCM_VK_MEM_ERR_FIELD_MASK,
> + BCM_VK_UECC_MEM_ERR_SHIFT);
> + if ((uecc_mem_err != vk->alert_cnts.uecc) &&
> + (uecc_mem_err >= BCM_VK_UECC_THRESHOLD))
> + dev_info(dev,
> + "ALERT! %s.%d uecc RAISED - ErrCnt %d\n",
> + DRV_MODULE_NAME, vk->misc_devid,
> + uecc_mem_err);
> + vk->alert_cnts.uecc = uecc_mem_err;
> + } else if (masked_val == ERR_LOG_ECC) {
> + reg = vkread32(vk, BAR_0, BAR_CARD_ERR_MEM);
> + BCM_VK_EXTRACT_FIELD(ecc_mem_err, reg,
> + BCM_VK_MEM_ERR_FIELD_MASK,
> + BCM_VK_ECC_MEM_ERR_SHIFT);
> + if ((ecc_mem_err != vk->alert_cnts.ecc) &&
> + (ecc_mem_err >= BCM_VK_ECC_THRESHOLD))
> + dev_info(dev, "ALERT! %s.%d ecc RAISED - ErrCnt %d\n",
> + DRV_MODULE_NAME, vk->misc_devid,
> + ecc_mem_err);
> + vk->alert_cnts.ecc = ecc_mem_err;
> + } else if (masked_val != latched_val) {
> + /* print a log as info */
> + dev_info(dev, "ALERT! %s.%d %s %s\n",
> + DRV_MODULE_NAME, vk->misc_devid, entry->str,
> + masked_val ? "RAISED" : "CLEARED");
> + }
> + }
> +}
> +
> +static void bcm_vk_dump_peer_log(struct bcm_vk *vk)
> +{
> + struct bcm_vk_peer_log log, *p_ctl;
> + char loc_buf[BCM_VK_PEER_LOG_LINE_MAX];
> + int cnt;
> + struct device *dev = &vk->pdev->dev;
> + uint data_offset;
> +
> + p_ctl = vk->bar[BAR_2] + vk->peerlog_off;
Have you tried running Sparse on this driver?
> + log = *p_ctl;
> + /* do a rmb() to make sure log is updated */
> + rmb();
> +
> + dev_dbg(dev, "Peer PANIC: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n",
> + log.buf_size, log.mask, log.rd_idx, log.wr_idx);
> +
> + cnt = 0;
> + data_offset = vk->peerlog_off + sizeof(struct bcm_vk_peer_log);
> + while (log.rd_idx != log.wr_idx) {
> + loc_buf[cnt] = vkread8(vk, BAR_2, data_offset + log.rd_idx);
> +
> + if ((loc_buf[cnt] == '\0') ||
> + (cnt == (BCM_VK_PEER_LOG_LINE_MAX - 1))) {
> + dev_err(dev, "%s", loc_buf);
> + cnt = 0;
> + } else {
> + cnt++;
> + }
> + log.rd_idx = (log.rd_idx + 1) & log.mask;
> + }
> + /* update rd idx at the end */
> + vkwrite32(vk, log.rd_idx, BAR_2, vk->peerlog_off);
> +}
> +
> +void bcm_vk_handle_notf(struct bcm_vk *vk)
> +{
> + uint32_t reg;
> + struct bcm_vk_alert alert;
> + bool intf_down;
> + unsigned long flags;
> +
> + /* handle peer alerts and then locally detected ones */
> + reg = vkread32(vk, BAR_0, BAR_CARD_ERR_LOG);
> + intf_down = BCM_VK_INTF_IS_DOWN(reg);
> + if (!intf_down) {
> + vk->peer_alert.notfs = reg;
> + bcm_vk_log_notf(vk, &vk->peer_alert, bcm_vk_peer_err,
> + ARRAY_SIZE(bcm_vk_peer_err));
> + vk->peer_alert.flags = vk->peer_alert.notfs;
> + } else {
> + /* turn off access */
> + bcm_vk_blk_drv_access(vk);
> + }
> +
> + /* check and make copy of alert with lock and then free lock */
> + spin_lock_irqsave(&vk->host_alert_lock, flags);
> + if (intf_down)
> + vk->host_alert.notfs |= ERR_LOG_HOST_PCIE_DWN;
> +
> + alert = vk->host_alert;
> + vk->host_alert.flags = vk->host_alert.notfs;
> + spin_unlock_irqrestore(&vk->host_alert_lock, flags);
> +
> + /* call display with copy */
> + bcm_vk_log_notf(vk, &alert, bcm_vk_host_err,
> + ARRAY_SIZE(bcm_vk_host_err));
> +
> + /*
> + * If it is a sys fault or heartbeat timeout, we would like extract
> + * log msg from the card so that we would know what is the last fault
> + */
> + if ((!intf_down) &&
> + ((vk->host_alert.flags & ERR_LOG_HOST_HB_FAIL) ||
> + (vk->peer_alert.flags & ERR_LOG_SYS_FAULT)))
> + bcm_vk_dump_peer_log(vk);
> +}
> +
> +static inline int bcm_vk_wait(struct bcm_vk *vk, enum pci_barno bar,
> + uint64_t offset, u32 mask, u32 value,
> + unsigned long timeout_ms)
> +{
> + struct device *dev = &vk->pdev->dev;
> + unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
> + u32 rd_val;
> +
> + do {
> + rd_val = vkread32(vk, bar, offset);
> + dev_dbg(dev, "BAR%d Offset=0x%llx: 0x%x\n",
> + bar, offset, rd_val);
> +
> + if (time_after(jiffies, timeout))
> + return -ETIMEDOUT;
> +
> + cpu_relax();
> + cond_resched();
> + } while ((rd_val & mask) != value);
> +
> + return 0;
> +}
> +
> +static void bcm_vk_get_card_info(struct bcm_vk *vk)
> +{
> + struct device *dev = &vk->pdev->dev;
> + uint32_t offset;
> + int i;
> + uint8_t *dst;
> + struct bcm_vk_card_info *info = &vk->card_info;
> +
> + /* first read the offset from spare register */
> + offset = vkread32(vk, BAR_0, BAR_CARD_STATIC_INFO);
> + offset &= (pci_resource_len(vk->pdev, BAR_2 * 2) - 1);
> +
> + /* based on the offset, read info to internal card info structure */
> + dst = (uint8_t *)info;
> + for (i = 0; i < sizeof(*info); i++)
> + *dst++ = vkread8(vk, BAR_2, offset++);
> +
> +#define CARD_INFO_LOG_FMT "version : %x\n" \
> + "os_tag : %s\n" \
> + "cmpt_tag : %s\n" \
> + "cpu_freq : %d MHz\n" \
> + "cpu_scale : %d full, %d lowest\n" \
> + "ddr_freq : %d MHz\n" \
> + "ddr_size : %d MB\n" \
> + "video_freq: %d MHz\n"
> + dev_dbg(dev, CARD_INFO_LOG_FMT, info->version, info->os_tag,
> + info->cmpt_tag, info->cpu_freq_mhz, info->cpu_scale[0],
> + info->cpu_scale[MAX_OPP - 1], info->ddr_freq_mhz,
> + info->ddr_size_MB, info->video_core_freq_mhz);
> +
> + /* get the peer log pointer, only need the offset */
> + vk->peerlog_off = offset;
> +}
> +
> +static int bcm_vk_sync_card_info(struct bcm_vk *vk)
> +{
> + uint32_t rdy_marker = vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY);
> +
> + /* check for marker, but allow diags mode to skip sync */
> + if (!bcm_vk_msgq_marker_valid(vk))
> + return (rdy_marker == VK_BAR1_DIAG_RDY_MARKER ? 0 : -EINVAL);
> +
> + /*
> + * Write down scratch addr which is used for DMA. For
> + * signed part, BAR1 is accessible only after boot2 has come
> + * up
> + */
> + if (vk->tdma_addr) {
> + vkwrite32(vk, vk->tdma_addr >> 32, BAR_1,
> + VK_BAR1_SCRATCH_OFF_LO);
> + vkwrite32(vk, (uint32_t)vk->tdma_addr, BAR_1,
> + VK_BAR1_SCRATCH_OFF_HI);
> + vkwrite32(vk, nr_scratch_pages * PAGE_SIZE, BAR_1,
> + VK_BAR1_SCRATCH_SZ_ADDR);
> + }
> +
> + /* get static card info, only need to read once */
> + bcm_vk_get_card_info(vk);
> +
> + return 0;
> +}
> +
> +void bcm_vk_blk_drv_access(struct bcm_vk *vk)
> +{
> + int i;
> +
> + /*
> + * kill all the apps except for the process that is resetting.
> + * If not called during reset, reset_ppid == NULL, and all will be
> + * killed.
> + */
> + spin_lock(&vk->ctx_lock);
> +
> + /* set msgq_inited to 0 so that all rd/wr will be blocked */
> + atomic_set(&vk->msgq_inited, 0);
> +
> + for (i = 0; i < VK_PID_HT_SZ; i++) {
> + struct bcm_vk_ctx *ctx;
> +
> + list_for_each_entry(ctx, &vk->pid_ht[i].head, node) {
> + if (ctx->ppid != vk->reset_ppid) {
> + dev_dbg(&vk->pdev->dev,
> + "Send kill signal to pid %d\n",
> + task_pid_nr(ctx->ppid));
> + kill_pid(task_pid(ctx->ppid), SIGKILL, 1);
> + }
> + }
> + }
> + spin_unlock(&vk->ctx_lock);
> +}
> +
> +static void bcm_vk_buf_notify(struct bcm_vk *vk, void *bufp,
> + dma_addr_t host_buf_addr, uint32_t buf_size)
> +{
> + /* update the dma address to the card */
> + vkwrite32(vk, host_buf_addr >> 32, BAR_1,
> + VK_BAR1_DMA_BUF_OFF_HI);
> + vkwrite32(vk, (uint32_t)host_buf_addr, BAR_1,
> + VK_BAR1_DMA_BUF_OFF_LO);
> + vkwrite32(vk, buf_size, BAR_1, VK_BAR1_DMA_BUF_SZ);
> +}
> +
> +static int bcm_vk_load_image_by_type(struct bcm_vk *vk, u32 load_type,
> + const char *filename)
> +{
> + struct device *dev = &vk->pdev->dev;
> + const struct firmware *fw;
> + void *bufp;
> + size_t max_buf;
> + int ret;
> + uint64_t offset_codepush;
> + u32 codepush;
> + u32 value;
> + dma_addr_t boot2_dma_addr;
> +
> + if (load_type == VK_IMAGE_TYPE_BOOT1) {
> + /*
> + * After POR, enable VK soft BOOTSRC so bootrom do not clear
> + * the pushed image (the TCM memories).
> + */
> + value = vkread32(vk, BAR_0, BAR_BOOTSRC_SELECT);
> + value |= BOOTSRC_SOFT_ENABLE;
> + vkwrite32(vk, value, BAR_0, BAR_BOOTSRC_SELECT);
> +
> + codepush = CODEPUSH_BOOTSTART + CODEPUSH_BOOT1_ENTRY;
> + offset_codepush = BAR_CODEPUSH_SBL;
> +
> + /* Write a 1 to request SRAM open bit */
> + vkwrite32(vk, CODEPUSH_BOOTSTART, BAR_0, offset_codepush);
> +
> + /* Wait for VK to respond */
> + ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, SRAM_OPEN,
> + SRAM_OPEN, LOAD_IMAGE_TIMEOUT_MS);
> + if (ret < 0) {
> + dev_err(dev, "boot1 timeout\n");
> + goto err_out;
> + }
> +
> + bufp = vk->bar[BAR_1] + BAR1_CODEPUSH_BASE_BOOT1;
> + max_buf = SZ_256K;
> + } else if (load_type == VK_IMAGE_TYPE_BOOT2) {
> + codepush = CODEPUSH_BOOT2_ENTRY;
> + offset_codepush = BAR_CODEPUSH_SBI;
> +
> + /* Wait for VK to respond */
> + ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, DDR_OPEN,
> + DDR_OPEN, LOAD_IMAGE_TIMEOUT_MS);
> + if (ret < 0) {
> + dev_err(dev, "boot2 timeout\n");
> + goto err_out;
> + }
> +
> + max_buf = SZ_4M;
> + bufp = dma_alloc_coherent(dev,
> + max_buf,
> + &boot2_dma_addr, GFP_KERNEL);
> + if (!bufp) {
> + dev_err(dev, "Error allocating 0x%zx\n", max_buf);
> + ret = -ENOMEM;
> + goto err_out;
> + }
> +
> + bcm_vk_buf_notify(vk, bufp, boot2_dma_addr, max_buf);
> + } else {
> + dev_err(dev, "Error invalid image type 0x%x\n", load_type);
> + ret = -EINVAL;
> + goto err_out;
> + }
> +
> + ret = request_firmware_into_buf(&fw, filename, dev,
> + bufp, max_buf, 0,
> + KERNEL_PREAD_FLAG_PART);
> + if (ret) {
> + dev_err(dev, "Error %d requesting firmware file: %s\n",
> + ret, filename);
> + goto err_out;
> + }
> + dev_dbg(dev, "size=0x%zx\n", fw->size);
> +
> + dev_dbg(dev, "Signaling 0x%x to 0x%llx\n", codepush, offset_codepush);
> + vkwrite32(vk, codepush, BAR_0, offset_codepush);
> +
> + if (load_type == VK_IMAGE_TYPE_BOOT1) {
> + /* wait until done */
> + ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS,
> + BOOT1_RUNNING,
> + BOOT1_RUNNING,
> + BOOT1_STARTUP_TIMEOUT_MS);
> + if (ret) {
> + dev_err(dev,
> + "Timeout %ld ms waiting for boot1 to come up\n",
> + BOOT1_STARTUP_TIMEOUT_MS);
> + goto err_firmware_out;
> + }
> + } else if (load_type == VK_IMAGE_TYPE_BOOT2) {
> + unsigned long timeout = jiffies + msecs_to_jiffies(
> + LOAD_IMAGE_TIMEOUT_MS);
> +
> + /* To send more data to VK than max_buf allowed at a time */
> + do {
> + /*
> + * Check for ack from card. when Ack is received,
> + * it means all the data is received by card.
> + * Exit the loop after ack is received.
> + */
> + ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS,
> + FW_LOADER_ACK_RCVD_ALL_DATA,
> + FW_LOADER_ACK_RCVD_ALL_DATA,
> + TXFR_COMPLETE_TIMEOUT_MS);
> + if (ret == 0) {
> + dev_info(dev, "Exit boot2 download\n");
> + break;
> + }
> +
> + /* exit the loop, if there is no response from card */
> + if (time_after(jiffies, timeout)) {
> + dev_err(dev, "Error. No reply from card\n");
> + ret = -ETIMEDOUT;
> + goto err_firmware_out;
> + }
> +
> + /* Wait for VK to open BAR space to copy new data */
> + ret = bcm_vk_wait(vk, BAR_0, offset_codepush,
> + codepush, 0,
> + TXFR_COMPLETE_TIMEOUT_MS);
> + if (ret == 0) {
> + ret = request_firmware_into_buf(
> + &fw,
> + filename,
> + dev, bufp,
> + max_buf,
> + fw->size,
> + KERNEL_PREAD_FLAG_PART);
> + if (ret) {
> + dev_err(dev, "Error %d requesting firmware file: %s offset: 0x%zx\n",
> + ret, filename,
> + fw->size);
> + goto err_firmware_out;
> + }
> + dev_dbg(dev, "size=0x%zx\n", fw->size);
> + dev_dbg(dev, "Signaling 0x%x to 0x%llx\n",
> + codepush, offset_codepush);
> + vkwrite32(vk, codepush, BAR_0, offset_codepush);
> + /* reload timeout after every codepush */
> + timeout = jiffies + msecs_to_jiffies(
> + LOAD_IMAGE_TIMEOUT_MS);
> + }
> + } while (1);
> +
> + /* wait for fw status bits to indicate app ready */
> + ret = bcm_vk_wait(vk, BAR_0, VK_BAR_FWSTS,
> + VK_FWSTS_READY,
> + VK_FWSTS_READY,
> + BOOT2_STARTUP_TIMEOUT_MS);
> + if (ret < 0) {
> + dev_err(dev, "Boot2 not ready - timeout\n");
> + goto err_firmware_out;
> + }
> +
> + /*
> + * Next, initialize Message Q if we are loading boot2.
> + * Do a force sync
> + */
> + ret = bcm_vk_sync_msgq(vk, true);
> + if (ret) {
> + dev_err(dev, "Boot2 Error reading comm msg Q info\n");
> + ret = -EIO;
> + goto err_firmware_out;
> + }
> +
> + /* sync & channel other info */
> + ret = bcm_vk_sync_card_info(vk);
> + if (ret) {
> + dev_err(dev, "Syncing Card Info failure\n");
> + goto err_firmware_out;
> + }
> + }
> +
> +err_firmware_out:
> + release_firmware(fw);
> +
> +err_out:
> + if (load_type == VK_IMAGE_TYPE_BOOT2)
> + dma_free_coherent(dev, max_buf, bufp, boot2_dma_addr);
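bufp is uninitialized if we get here from the boot2 bcm_vk_wait()
timeout, because that goto happens before dma_alloc_coherent(). Never
use the label name "err" or "out" because it's always wrong. This is
just a combination of the two.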
> +
> + return ret;
> +}
> +
> +static u32 bcm_vk_next_boot_image(struct bcm_vk *vk)
> +{
> + uint32_t boot_status;
> + uint32_t fw_status;
> + u32 load_type = 0; /* default for unknown */
> +
> + boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
> + fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS);
> +
> + if (!BCM_VK_INTF_IS_DOWN(boot_status) && (boot_status & SRAM_OPEN))
> + load_type = VK_IMAGE_TYPE_BOOT1;
> + else if (boot_status == BOOT1_RUNNING)
> + load_type = VK_IMAGE_TYPE_BOOT2;
> +
> + /*
> + * TO_FIX: For now, like to know what value we get everytime
> + * for debugging.
> + */
> + dev_info(&vk->pdev->dev,
> + "boot-status value for next image: 0x%x : fw-status 0x%x\n",
> + boot_status, fw_status);
> +
> + return load_type;
> +}
> +
> +int bcm_vk_auto_load_all_images(struct bcm_vk *vk)
> +{
> + int i, id, ret = -1;
> + struct device *dev = &vk->pdev->dev;
> + struct pci_dev *pdev = to_pci_dev(dev);
> + uint32_t curr_type;
> + const char *curr_name;
> +
> + switch (pdev->device) {
> + case PCI_DEVICE_ID_VALKYRIE:
> + id = VALKYRIE;
> + break;
> +
> + case PCI_DEVICE_ID_VIPER:
> + id = VIPER;
> + break;
> +
> + default:
> + dev_err(dev, "no images for 0x%x\n", pdev->device);
> + goto bcm_vk_auto_load_all_exit;
> + }
> +
> + /* log a message to know the relative loading order */
> + dev_info(dev, "Load All for device %d\n", vk->misc_devid);
> +
> + for (i = 0; i < NUM_BOOT_STAGES; i++) {
> + curr_type = image_tab[id][i].image_type;
> + if (bcm_vk_next_boot_image(vk) == curr_type) {
> + curr_name = image_tab[id][i].image_name;
> + ret = bcm_vk_load_image_by_type(vk, curr_type,
> + curr_name);
> + dev_info(dev, "Auto load %s, ret %d\n",
> + curr_name, ret);
> +
> + if (ret) {
> + dev_err(dev, "Error loading default %s\n",
> + curr_name);
> + goto bcm_vk_auto_load_all_exit;
> + }
> + }
> + }
> +
> +bcm_vk_auto_load_all_exit:
> + return ret;
> +}
> +
> +static int bcm_vk_trigger_autoload(struct bcm_vk *vk)
> +{
> + if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload) != 0)
> + return -EPERM;
> +
> + set_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload);
> + queue_work(vk->wq_thread, &vk->wq_work);
> +
> + return 0;
> +}
> +
> +static long bcm_vk_load_image(struct bcm_vk *vk, struct vk_image *arg)
> +{
> + int ret;
> + struct device *dev = &vk->pdev->dev;
> + struct vk_image image;
> + u32 next_loadable;
> +
> + if (copy_from_user(&image, arg, sizeof(image))) {
> + ret = -EACCES;
> + goto bcm_vk_load_image_exit;
> + }
> +
> + /* first check if fw is at the right state for the download */
What does this comment mean? Just delete it. The code is clear on
its own.
> + next_loadable = bcm_vk_next_boot_image(vk);
> + if (next_loadable != image.type) {
> + dev_err(dev, "Next expected image %u, Loading %u\n",
> + next_loadable, image.type);
> + ret = -EPERM;
> + goto bcm_vk_load_image_exit;
> + }
> +
> + /*
> + * if something is pending download already. This could only happen
> + * for now when the driver is being loaded, or if someone has issued
> + * another download command in another shell.
> + */
> + if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload) != 0) {
> + dev_err(dev, "Download operation already pending.\n");
> + return -EPERM;
Woohoo for the direct return! Just make this function use direct
returns everywhere and delete all the do-nothing gotos.
> + }
> +
> + ret = bcm_vk_load_image_by_type(vk, image.type, image.filename);
> + clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload);
> +
> +bcm_vk_load_image_exit:
> + return ret;
> +}
> +
> +static long bcm_vk_access_bar(struct bcm_vk *vk, struct vk_access *arg)
> +{
> + struct device *dev = &vk->pdev->dev;
> + struct vk_access access;
> + long ret = -EINVAL;
> + u32 value;
> + long i;
> + long num;
> +
> + if (copy_from_user(&access, arg, sizeof(struct vk_access))) {
> + ret = -EACCES;
return -EFAULT;
> + goto err_out;
> + }
> +
> + /* do some range checking in the barno and offset */
> + if (access.barno >= MAX_BAR) {
> + dev_err(dev, "invalid bar no %d\n", access.barno);
> + goto err_out;
> + } else if ((access.offset + access.len) >
> + pci_resource_len(vk->pdev, access.barno * 2)) {
There is an arithmetic overflow in access.offset + access.len. Also len
should be a multiple of sizeof(u32).
} else if (access.len % sizeof(u32) ||
access.offset + access.len < access.offset ||
access.offset + access.len > pci_resource_len(vk->pdev,
access.barno * 2)) {
> + dev_err(dev, "invalid bar offset 0x%llx, len 0x%x\n",
> + access.offset, access.len);
> + goto err_out;
> + }
> +
> + if (access.type == VK_ACCESS_READ) {
> + dev_dbg(dev, "read barno:%d offset:0x%llx len:0x%x\n",
> + access.barno, access.offset, access.len);
> + num = access.len / sizeof(u32);
> + for (i = 0; i < num; i++) {
> + value = vkread32(vk, access.barno,
> + access.offset + (i * sizeof(u32)));
> + ret = put_user(value, access.data + i);
> + if (ret)
> + goto err_out;
> +
> + dev_dbg(dev, "0x%x\n", value);
> + }
> + } else if (access.type == VK_ACCESS_WRITE) {
> + dev_dbg(dev, "write barno:%d offset:0x%llx len:0x%x\n",
> + access.barno, access.offset, access.len);
> + num = access.len / sizeof(u32);
> + for (i = 0; i < num; i++) {
> + ret = get_user(value, access.data + i);
> + if (ret)
> + goto err_out;
> +
> + vkwrite32(vk, value, access.barno,
> + access.offset + (i * sizeof(u32)));
> + dev_dbg(dev, "0x%x\n", value);
> + }
> + } else {
> + dev_dbg(dev, "invalid access type %d\n", access.type);
> + }
> +err_out:
> + return ret;
> +}
> +
> +static int bcm_vk_reset_successful(struct bcm_vk *vk)
> +{
> + struct device *dev = &vk->pdev->dev;
> + u32 fw_status, reset_reason;
> + int ret = -EAGAIN;
> +
> + /*
> + * Reset could be triggered when the card in several state:
> + * i) in bootROM
> + * ii) after boot1
> + * iii) boot2 running
> + *
> + * i) & ii) - no status bits will be updated. If vkboot1
> + * runs automatically after reset, it will update the reason
> + * to be unknown reason
> + * iii) - reboot reason match + deinit done.
> + */
> + fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS);
> + /* immediate exit if interface goes down */
> + if (BCM_VK_INTF_IS_DOWN(fw_status)) {
> + dev_err(dev, "PCIe Intf Down!\n");
> + goto bcm_vk_reset_exit;
> + }
> +
> + /* initial check on reset reason */
This comment doesn't add anything.
> + reset_reason = (fw_status & VK_FWSTS_RESET_REASON_MASK);
> + if ((reset_reason == VK_FWSTS_RESET_MBOX_DB)
> + || (reset_reason == VK_FWSTS_RESET_UNKNOWN))
> + ret = 0;
Please use checkpatch.pl --strict.
if ((reset_reason == VK_FWSTS_RESET_MBOX_DB) ||
(reset_reason == VK_FWSTS_RESET_UNKNOWN))
ret = 0;
> +
> + /*
> + * if some of the deinit bits are set, but done
> + * bit is not, this is a failure if triggered while boot2 is running
> + */
> + if ((fw_status & VK_FWSTS_DEINIT_TRIGGERED)
> + && !(fw_status & VK_FWSTS_RESET_DONE))
> + ret = -EAGAIN;
> +
> +bcm_vk_reset_exit:
> + dev_dbg(dev, "FW status = 0x%x ret %d\n", fw_status, ret);
> +
> + return ret;
> +}
> +
> +static long bcm_vk_reset(struct bcm_vk *vk, struct vk_reset *arg)
> +{
> + struct device *dev = &vk->pdev->dev;
> + struct vk_reset reset;
> + int ret = 0;
> +
> + if (copy_from_user(&reset, arg, sizeof(struct vk_reset))) {
> + ret = -EACCES;
return -EFAULT;
> + goto err_out;
> + }
> + dev_info(dev, "Issue Reset\n");
> +
> + /*
> + * The following is the sequence of reset:
> + * - send card level graceful shut down
> + * - wait enough time for VK to handle its business, stopping DMA etc
> + * - kill host apps
> + * - Trigger interrupt with DB
> + */
> + bcm_vk_send_shutdown_msg(vk, VK_SHUTDOWN_GRACEFUL, 0);
> +
> + spin_lock(&vk->ctx_lock);
> + if (!vk->reset_ppid) {
> + vk->reset_ppid = current;
> + } else {
> + dev_err(dev, "Reset already launched by process pid %d\n",
> + task_pid_nr(vk->reset_ppid));
> + ret = -EACCES;
> + }
> + spin_unlock(&vk->ctx_lock);
> + if (ret)
> + goto err_out;
> +
> + bcm_vk_blk_drv_access(vk);
> + bcm_vk_trigger_reset(vk);
> +
> + /*
> + * Wait enough time for card os to deinit
> + * and populate the reset reason.
> + */
> + msleep(BCM_VK_DEINIT_TIME_MS);
> +
> + ret = bcm_vk_reset_successful(vk);
> +
> +err_out:
> + return ret;
> +}
> +
> +static int bcm_vk_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct bcm_vk_ctx *ctx = file->private_data;
> + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
> + unsigned long pg_size;
> +
> + /* only BAR2 is mmap possible, which is bar num 4 due to 64bit */
> +#define VK_MMAPABLE_BAR 4
> +
> + pg_size = ((pci_resource_len(vk->pdev, VK_MMAPABLE_BAR) - 1)
> + >> PAGE_SHIFT) + 1;
> + if (vma->vm_pgoff + vma_pages(vma) > pg_size)
> + return -EINVAL;
> +
> + vma->vm_pgoff += (pci_resource_start(vk->pdev, VK_MMAPABLE_BAR)
> + >> PAGE_SHIFT);
> + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +
> + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
> + vma->vm_end - vma->vm_start,
> + vma->vm_page_prot);
> +}
> +
> +static long bcm_vk_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> +{
> + long ret = -EINVAL;
> + struct bcm_vk_ctx *ctx = file->private_data;
> + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
> + void __user *argp = (void __user *)arg;
> +
> + dev_dbg(&vk->pdev->dev,
> + "ioctl, cmd=0x%02x, arg=0x%02lx\n",
> + cmd, arg);
> +
> + mutex_lock(&vk->mutex);
> +
> + switch (cmd) {
> + case VK_IOCTL_LOAD_IMAGE:
> + ret = bcm_vk_load_image(vk, argp);
> + break;
> +
> + case VK_IOCTL_ACCESS_BAR:
> + ret = bcm_vk_access_bar(vk, argp);
> + break;
> +
> + case VK_IOCTL_RESET:
> + ret = bcm_vk_reset(vk, argp);
> + break;
> +
> + default:
> + break;
> + }
> +
> + mutex_unlock(&vk->mutex);
> +
> + return ret;
> +}
> +
> +static const struct file_operations bcm_vk_fops = {
> + .owner = THIS_MODULE,
> + .open = bcm_vk_open,
> + .read = bcm_vk_read,
> + .write = bcm_vk_write,
> + .release = bcm_vk_release,
> + .mmap = bcm_vk_mmap,
> + .unlocked_ioctl = bcm_vk_ioctl,
> +};
> +
> +static int bcm_vk_on_panic(struct notifier_block *nb,
> + unsigned long e, void *p)
> +{
> + struct bcm_vk *vk = container_of(nb, struct bcm_vk, panic_nb);
> +
> + bcm_h2vk_doorbell(vk, VK_BAR0_RESET_DB_NUM, VK_BAR0_RESET_DB_HARD);
> +
> + return 0;
> +}
> +
> +static int bcm_vk_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> +{
> + int err;
> + int i;
> + int id;
> + int irq;
> + char name[20];
> + struct bcm_vk *vk;
> + struct device *dev = &pdev->dev;
> + struct miscdevice *misc_device;
> + uint32_t boot_status;
> +
> + /* allocate vk structure which is tied to kref for freeing */
> + vk = kzalloc(sizeof(*vk), GFP_KERNEL);
> + if (!vk)
> + return -ENOMEM;
> +
> + kref_init(&vk->kref);
> + if (nr_ib_sgl_blk > BCM_VK_IB_SGL_BLK_MAX) {
> + dev_warn(dev, "Inband SGL blk %d limited to max %d\n",
> + nr_ib_sgl_blk, BCM_VK_IB_SGL_BLK_MAX);
> + nr_ib_sgl_blk = BCM_VK_IB_SGL_BLK_MAX;
> + }
> + vk->ib_sgl_size = nr_ib_sgl_blk * VK_MSGQ_BLK_SIZE;
> + vk->pdev = pdev;
> + mutex_init(&vk->mutex);
> +
> + err = pci_enable_device(pdev);
> + if (err) {
> + dev_err(dev, "Cannot enable PCI device\n");
> + return err;
> + }
> +
> + err = pci_request_regions(pdev, DRV_MODULE_NAME);
> + if (err) {
> + dev_err(dev, "Cannot obtain PCI resources\n");
> + goto err_disable_pdev;
> + }
> +
> + /* make sure DMA is good */
> + err = dma_set_mask_and_coherent(&pdev->dev,
> + DMA_BIT_MASK(BCM_VK_DMA_BITS));
> + if (err) {
> + dev_err(dev, "failed to set DMA mask\n");
> + goto err_disable_pdev;
> + }
> +
> + /* The tdma is a scratch area for some DMA testings. */
> + if (nr_scratch_pages) {
> + vk->tdma_vaddr = dma_alloc_coherent(dev,
> + nr_scratch_pages * PAGE_SIZE,
> + &vk->tdma_addr, GFP_KERNEL);
> + if (!vk->tdma_vaddr) {
> + err = -ENOMEM;
> + goto err_disable_pdev;
> + }
> + }
> +
> + pci_set_master(pdev);
> + pci_set_drvdata(pdev, vk);
> +
> + irq = pci_alloc_irq_vectors(pdev,
> + 1,
> + VK_MSIX_IRQ_MAX,
> + PCI_IRQ_MSI | PCI_IRQ_MSIX);
> +
> + if (irq < VK_MSIX_IRQ_MAX) {
> + dev_err(dev, "failed to get %d MSIX interrupts, ret(%d)\n",
> + VK_MSIX_IRQ_MAX, irq);
> + err = (irq >= 0) ? -EINVAL : irq;
> + goto err_disable_pdev;
> + }
> +
> + dev_info(dev, "Number of IRQs %d allocated.\n", irq);
> +
> + for (i = 0; i < MAX_BAR; i++) {
> + /* multiple by 2 for 64 bit BAR mapping */
> + vk->bar[i] = pci_ioremap_bar(pdev, i * 2);
> + if (!vk->bar[i]) {
> + dev_err(dev, "failed to remap BAR%d\n", i);
> + goto err_iounmap;
> + }
> + }
> +
> + for (vk->num_irqs = 0;
> + vk->num_irqs < VK_MSIX_MSGQ_MAX;
> + vk->num_irqs++) {
> + err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs),
> + bcm_vk_msgq_irqhandler,
> + IRQF_SHARED, DRV_MODULE_NAME, vk);
> + if (err) {
> + dev_err(dev, "failed to request msgq IRQ %d for MSIX %d\n",
> + pdev->irq + vk->num_irqs, vk->num_irqs + 1);
> + goto err_irq;
> + }
> + }
> + /* one irq for notification from VK */
> + err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs),
> + bcm_vk_notf_irqhandler,
> + IRQF_SHARED, DRV_MODULE_NAME, vk);
> + if (err) {
> + dev_err(dev, "failed to request notf IRQ %d for MSIX %d\n",
> + pdev->irq + vk->num_irqs, vk->num_irqs + 1);
> + goto err_irq;
> + }
> + vk->num_irqs++;
> +
> + id = ida_simple_get(&bcm_vk_ida, 0, 0, GFP_KERNEL);
> + if (id < 0) {
> + err = id;
> + dev_err(dev, "unable to get id\n");
> + goto err_irq;
> + }
> +
> + vk->misc_devid = id;
> + snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id);
> + misc_device = &vk->miscdev;
> + misc_device->minor = MISC_DYNAMIC_MINOR;
> + misc_device->name = kstrdup(name, GFP_KERNEL);
> + if (!misc_device->name) {
> + err = -ENOMEM;
> + goto err_ida_remove;
> + }
> + misc_device->fops = &bcm_vk_fops,
> +
> + err = misc_register(misc_device);
> + if (err) {
> + dev_err(dev, "failed to register device\n");
> + goto err_kfree_name;
> + }
> +
> + err = bcm_vk_msg_init(vk);
> + if (err) {
> + dev_err(dev, "failed to init msg queue info\n");
> + goto err_misc_deregister;
> + }
> +
> + /* sync other info */
> + bcm_vk_sync_card_info(vk);
> +
> + /* register for panic notifier */
> + vk->panic_nb.notifier_call = bcm_vk_on_panic;
> + atomic_notifier_chain_register(&panic_notifier_list,
> + &vk->panic_nb);
> +
> + snprintf(name, sizeof(name), KBUILD_MODNAME ".%d_ttyVK", id);
> + err = bcm_vk_tty_init(vk, name);
> + if (err)
> + goto err_unregister_panic_notifier;
> +
> + /*
> + * lets trigger an auto download. We don't want to do it serially here
> + * because at probing time, it is not supposed to block for a long time.
> + */
> + boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
> + if (auto_load) {
> + if ((boot_status & BOOT_STATE_MASK) == BROM_RUNNING) {
> + if (bcm_vk_trigger_autoload(vk))
> + goto err_bcm_vk_tty_exit;
> + } else {
> + dev_info(dev,
> + "Auto-load skipped - BROM not in proper state (0x%x)\n",
> + boot_status);
> + }
> + }
> +
> + /* enable hb */
> + bcm_vk_hb_init(vk);
> +
> + dev_info(dev, "BCM-VK:%u created, 0x%p\n", id, vk);
> +
> + return 0;
> +
> +err_bcm_vk_tty_exit:
> + bcm_vk_tty_exit(vk);
> +
> +err_unregister_panic_notifier:
> + atomic_notifier_chain_unregister(&panic_notifier_list,
> + &vk->panic_nb);
> +
> +err_misc_deregister:
> + misc_deregister(misc_device);
> +
> +err_kfree_name:
> + kfree(misc_device->name);
> + misc_device->name = NULL;
> +
> +err_ida_remove:
> + ida_simple_remove(&bcm_vk_ida, id);
> +
> +err_irq:
> + for (i = 0; i < vk->num_irqs; i++)
> + devm_free_irq(dev, pci_irq_vector(pdev, i), vk);
> +
> + pci_disable_msix(pdev);
> + pci_disable_msi(pdev);
> +
> +err_iounmap:
> + for (i = 0; i < MAX_BAR; i++) {
> + if (vk->bar[i])
> + pci_iounmap(pdev, vk->bar[i]);
> + }
> + pci_release_regions(pdev);
> +
> +err_disable_pdev:
> + pci_disable_device(pdev);
> +
> + return err;
> +}
> +
> +void bcm_vk_release_data(struct kref *kref)
> +{
> + struct bcm_vk *vk = container_of(kref, struct bcm_vk, kref);
> +
> + /* use raw print, as dev is gone */
> + pr_info("BCM-VK:%d release data 0x%p\n", vk->misc_devid, vk);
> + kfree(vk);
> +}
> +
> +static void bcm_vk_remove(struct pci_dev *pdev)
> +{
> + int i;
> + struct bcm_vk *vk = pci_get_drvdata(pdev);
> + struct miscdevice *misc_device = &vk->miscdev;
> +
> + bcm_vk_hb_deinit(vk);
> + bcm_vk_tty_exit(vk);
> +
> + /* unregister panic notifier */
> + atomic_notifier_chain_unregister(&panic_notifier_list,
> + &vk->panic_nb);
> +
> + bcm_vk_msg_remove(vk);
> +
> + if (vk->tdma_vaddr)
> + dma_free_coherent(&pdev->dev, nr_scratch_pages * PAGE_SIZE,
> + vk->tdma_vaddr, vk->tdma_addr);
> +
> + /* remove if name is set which means misc dev registered */
> + if (misc_device->name) {
> + misc_deregister(misc_device);
> + kfree(misc_device->name);
> + ida_simple_remove(&bcm_vk_ida, vk->misc_devid);
> + }
> + for (i = 0; i < vk->num_irqs; i++)
> + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), vk);
> +
> + pci_disable_msix(pdev);
> + pci_disable_msi(pdev);
> +
> + cancel_work_sync(&vk->wq_work);
> + destroy_workqueue(vk->wq_thread);
> +
> + for (i = 0; i < MAX_BAR; i++) {
> + if (vk->bar[i])
> + pci_iounmap(pdev, vk->bar[i]);
> + }
> +
> + dev_info(&pdev->dev, "BCM-VK:%d released\n", vk->misc_devid);
> +
> + pci_release_regions(pdev);
> + pci_disable_device(pdev);
> +
> + kref_put(&vk->kref, bcm_vk_release_data);
> +}
> +
> +static const struct pci_device_id bcm_vk_ids[] = {
> + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VALKYRIE), },
> + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VIPER), },
> + { }
> +};
> +MODULE_DEVICE_TABLE(pci, bcm_vk_ids);
> +
> +static struct pci_driver pci_driver = {
> + .name = DRV_MODULE_NAME,
> + .id_table = bcm_vk_ids,
> + .probe = bcm_vk_probe,
> + .remove = bcm_vk_remove,
> +};
> +module_pci_driver(pci_driver);
> +
> +MODULE_DESCRIPTION("Broadcom VK Host Driver");
> +MODULE_AUTHOR("Scott Branden <scott.branden@broadcom.com>");
> +MODULE_LICENSE("GPL v2");
> +MODULE_VERSION("1.0");
> diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.c b/drivers/misc/bcm-vk/bcm_vk_msg.c
> new file mode 100644
> index 000000000000..651fba2149e2
> --- /dev/null
> +++ b/drivers/misc/bcm-vk/bcm_vk_msg.c
> @@ -0,0 +1,1359 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2018-2020 Broadcom.
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/fs.h>
> +#include <linux/hash.h>
> +#include <linux/interrupt.h>
> +#include <linux/list.h>
> +#include <linux/sizes.h>
> +#include <linux/spinlock.h>
> +#include <linux/timer.h>
> +
> +#include "bcm_vk.h"
> +#include "bcm_vk_msg.h"
> +#include "bcm_vk_sg.h"
> +
> +#if defined(CONFIG_BCM_VK_H2VK_VERIFY_AND_RETRY)
> +/*
> + * Turn on the following to verify the data passed down to VK is good, and
> + * if not, do retry. This is a debug/workaround on FPGA PCIe timing issues
> + * but may be found useful for debugging other PCIe hardware issues.
> + */
> +static void bcm_vk_h2vk_verify_idx(struct device *dev,
> + const char *tag,
> + volatile uint32_t *idx,
> + uint32_t expected)
> +{
> + unsigned int count = 0;
> +
> + while (*idx != expected) {
> + count++;
> + dev_err(dev, "[%d] %s exp %d idx %d\n",
> + count, tag, expected, *idx);
> +
> + /* write again */
> + *idx = expected;
> + }
> +}
> +
> +static void bcm_vk_h2vk_verify_blk(struct device *dev,
> + const struct vk_msg_blk *src,
> + volatile struct vk_msg_blk *dst)
> +
> +{
> + struct vk_msg_blk rd_bck;
> + unsigned int count = 0;
> +
> + rd_bck = *dst;
> + while (memcmp(&rd_bck, src, sizeof(rd_bck)) != 0) {
> + count++;
> + dev_err(dev,
> + "[%d]Src Blk: [0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x]\n",
> + count,
> + src->function_id,
> + src->size,
> + src->queue_id,
> + src->msg_id,
> + src->context_id,
> + src->args[0],
> + src->args[1]);
> + dev_err(dev,
> + "[%d]Rdb Blk: [0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x]\n",
> + count,
> + rd_bck.function_id,
> + rd_bck.size,
> + rd_bck.queue_id,
> + rd_bck.msg_id,
> + rd_bck.context_id,
> + rd_bck.args[0],
> + rd_bck.args[1]);
> +
> + *dst = *src;
> + rd_bck = *dst;
> + }
> +}
> +#else
> +static void bcm_vk_h2vk_verify_idx(struct device __always_unused *dev,
> + const char __always_unused *tag,
> + volatile uint32_t __always_unused *idx,
> + uint32_t __always_unused expected)
> +{
> +}
> +
> +static void bcm_vk_h2vk_verify_blk
> + (struct device __always_unused *dev,
> + const struct vk_msg_blk __always_unused *src,
> + volatile struct vk_msg_blk __always_unused *dst)
> +
> +{
> +}
> +#endif
> +
> +#if defined(CONFIG_BCM_VK_QSTATS)
> +
> +/* Use default value of 20000 rd/wr per update */
> +#if !defined(BCM_VK_QSTATS_ACC_CNT)
> +#define BCM_VK_QSTATS_ACC_CNT 20000
> +#endif
> +
> +void bcm_vk_update_qstats(struct bcm_vk *vk, const char *tag,
> + struct bcm_vk_qstats *qstats, uint32_t occupancy)
> +{
> + struct bcm_vk_qs_cnts *qcnts = &qstats->qcnts;
> +
> + if (occupancy > qcnts->max_occ) {
> + qcnts->max_occ = occupancy;
> + if (occupancy > qcnts->max_abs)
> + qcnts->max_abs = occupancy;
> + }
> +
> + qcnts->acc_sum += occupancy;
> + if (++qcnts->cnt >= BCM_VK_QSTATS_ACC_CNT) {
> + /* log average and clear counters */
> + dev_info(&vk->pdev->dev,
> + "%s[%d]: Max: [%3d/%3d] Acc %d num %d, Aver %d\n",
> + tag, qstats->q_num,
> + qcnts->max_occ, qcnts->max_abs,
> + qcnts->acc_sum,
> + qcnts->cnt,
> + qcnts->acc_sum / qcnts->cnt);
> +
> + qcnts->cnt = 0;
> + qcnts->max_occ = 0;
> + qcnts->acc_sum = 0;
> + }
> +}
> +#endif
> +
> +/* number of retries when enqueue message fails before returning EAGAIN */
> +#define BCM_VK_H2VK_ENQ_RETRY 10
> +#define BCM_VK_H2VK_ENQ_RETRY_DELAY_MS 50
> +
> +bool bcm_vk_drv_access_ok(struct bcm_vk *vk)
> +{
> + return (!!atomic_read(&vk->msgq_inited));
> +}
> +
> +static void bcm_vk_set_host_alert(struct bcm_vk *vk, uint32_t bit_mask)
> +{
> + struct bcm_vk_alert *alert = &vk->host_alert;
> + unsigned long flags;
> +
> + /* use irqsave version as this maybe called inside timer interrupt */
> + spin_lock_irqsave(&vk->host_alert_lock, flags);
> + alert->notfs |= bit_mask;
> + spin_unlock_irqrestore(&vk->host_alert_lock, flags);
> +
> + if (test_and_set_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload) == 0)
> + queue_work(vk->wq_thread, &vk->wq_work);
> +}
> +
> +/*
> + * Heartbeat related defines
> + * The heartbeat from host is a last resort. If stuck condition happens
> + * on the card, firmware is supposed to detect it. Therefore, the heartbeat
> + * values used will be more relaxed on the driver, which need to be bigger
> + * than the watchdog timeout on the card.
> + */
> +#define BCM_VK_HB_TIMER_S 3
> +#define BCM_VK_HB_TIMER_VALUE (BCM_VK_HB_TIMER_S * HZ)
> +#define BCM_VK_HB_LOST_MAX 4
> +
> +static void bcm_vk_hb_poll(struct timer_list *t)
> +{
> + uint32_t uptime_s;
> + struct bcm_vk_hb_ctrl *hb = container_of(t, struct bcm_vk_hb_ctrl,
> + timer);
> + struct bcm_vk *vk = container_of(hb, struct bcm_vk, hb_ctrl);
> +
> + if (bcm_vk_drv_access_ok(vk)) {
> + /* read uptime from register and compare */
> + uptime_s = vkread32(vk, BAR_0, BAR_OS_UPTIME);
> +
> + if (uptime_s == hb->last_uptime)
> + hb->lost_cnt++;
> +
> + dev_dbg(&vk->pdev->dev, "Last uptime %d current %d, lost %d\n",
> + hb->last_uptime, uptime_s, hb->lost_cnt);
> +
> + /*
> + * if the interface goes down without any activity, a value
> + * of 0xFFFFFFFF will be continuously read, and the detection
> + * will happen eventually.
> + */
> + hb->last_uptime = uptime_s;
> + } else {
> + /* reset heart beat lost cnt */
> + hb->lost_cnt = 0;
> + }
> +
> + /* next, check if heartbeat exceeds limit */
> + if (hb->lost_cnt > BCM_VK_HB_LOST_MAX) {
> + dev_err(&vk->pdev->dev, "Heartbeat Misses %d times, %d s!\n",
> + BCM_VK_HB_LOST_MAX,
> + BCM_VK_HB_LOST_MAX * BCM_VK_HB_TIMER_S);
> +
> + bcm_vk_blk_drv_access(vk);
> + bcm_vk_set_host_alert(vk, ERR_LOG_HOST_HB_FAIL);
> + }
> + /* re-arm timer */
> + mod_timer(&hb->timer, jiffies + BCM_VK_HB_TIMER_VALUE);
> +}
> +
> +void bcm_vk_hb_init(struct bcm_vk *vk)
> +{
> + struct bcm_vk_hb_ctrl *hb = &vk->hb_ctrl;
> +
> + timer_setup(&hb->timer, bcm_vk_hb_poll, 0);
> + mod_timer(&hb->timer, jiffies + BCM_VK_HB_TIMER_VALUE);
> +}
> +
> +void bcm_vk_hb_deinit(struct bcm_vk *vk)
> +{
> + struct bcm_vk_hb_ctrl *hb = &vk->hb_ctrl;
> +
> + del_timer(&hb->timer);
> +}
> +
> +static void bcm_vk_msgid_bitmap_clear(struct bcm_vk *vk,
> + unsigned int start,
> + unsigned int nbits)
> +{
> + spin_lock(&vk->msg_id_lock);
> + bitmap_clear(vk->bmap, start, nbits);
> + spin_unlock(&vk->msg_id_lock);
> +}
> +
> +/*
> + * allocate a ctx per file struct
> + */
> +static struct bcm_vk_ctx *bcm_vk_get_ctx(struct bcm_vk *vk,
> + struct task_struct *ppid)
> +{
> + uint32_t i;
> + struct bcm_vk_ctx *ctx = NULL;
> + const pid_t pid = task_pid_nr(ppid);
> + uint32_t hash_idx = hash_32(pid, VK_PID_HT_SHIFT_BIT);
> +
> + spin_lock(&vk->ctx_lock);
> +
> + /* check if it is in reset, if so, don't allow */
> + if (vk->reset_ppid) {
> + dev_err(&vk->pdev->dev,
> + "No context allowed during reset by pid %d\n",
> + task_pid_nr(vk->reset_ppid));
> +
> + goto in_reset_exit;
> + }
> +
> + for (i = 0; i < ARRAY_SIZE(vk->ctx); i++) {
> + if (!vk->ctx[i].in_use) {
> + vk->ctx[i].in_use = true;
> + ctx = &vk->ctx[i];
> + break;
> + }
> + }
> +
> + if (!ctx) {
> + dev_err(&vk->pdev->dev, "All context in use\n");
> +
> + goto all_in_use_exit;
> + }
> +
> + /* set the pid and insert it to hash table */
> + ctx->ppid = ppid;
> + ctx->hash_idx = hash_idx;
> + list_add_tail(&ctx->node, &vk->pid_ht[hash_idx].head);
> +
> + /* increase kref */
> + kref_get(&vk->kref);
> +
> + /* clear counter */
> + ctx->pend_cnt = 0;
> +all_in_use_exit:
> +in_reset_exit:
> + spin_unlock(&vk->ctx_lock);
> +
> + return ctx;
> +}
> +
> +static uint16_t bcm_vk_get_msg_id(struct bcm_vk *vk)
> +{
> + uint16_t rc = VK_MSG_ID_OVERFLOW;
> + uint16_t test_bit_count = 0;
> +
> + spin_lock(&vk->msg_id_lock);
> + while (test_bit_count < VK_MSG_ID_BITMAP_SIZE) {
> + vk->msg_id++;
> + vk->msg_id = vk->msg_id & VK_MSG_ID_BITMAP_MASK;
> + if (test_bit(vk->msg_id, vk->bmap)) {
> + test_bit_count++;
> + continue;
> + }
> + rc = vk->msg_id;
> + bitmap_set(vk->bmap, vk->msg_id, 1);
> + break;
> + }
> + spin_unlock(&vk->msg_id_lock);
> +
> + return rc;
> +}
> +
> +static int bcm_vk_free_ctx(struct bcm_vk *vk, struct bcm_vk_ctx *ctx)
> +{
> + uint32_t idx;
> + uint32_t hash_idx;
> + pid_t pid;
> + struct bcm_vk_ctx *entry;
> + int count = 0;
> +
> + if (ctx == NULL) {
> + dev_err(&vk->pdev->dev, "NULL context detected\n");
> + return -EINVAL;
> + }
> + idx = ctx->idx;
> + pid = task_pid_nr(ctx->ppid);
> +
> + spin_lock(&vk->ctx_lock);
> +
> + if (!vk->ctx[idx].in_use) {
> + dev_err(&vk->pdev->dev, "context[%d] not in use!\n", idx);
> + } else {
> + vk->ctx[idx].in_use = false;
> + vk->ctx[idx].miscdev = NULL;
> +
> + /* Remove it from hash list and see if it is the last one. */
> + list_del(&ctx->node);
> + hash_idx = ctx->hash_idx;
> + list_for_each_entry(entry, &vk->pid_ht[hash_idx].head, node) {
> + if (task_pid_nr(entry->ppid) == pid)
> + count++;
> + }
> + }
> +
> + spin_unlock(&vk->ctx_lock);
> +
> + return count;
> +}
> +
> +static void bcm_vk_free_wkent(struct device *dev, struct bcm_vk_wkent *entry)
> +{
> + bcm_vk_sg_free(dev, entry->dma, VK_DMA_MAX_ADDRS);
> +
> + kfree(entry->vk2h_msg);
> + kfree(entry);
> +}
> +
> +static void bcm_vk_drain_all_pend(struct device *dev,
> + struct bcm_vk_msg_chan *chan,
> + struct bcm_vk_ctx *ctx)
> +{
> + uint32_t num;
> + struct bcm_vk_wkent *entry, *tmp;
> + struct bcm_vk *vk;
> + struct list_head del_q;
> +
> + if (ctx)
> + vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
> +
> + INIT_LIST_HEAD(&del_q);
> + spin_lock(&chan->pendq_lock);
> + for (num = 0; num < chan->q_nr; num++) {
> + list_for_each_entry_safe(entry, tmp, &chan->pendq[num], node) {
> + if (ctx == NULL) {
> + list_del(&entry->node);
> + list_add_tail(&entry->node, &del_q);
> + } else if (entry->ctx->idx == ctx->idx) {
> + struct vk_msg_blk *msg;
> + int bit_set;
> + bool responded;
> +
> + /* if it is specific ctx, log for any stuck */
> + msg = entry->h2vk_msg;
> + bit_set = test_bit(msg->msg_id, vk->bmap);
> + responded = entry->vk2h_msg ? true : false;
> + dev_info(dev,
> + "Drained: fid %u size %u msg 0x%x(seq-%x) ctx 0x%x[fd-%d] args:[0x%x 0x%x] resp %s, bmap %d\n",
> + msg->function_id, msg->size,
> + msg->msg_id, entry->seq_num,
> + msg->context_id, entry->ctx->idx,
> + msg->args[0], msg->args[1],
> + responded ? "T" : "F", bit_set);
> + list_del(&entry->node);
> + list_add_tail(&entry->node, &del_q);
> + if (responded)
> + ctx->pend_cnt--;
> + else if (bit_set)
> + bcm_vk_msgid_bitmap_clear(vk,
> + msg->msg_id,
> + 1);
> + }
> + }
> + }
> + spin_unlock(&chan->pendq_lock);
> +
> + /* batch clean up */
> + num = 0;
> + list_for_each_entry_safe(entry, tmp, &del_q, node) {
> + list_del(&entry->node);
> + bcm_vk_free_wkent(dev, entry);
> + num++;
> + }
> + if (num)
> + dev_info(dev, "Total drained items %d\n", num);
> +}
> +
> +bool bcm_vk_msgq_marker_valid(struct bcm_vk *vk)
> +{
> + uint32_t rdy_marker = 0;
> + uint32_t fw_status;
> +
> + fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS);
> +
> + if ((fw_status & VK_FWSTS_READY) == VK_FWSTS_READY)
> + rdy_marker = vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY);
> +
> + return (rdy_marker == VK_BAR1_MSGQ_RDY_MARKER);
> +}
> +
> +/*
> + * Function to sync up the messages queue info that is provided by BAR1
> + */
> +int bcm_vk_sync_msgq(struct bcm_vk *vk, bool force_sync)
> +{
> + struct bcm_vk_msgq *msgq = NULL;
> + struct device *dev = &vk->pdev->dev;
> + uint32_t msgq_off;
> + uint32_t num_q;
> + struct bcm_vk_msg_chan *chan_list[] = {&vk->h2vk_msg_chan,
> + &vk->vk2h_msg_chan};
> + struct bcm_vk_msg_chan *chan = NULL;
> + int i, j;
> + int ret = 0;
> +
> + /*
> + * If the driver is loaded at startup where vk OS is not up yet,
> + * the msgq-info may not be available until a later time. In
> + * this case, we skip and the sync function is supposed to be
> + * called again.
> + */
> + if (!bcm_vk_msgq_marker_valid(vk)) {
> + dev_info(dev, "BAR1 msgq marker not initialized.\n");
> + return ret;
Since ret is still 0 at this point, this is effectively "return 0;", which seems wrong.
> + }
> +
> + msgq_off = vkread32(vk, BAR_1, VK_BAR1_MSGQ_CTRL_OFF);
> +
> + /* each side is always half the total */
> + num_q = vk->h2vk_msg_chan.q_nr = vk->vk2h_msg_chan.q_nr =
> + vkread32(vk, BAR_1, VK_BAR1_MSGQ_NR) / 2;
> +
> + /* first msgq location */
> + msgq = (struct bcm_vk_msgq *)(vk->bar[BAR_1] + msgq_off);
> +
> + /*
> + * if this function is called when it is already inited,
> + * something is wrong
> + */
> + if (bcm_vk_drv_access_ok(vk) && (!force_sync)) {
Delete the extra parentheses around !force_sync.
> + dev_err(dev, "Msgq info already in sync\n");
> + ret = -EPERM;
> + goto already_inited;
Please do a direct "return -EPERM;" here; the already_inited label only returns ret.
> + }
> +
> + for (i = 0; i < ARRAY_SIZE(chan_list); i++) {
> + chan = chan_list[i];
> + memset(chan->sync_qinfo, 0, sizeof(chan->sync_qinfo));
> +
> + for (j = 0; j < num_q; j++) {
> + chan->msgq[j] = msgq;
> +
> + dev_info(dev,
> + "MsgQ[%d] type %d num %d, @ 0x%x, rd_idx %d wr_idx %d, size %d, nxt 0x%x\n",
> + j,
> + chan->msgq[j]->type,
> + chan->msgq[j]->num,
> + chan->msgq[j]->start,
> + chan->msgq[j]->rd_idx,
> + chan->msgq[j]->wr_idx,
> + chan->msgq[j]->size,
> + chan->msgq[j]->nxt);
It would be shorter to replace chan->msgq[j]->type with msgq->type (and
likewise for the other fields), since chan->msgq[j] was just set to msgq.
> +
> + /* formulate and record static info */
> + chan->sync_qinfo[j].q_start =
> + vk->bar[BAR_1] + chan->msgq[j]->start;
Maybe set "qinfo = &chan->sync_qinfo[j];" at the start of the loop so
that these assignments fit on one line, e.g.:
	qinfo->q_start = vk->bar[BAR_1] + msgq->start;
> + chan->sync_qinfo[j].q_size = chan->msgq[j]->size;
> + /* set low threshold as 50% or 1/2 */
> + chan->sync_qinfo[j].q_low =
> + chan->sync_qinfo[j].q_size >> 1;
> + chan->sync_qinfo[j].q_mask =
> + chan->sync_qinfo[j].q_size - 1;
> +
> + msgq = (struct bcm_vk_msgq *)
> + ((char *)msgq + sizeof(*msgq) + msgq->nxt);
> +
> + rmb(); /* do a read mb to guarantee */
> + }
> + }
> + atomic_set(&vk->msgq_inited, 1);
> +
> +already_inited:
> + return ret;
> +}
> +
> +static int bcm_vk_msg_chan_init(struct bcm_vk_msg_chan *chan)
> +{
> + int rc = 0;
> + uint32_t i;
> +
> + mutex_init(&chan->msgq_mutex);
> + spin_lock_init(&chan->pendq_lock);
> + for (i = 0; i < VK_MSGQ_MAX_NR; i++) {
> + INIT_LIST_HEAD(&chan->pendq[i]);
> +#if defined(CONFIG_BCM_VK_QSTATS)
> + chan->qstats[i].q_num = i;
> +#endif
> + }
> +
> + return rc;
Just "return 0;" here; rc is never changed from 0.
> +}
> +
> +static void bcm_vk_append_pendq(struct bcm_vk_msg_chan *chan, uint16_t q_num,
> + struct bcm_vk_wkent *entry)
> +{
> + spin_lock(&chan->pendq_lock);
> + list_add_tail(&entry->node, &chan->pendq[q_num]);
> + if (entry->vk2h_msg)
> + entry->ctx->pend_cnt++;
> + spin_unlock(&chan->pendq_lock);
> +}
> +
> +static uint32_t bcm_vk_append_ib_sgl(struct bcm_vk *vk,
> + struct bcm_vk_wkent *entry,
> + struct _vk_data *data,
> + unsigned int num_planes)
> +{
> + unsigned int i;
> + unsigned int item_cnt = 0;
> + struct device *dev = &vk->pdev->dev;
> + struct bcm_vk_msg_chan *chan = &vk->h2vk_msg_chan;
> + struct vk_msg_blk *msg = &entry->h2vk_msg[0];
> + struct bcm_vk_msgq *msgq;
> + struct bcm_vk_sync_qinfo *qinfo;
> + uint32_t ib_sgl_size = 0;
> + uint8_t *buf = (uint8_t *)&entry->h2vk_msg[entry->h2vk_blks];
> + uint32_t avail;
> +
> + /* check if high watermark is hit, and if so, skip */
> + msgq = chan->msgq[msg->queue_id];
> + qinfo = &chan->sync_qinfo[msg->queue_id];
> + avail = VK_MSGQ_AVAIL_SPACE(msgq, qinfo);
> + if (avail < qinfo->q_low) {
> + dev_dbg(dev, "Skip inserting inband SGL, [0x%x/0x%x]\n",
> + avail, qinfo->q_size);
> + return ib_sgl_size;
Just "return 0;" here; ib_sgl_size is still 0 at this point.
> + }
> +
> + for (i = 0; i < num_planes; i++) {
> + if (data[i].address &&
> + (ib_sgl_size + data[i].size) <= vk->ib_sgl_size) {
> +
> + item_cnt++;
> + memcpy(buf, entry->dma[i].sglist, data[i].size);
> + ib_sgl_size += data[i].size;
> + buf += data[i].size;
> + }
> + }
> +
> + dev_dbg(dev, "Num %u sgl items appended, size 0x%x, room 0x%x\n",
> + item_cnt, ib_sgl_size, vk->ib_sgl_size);
> +
> + /* round up size */
> + ib_sgl_size = (ib_sgl_size + VK_MSGQ_BLK_SIZE - 1)
> + >> VK_MSGQ_BLK_SZ_SHIFT;
> +
> + return ib_sgl_size;
> +}
> +
> +void bcm_h2vk_doorbell(struct bcm_vk *vk, uint32_t q_num,
> + uint32_t db_val)
> +{
> + /* press door bell based on q_num */
> + vkwrite32(vk,
> + db_val,
> + BAR_0,
> + VK_BAR0_REGSEG_DB_BASE + q_num * VK_BAR0_REGSEG_DB_REG_GAP);
> +}
> +
> +static int bcm_h2vk_msg_enqueue(struct bcm_vk *vk, struct bcm_vk_wkent *entry)
> +{
> + static uint32_t seq_num;
> + struct bcm_vk_msg_chan *chan = &vk->h2vk_msg_chan;
> + struct device *dev = &vk->pdev->dev;
> + struct vk_msg_blk *src = &entry->h2vk_msg[0];
> +
> + volatile struct vk_msg_blk *dst;
> + struct bcm_vk_msgq *msgq;
> + struct bcm_vk_sync_qinfo *qinfo;
> + uint32_t q_num = src->queue_id;
> + uint32_t wr_idx; /* local copy */
> + uint32_t i;
> + uint32_t avail;
> + uint32_t retry;
> +
> + if (entry->h2vk_blks != src->size + 1) {
> + dev_err(dev, "number of blks %d not matching %d MsgId[0x%x]: func %d ctx 0x%x\n",
> + entry->h2vk_blks,
> + src->size + 1,
> + src->msg_id,
> + src->function_id,
> + src->context_id);
> + return -EMSGSIZE;
> + }
> +
> + msgq = chan->msgq[q_num];
> + qinfo = &chan->sync_qinfo[q_num];
> +
> + rmb(); /* start with a read barrier */
> + mutex_lock(&chan->msgq_mutex);
> +
> + avail = VK_MSGQ_AVAIL_SPACE(msgq, qinfo);
> +
> +#if defined(CONFIG_BCM_VK_QSTATS)
> + bcm_vk_update_qstats(vk, "h2vk", &chan->qstats[q_num],
> + qinfo->q_size - avail);
> +#endif
> + /* if not enough space, return EAGAIN and let app handles it */
> + retry = 0;
> + while ((avail < entry->h2vk_blks)
> + && (retry++ < BCM_VK_H2VK_ENQ_RETRY)) {
> + mutex_unlock(&chan->msgq_mutex);
> +
> + msleep(BCM_VK_H2VK_ENQ_RETRY_DELAY_MS);
> + mutex_lock(&chan->msgq_mutex);
> + avail = VK_MSGQ_AVAIL_SPACE(msgq, qinfo);
> + }
> + if (retry > BCM_VK_H2VK_ENQ_RETRY) {
> + mutex_unlock(&chan->msgq_mutex);
> + return -EAGAIN;
> + }
> +
> + /* at this point, mutex is taken and there is enough space */
> + entry->seq_num = seq_num++; /* update debug seq number */
> + wr_idx = msgq->wr_idx;
> +
> + if (wr_idx >= qinfo->q_size) {
> + dev_crit(dev, "Invalid wr_idx 0x%x => max 0x%x!",
> + wr_idx, qinfo->q_size);
> + bcm_vk_blk_drv_access(vk);
> + bcm_vk_set_host_alert(vk, ERR_LOG_HOST_PCIE_DWN);
> + goto idx_err;
> + }
> +
> + dst = VK_MSGQ_BLK_ADDR(qinfo, wr_idx);
> + for (i = 0; i < entry->h2vk_blks; i++) {
> + *dst = *src;
> +
> + bcm_vk_h2vk_verify_blk(dev, src, dst);
> +
> + src++;
> + wr_idx = VK_MSGQ_INC(qinfo, wr_idx, 1);
> + dst = VK_MSGQ_BLK_ADDR(qinfo, wr_idx);
> + }
> +
> + /* flush the write pointer */
> + msgq->wr_idx = wr_idx;
> + wmb(); /* flush */
> +
> + bcm_vk_h2vk_verify_idx(dev, "wr_idx", &msgq->wr_idx, wr_idx);
> +
> + /* log new info for debugging */
> + dev_dbg(dev,
> + "MsgQ[%d] [Rd Wr] = [%d %d] blks inserted %d - Q = [u-%d a-%d]/%d\n",
> + msgq->num,
> + msgq->rd_idx, msgq->wr_idx, entry->h2vk_blks,
> + VK_MSGQ_OCCUPIED(msgq, qinfo),
> + VK_MSGQ_AVAIL_SPACE(msgq, qinfo),
> + msgq->size);
> + /*
> + * press door bell based on queue number. 1 is added to the wr_idx
> + * to avoid the value of 0 appearing on the VK side to distinguish
> + * from initial value.
> + */
> + bcm_h2vk_doorbell(vk, q_num, wr_idx + 1);
> +idx_err:
> + mutex_unlock(&chan->msgq_mutex);
> + return 0;
> +}
> +
> +int bcm_vk_send_shutdown_msg(struct bcm_vk *vk, uint32_t shut_type,
> + pid_t pid)
> +{
> + int rc = 0;
> + struct bcm_vk_wkent *entry;
> + struct device *dev = &vk->pdev->dev;
> +
> + /*
> + * check if the marker is still good. Sometimes, the PCIe interface may
> + * have gone down, and if so and we ship down thing based on broken
> + * values, kernel may panic.
> + */
> + if (!bcm_vk_msgq_marker_valid(vk)) {
> + dev_info(dev, "PCIe comm chan - invalid marker (0x%x)!\n",
> + vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY));
> + return -EINVAL;
> + }
> +
> + entry = kzalloc(sizeof(struct bcm_vk_wkent) +
> + sizeof(struct vk_msg_blk), GFP_KERNEL);
> + if (!entry)
> + return -ENOMEM;
> +
> + /* just fill up non-zero data */
> + entry->h2vk_msg[0].function_id = VK_FID_SHUTDOWN;
> + entry->h2vk_msg[0].queue_id = 0; /* use highest queue */
> + entry->h2vk_blks = 1; /* always 1 block */
> +
> + entry->h2vk_msg[0].args[0] = shut_type;
> + entry->h2vk_msg[0].args[1] = pid;
> +
> + rc = bcm_h2vk_msg_enqueue(vk, entry);
> + if (rc)
> + dev_err(dev,
> + "Sending shutdown message to q %d for pid %d fails.\n",
> + entry->h2vk_msg[0].queue_id, pid);
> +
> + kfree(entry);
> +
> + return rc;
> +}
> +
> +int bcm_vk_handle_last_sess(struct bcm_vk *vk, struct task_struct *ppid)
> +{
> + int rc = 0;
> + pid_t pid = task_pid_nr(ppid);
> + struct device *dev = &vk->pdev->dev;
> +
> + /*
> + * don't send down or do anything if message queue is not initialized
> + * and if it is the reset session, clear it.
> + */
> + if (!bcm_vk_drv_access_ok(vk)) {
> +
> + if (vk->reset_ppid == ppid)
> + vk->reset_ppid = NULL;
> + return -EPERM;
> + }
> +
> + dev_dbg(dev, "No more sessions, shut down pid %d\n", pid);
> +
> + /* only need to do it if it is not the reset process */
> + if (vk->reset_ppid != ppid)
> + rc = bcm_vk_send_shutdown_msg(vk, VK_SHUTDOWN_PID, pid);
> + else
> + /* reset the pointer if it is exiting last session */
> + vk->reset_ppid = NULL;
> +
> + return rc;
> +}
> +
> +static struct bcm_vk_wkent *bcm_vk_find_pending(struct bcm_vk *vk,
> + struct bcm_vk_msg_chan *chan,
> + uint16_t q_num,
> + uint16_t msg_id)
> +{
> + bool found = false;
> + struct bcm_vk_wkent *entry;
> +
> + spin_lock(&chan->pendq_lock);
> + list_for_each_entry(entry, &chan->pendq[q_num], node) {
> +
> + if (entry->h2vk_msg[0].msg_id == msg_id) {
> + list_del(&entry->node);
> + found = true;
> + bcm_vk_msgid_bitmap_clear(vk, msg_id, 1);
> + break;
> + }
> + }
> + spin_unlock(&chan->pendq_lock);
> + return ((found) ? entry : NULL);
> +}
> +
> +static uint32_t bcm_vk2h_msg_dequeue(struct bcm_vk *vk)
> +{
> + struct device *dev = &vk->pdev->dev;
> + struct bcm_vk_msg_chan *chan = &vk->vk2h_msg_chan;
> + struct vk_msg_blk *data;
> + volatile struct vk_msg_blk *src;
> + struct vk_msg_blk *dst;
> + struct bcm_vk_msgq *msgq;
> + struct bcm_vk_sync_qinfo *qinfo;
> + struct bcm_vk_wkent *entry;
> + uint32_t rd_idx;
> + uint32_t q_num, j;
> + uint32_t num_blks;
> + uint32_t total = 0;
> +
> + /*
> + * drain all the messages from the queues, and find its pending
> + * entry in the h2vk queue, based on msg_id & q_num, and move the
> + * entry to the vk2h pending queue, waiting for user space
> + * program to extract
> + */
> + mutex_lock(&chan->msgq_mutex);
> + rmb(); /* start with a read barrier */
> + for (q_num = 0; q_num < chan->q_nr; q_num++) {
> + msgq = chan->msgq[q_num];
> + qinfo = &chan->sync_qinfo[q_num];
> +
> + while (!VK_MSGQ_EMPTY(msgq)) {
> +
> + /*
> + * Make a local copy and get pointer to src blk
> + * The rd_idx is masked before getting the pointer to
> + * avoid out of bound access in case the interface goes
> + * down. It will end up pointing to the last block in
> + * the buffer, but subsequent src->size check would be
> + * able to catch this.
> + */
> + rd_idx = msgq->rd_idx;
> + src = VK_MSGQ_BLK_ADDR(qinfo,
> + rd_idx & VK_MSGQ_SIZE_MASK(qinfo));
> +
> + if ((rd_idx >= qinfo->q_size)
> + || (src->size > (qinfo->q_size - 1))) {
> + dev_crit(dev,
> + "Invalid rd_idx 0x%x or size 0x%x => max 0x%x!",
> + rd_idx, src->size, qinfo->q_size);
> + bcm_vk_blk_drv_access(vk);
> + bcm_vk_set_host_alert(
> + vk, ERR_LOG_HOST_PCIE_DWN);
> + goto idx_err;
> + }
> +
> +#if defined(CONFIG_BCM_VK_QSTATS)
> + bcm_vk_update_qstats(vk, "vk2h", &chan->qstats[q_num],
> + VK_MSGQ_OCCUPIED(msgq, qinfo));
> +#endif
> + num_blks = src->size + 1;
> + data = kzalloc(num_blks * VK_MSGQ_BLK_SIZE, GFP_KERNEL);
> + if (data) {
> + /* copy messages and linearize it */
> + dst = data;
> + for (j = 0; j < num_blks; j++) {
> + *dst = *src;
> +
> + dst++;
> + rd_idx = VK_MSGQ_INC(qinfo, rd_idx, 1);
> + src = VK_MSGQ_BLK_ADDR(qinfo, rd_idx);
> + }
> + total++;
> + } else {
> + dev_crit(dev, "Error allocating memory\n");
> + /* just keep draining..... */
> + rd_idx = VK_MSGQ_INC(qinfo, rd_idx, num_blks);
> + }
> +
> + /* flush rd pointer after a message is dequeued */
> + msgq->rd_idx = rd_idx;
> + mb(); /* do both rd/wr as we are extracting data out */
> +
> + bcm_vk_h2vk_verify_idx(dev, "rd_idx",
> + &msgq->rd_idx, rd_idx);
> +
> + /* log new info for debugging */
> + dev_dbg(dev,
> + "MsgQ[%d] [Rd Wr] = [%d %d] blks extracted %d - Q = [u-%d a-%d]/%d\n",
> + msgq->num,
> + msgq->rd_idx, msgq->wr_idx, num_blks,
> + VK_MSGQ_OCCUPIED(msgq, qinfo),
> + VK_MSGQ_AVAIL_SPACE(msgq, qinfo),
> + msgq->size);
> +
> + /*
> + * No need to search if it is an autonomous one-way
> + * message from driver, as these messages do not bear
> + * a h2vk pending item. Currently, only the shutdown
> + * message falls into this category.
> + */
> + if (data->function_id == VK_FID_SHUTDOWN) {
> + kfree(data);
> + continue;
> + }
> +
> + /* lookup original message in h2vk direction */
> + entry = bcm_vk_find_pending(vk,
> + &vk->h2vk_msg_chan,
> + q_num,
> + data->msg_id);
> +
> + /*
> + * if there is message to does not have prior send,
> + * this is the location to add here
> + */
> + if (entry) {
> + entry->vk2h_blks = num_blks;
> + entry->vk2h_msg = data;
> + bcm_vk_append_pendq(&vk->vk2h_msg_chan,
> + q_num, entry);
> +
> + } else {
> + dev_crit(dev,
> + "Could not find MsgId[0x%x] for resp func %d bmap %d\n",
> + data->msg_id, data->function_id,
> + test_bit(data->msg_id, vk->bmap));
> + kfree(data);
> + }
> +
> + }
> + }
> +idx_err:
> + mutex_unlock(&chan->msgq_mutex);
> + dev_dbg(dev, "total %d drained from queues\n", total);
> +
> + return total;
> +}
> +
> +/*
> + * deferred work queue for draining and auto download.
> + */
> +static void bcm_vk_wq_handler(struct work_struct *work)
> +{
> + struct bcm_vk *vk = container_of(work, struct bcm_vk, wq_work);
> + struct device *dev = &vk->pdev->dev;
> + uint32_t tot;
> +
> + /* check wq offload bit map to perform various operations */
> + if (test_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload)) {
> + /* clear bit right the way for notification */
> + clear_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload);
> + bcm_vk_handle_notf(vk);
> + }
> + if (test_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload)) {
> + bcm_vk_auto_load_all_images(vk);
> +
> + /*
> + * at the end of operation, clear AUTO bit and pending
> + * bit
> + */
> + clear_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload);
> + clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload);
> + }
> +
> + /* next, try to drain */
> + tot = bcm_vk2h_msg_dequeue(vk);
> +
> + if (tot == 0)
> + dev_dbg(dev, "Spurious trigger for workqueue\n");
> +}
> +
> +/*
> + * init routine for all required data structures
> + */
> +static int bcm_vk_data_init(struct bcm_vk *vk)
> +{
> + int rc = 0;
> + int i;
> +
> + spin_lock_init(&vk->ctx_lock);
> + for (i = 0; i < ARRAY_SIZE(vk->ctx); i++) {
> + vk->ctx[i].in_use = false;
> + vk->ctx[i].idx = i; /* self identity */
> + vk->ctx[i].miscdev = NULL;
> + }
> + spin_lock_init(&vk->msg_id_lock);
> + spin_lock_init(&vk->host_alert_lock);
> + vk->msg_id = 0;
> +
> + /* initialize hash table */
> + for (i = 0; i < VK_PID_HT_SZ; i++)
> + INIT_LIST_HEAD(&vk->pid_ht[i].head);
> +
> + INIT_WORK(&vk->wq_work, bcm_vk_wq_handler);
> + return rc;
Just "return 0;" here; rc is never changed from 0.
> +}
> +
> +irqreturn_t bcm_vk_msgq_irqhandler(int irq, void *dev_id)
> +{
> + struct bcm_vk *vk = dev_id;
> +
> + if (!bcm_vk_drv_access_ok(vk)) {
> + dev_err(&vk->pdev->dev,
> + "Interrupt %d received when msgq not inited\n", irq);
> + goto skip_schedule_work;
> + }
> +
> + queue_work(vk->wq_thread, &vk->wq_work);
> +
> +skip_schedule_work:
> + return IRQ_HANDLED;
> +}
> +
> +int bcm_vk_open(struct inode *inode, struct file *p_file)
> +{
> + struct bcm_vk_ctx *ctx;
> + struct miscdevice *miscdev = (struct miscdevice *)p_file->private_data;
> + struct bcm_vk *vk = container_of(miscdev, struct bcm_vk, miscdev);
> + struct device *dev = &vk->pdev->dev;
> + int rc = 0;
> +
> + /* get a context and set it up for file */
> + ctx = bcm_vk_get_ctx(vk, current);
> + if (!ctx) {
> + dev_err(dev, "Error allocating context\n");
> + rc = -ENOMEM;
> + } else {
> +
> + /*
> + * set up context and replace private data with context for
> + * other methods to use. Reason for the context is because
> + * it is allowed for multiple sessions to open the sysfs, and
> + * for each file open, when upper layer query the response,
> + * only those that are tied to a specific open should be
> + * returned. The context->idx will be used for such binding
> + */
> + ctx->miscdev = miscdev;
> + p_file->private_data = ctx;
> + dev_dbg(dev, "ctx_returned with idx %d, pid %d\n",
> + ctx->idx, task_pid_nr(ctx->ppid));
> + }
> + return rc;
> +}
> +
> +ssize_t bcm_vk_read(struct file *p_file, char __user *buf, size_t count,
> + loff_t *f_pos)
> +{
> + ssize_t rc = -ENOMSG;
> + struct bcm_vk_ctx *ctx = p_file->private_data;
> + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk,
> + miscdev);
> + struct device *dev = &vk->pdev->dev;
> + struct bcm_vk_msg_chan *chan = &vk->vk2h_msg_chan;
> + struct bcm_vk_wkent *entry = NULL;
> + uint32_t q_num;
> + uint32_t rsp_length;
> + bool found = false;
> +
> + if (!bcm_vk_drv_access_ok(vk))
> + return -EPERM;
> +
> + dev_dbg(dev, "Buf count %ld\n", count);
> + found = false;
> +
> + /*
> + * search through the pendq on the vk2h chan, and return only those
> + * that belongs to the same context. Search is always from the high to
> + * the low priority queues
> + */
> + spin_lock(&chan->pendq_lock);
> + for (q_num = 0; q_num < chan->q_nr; q_num++) {
> + list_for_each_entry(entry, &chan->pendq[q_num], node) {
> + if (entry->ctx->idx == ctx->idx) {
> + if (count >=
> + (entry->vk2h_blks * VK_MSGQ_BLK_SIZE)) {
> + list_del(&entry->node);
> + ctx->pend_cnt--;
> + found = true;
> + } else {
> + /* buffer not big enough */
> + rc = -EMSGSIZE;
> + }
> + goto bcm_vk_read_loop_exit;
> + }
> + }
> + }
> + bcm_vk_read_loop_exit:
> + spin_unlock(&chan->pendq_lock);
> +
> + if (found) {
> + /* retrieve the passed down msg_id */
> + entry->vk2h_msg[0].msg_id = entry->usr_msg_id;
> + rsp_length = entry->vk2h_blks * VK_MSGQ_BLK_SIZE;
> + if (copy_to_user(buf, entry->vk2h_msg, rsp_length) == 0)
> + rc = rsp_length;
The error code should be -EFAULT if this copy_to_user() fails (vs -ENOMSG).
> +
> + bcm_vk_free_wkent(dev, entry);
> + } else if (rc == -EMSGSIZE) {
> + struct vk_msg_blk tmp_msg = entry->vk2h_msg[0];
> +
> + /*
> + * in this case, return just the first block, so
> + * that app knows what size it is looking for.
> + */
> + tmp_msg.msg_id = entry->usr_msg_id;
> + tmp_msg.size = entry->vk2h_blks - 1;
> + if (copy_to_user(buf, &tmp_msg, VK_MSGQ_BLK_SIZE) != 0) {
> + dev_err(dev, "Error return 1st block in -EMSGSIZE\n");
> + rc = -EFAULT;
> + }
> + }
> + return rc;
> +}
> +
> +ssize_t bcm_vk_write(struct file *p_file, const char __user *buf,
> + size_t count, loff_t *f_pos)
> +{
> + ssize_t rc = -EPERM;
> + struct bcm_vk_ctx *ctx = p_file->private_data;
> + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk,
> + miscdev);
> + struct bcm_vk_msgq *msgq;
> + struct device *dev = &vk->pdev->dev;
> + struct bcm_vk_wkent *entry;
> + uint32_t sgl_extra_blks;
> +
> + if (!bcm_vk_drv_access_ok(vk))
> + return -EPERM;
> +
> + dev_dbg(dev, "Msg count %ld\n", count);
> +
> + /* first, do sanity check where count should be multiple of basic blk */
> + if (count & (VK_MSGQ_BLK_SIZE - 1)) {
> + dev_err(dev, "Failure with size %ld not multiple of %ld\n",
> + count, VK_MSGQ_BLK_SIZE);
> + rc = -EBADR;
#define EBADR 53 /* Invalid request descriptor */
This should just be -EINVAL;
> + goto bcm_vk_write_err;
These label names are too long. They don't need the function prefix.
> + }
> +
> + /* allocate the work entry + buffer for size count and inband sgl */
> + entry = kzalloc(sizeof(struct bcm_vk_wkent) + count + vk->ib_sgl_size,
> + GFP_KERNEL);
> + if (!entry) {
> + rc = -ENOMEM;
> + goto bcm_vk_write_err;
> + }
> +
> + /* now copy msg from user space, and then formulate the wk ent */
> + if (copy_from_user(&entry->h2vk_msg[0], buf, count))
> + goto bcm_vk_write_free_ent;
This is returning -EPERM instead of -EFAULT.
> +
> + entry->h2vk_blks = count >> VK_MSGQ_BLK_SZ_SHIFT;
> + entry->ctx = ctx;
> +
> + /* do a check on the blk size which could not exceed queue space */
> + msgq = vk->h2vk_msg_chan.msgq[entry->h2vk_msg[0].queue_id];
> + if (entry->h2vk_blks + (vk->ib_sgl_size >> VK_MSGQ_BLK_SZ_SHIFT)
> + > (msgq->size - 1)) {
> + dev_err(dev, "Blk size %d exceed max queue size allowed %d\n",
> + entry->h2vk_blks, msgq->size - 1);
> + rc = -EOVERFLOW;
This should probably just be -EINVAL as well.
> + goto bcm_vk_write_free_ent;
> + }
> +
> + /* Use internal message id */
> + entry->usr_msg_id = entry->h2vk_msg[0].msg_id;
> + rc = bcm_vk_get_msg_id(vk);
> + if (rc == VK_MSG_ID_OVERFLOW) {
> + dev_err(dev, "msg_id overflow\n");
> + rc = -EOVERFLOW;
> + goto bcm_vk_write_free_ent;
> + }
> + entry->h2vk_msg[0].msg_id = rc;
> +
> + dev_dbg(dev,
> + "Message ctx id %d, usr_msg_id 0x%x sent msg_id 0x%x\n",
> + ctx->idx, entry->usr_msg_id,
> + entry->h2vk_msg[0].msg_id);
> +
> + /* Convert any pointers to sg list */
> + if (entry->h2vk_msg[0].function_id == VK_FID_TRANS_BUF) {
> + unsigned int num_planes;
> + int dir;
> + struct _vk_data *data;
> +
> + /*
> + * check if we are in reset, if so, no buffer transfer is
> + * allowed and return error.
> + */
> + if (vk->reset_ppid) {
> + dev_dbg(dev, "No Transfer allowed during reset, pid %d.\n",
> + task_pid_nr(ctx->ppid));
> + rc = -EACCES;
> + goto bcm_vk_write_free_msgid;
> + }
> +
> + num_planes = entry->h2vk_msg[0].args[0] & VK_CMD_PLANES_MASK;
> + if ((entry->h2vk_msg[0].args[0] & VK_CMD_MASK)
> + == VK_CMD_DOWNLOAD) {
> + /* Memory transfer from vk device */
There are a bunch of pretty obvious comments that should just be
deleted.
> + dir = DMA_FROM_DEVICE;
> + } else {
> + /* Memory transfer to vk device */
> + dir = DMA_TO_DEVICE;
> + }
> +
> + /* Calculate vk_data location */
> + /* Go to end of the message */
> + data = (struct _vk_data *)
> + &(entry->h2vk_msg[entry->h2vk_msg[0].size + 1]);
> + /* Now back up to the start of the pointers */
> + data -= num_planes;
> +
> + /* Convert user addresses to DMA SG List */
> + rc = bcm_vk_sg_alloc(dev, entry->dma, dir, data, num_planes);
> + if (rc)
> + goto bcm_vk_write_free_msgid;
> +
> + /* try to embed inband sgl */
> + sgl_extra_blks = bcm_vk_append_ib_sgl(vk, entry, data,
> + num_planes);
> + entry->h2vk_blks += sgl_extra_blks;
> + entry->h2vk_msg[0].size += sgl_extra_blks;
> + }
> +
> + /*
> + * store wk ent to pending queue until a response is got. This needs to
^^
"wk" -- should this be "vk"?
> + * be done before enqueuing the message
> + */
> + bcm_vk_append_pendq(&vk->h2vk_msg_chan, entry->h2vk_msg[0].queue_id,
> + entry);
> +
> + rc = bcm_h2vk_msg_enqueue(vk, entry);
> + if (rc) {
> + dev_err(dev, "Fail to enqueue msg to h2vk queue\n");
> +
> + /* remove message from pending list */
> + entry = bcm_vk_find_pending(vk,
> + &vk->h2vk_msg_chan,
> + entry->h2vk_msg[0].queue_id,
> + entry->h2vk_msg[0].msg_id);
Can bcm_vk_find_pending() return NULL here (leading to an Oops when we
do entry->h2vk_msg[0].msg_id)? And why can't we just use the previous
value of entry? The comment doesn't help me.
> + goto bcm_vk_write_free_ent;
> + }
> +
> + return count;
> +
> +bcm_vk_write_free_msgid:
> + bcm_vk_msgid_bitmap_clear(vk, entry->h2vk_msg[0].msg_id, 1);
> +bcm_vk_write_free_ent:
> + kfree(entry);
> +bcm_vk_write_err:
> + return rc;
> +}
> +
> +int bcm_vk_release(struct inode *inode, struct file *p_file)
> +{
> + int ret;
> + struct bcm_vk_ctx *ctx = p_file->private_data;
> + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
> + struct device *dev = &vk->pdev->dev;
> + struct task_struct *ppid = ctx->ppid;
> + pid_t pid = task_pid_nr(ppid);
> +
> + dev_dbg(dev, "Draining with context idx %d pid %d\n",
> + ctx->idx, pid);
> +
> + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->h2vk_msg_chan, ctx);
> + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->vk2h_msg_chan, ctx);
> +
> + ret = bcm_vk_free_ctx(vk, ctx);
This returns a positive number.
> + if (ret == 0)
> + ret = bcm_vk_handle_last_sess(vk, ppid);
> +
> + /* free memory if it is the last reference */
> + kref_put(&vk->kref, bcm_vk_release_data);
> +
> + return ret;
Which we propagate to here. Probably this should just be "return 0;".
> +}
> +
> +int bcm_vk_msg_init(struct bcm_vk *vk)
> +{
> + struct device *dev = &vk->pdev->dev;
> + int err = 0;
> +
> + if (bcm_vk_data_init(vk)) {
> + dev_err(dev, "Error initializing internal data structures\n");
> + err = -EINVAL;
> + goto err_out;
> + }
> +
> + if (bcm_vk_msg_chan_init(&vk->h2vk_msg_chan) ||
> + bcm_vk_msg_chan_init(&vk->vk2h_msg_chan)) {
> + dev_err(dev, "Error initializing communication channel\n");
> + err = -EIO;
> + goto err_out;
> + }
> +
> + /* create dedicated workqueue */
> + vk->wq_thread = create_singlethread_workqueue(vk->miscdev.name);
> + if (!vk->wq_thread) {
> + dev_err(dev, "Fail to create workqueue thread\n");
> + err = -ENOMEM;
> + goto err_out;
> + }
> +
> + /* read msgq info */
> + if (bcm_vk_sync_msgq(vk, false)) {
> + dev_err(dev, "Error reading comm msg Q info\n");
> + err = -EIO;
> + goto err_out;
> + }
> +
> +err_out:
> + return err;
This should probably clean up the work queue on error?
> +}
> +
> +void bcm_vk_msg_remove(struct bcm_vk *vk)
> +{
> + bcm_vk_blk_drv_access(vk);
> +
> + /* drain all pending items */
> + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->h2vk_msg_chan, NULL);
> + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->vk2h_msg_chan, NULL);
> +}
> +
> +void bcm_vk_trigger_reset(struct bcm_vk *vk)
> +{
> + uint32_t i;
> + u32 value;
> +
> + /* clean up before pressing the door bell */
> + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->h2vk_msg_chan, NULL);
> + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->vk2h_msg_chan, NULL);
> + vkwrite32(vk, 0, BAR_1, VK_BAR1_MSGQ_DEF_RDY);
> + /* make tag '\0' terminated */
> + vkwrite32(vk, 0, BAR_1, VK_BAR1_BOOT1_VER_TAG);
> +
> + for (i = 0; i < VK_BAR1_DAUTH_MAX; i++) {
> + vkwrite32(vk, 0, BAR_1, VK_BAR1_DAUTH_STORE_ADDR(i));
> + vkwrite32(vk, 0, BAR_1, VK_BAR1_DAUTH_VALID_ADDR(i));
> + }
> + for (i = 0; i < VK_BAR1_SOTP_REVID_MAX; i++)
> + vkwrite32(vk, 0, BAR_1, VK_BAR1_SOTP_REVID_ADDR(i));
> +
> + memset(&vk->card_info, 0, sizeof(vk->card_info));
> + memset(&vk->alert_cnts, 0, sizeof(vk->alert_cnts));
> +
> + /*
> + * When boot request fails, the CODE_PUSH_OFFSET stays persistent.
> + * Allowing us to debug the failure. When we call reset,
> + * we should clear CODE_PUSH_OFFSET so ROM does not execute
> + * boot again (and fails again) and instead waits for a new
> + * codepush.
> + */
> + value = vkread32(vk, BAR_0, BAR_CODEPUSH_SBL);
> + value &= ~CODEPUSH_MASK;
> + vkwrite32(vk, value, BAR_0, BAR_CODEPUSH_SBL);
> +
> + /* reset fw_status with proper reason, and press db */
> + vkwrite32(vk, VK_FWSTS_RESET_MBOX_DB, BAR_0, VK_BAR_FWSTS);
> + bcm_h2vk_doorbell(vk, VK_BAR0_RESET_DB_NUM, VK_BAR0_RESET_DB_SOFT);
> +
> + /* clear the uptime register after reset pressed and alert record */
> + vkwrite32(vk, 0, BAR_0, BAR_OS_UPTIME);
> + memset(&vk->host_alert, 0, sizeof(vk->host_alert));
> + memset(&vk->peer_alert, 0, sizeof(vk->peer_alert));
> +#if defined(CONFIG_BCM_VK_QSTATS)
> + /* clear qstats */
> + for (i = 0; i < VK_MSGQ_MAX_NR; i++) {
> + memset(&vk->h2vk_msg_chan.qstats[i].qcnts, 0,
> + sizeof(vk->h2vk_msg_chan.qstats[i].qcnts));
> + memset(&vk->vk2h_msg_chan.qstats[i].qcnts, 0,
> + sizeof(vk->vk2h_msg_chan.qstats[i].qcnts));
> + }
> +#endif
> + /* clear 4096 bits of bitmap */
> + bitmap_clear(vk->bmap, 0, VK_MSG_ID_BITMAP_SIZE);
> +}
> diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.h b/drivers/misc/bcm-vk/bcm_vk_msg.h
> new file mode 100644
> index 000000000000..076307b2f1c4
> --- /dev/null
> +++ b/drivers/misc/bcm-vk/bcm_vk_msg.h
> @@ -0,0 +1,210 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright 2018-2020 Broadcom.
> + */
> +
> +#ifndef BCM_VK_MSG_H
> +#define BCM_VK_MSG_H
> +
> +#include <uapi/linux/misc/bcm_vk.h>
> +#include "bcm_vk_sg.h"
> +
> +/* Single message queue control structure */
> +struct bcm_vk_msgq {
> + uint16_t type; /* queue type */
> + uint16_t num; /* queue number */
> + uint32_t start; /* offset in BAR1 where the queue memory starts */
> + volatile uint32_t rd_idx; /* read idx */
> + volatile uint32_t wr_idx; /* write idx */
> + uint32_t size; /*
> + * size, which is in number of 16byte blocks,
> + * to align with the message data structure.
> + */
> + uint32_t nxt; /*
> + * nxt offset to the next msg queue struct.
> + * This is to provide flexibity for alignment purposes.
> + */
> +};
> +
> +/*
> + * Structure to record static info from the msgq sync. We keep local copy
> + * for some of these variables for both performance + checking purpose.
> + */
> +struct bcm_vk_sync_qinfo {
> + void __iomem *q_start;
> + uint32_t q_size;
> + uint32_t q_mask;
> + uint32_t q_low;
> +};
> +
> +#define VK_MSGQ_MAX_NR 4 /* Maximum number of message queues */
> +
> +/*
> + * some useful message queue macros
> + */
> +
> +/* vk_msg_blk is 16 bytes fixed */
> +#define VK_MSGQ_BLK_SIZE (sizeof(struct vk_msg_blk))
> +/* shift for fast division of basic msg blk size */
> +#define VK_MSGQ_BLK_SZ_SHIFT 4
> +
> +#define VK_MSGQ_EMPTY(_msgq) ((_msgq)->rd_idx == (_msgq)->wr_idx)
> +
> +#define VK_MSGQ_SIZE_MASK(_qinfo) ((_qinfo)->q_mask)
> +
> +#define VK_MSGQ_INC(_qinfo, _idx, _inc) \
> + (((_idx) + (_inc)) & VK_MSGQ_SIZE_MASK(_qinfo))
> +
> +#define VK_MSGQ_BLK_ADDR(_qinfo, _idx) \
> + (volatile struct vk_msg_blk *)((_qinfo)->q_start + \
> + (VK_MSGQ_BLK_SIZE * (_idx)))
> +
> +#define VK_MSGQ_OCCUPIED(_msgq, _qinfo) \
> + (((_msgq)->wr_idx - (_msgq)->rd_idx) & VK_MSGQ_SIZE_MASK(_qinfo))
> +
> +#define VK_MSGQ_AVAIL_SPACE(_msgq, _qinfo) \
> + ((_qinfo)->q_size - VK_MSGQ_OCCUPIED(_msgq, _qinfo) - 1)
> +
> +/* context per session opening of sysfs */
> +struct bcm_vk_ctx {
> + struct list_head node; /* use for linkage in Hash Table */
> + uint idx;
> + bool in_use;
> + struct task_struct *ppid;
> + uint32_t hash_idx;
> + struct miscdevice *miscdev;
> + int pend_cnt; /* number of items pending to be read from host */
> +};
> +
> +/* pid hash table entry */
> +struct bcm_vk_ht_entry {
> + struct list_head head;
> +};
> +
> +#define VK_DMA_MAX_ADDRS 4 /* Max 4 DMA Addresses */
> +/* structure for house keeping a single work entry */
> +struct bcm_vk_wkent {
> +
> + struct list_head node; /* for linking purpose */
> + struct bcm_vk_ctx *ctx;
> +
> + /* Store up to 4 dma pointers */
> + struct bcm_vk_dma dma[VK_DMA_MAX_ADDRS];
> +
> + uint32_t vk2h_blks; /* response */
> + struct vk_msg_blk *vk2h_msg;
> +
> + /*
> + * put the h2vk_msg at the end so that we could simply append h2vk msg
> + * to the end of the allocated block
> + */
> + uint32_t usr_msg_id;
> + uint32_t h2vk_blks;
> + uint32_t seq_num;
> + struct vk_msg_blk h2vk_msg[0];
> +};
> +
> +/* queue stats counters */
> +struct bcm_vk_qs_cnts {
> + uint32_t cnt; /* general counter, used to limit output */
> + uint32_t acc_sum;
> + uint32_t max_occ; /* max during a sampling period */
> + uint32_t max_abs; /* the abs max since reset */
> +};
> +
> +/* stats structure */
> +struct bcm_vk_qstats {
> + uint32_t q_num;
> + struct bcm_vk_qs_cnts qcnts;
> +};
> +
> +/* control channel structure for either h2vk or vk2h communication */
> +struct bcm_vk_msg_chan {
> + uint32_t q_nr;
> + struct mutex msgq_mutex;
> + /* pointing to BAR locations */
> + struct bcm_vk_msgq *msgq[VK_MSGQ_MAX_NR];
> +
> + spinlock_t pendq_lock;
> + /* for temporary storing pending items, one for each queue */
> + struct list_head pendq[VK_MSGQ_MAX_NR];
> + /* static queue info from the sync */
> + struct bcm_vk_sync_qinfo sync_qinfo[VK_MSGQ_MAX_NR];
> +#if defined(CONFIG_BCM_VK_QSTATS)
> + /* qstats */
> + struct bcm_vk_qstats qstats[VK_MSGQ_MAX_NR];
> +#endif
> +};
> +
> +/* total number of supported ctx, 32 ctx each for 5 components */
> +#define VK_CMPT_CTX_MAX (32 * 5)
> +
> +/* hash table defines to store the opened FDs */
> +#define VK_PID_HT_SHIFT_BIT 7 /* 128 */
> +#define VK_PID_HT_SZ (1 << VK_PID_HT_SHIFT_BIT)
> +
> +/* The following are offsets of DDR info provided by the vk card */
> +#define VK_BAR0_SEG_SIZE (4 * SZ_1K) /* segment size for BAR0 */
> +
> +/* shutdown types supported */
> +#define VK_SHUTDOWN_PID 1
> +#define VK_SHUTDOWN_GRACEFUL 2
> +
> +/*
> + * first door bell reg, ie for queue = 0. Only need the first one, as
> + * we will use the queue number to derive the others
> + */
> +#define VK_BAR0_REGSEG_DB_BASE 0x484
> +#define VK_BAR0_REGSEG_DB_REG_GAP 8 /*
> + * DB register gap,
> + * DB1 at 0x48c and DB2 at 0x494
> + */
> +
> +/* reset register and specific values */
> +#define VK_BAR0_RESET_DB_NUM 3
> +#define VK_BAR0_RESET_DB_SOFT 0xFFFFFFFF
> +#define VK_BAR0_RESET_DB_HARD 0xFFFFFFFD
> +
> +/* BAR1 message q definition */
> +
> +/* indicate if msgq ctrl in BAR1 is populated */
> +#define VK_BAR1_MSGQ_DEF_RDY 0x60c0
> +/* ready marker value for the above location, normal boot2 */
> +#define VK_BAR1_MSGQ_RDY_MARKER 0xbeefcafe
> +/* ready marker value for the above location, normal boot2 */
> +#define VK_BAR1_DIAG_RDY_MARKER 0xdeadcafe
> +/* number of msgqs in BAR1 */
> +#define VK_BAR1_MSGQ_NR 0x60c4
> +/* BAR1 queue control structure offset */
> +#define VK_BAR1_MSGQ_CTRL_OFF 0x60c8
> +/* BAR1 ucode and boot1 version tag */
> +#define VK_BAR1_UCODE_VER_TAG 0x6170
> +#define VK_BAR1_BOOT1_VER_TAG 0x61b0
> +#define VK_BAR1_VER_TAG_SIZE 64
> +/* Memory to hold the DMA buffer memory address allocated for boot2 download */
> +#define VK_BAR1_DMA_BUF_OFF_HI 0x61e0
> +#define VK_BAR1_DMA_BUF_OFF_LO (VK_BAR1_DMA_BUF_OFF_HI + 4)
> +#define VK_BAR1_DMA_BUF_SZ (VK_BAR1_DMA_BUF_OFF_HI + 8)
> +/* Scratch memory allocated on host for VK */
> +#define VK_BAR1_SCRATCH_OFF_LO 0x61f0
> +#define VK_BAR1_SCRATCH_OFF_HI (VK_BAR1_SCRATCH_OFF_LO + 4)
> +#define VK_BAR1_SCRATCH_SZ_ADDR (VK_BAR1_SCRATCH_OFF_LO + 8)
> +#define VK_BAR1_SCRATCH_DEF_NR_PAGES 32
> + /* BAR1 SOTP AUTH and REVID info */
> +#define VK_BAR1_DAUTH_BASE_ADDR 0x6200
> +#define VK_BAR1_DAUTH_STORE_SIZE 0x48
> +#define VK_BAR1_DAUTH_VALID_SIZE 0x8
> +#define VK_BAR1_DAUTH_MAX 4
> +#define VK_BAR1_DAUTH_STORE_ADDR(x) \
> + (VK_BAR1_DAUTH_BASE_ADDR + \
> + (x) * (VK_BAR1_DAUTH_STORE_SIZE + VK_BAR1_DAUTH_VALID_SIZE))
> +#define VK_BAR1_DAUTH_VALID_ADDR(x) \
> + (VK_BAR1_DAUTH_STORE_ADDR(x) + VK_BAR1_DAUTH_STORE_SIZE)
> +
> +#define VK_BAR1_SOTP_REVID_BASE_ADDR 0x6340
> +#define VK_BAR1_SOTP_REVID_SIZE 0x10
> +#define VK_BAR1_SOTP_REVID_MAX 2
> +#define VK_BAR1_SOTP_REVID_ADDR(x) \
> + (VK_BAR1_SOTP_REVID_BASE_ADDR + (x) * VK_BAR1_SOTP_REVID_SIZE)
> +
> +#endif
> diff --git a/drivers/misc/bcm-vk/bcm_vk_sg.c b/drivers/misc/bcm-vk/bcm_vk_sg.c
> new file mode 100644
> index 000000000000..e5de07958f0b
> --- /dev/null
> +++ b/drivers/misc/bcm-vk/bcm_vk_sg.c
> @@ -0,0 +1,273 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2018-2020 Broadcom.
> + */
> +#include <linux/dma-mapping.h>
> +#include <linux/mm.h>
> +#include <linux/pagemap.h>
> +#include <linux/vmalloc.h>
> +
> +#include <asm/page.h>
> +#include <asm/pgtable.h>
> +#include <asm/unaligned.h>
> +
> +#include <uapi/linux/misc/bcm_vk.h>
> +
> +#include "bcm_vk.h"
> +#include "bcm_vk_msg.h"
> +#include "bcm_vk_sg.h"
> +
> +/*
> + * Valkyrie has a hardware limitation of 16M transfer size.
> + * So limit the SGL chunks to 16M.
> + */
> +#define BCM_VK_MAX_SGL_CHUNK SZ_16M
> +
> +static int bcm_vk_dma_alloc(struct device *dev,
> + struct bcm_vk_dma *dma,
> + int dir,
> + struct _vk_data *vkdata);
> +static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma);
> +
> +/* Uncomment to dump SGLIST */
> +//#define BCM_VK_DUMP_SGLIST
> +
> +static int bcm_vk_dma_alloc(struct device *dev,
> + struct bcm_vk_dma *dma,
> + int direction,
> + struct _vk_data *vkdata)
> +{
> + dma_addr_t addr, sg_addr;
> + int err;
> + int i;
> + int offset;
> + uint32_t size;
> + uint32_t remaining_size;
> + uint32_t transfer_size;
> + uint64_t data;
> + unsigned long first, last;
> + struct _vk_data *sgdata;
> +
> + /* Get 64-bit user address */
> + data = get_unaligned(&(vkdata->address));
Extra parens.
> +
> + /* offset into first page */
> + offset = offset_in_page(data);
> +
> + /* Calculate number of pages */
> + first = (data & PAGE_MASK) >> PAGE_SHIFT;
> + last = ((data + vkdata->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
> + dma->nr_pages = last - first + 1;
> +
> + /* Allocate DMA pages */
> + dma->pages = kmalloc_array(dma->nr_pages,
> + sizeof(struct page *),
> + GFP_KERNEL);
> + if (dma->pages == NULL)
> + return -ENOMEM;
> +
> + dev_dbg(dev, "Alloc DMA Pages [0x%llx+0x%x => %d pages]\n",
> + data, vkdata->size, dma->nr_pages);
> +
> + dma->direction = direction;
> +
> + /* Get user pages into memory */
> + err = get_user_pages_fast(data & PAGE_MASK,
> + dma->nr_pages,
> + direction == DMA_FROM_DEVICE,
> + dma->pages);
> + if (err != dma->nr_pages) {
> + dma->nr_pages = (err >= 0) ? err : 0;
> + dev_err(dev, "get_user_pages_fast, err=%d [%d]\n",
> + err, dma->nr_pages);
> + return err < 0 ? err : -EINVAL;
> + }
> +
> + /* Max size of sg list is 1 per mapped page + fields at start */
> + dma->sglen = (dma->nr_pages * sizeof(*sgdata)) +
> + (sizeof(uint32_t) * SGLIST_VKDATA_START);
> +
> + /* Allocate sglist */
> + dma->sglist = dma_alloc_coherent(dev,
> + dma->sglen,
> + &dma->handle,
> + GFP_KERNEL);
dma->sglist = dma_alloc_coherent(dev, dma->sglen, &dma->handle,
GFP_KERNEL);
> + if (!dma->sglist)
> + return -ENOMEM;
No cleanup?
> +
> + dma->sglist[SGLIST_NUM_SG] = 0;
> + dma->sglist[SGLIST_TOTALSIZE] = vkdata->size;
> + remaining_size = vkdata->size;
> + sgdata = (struct _vk_data *)&(dma->sglist[SGLIST_VKDATA_START]);
> +
> + /* Map all pages into DMA */
> + i = 0;
Set but not used.
> + size = min_t(size_t, PAGE_SIZE - offset, remaining_size);
> + remaining_size -= size;
> + sg_addr = dma_map_page(dev,
> + dma->pages[0],
> + offset,
> + size,
> + dma->direction);
> + transfer_size = size;
> + if (unlikely(dma_mapping_error(dev, sg_addr))) {
> + __free_page(dma->pages[0]);
> + return -EIO;
> + }
> +
> + for (i = 1; i < dma->nr_pages; i++) {
> + size = min_t(size_t, PAGE_SIZE, remaining_size);
> + remaining_size -= size;
> + addr = dma_map_page(dev,
> + dma->pages[i],
> + 0,
> + size,
> + dma->direction);
> + if (unlikely(dma_mapping_error(dev, addr))) {
> + __free_page(dma->pages[i]);
> + return -EIO;
> + }
> +
> + /*
> + * Compress SG list entry when pages are contiguous
> + * and transfer size less or equal to BCM_VK_MAX_SGL_CHUNK
> + */
> + if ((addr == (sg_addr + transfer_size)) &&
> + ((transfer_size + size) <= BCM_VK_MAX_SGL_CHUNK)) {
> + /* pages are contiguous, add to same sg entry */
> + transfer_size += size;
> + } else {
> + /* pages are not contiguous, write sg entry */
> + sgdata->size = transfer_size;
> + put_unaligned(sg_addr, (uint64_t *)&(sgdata->address));
> + dma->sglist[SGLIST_NUM_SG]++;
> +
> + /* start new sg entry */
> + sgdata++;
> + sg_addr = addr;
> + transfer_size = size;
> + }
> + }
> + /* Write last sg list entry */
> + sgdata->size = transfer_size;
> + put_unaligned(sg_addr, (uint64_t *)&(sgdata->address));
> + dma->sglist[SGLIST_NUM_SG]++;
> +
> + /* Update pointers and size field to point to sglist */
> + put_unaligned((uint64_t)dma->handle, &(vkdata->address));
> + vkdata->size = (dma->sglist[SGLIST_NUM_SG] * sizeof(*sgdata)) +
> + (sizeof(uint32_t) * SGLIST_VKDATA_START);
> +
> +#ifdef BCM_VK_DUMP_SGLIST
> + dev_dbg(dev,
> + "sgl 0x%llx handle 0x%llx, sglen: 0x%x sgsize: 0x%x\n",
> + (uint64_t)dma->sglist,
> + dma->handle,
> + dma->sglen,
> + vkdata->size);
> + for (i = 0; i < vkdata->size / sizeof(uint32_t); i++)
> + dev_dbg(dev, "i:0x%x 0x%x\n", i, dma->sglist[i]);
> +#endif
> +
> + return 0;
> +}
> +
> +int bcm_vk_sg_alloc(struct device *dev,
> + struct bcm_vk_dma *dma,
> + int dir,
> + struct _vk_data *vkdata,
> + int num)
> +{
> + int i;
> + int rc = -EINVAL;
> +
> + /* Convert user addresses to DMA SG List */
> + for (i = 0; i < num; i++) {
> + if (vkdata[i].size && vkdata[i].address) {
> + /*
> + * If both size and address are non-zero
> + * then DMA alloc.
> + */
> + rc = bcm_vk_dma_alloc(dev,
> + &dma[i],
> + dir,
> + &vkdata[i]);
> + } else if (vkdata[i].size ||
> + vkdata[i].address) {
> + /*
> + * If one of size and address are zero
> + * there is a problem.
> + */
> + dev_err(dev,
> + "Invalid vkdata %x 0x%x 0x%llx\n",
> + i, vkdata[i].size, vkdata[i].address);
> + rc = -EINVAL;
> + } else {
> + /*
> + * If size and address are both zero
> + * don't convert, but return success.
> + */
> + rc = 0;
> + }
> +
> + if (rc)
> + goto fail_alloc;
> + }
> + return rc;
> +
> +fail_alloc:
> + while (i > 0) {
> + i--;
> + if (dma[i].sglist)
> + bcm_vk_dma_free(dev, &dma[i]);
> + }
> + return rc;
> +}
> +
> +static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma)
> +{
> + dma_addr_t addr;
> + int i;
> + int num_sg;
> + uint32_t size;
> + struct _vk_data *vkdata;
> +
> + dev_dbg(dev, "free sglist=%p sglen=0x%x\n",
> + dma->sglist, dma->sglen);
> +
This fits in 80 characters.
dev_dbg(dev, "free sglist=%p sglen=0x%x\n", dma->sglist, dma->sglen);
> + /* Unmap all pages in the sglist */
> + num_sg = dma->sglist[SGLIST_NUM_SG];
> + vkdata = (struct _vk_data *)&(dma->sglist[SGLIST_VKDATA_START]);
Extra parens.
> + for (i = 0; i < num_sg; i++) {
> + size = vkdata[i].size;
> + addr = get_unaligned(&(vkdata[i].address));
> +
> + dma_unmap_page(dev, addr, size, dma->direction);
> + }
> +
> + /* Free allocated sglist */
> + dma_free_coherent(dev, dma->sglen, dma->sglist, dma->handle);
> +
> + /* Release lock on all pages */
> + for (i = 0; i < dma->nr_pages; i++)
> + put_page(dma->pages[i]);
> +
> + /* Free allocated dma pages */
> + kfree(dma->pages);
> + dma->sglist = NULL;
> +
> + return 0;
> +}
> +
> +int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num)
> +{
> + int i;
> +
> + /* Unmap and free all pages and sglists */
> + for (i = 0; i < num; i++) {
> + if (dma[i].sglist)
> + bcm_vk_dma_free(dev, &dma[i]);
> + }
> +
> + return 0;
> +}
> diff --git a/drivers/misc/bcm-vk/bcm_vk_sg.h b/drivers/misc/bcm-vk/bcm_vk_sg.h
> new file mode 100644
> index 000000000000..df88154e80b7
> --- /dev/null
> +++ b/drivers/misc/bcm-vk/bcm_vk_sg.h
> @@ -0,0 +1,60 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright 2018-2020 Broadcom.
> + */
> +
> +#ifndef BCM_VK_SG_H
> +#define BCM_VK_SG_H
> +
> +#include <linux/dma-mapping.h>
> +
> +struct bcm_vk_dma {
> + /* for userland buffer */
> + struct page **pages;
> + int nr_pages;
> +
> + /* common */
> + dma_addr_t handle;
> + /*
> + * sglist is of the following LE format
> + * [U32] num_sg = number of sg addresses (N)
> + * [U32] totalsize = totalsize of data being transferred in sglist
> + * [U32] size[0] = size of data in address0
> + * [U32] addr_l[0] = lower 32-bits of address0
> + * [U32] addr_h[0] = higher 32-bits of address0
> + * ..
> + * [U32] size[N-1] = size of data in addressN-1
> + * [U32] addr_l[N-1] = lower 32-bits of addressN-1
> + * [U32] addr_h[N-1] = higher 32-bits of addressN-1
> + */
> + uint32_t *sglist;
> +#define SGLIST_NUM_SG 0
> +#define SGLIST_TOTALSIZE 1
> +#define SGLIST_VKDATA_START 2
> +
> + int sglen; /* Length (bytes) of sglist */
> + int direction;
> +};
> +
> +struct _vk_data {
> + uint32_t size; /* data size in bytes */
> + uint64_t address; /* Pointer to data */
> +} __packed;
> +
> +/*
> + * Scatter-gather DMA buffer API.
> + *
> + * These functions provide a simple way to create a page list and a
> + * scatter-gather list from userspace address and map the memory
> + * for DMA operation.
> + */
> +int bcm_vk_sg_alloc(struct device *dev,
> + struct bcm_vk_dma *dma,
> + int dir,
> + struct _vk_data *vkdata,
> + int num);
> +
> +int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num);
> +
> +#endif
> +
> diff --git a/drivers/misc/bcm-vk/bcm_vk_tty.c b/drivers/misc/bcm-vk/bcm_vk_tty.c
> new file mode 100644
> index 000000000000..3835218aaec6
> --- /dev/null
> +++ b/drivers/misc/bcm-vk/bcm_vk_tty.c
> @@ -0,0 +1,327 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2018-2020 Broadcom.
> + */
> +
> +#include <linux/tty.h>
> +#include <linux/tty_driver.h>
> +#include <linux/tty_flip.h>
> +
> +#include "bcm_vk.h"
> +
> +/* TTYVK base offset is 0x30000 into BAR1 */
> +#define BAR1_TTYVK_BASE_OFFSET 0x300000
> +/* Each TTYVK channel (TO or FROM) is 0x10000 */
> +#define BAR1_TTYVK_CHAN_OFFSET 0x100000
> +/* Each TTYVK channel has TO and FROM, hence the * 2 */
> +#define BAR1_TTYVK_BASE(index) (BAR1_TTYVK_BASE_OFFSET + \
> + ((index) * BAR1_TTYVK_CHAN_OFFSET * 2))
> +/* TO TTYVK channel base comes before FROM for each index */
> +#define TO_TTYK_BASE(index) BAR1_TTYVK_BASE(index)
> +#define FROM_TTYK_BASE(index) (BAR1_TTYVK_BASE(index) + \
> + BAR1_TTYVK_CHAN_OFFSET)
> +
> +struct bcm_vk_tty_chan {
> + uint32_t reserved;
> + uint32_t size;
> + uint32_t wr;
> + uint32_t rd;
> + uint32_t *data;
> +};
> +
> +#define VK_BAR_CHAN(v, DIR, e) ((v)->DIR##_offset \
> + + offsetof(struct bcm_vk_tty_chan, e))
> +#define VK_BAR_CHAN_SIZE(v, DIR) VK_BAR_CHAN(v, DIR, size)
> +#define VK_BAR_CHAN_WR(v, DIR) VK_BAR_CHAN(v, DIR, wr)
> +#define VK_BAR_CHAN_RD(v, DIR) VK_BAR_CHAN(v, DIR, rd)
> +#define VK_BAR_CHAN_DATA(v, DIR, off) (VK_BAR_CHAN(v, DIR, data) + off)
> +
> +/* Poll every 1/10 of second - temp hack till we use MSI interrupt */
> +#define SERIAL_TIMER_VALUE (HZ / 10)
> +
> +#if defined(BCM_VK_LEGACY_API)
> +
> +/* No support in legacy case, and do a dummy init and exit */
> +int bcm_vk_tty_init(struct bcm_vk *vk, char *name)
> +{
> + struct device *dev = &vk->pdev->dev;
> +
> + dev_dbg(dev, "init %s\n", name);
> + return 0;
> +}
> +
> +void bcm_vk_tty_exit(struct bcm_vk *vk)
> +{
> + struct device *dev = &vk->pdev->dev;
> +
> + dev_dbg(dev, "exit\n");
> +}
> +
> +#else
> +
> +static void bcm_vk_tty_poll(struct timer_list *t)
> +{
> + struct bcm_vk *vk = from_timer(vk, t, serial_timer);
> + struct bcm_vk_tty *vktty;
> + int card_status;
> + int ready_mask;
> + int count = 0;
> + unsigned char c;
> + int i;
> + int wr;
> +
> + card_status = vkread32(vk, BAR_0, BAR_CARD_STATUS);
> +
> + for (i = 0; i < BCM_VK_NUM_TTY; i++) {
> + /* Check the card status that the tty channel is ready */
> + ready_mask = BIT(i);
> + if ((card_status & ready_mask) == 0)
Get rid of ready_mask and use BIT(i) directly.
> + continue;
> +
> + vktty = &vk->tty[i];
> +
> + /* Fetch the wr offset in buffer from VK */
> + wr = vkread32(vk, BAR_1, VK_BAR_CHAN_WR(vktty, from));
> + if (wr >= vktty->from_size) {
> + dev_err(&vk->pdev->dev,
> + "ERROR: poll ttyVK%d wr:0x%x > 0x%x\n",
> + i, wr, vktty->from_size);
> + /* Need to signal and close device in this case */
> + return;
> + }
> +
> + /*
> + * Simple read of circular buffer and
> + * insert into tty flip buffer
> + */
> + while (vk->tty[i].rd != wr) {
> + c = vkread8(vk, BAR_1,
> + VK_BAR_CHAN_DATA(vktty, from, vktty->rd));
> + vktty->rd++;
> + if (vktty->rd >= vktty->from_size)
> + vktty->rd = 0;
> + tty_insert_flip_char(&vktty->port, c, TTY_NORMAL);
> + count++;
> + }
> + }
> +
> + if (count) {
> + tty_flip_buffer_push(&vktty->port);
> +
> + /* Update read offset from shadow register to card */
> + vkwrite32(vk, vktty->rd, BAR_1, VK_BAR_CHAN_RD(vktty, from));
> + }
> +
> + mod_timer(&vk->serial_timer, jiffies + SERIAL_TIMER_VALUE);
> +}
> +
> +static int bcm_vk_tty_open(struct tty_struct *tty, struct file *file)
> +{
> + int card_status;
> + int ready_mask;
> + struct bcm_vk *vk;
> + struct bcm_vk_tty *vktty;
> + int index;
> +
> + /* initialize the pointer in case something fails */
> + tty->driver_data = NULL;
> +
> + vk = (struct bcm_vk *)dev_get_drvdata(tty->dev);
> + index = tty->index;
> +
> + if (index >= BCM_VK_NUM_TTY)
> + return -EINVAL;
> +
> + vktty = &vk->tty[index];
> +
> + vktty->to_offset = TO_TTYK_BASE(index);
> + vktty->from_offset = FROM_TTYK_BASE(index);
> +
> + /* Do not allow tty device to be opened if tty on card not ready */
> + card_status = vkread32(vk, BAR_0, BAR_CARD_STATUS);
> + if (card_status == -1)
> + return -1;
> +
> + ready_mask = BIT(index);
> + if ((card_status & ready_mask) == 0)
> + return -1;
> +
> + /*
> + * Get shadow registers of the buffer sizes and the "to" write offset
> + * and "from" read offset
> + */
> + vktty->to_size = vkread32(vk, BAR_1, VK_BAR_CHAN_SIZE(vktty, to));
> + vktty->wr = vkread32(vk, BAR_1, VK_BAR_CHAN_WR(vktty, to));
> + vktty->from_size = vkread32(vk, BAR_1, VK_BAR_CHAN_SIZE(vktty, from));
> + vktty->rd = vkread32(vk, BAR_1, VK_BAR_CHAN_RD(vktty, from));
> +
> + if (tty->count == 1) {
> + timer_setup(&vk->serial_timer, bcm_vk_tty_poll, 0);
> + mod_timer(&vk->serial_timer, jiffies + SERIAL_TIMER_VALUE);
> + }
> + return 0;
> +}
> +
> +static void bcm_vk_tty_close(struct tty_struct *tty, struct file *file)
> +{
> + struct bcm_vk *vk;
> +
> + vk = (struct bcm_vk *)dev_get_drvdata(tty->dev);
struct bcm_vk *vk = dev_get_drvdata(tty->dev);
> +
> + if (tty->count == 1)
> + del_timer_sync(&vk->serial_timer);
> +}
> +
> +static int bcm_vk_tty_write(struct tty_struct *tty,
> + const unsigned char *buffer,
> + int count)
> +{
> + int index;
> + struct bcm_vk *vk;
> + struct bcm_vk_tty *vktty;
> + int i;
> + int retval;
> +
> + index = tty->index;
> + vk = (struct bcm_vk *)dev_get_drvdata(tty->dev);
The cast is unnecessary.
> + vktty = &vk->tty[index];
> +
> + /* Simple write each byte to circular buffer */
> + for (i = 0; i < count; i++) {
> + vkwrite8(vk,
> + buffer[i],
> + BAR_1,
> + VK_BAR_CHAN_DATA(vktty, to, vktty->wr));
vkwrite8(vk, buffer[i], BAR_1,
VK_BAR_CHAN_DATA(vktty, to, vktty->wr));
> + vktty->wr++;
> + if (vktty->wr >= vktty->to_size)
> + vktty->wr = 0;
> + }
> + /* Update write offset from shadow register to card */
> + /* TODO: Need to add write to doorbell here */
> + vkwrite32(vk, vktty->wr, BAR_1, VK_BAR_CHAN_WR(vktty, to));
> +
> + retval = count;
> +
> + return retval;
Delete the "retval" variable.
> +}
> +
> +static int bcm_vk_tty_write_room(struct tty_struct *tty)
> +{
> + int room;
> + struct bcm_vk *vk;
> + struct bcm_vk_tty *vktty;
> +
> + vk = (struct bcm_vk *)dev_get_drvdata(tty->dev);
Delete cast.
> + vktty = &vk->tty[tty->index];
> +
> + /*
> + * Calculate how much room is left in the device
> + * Just return the size -1 of buffer. We could care about
> + * overflow but don't at this point.
> + */
This comment raises more questions than it answers. Why would we care
about overflow and why don't we at this point? Perhaps just delete the
comment.
> + room = vktty->to_size - 1;
> +
> + return room;
Delete the room variable and the vktty variable.
return vk->tty[tty->index].to_size - 1;
> +}
> +
> +static const struct tty_operations serial_ops = {
> + .open = bcm_vk_tty_open,
> + .close = bcm_vk_tty_close,
> + .write = bcm_vk_tty_write,
> + .write_room = bcm_vk_tty_write_room,
> +};
> +
> +int bcm_vk_tty_init(struct bcm_vk *vk, char *name)
> +{
> + int i;
> + int err;
> + unsigned long flags;
> + struct tty_driver *tty_drv;
> + struct device *dev = &vk->pdev->dev;
> +
> + /* allocate the tty driver */
> + flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
> + tty_drv = tty_alloc_driver(BCM_VK_NUM_TTY, flags);
Delete the flags variable.
> +
Delete this blank line.
> + if (IS_ERR(tty_drv)) {
> + err = PTR_ERR(tty_drv);
> + goto err_exit;
return PTR_ERR(tty_drv);
> + }
> + /* Save struct tty_driver for uninstalling the device */
> + vk->tty_drv = tty_drv;
> +
> + /* initialize the tty driver */
> + tty_drv->driver_name = KBUILD_MODNAME;
> + tty_drv->name = kstrdup(name, GFP_KERNEL);
> + if (!tty_drv->name) {
> + err = -ENOMEM;
> + goto err_put_tty_driver;
> + }
> + tty_drv->type = TTY_DRIVER_TYPE_SERIAL;
> + tty_drv->subtype = SERIAL_TYPE_NORMAL;
> + tty_drv->init_termios = tty_std_termios;
> + tty_set_operations(tty_drv, &serial_ops);
> +
> + /* register the tty driver */
> + err = tty_register_driver(tty_drv);
> + if (err) {
> + dev_err(dev, "tty_register_driver failed\n");
> + goto err_kfree_tty_name;
> + }
> +
> + for (i = 0; i < BCM_VK_NUM_TTY; i++) {
> + struct device *tty_dev;
> +
> + tty_port_init(&vk->tty[i].port);
> + tty_dev = tty_port_register_device(&vk->tty[i].port,
> + tty_drv,
> + i,
> + dev);
tty_dev = tty_port_register_device(&vk->tty[i].port, tty_drv,
i, dev);
> + dev_set_drvdata(tty_dev, vk);
> +
> + if (IS_ERR(tty_dev)) {
dev_set_drvdata() is called before tty_dev is checked with IS_ERR().
> + int j;
> +
> + for (j = 0; j < i; j++)
> + tty_port_unregister_device(&vk->tty[j].port,
> + tty_drv,
> + j);
> + goto err_tty_unregister_driver;
No error code is set here: err is still 0 from tty_register_driver().
> + }
if (IS_ERR(tty_dev)) {
err = PTR_ERR(tty_dev);
goto unwind;
}
> + }
> +
> + return 0;
> +
unwind:
while (--i >= 0)
tty_port_unregister_device(&vk->tty[i].port, tty_drv, i);
> +err_tty_unregister_driver:
> + tty_unregister_driver(tty_drv);
> +
> +err_kfree_tty_name:
> + kfree(tty_drv->name);
> + tty_drv->name = NULL;
> +
> +err_put_tty_driver:
> + put_tty_driver(tty_drv);
> +
> +err_exit:
> + return err;
> +}
> +
> +void bcm_vk_tty_exit(struct bcm_vk *vk)
> +{
> + int i;
> +
> + del_timer_sync(&vk->serial_timer);
> + for (i = 0; i < BCM_VK_NUM_TTY; ++i) {
> + tty_port_unregister_device(&vk->tty[i].port,
> + vk->tty_drv,
> + i);
> + tty_port_destroy(&vk->tty[i].port);
> + }
> + tty_unregister_driver(vk->tty_drv);
> + put_tty_driver(vk->tty_drv);
> +
> + kfree(vk->tty_drv->name);
^^^^^^^^^^^
This is a use after free because of the put_tty_driver() above. I think
it is worth testing again with CONFIG_PAGE_POISONING enabled.
> + vk->tty_drv->name = NULL;
> +}
regards,
dan carpenter
^ permalink raw reply
* OpenBMC on AMD
From: Konstantin Klubnichkin @ 2020-02-20 10:45 UTC (permalink / raw)
To: openbmc
[-- Attachment #1: Type: text/html, Size: 499 bytes --]
^ permalink raw reply
* [linux-next:master 2342/3265] net/bluetooth/smp.c:2185 smp_cmd_pairing_random() error: uninitialized symbol 'passkey'.
From: Dan Carpenter @ 2020-02-20 10:46 UTC (permalink / raw)
To: kbuild
[-- Attachment #1: Type: text/plain, Size: 9694 bytes --]
tree: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
head: 1d7f85df0f9c0456520ae86dc597bca87980d253
commit: cee5f20fece32cd1722230cb05333f39db860698 [2342/3265] Bluetooth: secure bluetooth stack from bluedump attack
If you fix the issue, kindly add the following tags
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
smatch warnings:
net/bluetooth/smp.c:2185 smp_cmd_pairing_random() error: uninitialized symbol 'passkey'.
# https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=cee5f20fece32cd1722230cb05333f39db860698
git remote add linux-next https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
git remote update linux-next
git checkout cee5f20fece32cd1722230cb05333f39db860698
vim +/passkey +2185 net/bluetooth/smp.c
da85e5e5afeb72 Vinicius Costa Gomes 2011-06-09 2113 static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
88ba43b662b6b9 Anderson Briglia 2011-06-09 2114 {
5d88cc73dded31 Johan Hedberg 2014-08-08 2115 struct l2cap_chan *chan = conn->smp;
5d88cc73dded31 Johan Hedberg 2014-08-08 2116 struct smp_chan *smp = chan->data;
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2117 struct hci_conn *hcon = conn->hcon;
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2118 u8 *pkax, *pkbx, *na, *nb;
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2119 u32 passkey;
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2120 int err;
7d24ddcc1140d2 Anderson Briglia 2011-06-09 2121
8aab47574a7f5b Vinicius Costa Gomes 2011-09-05 2122 BT_DBG("conn %p", conn);
7d24ddcc1140d2 Anderson Briglia 2011-06-09 2123
c46b98bea5691c Johan Hedberg 2014-02-18 2124 if (skb->len < sizeof(smp->rrnd))
38e4a915663f3f Johan Hedberg 2014-05-08 2125 return SMP_INVALID_PARAMS;
c46b98bea5691c Johan Hedberg 2014-02-18 2126
943a732ab6440f Johan Hedberg 2014-03-18 2127 memcpy(smp->rrnd, skb->data, sizeof(smp->rrnd));
8aab47574a7f5b Vinicius Costa Gomes 2011-09-05 2128 skb_pull(skb, sizeof(smp->rrnd));
3158c50c33c1ac Vinicius Costa Gomes 2011-06-14 2129
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2130 if (!test_bit(SMP_FLAG_SC, &smp->flags))
861580a970f1ab Johan Hedberg 2014-05-20 2131 return smp_random(smp);
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2132
580039e838a7ef Johan Hedberg 2014-12-03 2133 if (hcon->out) {
580039e838a7ef Johan Hedberg 2014-12-03 2134 pkax = smp->local_pk;
580039e838a7ef Johan Hedberg 2014-12-03 2135 pkbx = smp->remote_pk;
580039e838a7ef Johan Hedberg 2014-12-03 2136 na = smp->prnd;
580039e838a7ef Johan Hedberg 2014-12-03 2137 nb = smp->rrnd;
580039e838a7ef Johan Hedberg 2014-12-03 2138 } else {
580039e838a7ef Johan Hedberg 2014-12-03 2139 pkax = smp->remote_pk;
580039e838a7ef Johan Hedberg 2014-12-03 2140 pkbx = smp->local_pk;
580039e838a7ef Johan Hedberg 2014-12-03 2141 na = smp->rrnd;
580039e838a7ef Johan Hedberg 2014-12-03 2142 nb = smp->prnd;
580039e838a7ef Johan Hedberg 2014-12-03 2143 }
580039e838a7ef Johan Hedberg 2014-12-03 2144
a29b073351ffdd Johan Hedberg 2014-10-28 2145 if (smp->method == REQ_OOB) {
a29b073351ffdd Johan Hedberg 2014-10-28 2146 if (!hcon->out)
a29b073351ffdd Johan Hedberg 2014-10-28 2147 smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM,
a29b073351ffdd Johan Hedberg 2014-10-28 2148 sizeof(smp->prnd), smp->prnd);
a29b073351ffdd Johan Hedberg 2014-10-28 2149 SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK);
a29b073351ffdd Johan Hedberg 2014-10-28 2150 goto mackey_and_ltk;
a29b073351ffdd Johan Hedberg 2014-10-28 2151 }
a29b073351ffdd Johan Hedberg 2014-10-28 2152
38606f1418cc9c Johan Hedberg 2014-06-25 2153 /* Passkey entry has special treatment */
38606f1418cc9c Johan Hedberg 2014-06-25 2154 if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY)
38606f1418cc9c Johan Hedberg 2014-06-25 2155 return sc_passkey_round(smp, SMP_CMD_PAIRING_RANDOM);
38606f1418cc9c Johan Hedberg 2014-06-25 2156
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2157 if (hcon->out) {
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2158 u8 cfm[16];
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2159
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2160 err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->local_pk,
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2161 smp->rrnd, 0, cfm);
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2162 if (err)
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2163 return SMP_UNSPECIFIED;
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2164
329d82309824ff Jason A. Donenfeld 2017-06-10 2165 if (crypto_memneq(smp->pcnf, cfm, 16))
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2166 return SMP_CONFIRM_FAILED;
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2167 } else {
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2168 smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2169 smp->prnd);
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2170 SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK);
cee5f20fece32c Howard Chung 2020-02-14 2171
cee5f20fece32c Howard Chung 2020-02-14 2172 /* Only Just-Works pairing requires extra checks */
cee5f20fece32c Howard Chung 2020-02-14 2173 if (smp->method != JUST_WORKS)
cee5f20fece32c Howard Chung 2020-02-14 2174 goto mackey_and_ltk;
cee5f20fece32c Howard Chung 2020-02-14 2175
cee5f20fece32c Howard Chung 2020-02-14 2176 /* If there already exists long term key in local host, leave
cee5f20fece32c Howard Chung 2020-02-14 2177 * the decision to user space since the remote device could
cee5f20fece32c Howard Chung 2020-02-14 2178 * be legitimate or malicious.
cee5f20fece32c Howard Chung 2020-02-14 2179 */
cee5f20fece32c Howard Chung 2020-02-14 2180 if (hci_find_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
cee5f20fece32c Howard Chung 2020-02-14 2181 hcon->role)) {
cee5f20fece32c Howard Chung 2020-02-14 2182 err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst,
cee5f20fece32c Howard Chung 2020-02-14 2183 hcon->type,
cee5f20fece32c Howard Chung 2020-02-14 2184 hcon->dst_type,
cee5f20fece32c Howard Chung 2020-02-14 @2185 passkey, 1);
^^^^^^^
Uninitialized until later in the function.
cee5f20fece32c Howard Chung 2020-02-14 2186 if (err)
cee5f20fece32c Howard Chung 2020-02-14 2187 return SMP_UNSPECIFIED;
cee5f20fece32c Howard Chung 2020-02-14 2188 set_bit(SMP_FLAG_WAIT_USER, &smp->flags);
cee5f20fece32c Howard Chung 2020-02-14 2189 }
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2190 }
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2191
a29b073351ffdd Johan Hedberg 2014-10-28 2192 mackey_and_ltk:
760b018b6cf08e Johan Hedberg 2014-06-06 2193 /* Generate MacKey and LTK */
760b018b6cf08e Johan Hedberg 2014-06-06 2194 err = sc_mackey_and_ltk(smp, smp->mackey, smp->tk);
760b018b6cf08e Johan Hedberg 2014-06-06 2195 if (err)
760b018b6cf08e Johan Hedberg 2014-06-06 2196 return SMP_UNSPECIFIED;
760b018b6cf08e Johan Hedberg 2014-06-06 2197
a29b073351ffdd Johan Hedberg 2014-10-28 2198 if (smp->method == JUST_WORKS || smp->method == REQ_OOB) {
dddd3059e3bdd0 Johan Hedberg 2014-06-01 2199 if (hcon->out) {
38606f1418cc9c Johan Hedberg 2014-06-25 2200 sc_dhkey_check(smp);
dddd3059e3bdd0 Johan Hedberg 2014-06-01 2201 SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK);
dddd3059e3bdd0 Johan Hedberg 2014-06-01 2202 }
dddd3059e3bdd0 Johan Hedberg 2014-06-01 2203 return 0;
dddd3059e3bdd0 Johan Hedberg 2014-06-01 2204 }
dddd3059e3bdd0 Johan Hedberg 2014-06-01 2205
38606f1418cc9c Johan Hedberg 2014-06-25 2206 err = smp_g2(smp->tfm_cmac, pkax, pkbx, na, nb, &passkey);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
38606f1418cc9c Johan Hedberg 2014-06-25 2207 if (err)
38606f1418cc9c Johan Hedberg 2014-06-25 2208 return SMP_UNSPECIFIED;
38606f1418cc9c Johan Hedberg 2014-06-25 2209
38606f1418cc9c Johan Hedberg 2014-06-25 2210 err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst, hcon->type,
38606f1418cc9c Johan Hedberg 2014-06-25 2211 hcon->dst_type, passkey, 0);
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2212 if (err)
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2213 return SMP_UNSPECIFIED;
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2214
38606f1418cc9c Johan Hedberg 2014-06-25 2215 set_bit(SMP_FLAG_WAIT_USER, &smp->flags);
38606f1418cc9c Johan Hedberg 2014-06-25 2216
191dc7fe2d3a8d Johan Hedberg 2014-06-06 2217 return 0;
88ba43b662b6b9 Anderson Briglia 2011-06-09 2218 }
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.