* [PATCH v1 1/6] mm: Section numbers use the type "unsigned long"
From: David Hildenbrand @ 2019-06-14 10:01 UTC (permalink / raw)
To: linux-kernel
Cc: Stephen Rothwell, Michal Hocko, Pavel Tatashin, Baoquan He,
David Hildenbrand, Greg Kroah-Hartman, Rafael J. Wysocki,
Mel Gorman, Wei Yang, linux-mm, linux-acpi, Mike Rapoport,
Arun KS, Johannes Weiner, Dan Williams, linuxppc-dev,
Andrew Morton, Vlastimil Babka, Oscar Salvador
In-Reply-To: <20190614100114.311-1-david@redhat.com>
We are using a mixture of "int" and "unsigned long". Let's make this
consistent by using "unsigned long" everywhere. We'll do the same with
memory block ids next.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Arun KS <arunks@codeaurora.org>
Cc: Pavel Tatashin <pasha.tatashin@oracle.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
drivers/base/memory.c | 9 +++++----
include/linux/mmzone.h | 4 ++--
mm/sparse.c | 12 ++++++------
3 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 826dd76f662e..5b3a2fd250ba 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -34,7 +34,7 @@ static DEFINE_MUTEX(mem_sysfs_mutex);
static int sections_per_block;
-static inline int base_memory_block_id(int section_nr)
+static inline int base_memory_block_id(unsigned long section_nr)
{
return section_nr / sections_per_block;
}
@@ -691,10 +691,11 @@ static int init_memory_block(struct memory_block **memory, int block_id,
return ret;
}
-static int add_memory_block(int base_section_nr)
+static int add_memory_block(unsigned long base_section_nr)
{
+ int ret, section_count = 0;
struct memory_block *mem;
- int i, ret, section_count = 0;
+ unsigned long i;
for (i = base_section_nr;
i < base_section_nr + sections_per_block;
@@ -822,7 +823,7 @@ static const struct attribute_group *memory_root_attr_groups[] = {
*/
int __init memory_dev_init(void)
{
- unsigned int i;
+ unsigned long i;
int ret;
int err;
unsigned long block_sz;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 427b79c39b3c..83b6aae16f13 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1220,7 +1220,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
return NULL;
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
-extern int __section_nr(struct mem_section* ms);
+extern unsigned long __section_nr(struct mem_section *ms);
extern unsigned long usemap_size(void);
/*
@@ -1292,7 +1292,7 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn)
return __nr_to_section(pfn_to_section_nr(pfn));
}
-extern int __highest_present_section_nr;
+extern unsigned long __highest_present_section_nr;
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
static inline int pfn_valid(unsigned long pfn)
diff --git a/mm/sparse.c b/mm/sparse.c
index 1552c855d62a..e8c57e039be8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -102,7 +102,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
#endif
#ifdef CONFIG_SPARSEMEM_EXTREME
-int __section_nr(struct mem_section* ms)
+unsigned long __section_nr(struct mem_section *ms)
{
unsigned long root_nr;
struct mem_section *root = NULL;
@@ -121,9 +121,9 @@ int __section_nr(struct mem_section* ms)
return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
#else
-int __section_nr(struct mem_section* ms)
+unsigned long __section_nr(struct mem_section *ms)
{
- return (int)(ms - mem_section[0]);
+ return (unsigned long)(ms - mem_section[0]);
}
#endif
@@ -178,10 +178,10 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
* Keeping track of this gives us an easy way to break out of
* those loops early.
*/
-int __highest_present_section_nr;
+unsigned long __highest_present_section_nr;
static void section_mark_present(struct mem_section *ms)
{
- int section_nr = __section_nr(ms);
+ unsigned long section_nr = __section_nr(ms);
if (section_nr > __highest_present_section_nr)
__highest_present_section_nr = section_nr;
@@ -189,7 +189,7 @@ static void section_mark_present(struct mem_section *ms)
ms->section_mem_map |= SECTION_MARKED_PRESENT;
}
-static inline int next_present_section_nr(int section_nr)
+static inline unsigned long next_present_section_nr(unsigned long section_nr)
{
do {
section_nr++;
--
2.21.0
^ permalink raw reply related
* [PATCH v1 0/6] mm: Further memory block device cleanups
From: David Hildenbrand @ 2019-06-14 10:01 UTC (permalink / raw)
To: linux-kernel
Cc: Oscar Salvador, Michal Hocko, David Hildenbrand, Wei Yang,
Keith Busch, linux-mm, Arun KS, Rashmica Gupta, Thomas Gleixner,
Stephen Rothwell, Michael Neuling, Baoquan He, Rafael J. Wysocki,
Pavel Tatashin, linux-acpi, Len Brown, Pavel Tatashin,
Pavel Tatashin, Anshuman Khandual, mike.travis@hpe.com,
linuxppc-dev, Mike Rapoport, Qian Cai, Dan Williams,
Vlastimil Babka, Oscar Salvador, Juergen Gross, Andrew Banman,
Greg Kroah-Hartman, Rafael J. Wysocki, Johannes Weiner,
Paul Mackerras, Andrew Morton, Mel Gorman
Some further cleanups around memory block devices. Especially, clean up
and simplify walk_memory_range(). Including some other minor cleanups.
Based on: linux-next
Minor conflict with Dan's subsection hot-add series.
Compiled + tested on x86 with DIMMs under QEMU.
David Hildenbrand (6):
mm: Section numbers use the type "unsigned long"
drivers/base/memory: Use "unsigned long" for block ids
mm: Make register_mem_sect_under_node() static
mm/memory_hotplug: Rename walk_memory_range() and pass start+size
instead of pfns
mm/memory_hotplug: Move and simplify walk_memory_blocks()
drivers/base/memory.c: Get rid of find_memory_block_hinted()
arch/powerpc/platforms/powernv/memtrace.c | 22 +++---
drivers/acpi/acpi_memhotplug.c | 19 ++----
drivers/base/memory.c | 81 +++++++++++++++++------
drivers/base/node.c | 8 ++-
include/linux/memory.h | 5 +-
include/linux/memory_hotplug.h | 2 -
include/linux/mmzone.h | 4 +-
include/linux/node.h | 7 --
mm/memory_hotplug.c | 57 +---------------
mm/sparse.c | 12 ++--
10 files changed, 92 insertions(+), 125 deletions(-)
--
2.21.0
^ permalink raw reply
* [PATCH v1 2/6] drivers/base/memory: Use "unsigned long" for block ids
From: David Hildenbrand @ 2019-06-14 10:01 UTC (permalink / raw)
To: linux-kernel
Cc: David Hildenbrand, Greg Kroah-Hartman, Rafael J. Wysocki,
linux-mm, linux-acpi, Dan Williams, linuxppc-dev, Andrew Morton
In-Reply-To: <20190614100114.311-1-david@redhat.com>
Block ids are just shifted section numbers, so let's also use
"unsigned long" for them, too.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
drivers/base/memory.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 5b3a2fd250ba..3ed08e67e64f 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -34,12 +34,12 @@ static DEFINE_MUTEX(mem_sysfs_mutex);
static int sections_per_block;
-static inline int base_memory_block_id(unsigned long section_nr)
+static inline unsigned long base_memory_block_id(unsigned long section_nr)
{
return section_nr / sections_per_block;
}
-static inline int pfn_to_block_id(unsigned long pfn)
+static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
return base_memory_block_id(pfn_to_section_nr(pfn));
}
@@ -587,7 +587,7 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn)
* A reference for the returned object is held and the reference for the
* hinted object is released.
*/
-static struct memory_block *find_memory_block_by_id(int block_id,
+static struct memory_block *find_memory_block_by_id(unsigned long block_id,
struct memory_block *hint)
{
struct device *hintdev = hint ? &hint->dev : NULL;
@@ -604,7 +604,7 @@ static struct memory_block *find_memory_block_by_id(int block_id,
struct memory_block *find_memory_block_hinted(struct mem_section *section,
struct memory_block *hint)
{
- int block_id = base_memory_block_id(__section_nr(section));
+ unsigned long block_id = base_memory_block_id(__section_nr(section));
return find_memory_block_by_id(block_id, hint);
}
@@ -663,8 +663,8 @@ int register_memory(struct memory_block *memory)
return ret;
}
-static int init_memory_block(struct memory_block **memory, int block_id,
- unsigned long state)
+static int init_memory_block(struct memory_block **memory,
+ unsigned long block_id, unsigned long state)
{
struct memory_block *mem;
unsigned long start_pfn;
@@ -730,8 +730,8 @@ static void unregister_memory(struct memory_block *memory)
*/
int create_memory_block_devices(unsigned long start, unsigned long size)
{
- const int start_block_id = pfn_to_block_id(PFN_DOWN(start));
- int end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
+ const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
+ unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
struct memory_block *mem;
unsigned long block_id;
int ret = 0;
@@ -767,10 +767,10 @@ int create_memory_block_devices(unsigned long start, unsigned long size)
*/
void remove_memory_block_devices(unsigned long start, unsigned long size)
{
- const int start_block_id = pfn_to_block_id(PFN_DOWN(start));
- const int end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
+ const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
+ const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
struct memory_block *mem;
- int block_id;
+ unsigned long block_id;
if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
!IS_ALIGNED(size, memory_block_size_bytes())))
--
2.21.0
^ permalink raw reply related
* [PATCH v1 4/6] mm/memory_hotplug: Rename walk_memory_range() and pass start+size instead of pfns
From: David Hildenbrand @ 2019-06-14 10:01 UTC (permalink / raw)
To: linux-kernel
Cc: Michal Hocko, David Hildenbrand, Wei Yang, linux-mm,
Paul Mackerras, Rashmica Gupta, Dan Williams, Michael Neuling,
linux-acpi, Len Brown, Pavel Tatashin, Anshuman Khandual,
Qian Cai, Thomas Gleixner, Oscar Salvador, Juergen Gross,
Greg Kroah-Hartman, Rafael J. Wysocki, Arun KS, Andrew Morton,
linuxppc-dev
In-Reply-To: <20190614100114.311-1-david@redhat.com>
walk_memory_range() was once used to iterate over sections. Now, it
iterates over memory blocks. Rename the function, fixup the
documentation. Also, pass start+size instead of PFNs, which is what most
callers already have at hand. (we'll rework link_mem_sections() most
probably soon)
Follow-up patches wil rework, simplify, and move walk_memory_blocks() to
drivers/base/memory.c.
Note: walk_memory_blocks() only works correctly right now if the
start_pfn is aligned to a section start. This is the case right now,
but we'll generalize the function in a follow up patch so the semantics
match the documentation.
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Pavel Tatashin <pavel.tatashin@microsoft.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Qian Cai <cai@lca.pw>
Cc: Arun KS <arunks@codeaurora.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/powerpc/platforms/powernv/memtrace.c | 22 ++++++++++-----------
drivers/acpi/acpi_memhotplug.c | 19 ++++--------------
drivers/base/node.c | 5 +++--
include/linux/memory_hotplug.h | 2 +-
mm/memory_hotplug.c | 24 ++++++++++++-----------
5 files changed, 32 insertions(+), 40 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 5e53c1392d3b..8c82c041afe6 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -70,23 +70,24 @@ static int change_memblock_state(struct memory_block *mem, void *arg)
/* called with device_hotplug_lock held */
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
+ const unsigned long start = PFN_PHYS(start_pfn);
+ const unsigned long size = PFN_PHYS(nr_pages);
u64 end_pfn = start_pfn + nr_pages - 1;
- if (walk_memory_range(start_pfn, end_pfn, NULL,
- check_memblock_online))
+ if (walk_memory_blocks(start, size, NULL, check_memblock_online))
return false;
- walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE,
- change_memblock_state);
+ walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE,
+ change_memblock_state);
if (offline_pages(start_pfn, nr_pages)) {
- walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE,
- change_memblock_state);
+ walk_memory_blocks(start, size, (void *)MEM_ONLINE,
+ change_memblock_state);
return false;
}
- walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
- change_memblock_state);
+ walk_memory_blocks(start, size, (void *)MEM_OFFLINE,
+ change_memblock_state);
return true;
@@ -242,9 +243,8 @@ static int memtrace_online(void)
*/
if (!memhp_auto_online) {
lock_device_hotplug();
- walk_memory_range(PFN_DOWN(ent->start),
- PFN_UP(ent->start + ent->size - 1),
- NULL, online_mem_block);
+ walk_memory_blocks(ent->start, ent->size, NULL,
+ online_mem_block);
unlock_device_hotplug();
}
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index db013dc21c02..e294f44a7850 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -155,16 +155,6 @@ static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
return 0;
}
-static unsigned long acpi_meminfo_start_pfn(struct acpi_memory_info *info)
-{
- return PFN_DOWN(info->start_addr);
-}
-
-static unsigned long acpi_meminfo_end_pfn(struct acpi_memory_info *info)
-{
- return PFN_UP(info->start_addr + info->length-1);
-}
-
static int acpi_bind_memblk(struct memory_block *mem, void *arg)
{
return acpi_bind_one(&mem->dev, arg);
@@ -173,9 +163,8 @@ static int acpi_bind_memblk(struct memory_block *mem, void *arg)
static int acpi_bind_memory_blocks(struct acpi_memory_info *info,
struct acpi_device *adev)
{
- return walk_memory_range(acpi_meminfo_start_pfn(info),
- acpi_meminfo_end_pfn(info), adev,
- acpi_bind_memblk);
+ return walk_memory_blocks(info->start_addr, info->length, adev,
+ acpi_bind_memblk);
}
static int acpi_unbind_memblk(struct memory_block *mem, void *arg)
@@ -186,8 +175,8 @@ static int acpi_unbind_memblk(struct memory_block *mem, void *arg)
static void acpi_unbind_memory_blocks(struct acpi_memory_info *info)
{
- walk_memory_range(acpi_meminfo_start_pfn(info),
- acpi_meminfo_end_pfn(info), NULL, acpi_unbind_memblk);
+ walk_memory_blocks(info->start_addr, info->length, NULL,
+ acpi_unbind_memblk);
}
static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index e6364e3e3e31..d8c02e65df68 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -833,8 +833,9 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
{
- return walk_memory_range(start_pfn, end_pfn, (void *)&nid,
- register_mem_sect_under_node);
+ return walk_memory_blocks(PFN_PHYS(start_pfn),
+ PFN_PHYS(end_pfn - start_pfn), (void *)&nid,
+ register_mem_sect_under_node);
}
#ifdef CONFIG_HUGETLBFS
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 79e0add6a597..d9fffc34949f 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -340,7 +340,7 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */
extern void __ref free_area_init_core_hotplug(int nid);
-extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
+extern int walk_memory_blocks(unsigned long start, unsigned long size,
void *arg, int (*func)(struct memory_block *, void *));
extern int __add_memory(int nid, u64 start, u64 size);
extern int add_memory(int nid, u64 start, u64 size);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a88c5f334e5a..122a7d31efdd 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1126,8 +1126,7 @@ int __ref add_memory_resource(int nid, struct resource *res)
/* online pages if requested */
if (memhp_auto_online)
- walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
- NULL, online_memory_block);
+ walk_memory_blocks(start, size, NULL, online_memory_block);
return ret;
error:
@@ -1665,20 +1664,24 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
#endif /* CONFIG_MEMORY_HOTREMOVE */
/**
- * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn)
- * @start_pfn: start pfn of the memory range
- * @end_pfn: end pfn of the memory range
+ * walk_memory_blocks - walk through all present memory blocks overlapped
+ * by the range [start, start + size)
+ *
+ * @start: start address of the memory range
+ * @size: size of the memory range
* @arg: argument passed to func
- * @func: callback for each memory section walked
+ * @func: callback for each memory block walked
*
- * This function walks through all present mem sections in range
- * [start_pfn, end_pfn) and call func on each mem section.
+ * This function walks through all present memory blocks overlapped by the
+ * range [start, start + size), calling func on each memory block.
*
* Returns the return value of func.
*/
-int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
+int walk_memory_blocks(unsigned long start, unsigned long size,
void *arg, int (*func)(struct memory_block *, void *))
{
+ const unsigned long start_pfn = PFN_DOWN(start);
+ const unsigned long end_pfn = PFN_UP(start + size - 1);
struct memory_block *mem = NULL;
struct mem_section *section;
unsigned long pfn, section_nr;
@@ -1824,8 +1827,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
* whether all memory blocks in question are offline and return error
* if this is not the case.
*/
- rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
- check_memblock_offlined_cb);
+ rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb);
if (rc)
goto done;
--
2.21.0
^ permalink raw reply related
* [PATCH v1 3/6] mm: Make register_mem_sect_under_node() static
From: David Hildenbrand @ 2019-06-14 10:01 UTC (permalink / raw)
To: linux-kernel
Cc: Keith Busch, David Hildenbrand, Greg Kroah-Hartman,
Rafael J. Wysocki, linux-mm, linux-acpi, Dan Williams,
linuxppc-dev, Andrew Morton, Oscar Salvador
In-Reply-To: <20190614100114.311-1-david@redhat.com>
It is only used internally.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Oscar Salvador <osalvador@suse.de>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
drivers/base/node.c | 3 ++-
include/linux/node.h | 7 -------
2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 9be88fd05147..e6364e3e3e31 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -752,7 +752,8 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
}
/* register memory section under specified node if it spans that node */
-int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
+static int register_mem_sect_under_node(struct memory_block *mem_blk,
+ void *arg)
{
int ret, nid = *(int *)arg;
unsigned long pfn, sect_start_pfn, sect_end_pfn;
diff --git a/include/linux/node.h b/include/linux/node.h
index 548c226966a2..4866f32a02d8 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -137,8 +137,6 @@ static inline int register_one_node(int nid)
extern void unregister_one_node(int nid);
extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
-extern int register_mem_sect_under_node(struct memory_block *mem_blk,
- void *arg);
extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
extern int register_memory_node_under_compute_node(unsigned int mem_nid,
@@ -170,11 +168,6 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
{
return 0;
}
-static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
- void *arg)
-{
- return 0;
-}
static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
}
--
2.21.0
^ permalink raw reply related
* [PATCH v1 5/6] mm/memory_hotplug: Move and simplify walk_memory_blocks()
From: David Hildenbrand @ 2019-06-14 10:01 UTC (permalink / raw)
To: linux-kernel
Cc: Oscar Salvador, Stephen Rothwell, Michal Hocko, Pavel Tatashin,
David Hildenbrand, mike.travis@hpe.com, Greg Kroah-Hartman,
Rafael J. Wysocki, Wei Yang, linux-mm, linux-acpi, Andrew Banman,
Arun KS, Qian Cai, Dan Williams, linuxppc-dev, Andrew Morton
In-Reply-To: <20190614100114.311-1-david@redhat.com>
Let's move walk_memory_blocks() to the place where memory block logic
resides and simplify it. While at it, add a type for the callback function.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Andrew Banman <andrew.banman@hpe.com>
Cc: "mike.travis@hpe.com" <mike.travis@hpe.com>
Cc: Oscar Salvador <osalvador@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Arun KS <arunks@codeaurora.org>
Cc: Qian Cai <cai@lca.pw>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
drivers/base/memory.c | 42 ++++++++++++++++++++++++++
include/linux/memory.h | 3 ++
include/linux/memory_hotplug.h | 2 --
mm/memory_hotplug.c | 55 ----------------------------------
4 files changed, 45 insertions(+), 57 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 3ed08e67e64f..4f2e2f3b3d78 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -44,6 +44,11 @@ static inline unsigned long pfn_to_block_id(unsigned long pfn)
return base_memory_block_id(pfn_to_section_nr(pfn));
}
+static inline unsigned long phys_to_block_id(unsigned long phys)
+{
+ return pfn_to_block_id(PFN_DOWN(phys));
+}
+
static int memory_subsys_online(struct device *dev);
static int memory_subsys_offline(struct device *dev);
@@ -853,3 +858,40 @@ int __init memory_dev_init(void)
printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
return ret;
}
+
+/**
+ * walk_memory_blocks - walk through all present memory blocks overlapped
+ * by the range [start, start + size)
+ *
+ * @start: start address of the memory range
+ * @size: size of the memory range
+ * @arg: argument passed to func
+ * @func: callback for each memory section walked
+ *
+ * This function walks through all present memory blocks overlapped by the
+ * range [start, start + size), calling func on each memory block.
+ *
+ * In case func() returns an error, walking is aborted and the error is
+ * returned.
+ */
+int walk_memory_blocks(unsigned long start, unsigned long size,
+ void *arg, walk_memory_blocks_func_t func)
+{
+ const unsigned long start_block_id = phys_to_block_id(start);
+ const unsigned long end_block_id = phys_to_block_id(start + size - 1);
+ struct memory_block *mem;
+ unsigned long block_id;
+ int ret = 0;
+
+ for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
+ mem = find_memory_block_by_id(block_id, NULL);
+ if (!mem)
+ continue;
+
+ ret = func(mem, arg);
+ put_device(&mem->dev);
+ if (ret)
+ break;
+ }
+ return ret;
+}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index f26a5417ec5d..b3b388775a30 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -119,6 +119,9 @@ extern int memory_isolate_notify(unsigned long val, void *v);
extern struct memory_block *find_memory_block_hinted(struct mem_section *,
struct memory_block *);
extern struct memory_block *find_memory_block(struct mem_section *);
+typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
+extern int walk_memory_blocks(unsigned long start, unsigned long size,
+ void *arg, walk_memory_blocks_func_t func);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d9fffc34949f..475aff8efbf8 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -340,8 +340,6 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */
extern void __ref free_area_init_core_hotplug(int nid);
-extern int walk_memory_blocks(unsigned long start, unsigned long size,
- void *arg, int (*func)(struct memory_block *, void *));
extern int __add_memory(int nid, u64 start, u64 size);
extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 122a7d31efdd..fc558e9ff939 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1661,62 +1661,7 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
{
return __offline_pages(start_pfn, start_pfn + nr_pages);
}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
-/**
- * walk_memory_blocks - walk through all present memory blocks overlapped
- * by the range [start, start + size)
- *
- * @start: start address of the memory range
- * @size: size of the memory range
- * @arg: argument passed to func
- * @func: callback for each memory block walked
- *
- * This function walks through all present memory blocks overlapped by the
- * range [start, start + size), calling func on each memory block.
- *
- * Returns the return value of func.
- */
-int walk_memory_blocks(unsigned long start, unsigned long size,
- void *arg, int (*func)(struct memory_block *, void *))
-{
- const unsigned long start_pfn = PFN_DOWN(start);
- const unsigned long end_pfn = PFN_UP(start + size - 1);
- struct memory_block *mem = NULL;
- struct mem_section *section;
- unsigned long pfn, section_nr;
- int ret;
-
- for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
- section_nr = pfn_to_section_nr(pfn);
- if (!present_section_nr(section_nr))
- continue;
-
- section = __nr_to_section(section_nr);
- /* same memblock? */
- if (mem)
- if ((section_nr >= mem->start_section_nr) &&
- (section_nr <= mem->end_section_nr))
- continue;
-
- mem = find_memory_block_hinted(section, mem);
- if (!mem)
- continue;
-
- ret = func(mem, arg);
- if (ret) {
- kobject_put(&mem->dev.kobj);
- return ret;
- }
- }
-
- if (mem)
- kobject_put(&mem->dev.kobj);
-
- return 0;
-}
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
{
int ret = !is_memblock_offlined(mem);
--
2.21.0
^ permalink raw reply related
* [PATCH v1 6/6] drivers/base/memory.c: Get rid of find_memory_block_hinted()
From: David Hildenbrand @ 2019-06-14 10:01 UTC (permalink / raw)
To: linux-kernel
Cc: Stephen Rothwell, Pavel Tatashin, David Hildenbrand,
mike.travis@hpe.com, Greg Kroah-Hartman, Rafael J. Wysocki,
linux-mm, linux-acpi, Dan Williams, linuxppc-dev, Andrew Morton
In-Reply-To: <20190614100114.311-1-david@redhat.com>
No longer needed, let's remove it.
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: "mike.travis@hpe.com" <mike.travis@hpe.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
drivers/base/memory.c | 12 +++---------
include/linux/memory.h | 2 --
2 files changed, 3 insertions(+), 11 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 4f2e2f3b3d78..42e5a7493fe8 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -606,14 +606,6 @@ static struct memory_block *find_memory_block_by_id(unsigned long block_id,
return to_memory_block(dev);
}
-struct memory_block *find_memory_block_hinted(struct mem_section *section,
- struct memory_block *hint)
-{
- unsigned long block_id = base_memory_block_id(__section_nr(section));
-
- return find_memory_block_by_id(block_id, hint);
-}
-
/*
* For now, we have a linear search to go find the appropriate
* memory_block corresponding to a particular phys_index. If
@@ -624,7 +616,9 @@ struct memory_block *find_memory_block_hinted(struct mem_section *section,
*/
struct memory_block *find_memory_block(struct mem_section *section)
{
- return find_memory_block_hinted(section, NULL);
+ unsigned long block_id = base_memory_block_id(__section_nr(section));
+
+ return find_memory_block_by_id(block_id, hint);
}
static struct attribute *memory_memblk_attrs[] = {
diff --git a/include/linux/memory.h b/include/linux/memory.h
index b3b388775a30..02e633f3ede0 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -116,8 +116,6 @@ void remove_memory_block_devices(unsigned long start, unsigned long size);
extern int memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
extern int memory_isolate_notify(unsigned long val, void *v);
-extern struct memory_block *find_memory_block_hinted(struct mem_section *,
- struct memory_block *);
extern struct memory_block *find_memory_block(struct mem_section *);
typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
extern int walk_memory_blocks(unsigned long start, unsigned long size,
--
2.21.0
^ permalink raw reply related
* Re: [PATCH] powerpc: Enable kernel XZ compression option on PPC_85xx
From: Christophe Leroy @ 2019-06-14 10:06 UTC (permalink / raw)
To: Michael Ellerman, Daniel Axtens, Pawel Dembicki
Cc: linuxppc-dev, Paul Mackerras, linux-kernel, Christian Lamparter
In-Reply-To: <87ftodempa.fsf@concordia.ellerman.id.au>
Le 13/06/2019 à 13:42, Michael Ellerman a écrit :
> Daniel Axtens <dja@axtens.net> writes:
>> Pawel Dembicki <paweldembicki@gmail.com> writes:
>>
>>> Enable kernel XZ compression option on PPC_85xx. Tested with
>>> simpleImage on TP-Link TL-WDR4900 (Freescale P1014 processor).
>>>
>>> Suggested-by: Christian Lamparter <chunkeey@gmail.com>
>>> Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
>>> ---
>>> arch/powerpc/Kconfig | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>>> index 8c1c636308c8..daf4cb968922 100644
>>> --- a/arch/powerpc/Kconfig
>>> +++ b/arch/powerpc/Kconfig
>>> @@ -196,7 +196,7 @@ config PPC
>>> select HAVE_IOREMAP_PROT
>>> select HAVE_IRQ_EXIT_ON_IRQ_STACK
>>> select HAVE_KERNEL_GZIP
>>> - select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x
>>> + select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x || PPC_85xx
>>
>> (I'm not super well versed in the compression stuff, so apologies if
>> this is a dumb question.) If it's this simple, is there any reason we
>> can't turn it on generally, or convert it to a blacklist of platforms
>> known not to work?
>
> For some platforms enabling XZ requires that your u-boot has XZ support,
> and I'm not very clear on when that support landed in u-boot and what
> boards have it. And there are boards out there with old/custom u-boots
> that effectively can't be updated.
I don't think that it has anything to do with u-boot.
AFAIK, today's mainline U-boot only supports GZIP (by default) and the
following optional ones: LZO, LZMA, LZ4.
If we want to set additional compression types for u-boot, it is not
enough to select HAVE_KERNEL_XXXX, we also have to update uImage
generation scripts.
See the series I sent some time ago:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=104153
I'll resent it without bzip2 as today's uboot doesn't support bzip2 anymore.
>
> But as a server guy I don't really know the details of all that very
> well. So if someone tells me that we should enable XZ for everything, or
> as you say just black list some platforms, then that's fine by me.
>
I guess we first need to understand how this is used.
Christophe
^ permalink raw reply
* [PATCH v3 2/3] powerpc/boot: Add lzma support for uImage
From: Christophe Leroy @ 2019-06-14 10:16 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1b4946c9e580b51b6ca2ddc5963d66406c013c2d.1560507284.git.christophe.leroy@c-s.fr>
This patch allows to generate lzma compressed uImage
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v3: no change
v2: restore alphabetic order in Kconfig
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/boot/Makefile | 2 ++
arch/powerpc/boot/wrapper | 5 ++++-
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c1c636308c8..5542430ba261 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -196,6 +196,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 73d1f3562978..9b7b11a22925 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -22,6 +22,7 @@ all: $(obj)/zImage
compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP
compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ
+compress-$(CONFIG_KERNEL_LZMA) := CONFIG_KERNEL_LZMA
ifdef CROSS32_COMPILE
BOOTCC := $(CROSS32_COMPILE)gcc
@@ -257,6 +258,7 @@ endif
compressor-$(CONFIG_KERNEL_GZIP) := gz
compressor-$(CONFIG_KERNEL_XZ) := xz
+compressor-$(CONFIG_KERNEL_LZMA) := lzma
# args (to if_changed): 1 = (this rule), 2 = platform, 3 = dts 4=dtb 5=initrd
quiet_cmd_wrap = WRAP $@
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 85d97360b1c9..19887f6ad7c1 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -136,7 +136,7 @@ while [ "$#" -gt 0 ]; do
-Z)
shift
[ "$#" -gt 0 ] || usage
- [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "none" ] || usage
+ [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "lzma" -o "$1" != "none" ] || usage
compression=".$1"
uboot_comp=$1
@@ -373,6 +373,9 @@ if [ -z "$cacheit" -o ! -f "$vmz$compression" -o "$vmz$compression" -ot "$kernel
.gz)
gzip -n -f -9 "$vmz.$$"
;;
+ .lzma)
+ xz --format=lzma -f -6 "$vmz.$$"
+ ;;
*)
# drop the compression suffix so the stripped vmlinux is used
compression=
--
2.13.3
^ permalink raw reply related
* [PATCH v3 1/3] powerpc/boot: don't force gzipped uImage
From: Christophe Leroy @ 2019-06-14 10:16 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
This patch modifies the generation of uImage by handing over
the selected compression type instead of forcing gzip
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v3: no change
v2: no change
---
arch/powerpc/boot/wrapper | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 532d45833396..85d97360b1c9 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -40,6 +40,7 @@ dts=
cacheit=
binary=
compression=.gz
+uboot_comp=gzip
pie=
format=
@@ -130,6 +131,7 @@ while [ "$#" -gt 0 ]; do
;;
-z)
compression=.gz
+ uboot_comp=gzip
;;
-Z)
shift
@@ -137,15 +139,21 @@ while [ "$#" -gt 0 ]; do
[ "$1" != "gz" -o "$1" != "xz" -o "$1" != "none" ] || usage
compression=".$1"
+ uboot_comp=$1
if [ $compression = ".none" ]; then
compression=
+ uboot_comp=none
fi
+ if [ $uboot_comp = "gz" ]; then
+ uboot_comp=gzip
+ fi
;;
--no-gzip)
# a "feature" of the the wrapper script is that it can be used outside
# the kernel tree. So keeping this around for backwards compatibility.
compression=
+ uboot_comp=none
;;
-?)
usage
@@ -368,6 +376,7 @@ if [ -z "$cacheit" -o ! -f "$vmz$compression" -o "$vmz$compression" -ot "$kernel
*)
# drop the compression suffix so the stripped vmlinux is used
compression=
+ uboot_comp=none
;;
esac
@@ -411,7 +420,7 @@ membase=`${CROSS}objdump -p "$kernel" | grep -m 1 LOAD | awk '{print $7}'`
case "$platform" in
uboot)
rm -f "$ofile"
- ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a $membase -e $membase \
+ ${MKIMAGE} -A ppc -O linux -T kernel -C $uboot_comp -a $membase -e $membase \
$uboot_version -d "$vmz" "$ofile"
if [ -z "$cacheit" ]; then
rm -f "$vmz"
--
2.13.3
^ permalink raw reply related
* [PATCH v3 3/3] powerpc/boot: Add lzo support for uImage
From: Christophe Leroy @ 2019-06-14 10:16 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1b4946c9e580b51b6ca2ddc5963d66406c013c2d.1560507284.git.christophe.leroy@c-s.fr>
This patch allows to generate lzo compressed uImage
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v3: rebased following the drop of patch 3 (bzip2) which is not supported anymore by uboot.
v2: restore alphabetic order in Kconfig
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/boot/Makefile | 2 ++
arch/powerpc/boot/wrapper | 5 ++++-
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5542430ba261..2b041f88b593 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -197,6 +197,7 @@ config PPC
select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
+ select HAVE_KERNEL_LZO if DEFAULT_UIMAGE
select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 9b7b11a22925..36fb51a9329f 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -23,6 +23,7 @@ all: $(obj)/zImage
compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP
compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ
compress-$(CONFIG_KERNEL_LZMA) := CONFIG_KERNEL_LZMA
+compress-$(CONFIG_KERNEL_LZO) := CONFIG_KERNEL_LZO
ifdef CROSS32_COMPILE
BOOTCC := $(CROSS32_COMPILE)gcc
@@ -259,6 +260,7 @@ endif
compressor-$(CONFIG_KERNEL_GZIP) := gz
compressor-$(CONFIG_KERNEL_XZ) := xz
compressor-$(CONFIG_KERNEL_LZMA) := lzma
+compressor-$(CONFIG_KERNEL_LZO) := lzo
# args (to if_changed): 1 = (this rule), 2 = platform, 3 = dts 4=dtb 5=initrd
quiet_cmd_wrap = WRAP $@
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 19887f6ad7c1..5148ac271f28 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -136,7 +136,7 @@ while [ "$#" -gt 0 ]; do
-Z)
shift
[ "$#" -gt 0 ] || usage
- [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "lzma" -o "$1" != "none" ] || usage
+ [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "lzma" -o "$1" != "lzo" -o "$1" != "none" ] || usage
compression=".$1"
uboot_comp=$1
@@ -376,6 +376,9 @@ if [ -z "$cacheit" -o ! -f "$vmz$compression" -o "$vmz$compression" -ot "$kernel
.lzma)
xz --format=lzma -f -6 "$vmz.$$"
;;
+ .lzo)
+ lzop -f -9 "$vmz.$$"
+ ;;
*)
# drop the compression suffix so the stripped vmlinux is used
compression=
--
2.13.3
^ permalink raw reply related
* Re: [PATCH v3 2/4] crypto: talitos - fix hash on SEC1.
From: Horia Geanta @ 2019-06-14 11:32 UTC (permalink / raw)
To: Christophe Leroy, Herbert Xu, David S. Miller
Cc: linuxppc-dev@lists.ozlabs.org, linux-crypto@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <732ca0ff440bf4cd589d844cfda71d96efd500f5.1560429844.git.christophe.leroy@c-s.fr>
On 6/13/2019 3:48 PM, Christophe Leroy wrote:
> @@ -336,15 +336,18 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
> tail = priv->chan[ch].tail;
> while (priv->chan[ch].fifo[tail].desc) {
> __be32 hdr;
> + struct talitos_edesc *edesc;
>
> request = &priv->chan[ch].fifo[tail];
> + edesc = container_of(request->desc, struct talitos_edesc, desc);
Not needed for all cases, should be moved to the block that uses it.
>
> /* descriptors with their done bits set don't get the error */
> rmb();
> if (!is_sec1)
> hdr = request->desc->hdr;
> else if (request->desc->next_desc)
> - hdr = (request->desc + 1)->hdr1;
> + hdr = ((struct talitos_desc *)
> + (edesc->buf + edesc->dma_len))->hdr1;
> else
> hdr = request->desc->hdr1;
>
[snip]
> @@ -2058,7 +2065,18 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
> sg_copy_to_buffer(areq->src, nents,
> ctx_buf + req_ctx->nbuf, offset);
> req_ctx->nbuf += offset;
> - req_ctx->psrc = areq->src;
> + for (sg = areq->src; sg && offset >= sg->length;
> + offset -= sg->length, sg = sg_next(sg))
> + ;
> + if (offset) {
> + sg_init_table(req_ctx->bufsl, 2);
> + sg_set_buf(req_ctx->bufsl, sg_virt(sg) + offset,
> + sg->length - offset);
> + sg_chain(req_ctx->bufsl, 2, sg_next(sg));
> + req_ctx->psrc = req_ctx->bufsl;
Isn't this what scatterwalk_ffwd() does?
Horia
^ permalink raw reply
* Re: [PATCH 2/2] powerpc: expose secure variables via sysfs
From: Nayna @ 2019-06-14 13:13 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: linux-efi, Ard Biesheuvel, Eric Ricther, Nayna Jain, linux-kernel,
Mimi Zohar, Claudio Carvalho, Matthew Garret, linuxppc-dev,
Paul Mackerras, Jeremy Kerr, Elaine Palmer, linux-integrity,
George Wilson
In-Reply-To: <20190614063443.GB17056@kroah.com>
On 06/14/2019 02:34 AM, Greg Kroah-Hartman wrote:
> On Thu, Jun 13, 2019 at 04:50:27PM -0400, Nayna Jain wrote:
>> As part of PowerNV secure boot support, OS verification keys are stored
>> and controlled by OPAL as secure variables. These need to be exposed to
>> the userspace so that sysadmins can perform key management tasks.
>>
>> This patch adds the support to expose secure variables via a sysfs
>> interface It reuses the the existing efi defined hooks and backend in
>> order to maintain the compatibility with the userspace tools.
>>
>> Though it reuses a great deal of efi, POWER platforms do not use EFI.
>> A new config, POWER_SECVAR_SYSFS, is defined to enable this new sysfs
>> interface.
>>
>> Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
>> ---
>> arch/powerpc/Kconfig | 2 +
>> drivers/firmware/Makefile | 1 +
>> drivers/firmware/efi/efivars.c | 2 +-
>> drivers/firmware/powerpc/Kconfig | 12 +
>> drivers/firmware/powerpc/Makefile | 3 +
>> drivers/firmware/powerpc/efi_error.c | 46 ++++
>> drivers/firmware/powerpc/secvar.c | 326 +++++++++++++++++++++++++++
>> 7 files changed, 391 insertions(+), 1 deletion(-)
>> create mode 100644 drivers/firmware/powerpc/Kconfig
>> create mode 100644 drivers/firmware/powerpc/Makefile
>> create mode 100644 drivers/firmware/powerpc/efi_error.c
>> create mode 100644 drivers/firmware/powerpc/secvar.c
> If you add/remove/modify sysfs files, you also need to update the
> relevant Documentation/ABI/ entry as well. Please add something there
> to describe your new files when you resend the next version of this
> patch series.
>
>> diff --git a/drivers/firmware/powerpc/Kconfig b/drivers/firmware/powerpc/Kconfig
>> new file mode 100644
>> index 000000000000..e0303fc517d5
>> --- /dev/null
>> +++ b/drivers/firmware/powerpc/Kconfig
>> @@ -0,0 +1,12 @@
>> +config POWER_SECVAR_SYSFS
>> + tristate "Enable sysfs interface for POWER secure variables"
>> + default n
> default is always n, no need to list it.
>
>> --- /dev/null
>> +++ b/drivers/firmware/powerpc/efi_error.c
>> @@ -0,0 +1,46 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2019 IBM Corporation
>> + * Author: Nayna Jain <nayna@linux.ibm.com>
>> + *
>> + * efi_error.c
>> + * - Error codes as understood by efi based tools
>> + * Taken from drivers/firmware/efi/efi.c
> Why not just export the symbol from the original file instead of
> duplicating it here?
>
>> +static int convert_buffer_to_efi_guid(u8 *buffer, efi_guid_t *guid)
>> +{
>> + u32 *a1;
>> + u16 *a2;
>> + u16 *a3;
>> +
>> + a1 = kzalloc(4, GFP_KERNEL);
> No error checking in this function for memory issues at all?
>
>> + memcpy(a1, buffer, 4);
>> + *a1 = be32_to_cpu(*a1);
>> +
>> + a2 = kzalloc(2, GFP_KERNEL);
>> + memcpy(a2, buffer+4, 2);
>> + *a2 = be16_to_cpu(*a2);
>> +
>> + a3 = kzalloc(2, GFP_KERNEL);
>> + memcpy(a3, buffer+6, 2);
>> + *a3 = be16_to_cpu(*a3);
>> +
>> + *guid = EFI_GUID(*a1, *a2, *a3, *(buffer + 8),
>> + *(buffer + 9),
>> + *(buffer + 10),
>> + *(buffer + 11),
>> + *(buffer + 12),
>> + *(buffer + 13),
>> + *(buffer + 14),
>> + *(buffer + 15));
>> +
>> + kfree(a1);
>> + kfree(a2);
>> + kfree(a3);
>> + return 0;
>> +}
>> +static efi_status_t powerpc_get_next_variable(unsigned long *name_size,
>> + efi_char16_t *name,
>> + efi_guid_t *vendor)
>> +{
>> + int rc;
>> + u8 *key;
>> + int namesize;
>> + unsigned long keylen;
>> + unsigned long keysize = 1024;
>> + unsigned long *mdsize;
>> + u8 *mdata = NULL;
>> + efi_guid_t guid;
>> +
>> + if (ucs2_strnlen(name, 1024) > 0) {
>> + createkey(name, &key, &keylen);
>> + } else {
>> + keylen = 0;
>> + key = kzalloc(1024, GFP_KERNEL);
>> + }
>> +
>> + pr_info("%s: powerpc get next variable, key is %s\n", __func__, key);
> Don't put debugging info like this in the kernel log of everyone :(
>
>> +
>> + rc = opal_get_next_variable(key, &keylen, keysize);
>> + if (rc) {
>> + kfree(key);
>> + return opal_to_efi_status(rc);
>> + }
>> +
>> + mdsize = kzalloc(sizeof(unsigned long), GFP_KERNEL);
> No error checking?
>
>> + rc = opal_get_variable_size(key, keylen, mdsize, NULL);
>> + if (rc)
>> + goto out;
>> +
>> + if (*mdsize <= 0)
>> + goto out;
>> +
>> + mdata = kzalloc(*mdsize, GFP_KERNEL);
>> +
>> + rc = opal_get_variable(key, keylen, mdata, mdsize, NULL, NULL);
>> + if (rc)
>> + goto out;
>> +
>> + if (*mdsize > 0) {
>> + namesize = *mdsize - sizeof(efi_guid_t) - sizeof(u32);
>> + if (namesize > 0) {
>> + memset(&guid, 0, sizeof(efi_guid_t));
>> + convert_buffer_to_efi_guid(mdata + namesize, &guid);
>> + memcpy(vendor, &guid, sizeof(efi_guid_t));
>> + memset(name, 0, namesize + 2);
>> + memcpy(name, mdata, namesize);
>> + *name_size = namesize + 2;
>> + name[namesize++] = 0;
>> + name[namesize] = 0;
>> + }
>> + }
>> +
>> +out:
>> + kfree(mdsize);
>> + kfree(mdata);
>> +
>> + return opal_to_efi_status(rc);
>> +}
>> +
>> +static efi_status_t powerpc_set_variable(efi_char16_t *name, efi_guid_t *vendor,
>> + u32 attr, unsigned long data_size,
>> + void *data)
>> +{
>> + int rc;
>> + u8 *key;
>> + unsigned long keylen;
>> + u8 *metadata;
>> + unsigned long mdsize;
>> +
>> + if (!name)
>> + return EFI_INVALID_PARAMETER;
>> +
>> + if (!vendor)
>> + return EFI_INVALID_PARAMETER;
>> +
>> + createkey(name, &key, &keylen);
>> + pr_info("%s: nayna key is %s\n", __func__, key);
> Again, please remove all of your debugging code when resending.
>
>> +
>> + createmetadata(name, vendor, &attr, &metadata, &mdsize);
>> +
>> + rc = opal_set_variable(key, keylen, metadata, mdsize, data, data_size);
>> +
>> + return opal_to_efi_status(rc);
>> +}
>> +
>> +
>> +static const struct efivar_operations efivar_ops = {
>> + .get_variable = powerpc_get_variable,
>> + .set_variable = powerpc_set_variable,
>> + .get_next_variable = powerpc_get_next_variable,
>> +};
>> +
>> +
>> +static __init int power_secvar_init(void)
>> +{
>> + int rc = 0;
>> + unsigned long ver = 0;
>> +
>> + rc = opal_variable_version(&ver);
>> + if (ver != BACKEND_TC_COMPAT_V1) {
>> + pr_info("Compatible backend unsupported\n");
>> + return -1;
> Do not make up error numbers, use the defined values please.
Thanks Greg for the review !!
I will address everything in the next version.
Thanks & Regards,
- Nayna
^ permalink raw reply
* [PATCH] crypto: nx: no need to check return value of debugfs_create functions
From: Greg Kroah-Hartman @ 2019-06-14 13:50 UTC (permalink / raw)
To: Breno Leitão, Nayna Jain, Paulo Flabiano Smorigo,
Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Herbert Xu
Cc: linuxppc-dev, linux-crypto
When calling debugfs functions, there is no need to ever check the
return value. The function can work or not, but the code logic should
never do something different based on this.
Also, there is no need to store the individual debugfs file names,
especially as the whole directiry is deleted at once, so remove the
unneeded structure entirely.
Cc: "Breno Leitão" <leitao@debian.org>
Cc: Nayna Jain <nayna@linux.ibm.com>
Cc: Paulo Flabiano Smorigo <pfsmorigo@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-crypto@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/crypto/nx/nx.c | 4 +-
drivers/crypto/nx/nx.h | 12 +-----
drivers/crypto/nx/nx_debugfs.c | 71 +++++++++++-----------------------
3 files changed, 26 insertions(+), 61 deletions(-)
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 3a5e31be4764..20b5e276f184 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -581,9 +581,7 @@ static int nx_register_algs(void)
memset(&nx_driver.stats, 0, sizeof(struct nx_stats));
- rc = NX_DEBUGFS_INIT(&nx_driver);
- if (rc)
- goto out;
+ NX_DEBUGFS_INIT(&nx_driver);
nx_driver.of.status = NX_OKAY;
diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h
index c3e54af18645..c6b5a3be02be 100644
--- a/drivers/crypto/nx/nx.h
+++ b/drivers/crypto/nx/nx.h
@@ -76,20 +76,12 @@ struct nx_stats {
atomic_t last_error_pid;
};
-struct nx_debugfs {
- struct dentry *dfs_root;
- struct dentry *dfs_aes_ops, *dfs_aes_bytes;
- struct dentry *dfs_sha256_ops, *dfs_sha256_bytes;
- struct dentry *dfs_sha512_ops, *dfs_sha512_bytes;
- struct dentry *dfs_errors, *dfs_last_error, *dfs_last_error_pid;
-};
-
struct nx_crypto_driver {
struct nx_stats stats;
struct nx_of of;
struct vio_dev *viodev;
struct vio_driver viodriver;
- struct nx_debugfs dfs;
+ struct dentry *dfs_root;
};
#define NX_GCM4106_NONCE_LEN (4)
@@ -177,7 +169,7 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *, unsigned int,
#define NX_DEBUGFS_INIT(drv) nx_debugfs_init(drv)
#define NX_DEBUGFS_FINI(drv) nx_debugfs_fini(drv)
-int nx_debugfs_init(struct nx_crypto_driver *);
+void nx_debugfs_init(struct nx_crypto_driver *);
void nx_debugfs_fini(struct nx_crypto_driver *);
#else
#define NX_DEBUGFS_INIT(drv) (0)
diff --git a/drivers/crypto/nx/nx_debugfs.c b/drivers/crypto/nx/nx_debugfs.c
index 7ab2e8dcd9b4..3aa80a6e34a1 100644
--- a/drivers/crypto/nx/nx_debugfs.c
+++ b/drivers/crypto/nx/nx_debugfs.c
@@ -42,62 +42,37 @@
* Documentation/ABI/testing/debugfs-pfo-nx-crypto
*/
-int nx_debugfs_init(struct nx_crypto_driver *drv)
+void nx_debugfs_init(struct nx_crypto_driver *drv)
{
- struct nx_debugfs *dfs = &drv->dfs;
+ struct dentry *root;
- dfs->dfs_root = debugfs_create_dir(NX_NAME, NULL);
+ root = debugfs_create_dir(NX_NAME, NULL);
+ drv->dfs_root = root;
- dfs->dfs_aes_ops =
- debugfs_create_u32("aes_ops",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root, (u32 *)&drv->stats.aes_ops);
- dfs->dfs_sha256_ops =
- debugfs_create_u32("sha256_ops",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.sha256_ops);
- dfs->dfs_sha512_ops =
- debugfs_create_u32("sha512_ops",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.sha512_ops);
- dfs->dfs_aes_bytes =
- debugfs_create_u64("aes_bytes",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u64 *)&drv->stats.aes_bytes);
- dfs->dfs_sha256_bytes =
- debugfs_create_u64("sha256_bytes",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u64 *)&drv->stats.sha256_bytes);
- dfs->dfs_sha512_bytes =
- debugfs_create_u64("sha512_bytes",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u64 *)&drv->stats.sha512_bytes);
- dfs->dfs_errors =
- debugfs_create_u32("errors",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root, (u32 *)&drv->stats.errors);
- dfs->dfs_last_error =
- debugfs_create_u32("last_error",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.last_error);
- dfs->dfs_last_error_pid =
- debugfs_create_u32("last_error_pid",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.last_error_pid);
- return 0;
+ debugfs_create_u32("aes_ops", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.aes_ops);
+ debugfs_create_u32("sha256_ops", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.sha256_ops);
+ debugfs_create_u32("sha512_ops", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.sha512_ops);
+ debugfs_create_u64("aes_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u64 *)&drv->stats.aes_bytes);
+ debugfs_create_u64("sha256_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u64 *)&drv->stats.sha256_bytes);
+ debugfs_create_u64("sha512_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u64 *)&drv->stats.sha512_bytes);
+ debugfs_create_u32("errors", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.errors);
+ debugfs_create_u32("last_error", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.last_error);
+ debugfs_create_u32("last_error_pid", S_IRUSR | S_IRGRP | S_IROTH,
+ dfs_root, (u32 *)&drv->stats.last_error_pid);
}
void
nx_debugfs_fini(struct nx_crypto_driver *drv)
{
- debugfs_remove_recursive(drv->dfs.dfs_root);
+ debugfs_remove_recursive(drv->dfs_root);
}
#endif
--
2.22.0
^ permalink raw reply related
* [PATCH v2] crypto: nx: no need to check return value of debugfs_create functions
From: Greg Kroah-Hartman @ 2019-06-14 14:29 UTC (permalink / raw)
To: Breno Leitão, Nayna Jain, Paulo Flabiano Smorigo,
Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Herbert Xu
Cc: linuxppc-dev, linux-crypto
In-Reply-To: <20190614135031.GA5809@kroah.com>
When calling debugfs functions, there is no need to ever check the
return value. The function can work or not, but the code logic should
never do something different based on this.
Also, there is no need to store the individual debugfs file names,
especially as the whole directiry is deleted at once, so remove the
unneeded structure entirely.
Cc: "Breno Leitão" <leitao@debian.org>
Cc: Nayna Jain <nayna@linux.ibm.com>
Cc: Paulo Flabiano Smorigo <pfsmorigo@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-crypto@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
v2: fixed build error found by kbuild
drivers/crypto/nx/nx.c | 4 +-
drivers/crypto/nx/nx.h | 12 +-----
drivers/crypto/nx/nx_debugfs.c | 71 +++++++++++-----------------------
3 files changed, 26 insertions(+), 61 deletions(-)
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 3a5e31be4764..20b5e276f184 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -581,9 +581,7 @@ static int nx_register_algs(void)
memset(&nx_driver.stats, 0, sizeof(struct nx_stats));
- rc = NX_DEBUGFS_INIT(&nx_driver);
- if (rc)
- goto out;
+ NX_DEBUGFS_INIT(&nx_driver);
nx_driver.of.status = NX_OKAY;
diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h
index c3e54af18645..c6b5a3be02be 100644
--- a/drivers/crypto/nx/nx.h
+++ b/drivers/crypto/nx/nx.h
@@ -76,20 +76,12 @@ struct nx_stats {
atomic_t last_error_pid;
};
-struct nx_debugfs {
- struct dentry *dfs_root;
- struct dentry *dfs_aes_ops, *dfs_aes_bytes;
- struct dentry *dfs_sha256_ops, *dfs_sha256_bytes;
- struct dentry *dfs_sha512_ops, *dfs_sha512_bytes;
- struct dentry *dfs_errors, *dfs_last_error, *dfs_last_error_pid;
-};
-
struct nx_crypto_driver {
struct nx_stats stats;
struct nx_of of;
struct vio_dev *viodev;
struct vio_driver viodriver;
- struct nx_debugfs dfs;
+ struct dentry *dfs_root;
};
#define NX_GCM4106_NONCE_LEN (4)
@@ -177,7 +169,7 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *, unsigned int,
#define NX_DEBUGFS_INIT(drv) nx_debugfs_init(drv)
#define NX_DEBUGFS_FINI(drv) nx_debugfs_fini(drv)
-int nx_debugfs_init(struct nx_crypto_driver *);
+void nx_debugfs_init(struct nx_crypto_driver *);
void nx_debugfs_fini(struct nx_crypto_driver *);
#else
#define NX_DEBUGFS_INIT(drv) (0)
diff --git a/drivers/crypto/nx/nx_debugfs.c b/drivers/crypto/nx/nx_debugfs.c
index 7ab2e8dcd9b4..add1d8d0d23c 100644
--- a/drivers/crypto/nx/nx_debugfs.c
+++ b/drivers/crypto/nx/nx_debugfs.c
@@ -42,62 +42,37 @@
* Documentation/ABI/testing/debugfs-pfo-nx-crypto
*/
-int nx_debugfs_init(struct nx_crypto_driver *drv)
+void nx_debugfs_init(struct nx_crypto_driver *drv)
{
- struct nx_debugfs *dfs = &drv->dfs;
+ struct dentry *root;
- dfs->dfs_root = debugfs_create_dir(NX_NAME, NULL);
+ root = debugfs_create_dir(NX_NAME, NULL);
+ drv->dfs_root = root;
- dfs->dfs_aes_ops =
- debugfs_create_u32("aes_ops",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root, (u32 *)&drv->stats.aes_ops);
- dfs->dfs_sha256_ops =
- debugfs_create_u32("sha256_ops",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.sha256_ops);
- dfs->dfs_sha512_ops =
- debugfs_create_u32("sha512_ops",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.sha512_ops);
- dfs->dfs_aes_bytes =
- debugfs_create_u64("aes_bytes",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u64 *)&drv->stats.aes_bytes);
- dfs->dfs_sha256_bytes =
- debugfs_create_u64("sha256_bytes",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u64 *)&drv->stats.sha256_bytes);
- dfs->dfs_sha512_bytes =
- debugfs_create_u64("sha512_bytes",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u64 *)&drv->stats.sha512_bytes);
- dfs->dfs_errors =
- debugfs_create_u32("errors",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root, (u32 *)&drv->stats.errors);
- dfs->dfs_last_error =
- debugfs_create_u32("last_error",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.last_error);
- dfs->dfs_last_error_pid =
- debugfs_create_u32("last_error_pid",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.last_error_pid);
- return 0;
+ debugfs_create_u32("aes_ops", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.aes_ops);
+ debugfs_create_u32("sha256_ops", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.sha256_ops);
+ debugfs_create_u32("sha512_ops", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.sha512_ops);
+ debugfs_create_u64("aes_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u64 *)&drv->stats.aes_bytes);
+ debugfs_create_u64("sha256_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u64 *)&drv->stats.sha256_bytes);
+ debugfs_create_u64("sha512_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u64 *)&drv->stats.sha512_bytes);
+ debugfs_create_u32("errors", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.errors);
+ debugfs_create_u32("last_error", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.last_error);
+ debugfs_create_u32("last_error_pid", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.last_error_pid);
}
void
nx_debugfs_fini(struct nx_crypto_driver *drv)
{
- debugfs_remove_recursive(drv->dfs.dfs_root);
+ debugfs_remove_recursive(drv->dfs_root);
}
#endif
--
2.22.0
^ permalink raw reply related
* [PATCH v2 0/9] Provide vcpu dispatch statistics
From: Naveen N. Rao @ 2019-06-14 15:43 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
Changes since v1:
- Changes have been done primarily to address a few build issues, by
moving changes out of mm/numa.c to other places.
- Patches 1-5,8-9 have been rebased with patches 4 and 5 being
interchanged, and patch 4 incorporating #ifdef CONFIG_PPC_SPLPAR.
- Patch 6 is new and moves vphn code under pseries.
- Patch 7 has changes to move most of the code from mm/numa.c into
platforms/pseries/lpar.c, along with guarding some of these under
CONFIG_PPC_SPLPAR.
Changes since RFC:
- Patches 1/8 to 5/8: no changes, except rebase to powerpc/merge
- Patch 6/8: The mutex guarding the vphn hcall has been dropped. It was
only meant to serialize hcalls issued when stats are initially
enabled. However, in reality, the various per-cpu workers will be
scheduled at slightly different times and chances of hcalls for
retrieving the same associativity information at the same time is very
less. Even in that case, there are no other side effects.
- Patch 6/8: The third column for vcpu dispatches on the same core, but
different thread has been dropped and merged with the second column.
- Patch 7/8: new patch to ensure we don't take too much time while
enabling/disabling statistics on large systems with heavy workload.
- Patch 8/8: new patch adding a document describing the fields in the
procfs file.
--
This series adds a new procfs file /proc/powerpc/vcpudispatch_stats for
providing statistics around how the LPAR processors are dispatched by
the POWER Hypervisor, in a shared LPAR environment. Patch 7/9 has more
details on how the statistics are gathered.
An example output:
$ sudo cat /proc/powerpc/vcpudispatch_stats
cpu0 6839 4126 2683 30 0 6821 18 0
cpu1 2515 1274 1229 12 0 2509 6 0
cpu2 2317 1198 1109 10 0 2312 5 0
cpu3 2259 1165 1088 6 0 2256 3 0
cpu4 2205 1143 1056 6 0 2202 3 0
cpu5 2165 1121 1038 6 0 2162 3 0
cpu6 2183 1127 1050 6 0 2180 3 0
cpu7 2193 1133 1052 8 0 2187 6 0
cpu8 2165 1115 1032 18 0 2156 9 0
cpu9 2301 1252 1033 16 0 2293 8 0
cpu10 2197 1138 1041 18 0 2187 10 0
cpu11 2273 1185 1062 26 0 2260 13 0
cpu12 2186 1125 1043 18 0 2177 9 0
cpu13 2161 1115 1030 16 0 2153 8 0
cpu14 2206 1153 1033 20 0 2196 10 0
cpu15 2163 1115 1032 16 0 2155 8 0
In the output above, for vcpu0, there have been 6839 dispatches since
statistics were enabled. 4126 of those dispatches were on the same
physical cpu as the last time. 2683 were on a different core, but within
the same chip, while 30 dispatches were on a different chip compared to
its last dispatch.
Also, out of the total of 6839 dispatches, we see that there have been
6821 dispatches on the vcpu's home node, while 18 dispatches were
outside its home node, on a neighbouring chip.
- Naveen
Naveen N. Rao (9):
powerpc/pseries: Use macros for referring to the DTL enable mask
powerpc/pseries: Do not save the previous DTL mask value
powerpc/pseries: Factor out DTL buffer allocation and registration
routines
powerpc/pseries: Introduce helpers to gatekeep DTLB usage
powerpc/pseries: Generalize hcall_vphn()
powerpc/pseries: Move mm/book3s64/vphn.c under platforms/pseries/
powerpc/pseries: Provide vcpu dispatch statistics
powerpc/pseries: Protect against hogging the cpu while setting up the
stats
powerpc/pseries: Add documentation for vcpudispatch_stats
Documentation/powerpc/vcpudispatch_stats.txt | 68 ++
arch/powerpc/include/asm/lppaca.h | 11 +
arch/powerpc/include/asm/plpar_wrappers.h | 23 +
arch/powerpc/include/asm/topology.h | 6 +
arch/powerpc/mm/book3s64/Makefile | 1 -
arch/powerpc/mm/book3s64/vphn.h | 16 -
arch/powerpc/mm/numa.c | 34 +-
arch/powerpc/platforms/pseries/Makefile | 1 +
arch/powerpc/platforms/pseries/dtl.c | 22 +-
arch/powerpc/platforms/pseries/lpar.c | 647 +++++++++++++++++-
arch/powerpc/platforms/pseries/setup.c | 34 +-
.../{mm/book3s64 => platforms/pseries}/vphn.c | 16 +-
12 files changed, 784 insertions(+), 95 deletions(-)
create mode 100644 Documentation/powerpc/vcpudispatch_stats.txt
delete mode 100644 arch/powerpc/mm/book3s64/vphn.h
rename arch/powerpc/{mm/book3s64 => platforms/pseries}/vphn.c (83%)
--
2.21.0
^ permalink raw reply
* [PATCH v2 1/9] powerpc/pseries: Use macros for referring to the DTL enable mask
From: Naveen N. Rao @ 2019-06-14 15:43 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
In-Reply-To: <cover.1560526066.git.naveen.n.rao@linux.vnet.ibm.com>
Introduce macros to encode the DTL enable mask fields and use those
instead of hardcoding numbers.
Acked-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/lppaca.h | 11 +++++++++++
arch/powerpc/platforms/pseries/dtl.c | 8 +-------
arch/powerpc/platforms/pseries/lpar.c | 2 +-
arch/powerpc/platforms/pseries/setup.c | 2 +-
4 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 806494283e2a..9837bb2b3e69 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -141,6 +141,17 @@ struct dtl_entry {
#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+/*
+ * Dispatch trace log event enable mask:
+ * 0x1: voluntary virtual processor waits
+ * 0x2: time-slice preempts
+ * 0x4: virtual partition memory page faults
+ */
+#define DTL_LOG_CEDE 0x1
+#define DTL_LOG_PREEMPT 0x2
+#define DTL_LOG_FAULT 0x4
+#define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
+
extern struct kmem_cache *dtl_cache;
/*
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index ab5de985a787..337378f3d1fd 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -27,13 +27,7 @@ struct dtl {
};
static DEFINE_PER_CPU(struct dtl, cpu_dtl);
-/*
- * Dispatch trace log event mask:
- * 0x7: 0x1: voluntary virtual processor waits
- * 0x2: time-slice preempts
- * 0x4: virtual partition memory page faults
- */
-static u8 dtl_event_mask = 0x7;
+static u8 dtl_event_mask = DTL_LOG_ALL;
/*
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 73620dfb63a1..96c4677cf8ab 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -113,7 +113,7 @@ void vpa_init(int cpu)
pr_err("WARNING: DTL registration of cpu %d (hw %d) "
"failed with %ld\n", smp_processor_id(),
hwcpu, ret);
- lppaca_of(cpu).dtl_enable_mask = 2;
+ lppaca_of(cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
}
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8fa012a65a71..a643e45d0544 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -312,7 +312,7 @@ static int alloc_dispatch_logs(void)
pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
"with %d\n", smp_processor_id(),
hard_smp_processor_id(), ret);
- get_paca()->lppaca_ptr->dtl_enable_mask = 2;
+ get_paca()->lppaca_ptr->dtl_enable_mask = DTL_LOG_PREEMPT;
return 0;
}
--
2.21.0
^ permalink raw reply related
* [PATCH v2 2/9] powerpc/pseries: Do not save the previous DTL mask value
From: Naveen N. Rao @ 2019-06-14 15:44 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
In-Reply-To: <cover.1560526066.git.naveen.n.rao@linux.vnet.ibm.com>
When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is enabled, we always initialize
DTL enable mask to DTL_LOG_PREEMPT (0x2). There are no other places
where the mask is changed. As such, when reading the DTL log buffer
through debugfs, there is no need to save and restore the previous mask
value.
We don't need to save and restore the earlier mask value if
CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not enabled. So, remove the field
from the structure as well.
Acked-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/dtl.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 337378f3d1fd..a88176afba32 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -42,7 +42,6 @@ struct dtl_ring {
struct dtl_entry *write_ptr;
struct dtl_entry *buf;
struct dtl_entry *buf_end;
- u8 saved_dtl_mask;
};
static DEFINE_PER_CPU(struct dtl_ring, dtl_rings);
@@ -92,7 +91,6 @@ static int dtl_start(struct dtl *dtl)
dtlr->write_ptr = dtl->buf;
/* enable event logging */
- dtlr->saved_dtl_mask = lppaca_of(dtl->cpu).dtl_enable_mask;
lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask;
dtl_consumer = consume_dtle;
@@ -110,7 +108,7 @@ static void dtl_stop(struct dtl *dtl)
dtlr->buf = NULL;
/* restore dtl_enable_mask */
- lppaca_of(dtl->cpu).dtl_enable_mask = dtlr->saved_dtl_mask;
+ lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
if (atomic_dec_and_test(&dtl_count))
dtl_consumer = NULL;
--
2.21.0
^ permalink raw reply related
* [PATCH v2 4/9] powerpc/pseries: Introduce helpers to gatekeep DTLB usage
From: Naveen N. Rao @ 2019-06-14 15:44 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
In-Reply-To: <cover.1560526066.git.naveen.n.rao@linux.vnet.ibm.com>
Since we would be introducing a new user of the DTL buffer in a
subsequent patch, add helpers to gatekeep use of the DTL buffer. The
current usage of the DTL buffer from debugfs is at a per-cpu level
(corresponding to the cpu debugfs file that is opened). Subsequently, we
will have users enabling/accessing DTLB for all online cpus. These
helpers allow any number of per-cpu users, or a single global user
exclusively.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/plpar_wrappers.h | 2 ++
arch/powerpc/platforms/pseries/dtl.c | 10 +++++-
arch/powerpc/platforms/pseries/lpar.c | 38 +++++++++++++++++++++++
3 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index d08feb1bc2bd..ab7dd454b6eb 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -88,6 +88,8 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa)
return vpa_call(H_VPA_REG_DTL, cpu, vpa);
}
+extern bool register_dtl_buffer_access(bool global);
+extern void unregister_dtl_buffer_access(bool global);
extern void register_dtl_buffer(int cpu);
extern void alloc_dtl_buffers(void);
extern void vpa_init(int cpu);
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index a88176afba32..de531311cd9a 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -180,11 +180,15 @@ static int dtl_enable(struct dtl *dtl)
if (dtl->buf)
return -EBUSY;
+ if (register_dtl_buffer_access(false))
+ return -EBUSY;
+
n_entries = dtl_buf_entries;
buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
if (!buf) {
printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
__func__, dtl->cpu);
+ unregister_dtl_buffer_access(false);
return -ENOMEM;
}
@@ -201,8 +205,11 @@ static int dtl_enable(struct dtl *dtl)
}
spin_unlock(&dtl->lock);
- if (rc)
+ if (rc) {
+ unregister_dtl_buffer_access(false);
kmem_cache_free(dtl_cache, buf);
+ }
+
return rc;
}
@@ -214,6 +221,7 @@ static void dtl_disable(struct dtl *dtl)
dtl->buf = NULL;
dtl->buf_entries = 0;
spin_unlock(&dtl->lock);
+ unregister_dtl_buffer_access(false);
}
/* file interface */
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 8c5377fe9985..53e005c84078 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -99,6 +99,44 @@ void register_dtl_buffer(int cpu)
}
}
+#ifdef CONFIG_PPC_SPLPAR
+static DEFINE_SPINLOCK(dtl_buffer_refctr_lock);
+static unsigned int dtl_buffer_global_refctr, dtl_buffer_percpu_refctr;
+
+bool register_dtl_buffer_access(bool global)
+{
+ int rc = 0;
+
+ spin_lock(&dtl_buffer_refctr_lock);
+
+ if ((global && (dtl_buffer_global_refctr || dtl_buffer_percpu_refctr))
+ || (!global && dtl_buffer_global_refctr)) {
+ rc = -1;
+ } else {
+ if (global)
+ dtl_buffer_global_refctr++;
+ else
+ dtl_buffer_percpu_refctr++;
+ }
+
+ spin_unlock(&dtl_buffer_refctr_lock);
+
+ return rc;
+}
+
+void unregister_dtl_buffer_access(bool global)
+{
+ spin_lock(&dtl_buffer_refctr_lock);
+
+ if (global)
+ dtl_buffer_global_refctr--;
+ else
+ dtl_buffer_percpu_refctr--;
+
+ spin_unlock(&dtl_buffer_refctr_lock);
+}
+#endif /* CONFIG_PPC_SPLPAR */
+
void vpa_init(int cpu)
{
int hwcpu = get_hard_smp_processor_id(cpu);
--
2.21.0
^ permalink raw reply related
* [PATCH v2 3/9] powerpc/pseries: Factor out DTL buffer allocation and registration routines
From: Naveen N. Rao @ 2019-06-14 15:44 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
In-Reply-To: <cover.1560526066.git.naveen.n.rao@linux.vnet.ibm.com>
Introduce new helpers for DTL buffer allocation and registration and
have the existing code use those.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/plpar_wrappers.h | 2 +
arch/powerpc/platforms/pseries/lpar.c | 66 ++++++++++++++++-------
arch/powerpc/platforms/pseries/setup.c | 34 +-----------
3 files changed, 52 insertions(+), 50 deletions(-)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index cff5a411e595..d08feb1bc2bd 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -88,6 +88,8 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa)
return vpa_call(H_VPA_REG_DTL, cpu, vpa);
}
+extern void register_dtl_buffer(int cpu);
+extern void alloc_dtl_buffers(void);
extern void vpa_init(int cpu);
static inline long plpar_pte_enter(unsigned long flags,
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 96c4677cf8ab..8c5377fe9985 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -52,13 +52,58 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
+void alloc_dtl_buffers(void)
+{
+ int cpu;
+ struct paca_struct *pp;
+ struct dtl_entry *dtl;
+
+ for_each_possible_cpu(cpu) {
+ pp = paca_ptrs[cpu];
+ dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
+ if (!dtl) {
+ pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
+ cpu);
+ pr_warn("Stolen time statistics will be unreliable\n");
+ break;
+ }
+
+ pp->dtl_ridx = 0;
+ pp->dispatch_log = dtl;
+ pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
+ pp->dtl_curr = dtl;
+ }
+}
+
+void register_dtl_buffer(int cpu)
+{
+ long ret;
+ struct paca_struct *pp;
+ struct dtl_entry *dtl;
+ int hwcpu = get_hard_smp_processor_id(cpu);
+
+ pp = paca_ptrs[cpu];
+ dtl = pp->dispatch_log;
+ if (dtl) {
+ pp->dtl_ridx = 0;
+ pp->dtl_curr = dtl;
+ lppaca_of(cpu).dtl_idx = 0;
+
+ /* hypervisor reads buffer length from this field */
+ dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
+ ret = register_dtl(hwcpu, __pa(dtl));
+ if (ret)
+ pr_err("WARNING: DTL registration of cpu %d (hw %d) "
+ "failed with %ld\n", cpu, hwcpu, ret);
+ lppaca_of(cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
+ }
+}
+
void vpa_init(int cpu)
{
int hwcpu = get_hard_smp_processor_id(cpu);
unsigned long addr;
long ret;
- struct paca_struct *pp;
- struct dtl_entry *dtl;
/*
* The spec says it "may be problematic" if CPU x registers the VPA of
@@ -99,22 +144,7 @@ void vpa_init(int cpu)
/*
* Register dispatch trace log, if one has been allocated.
*/
- pp = paca_ptrs[cpu];
- dtl = pp->dispatch_log;
- if (dtl) {
- pp->dtl_ridx = 0;
- pp->dtl_curr = dtl;
- lppaca_of(cpu).dtl_idx = 0;
-
- /* hypervisor reads buffer length from this field */
- dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
- ret = register_dtl(hwcpu, __pa(dtl));
- if (ret)
- pr_err("WARNING: DTL registration of cpu %d (hw %d) "
- "failed with %ld\n", smp_processor_id(),
- hwcpu, ret);
- lppaca_of(cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
- }
+ register_dtl_buffer(cpu);
}
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index a643e45d0544..e4d75b958593 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -273,46 +273,16 @@ struct kmem_cache *dtl_cache;
*/
static int alloc_dispatch_logs(void)
{
- int cpu, ret;
- struct paca_struct *pp;
- struct dtl_entry *dtl;
-
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return 0;
if (!dtl_cache)
return 0;
- for_each_possible_cpu(cpu) {
- pp = paca_ptrs[cpu];
- dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
- if (!dtl) {
- pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
- cpu);
- pr_warn("Stolen time statistics will be unreliable\n");
- break;
- }
-
- pp->dtl_ridx = 0;
- pp->dispatch_log = dtl;
- pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
- pp->dtl_curr = dtl;
- }
+ alloc_dtl_buffers();
/* Register the DTL for the current (boot) cpu */
- dtl = get_paca()->dispatch_log;
- get_paca()->dtl_ridx = 0;
- get_paca()->dtl_curr = dtl;
- get_paca()->lppaca_ptr->dtl_idx = 0;
-
- /* hypervisor reads buffer length from this field */
- dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
- ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
- if (ret)
- pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
- "with %d\n", smp_processor_id(),
- hard_smp_processor_id(), ret);
- get_paca()->lppaca_ptr->dtl_enable_mask = DTL_LOG_PREEMPT;
+ register_dtl_buffer(smp_processor_id());
return 0;
}
--
2.21.0
^ permalink raw reply related
* [PATCH v2 5/9] powerpc/pseries: Generalize hcall_vphn()
From: Naveen N. Rao @ 2019-06-14 15:44 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
In-Reply-To: <cover.1560526066.git.naveen.n.rao@linux.vnet.ibm.com>
H_HOME_NODE_ASSOCIATIVITY hcall can take two different flags and return
different associativity information in each case. Generalize the
existing hcall_vphn() function to take flags as an argument and to
return the result. Update the only existing user to pass the proper
arguments.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/mm/book3s64/vphn.h | 8 ++++++++
arch/powerpc/mm/numa.c | 27 +++++++++++++--------------
2 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/mm/book3s64/vphn.h b/arch/powerpc/mm/book3s64/vphn.h
index f0b93c2dd578..f7ff1e0c3801 100644
--- a/arch/powerpc/mm/book3s64/vphn.h
+++ b/arch/powerpc/mm/book3s64/vphn.h
@@ -11,6 +11,14 @@
*/
#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
+/*
+ * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
+ * 1 for retrieving associativity information for a guest cpu
+ * 2 for retrieving associativity information for a host/hypervisor cpu
+ */
+#define VPHN_FLAG_VCPU 1
+#define VPHN_FLAG_PCPU 2
+
extern int vphn_unpack_associativity(const long *packed, __be32 *unpacked);
#endif
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 917904d2fe97..42ea79514420 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1083,6 +1083,17 @@ static void reset_topology_timer(void);
static int topology_timer_secs = 1;
static int topology_inited;
+static long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity)
+{
+ long rc;
+ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu);
+ vphn_unpack_associativity(retbuf, associativity);
+
+ return rc;
+}
+
/*
* Change polling interval for associativity changes.
*/
@@ -1161,25 +1172,13 @@ static int update_cpu_associativity_changes_mask(void)
* Retrieve the new associativity information for a virtual processor's
* home node.
*/
-static long hcall_vphn(unsigned long cpu, __be32 *associativity)
-{
- long rc;
- long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
- u64 flags = 1;
- int hwcpu = get_hard_smp_processor_id(cpu);
-
- rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
- vphn_unpack_associativity(retbuf, associativity);
-
- return rc;
-}
-
static long vphn_get_associativity(unsigned long cpu,
__be32 *associativity)
{
long rc;
- rc = hcall_vphn(cpu, associativity);
+ rc = hcall_vphn(get_hard_smp_processor_id(cpu),
+ VPHN_FLAG_VCPU, associativity);
switch (rc) {
case H_FUNCTION:
--
2.21.0
^ permalink raw reply related
* [PATCH v2 6/9] powerpc/pseries: Move mm/book3s64/vphn.c under platforms/pseries/
From: Naveen N. Rao @ 2019-06-14 15:44 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
In-Reply-To: <cover.1560526066.git.naveen.n.rao@linux.vnet.ibm.com>
hcall_vphn() is specific to pseries and will be used in a subsequent
patch. So, move it to a more appropriate place under
arch/powerpc/platforms/pseries. Also merge vphn.h into plpar_wrappers.h
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/plpar_wrappers.h | 19 +++++++++++++++
arch/powerpc/mm/book3s64/Makefile | 1 -
arch/powerpc/mm/book3s64/vphn.h | 24 -------------------
arch/powerpc/mm/numa.c | 13 +---------
arch/powerpc/platforms/pseries/Makefile | 1 +
.../{mm/book3s64 => platforms/pseries}/vphn.c | 16 +++++++++++--
6 files changed, 35 insertions(+), 39 deletions(-)
delete mode 100644 arch/powerpc/mm/book3s64/vphn.h
rename arch/powerpc/{mm/book3s64 => platforms/pseries}/vphn.c (83%)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index ab7dd454b6eb..5f72f1fa4f81 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -11,6 +11,25 @@
#include <asm/paca.h>
#include <asm/page.h>
+/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
+#define VPHN_REGISTER_COUNT 6
+
+/*
+ * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
+ * form the complete property we have to add the length in the first cell.
+ */
+#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
+
+/*
+ * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
+ * 1 for retrieving associativity information for a guest cpu
+ * 2 for retrieving associativity information for a host/hypervisor cpu
+ */
+#define VPHN_FLAG_VCPU 1
+#define VPHN_FLAG_PCPU 2
+
+long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
static inline long poll_pending(void)
{
return plpar_hcall_norets(H_POLL_PENDING);
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 974b4fc19f4f..fd393b8be14f 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -10,7 +10,6 @@ obj-$(CONFIG_PPC_NATIVE) += hash_native.o
obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
-obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o
ifdef CONFIG_HUGETLB_PAGE
obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
diff --git a/arch/powerpc/mm/book3s64/vphn.h b/arch/powerpc/mm/book3s64/vphn.h
deleted file mode 100644
index f7ff1e0c3801..000000000000
--- a/arch/powerpc/mm/book3s64/vphn.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ARCH_POWERPC_MM_VPHN_H_
-#define _ARCH_POWERPC_MM_VPHN_H_
-
-/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
-#define VPHN_REGISTER_COUNT 6
-
-/*
- * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
- * form the complete property we have to add the length in the first cell.
- */
-#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
-
-/*
- * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
- * 1 for retrieving associativity information for a guest cpu
- * 2 for retrieving associativity information for a host/hypervisor cpu
- */
-#define VPHN_FLAG_VCPU 1
-#define VPHN_FLAG_PCPU 2
-
-extern int vphn_unpack_associativity(const long *packed, __be32 *unpacked);
-
-#endif
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 42ea79514420..5027c9cfe56a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1064,7 +1064,7 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-#include "book3s64/vphn.h"
+#include <asm/plpar_wrappers.h>
struct topology_update_data {
struct topology_update_data *next;
@@ -1083,17 +1083,6 @@ static void reset_topology_timer(void);
static int topology_timer_secs = 1;
static int topology_inited;
-static long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity)
-{
- long rc;
- long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
-
- rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu);
- vphn_unpack_associativity(retbuf, associativity);
-
- return rc;
-}
-
/*
* Change polling interval for associativity changes.
*/
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index a43ec843c8e2..ab3d59aeacca 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
obj-$(CONFIG_PAPR_SCM) += papr_scm.o
+obj-$(CONFIG_PPC_SPLPAR) += vphn.o
ifdef CONFIG_PPC_PSERIES
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/mm/book3s64/vphn.c b/arch/powerpc/platforms/pseries/vphn.c
similarity index 83%
rename from arch/powerpc/mm/book3s64/vphn.c
rename to arch/powerpc/platforms/pseries/vphn.c
index 0ee7734afb50..6bbed333e4f2 100644
--- a/arch/powerpc/mm/book3s64/vphn.c
+++ b/arch/powerpc/platforms/pseries/vphn.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#include <asm/plpar_wrappers.h>
#include <asm/byteorder.h>
-#include "vphn.h"
+#include <asm/hvcall.h>
/*
* The associativity domain numbers are returned from the hypervisor as a
@@ -22,7 +23,7 @@
*
* Convert to the sequence they would appear in the ibm,associativity property.
*/
-int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
+static int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
{
__be64 be_packed[VPHN_REGISTER_COUNT];
int i, nr_assoc_doms = 0;
@@ -71,3 +72,14 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
return nr_assoc_doms;
}
+
+long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity)
+{
+ long rc;
+ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu);
+ vphn_unpack_associativity(retbuf, associativity);
+
+ return rc;
+}
--
2.21.0
^ permalink raw reply related
* [PATCH v2 7/9] powerpc/pseries: Provide vcpu dispatch statistics
From: Naveen N. Rao @ 2019-06-14 15:44 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
In-Reply-To: <cover.1560526066.git.naveen.n.rao@linux.vnet.ibm.com>
For Shared Processor LPARs, the POWER Hypervisor maintains a relatively
static mapping of the LPAR processors (vcpus) to physical processor
chips (representing the "home" node) and tries to always dispatch vcpus
on their associated physical processor chip. However, under certain
scenarios, vcpus may be dispatched on a different processor chip (away
from its home node). The actual physical processor number on which a
certain vcpu is dispatched is available to the guest in the
'processor_id' field of each DTL entry.
The guest can discover the home node of each vcpu through the
H_HOME_NODE_ASSOCIATIVITY(flags=1) hcall. The guest can also discover
the associativity of physical processors, as represented in the DTL
entry, through the H_HOME_NODE_ASSOCIATIVITY(flags=2) hcall.
These can then be compared to determine if the vcpu was dispatched on
its home node or not. If the vcpu was not dispatched on the home node,
it is possible to determine if the vcpu was dispatched in a different
chip, socket or drawer.
Introduce a procfs file /proc/powerpc/vcpudispatch_stats that can be
used to obtain these statistics. Writing '1' to this file enables
collecting the statistics, while writing '0' disables the statistics.
The statistics themselves are available by reading the procfs file. By
default, the DTLB log for each vcpu is processed 50 times a second so as
not to miss any entries. This processing frequency can be changed
through /proc/powerpc/vcpudispatch_stats_freq.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/topology.h | 6 +
arch/powerpc/mm/numa.c | 16 +
arch/powerpc/platforms/pseries/lpar.c | 536 +++++++++++++++++++++++++-
3 files changed, 556 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index f85e2b01c3df..2f7e1ea5089e 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -35,6 +35,7 @@ static inline int pcibus_to_node(struct pci_bus *bus)
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))
+extern int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
@@ -84,6 +85,11 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+ return 0;
+}
+
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 5027c9cfe56a..3afa884feead 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -163,6 +163,22 @@ static void unmap_cpu_from_node(unsigned long cpu)
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
+int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+ int dist = 0;
+
+ int i, index;
+
+ for (i = 0; i < distance_ref_points_depth; i++) {
+ index = be32_to_cpu(distance_ref_points[i]);
+ if (cpu1_assoc[index] == cpu2_assoc[index])
+ break;
+ dist++;
+ }
+
+ return dist;
+}
+
/* must hold reference to node during call */
static const __be32 *of_get_associativity(struct device_node *dev)
{
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 53e005c84078..200a96c8626a 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -17,6 +17,10 @@
#include <linux/jump_label.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
+#include <linux/spinlock.h>
+#include <linux/cpuhotplug.h>
+#include <linux/workqueue.h>
+#include <linux/proc_fs.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
@@ -52,6 +56,12 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+static u8 dtl_mask = DTL_LOG_PREEMPT;
+#else
+static u8 dtl_mask;
+#endif
+
void alloc_dtl_buffers(void)
{
int cpu;
@@ -60,11 +70,15 @@ void alloc_dtl_buffers(void)
for_each_possible_cpu(cpu) {
pp = paca_ptrs[cpu];
+ if (pp->dispatch_log)
+ continue;
dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
if (!dtl) {
pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
cpu);
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
pr_warn("Stolen time statistics will be unreliable\n");
+#endif
break;
}
@@ -84,7 +98,7 @@ void register_dtl_buffer(int cpu)
pp = paca_ptrs[cpu];
dtl = pp->dispatch_log;
- if (dtl) {
+ if (dtl && dtl_mask) {
pp->dtl_ridx = 0;
pp->dtl_curr = dtl;
lppaca_of(cpu).dtl_idx = 0;
@@ -95,13 +109,72 @@ void register_dtl_buffer(int cpu)
if (ret)
pr_err("WARNING: DTL registration of cpu %d (hw %d) "
"failed with %ld\n", cpu, hwcpu, ret);
- lppaca_of(cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
}
}
+
#ifdef CONFIG_PPC_SPLPAR
+struct dtl_worker {
+ struct delayed_work work;
+ int cpu;
+};
+
+struct vcpu_dispatch_data {
+ int last_disp_cpu;
+
+ int total_disp;
+
+ int same_cpu_disp;
+ int same_chip_disp;
+ int diff_chip_disp;
+ int far_chip_disp;
+
+ int numa_home_disp;
+ int numa_remote_disp;
+ int numa_far_disp;
+};
+
+/*
+ * This represents the number of cpus in the hypervisor. Since there is no
+ * architected way to discover the number of processors in the host, we
+ * provision for dealing with NR_CPUS. This is currently 2048 by default, and
+ * is sufficient for our purposes. This will need to be tweaked if
+ * CONFIG_NR_CPUS is changed.
+ */
+#define NR_CPUS_H NR_CPUS
+
+static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data);
+static DEFINE_PER_CPU(u64, dtl_entry_ridx);
+static DEFINE_PER_CPU(struct dtl_worker, dtl_workers);
+static enum cpuhp_state dtl_worker_state;
+static DEFINE_MUTEX(dtl_worker_mutex);
+static unsigned int dtl_worker_refctr;
static DEFINE_SPINLOCK(dtl_buffer_refctr_lock);
static unsigned int dtl_buffer_global_refctr, dtl_buffer_percpu_refctr;
+static int vcpudispatch_stats_on __read_mostly;
+static int vcpudispatch_stats_freq = 50;
+static __be32 *vcpu_associativity, *pcpu_associativity;
+
+
+static void free_dtl_buffers(void)
+{
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ int cpu;
+ struct paca_struct *pp;
+
+ for_each_possible_cpu(cpu) {
+ pp = paca_ptrs[cpu];
+ if (!pp->dispatch_log)
+ continue;
+ kmem_cache_free(dtl_cache, pp->dispatch_log);
+ pp->dtl_ridx = 0;
+ pp->dispatch_log = 0;
+ pp->dispatch_log_end = 0;
+ pp->dtl_curr = 0;
+ }
+#endif
+}
bool register_dtl_buffer_access(bool global)
{
@@ -135,6 +208,465 @@ void unregister_dtl_buffer_access(bool global)
spin_unlock(&dtl_buffer_refctr_lock);
}
+
+static int init_cpu_associativity(void)
+{
+ vcpu_associativity = kcalloc(num_possible_cpus() / threads_per_core,
+ VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
+ pcpu_associativity = kcalloc(NR_CPUS_H / threads_per_core,
+ VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
+
+ if (!vcpu_associativity || !pcpu_associativity) {
+ pr_err("error allocating memory for associativity information\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void destroy_cpu_associativity(void)
+{
+ kfree(vcpu_associativity);
+ kfree(pcpu_associativity);
+ vcpu_associativity = pcpu_associativity = 0;
+}
+
+static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag)
+{
+ __be32 *assoc;
+ int rc = 0;
+
+ assoc = &cpu_assoc[(int)(cpu / threads_per_core) * VPHN_ASSOC_BUFSIZE];
+ if (!assoc[0]) {
+ rc = hcall_vphn(cpu, flag, &assoc[0]);
+ if (rc)
+ return NULL;
+ }
+
+ return assoc;
+}
+
+static __be32 *get_pcpu_associativity(int cpu)
+{
+ return __get_cpu_associativity(cpu, pcpu_associativity, VPHN_FLAG_PCPU);
+}
+
+static __be32 *get_vcpu_associativity(int cpu)
+{
+ return __get_cpu_associativity(cpu, vcpu_associativity, VPHN_FLAG_VCPU);
+}
+
+static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
+{
+ __be32 *last_disp_cpu_assoc, *cur_disp_cpu_assoc;
+
+ if (last_disp_cpu >= NR_CPUS_H || cur_disp_cpu >= NR_CPUS_H)
+ return -EINVAL;
+
+ last_disp_cpu_assoc = get_pcpu_associativity(last_disp_cpu);
+ cur_disp_cpu_assoc = get_pcpu_associativity(cur_disp_cpu);
+
+ if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
+ return -EIO;
+
+ return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
+}
+
+static int cpu_home_node_dispatch_distance(int disp_cpu)
+{
+ __be32 *disp_cpu_assoc, *vcpu_assoc;
+ int vcpu_id = smp_processor_id();
+
+ if (disp_cpu >= NR_CPUS_H) {
+ pr_debug_ratelimited("vcpu dispatch cpu %d > %d\n",
+ disp_cpu, NR_CPUS_H);
+ return -EINVAL;
+ }
+
+ disp_cpu_assoc = get_pcpu_associativity(disp_cpu);
+ vcpu_assoc = get_vcpu_associativity(vcpu_id);
+
+ if (!disp_cpu_assoc || !vcpu_assoc)
+ return -EIO;
+
+ return cpu_distance(disp_cpu_assoc, vcpu_assoc);
+}
+
+static void update_vcpu_disp_stat(int disp_cpu)
+{
+ struct vcpu_dispatch_data *disp;
+ int distance;
+
+ disp = this_cpu_ptr(&vcpu_disp_data);
+ if (disp->last_disp_cpu == -1) {
+ disp->last_disp_cpu = disp_cpu;
+ return;
+ }
+
+ disp->total_disp++;
+
+ if (disp->last_disp_cpu == disp_cpu ||
+ (cpu_first_thread_sibling(disp->last_disp_cpu) ==
+ cpu_first_thread_sibling(disp_cpu)))
+ disp->same_cpu_disp++;
+ else {
+ distance = cpu_relative_dispatch_distance(disp->last_disp_cpu,
+ disp_cpu);
+ if (distance < 0)
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
+ smp_processor_id());
+ else {
+ switch (distance) {
+ case 0:
+ disp->same_chip_disp++;
+ break;
+ case 1:
+ disp->diff_chip_disp++;
+ break;
+ case 2:
+ disp->far_chip_disp++;
+ break;
+ default:
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d (%d -> %d): unexpected relative dispatch distance %d\n",
+ smp_processor_id(),
+ disp->last_disp_cpu,
+ disp_cpu,
+ distance);
+ }
+ }
+ }
+
+ distance = cpu_home_node_dispatch_distance(disp_cpu);
+ if (distance < 0)
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
+ smp_processor_id());
+ else {
+ switch (distance) {
+ case 0:
+ disp->numa_home_disp++;
+ break;
+ case 1:
+ disp->numa_remote_disp++;
+ break;
+ case 2:
+ disp->numa_far_disp++;
+ break;
+ default:
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d on %d: unexpected numa dispatch distance %d\n",
+ smp_processor_id(),
+ disp_cpu,
+ distance);
+ }
+ }
+
+ disp->last_disp_cpu = disp_cpu;
+}
+
+static void process_dtl_buffer(struct work_struct *work)
+{
+ struct dtl_entry dtle;
+ u64 i = __this_cpu_read(dtl_entry_ridx);
+ struct dtl_entry *dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+ struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+ struct lppaca *vpa = local_paca->lppaca_ptr;
+ struct dtl_worker *d = container_of(work, struct dtl_worker, work.work);
+
+ if (!local_paca->dispatch_log)
+ return;
+
+ /* if we have been migrated away, we cancel ourself */
+ if (d->cpu != smp_processor_id()) {
+ pr_debug("vcpudispatch_stats: cpu %d worker migrated -- canceling worker\n",
+ smp_processor_id());
+ return;
+ }
+
+ if (i == be64_to_cpu(vpa->dtl_idx))
+ goto out;
+
+ while (i < be64_to_cpu(vpa->dtl_idx)) {
+ dtle = *dtl;
+ barrier();
+ if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
+ /* buffer has overflowed */
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d lost %lld DTL samples\n",
+ d->cpu,
+ be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG - i);
+ i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
+ dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+ continue;
+ }
+ update_vcpu_disp_stat(be16_to_cpu(dtle.processor_id));
+ ++i;
+ ++dtl;
+ if (dtl == dtl_end)
+ dtl = local_paca->dispatch_log;
+ }
+
+ __this_cpu_write(dtl_entry_ridx, i);
+
+out:
+ schedule_delayed_work_on(d->cpu, to_delayed_work(work),
+ HZ / vcpudispatch_stats_freq);
+}
+
+static int dtl_worker_online(unsigned int cpu)
+{
+ struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
+
+ memset(d, 0, sizeof(*d));
+ INIT_DELAYED_WORK(&d->work, process_dtl_buffer);
+ d->cpu = cpu;
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ per_cpu(dtl_entry_ridx, cpu) = 0;
+ register_dtl_buffer(cpu);
+#else
+ per_cpu(dtl_entry_ridx, cpu) = be64_to_cpu(lppaca_of(cpu).dtl_idx);
+#endif
+
+ schedule_delayed_work_on(cpu, &d->work, HZ / vcpudispatch_stats_freq);
+ return 0;
+}
+
+static int dtl_worker_offline(unsigned int cpu)
+{
+ struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
+
+ cancel_delayed_work_sync(&d->work);
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ unregister_dtl(get_hard_smp_processor_id(cpu));
+#endif
+
+ return 0;
+}
+
+static void set_global_dtl_mask(u8 mask)
+{
+ int cpu;
+
+ dtl_mask = mask;
+ for_each_present_cpu(cpu)
+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+}
+
+static void reset_global_dtl_mask(void)
+{
+ int cpu;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ dtl_mask = DTL_LOG_PREEMPT;
+#else
+ dtl_mask = 0;
+#endif
+ for_each_present_cpu(cpu)
+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+}
+
+static int dtl_worker_enable(void)
+{
+ int rc = 0, state;
+
+ mutex_lock(&dtl_worker_mutex);
+
+ if (dtl_worker_refctr) {
+ dtl_worker_refctr++;
+ goto out;
+ }
+
+ if (register_dtl_buffer_access(1)) {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ set_global_dtl_mask(DTL_LOG_ALL);
+
+ /* Setup dtl buffers and register those */
+ alloc_dtl_buffers();
+
+ state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/dtl:online",
+ dtl_worker_online, dtl_worker_offline);
+ if (state < 0) {
+ pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n");
+ free_dtl_buffers();
+ reset_global_dtl_mask();
+ unregister_dtl_buffer_access(1);
+ rc = -EINVAL;
+ goto out;
+ }
+ dtl_worker_state = state;
+ dtl_worker_refctr++;
+
+out:
+ mutex_unlock(&dtl_worker_mutex);
+ return rc;
+}
+
+static void dtl_worker_disable(void)
+{
+ mutex_lock(&dtl_worker_mutex);
+ dtl_worker_refctr--;
+ if (!dtl_worker_refctr) {
+ cpuhp_remove_state(dtl_worker_state);
+ dtl_worker_state = 0;
+ free_dtl_buffers();
+ reset_global_dtl_mask();
+ unregister_dtl_buffer_access(1);
+ }
+ mutex_unlock(&dtl_worker_mutex);
+}
+
+static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p,
+ size_t count, loff_t *ppos)
+{
+ struct vcpu_dispatch_data *disp;
+ int rc, cmd, cpu;
+ char buf[16];
+
+ if (count > 15)
+ return -EINVAL;
+
+ if (copy_from_user(buf, p, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+ rc = kstrtoint(buf, 0, &cmd);
+ if (rc || cmd < 0 || cmd > 1) {
+ pr_err("vcpudispatch_stats: please use 0 to disable or 1 to enable dispatch statistics\n");
+ return rc ? rc : -EINVAL;
+ }
+
+ if ((cmd == 0 && !vcpudispatch_stats_on) ||
+ (cmd == 1 && vcpudispatch_stats_on))
+ return count;
+
+ if (cmd) {
+ rc = init_cpu_associativity();
+ if (rc)
+ return rc;
+
+ for_each_possible_cpu(cpu) {
+ disp = per_cpu_ptr(&vcpu_disp_data, cpu);
+ memset(disp, 0, sizeof(*disp));
+ disp->last_disp_cpu = -1;
+ }
+
+ rc = dtl_worker_enable();
+ if (rc) {
+ destroy_cpu_associativity();
+ return rc;
+ }
+ } else {
+ dtl_worker_disable();
+ destroy_cpu_associativity();
+ }
+
+ vcpudispatch_stats_on = cmd;
+
+ return count;
+}
+
+static int vcpudispatch_stats_display(struct seq_file *p, void *v)
+{
+ int cpu;
+ struct vcpu_dispatch_data *disp;
+
+ if (!vcpudispatch_stats_on) {
+ seq_puts(p, "off\n");
+ return 0;
+ }
+
+ for_each_online_cpu(cpu) {
+ disp = per_cpu_ptr(&vcpu_disp_data, cpu);
+ seq_printf(p, "cpu%d", cpu);
+ seq_put_decimal_ull(p, " ", disp->total_disp);
+ seq_put_decimal_ull(p, " ", disp->same_cpu_disp);
+ seq_put_decimal_ull(p, " ", disp->same_chip_disp);
+ seq_put_decimal_ull(p, " ", disp->diff_chip_disp);
+ seq_put_decimal_ull(p, " ", disp->far_chip_disp);
+ seq_put_decimal_ull(p, " ", disp->numa_home_disp);
+ seq_put_decimal_ull(p, " ", disp->numa_remote_disp);
+ seq_put_decimal_ull(p, " ", disp->numa_far_disp);
+ seq_puts(p, "\n");
+ }
+
+ return 0;
+}
+
+static int vcpudispatch_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, vcpudispatch_stats_display, NULL);
+}
+
+static const struct file_operations vcpudispatch_stats_proc_ops = {
+ .open = vcpudispatch_stats_open,
+ .read = seq_read,
+ .write = vcpudispatch_stats_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static ssize_t vcpudispatch_stats_freq_write(struct file *file,
+ const char __user *p, size_t count, loff_t *ppos)
+{
+ int rc, freq;
+ char buf[16];
+
+ if (count > 15)
+ return -EINVAL;
+
+ if (copy_from_user(buf, p, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+ rc = kstrtoint(buf, 0, &freq);
+ if (rc || freq < 1 || freq > HZ) {
+ pr_err("vcpudispatch_stats_freq: please specify a frequency between 1 and %d\n",
+ HZ);
+ return rc ? rc : -EINVAL;
+ }
+
+ vcpudispatch_stats_freq = freq;
+
+ return count;
+}
+
+static int vcpudispatch_stats_freq_display(struct seq_file *p, void *v)
+{
+ seq_printf(p, "%d\n", vcpudispatch_stats_freq);
+ return 0;
+}
+
+static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, vcpudispatch_stats_freq_display, NULL);
+}
+
+static const struct file_operations vcpudispatch_stats_freq_proc_ops = {
+ .open = vcpudispatch_stats_freq_open,
+ .read = seq_read,
+ .write = vcpudispatch_stats_freq_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init vcpudispatch_stats_procfs_init(void)
+{
+ if (!lppaca_shared_proc(get_lppaca()))
+ return 0;
+
+ if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL,
+ &vcpudispatch_stats_proc_ops))
+ pr_err("vcpudispatch_stats: error creating procfs file\n");
+ else if (!proc_create("powerpc/vcpudispatch_stats_freq", 0600, NULL,
+ &vcpudispatch_stats_freq_proc_ops))
+ pr_err("vcpudispatch_stats_freq: error creating procfs file\n");
+
+ return 0;
+}
+
+machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
#endif /* CONFIG_PPC_SPLPAR */
void vpa_init(int cpu)
--
2.21.0
^ permalink raw reply related
* [PATCH v2 9/9] powerpc/pseries: Add documentation for vcpudispatch_stats
From: Naveen N. Rao @ 2019-06-14 15:44 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
In-Reply-To: <cover.1560526066.git.naveen.n.rao@linux.vnet.ibm.com>
Add a document describing the fields provided by
/proc/powerpc/vcpudispatch_stats.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
Documentation/powerpc/vcpudispatch_stats.txt | 68 ++++++++++++++++++++
1 file changed, 68 insertions(+)
create mode 100644 Documentation/powerpc/vcpudispatch_stats.txt
diff --git a/Documentation/powerpc/vcpudispatch_stats.txt b/Documentation/powerpc/vcpudispatch_stats.txt
new file mode 100644
index 000000000000..e21476bfd78c
--- /dev/null
+++ b/Documentation/powerpc/vcpudispatch_stats.txt
@@ -0,0 +1,68 @@
+VCPU Dispatch Statistics:
+=========================
+
+For Shared Processor LPARs, the POWER Hypervisor maintains a relatively
+static mapping of the LPAR processors (vcpus) to physical processor
+chips (representing the "home" node) and tries to always dispatch vcpus
+on their associated physical processor chip. However, under certain
+scenarios, vcpus may be dispatched on a different processor chip (away
+from its home node).
+
+/proc/powerpc/vcpudispatch_stats can be used to obtain statistics
+related to the vcpu dispatch behavior. Writing '1' to this file enables
+collecting the statistics, while writing '0' disables the statistics.
+By default, the DTLB log for each vcpu is processed 50 times a second so
+as not to miss any entries. This processing frequency can be changed
+through /proc/powerpc/vcpudispatch_stats_freq.
+
+The statistics themselves are available by reading the procfs file
+/proc/powerpc/vcpudispatch_stats. Each line in the output corresponds to
+a vcpu as represented by the first field, followed by 8 numbers.
+
+The first number corresponds to:
+1. total vcpu dispatches since the beginning of statistics collection
+
+The next 4 numbers represent vcpu dispatch dispersions:
+2. number of times this vcpu was dispatched on the same processor as last
+ time
+3. number of times this vcpu was dispatched on a different processor core
+ as last time, but within the same chip
+4. number of times this vcpu was dispatched on a different chip
+5. number of times this vcpu was dispatches on a different socket/drawer
+(next numa boundary)
+
+The final 3 numbers represent statistics in relation to the home node of
+the vcpu:
+6. number of times this vcpu was dispatched in its home node (chip)
+7. number of times this vcpu was dispatched in a different node
+8. number of times this vcpu was dispatched in a node further away (numa
+distance)
+
+An example output:
+ $ sudo cat /proc/powerpc/vcpudispatch_stats
+ cpu0 6839 4126 2683 30 0 6821 18 0
+ cpu1 2515 1274 1229 12 0 2509 6 0
+ cpu2 2317 1198 1109 10 0 2312 5 0
+ cpu3 2259 1165 1088 6 0 2256 3 0
+ cpu4 2205 1143 1056 6 0 2202 3 0
+ cpu5 2165 1121 1038 6 0 2162 3 0
+ cpu6 2183 1127 1050 6 0 2180 3 0
+ cpu7 2193 1133 1052 8 0 2187 6 0
+ cpu8 2165 1115 1032 18 0 2156 9 0
+ cpu9 2301 1252 1033 16 0 2293 8 0
+ cpu10 2197 1138 1041 18 0 2187 10 0
+ cpu11 2273 1185 1062 26 0 2260 13 0
+ cpu12 2186 1125 1043 18 0 2177 9 0
+ cpu13 2161 1115 1030 16 0 2153 8 0
+ cpu14 2206 1153 1033 20 0 2196 10 0
+ cpu15 2163 1115 1032 16 0 2155 8 0
+
+In the output above, for vcpu0, there have been 6839 dispatches since
+statistics were enabled. 4126 of those dispatches were on the same
+physical cpu as the last time. 2683 were on a different core, but within
+the same chip, while 30 dispatches were on a different chip compared to
+its last dispatch.
+
+Also, out of the total of 6839 dispatches, we see that there have been
+6821 dispatches on the vcpu's home node, while 18 dispatches were
+outside its home node, on a neighbouring chip.
--
2.21.0
^ permalink raw reply related
* [PATCH v2 8/9] powerpc/pseries: Protect against hogging the cpu while setting up the stats
From: Naveen N. Rao @ 2019-06-14 15:44 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Aneesh Kumar K.V, Mingming Cao, linuxppc-dev
In-Reply-To: <cover.1560526066.git.naveen.n.rao@linux.vnet.ibm.com>
When enabling or disabling the vcpu dispatch statistics, we do a lot of
work including allocating/deallocating memory across all possible cpus
for the DTL buffer. In order to guard against hogging the cpu for too
long, track the time we're taking and yield the processor if necessary.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/plpar_wrappers.h | 2 +-
arch/powerpc/platforms/pseries/lpar.c | 29 ++++++++++++++++-------
arch/powerpc/platforms/pseries/setup.c | 2 +-
3 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 5f72f1fa4f81..5e5faabd7ba2 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -110,7 +110,7 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa)
extern bool register_dtl_buffer_access(bool global);
extern void unregister_dtl_buffer_access(bool global);
extern void register_dtl_buffer(int cpu);
-extern void alloc_dtl_buffers(void);
+extern void alloc_dtl_buffers(unsigned long *time_limit);
extern void vpa_init(int cpu);
static inline long plpar_pte_enter(unsigned long flags,
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 200a96c8626a..318b08ada086 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -62,7 +62,7 @@ static u8 dtl_mask = DTL_LOG_PREEMPT;
static u8 dtl_mask;
#endif
-void alloc_dtl_buffers(void)
+void alloc_dtl_buffers(unsigned long *time_limit)
{
int cpu;
struct paca_struct *pp;
@@ -86,6 +86,11 @@ void alloc_dtl_buffers(void)
pp->dispatch_log = dtl;
pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
pp->dtl_curr = dtl;
+
+ if (time_limit && time_after(jiffies, *time_limit)) {
+ cond_resched();
+ *time_limit = jiffies + HZ;
+ }
}
}
@@ -157,7 +162,7 @@ static int vcpudispatch_stats_freq = 50;
static __be32 *vcpu_associativity, *pcpu_associativity;
-static void free_dtl_buffers(void)
+static void free_dtl_buffers(unsigned long *time_limit)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
int cpu;
@@ -172,6 +177,11 @@ static void free_dtl_buffers(void)
pp->dispatch_log = 0;
pp->dispatch_log_end = 0;
pp->dtl_curr = 0;
+
+ if (time_limit && time_after(jiffies, *time_limit)) {
+ cond_resched();
+ *time_limit = jiffies + HZ;
+ }
}
#endif
}
@@ -464,7 +474,7 @@ static void reset_global_dtl_mask(void)
lppaca_of(cpu).dtl_enable_mask = dtl_mask;
}
-static int dtl_worker_enable(void)
+static int dtl_worker_enable(unsigned long *time_limit)
{
int rc = 0, state;
@@ -483,13 +493,13 @@ static int dtl_worker_enable(void)
set_global_dtl_mask(DTL_LOG_ALL);
/* Setup dtl buffers and register those */
- alloc_dtl_buffers();
+ alloc_dtl_buffers(time_limit);
state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/dtl:online",
dtl_worker_online, dtl_worker_offline);
if (state < 0) {
pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n");
- free_dtl_buffers();
+ free_dtl_buffers(time_limit);
reset_global_dtl_mask();
unregister_dtl_buffer_access(1);
rc = -EINVAL;
@@ -503,14 +513,14 @@ static int dtl_worker_enable(void)
return rc;
}
-static void dtl_worker_disable(void)
+static void dtl_worker_disable(unsigned long *time_limit)
{
mutex_lock(&dtl_worker_mutex);
dtl_worker_refctr--;
if (!dtl_worker_refctr) {
cpuhp_remove_state(dtl_worker_state);
dtl_worker_state = 0;
- free_dtl_buffers();
+ free_dtl_buffers(time_limit);
reset_global_dtl_mask();
unregister_dtl_buffer_access(1);
}
@@ -520,6 +530,7 @@ static void dtl_worker_disable(void)
static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p,
size_t count, loff_t *ppos)
{
+ unsigned long time_limit = jiffies + HZ;
struct vcpu_dispatch_data *disp;
int rc, cmd, cpu;
char buf[16];
@@ -552,13 +563,13 @@ static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p,
disp->last_disp_cpu = -1;
}
- rc = dtl_worker_enable();
+ rc = dtl_worker_enable(&time_limit);
if (rc) {
destroy_cpu_associativity();
return rc;
}
} else {
- dtl_worker_disable();
+ dtl_worker_disable(&time_limit);
destroy_cpu_associativity();
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e4d75b958593..76fd6de90581 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -279,7 +279,7 @@ static int alloc_dispatch_logs(void)
if (!dtl_cache)
return 0;
- alloc_dtl_buffers();
+ alloc_dtl_buffers(0);
/* Register the DTL for the current (boot) cpu */
register_dtl_buffer(smp_processor_id());
--
2.21.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox