From: David Gibson <david@gibson.dropbear.id.au>
To: paulus@samba.org, mpe@ellerman.id.au, benh@kernel.crashing.org
Cc: linuxppc-dev@lists.ozlabs.org, aik@ozlabs.ru, thuth@redhat.com,
lvivier@redhat.com, David Gibson <david@gibson.dropbear.id.au>
Subject: [RFCv2 7/9] pseries: Add support for hash table resizing
Date: Fri, 29 Jan 2016 16:24:01 +1100 [thread overview]
Message-ID: <1454045043-25545-8-git-send-email-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <1454045043-25545-1-git-send-email-david@gibson.dropbear.id.au>
This adds support for using experimental hypercalls to change the size
of the main hash page table while running as a PAPR guest. For now these
hypercalls are only in experimental qemu versions.
The interface is two part: first H_RESIZE_HPT_PREPARE is used to allocate
and prepare the new hash table. This may be slow, but can be done
asynchronously. Then, H_RESIZE_HPT_COMMIT is used to switch to the new
hash table. This requires that no CPUs be concurrently updating the HPT,
and so must be run under stop_machine().
This also adds a debugfs file which can be used to manually control
HPT resizing or testing purposes.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
arch/powerpc/include/asm/machdep.h | 1 +
arch/powerpc/mm/hash_utils_64.c | 28 +++++++++
arch/powerpc/platforms/pseries/lpar.c | 110 ++++++++++++++++++++++++++++++++++
3 files changed, 139 insertions(+)
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index a7d3f66..532d795 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -61,6 +61,7 @@ struct machdep_calls {
unsigned long addr,
unsigned char *hpte_slot_array,
int psize, int ssize, int local);
+ int (*resize_hpt)(unsigned long shift);
/*
* Special for kexec.
* To be called in real mode with interrupts disabled. No locks are
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index d63f7dc..882e409 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -34,6 +34,7 @@
#include <linux/signal.h>
#include <linux/memblock.h>
#include <linux/context_tracking.h>
+#include <linux/debugfs.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
@@ -1578,3 +1579,30 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
}
+
+static int ppc64_pft_size_get(void *data, u64 *val)
+{
+ *val = ppc64_pft_size;
+ return 0;
+}
+
+static int ppc64_pft_size_set(void *data, u64 val)
+{
+ if (!ppc_md.resize_hpt)
+ return -ENODEV;
+ return ppc_md.resize_hpt(val);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_ppc64_pft_size,
+ ppc64_pft_size_get, ppc64_pft_size_set, "%llu\n");
+
+static int __init hash64_debugfs(void)
+{
+ if (!debugfs_create_file("pft-size", 0600, powerpc_debugfs_root,
+ NULL, &fops_ppc64_pft_size)) {
+ pr_err("lpar: unable to create ppc64_pft_size debugsfs file\n");
+ }
+
+ return 0;
+}
+machine_device_initcall(pseries, hash64_debugfs);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 92d472d..ebf02e7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -27,6 +27,8 @@
#include <linux/console.h>
#include <linux/export.h>
#include <linux/jump_label.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
@@ -603,6 +605,113 @@ static int __init disable_bulk_remove(char *str)
__setup("bulk_remove=", disable_bulk_remove);
+#define HPT_RESIZE_TIMEOUT 10000 /* ms */
+
+struct hpt_resize_state {
+ unsigned long shift;
+ int commit_rc;
+};
+
+static int pseries_lpar_resize_hpt_commit(void *data)
+{
+ struct hpt_resize_state *state = data;
+
+ state->commit_rc = plpar_resize_hpt_commit(0, state->shift);
+ if (state->commit_rc != H_SUCCESS)
+ return -EIO;
+
+ /* Hypervisor has transitioned the HTAB, update our globals */
+ ppc64_pft_size = state->shift;
+ htab_size_bytes = 1UL << ppc64_pft_size;
+ htab_hash_mask = (htab_size_bytes >> 7) - 1;
+
+ return 0;
+}
+
+/* Must be called in user context */
+static int pseries_lpar_resize_hpt(unsigned long shift)
+{
+ struct hpt_resize_state state = {
+ .shift = shift,
+ .commit_rc = H_FUNCTION,
+ };
+ unsigned int delay, total_delay = 0;
+ int rc;
+ ktime_t t0, t1, t2;
+
+ might_sleep();
+
+ if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+ return -ENODEV;
+
+ printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
+ shift);
+
+ t0 = ktime_get();
+
+ rc = plpar_resize_hpt_prepare(0, shift);
+ while (H_IS_LONG_BUSY(rc)) {
+ delay = get_longbusy_msecs(rc);
+ total_delay += delay;
+ if (total_delay > HPT_RESIZE_TIMEOUT) {
+ /* prepare call with shift==0 cancels an
+ * in-progress resize */
+ rc = plpar_resize_hpt_prepare(0, 0);
+ if (rc != H_SUCCESS)
+ printk(KERN_WARNING
+ "lpar: Unexpected error %d cancelling timed out HPT resize\n",
+ rc);
+ return -ETIMEDOUT;
+ }
+ msleep(delay);
+ rc = plpar_resize_hpt_prepare(0, shift);
+ };
+
+ switch (rc) {
+ case H_SUCCESS:
+ /* Continue on */
+ break;
+
+ case H_PARAMETER:
+ return -EINVAL;
+ case H_RESOURCE:
+ return -EPERM;
+ default:
+ printk(KERN_WARNING
+ "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
+ rc);
+ return -EIO;
+ }
+
+ t1 = ktime_get();
+
+ rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
+
+ t2 = ktime_get();
+
+ if (rc != 0) {
+ switch (state.commit_rc) {
+ case H_PTEG_FULL:
+ printk(KERN_WARNING
+ "lpar: Hash collision while resizing HPT\n");
+ return -ENOSPC;
+
+ default:
+ printk(KERN_WARNING
+ "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
+ state.commit_rc);
+ return -EIO;
+ };
+ }
+
+ printk(KERN_INFO
+ "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
+ shift, (long long) ktime_ms_delta(t1, t0),
+ (long long) ktime_ms_delta(t2, t1));
+
+ return 0;
+}
+
void __init hpte_init_lpar(void)
{
ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate;
@@ -614,6 +723,7 @@ void __init hpte_init_lpar(void)
ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range;
ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear;
ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+ ppc_md.resize_hpt = pseries_lpar_resize_hpt;
}
#ifdef CONFIG_PPC_SMLPAR
--
2.5.0
next prev parent reply other threads:[~2016-01-29 5:23 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-01-29 5:23 [RFCv2 0/9] PAPR hash page table resizing (guest side) David Gibson
2016-01-29 5:23 ` [RFCv2 1/9] memblock: Don't mark memblock_phys_mem_size() as __init David Gibson
2016-02-01 5:50 ` Anshuman Khandual
2016-02-08 2:46 ` Paul Mackerras
2016-01-29 5:23 ` [RFCv2 2/9] arch/powerpc: Clean up error handling for htab_remove_mapping David Gibson
2016-02-01 5:54 ` Anshuman Khandual
2016-02-08 2:48 ` Paul Mackerras
2016-01-29 5:23 ` [RFCv2 3/9] arch/powerpc: Handle removing maybe-present bolted HPTEs David Gibson
2016-02-01 5:58 ` Anshuman Khandual
2016-02-02 1:08 ` David Gibson
2016-02-02 13:49 ` Denis Kirjanov
2016-02-08 2:54 ` Paul Mackerras
2016-02-09 0:43 ` David Gibson
2016-01-29 5:23 ` [RFCv2 4/9] arch/powerpc: Clean up memory hotplug failure paths David Gibson
2016-02-01 6:29 ` Anshuman Khandual
2016-02-02 15:04 ` Nathan Fontenot
2016-02-03 4:31 ` David Gibson
2016-02-08 5:47 ` Paul Mackerras
2016-01-29 5:23 ` [RFCv2 5/9] arch/powerpc: Split hash page table sizing heuristic into a helper David Gibson
2016-02-01 7:04 ` Anshuman Khandual
2016-02-02 1:04 ` David Gibson
2016-02-04 10:56 ` Anshuman Khandual
2016-02-08 5:57 ` Paul Mackerras
2016-01-29 5:24 ` [RFCv2 6/9] pseries: Add hypercall wrappers for hash page table resizing David Gibson
2016-02-01 7:11 ` Anshuman Khandual
2016-02-02 0:58 ` David Gibson
2016-02-04 11:11 ` Anshuman Khandual
2016-02-07 22:33 ` David Gibson
2016-02-08 5:58 ` Paul Mackerras
2016-01-29 5:24 ` David Gibson [this message]
2016-02-01 8:31 ` [RFCv2 7/9] pseries: Add support for hash " Anshuman Khandual
2016-02-01 11:04 ` David Gibson
2016-02-08 5:59 ` Paul Mackerras
2016-01-29 5:24 ` [RFCv2 8/9] pseries: Advertise HPT resizing support via CAS David Gibson
2016-02-01 8:36 ` Anshuman Khandual
2016-02-08 6:00 ` Paul Mackerras
2016-01-29 5:24 ` [RFCv2 9/9] pseries: Automatically resize HPT for memory hot add/remove David Gibson
2016-02-01 8:51 ` Anshuman Khandual
2016-02-01 10:55 ` David Gibson
2016-02-08 6:01 ` Paul Mackerras
2016-02-01 5:50 ` [RFCv2 0/9] PAPR hash page table resizing (guest side) Anshuman Khandual
2016-02-02 0:57 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1454045043-25545-8-git-send-email-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=aik@ozlabs.ru \
--cc=benh@kernel.crashing.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=lvivier@redhat.com \
--cc=mpe@ellerman.id.au \
--cc=paulus@samba.org \
--cc=thuth@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).