From: Song Liu <song@kernel.org>
To: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>
Cc: <akpm@linux-foundation.org>, <x86@kernel.org>,
<peterz@infradead.org>, <hch@lst.de>, <kernel-team@fb.com>,
<rick.p.edgecombe@intel.com>, <dave.hansen@intel.com>,
<urezki@gmail.com>, Song Liu <song@kernel.org>
Subject: [RFC v2 4/4] vmalloc_exec: share a huge page with kernel text
Date: Fri, 7 Oct 2022 16:43:15 -0700 [thread overview]
Message-ID: <20221007234315.2877365-5-song@kernel.org> (raw)
In-Reply-To: <20221007234315.2877365-1-song@kernel.org>
On x86 kernels, we allocate 2MB pages for kernel text only up to
round_down(_etext, 2MB). Therefore, the tail of the kernel text (from
round_down(_etext, 2MB) to _etext) is still on 4kB pages. With vmalloc_exec,
we can allocate 2MB pages up to round_up(_etext, 2MB), and use the rest of
the last 2MB page for modules and BPF programs.
Here is an example:
[root@eth50-1 ~]# grep _etext /proc/kallsyms
ffffffff82202a08 T _etext
[root@eth50-1 ~]# grep bpf_prog_ /proc/kallsyms | tail -n 3
ffffffff8220f920 t bpf_prog_cc61a5364ac11d93_handle__sched_wakeup [bpf]
ffffffff8220fa28 t bpf_prog_cc61a5364ac11d93_handle__sched_wakeup_new [bpf]
ffffffff8220fad4 t bpf_prog_3bf73fa16f5e3d92_handle__sched_switch [bpf]
[root@eth50-1 ~]# grep 0xffffffff82200000 /sys/kernel/debug/page_tables/kernel
0xffffffff82200000-0xffffffff82400000 2M ro PSE x pmd
[root@eth50-1 ~]# grep xfs_flush_inodes /proc/kallsyms
ffffffff822ba910 t xfs_flush_inodes_worker [xfs]
ffffffff822bc580 t xfs_flush_inodes [xfs]
ffffffff82200000-ffffffff82400000 is a single 2MB page that serves the tail
of the kernel text (_etext is at ffffffff82202a08), the xfs module, and BPF
programs.
Signed-off-by: Song Liu <song@kernel.org>
---
arch/x86/mm/init_64.c | 3 ++-
mm/vmalloc.c | 24 ++++++++++++++++++++++++
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0fe690ebc269..d94f196c541a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1367,12 +1367,13 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask)
int kernel_set_to_readonly;
+#define PMD_ALIGN(x) (((unsigned long)(x) + (PMD_SIZE - 1)) & PMD_MASK)
void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_text);
unsigned long rodata_start = PFN_ALIGN(__start_rodata);
unsigned long end = (unsigned long)__end_rodata_hpage_align;
- unsigned long text_end = PFN_ALIGN(_etext);
+ unsigned long text_end = PMD_ALIGN(_etext);
unsigned long rodata_end = PFN_ALIGN(__end_rodata);
unsigned long all_end;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9212ff96b871..41509bbec583 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -75,6 +75,9 @@ static const bool vmap_allow_huge = false;
#define PMD_ALIGN(addr) ALIGN(addr, PMD_SIZE)
#define PMD_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PMD_SIZE)
+static struct vm_struct text_tail_vm;
+static struct vmap_area text_tail_va;
+
bool is_vmalloc_addr(const void *x)
{
unsigned long addr = (unsigned long)kasan_reset_tag(x);
@@ -637,6 +640,8 @@ int is_vmalloc_or_module_addr(const void *x)
unsigned long addr = (unsigned long)kasan_reset_tag(x);
if (addr >= MODULES_VADDR && addr < MODULES_END)
return 1;
+ if (addr >= text_tail_va.va_start && addr < text_tail_va.va_end)
+ return 1;
#endif
return is_vmalloc_addr(x);
}
@@ -2422,6 +2427,24 @@ static void vmap_init_free_space(void)
}
}
+static void register_text_tail_vm(void)
+{
+ unsigned long start = PFN_ALIGN((unsigned long)_etext);
+ unsigned long end = PMD_ALIGN((unsigned long)_etext);
+ struct vmap_area *va;
+
+ va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
+ if (WARN_ON_ONCE(!va))
+ return;
+ text_tail_vm.addr = (void *)start;
+ text_tail_vm.size = end - start;
+ text_tail_va.va_start = start;
+ text_tail_va.va_end = end;
+ text_tail_va.vm = &text_tail_vm;
+ memcpy(va, &text_tail_va, sizeof(*va));
+ insert_vmap_area_augment(va, NULL, &free_text_area_root, &free_text_area_list);
+}
+
void __init vmalloc_init(void)
{
struct vmap_area *va;
@@ -2432,6 +2455,7 @@ void __init vmalloc_init(void)
* Create the cache for vmap_area objects.
*/
vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
+ register_text_tail_vm();
for_each_possible_cpu(i) {
struct vmap_block_queue *vbq;
--
2.30.2
next prev parent reply other threads:[~2022-10-07 23:46 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-07 23:43 [RFC v2 0/4] vmalloc_exec for modules and BPF programs Song Liu
2022-10-07 23:43 ` [RFC v2 1/4] vmalloc: introduce vmalloc_exec and vfree_exec Song Liu
2022-10-10 18:13 ` Edgecombe, Rick P
2022-10-10 19:04 ` Song Liu
2022-10-10 19:59 ` Edgecombe, Rick P
2022-10-07 23:43 ` [RFC v2 2/4] bpf: use vmalloc_exec Song Liu
2022-10-07 23:43 ` [RFC v2 3/4] modules, x86: use vmalloc_exec for module core Song Liu
2022-10-14 3:48 ` Aaron Lu
2022-10-14 6:07 ` Song Liu
[not found] ` <fb7a38faa52ce0f35061473c9c8b56394a726e59.camel@intel.com>
2022-10-14 18:26 ` Song Liu
2022-10-07 23:43 ` Song Liu [this message]
2022-10-10 18:32 ` [RFC v2 4/4] vmalloc_exec: share a huge page with kernel text Edgecombe, Rick P
2022-10-10 19:08 ` Song Liu
2022-10-10 20:09 ` Edgecombe, Rick P
[not found] ` <2B66E2E7-7D32-418C-9DFD-1E17180300B4@fb.com>
2022-10-11 20:40 ` Edgecombe, Rick P
2022-10-12 5:37 ` Song Liu
2022-10-12 18:38 ` Edgecombe, Rick P
2022-10-12 19:01 ` Song Liu
2022-10-08 0:17 ` [RFC v2 0/4] vmalloc_exec for modules and BPF programs Song Liu
2022-10-12 19:03 ` Song Liu
2022-10-17 7:26 ` Christoph Hellwig
2022-10-17 16:23 ` Song Liu
2022-10-18 14:50 ` Christoph Hellwig
2022-10-18 15:05 ` Song Liu
2022-10-18 15:40 ` Christoph Hellwig
2022-10-18 15:40 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221007234315.2877365-5-song@kernel.org \
--to=song@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=dave.hansen@intel.com \
--cc=hch@lst.de \
--cc=kernel-team@fb.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=peterz@infradead.org \
--cc=rick.p.edgecombe@intel.com \
--cc=urezki@gmail.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).