* [PATCH v8 bpf-next 1/9] x86/Kconfig: select HAVE_ARCH_HUGE_VMALLOC with HAVE_ARCH_HUGE_VMAP
2022-02-01 6:27 [PATCH v8 bpf-next 0/9] bpf_prog_pack allocator Song Liu
@ 2022-02-01 6:27 ` Song Liu
2022-02-01 6:27 ` [PATCH v8 bpf-next 2/9] bpf: use bytes instead of pages for bpf_jit_[charge|uncharge]_modmem Song Liu
` (7 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Song Liu @ 2022-02-01 6:27 UTC (permalink / raw)
To: bpf, netdev, linux-kernel
Cc: ast, daniel, andrii, kernel-team, peterz, x86, iii, Song Liu
From: Song Liu <songliubraving@fb.com>
This enables module_alloc() to allocate huge pages for 2MB+ requests.
To check the effect of this change, we need to enable
CONFIG_PTDUMP_DEBUGFS and call module_alloc(2MB). Before the change,
/sys/kernel/debug/page_tables/kernel shows pte for this map. With the
change, /sys/kernel/debug/page_tables/kernel shows pmd for this map.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
arch/x86/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6fddb63271d9..e0e0d00cf103 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -159,6 +159,7 @@ config X86
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
+ select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if X86_64
--
2.30.2
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v8 bpf-next 2/9] bpf: use bytes instead of pages for bpf_jit_[charge|uncharge]_modmem
2022-02-01 6:27 [PATCH v8 bpf-next 0/9] bpf_prog_pack allocator Song Liu
2022-02-01 6:27 ` [PATCH v8 bpf-next 1/9] x86/Kconfig: select HAVE_ARCH_HUGE_VMALLOC with HAVE_ARCH_HUGE_VMAP Song Liu
@ 2022-02-01 6:27 ` Song Liu
2022-02-01 6:27 ` [PATCH v8 bpf-next 3/9] bpf: use size instead of pages in bpf_binary_header Song Liu
` (6 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Song Liu @ 2022-02-01 6:27 UTC (permalink / raw)
To: bpf, netdev, linux-kernel
Cc: ast, daniel, andrii, kernel-team, peterz, x86, iii, Song Liu
From: Song Liu <songliubraving@fb.com>
This enables sub-page memory charge and allocation.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
include/linux/bpf.h | 4 ++--
kernel/bpf/core.c | 17 ++++++++---------
kernel/bpf/trampoline.c | 6 +++---
3 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6eb0b180d33b..366f88afd56b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -846,8 +846,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
-int bpf_jit_charge_modmem(u32 pages);
-void bpf_jit_uncharge_modmem(u32 pages);
+int bpf_jit_charge_modmem(u32 size);
+void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
#else
static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 04a8d5bea552..6ca0550c4b24 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -833,12 +833,11 @@ static int __init bpf_jit_charge_init(void)
}
pure_initcall(bpf_jit_charge_init);
-int bpf_jit_charge_modmem(u32 pages)
+int bpf_jit_charge_modmem(u32 size)
{
- if (atomic_long_add_return(pages, &bpf_jit_current) >
- (bpf_jit_limit >> PAGE_SHIFT)) {
+ if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) {
if (!bpf_capable()) {
- atomic_long_sub(pages, &bpf_jit_current);
+ atomic_long_sub(size, &bpf_jit_current);
return -EPERM;
}
}
@@ -846,9 +845,9 @@ int bpf_jit_charge_modmem(u32 pages)
return 0;
}
-void bpf_jit_uncharge_modmem(u32 pages)
+void bpf_jit_uncharge_modmem(u32 size)
{
- atomic_long_sub(pages, &bpf_jit_current);
+ atomic_long_sub(size, &bpf_jit_current);
}
void *__weak bpf_jit_alloc_exec(unsigned long size)
@@ -879,11 +878,11 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
pages = size / PAGE_SIZE;
- if (bpf_jit_charge_modmem(pages))
+ if (bpf_jit_charge_modmem(size))
return NULL;
hdr = bpf_jit_alloc_exec(size);
if (!hdr) {
- bpf_jit_uncharge_modmem(pages);
+ bpf_jit_uncharge_modmem(size);
return NULL;
}
@@ -906,7 +905,7 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
u32 pages = hdr->pages;
bpf_jit_free_exec(hdr);
- bpf_jit_uncharge_modmem(pages);
+ bpf_jit_uncharge_modmem(pages << PAGE_SHIFT);
}
/* This symbol is only overridden by archs that have different
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 4b6974a195c1..e76a488c09c3 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -213,7 +213,7 @@ static void __bpf_tramp_image_put_deferred(struct work_struct *work)
im = container_of(work, struct bpf_tramp_image, work);
bpf_image_ksym_del(&im->ksym);
bpf_jit_free_exec(im->image);
- bpf_jit_uncharge_modmem(1);
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
percpu_ref_exit(&im->pcref);
kfree_rcu(im, rcu);
}
@@ -310,7 +310,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
if (!im)
goto out;
- err = bpf_jit_charge_modmem(1);
+ err = bpf_jit_charge_modmem(PAGE_SIZE);
if (err)
goto out_free_im;
@@ -332,7 +332,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
out_free_image:
bpf_jit_free_exec(im->image);
out_uncharge:
- bpf_jit_uncharge_modmem(1);
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
out_free_im:
kfree(im);
out:
--
2.30.2
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v8 bpf-next 3/9] bpf: use size instead of pages in bpf_binary_header
2022-02-01 6:27 [PATCH v8 bpf-next 0/9] bpf_prog_pack allocator Song Liu
2022-02-01 6:27 ` [PATCH v8 bpf-next 1/9] x86/Kconfig: select HAVE_ARCH_HUGE_VMALLOC with HAVE_ARCH_HUGE_VMAP Song Liu
2022-02-01 6:27 ` [PATCH v8 bpf-next 2/9] bpf: use bytes instead of pages for bpf_jit_[charge|uncharge]_modmem Song Liu
@ 2022-02-01 6:27 ` Song Liu
2022-02-01 6:27 ` [PATCH v8 bpf-next 4/9] bpf: use prog->jited_len in bpf_prog_ksym_set_addr() Song Liu
` (5 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Song Liu @ 2022-02-01 6:27 UTC (permalink / raw)
To: bpf, netdev, linux-kernel
Cc: ast, daniel, andrii, kernel-team, peterz, x86, iii, Song Liu
From: Song Liu <songliubraving@fb.com>
This is necessary to charge sub page memory for the BPF program.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
include/linux/filter.h | 6 +++---
kernel/bpf/core.c | 11 +++++------
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d23e999dc032..5855eb474c62 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,7 +548,7 @@ struct sock_fprog_kern {
#define BPF_IMAGE_ALIGNMENT 8
struct bpf_binary_header {
- u32 pages;
+ u32 size;
u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
};
@@ -886,8 +886,8 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
{
set_vm_flush_reset_perms(hdr);
- set_memory_ro((unsigned long)hdr, hdr->pages);
- set_memory_x((unsigned long)hdr, hdr->pages);
+ set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
+ set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
}
static inline struct bpf_binary_header *
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 6ca0550c4b24..14199228a6f0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -543,7 +543,7 @@ bpf_prog_ksym_set_addr(struct bpf_prog *prog)
WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
prog->aux->ksym.start = (unsigned long) prog->bpf_func;
- prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE;
+ prog->aux->ksym.end = addr + hdr->size;
}
static void
@@ -866,7 +866,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
struct bpf_binary_header *hdr;
- u32 size, hole, start, pages;
+ u32 size, hole, start;
WARN_ON_ONCE(!is_power_of_2(alignment) ||
alignment > BPF_IMAGE_ALIGNMENT);
@@ -876,7 +876,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
* random section of illegal instructions.
*/
size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
- pages = size / PAGE_SIZE;
if (bpf_jit_charge_modmem(size))
return NULL;
@@ -889,7 +888,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
/* Fill space with illegal/arch-dep instructions. */
bpf_fill_ill_insns(hdr, size);
- hdr->pages = pages;
+ hdr->size = size;
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
PAGE_SIZE - sizeof(*hdr));
start = (get_random_int() % hole) & ~(alignment - 1);
@@ -902,10 +901,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
- u32 pages = hdr->pages;
+ u32 size = hdr->size;
bpf_jit_free_exec(hdr);
- bpf_jit_uncharge_modmem(pages << PAGE_SHIFT);
+ bpf_jit_uncharge_modmem(size);
}
/* This symbol is only overridden by archs that have different
--
2.30.2
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v8 bpf-next 4/9] bpf: use prog->jited_len in bpf_prog_ksym_set_addr()
2022-02-01 6:27 [PATCH v8 bpf-next 0/9] bpf_prog_pack allocator Song Liu
` (2 preceding siblings ...)
2022-02-01 6:27 ` [PATCH v8 bpf-next 3/9] bpf: use size instead of pages in bpf_binary_header Song Liu
@ 2022-02-01 6:27 ` Song Liu
2022-02-04 7:41 ` Song Liu
2022-02-01 6:27 ` [PATCH v8 bpf-next 5/9] x86/alternative: introduce text_poke_copy Song Liu
` (4 subsequent siblings)
8 siblings, 1 reply; 12+ messages in thread
From: Song Liu @ 2022-02-01 6:27 UTC (permalink / raw)
To: bpf, netdev, linux-kernel
Cc: ast, daniel, andrii, kernel-team, peterz, x86, iii, Song Liu
Using prog->jited_len is simpler and more accurate than current
estimation (header + header->size).
Signed-off-by: Song Liu <song@kernel.org>
---
kernel/bpf/core.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 14199228a6f0..e3fe53df0a71 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -537,13 +537,10 @@ long bpf_jit_limit_max __read_mostly;
static void
bpf_prog_ksym_set_addr(struct bpf_prog *prog)
{
- const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
- unsigned long addr = (unsigned long)hdr;
-
WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
prog->aux->ksym.start = (unsigned long) prog->bpf_func;
- prog->aux->ksym.end = addr + hdr->size;
+ prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len;
}
static void
--
2.30.2
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH v8 bpf-next 4/9] bpf: use prog->jited_len in bpf_prog_ksym_set_addr()
2022-02-01 6:27 ` [PATCH v8 bpf-next 4/9] bpf: use prog->jited_len in bpf_prog_ksym_set_addr() Song Liu
@ 2022-02-04 7:41 ` Song Liu
2022-02-04 18:05 ` Song Liu
0 siblings, 1 reply; 12+ messages in thread
From: Song Liu @ 2022-02-04 7:41 UTC (permalink / raw)
To: bpf, Networking, open list
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Kernel Team,
Peter Zijlstra, X86 ML, Ilya Leoshkevich
On Mon, Jan 31, 2022 at 10:31 PM Song Liu <song@kernel.org> wrote:
>
> Using prog->jited_len is simpler and more accurate than current
> estimation (header + header->size).
>
> Signed-off-by: Song Liu <song@kernel.org>
Hmm... CI [1] reports error on test_progs 159/tailcalls, and bisect points to
this one. However, I couldn't figure out why this breaks tail call.
round_up(PAGE_SIZE) does fix it though. But that won't be accurate, right?
Any suggestions on what could be the reason for these failures?
Thanks,
Song
[1] https://github.com/kernel-patches/bpf/runs/5060194776?check_suite_focus=true
> ---
> kernel/bpf/core.c | 5 +----
> 1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 14199228a6f0..e3fe53df0a71 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -537,13 +537,10 @@ long bpf_jit_limit_max __read_mostly;
> static void
> bpf_prog_ksym_set_addr(struct bpf_prog *prog)
> {
> - const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
> - unsigned long addr = (unsigned long)hdr;
> -
> WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
>
> prog->aux->ksym.start = (unsigned long) prog->bpf_func;
> - prog->aux->ksym.end = addr + hdr->size;
> + prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len;
> }
>
> static void
> --
> 2.30.2
>
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH v8 bpf-next 4/9] bpf: use prog->jited_len in bpf_prog_ksym_set_addr()
2022-02-04 7:41 ` Song Liu
@ 2022-02-04 18:05 ` Song Liu
0 siblings, 0 replies; 12+ messages in thread
From: Song Liu @ 2022-02-04 18:05 UTC (permalink / raw)
To: Song Liu
Cc: bpf, Networking, open list, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Kernel Team, Peter Zijlstra, X86 ML,
Ilya Leoshkevich
> On Feb 3, 2022, at 11:41 PM, Song Liu <song@kernel.org> wrote:
>
> On Mon, Jan 31, 2022 at 10:31 PM Song Liu <song@kernel.org> wrote:
>>
>> Using prog->jited_len is simpler and more accurate than current
>> estimation (header + header->size).
>>
>> Signed-off-by: Song Liu <song@kernel.org>
>
> Hmm... CI [1] reports error on test_progs 159/tailcalls, and bisect points to
> this one. However, I couldn't figure out why this breaks tail call.
> round_up(PAGE_SIZE) does fix it though. But that won't be accurate, right?
>
> Any suggestions on what could be the reason for these failures?
>
> Thanks,
> Song
>
> [1] https://github.com/kernel-patches/bpf/runs/5060194776?check_suite_focus=true
I guess this is the missing piece:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1ae41d0cf96c..bbef86cb4e72 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13067,6 +13067,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
prog->jited = 1;
prog->bpf_func = func[0]->bpf_func;
+ prog->jited_len = func[0]->jited_len;
prog->aux->func = func;
prog->aux->func_cnt = env->subprog_cnt;
bpf_prog_jit_attempt_done(prog);
Will send v9 with this.
>
>> ---
>> kernel/bpf/core.c | 5 +----
>> 1 file changed, 1 insertion(+), 4 deletions(-)
>>
>> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
>> index 14199228a6f0..e3fe53df0a71 100644
>> --- a/kernel/bpf/core.c
>> +++ b/kernel/bpf/core.c
>> @@ -537,13 +537,10 @@ long bpf_jit_limit_max __read_mostly;
>> static void
>> bpf_prog_ksym_set_addr(struct bpf_prog *prog)
>> {
>> - const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
>> - unsigned long addr = (unsigned long)hdr;
>> -
>> WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
>>
>> prog->aux->ksym.start = (unsigned long) prog->bpf_func;
>> - prog->aux->ksym.end = addr + hdr->size;
>> + prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len;
>> }
>>
>> static void
>> --
>> 2.30.2
>>
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v8 bpf-next 5/9] x86/alternative: introduce text_poke_copy
2022-02-01 6:27 [PATCH v8 bpf-next 0/9] bpf_prog_pack allocator Song Liu
` (3 preceding siblings ...)
2022-02-01 6:27 ` [PATCH v8 bpf-next 4/9] bpf: use prog->jited_len in bpf_prog_ksym_set_addr() Song Liu
@ 2022-02-01 6:27 ` Song Liu
2022-02-01 6:28 ` [PATCH v8 bpf-next 6/9] bpf: introduce bpf_arch_text_copy Song Liu
` (3 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Song Liu @ 2022-02-01 6:27 UTC (permalink / raw)
To: bpf, netdev, linux-kernel
Cc: ast, daniel, andrii, kernel-team, peterz, x86, iii, Song Liu
This will be used by BPF jit compiler to dump JITed binary to a RX huge
page, and thus allow multiple BPF programs to share a huge (2MB) page.
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Song Liu <song@kernel.org>
---
arch/x86/include/asm/text-patching.h | 1 +
arch/x86/kernel/alternative.c | 34 ++++++++++++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index b7421780e4e9..4cc18ba1b75e 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -44,6 +44,7 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
+extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5007c3ffe96f..018b61febf0e 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1102,6 +1102,40 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
return __text_poke(addr, opcode, len);
}
+/**
+ * text_poke_copy - Copy instructions into (an unused part of) RX memory
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy, could be more than 2x PAGE_SIZE
+ *
+ * Not safe against concurrent execution; useful for JITs to dump
+ * new code blocks into unused regions of RX memory. Can be used in
+ * conjunction with synchronize_rcu_tasks() to wait for existing
+ * execution to quiesce after having made sure no existing function
+ * pointers are live.
+ */
+void *text_poke_copy(void *addr, const void *opcode, size_t len)
+{
+ unsigned long start = (unsigned long)addr;
+ size_t patched = 0;
+
+ if (WARN_ON_ONCE(core_kernel_text(start)))
+ return NULL;
+
+ mutex_lock(&text_mutex);
+ while (patched < len) {
+ unsigned long ptr = start + patched;
+ size_t s;
+
+ s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
+
+ __text_poke((void *)ptr, opcode + patched, s);
+ patched += s;
+ }
+ mutex_unlock(&text_mutex);
+ return addr;
+}
+
static void do_sync_core(void *info)
{
sync_core();
--
2.30.2
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v8 bpf-next 6/9] bpf: introduce bpf_arch_text_copy
2022-02-01 6:27 [PATCH v8 bpf-next 0/9] bpf_prog_pack allocator Song Liu
` (4 preceding siblings ...)
2022-02-01 6:27 ` [PATCH v8 bpf-next 5/9] x86/alternative: introduce text_poke_copy Song Liu
@ 2022-02-01 6:28 ` Song Liu
2022-02-01 6:28 ` [PATCH v8 bpf-next 7/9] bpf: introduce bpf_prog_pack allocator Song Liu
` (2 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Song Liu @ 2022-02-01 6:28 UTC (permalink / raw)
To: bpf, netdev, linux-kernel
Cc: ast, daniel, andrii, kernel-team, peterz, x86, iii, Song Liu
This will be used to copy JITed text to RO protected module memory. On
x86, bpf_arch_text_copy is implemented with text_poke_copy.
bpf_arch_text_copy returns pointer to dst on success, and ERR_PTR(errno)
on errors.
Signed-off-by: Song Liu <song@kernel.org>
---
arch/x86/net/bpf_jit_comp.c | 7 +++++++
include/linux/bpf.h | 2 ++
kernel/bpf/core.c | 5 +++++
3 files changed, 14 insertions(+)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 36f6fc3e6e69..c13d148f7396 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2412,3 +2412,10 @@ bool bpf_jit_supports_kfunc_call(void)
{
return true;
}
+
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ if (text_poke_copy(dst, src, len) == NULL)
+ return ERR_PTR(-EINVAL);
+ return dst;
+}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 366f88afd56b..ea0d7fd4a410 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2362,6 +2362,8 @@ enum bpf_text_poke_type {
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *addr1, void *addr2);
+void *bpf_arch_text_copy(void *dst, void *src, size_t len);
+
struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index e3fe53df0a71..a5ec480f9862 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2440,6 +2440,11 @@ int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return -ENOTSUPP;
}
+void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ return ERR_PTR(-ENOTSUPP);
+}
+
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
--
2.30.2
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v8 bpf-next 7/9] bpf: introduce bpf_prog_pack allocator
2022-02-01 6:27 [PATCH v8 bpf-next 0/9] bpf_prog_pack allocator Song Liu
` (5 preceding siblings ...)
2022-02-01 6:28 ` [PATCH v8 bpf-next 6/9] bpf: introduce bpf_arch_text_copy Song Liu
@ 2022-02-01 6:28 ` Song Liu
2022-02-01 6:28 ` [PATCH v8 bpf-next 8/9] bpf: introduce bpf_jit_binary_pack_[alloc|finalize|free] Song Liu
2022-02-01 6:28 ` [PATCH v8 bpf-next 9/9] bpf, x86_64: use bpf_jit_binary_pack_alloc Song Liu
8 siblings, 0 replies; 12+ messages in thread
From: Song Liu @ 2022-02-01 6:28 UTC (permalink / raw)
To: bpf, netdev, linux-kernel
Cc: ast, daniel, andrii, kernel-team, peterz, x86, iii, Song Liu
Most BPF programs are small, but they consume a page each. For systems
with busy traffic and many BPF programs, this could add significant
pressure to instruction TLB.
Introduce bpf_prog_pack allocator to pack multiple BPF programs in a huge
page. The memory is then allocated in 64 byte chunks.
Memory allocated by bpf_prog_pack allocator is RO protected after initial
allocation. To write to it, the user (jit engine) needs to use the text poke
API.
Signed-off-by: Song Liu <song@kernel.org>
---
kernel/bpf/core.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 127 insertions(+)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a5ec480f9862..7ae590897b73 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -805,6 +805,133 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
return slot;
}
+/*
+ * BPF program pack allocator.
+ *
+ * Most BPF programs are pretty small. Allocating a whole page for each
+ * program is sometimes a waste. Many small BPF programs also add pressure
+ * to instruction TLB. To solve this issue, we introduce a BPF program pack
+ * allocator. The prog_pack allocator uses HPAGE_PMD_SIZE page (2MB on x86)
+ * to host BPF programs.
+ */
+#define BPF_PROG_PACK_SIZE HPAGE_PMD_SIZE
+#define BPF_PROG_CHUNK_SHIFT 6
+#define BPF_PROG_CHUNK_SIZE (1 << BPF_PROG_CHUNK_SHIFT)
+#define BPF_PROG_CHUNK_MASK (~(BPF_PROG_CHUNK_SIZE - 1))
+#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE)
+
+struct bpf_prog_pack {
+ struct list_head list;
+ void *ptr;
+ unsigned long bitmap[BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)];
+};
+
+#define BPF_PROG_MAX_PACK_PROG_SIZE HPAGE_PMD_SIZE
+#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
+
+static DEFINE_MUTEX(pack_mutex);
+static LIST_HEAD(pack_list);
+
+static struct bpf_prog_pack *alloc_new_pack(void)
+{
+ struct bpf_prog_pack *pack;
+
+ pack = kzalloc(sizeof(*pack), GFP_KERNEL);
+ if (!pack)
+ return NULL;
+ pack->ptr = module_alloc(BPF_PROG_PACK_SIZE);
+ if (!pack->ptr) {
+ kfree(pack);
+ return NULL;
+ }
+ bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
+ list_add_tail(&pack->list, &pack_list);
+
+ set_vm_flush_reset_perms(pack->ptr);
+ set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+ set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+ return pack;
+}
+
+static void *bpf_prog_pack_alloc(u32 size)
+{
+ unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
+ struct bpf_prog_pack *pack;
+ unsigned long pos;
+ void *ptr = NULL;
+
+ if (size > BPF_PROG_MAX_PACK_PROG_SIZE) {
+ size = round_up(size, PAGE_SIZE);
+ ptr = module_alloc(size);
+ if (ptr) {
+ set_vm_flush_reset_perms(ptr);
+ set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
+ set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
+ }
+ return ptr;
+ }
+ mutex_lock(&pack_mutex);
+ list_for_each_entry(pack, &pack_list, list) {
+ pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
+ nbits, 0);
+ if (pos < BPF_PROG_CHUNK_COUNT)
+ goto found_free_area;
+ }
+
+ pack = alloc_new_pack();
+ if (!pack)
+ goto out;
+
+ pos = 0;
+
+found_free_area:
+ bitmap_set(pack->bitmap, pos, nbits);
+ ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT);
+
+out:
+ mutex_unlock(&pack_mutex);
+ return ptr;
+}
+
+static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
+{
+ struct bpf_prog_pack *pack = NULL, *tmp;
+ unsigned int nbits;
+ unsigned long pos;
+ void *pack_ptr;
+
+ if (hdr->size > BPF_PROG_MAX_PACK_PROG_SIZE) {
+ module_memfree(hdr);
+ return;
+ }
+
+ pack_ptr = (void *)((unsigned long)hdr & ~(BPF_PROG_PACK_SIZE - 1));
+ mutex_lock(&pack_mutex);
+
+ list_for_each_entry(tmp, &pack_list, list) {
+ if (tmp->ptr == pack_ptr) {
+ pack = tmp;
+ break;
+ }
+ }
+
+ if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
+ goto out;
+
+ nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
+ pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;
+
+ bitmap_clear(pack->bitmap, pos, nbits);
+ if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
+ BPF_PROG_CHUNK_COUNT, 0) == 0) {
+ list_del(&pack->list);
+ module_memfree(pack->ptr);
+ kfree(pack);
+ }
+out:
+ mutex_unlock(&pack_mutex);
+}
+
static atomic_long_t bpf_jit_current;
/* Can be overridden by an arch's JIT compiler if it has a custom,
--
2.30.2
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v8 bpf-next 8/9] bpf: introduce bpf_jit_binary_pack_[alloc|finalize|free]
2022-02-01 6:27 [PATCH v8 bpf-next 0/9] bpf_prog_pack allocator Song Liu
` (6 preceding siblings ...)
2022-02-01 6:28 ` [PATCH v8 bpf-next 7/9] bpf: introduce bpf_prog_pack allocator Song Liu
@ 2022-02-01 6:28 ` Song Liu
2022-02-01 6:28 ` [PATCH v8 bpf-next 9/9] bpf, x86_64: use bpf_jit_binary_pack_alloc Song Liu
8 siblings, 0 replies; 12+ messages in thread
From: Song Liu @ 2022-02-01 6:28 UTC (permalink / raw)
To: bpf, netdev, linux-kernel
Cc: ast, daniel, andrii, kernel-team, peterz, x86, iii, Song Liu
From: Song Liu <songliubraving@fb.com>
This is the jit binary allocator built on top of bpf_prog_pack.
bpf_prog_pack allocates RO memory, which cannot be used directly by the
JIT engine. Therefore, a temporary rw buffer is allocated for the JIT
engine. Once JIT is done, bpf_jit_binary_pack_finalize is used to copy
the program to the RO memory.
bpf_jit_binary_pack_alloc reserves 16 bytes of extra space for illegal
instructions, which is smaller than the 128 bytes of space reserved by
bpf_jit_binary_alloc. This change is necessary for bpf_jit_binary_hdr
to find the correct header. Also, flag use_bpf_prog_pack is added to
differentiate a program allocated by bpf_jit_binary_pack_alloc.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
include/linux/bpf.h | 1 +
include/linux/filter.h | 21 ++++----
kernel/bpf/core.c | 108 ++++++++++++++++++++++++++++++++++++++++-
3 files changed, 120 insertions(+), 10 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ea0d7fd4a410..2fc7e5c5ef41 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -953,6 +953,7 @@ struct bpf_prog_aux {
bool sleepable;
bool tail_call_reachable;
bool xdp_has_frags;
+ bool use_bpf_prog_pack;
struct hlist_node tramp_hlist;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5855eb474c62..1cb1af917617 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -890,15 +890,6 @@ static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
}
-static inline struct bpf_binary_header *
-bpf_jit_binary_hdr(const struct bpf_prog *fp)
-{
- unsigned long real_start = (unsigned long)fp->bpf_func;
- unsigned long addr = real_start & PAGE_MASK;
-
- return (void *)addr;
-}
-
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
{
@@ -1068,6 +1059,18 @@ void *bpf_jit_alloc_exec(unsigned long size);
void bpf_jit_free_exec(void *addr);
void bpf_jit_free(struct bpf_prog *fp);
+struct bpf_binary_header *
+bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image,
+ unsigned int alignment,
+ struct bpf_binary_header **rw_hdr,
+ u8 **rw_image,
+ bpf_jit_fill_hole_t bpf_fill_ill_insns);
+int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
+ struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header);
+void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header);
+
int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
struct bpf_jit_poke_descriptor *poke);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7ae590897b73..306aa63fa58e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1031,6 +1031,109 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
bpf_jit_uncharge_modmem(size);
}
+/* Allocate jit binary from bpf_prog_pack allocator.
+ * Since the allocated memory is RO+X, the JIT engine cannot write directly
+ * to the memory. To solve this problem, a RW buffer is also allocated at
+ * the same time. The JIT engine should calculate offsets based on the
+ * RO memory address, but write JITed program to the RW buffer. Once the
+ * JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
+ * the JITed program to the RO memory.
+ */
+struct bpf_binary_header *
+bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
+ unsigned int alignment,
+ struct bpf_binary_header **rw_header,
+ u8 **rw_image,
+ bpf_jit_fill_hole_t bpf_fill_ill_insns)
+{
+ struct bpf_binary_header *ro_header;
+ u32 size, hole, start;
+
+ WARN_ON_ONCE(!is_power_of_2(alignment) ||
+ alignment > BPF_IMAGE_ALIGNMENT);
+
+ /* add 16 bytes for a random section of illegal instructions */
+ size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);
+
+ if (bpf_jit_charge_modmem(size))
+ return NULL;
+ ro_header = bpf_prog_pack_alloc(size);
+ if (!ro_header) {
+ bpf_jit_uncharge_modmem(size);
+ return NULL;
+ }
+
+ *rw_header = kvmalloc(size, GFP_KERNEL);
+ if (!*rw_header) {
+ bpf_prog_pack_free(ro_header);
+ bpf_jit_uncharge_modmem(size);
+ return NULL;
+ }
+
+ /* Fill space with illegal/arch-dep instructions. */
+ bpf_fill_ill_insns(*rw_header, size);
+ (*rw_header)->size = size;
+
+ hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
+ BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
+ start = (get_random_int() % hole) & ~(alignment - 1);
+
+ *image_ptr = &ro_header->image[start];
+ *rw_image = &(*rw_header)->image[start];
+
+ return ro_header;
+}
+
+/* Copy JITed text from rw_header to its final location, the ro_header. */
+int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
+ struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header)
+{
+ void *ptr;
+
+ ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);
+
+ kvfree(rw_header);
+
+ if (IS_ERR(ptr)) {
+ bpf_prog_pack_free(ro_header);
+ return PTR_ERR(ptr);
+ }
+ prog->aux->use_bpf_prog_pack = true;
+ return 0;
+}
+
+/* bpf_jit_binary_pack_free is called in two different scenarios:
+ * 1) when the program is freed (after bpf_jit_binary_pack_finalize);
+ * 2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
+ * For case 2), we need to free both the RO memory and the RW buffer.
+ * Also, ro_header->size in 2) is not properly set yet, so rw_header->size
+ * is used for uncharge.
+ */
+void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header)
+{
+ u32 size = rw_header ? rw_header->size : ro_header->size;
+
+ bpf_prog_pack_free(ro_header);
+ kvfree(rw_header);
+ bpf_jit_uncharge_modmem(size);
+}
+
+static inline struct bpf_binary_header *
+bpf_jit_binary_hdr(const struct bpf_prog *fp)
+{
+ unsigned long real_start = (unsigned long)fp->bpf_func;
+ unsigned long addr;
+
+ if (fp->aux->use_bpf_prog_pack)
+ addr = real_start & BPF_PROG_CHUNK_MASK;
+ else
+ addr = real_start & PAGE_MASK;
+
+ return (void *)addr;
+}
+
/* This symbol is only overridden by archs that have different
* requirements than the usual eBPF JITs, f.e. when they only
* implement cBPF JIT, do not set images read-only, etc.
@@ -1040,7 +1143,10 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
if (fp->jited) {
struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
- bpf_jit_binary_free(hdr);
+ if (fp->aux->use_bpf_prog_pack)
+ bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */);
+ else
+ bpf_jit_binary_free(hdr);
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
}
--
2.30.2
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v8 bpf-next 9/9] bpf, x86_64: use bpf_jit_binary_pack_alloc
2022-02-01 6:27 [PATCH v8 bpf-next 0/9] bpf_prog_pack allocator Song Liu
` (7 preceding siblings ...)
2022-02-01 6:28 ` [PATCH v8 bpf-next 8/9] bpf: introduce bpf_jit_binary_pack_[alloc|finalize|free] Song Liu
@ 2022-02-01 6:28 ` Song Liu
8 siblings, 0 replies; 12+ messages in thread
From: Song Liu @ 2022-02-01 6:28 UTC (permalink / raw)
To: bpf, netdev, linux-kernel
Cc: ast, daniel, andrii, kernel-team, peterz, x86, iii, Song Liu
From: Song Liu <songliubraving@fb.com>
Use bpf_jit_binary_pack_alloc in x86_64 jit. The jit engine first writes
the program to the rw buffer. When the jit is done, the program is copied
to the final location with bpf_jit_binary_pack_finalize.
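Note that bpf_tail_call_direct_fixup must now run after finalize, when the
program is already at its final read-only location and can only be patched
with text_poke_bp. Therefore, the text_live = false logic in __bpf_arch_text_poke is no longer needed.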
Signed-off-by: Song Liu <songliubraving@fb.com>
---
arch/x86/net/bpf_jit_comp.c | 58 ++++++++++++++++++++-----------------
1 file changed, 31 insertions(+), 27 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c13d148f7396..643f38b91e30 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -330,8 +330,7 @@ static int emit_jump(u8 **pprog, void *func, void *ip)
}
static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
- void *old_addr, void *new_addr,
- const bool text_live)
+ void *old_addr, void *new_addr)
{
const u8 *nop_insn = x86_nops[5];
u8 old_insn[X86_PATCH_SIZE];
@@ -365,10 +364,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
goto out;
ret = 1;
if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
- if (text_live)
- text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
- else
- memcpy(ip, new_insn, X86_PATCH_SIZE);
+ text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
ret = 0;
}
out:
@@ -384,7 +380,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
/* BPF poking in modules is not supported */
return -EINVAL;
- return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+ return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
}
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
@@ -558,24 +554,15 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
mutex_lock(&array->aux->poke_mutex);
target = array->ptrs[poke->tail_call.key];
if (target) {
- /* Plain memcpy is used when image is not live yet
- * and still not locked as read-only. Once poke
- * location is active (poke->tailcall_target_stable),
- * any parallel bpf_arch_text_poke() might occur
- * still on the read-write image until we finally
- * locked it as read-only. Both modifications on
- * the given image are under text_mutex to avoid
- * interference.
- */
ret = __bpf_arch_text_poke(poke->tailcall_target,
BPF_MOD_JUMP, NULL,
(u8 *)target->bpf_func +
- poke->adj_off, false);
+ poke->adj_off);
BUG_ON(ret < 0);
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
BPF_MOD_JUMP,
(u8 *)poke->tailcall_target +
- X86_PATCH_SIZE, NULL, false);
+ X86_PATCH_SIZE, NULL);
BUG_ON(ret < 0);
}
WRITE_ONCE(poke->tailcall_target_stable, true);
@@ -866,7 +853,7 @@ static void emit_nops(u8 **pprog, int len)
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
-static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
@@ -893,8 +880,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
push_callee_regs(&prog, callee_regs_used);
ilen = prog - temp;
- if (image)
- memcpy(image + proglen, temp, ilen);
+ if (rw_image)
+ memcpy(rw_image + proglen, temp, ilen);
proglen += ilen;
addrs[0] = proglen;
prog = temp;
@@ -1323,6 +1310,9 @@ st: if (is_imm8(insn->off))
pr_err("extable->insn doesn't fit into 32-bit\n");
return -EFAULT;
}
+ /* switch ex to rw buffer for writes */
+ ex = (void *)rw_image + ((void *)ex - (void *)image);
+
ex->insn = delta;
ex->data = EX_TYPE_BPF;
@@ -1705,7 +1695,7 @@ st: if (is_imm8(insn->off))
pr_err("bpf_jit: fatal error\n");
return -EFAULT;
}
- memcpy(image + proglen, temp, ilen);
+ memcpy(rw_image + proglen, temp, ilen);
}
proglen += ilen;
addrs[i] = proglen;
@@ -2246,6 +2236,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
}
struct x64_jit_data {
+ struct bpf_binary_header *rw_header;
struct bpf_binary_header *header;
int *addrs;
u8 *image;
@@ -2258,6 +2249,7 @@ struct x64_jit_data {
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
+ struct bpf_binary_header *rw_header = NULL;
struct bpf_binary_header *header = NULL;
struct bpf_prog *tmp, *orig_prog = prog;
struct x64_jit_data *jit_data;
@@ -2266,6 +2258,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bool tmp_blinded = false;
bool extra_pass = false;
bool padding = false;
+ u8 *rw_image = NULL;
u8 *image = NULL;
int *addrs;
int pass;
@@ -2301,6 +2294,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
oldproglen = jit_data->proglen;
image = jit_data->image;
header = jit_data->header;
+ rw_header = jit_data->rw_header;
+ rw_image = (void *)rw_header + ((void *)image - (void *)header);
extra_pass = true;
padding = true;
goto skip_init_addrs;
@@ -2331,12 +2326,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
for (pass = 0; pass < MAX_PASSES || image; pass++) {
if (!padding && pass >= PADDING_PASSES)
padding = true;
- proglen = do_jit(prog, addrs, image, oldproglen, &ctx, padding);
+ proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding);
if (proglen <= 0) {
out_image:
image = NULL;
if (header)
- bpf_jit_binary_free(header);
+ bpf_jit_binary_pack_free(header, rw_header);
prog = orig_prog;
goto out_addrs;
}
@@ -2360,8 +2355,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
sizeof(struct exception_table_entry);
/* allocate module memory for x86 insns and extable */
- header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
- &image, align, jit_fill_hole);
+ header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size,
+ &image, align, &rw_header, &rw_image,
+ jit_fill_hole);
if (!header) {
prog = orig_prog;
goto out_addrs;
@@ -2377,14 +2373,22 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (image) {
if (!prog->is_func || extra_pass) {
+ /*
+ * bpf_jit_binary_pack_finalize fails in two scenarios:
+ * 1) header is not pointing to proper module memory;
+ * 2) the arch doesn't support bpf_arch_text_copy().
+ *
+ * Both cases are serious bugs, so we should not continue.
+ */
+ BUG_ON(bpf_jit_binary_pack_finalize(prog, header, rw_header));
bpf_tail_call_direct_fixup(prog);
- bpf_jit_binary_lock_ro(header);
} else {
jit_data->addrs = addrs;
jit_data->ctx = ctx;
jit_data->proglen = proglen;
jit_data->image = image;
jit_data->header = header;
+ jit_data->rw_header = rw_header;
}
prog->bpf_func = (void *)image;
prog->jited = 1;
--
2.30.2
^ permalink raw reply related [flat|nested] 12+ messages in thread