All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dennis Zhou <dennisszhou@gmail.com>
To: Yafang Shao <laoar.shao@gmail.com>
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org,
	quentin@isovalent.com, hannes@cmpxchg.org, mhocko@kernel.org,
	roman.gushchin@linux.dev, shakeelb@google.com,
	songmuchun@bytedance.com, akpm@linux-foundation.org,
	cl@linux.com, penberg@kernel.org, rientjes@google.com,
	iamjoonsoo.kim@lge.com, vbabka@suse.cz, linux-mm@kvack.org,
	bpf@vger.kernel.org
Subject: Re: [RFC PATCH bpf-next 07/10] mm: Add helper to recharge percpu address
Date: Wed, 22 Jun 2022 22:25:31 -0700	[thread overview]
Message-ID: <YrP5S64OsUD6Hmgo@fedora> (raw)
In-Reply-To: <20220619155032.32515-8-laoar.shao@gmail.com>

Hello,

On Sun, Jun 19, 2022 at 03:50:29PM +0000, Yafang Shao wrote:
> This patch introduces a helper to recharge the corresponding pages of
> a given percpu address. It is similar with how to recharge a kmalloc'ed
> address.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  include/linux/percpu.h |  1 +
>  mm/percpu.c            | 98 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 99 insertions(+)
> 
> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
> index f1ec5ad1351c..e88429410179 100644
> --- a/include/linux/percpu.h
> +++ b/include/linux/percpu.h
> @@ -128,6 +128,7 @@ extern void __init setup_per_cpu_areas(void);
>  extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1);
>  extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1);
>  extern void free_percpu(void __percpu *__pdata);
> +bool recharge_percpu(void __percpu *__pdata, int step);

Nit: can you add extern to keep the file consistent.

>  extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
>  
>  #define alloc_percpu_gfp(type, gfp)					\
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 3633eeefaa0d..fd81f4d79f2f 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -2310,6 +2310,104 @@ void free_percpu(void __percpu *ptr)
>  }
>  EXPORT_SYMBOL_GPL(free_percpu);
>  
> +#ifdef CONFIG_MEMCG_KMEM
> +bool recharge_percpu(void __percpu *ptr, int step)
> +{
> +	int bit_off, off, bits, size, end;
> +	struct obj_cgroup *objcg_old;
> +	struct obj_cgroup *objcg_new;
> +	struct pcpu_chunk *chunk;
> +	unsigned long flags;
> +	void *addr;
> +
> +	WARN_ON(!in_task());
> +
> +	if (!ptr)
> +		return true;
> +
> +	addr = __pcpu_ptr_to_addr(ptr);
> +	spin_lock_irqsave(&pcpu_lock, flags);
> +	chunk = pcpu_chunk_addr_search(addr);
> +	off = addr - chunk->base_addr;
> +	objcg_old = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
> +	if (!objcg_old && step != MEMCG_KMEM_POST_CHARGE) {
> +		spin_unlock_irqrestore(&pcpu_lock, flags);
> +		return true;
> +	}
> +
> +	bit_off = off / PCPU_MIN_ALLOC_SIZE;
> +	/* find end index */
> +	end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk),
> +			bit_off + 1);
> +	bits = end - bit_off;
> +	size = bits * PCPU_MIN_ALLOC_SIZE;
> +
> +	switch (step) {
> +	case MEMCG_KMEM_PRE_CHARGE:
> +		objcg_new = get_obj_cgroup_from_current();
> +		WARN_ON(!objcg_new);
> +		if (obj_cgroup_charge(objcg_new, GFP_KERNEL,
> +				      size * num_possible_cpus())) {
> +			obj_cgroup_put(objcg_new);
> +			spin_unlock_irqrestore(&pcpu_lock, flags);
> +			return false;
> +		}
> +		break;
> +	case MEMCG_KMEM_UNCHARGE:
> +		obj_cgroup_uncharge(objcg_old, size * num_possible_cpus());
> +		rcu_read_lock();
> +		mod_memcg_state(obj_cgroup_memcg(objcg_old), MEMCG_PERCPU_B,
> +			-(size * num_possible_cpus()));
> +		rcu_read_unlock();
> +		chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
> +		obj_cgroup_put(objcg_old);
> +		break;
> +	case MEMCG_KMEM_POST_CHARGE:
> +		rcu_read_lock();
> +		chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = obj_cgroup_from_current();
> +		mod_memcg_state(mem_cgroup_from_task(current), MEMCG_PERCPU_B,
> +			(size * num_possible_cpus()));
> +		rcu_read_unlock();
> +		break;
> +	case MEMCG_KMEM_CHARGE_ERR:
> +		/*
> +		 * In case fail to charge to the new one in the pre charge state,
> +		 * for example, we have pre-charged one memcg successfully but fail
> +		 * to pre-charge the second memcg, then we should uncharge the first
> +		 * memcg.
> +		 */
> +		objcg_new = obj_cgroup_from_current();
> +		obj_cgroup_uncharge(objcg_new, size * num_possible_cpus());
> +		obj_cgroup_put(objcg_new);
> +		rcu_read_lock();
> +		mod_memcg_state(obj_cgroup_memcg(objcg_new), MEMCG_PERCPU_B,
> +			-(size * num_possible_cpus()));
> +		rcu_read_unlock();
> +
> +		break;
> +	}

I'm not really the biggest fan of this step stuff. I see why you're
doing it because you want to do all or nothing recharging the percpu bpf
maps. Is there a way to have percpu own this logic and attempt to do all
or nothing instead? I realize bpf is likely the largest percpu user, but
the recharge_percpu() api seems to be more generic than forcing
potential other users in the future to open code it.

> +
> +	spin_unlock_irqrestore(&pcpu_lock, flags);
> +
> +	return true;
> +}
> +EXPORT_SYMBOL(recharge_percpu);
> +
> +#else /* CONFIG_MEMCG_KMEM */
> +
> +bool charge_percpu(void __percpu *ptr, bool charge)
> +{
> +	return true;
> +}
> +EXPORT_SYMBOL(charge_percpu);
> +
> +void uncharge_percpu(void __percpu *ptr)
> +{
> +}

I'm guessing this is supposed to be recharge_percpu() not
(un)charge_percpu().

> +EXPORT_SYMBOL(uncharge_percpu);
> +
> +#endif /* CONFIG_MEMCG_KMEM */
> +
>  bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
>  {
>  #ifdef CONFIG_SMP
> -- 
> 2.17.1
> 
> 

Thanks,
Dennis

  reply	other threads:[~2022-06-23  5:25 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-19 15:50 [RFC PATCH bpf-next 00/10] bpf, mm: Recharge pages when reuse bpf map Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 01/10] mm, memcg: Add a new helper memcg_should_recharge() Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 02/10] bpftool: Show memcg info of bpf map Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 03/10] mm, memcg: Add new helper obj_cgroup_from_current() Yafang Shao
2022-06-23  3:01   ` Roman Gushchin
2022-06-25 13:54     ` Yafang Shao
2022-06-26  1:52       ` Roman Gushchin
2022-06-19 15:50 ` [RFC PATCH bpf-next 04/10] mm, memcg: Make obj_cgroup_{charge, uncharge}_pages public Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 05/10] mm: Add helper to recharge kmalloc'ed address Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 06/10] mm: Add helper to recharge vmalloc'ed address Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 07/10] mm: Add helper to recharge percpu address Yafang Shao
2022-06-23  5:25   ` Dennis Zhou [this message]
2022-06-25 14:18     ` Yafang Shao
2022-06-27  3:04       ` Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 08/10] bpf: Recharge memory when reuse bpf map Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 09/10] bpf: Make bpf_map_{save, release}_memcg public Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 10/10] bpf: Support recharge for hash map Yafang Shao
2022-06-21 23:28 ` [RFC PATCH bpf-next 00/10] bpf, mm: Recharge pages when reuse bpf map Alexei Starovoitov
2022-06-22 14:03   ` Yafang Shao
2022-06-23  3:29 ` Roman Gushchin
2022-06-25  3:26   ` Yafang Shao
2022-06-26  3:28     ` Roman Gushchin
2022-06-26  3:32       ` Roman Gushchin
2022-06-26  6:38         ` Yafang Shao
2022-06-26  6:25       ` Yafang Shao
2022-07-02  4:23         ` Roman Gushchin
2022-07-02 15:24           ` Yafang Shao
2022-07-02 15:33             ` Roman Gushchin
2022-06-27  0:40     ` Alexei Starovoitov
2022-06-27 15:02       ` Yafang Shao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YrP5S64OsUD6Hmgo@fedora \
    --to=dennisszhou@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=cl@linux.com \
    --cc=daniel@iogearbox.net \
    --cc=hannes@cmpxchg.org \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=john.fastabend@gmail.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=laoar.shao@gmail.com \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=penberg@kernel.org \
    --cc=quentin@isovalent.com \
    --cc=rientjes@google.com \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeelb@google.com \
    --cc=songliubraving@fb.com \
    --cc=songmuchun@bytedance.com \
    --cc=vbabka@suse.cz \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.