public inbox for cgroups@vger.kernel.org
 help / color / mirror / Atom feed
  • * [PATCH v3 2/4] mm/oom: handle remote ooms
           [not found] <20211111234203.1824138-1-almasrymina@google.com>
           [not found] ` <20211111234203.1824138-1-almasrymina-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
    @ 2021-11-11 23:42 ` Mina Almasry
           [not found]   ` <20211111234203.1824138-3-almasrymina-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
      2021-11-11 23:42 ` [PATCH v3 3/4] mm, shmem: add tmpfs memcg= option documentation Mina Almasry
      2 siblings, 1 reply; 20+ messages in thread
    From: Mina Almasry @ 2021-11-11 23:42 UTC (permalink / raw)
      Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
    	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
    	Johannes Weiner, Tejun Heo, Vladimir Davydov, Muchun Song, riel,
    	linux-mm, linux-fsdevel, cgroups
    
    On remote ooms (OOMs due to remote charging), the oom-killer will attempt
    to find a task to kill in the memcg under oom. If the oom-killer is
    unable to find one, it should simply return ENOMEM to the allocating
    process.
    
    If we're in the pagefault path and we're unable to return ENOMEM to the
    allocating process, we instead kill the allocating process.
    
    Signed-off-by: Mina Almasry <almasrymina@google.com>
    
    Cc: Michal Hocko <mhocko@suse.com>
    Cc: Theodore Ts'o <tytso@mit.edu>
    Cc: Greg Thelen <gthelen@google.com>
    Cc: Shakeel Butt <shakeelb@google.com>
    Cc: Andrew Morton <akpm@linux-foundation.org>
    Cc: Hugh Dickins <hughd@google.com>
    Cc: Roman Gushchin <guro@fb.com>
    Cc: Johannes Weiner <hannes@cmpxchg.org>
    Cc: Hugh Dickins <hughd@google.com>
    Cc: Tejun Heo <tj@kernel.org>
    Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
    Cc: Muchun Song <songmuchun@bytedance.com>
    Cc: riel@surriel.com
    Cc: linux-mm@kvack.org
    Cc: linux-fsdevel@vger.kernel.org
    Cc: cgroups@vger.kernel.org
    
    ---
    
    Changes in v3:
    - Fixed build failures/warnings Reported-by: kernel test robot <lkp@intel.com>
    
    Changes in v2:
    - Moved the remote oom handling as Roman requested.
    - Used mem_cgroup_from_task(current) instead of grabbing the memcg from
    current->mm
    
    ---
     include/linux/memcontrol.h | 16 ++++++++++++++++
     mm/memcontrol.c            | 29 +++++++++++++++++++++++++++++
     mm/oom_kill.c              | 22 ++++++++++++++++++++++
     3 files changed, 67 insertions(+)
    
    diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
    index 8583d37c05d9b..b7a045ace7b2c 100644
    --- a/include/linux/memcontrol.h
    +++ b/include/linux/memcontrol.h
    @@ -944,6 +944,7 @@ struct mem_cgroup *mem_cgroup_get_from_path(const char *path);
      * it.
      */
     int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len);
    +bool is_remote_oom(struct mem_cgroup *memcg_under_oom);
    
     void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
     		int zid, int nr_pages);
    @@ -981,6 +982,11 @@ static inline void mem_cgroup_exit_user_fault(void)
     	current->in_user_fault = 0;
     }
    
    +static inline bool is_in_user_fault(void)
    +{
    +	return current->in_user_fault;
    +}
    +
     static inline bool task_in_memcg_oom(struct task_struct *p)
     {
     	return p->memcg_in_oom;
    @@ -1281,6 +1287,11 @@ static inline int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf,
     	return 0;
     }
    
    +static inline bool is_remote_oom(struct mem_cgroup *memcg_under_oom)
    +{
    +	return false;
    +}
    +
     static inline int mem_cgroup_swapin_charge_page(struct page *page,
     			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
     {
    @@ -1472,6 +1483,11 @@ static inline void mem_cgroup_exit_user_fault(void)
     {
     }
    
    +static inline bool is_in_user_fault(void)
    +{
    +	return false;
    +}
    +
     static inline bool task_in_memcg_oom(struct task_struct *p)
     {
     	return false;
    diff --git a/mm/memcontrol.c b/mm/memcontrol.c
    index b3d8f52a63d17..8019c396bfdd9 100644
    --- a/mm/memcontrol.c
    +++ b/mm/memcontrol.c
    @@ -2664,6 +2664,35 @@ int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len)
     	return ret < 0 ? ret : 0;
     }
    
    +/*
    + * Returns true if current's memcg is neither memcg_under_oom nor one of
    + * its descendants, false otherwise. This is used by the oom-killer to
    + * detect ooms due to remote charging.
    + */
    +bool is_remote_oom(struct mem_cgroup *memcg_under_oom)
    +{
    +	struct mem_cgroup *current_memcg;
    +	bool is_remote_oom;
    +
    +	if (!memcg_under_oom)
    +		return false;
    +
    +	rcu_read_lock();
    +	current_memcg = mem_cgroup_from_task(current);
    +	if (current_memcg && !css_tryget_online(&current_memcg->css))
    +		current_memcg = NULL;
    +	rcu_read_unlock();
    +
    +	if (!current_memcg)
    +		return false;
    +
    +	is_remote_oom =
    +		!mem_cgroup_is_descendant(current_memcg, memcg_under_oom);
    +	css_put(&current_memcg->css);
    +
    +	return is_remote_oom;
    +}
    +
     /*
      * Set or clear (if @memcg is NULL) charge association from file system to
      * memcg.  If @memcg != NULL, then a css reference must be held by the caller to
    diff --git a/mm/oom_kill.c b/mm/oom_kill.c
    index 0a7e16b16b8c3..499924efab370 100644
    --- a/mm/oom_kill.c
    +++ b/mm/oom_kill.c
    @@ -1108,6 +1108,28 @@ bool out_of_memory(struct oom_control *oc)
     	select_bad_process(oc);
     	/* Found nothing?!?! */
     	if (!oc->chosen) {
    +		if (is_remote_oom(oc->memcg)) {
    +			/*
    +			 * For remote ooms in userfaults, we have no choice but
    +			 * to kill the allocating process.
    +			 */
    +			if (is_in_user_fault() &&
    +			    !oom_unkillable_task(current)) {
    +				get_task_struct(current);
    +				oc->chosen = current;
    +				oom_kill_process(
    +					oc,
    +					"Out of memory (Killing remote allocating task)");
    +				return true;
    +			}
    +
    +			/*
    +			 * For remote ooms in non-userfaults, simply return
    +			 * ENOMEM to the caller.
    +			 */
    +			return false;
    +		}
    +
     		dump_header(oc, NULL);
     		pr_warn("Out of memory and no killable processes...\n");
     		/*
    --
    2.34.0.rc1.387.gb447b232ab-goog
    
    ^ permalink raw reply related	[flat|nested] 20+ messages in thread
  • * [PATCH v3 3/4] mm, shmem: add tmpfs memcg= option documentation
           [not found] <20211111234203.1824138-1-almasrymina@google.com>
           [not found] ` <20211111234203.1824138-1-almasrymina-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
      2021-11-11 23:42 ` [PATCH v3 2/4] mm/oom: handle remote ooms Mina Almasry
    @ 2021-11-11 23:42 ` Mina Almasry
      2 siblings, 0 replies; 20+ messages in thread
    From: Mina Almasry @ 2021-11-11 23:42 UTC (permalink / raw)
      Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
    	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
    	Johannes Weiner, Tejun Heo, Vladimir Davydov, Muchun Song, riel,
    	linux-mm, linux-fsdevel, cgroups
    
    Signed-off-by: Mina Almasry <almasrymina@google.com>
    
    Cc: Michal Hocko <mhocko@suse.com>
    Cc: Theodore Ts'o <tytso@mit.edu>
    Cc: Greg Thelen <gthelen@google.com>
    Cc: Shakeel Butt <shakeelb@google.com>
    Cc: Andrew Morton <akpm@linux-foundation.org>
    Cc: Hugh Dickins <hughd@google.com>
    Cc: Roman Gushchin <guro@fb.com>
    Cc: Johannes Weiner <hannes@cmpxchg.org>
    Cc: Hugh Dickins <hughd@google.com>
    Cc: Tejun Heo <tj@kernel.org>
    Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
    Cc: Muchun Song <songmuchun@bytedance.com>
    Cc: riel@surriel.com
    Cc: linux-mm@kvack.org
    Cc: linux-fsdevel@vger.kernel.org
    Cc: cgroups@vger.kernel.org
    
    ---
     Documentation/filesystems/tmpfs.rst | 17 +++++++++++++++++
     1 file changed, 17 insertions(+)
    
    diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
    index 0408c245785e3..1ab04e8fa9222 100644
    --- a/Documentation/filesystems/tmpfs.rst
    +++ b/Documentation/filesystems/tmpfs.rst
    @@ -137,6 +137,23 @@ mount options.  It can be added later, when the tmpfs is already mounted
     on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'.
    
    
    +If CONFIG_MEMCG is enabled, tmpfs has a mount option to specify the memory
    +cgroup to be charged for page allocations.
    +
    +memcg=/sys/fs/cgroup/unified/test/: data page allocations are charged to
    +cgroup /sys/fs/cgroup/unified/test/.
    +
    +When charging memory to the remote memcg (memcg specified with memcg=) and
    +hitting the limit, the oom-killer will be invoked and will attempt to kill
    +a process in the remote memcg. If no such process is found, the remote
    +charging process gets an ENOMEM, or, if it is in the pagefault path where
    +ENOMEM cannot be returned, it gets killed instead.
    +
    +Only processes that have access to /sys/fs/cgroup/unified/test/cgroup.procs can
    +mount a tmpfs with memcg=/sys/fs/cgroup/unified/test. Thus, a process is able
    +to charge memory to a cgroup only if it itself is able to enter that cgroup.
    +
    +
     To specify the initial root directory you can use the following mount
     options:
    
    --
    2.34.0.rc1.387.gb447b232ab-goog
    
    ^ permalink raw reply related	[flat|nested] 20+ messages in thread

  • end of thread, other threads:[~2021-11-19 22:32 UTC | newest]
    
    Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
    -- links below jump to the message on this page --
         [not found] <20211111234203.1824138-1-almasrymina@google.com>
         [not found] ` <20211111234203.1824138-1-almasrymina-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
    2021-11-11 23:42   ` [PATCH v3 1/4] mm/shmem: support deterministic charging of tmpfs Mina Almasry
    2021-11-11 23:42   ` [PATCH v3 4/4] mm, shmem, selftests: add tmpfs memcg= mount option tests Mina Almasry
    2021-11-11 23:42 ` [PATCH v3 2/4] mm/oom: handle remote ooms Mina Almasry
         [not found]   ` <20211111234203.1824138-3-almasrymina-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
    2021-11-12  7:51     ` Michal Hocko
         [not found]       ` <YY4dHPu/bcVdoJ4R-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
    2021-11-12  8:12         ` Mina Almasry
    2021-11-12  8:36           ` Michal Hocko
         [not found]             ` <YY4nm9Kvkt2FJPph-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
    2021-11-12 17:59               ` Mina Almasry
    2021-11-15 10:58                 ` Michal Hocko
         [not found]                   ` <YZI9ZbRVdRtE2m70-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
    2021-11-15 17:32                     ` Shakeel Butt
    2021-11-16  0:58                     ` Mina Almasry
         [not found]                       ` <CAHS8izPcnwOqf8bjfrEd9VFxdA6yX3+a-TeHsxGgpAR+_bRdNA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
    2021-11-16  9:28                         ` Michal Hocko
         [not found]                           ` <YZN5tkhHomj6HSb2-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
    2021-11-16  9:39                             ` Michal Hocko
    2021-11-16 10:17                             ` Mina Almasry
         [not found]                               ` <CAHS8izNTbvhjEEb=ZrH2_4ECkVhxnCLzyd=78uWmHA_02iiA9Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
    2021-11-16 11:29                                 ` Michal Hocko
    2021-11-16 21:27                                   ` Mina Almasry
    2021-11-16 21:55                                     ` Shakeel Butt
         [not found]                                       ` <CALvZod7FHO6edK1cR+rbt6cG=+zUzEx3+rKWT5mi73Q29_Y5qA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
    2021-11-18  8:48                                         ` Michal Hocko
         [not found]                                           ` <YZYTaSVUWUhW0d9t-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
    2021-11-19 22:32                                             ` Mina Almasry
         [not found]                                     ` <CAHS8izPyCDucFBa9ZKz09g3QVqSWLmAyOmwN+vr=X2y7yZjRQA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
    2021-11-18  8:47                                       ` Michal Hocko
    2021-11-11 23:42 ` [PATCH v3 3/4] mm, shmem: add tmpfs memcg= option documentation Mina Almasry
    

    This is a public inbox, see mirroring instructions
    for how to clone and mirror all data and code used for this inbox