linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
@ 2016-06-13 19:44 ` Topi Miettinen
  2016-06-14  7:01   ` Michal Hocko
  2016-06-13 19:44 ` [RFC 07/18] limits: track RLIMIT_FSIZE actual max Topi Miettinen
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Johannes Weiner, Michal Hocko, Vladimir Davydov,
	Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

Present maximum used memory in cgroup memory.current_max.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 include/linux/page_counter.h |  7 ++++++-
 mm/memcontrol.c              | 13 +++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 7e62920..be4de17 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -9,9 +9,9 @@ struct page_counter {
 	atomic_long_t count;
 	unsigned long limit;
 	struct page_counter *parent;
+	unsigned long watermark;
 
 	/* legacy */
-	unsigned long watermark;
 	unsigned long failcnt;
 };
 
@@ -34,6 +34,11 @@ static inline unsigned long page_counter_read(struct page_counter *counter)
 	return atomic_long_read(&counter->count);
 }
 
+static inline unsigned long page_counter_read_watermark(struct page_counter *counter)
+{
+	return counter->watermark;
+}
+
 void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
 void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
 bool page_counter_try_charge(struct page_counter *counter,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75e7440..5513771 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4966,6 +4966,14 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
 	return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
 }
 
+static u64 memory_current_max_read(struct cgroup_subsys_state *css,
+				   struct cftype *cft)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+	return (u64)page_counter_read_watermark(&memcg->memory) * PAGE_SIZE;
+}
+
 static int memory_low_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
@@ -5179,6 +5187,11 @@ static struct cftype memory_files[] = {
 		.read_u64 = memory_current_read,
 	},
 	{
+		.name = "current_max",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = memory_current_max_read,
+	},
+	{
 		.name = "low",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = memory_low_show,
-- 
2.8.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC 07/18] limits: track RLIMIT_FSIZE actual max
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
  2016-06-13 19:44 ` [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2 Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
  2016-06-13 19:44 ` [RFC 08/18] limits: track RLIMIT_DATA " Topi Miettinen
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Alexander Viro, Andrew Morton, Jan Kara,
	Johannes Weiner, Michal Hocko, Ross Zwisler, Kirill A. Shutemov,
	Mel Gorman, Junichi Nomura, Matthew Wilcox,
	open list:FILESYSTEMS (VFS and infrastructure),
	open list:MEMORY MANAGEMENT

Track maximum file size, presented in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 fs/attr.c    | 2 ++
 mm/filemap.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/attr.c b/fs/attr.c
index 25b24d0..1b620f7 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -116,6 +116,8 @@ int inode_newsize_ok(const struct inode *inode, loff_t offset)
 			return -ETXTBSY;
 	}
 
+	bump_rlimit(RLIMIT_FSIZE, offset);
+
 	return 0;
 out_sig:
 	send_sig(SIGXFSZ, current, 0);
diff --git a/mm/filemap.c b/mm/filemap.c
index 00ae878..1fa9864 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2447,6 +2447,7 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 			send_sig(SIGXFSZ, current, 0);
 			return -EFBIG;
 		}
+		bump_rlimit(RLIMIT_FSIZE, iocb->ki_pos);
 		iov_iter_truncate(from, limit - (unsigned long)pos);
 	}
 
-- 
2.8.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC 08/18] limits: track RLIMIT_DATA actual max
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
  2016-06-13 19:44 ` [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2 Topi Miettinen
  2016-06-13 19:44 ` [RFC 07/18] limits: track RLIMIT_FSIZE actual max Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
  2016-06-13 19:44 ` [RFC 10/18] limits: track RLIMIT_STACK " Topi Miettinen
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT), Alexander Viro,
	Michal Hocko, Andrew Morton, Vlastimil Babka, Cyrill Gorcunov,
	Eric W. Biederman, Mateusz Guzik, John Stultz, Ben Segall,
	Alexey Dobriyan, Kirill A. Shutemov, Oleg Nesterov, Chen Gang,
	Konstantin Khlebnikov, Andrea Arcangeli, Andrey Ryabinin,
	open list:FILESYSTEMS (VFS and infrastructure),
	open list:MEMORY MANAGEMENT

Track maximum size of data VM, presented in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 arch/x86/ia32/ia32_aout.c | 1 +
 fs/binfmt_aout.c          | 1 +
 fs/binfmt_flat.c          | 1 +
 kernel/sys.c              | 2 ++
 mm/mmap.c                 | 6 +++++-
 5 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index cb26f18..8a7d502 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -398,6 +398,7 @@ beyond_if:
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 =
 	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
 	set_fs(USER_DS);
+	bump_limit(RLIMIT_DATA, ex.a_data + ex.a_bss);
 	return 0;
 }
 
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ae1b540..86c6548 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -330,6 +330,7 @@ beyond_if:
 	regs->gp = ex.a_gpvalue;
 #endif
 	start_thread(regs, ex.a_entry, current->mm->start_stack);
+	bump_limit(RLIMIT_DATA, ex.a_data + ex.a_bss);
 	return 0;
 }
 
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index caf9e39..e309dad 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -792,6 +792,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 			libinfo->lib_list[id].start_brk) +	/* start brk */
 			stack_len);
 
+	bump_limit(RLIMIT_DATA, data_len + bss_len);
 	return 0;
 err:
 	return ret;
diff --git a/kernel/sys.c b/kernel/sys.c
index 89d5be4..6629f6f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1896,6 +1896,8 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
 	if (prctl_map.auxv_size)
 		memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
 
+	bump_limit(RLIMIT_DATA, mm->end_data - mm->start_data);
+
 	up_write(&mm->mmap_sem);
 	return 0;
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index de2c176..61867de 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -228,6 +228,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto out;
 
 set_brk:
+	bump_rlimit(RLIMIT_DATA, (brk - mm->start_brk) +
+		    (mm->end_data - mm->start_data));
 	mm->brk = brk;
 	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
 	up_write(&mm->mmap_sem);
@@ -2924,8 +2926,10 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
 		mm->exec_vm += npages;
 	else if (is_stack_mapping(flags))
 		mm->stack_vm += npages;
-	else if (is_data_mapping(flags))
+	else if (is_data_mapping(flags)) {
 		mm->data_vm += npages;
+		bump_rlimit(RLIMIT_DATA, mm->data_vm << PAGE_SHIFT);
+	}
 }
 
 static int special_mapping_fault(struct vm_area_struct *vma,
-- 
2.8.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC 10/18] limits: track RLIMIT_STACK actual max
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
                   ` (2 preceding siblings ...)
  2016-06-13 19:44 ` [RFC 08/18] limits: track RLIMIT_DATA " Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
  2016-06-13 19:44 ` [RFC 12/18] limits: track RLIMIT_MEMLOCK " Topi Miettinen
  2016-06-13 19:44 ` [RFC 13/18] limits: track RLIMIT_AS " Topi Miettinen
  5 siblings, 0 replies; 15+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Andrew Morton, Oleg Nesterov, Kirill A. Shutemov,
	Chen Gang, Michal Hocko, Konstantin Khlebnikov, Andrea Arcangeli,
	Andrey Ryabinin, open list:MEMORY MANAGEMENT

Track maximum stack size, presented in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 mm/mmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/mmap.c b/mm/mmap.c
index 61867de..0963e7f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2019,6 +2019,8 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 	if (security_vm_enough_memory_mm(mm, grow))
 		return -ENOMEM;
 
+	bump_rlimit(RLIMIT_STACK, actual_size);
+
 	return 0;
 }
 
-- 
2.8.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC 12/18] limits: track RLIMIT_MEMLOCK actual max
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
                   ` (3 preceding siblings ...)
  2016-06-13 19:44 ` [RFC 10/18] limits: track RLIMIT_STACK " Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
  2016-06-13 20:43   ` Alex Williamson
  2016-06-18  0:59   ` Doug Ledford
  2016-06-13 19:44 ` [RFC 13/18] limits: track RLIMIT_AS " Topi Miettinen
  5 siblings, 2 replies; 15+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Tony Luck, Fenghua Yu, Alexander Graf,
	Paolo Bonzini, Radim Krčmář,
	Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Doug Ledford, Sean Hefty, Hal Rosenstock, Mike Marciniszyn,
	Dennis Dalessandro, Christian Benvenuti, Dave Goodell,
	Sudeep Dutt, Ashutosh Dixit, Alex Williamson, Ingo Molnar,
	Peter Zijlstra, Alexei Starovoitov, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Andrew Morton, Konstantin Khlebnikov,
	Jiri Slaby, Cyrill Gorcunov, Thomas Gleixner, Dave Hansen,
	Greg Kroah-Hartman, Dan Carpenter, Nikhil Rao, Vlastimil Babka,
	Kirill A. Shutemov, Michal Hocko, Eric B Munson, Alexey Klimov,
	Andrea Arcangeli, Alexander Kuleshov, Oleg Nesterov, Chen Gang,
	Andrey Ryabinin, David Rientjes, Hugh Dickins, Laurent Dufour,
	open list:IA64 (Itanium) PLATFORM,
	open list:KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC,
	open list:KERNEL VIRTUAL MACHINE (KVM),
	open list:LINUX FOR POWERPC (32-BIT AND 64-BIT),
	open list:INFINIBAND SUBSYSTEM,
	open list:BPF (Safe dynamic programs and tools),
	open list:MEMORY MANAGEMENT

Track maximum size of locked memory, presented in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 arch/ia64/kernel/perfmon.c                 |  1 +
 arch/powerpc/kvm/book3s_64_vio.c           |  1 +
 arch/powerpc/mm/mmu_context_iommu.c        |  1 +
 drivers/infiniband/core/umem.c             |  1 +
 drivers/infiniband/hw/hfi1/user_pages.c    |  1 +
 drivers/infiniband/hw/qib/qib_user_pages.c |  1 +
 drivers/infiniband/hw/usnic/usnic_uiom.c   |  2 ++
 drivers/misc/mic/scif/scif_rma.c           |  1 +
 drivers/vfio/vfio_iommu_spapr_tce.c        |  2 ++
 drivers/vfio/vfio_iommu_type1.c            |  2 ++
 include/linux/sched.h                      | 10 ++++++++--
 kernel/bpf/syscall.c                       |  6 ++++++
 kernel/events/core.c                       |  1 +
 mm/mlock.c                                 |  7 +++++++
 mm/mmap.c                                  |  3 +++
 mm/mremap.c                                |  3 +++
 16 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 2436ad5..d05ff3b 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2341,6 +2341,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
 	*(unsigned long *)user_vaddr = vma->vm_start;
 
+	task_bump_rlimit(task, RLIMIT_MEMLOCK, size);
 	return 0;
 
 error:
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 18cf6d1..2714bbf 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -71,6 +71,7 @@ static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
 			ret = -ENOMEM;
 		else
 			current->mm->locked_vm += stt_pages;
+		bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 	} else {
 		if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
 			stt_pages = current->mm->locked_vm;
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index da6a216..ace8b9d 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -46,6 +46,7 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
 			ret = -ENOMEM;
 		else
 			mm->locked_vm += npages;
+		bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 	} else {
 		if (WARN_ON_ONCE(npages > mm->locked_vm))
 			npages = mm->locked_vm;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index fe4d2e1..9bd9638 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -224,6 +224,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	ret = 0;
 
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 out:
 	if (ret < 0) {
 		if (need_release)
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 88e10b5f..096910d7 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -111,6 +111,7 @@ int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
 
 	down_write(&current->mm->mmap_sem);
 	current->mm->pinned_vm += ret;
+	bump_rlimit(RLIMIT_MEMLOCK, current->mm->pinned_vm << PAGE_SHIFT);
 	up_write(&current->mm->mmap_sem);
 
 	return ret;
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 2d2b94f..06f93de 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -74,6 +74,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
 	}
 
 	current->mm->pinned_vm += num_pages;
+	bump_rlimit(RLIMIT_MEMLOCK, current->mm->pinned_vm << PAGE_SHIFT);
 
 	ret = 0;
 	goto bail;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index a0b6ebe..83409dc 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -178,6 +178,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 		ret = 0;
 	}
 
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
+
 out:
 	if (ret < 0)
 		usnic_uiom_put_pages(chunk_list, 0);
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index e0203b1..1d6315a 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -306,6 +306,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
 		return -ENOMEM;
 	}
 	mm->pinned_vm = locked;
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 	return 0;
 }
 
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 80378dd..769a5b8 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -55,6 +55,8 @@ static long try_increment_locked_vm(long npages)
 			rlimit(RLIMIT_MEMLOCK),
 			ret ? " - exceeded" : "");
 
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
+
 	up_write(&current->mm->mmap_sem);
 
 	return ret;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2ba1942..4c6e7a3 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -312,6 +312,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
 		}
 	}
 
+	bump_rlimit(RLIMIT_MEMLOCK, (current->mm->locked_vm + i) << PAGE_SHIFT);
+
 	if (!rsvd)
 		vfio_lock_acct(i);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index feb9bb7..d3f3c9f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3378,10 +3378,16 @@ static inline unsigned long rlimit_max(unsigned int limit)
 	return task_rlimit_max(current, limit);
 }
 
+static inline void task_bump_rlimit(struct task_struct *tsk,
+				    unsigned int limit, unsigned long r)
+{
+	if (READ_ONCE(tsk->signal->rlim_curmax[limit]) < r)
+		tsk->signal->rlim_curmax[limit] = r;
+}
+
 static inline void bump_rlimit(unsigned int limit, unsigned long r)
 {
-	if (READ_ONCE(current->signal->rlim_curmax[limit]) < r)
-		current->signal->rlim_curmax[limit] = r;
+	return task_bump_rlimit(current, limit, r);
 }
 
 #ifdef CONFIG_CPU_FREQ
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 46ecce4..192001e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -76,6 +76,9 @@ static int bpf_map_charge_memlock(struct bpf_map *map)
 		return -EPERM;
 	}
 	map->user = user;
+	/* XXX resource limits apply per task, not per user */
+	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
+		    PAGE_SHIFT);
 	return 0;
 }
 
@@ -601,6 +604,9 @@ static int bpf_prog_charge_memlock(struct bpf_prog *prog)
 		return -EPERM;
 	}
 	prog->aux->user = user;
+	/* XXX resource limits apply per task, not per user */
+	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
+		    PAGE_SHIFT);
 	return 0;
 }
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9c51ec3..92467e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5075,6 +5075,7 @@ accounting:
 		if (!ret)
 			rb->aux_mmap_locked = extra;
 	}
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 
 unlock:
 	if (!ret) {
diff --git a/mm/mlock.c b/mm/mlock.c
index ef8dc9f..554bee9 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -648,6 +648,8 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
 	if (error)
 		return error;
 
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
+
 	error = __mm_populate(start, len, 0);
 	if (error)
 		return __mlock_posix_error_return(error);
@@ -761,6 +763,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	if (!ret && (flags & MCL_CURRENT))
 		mm_populate(0, TASK_SIZE);
 
+	bump_rlimit(RLIMIT_MEMLOCK, current->mm->total_vm << PAGE_SHIFT);
+
 	return ret;
 }
 
@@ -798,6 +802,9 @@ int user_shm_lock(size_t size, struct user_struct *user)
 	get_uid(user);
 	user->locked_shm += locked;
 	allowed = 1;
+
+	/* XXX resource limits apply per task, not per user */
+	bump_rlimit(RLIMIT_MEMLOCK, user->locked_shm << PAGE_SHIFT);
 out:
 	spin_unlock(&shmlock_user_lock);
 	return allowed;
diff --git a/mm/mmap.c b/mm/mmap.c
index 0963e7f..4e683dd 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2020,6 +2020,9 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		return -ENOMEM;
 
 	bump_rlimit(RLIMIT_STACK, actual_size);
+	if (vma->vm_flags & VM_LOCKED)
+		bump_rlimit(RLIMIT_MEMLOCK,
+			    (mm->locked_vm + grow) << PAGE_SHIFT);
 
 	return 0;
 }
diff --git a/mm/mremap.c b/mm/mremap.c
index 1f157ad..ade3e13 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -394,6 +394,9 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 		*p = charged;
 	}
 
+	if (vma->vm_flags & VM_LOCKED)
+		bump_rlimit(RLIMIT_MEMLOCK, (mm->locked_vm << PAGE_SHIFT) +
+			    new_len - old_len);
 	return vma;
 }
 
-- 
2.8.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC 13/18] limits: track RLIMIT_AS actual max
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
                   ` (4 preceding siblings ...)
  2016-06-13 19:44 ` [RFC 12/18] limits: track RLIMIT_MEMLOCK " Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
  5 siblings, 0 replies; 15+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Andrew Morton, Oleg Nesterov, Kirill A. Shutemov,
	Chen Gang, Michal Hocko, Konstantin Khlebnikov, Andrea Arcangeli,
	Andrey Ryabinin, David Rientjes, Vlastimil Babka, Hugh Dickins,
	Laurent Dufour, Alexander Kuleshov, open list:MEMORY MANAGEMENT

Track maximum size of address space, presented in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 mm/mmap.c   | 4 ++++
 mm/mremap.c | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/mm/mmap.c b/mm/mmap.c
index 4e683dd..4876c21 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2706,6 +2706,9 @@ static int do_brk(unsigned long addr, unsigned long len)
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
+
+	bump_rlimit(RLIMIT_AS, mm->total_vm << PAGE_SHIFT);
+
 	mm->data_vm += len >> PAGE_SHIFT;
 	if (flags & VM_LOCKED)
 		mm->locked_vm += (len >> PAGE_SHIFT);
@@ -2926,6 +2929,7 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
 void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
 {
 	mm->total_vm += npages;
+	bump_rlimit(RLIMIT_AS, mm->total_vm << PAGE_SHIFT);
 
 	if (is_exec_mapping(flags))
 		mm->exec_vm += npages;
diff --git a/mm/mremap.c b/mm/mremap.c
index ade3e13..6be3c01 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -397,6 +397,9 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 	if (vma->vm_flags & VM_LOCKED)
 		bump_rlimit(RLIMIT_MEMLOCK, (mm->locked_vm << PAGE_SHIFT) +
 			    new_len - old_len);
+	bump_rlimit(RLIMIT_AS, (mm->total_vm << PAGE_SHIFT) +
+		    new_len - old_len);
+
 	return vma;
 }
 
-- 
2.8.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [RFC 12/18] limits: track RLIMIT_MEMLOCK actual max
  2016-06-13 19:44 ` [RFC 12/18] limits: track RLIMIT_MEMLOCK " Topi Miettinen
@ 2016-06-13 20:43   ` Alex Williamson
  2016-06-13 21:17     ` Topi Miettinen
  2016-06-18  0:59   ` Doug Ledford
  1 sibling, 1 reply; 15+ messages in thread
From: Alex Williamson @ 2016-06-13 20:43 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel, Tony Luck, Fenghua Yu, Alexander Graf,
	Paolo Bonzini, Radim Krčmář,
	Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Doug Ledford, Sean Hefty, Hal Rosenstock, Mike Marciniszyn,
	Dennis Dalessandro, Christian Benvenuti, Dave Goodell,
	Sudeep Dutt, Ashutosh Dixit, Ingo Molnar, Peter Zijlstra,
	Alexei Starovoitov, Arnaldo Carvalho de Melo, Alexander Shishkin,
	Andrew Morton, Konstantin Khlebnikov, Jiri Slaby, Cyrill Gorcunov,
	Thomas Gleixner, Dave Hansen, Greg Kroah-Hartman, Dan Carpenter,
	Nikhil Rao, Vlastimil Babka, Kirill A. Shutemov, Michal Hocko,
	Eric B Munson, Alexey Klimov, Andrea Arcangeli,
	Alexander Kuleshov, Oleg Nesterov, Chen Gang, Andrey Ryabinin,
	David Rientjes, Hugh Dickins, Laurent Dufour,
	open list:IA64 (Itanium) PLATFORM,
	open list:KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC,
	open list:KERNEL VIRTUAL MACHINE (KVM),
	open list:LINUX FOR POWERPC (32-BIT AND 64-BIT),
	open list:INFINIBAND SUBSYSTEM,
	open list:BPF (Safe dynamic programs and tools),
	open list:MEMORY MANAGEMENT

On Mon, 13 Jun 2016 22:44:19 +0300
Topi Miettinen <toiwoton@gmail.com> wrote:

> Track maximum size of locked memory, presented in /proc/self/limits.
> 
> Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
> ---
>  arch/ia64/kernel/perfmon.c                 |  1 +
>  arch/powerpc/kvm/book3s_64_vio.c           |  1 +
>  arch/powerpc/mm/mmu_context_iommu.c        |  1 +
>  drivers/infiniband/core/umem.c             |  1 +
>  drivers/infiniband/hw/hfi1/user_pages.c    |  1 +
>  drivers/infiniband/hw/qib/qib_user_pages.c |  1 +
>  drivers/infiniband/hw/usnic/usnic_uiom.c   |  2 ++
>  drivers/misc/mic/scif/scif_rma.c           |  1 +
>  drivers/vfio/vfio_iommu_spapr_tce.c        |  2 ++
>  drivers/vfio/vfio_iommu_type1.c            |  2 ++
>  include/linux/sched.h                      | 10 ++++++++--
>  kernel/bpf/syscall.c                       |  6 ++++++
>  kernel/events/core.c                       |  1 +
>  mm/mlock.c                                 |  7 +++++++
>  mm/mmap.c                                  |  3 +++
>  mm/mremap.c                                |  3 +++
>  16 files changed, 41 insertions(+), 2 deletions(-)
...  
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 2ba1942..4c6e7a3 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -312,6 +312,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
>  		}
>  	}
>  
> +	bump_rlimit(RLIMIT_MEMLOCK, (current->mm->locked_vm + i) << PAGE_SHIFT);
> +
>  	if (!rsvd)
>  		vfio_lock_acct(i);
>  


Not all cases passing through here bump rlimit (see: rsvd), there's an
entire case above the other end of this closing bracket that does bump
rlimit but returns before here, and I wonder why we wouldn't just do
this in our vfio_lock_acct() accounting function anyway.  Thanks,

Alex

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC 12/18] limits: track RLIMIT_MEMLOCK actual max
  2016-06-13 20:43   ` Alex Williamson
@ 2016-06-13 21:17     ` Topi Miettinen
  0 siblings, 0 replies; 15+ messages in thread
From: Topi Miettinen @ 2016-06-13 21:17 UTC (permalink / raw)
  To: Alex Williamson
  Cc: linux-kernel, Tony Luck, Fenghua Yu, Alexander Graf,
	Paolo Bonzini, Radim Krčmář,
	Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Doug Ledford, Sean Hefty, Hal Rosenstock, Mike Marciniszyn,
	Dennis Dalessandro, Christian Benvenuti, Dave Goodell,
	Sudeep Dutt, Ashutosh Dixit, Ingo Molnar, Peter Zijlstra,
	Alexei Starovoitov, Arnaldo Carvalho de Melo, Alexander Shishkin,
	Andrew Morton, Konstantin Khlebnikov, Jiri Slaby, Cyrill Gorcunov,
	Thomas Gleixner, Dave Hansen, Greg Kroah-Hartman, Dan Carpenter,
	Nikhil Rao, Vlastimil Babka, Kirill A. Shutemov, Michal Hocko,
	Eric B Munson, Alexey Klimov, Andrea Arcangeli,
	Alexander Kuleshov, Oleg Nesterov, Chen Gang, Andrey Ryabinin,
	David Rientjes, Hugh Dickins, Laurent Dufour,
	open list:IA64 (Itanium) PLATFORM,
	open list:KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC,
	open list:KERNEL VIRTUAL MACHINE (KVM),
	open list:LINUX FOR POWERPC (32-BIT AND 64-BIT),
	open list:INFINIBAND SUBSYSTEM,
	open list:BPF (Safe dynamic programs and tools),
	open list:MEMORY MANAGEMENT

On 06/13/16 20:43, Alex Williamson wrote:
> On Mon, 13 Jun 2016 22:44:19 +0300
> Topi Miettinen <toiwoton@gmail.com> wrote:
> 
>> Track maximum size of locked memory, presented in /proc/self/limits.
>>
>> Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
>> ---
>>  arch/ia64/kernel/perfmon.c                 |  1 +
>>  arch/powerpc/kvm/book3s_64_vio.c           |  1 +
>>  arch/powerpc/mm/mmu_context_iommu.c        |  1 +
>>  drivers/infiniband/core/umem.c             |  1 +
>>  drivers/infiniband/hw/hfi1/user_pages.c    |  1 +
>>  drivers/infiniband/hw/qib/qib_user_pages.c |  1 +
>>  drivers/infiniband/hw/usnic/usnic_uiom.c   |  2 ++
>>  drivers/misc/mic/scif/scif_rma.c           |  1 +
>>  drivers/vfio/vfio_iommu_spapr_tce.c        |  2 ++
>>  drivers/vfio/vfio_iommu_type1.c            |  2 ++
>>  include/linux/sched.h                      | 10 ++++++++--
>>  kernel/bpf/syscall.c                       |  6 ++++++
>>  kernel/events/core.c                       |  1 +
>>  mm/mlock.c                                 |  7 +++++++
>>  mm/mmap.c                                  |  3 +++
>>  mm/mremap.c                                |  3 +++
>>  16 files changed, 41 insertions(+), 2 deletions(-)
> ...  
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index 2ba1942..4c6e7a3 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -312,6 +312,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
>>  		}
>>  	}
>>  
>> +	bump_rlimit(RLIMIT_MEMLOCK, (current->mm->locked_vm + i) << PAGE_SHIFT);
>> +
>>  	if (!rsvd)
>>  		vfio_lock_acct(i);
>>  
> 
> 
> Not all cases passing through here bump rlimit (see: rsvd), there's an
> entire case above the other end of this closing bracket that does bump
> rlimit but returns before here, and I wonder why we wouldn't just do
> this in our vfio_lock_acct() accounting function anyway.  Thanks,

Yes, just make disable_hugepages case go to end of function.

-Topi

> 
> Alex
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
  2016-06-13 19:44 ` [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2 Topi Miettinen
@ 2016-06-14  7:01   ` Michal Hocko
  2016-06-14 15:47     ` Topi Miettinen
  0 siblings, 1 reply; 15+ messages in thread
From: Michal Hocko @ 2016-06-14  7:01 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel, Johannes Weiner, Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On Mon 13-06-16 22:44:10, Topi Miettinen wrote:
> Present maximum used memory in cgroup memory.current_max.

It would be really much more preferable to present the usecase in the
patch description. It is true that this information is presented in the
v1 API but the current policy is to export new knobs only when there is
a reasonable usecase for it.

> Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
> ---
>  include/linux/page_counter.h |  7 ++++++-
>  mm/memcontrol.c              | 13 +++++++++++++
>  2 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
> index 7e62920..be4de17 100644
> --- a/include/linux/page_counter.h
> +++ b/include/linux/page_counter.h
> @@ -9,9 +9,9 @@ struct page_counter {
>  	atomic_long_t count;
>  	unsigned long limit;
>  	struct page_counter *parent;
> +	unsigned long watermark;
>  
>  	/* legacy */
> -	unsigned long watermark;
>  	unsigned long failcnt;
>  };
>  
> @@ -34,6 +34,11 @@ static inline unsigned long page_counter_read(struct page_counter *counter)
>  	return atomic_long_read(&counter->count);
>  }
>  
> +static inline unsigned long page_counter_read_watermark(struct page_counter *counter)
> +{
> +	return counter->watermark;
> +}
> +
>  void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
>  void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
>  bool page_counter_try_charge(struct page_counter *counter,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 75e7440..5513771 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4966,6 +4966,14 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
>  	return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
>  }
>  
> +static u64 memory_current_max_read(struct cgroup_subsys_state *css,
> +				   struct cftype *cft)
> +{
> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
> +
> +	return (u64)page_counter_read_watermark(&memcg->memory) * PAGE_SIZE;
> +}
> +
>  static int memory_low_show(struct seq_file *m, void *v)
>  {
>  	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
> @@ -5179,6 +5187,11 @@ static struct cftype memory_files[] = {
>  		.read_u64 = memory_current_read,
>  	},
>  	{
> +		.name = "current_max",
> +		.flags = CFTYPE_NOT_ON_ROOT,
> +		.read_u64 = memory_current_max_read,
> +	},
> +	{
>  		.name = "low",
>  		.flags = CFTYPE_NOT_ON_ROOT,
>  		.seq_show = memory_low_show,
> -- 
> 2.8.1

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
  2016-06-14  7:01   ` Michal Hocko
@ 2016-06-14 15:47     ` Topi Miettinen
  2016-06-14 16:04       ` Johannes Weiner
  0 siblings, 1 reply; 15+ messages in thread
From: Topi Miettinen @ 2016-06-14 15:47 UTC (permalink / raw)
  To: Michal Hocko
  Cc: linux-kernel, Johannes Weiner, Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On 06/14/16 07:01, Michal Hocko wrote:
> On Mon 13-06-16 22:44:10, Topi Miettinen wrote:
>> Present maximum used memory in cgroup memory.current_max.
> 
> It would be really much more preferable to present the usecase in the
> patch description. It is true that this information is presented in the
> v1 API but the current policy is to export new knobs only when there is
> a reasonable usecase for it.
> 

This was stated in the cover letter:
https://lkml.org/lkml/2016/6/13/857

"There are many basic ways to control processes, including capabilities,
cgroups and resource limits. However, there are far fewer ways to find out
useful values for the limits, except blind trial and error.

This patch series attempts to fix that by giving at least a nice starting
point from the actual maximum values. I looked where each limit is checked
and added a call to limit bump nearby."

"Cgroups
[RFC 02/18] cgroup_pids: track maximum pids
[RFC 03/18] memcontrol: present maximum used memory also for
[RFC 04/18] device_cgroup: track and present accessed devices

For tasks and memory cgroup limits the situation is somewhat better as the
current tasks and memory status can be easily seen with ps(1). However, any
transient tasks or temporary higher memory use might slip from the view.
Device use may be seen with advanced MAC tools, like TOMOYO, but there is no
universal method. Program sources typically give no useful indication about
memory use or how many tasks there could be."

I can add some of this to the commit message, is that sufficient for you?

>> Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
>> ---
>>  include/linux/page_counter.h |  7 ++++++-
>>  mm/memcontrol.c              | 13 +++++++++++++
>>  2 files changed, 19 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
>> index 7e62920..be4de17 100644
>> --- a/include/linux/page_counter.h
>> +++ b/include/linux/page_counter.h
>> @@ -9,9 +9,9 @@ struct page_counter {
>>  	atomic_long_t count;
>>  	unsigned long limit;
>>  	struct page_counter *parent;
>> +	unsigned long watermark;
>>  
>>  	/* legacy */
>> -	unsigned long watermark;
>>  	unsigned long failcnt;
>>  };
>>  
>> @@ -34,6 +34,11 @@ static inline unsigned long page_counter_read(struct page_counter *counter)
>>  	return atomic_long_read(&counter->count);
>>  }
>>  
>> +static inline unsigned long page_counter_read_watermark(struct page_counter *counter)
>> +{
>> +	return counter->watermark;
>> +}
>> +
>>  void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
>>  void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
>>  bool page_counter_try_charge(struct page_counter *counter,
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 75e7440..5513771 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -4966,6 +4966,14 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
>>  	return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
>>  }
>>  
>> +static u64 memory_current_max_read(struct cgroup_subsys_state *css,
>> +				   struct cftype *cft)
>> +{
>> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
>> +
>> +	return (u64)page_counter_read_watermark(&memcg->memory) * PAGE_SIZE;
>> +}
>> +
>>  static int memory_low_show(struct seq_file *m, void *v)
>>  {
>>  	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
>> @@ -5179,6 +5187,11 @@ static struct cftype memory_files[] = {
>>  		.read_u64 = memory_current_read,
>>  	},
>>  	{
>> +		.name = "current_max",
>> +		.flags = CFTYPE_NOT_ON_ROOT,
>> +		.read_u64 = memory_current_max_read,
>> +	},
>> +	{
>>  		.name = "low",
>>  		.flags = CFTYPE_NOT_ON_ROOT,
>>  		.seq_show = memory_low_show,
>> -- 
>> 2.8.1
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
  2016-06-14 15:47     ` Topi Miettinen
@ 2016-06-14 16:04       ` Johannes Weiner
  2016-06-14 17:15         ` Topi Miettinen
  0 siblings, 1 reply; 15+ messages in thread
From: Johannes Weiner @ 2016-06-14 16:04 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: Michal Hocko, linux-kernel, Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On Tue, Jun 14, 2016 at 03:47:20PM +0000, Topi Miettinen wrote:
> On 06/14/16 07:01, Michal Hocko wrote:
> > On Mon 13-06-16 22:44:10, Topi Miettinen wrote:
> >> Present maximum used memory in cgroup memory.current_max.
> > 
> > It would be really much more preferable to present the usecase in the
> > patch description. It is true that this information is presented in the
> > v1 API but the current policy is to export new knobs only when there is
> > a reasonable usecase for it.
> > 
> 
> This was stated in the cover letter:
> https://lkml.org/lkml/2016/6/13/857
> 
> "There are many basic ways to control processes, including capabilities,
> cgroups and resource limits. However, there are far fewer ways to find out
> useful values for the limits, except blind trial and error.
> 
> This patch series attempts to fix that by giving at least a nice starting
> point from the actual maximum values. I looked where each limit is checked
> and added a call to limit bump nearby."
> 
> "Cgroups
> [RFC 02/18] cgroup_pids: track maximum pids
> [RFC 03/18] memcontrol: present maximum used memory also for
> [RFC 04/18] device_cgroup: track and present accessed devices
> 
> For tasks and memory cgroup limits the situation is somewhat better as the
> current tasks and memory status can be easily seen with ps(1). However, any
> transient tasks or temporary higher memory use might slip from the view.
> Device use may be seen with advanced MAC tools, like TOMOYO, but there is no
> universal method. Program sources typically give no useful indication about
> memory use or how many tasks there could be."
> 
> I can add some of this to the commit message, is that sufficient for you?

It's useful to have a short summary of the justification in each patch
as well. Other than that it's fine to be broader and more detailed
about your motivation in the coverletter.

I didn't catch the coverletter, though. It makes sense to CC
recipients of any of those patches on the full series, including the
cover, since even though we are specialized in certain areas of the
code, many of us are interested in the whole picture of addressing a
problem, and not just the few bits in our area without more context.

As far as the memcg part of this series goes, one concern is that page
cache is trimmed back only when there is pressure, so in all but very
few cases the high watermark you are introducing will be pegged to the
configured limit. It doesn't give a whole lot of insight.

But there are consumers that are less/not compressible than cache,
such as anonymous memory, unreclaimable slab, maybe socket buffers
etc. Having spikes in those slip through two sampling points is an
issue, indeed. Adding consumer-specific watermarks might be useful.

Thanks

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
  2016-06-14 16:04       ` Johannes Weiner
@ 2016-06-14 17:15         ` Topi Miettinen
  2016-06-16 10:27           ` Michal Hocko
  0 siblings, 1 reply; 15+ messages in thread
From: Topi Miettinen @ 2016-06-14 17:15 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Michal Hocko, linux-kernel, Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On 06/14/16 16:04, Johannes Weiner wrote:
> On Tue, Jun 14, 2016 at 03:47:20PM +0000, Topi Miettinen wrote:
>> On 06/14/16 07:01, Michal Hocko wrote:
>>> On Mon 13-06-16 22:44:10, Topi Miettinen wrote:
>>>> Present maximum used memory in cgroup memory.current_max.
>>>
>>> It would be really much more preferable to present the usecase in the
>>> patch description. It is true that this information is presented in the
>>> v1 API but the current policy is to export new knobs only when there is
>>> a reasonable usecase for it.
>>>
>>
>> This was stated in the cover letter:
>> https://lkml.org/lkml/2016/6/13/857
>>
>> "There are many basic ways to control processes, including capabilities,
>> cgroups and resource limits. However, there are far fewer ways to find out
>> useful values for the limits, except blind trial and error.
>>
>> This patch series attempts to fix that by giving at least a nice starting
>> point from the actual maximum values. I looked where each limit is checked
>> and added a call to limit bump nearby."
>>
>> "Cgroups
>> [RFC 02/18] cgroup_pids: track maximum pids
>> [RFC 03/18] memcontrol: present maximum used memory also for
>> [RFC 04/18] device_cgroup: track and present accessed devices
>>
>> For tasks and memory cgroup limits the situation is somewhat better as the
>> current tasks and memory status can be easily seen with ps(1). However, any
>> transient tasks or temporary higher memory use might slip from the view.
>> Device use may be seen with advanced MAC tools, like TOMOYO, but there is no
>> universal method. Program sources typically give no useful indication about
>> memory use or how many tasks there could be."
>>
>> I can add some of this to the commit message, is that sufficient for you?
> 
> It's useful to have a short summary of the justification in each patch
> as well. Other than that it's fine to be broader and more detailed
> about your motivation in the coverletter.
> 
> I didn't catch the coverletter, though. It makes sense to CC
> recipients of any of those patches on the full series, including the
> cover, since even though we are specialized in certain areas of the
> code, many of us are interested in the whole picture of addressing a
> problem, and not just the few bits in our area without more context.
> 

Thank you for this nice explanation. I suppose "git send-email
--cc-cmd=scripts/get_maintainer.pl" doesn't do this.

> As far as the memcg part of this series goes, one concern is that page
> cache is trimmed back only when there is pressure, so in all but very
> few cases the high watermark you are introducing will be pegged to the
> configured limit. It doesn't give a whole lot of insight.
> 

So using the high watermark would not give a very useful starting point
for the user who wished to configure the memory limit? What else could
be used instead?

> But there are consumers that are less/not compressible than cache,
> such as anonymous memory, unreclaimable slab, maybe socket buffers
> etc. Having spikes in those slip through two sampling points is an
> issue, indeed. Adding consumer-specific watermarks might be useful.
> 
> Thanks
> 

OK, but there's no limiting or tuning mechanism in place for now for
those, or is there? How could the results be used?

-Topi

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2
  2016-06-14 17:15         ` Topi Miettinen
@ 2016-06-16 10:27           ` Michal Hocko
  0 siblings, 0 replies; 15+ messages in thread
From: Michal Hocko @ 2016-06-16 10:27 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: Johannes Weiner, linux-kernel, Vladimir Davydov, Andrew Morton,
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG),
	open list:CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)

On Tue 14-06-16 17:15:06, Topi Miettinen wrote:
> On 06/14/16 16:04, Johannes Weiner wrote:
[...]
> > I didn't catch the coverletter, though. It makes sense to CC
> > recipients of any of those patches on the full series, including the
> > cover, since even though we are specialized in certain areas of the
> > code, many of us are interested in the whole picture of addressing a
> > problem, and not just the few bits in our area without more context.
> > 
> 
> Thank you for this nice explanation. I suppose "git send-email
> --cc-cmd=scripts/get_maintainer.pl" doesn't do this.

No, it doesn't. What I do for this kind of series is the following: put
an explicit CC (acked, reviews, etc.) in each patch, run git format-patch
$RANGE, and then
$ git send-email --cc-cmd=./cc-cmd-only-cover.sh $DEFAULT_TO_CC --compose *.patch

$ cat cc-cmd-only-cover.sh
#!/bin/bash

if [[ $1 == *gitsendemail.msg* || $1 == *cover-letter* ]]; then
        grep '<.*@.*>' -h *.patch | sed 's/^.*: //' | sort | uniq
fi

A bit error prone because you have to clean up any previous patch files
from the directory, but it works more or less well for me.

> > As far as the memcg part of this series goes, one concern is that page
> > cache is trimmed back only when there is pressure, so in all but very
> > few cases the high watermark you are introducing will be pegged to the
> > configured limit. It doesn't give a whole lot of insight.
> > 
> 
> So using the high watermark would not give a very useful starting point
> for the user who wished to configure the memory limit? What else could
> be used instead?

We have an event notification mechanism. In v1 it is vmpressure, and in v2
you will get a notification when the high/max limit is hit or when we
hit the oom.
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC 12/18] limits: track RLIMIT_MEMLOCK actual max
  2016-06-13 19:44 ` [RFC 12/18] limits: track RLIMIT_MEMLOCK " Topi Miettinen
  2016-06-13 20:43   ` Alex Williamson
@ 2016-06-18  0:59   ` Doug Ledford
  2016-06-18  7:00     ` Topi Miettinen
  1 sibling, 1 reply; 15+ messages in thread
From: Doug Ledford @ 2016-06-18  0:59 UTC (permalink / raw)
  To: Topi Miettinen, linux-kernel
  Cc: Tony Luck, Fenghua Yu, Alexander Graf, Paolo Bonzini,
	Radim Krčmář, Benjamin Herrenschmidt,
	Paul Mackerras, Michael Ellerman, Sean Hefty, Hal Rosenstock,
	Mike Marciniszyn, Dennis Dalessandro, Christian Benvenuti,
	Dave Goodell, Sudeep Dutt, Ashutosh Dixit, Alex Williamson,
	Ingo Molnar, Peter Zijlstra, Alexei Starovoitov,
	Arnaldo Carvalho de Melo, Alexander Shishkin, Andrew Morton,
	Konstantin Khlebnikov, Jiri Slaby, Cyrill Gorcunov,
	Thomas Gleixner, Dave Hansen, Greg Kroah-Hartman, Dan Carpenter,
	Nikhil Rao, Vlastimil Babka, Kirill A. Shutemov, Michal Hocko,
	Eric B Munson, Alexey Klimov, Andrea Arcangeli,
	Alexander Kuleshov, Oleg Nesterov, Chen Gang, Andrey Ryabinin,
	David Rientjes, Hugh Dickins, Laurent Dufour,
	open list:IA64 (Itanium) PLATFORM,
	open list:KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC,
	open list:KERNEL VIRTUAL MACHINE (KVM),
	open list:LINUX FOR POWERPC (32-BIT AND 64-BIT),
	open list:INFINIBAND SUBSYSTEM,
	open list:BPF (Safe dynamic programs and tools),
	open list:MEMORY MANAGEMENT


[-- Attachment #1.1: Type: text/plain, Size: 4505 bytes --]

On 6/13/2016 3:44 PM, Topi Miettinen wrote:
> Track maximum size of locked memory, presented in /proc/self/limits.

You should have probably Cc:ed everyone on the cover letter and probably
patch 1 of this series.  This patch is hard to decipher without the
additional context of those items.  However, that said, I think I see
what you are doing.  But your wording of your comments below is bad:

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index feb9bb7..d3f3c9f 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -3378,10 +3378,16 @@ static inline unsigned long rlimit_max(unsigned int limit)
>  	return task_rlimit_max(current, limit);
>  }
>  
> +static inline void task_bump_rlimit(struct task_struct *tsk,
> +				    unsigned int limit, unsigned long r)
> +{
> +	if (READ_ONCE(tsk->signal->rlim_curmax[limit]) < r)
> +		tsk->signal->rlim_curmax[limit] = r;
> +}
> +
>  static inline void bump_rlimit(unsigned int limit, unsigned long r)
>  {
> -	if (READ_ONCE(current->signal->rlim_curmax[limit]) < r)
> -		current->signal->rlim_curmax[limit] = r;
> +	return task_bump_rlimit(current, limit, r);
>  }
>  
>  #ifdef CONFIG_CPU_FREQ
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 46ecce4..192001e 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -76,6 +76,9 @@ static int bpf_map_charge_memlock(struct bpf_map *map)
>  		return -EPERM;
>  	}
>  	map->user = user;
> +	/* XXX resource limits apply per task, not per user */
> +	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
> +		    PAGE_SHIFT);

No, these resource limits do not apply per task.  They are per user.
However, you are doing maximum  usage accounting on a per-task basis by
adding a new counter to the signal struct of the task.  Fine, but your
comments need to reflect that instead of the confusing comment above.
In addition, your function name is horrible for what you are doing.  A
person reading this function will think that you are bumping the actual
rlimit on the task, which is not what you are doing.  You are performing
per-task accounting of MEMLOCK memory.  The actual permission checks are
per-user, and the primary accounting is per-user.  So, really, this is
just a nice little feature that provides a more granular per-task usage
(but not control) so a user can see where their overall memlock memory
is being used.  Fine.  I would reword the comment something like this:

/* XXX resource is tracked and limit enforced on a per user basis,
   but we track it on a per-task basis as well so users can identify
   hogs of this resource, stats can be found in /proc/<pid>/limits */

And I would rename bump_rlimit and task_bump_rlimit to something like
account_rlimit and task_account_rlimit.  Calling it bump just gives the
wrong idea entirely on first read.

>  	return 0;
>  }
>  
> @@ -601,6 +604,9 @@ static int bpf_prog_charge_memlock(struct bpf_prog *prog)
>  		return -EPERM;
>  	}
>  	prog->aux->user = user;
> +	/* XXX resource limits apply per task, not per user */
> +	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
> +		    PAGE_SHIFT);
>  	return 0;
>  }

> @@ -798,6 +802,9 @@ int user_shm_lock(size_t size, struct user_struct *user)
>  	get_uid(user);
>  	user->locked_shm += locked;
>  	allowed = 1;
> +
> +	/* XXX resource limits apply per task, not per user */
> +	bump_rlimit(RLIMIT_MEMLOCK, user->locked_shm << PAGE_SHIFT);
>  out:
>  	spin_unlock(&shmlock_user_lock);
>  	return allowed;
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 0963e7f..4e683dd 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2020,6 +2020,9 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
>  		return -ENOMEM;
>  
>  	bump_rlimit(RLIMIT_STACK, actual_size);
> +	if (vma->vm_flags & VM_LOCKED)
> +		bump_rlimit(RLIMIT_MEMLOCK,
> +			    (mm->locked_vm + grow) << PAGE_SHIFT);
>  
>  	return 0;
>  }
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 1f157ad..ade3e13 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -394,6 +394,9 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
>  		*p = charged;
>  	}
>  
> +	if (vma->vm_flags & VM_LOCKED)
> +		bump_rlimit(RLIMIT_MEMLOCK, (mm->locked_vm << PAGE_SHIFT) +
> +			    new_len - old_len);
>  	return vma;
>  }
>  
> 


-- 
Doug Ledford <dledford@redhat.com>
    GPG Key ID: 0E572FDD


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 884 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC 12/18] limits: track RLIMIT_MEMLOCK actual max
  2016-06-18  0:59   ` Doug Ledford
@ 2016-06-18  7:00     ` Topi Miettinen
  0 siblings, 0 replies; 15+ messages in thread
From: Topi Miettinen @ 2016-06-18  7:00 UTC (permalink / raw)
  To: Doug Ledford, linux-kernel
  Cc: Tony Luck, Fenghua Yu, Alexander Graf, Paolo Bonzini,
	Radim Krčmář, Benjamin Herrenschmidt,
	Paul Mackerras, Michael Ellerman, Sean Hefty, Hal Rosenstock,
	Mike Marciniszyn, Dennis Dalessandro, Christian Benvenuti,
	Dave Goodell, Sudeep Dutt, Ashutosh Dixit, Alex Williamson,
	Ingo Molnar, Peter Zijlstra, Alexei Starovoitov,
	Arnaldo Carvalho de Melo, Alexander Shishkin, Andrew Morton,
	Konstantin Khlebnikov, Jiri Slaby, Cyrill Gorcunov,
	Thomas Gleixner, Dave Hansen, Greg Kroah-Hartman, Dan Carpenter,
	Nikhil Rao, Vlastimil Babka, Kirill A. Shutemov, Michal Hocko,
	Eric B Munson, Alexey Klimov, Andrea Arcangeli,
	Alexander Kuleshov, Oleg Nesterov, Chen Gang, Andrey Ryabinin,
	David Rientjes, Hugh Dickins, Laurent Dufour,
	open list:IA64 (Itanium) PLATFORM,
	open list:KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC,
	open list:KERNEL VIRTUAL MACHINE (KVM),
	open list:LINUX FOR POWERPC (32-BIT AND 64-BIT),
	open list:INFINIBAND SUBSYSTEM,
	open list:BPF (Safe dynamic programs and tools),
	open list:MEMORY MANAGEMENT

On 06/18/16 00:59, Doug Ledford wrote:
> On 6/13/2016 3:44 PM, Topi Miettinen wrote:
>> Track maximum size of locked memory, presented in /proc/self/limits.
> 
> You should have probably Cc:ed everyone on the cover letter and probably
> patch 1 of this series.  This patch is hard to decipher without the
> additional context of those items.  However, that said, I think I see

Yes, I didn't know to CC everybody involved, sorry about that.

> what you are doing.  But your wording of your comments below is bad:
> 
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index feb9bb7..d3f3c9f 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -3378,10 +3378,16 @@ static inline unsigned long rlimit_max(unsigned int limit)
>>  	return task_rlimit_max(current, limit);
>>  }
>>  
>> +static inline void task_bump_rlimit(struct task_struct *tsk,
>> +				    unsigned int limit, unsigned long r)
>> +{
>> +	if (READ_ONCE(tsk->signal->rlim_curmax[limit]) < r)
>> +		tsk->signal->rlim_curmax[limit] = r;
>> +}
>> +
>>  static inline void bump_rlimit(unsigned int limit, unsigned long r)
>>  {
>> -	if (READ_ONCE(current->signal->rlim_curmax[limit]) < r)
>> -		current->signal->rlim_curmax[limit] = r;
>> +	return task_bump_rlimit(current, limit, r);
>>  }
>>  
>>  #ifdef CONFIG_CPU_FREQ
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 46ecce4..192001e 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -76,6 +76,9 @@ static int bpf_map_charge_memlock(struct bpf_map *map)
>>  		return -EPERM;
>>  	}
>>  	map->user = user;
>> +	/* XXX resource limits apply per task, not per user */
>> +	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
>> +		    PAGE_SHIFT);
> 
> No, these resource limits do not apply per task.  They are per user.

The problem could be that the manual pages do not say that; if anything,
they suggest the opposite. For example, setrlimit(2) says that some limits
(RLIMIT_MEMLOCK only for SHM_LOCK, and others like RLIMIT_MSGQUEUE)
do indeed apply per user, but the others are per process. This note in mlock(2)
could also easily be read as specifying a per-process limit:

"Since Linux 2.6.9, no limits are placed on the amount of memory that
       a privileged process can lock and the RLIMIT_MEMLOCK soft resource
       limit instead defines a limit on how much memory an unprivileged
       process may lock."

It's also confusing (to me, at least) that the limit values are stored
in per task structures, so the actual limits can be different for each
process for the same user.

The limits are also sometimes compared to per task
current->mm->pinned_vm, in other places to current->mm->locked_vm and in
still other places to per user user->locked_vm. How can the same limit
apply to all of them at the same time? I'd think the user can actually
lock many times the limit because of this.

Anyway, assuming that the actual implementation is always correct and
unchangeable due to ABI stability reasons, it's useless to add XXX
comments like I did.

> However, you are doing maximum  usage accounting on a per-task basis by
> adding a new counter to the signal struct of the task.  Fine, but your
> comments need to reflect that instead of the confusing comment above.
> In addition, your function name is horrible for what you are doing.  A
> person reading this function will think that you are bumping the actual
> rlimit on the task, which is not what you are doing.  You are performing
> per-task accounting of MEMLOCK memory.  The actual permission checks are
> per-user, and the primary accounting is per-user.  So, really, this is
> just a nice little feature that provides a more granular per-task usage
> (but not control) so a user can see where their overall memlock memory
> is being used.  Fine.  I would reword the comment something like this:
> 
> /* XXX resource is tracked and limit enforced on a per user basis,
>    but we track it on a per-task basis as well so users can identify
>    hogs of this resource, stats can be found in /proc/<pid>/limits */
> 
> And I would rename bump_rlimit and task_bump_rlimit to something like
> account_rlimit and task_account_rlimit.  Calling it bump just gives the
> wrong idea entirely on first read.

Right, others have also proposed better names.

-Topi

> 
>>  	return 0;
>>  }
>>  
>> @@ -601,6 +604,9 @@ static int bpf_prog_charge_memlock(struct bpf_prog *prog)
>>  		return -EPERM;
>>  	}
>>  	prog->aux->user = user;
>> +	/* XXX resource limits apply per task, not per user */
>> +	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
>> +		    PAGE_SHIFT);
>>  	return 0;
>>  }
> 
>> @@ -798,6 +802,9 @@ int user_shm_lock(size_t size, struct user_struct *user)
>>  	get_uid(user);
>>  	user->locked_shm += locked;
>>  	allowed = 1;
>> +
>> +	/* XXX resource limits apply per task, not per user */
>> +	bump_rlimit(RLIMIT_MEMLOCK, user->locked_shm << PAGE_SHIFT);
>>  out:
>>  	spin_unlock(&shmlock_user_lock);
>>  	return allowed;
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index 0963e7f..4e683dd 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -2020,6 +2020,9 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
>>  		return -ENOMEM;
>>  
>>  	bump_rlimit(RLIMIT_STACK, actual_size);
>> +	if (vma->vm_flags & VM_LOCKED)
>> +		bump_rlimit(RLIMIT_MEMLOCK,
>> +			    (mm->locked_vm + grow) << PAGE_SHIFT);
>>  
>>  	return 0;
>>  }
>> diff --git a/mm/mremap.c b/mm/mremap.c
>> index 1f157ad..ade3e13 100644
>> --- a/mm/mremap.c
>> +++ b/mm/mremap.c
>> @@ -394,6 +394,9 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
>>  		*p = charged;
>>  	}
>>  
>> +	if (vma->vm_flags & VM_LOCKED)
>> +		bump_rlimit(RLIMIT_MEMLOCK, (mm->locked_vm << PAGE_SHIFT) +
>> +			    new_len - old_len);
>>  	return vma;
>>  }
>>  
>>
> 
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2016-06-18  7:00 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
2016-06-13 19:44 ` [RFC 03/18] memcontrol: present maximum used memory also for cgroup-v2 Topi Miettinen
2016-06-14  7:01   ` Michal Hocko
2016-06-14 15:47     ` Topi Miettinen
2016-06-14 16:04       ` Johannes Weiner
2016-06-14 17:15         ` Topi Miettinen
2016-06-16 10:27           ` Michal Hocko
2016-06-13 19:44 ` [RFC 07/18] limits: track RLIMIT_FSIZE actual max Topi Miettinen
2016-06-13 19:44 ` [RFC 08/18] limits: track RLIMIT_DATA " Topi Miettinen
2016-06-13 19:44 ` [RFC 10/18] limits: track RLIMIT_STACK " Topi Miettinen
2016-06-13 19:44 ` [RFC 12/18] limits: track RLIMIT_MEMLOCK " Topi Miettinen
2016-06-13 20:43   ` Alex Williamson
2016-06-13 21:17     ` Topi Miettinen
2016-06-18  0:59   ` Doug Ledford
2016-06-18  7:00     ` Topi Miettinen
2016-06-13 19:44 ` [RFC 13/18] limits: track RLIMIT_AS " Topi Miettinen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).