linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org,
	tools@kernel.org, Sasha Levin <sashal@kernel.org>
Subject: [RFC 11/19] mm/mlock: add API specification for mlock2
Date: Sat, 14 Jun 2025 09:48:50 -0400	[thread overview]
Message-ID: <20250614134858.790460-12-sashal@kernel.org> (raw)
In-Reply-To: <20250614134858.790460-1-sashal@kernel.org>

Add kernel API specification for the mlock2() system call.

Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 mm/mlock.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)

diff --git a/mm/mlock.c b/mm/mlock.c
index a37102df54b01..af2ab78acc226 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -768,6 +768,154 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 	return do_mlock(start, len, VM_LOCKED);
 }
 
+
+DEFINE_KERNEL_API_SPEC(sys_mlock2)
+	KAPI_DESCRIPTION("Lock pages in memory with flags")
+	KAPI_LONG_DESC("Enhanced version of mlock() that supports flags. "
+		       "MLOCK_ONFAULT flag allows locking pages on fault rather than immediately.")
+	KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE)
+
+	/* Parameters */
+	KAPI_PARAM(0, "start", "unsigned long", "Starting address of memory range to lock")
+		KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+		.type = KAPI_TYPE_UINT,
+		.constraint_type = KAPI_CONSTRAINT_NONE,
+		.constraints = "Rounded down to page boundary",
+	KAPI_PARAM_END
+
+	KAPI_PARAM(1, "len", "size_t", "Length of memory range to lock in bytes")
+		KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+		.type = KAPI_TYPE_UINT,
+		.constraint_type = KAPI_CONSTRAINT_RANGE,
+		KAPI_PARAM_RANGE(0, LONG_MAX)
+		.constraints = "Rounded up to page boundary",
+	KAPI_PARAM_END
+
+	KAPI_PARAM(2, "flags", "int", "Flags controlling lock behavior")
+		KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+		.type = KAPI_TYPE_INT,
+		.constraint_type = KAPI_CONSTRAINT_MASK,
+		.valid_mask = MLOCK_ONFAULT,
+		.constraints = "Only MLOCK_ONFAULT flag is currently supported",
+	KAPI_PARAM_END
+
+	/* Return specification */
+	KAPI_RETURN("long", "0 on success, negative error code on failure")
+		.type = KAPI_TYPE_INT,
+		.check_type = KAPI_RETURN_ERROR_CHECK,
+		.success_value = 0,
+	KAPI_RETURN_END
+
+	/* Error codes */
+	KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid flags", "Unknown flags were specified (flags & ~MLOCK_ONFAULT).")
+	KAPI_ERROR(1, -ENOMEM, "ENOMEM", "Address range issue", "Some of the specified range is not mapped, has unmapped gaps, or the lock would cause the number of mapped regions to exceed the limit.")
+	KAPI_ERROR(2, -EPERM, "EPERM", "Insufficient privileges", "The caller is not privileged (no CAP_IPC_LOCK) and RLIMIT_MEMLOCK is 0.")
+	KAPI_ERROR(3, -EAGAIN, "EAGAIN", "Some or all memory could not be locked", "Some or all of the specified address range could not be locked.")
+	KAPI_ERROR(4, -EINTR, "EINTR", "Interrupted by signal", "The operation was interrupted by a fatal signal before completion.")
+
+	/* Signal specifications */
+	KAPI_SIGNAL(0, 0, "FATAL_SIGNALS", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_RETURN)
+		KAPI_SIGNAL_CONDITION("Fatal signal pending during mmap_write_lock_killable")
+		KAPI_SIGNAL_DESC("Fatal signals (SIGKILL, SIGTERM, etc.) can interrupt the operation when acquiring mmap_write_lock_killable(), causing -EINTR return")
+		KAPI_SIGNAL_RESTARTABLE
+	KAPI_SIGNAL_END
+
+	KAPI_SIGNAL(1, SIGBUS, "SIGBUS", KAPI_SIGNAL_SEND, KAPI_SIGNAL_ACTION_DEFAULT)
+		KAPI_SIGNAL_TARGET("Current process")
+		KAPI_SIGNAL_CONDITION("Memory access to locked page fails")
+		KAPI_SIGNAL_DESC("Can be generated if accessing a locked page that cannot be brought into memory (e.g., truncated file mapping)")
+	KAPI_SIGNAL_END
+
+	/* Side effects */
+	KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_ALLOC_MEMORY,
+			 "process memory",
+			 "Locks pages into physical memory, preventing swapping")
+		KAPI_EFFECT_REVERSIBLE
+		KAPI_EFFECT_CONDITION("Pages become resident in RAM")
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE,
+			 "mm->locked_vm",
+			 "Increases process locked memory counter")
+		KAPI_EFFECT_REVERSIBLE
+		KAPI_EFFECT_CONDITION("Counted against RLIMIT_MEMLOCK")
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT(2, KAPI_EFFECT_ALLOC_MEMORY,
+			 "page tables",
+			 "May allocate and populate page table entries")
+		KAPI_EFFECT_CONDITION("Pages not already present")
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE,
+			 "VMA flags",
+			 "Sets VM_LOCKED and optionally VM_LOCKONFAULT on affected VMAs")
+		KAPI_EFFECT_REVERSIBLE
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT(4, KAPI_EFFECT_FILESYSTEM,
+			 "page fault behavior",
+			 "With MLOCK_ONFAULT, changes how future page faults are handled")
+		KAPI_EFFECT_CONDITION("MLOCK_ONFAULT flag specified")
+	KAPI_SIDE_EFFECT_END
+
+	/* State transitions */
+	KAPI_STATE_TRANS(0, "memory pages",
+			 "swappable", "locked in RAM",
+			 "Pages become non-swappable and pinned in physical memory")
+		KAPI_STATE_TRANS_COND("Without MLOCK_ONFAULT")
+	KAPI_STATE_TRANS_END
+
+	KAPI_STATE_TRANS(1, "VMA flags",
+			 "unlocked", "VM_LOCKED set",
+			 "Virtual memory area marked as locked")
+	KAPI_STATE_TRANS_END
+
+	KAPI_STATE_TRANS(2, "VMA flags",
+			 "normal fault", "VM_LOCKONFAULT set",
+			 "VMA marked to lock pages on future faults")
+		KAPI_STATE_TRANS_COND("MLOCK_ONFAULT flag specified")
+	KAPI_STATE_TRANS_END
+
+	KAPI_STATE_TRANS(3, "page residency",
+			 "may be swapped", "resident in memory",
+			 "Pages brought into RAM and kept there")
+		KAPI_STATE_TRANS_COND("Without MLOCK_ONFAULT")
+	KAPI_STATE_TRANS_END
+
+	KAPI_STATE_TRANS(4, "process statistics",
+			 "normal memory accounting", "locked memory accounting",
+			 "Memory counted against RLIMIT_MEMLOCK")
+	KAPI_STATE_TRANS_END
+
+	/* Locking information */
+	KAPI_LOCK(0, "mmap_lock", KAPI_LOCK_RWLOCK)
+		KAPI_LOCK_DESC("Process memory map write lock")
+		KAPI_LOCK_ACQUIRED
+		KAPI_LOCK_RELEASED
+		KAPI_LOCK_DESC("Protects VMA modifications during lock operation")
+	KAPI_LOCK_END
+
+	KAPI_LOCK(1, "lru_lock", KAPI_LOCK_SPINLOCK)
+		KAPI_LOCK_DESC("Per-memcg LRU list lock")
+		KAPI_LOCK_ACQUIRED
+		KAPI_LOCK_RELEASED
+		KAPI_LOCK_DESC("Taken when moving pages to unevictable list when locking pages")
+	KAPI_LOCK_END
+
+	.error_count = 5,
+	.param_count = 3,
+	.since_version = "4.4",
+	.signal_count = 2,
+	.side_effect_count = 5,
+	.state_trans_count = 5,
+	.lock_count = 2,
+	.examples = "mlock2(addr, len, 0);            // Same as mlock()\n"
+		    "mlock2(addr, len, MLOCK_ONFAULT); // Lock on fault",
+	.notes = "MLOCK_ONFAULT flag defers actual page locking until pages are accessed. "
+		 "Memory locks do not stack. Locks are not inherited by child processes.",
+KAPI_END_SPEC;
+
 SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
 {
 	vm_flags_t vm_flags = VM_LOCKED;
-- 
2.39.5


  parent reply	other threads:[~2025-06-14 13:49 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-14 13:48 [RFC 00/19] Kernel API Specification Framework Sasha Levin
2025-06-14 13:48 ` [RFC 01/19] kernel/api: introduce kernel API specification framework Sasha Levin
2025-06-14 13:48 ` [RFC 02/19] eventpoll: add API specification for epoll_create1 Sasha Levin
2025-06-14 13:48 ` [RFC 03/19] eventpoll: add API specification for epoll_create Sasha Levin
2025-06-14 13:48 ` [RFC 04/19] eventpoll: add API specification for epoll_ctl Sasha Levin
2025-06-14 13:48 ` [RFC 05/19] eventpoll: add API specification for epoll_wait Sasha Levin
2025-06-14 13:48 ` [RFC 06/19] eventpoll: add API specification for epoll_pwait Sasha Levin
2025-06-14 13:48 ` [RFC 07/19] eventpoll: add API specification for epoll_pwait2 Sasha Levin
2025-06-14 13:48 ` [RFC 08/19] exec: add API specification for execve Sasha Levin
2025-06-16 21:39   ` Florian Weimer
2025-06-17  1:51     ` Sasha Levin
2025-06-17  7:13       ` Florian Weimer
2025-06-17 22:58         ` Sasha Levin
2025-06-14 13:48 ` [RFC 09/19] exec: add API specification for execveat Sasha Levin
2025-06-14 13:48 ` [RFC 10/19] mm/mlock: add API specification for mlock Sasha Levin
2025-06-14 13:48 ` Sasha Levin [this message]
2025-06-14 13:48 ` [RFC 12/19] mm/mlock: add API specification for mlockall Sasha Levin
2025-06-14 13:48 ` [RFC 13/19] mm/mlock: add API specification for munlock Sasha Levin
2025-06-14 13:48 ` [RFC 14/19] mm/mlock: add API specification for munlockall Sasha Levin
2025-06-14 13:48 ` [RFC 15/19] kernel/api: add debugfs interface for kernel API specifications Sasha Levin
2025-06-14 13:48 ` [RFC 16/19] kernel/api: add IOCTL specification infrastructure Sasha Levin
2025-06-14 13:48 ` [RFC 17/19] fwctl: add detailed IOCTL API specifications Sasha Levin
2025-06-14 13:48 ` [RFC 18/19] binder: " Sasha Levin
2025-06-14 13:48 ` [RFC 19/19] tools/kapi: Add kernel API specification extraction tool Sasha Levin
2025-06-17 12:08 ` [RFC 00/19] Kernel API Specification Framework David Laight
2025-06-18 21:29 ` Kees Cook
2025-06-19  0:22   ` Sasha Levin
2025-06-23 13:28     ` Dmitry Vyukov
2025-06-24 14:06       ` Cyril Hrubis
2025-06-24 14:30         ` Dmitry Vyukov
2025-06-24 15:27           ` Cyril Hrubis
2025-06-24 20:04       ` Sasha Levin
2025-06-25  8:49         ` Dmitry Vyukov
2025-06-25  8:52         ` Dmitry Vyukov
2025-06-25 15:46           ` Cyril Hrubis
2025-06-25 15:55           ` Sasha Levin
2025-06-26  8:32             ` Dmitry Vyukov
2025-06-26  8:37               ` Dmitry Vyukov
2025-06-26 16:23                 ` Sasha Levin
2025-06-27  6:23                   ` Dmitry Vyukov
2025-06-30 14:27                     ` Sasha Levin
2025-07-01  6:11                       ` Dmitry Vyukov
2025-06-25  8:56         ` Dmitry Vyukov
2025-06-25 16:23           ` Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250614134858.790460-12-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tools@kernel.org \
    --cc=workflows@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).