From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: linux-doc@vger.kernel.org, linux-api@vger.kernel.org,
workflows@vger.kernel.org, tools@kernel.org,
Sasha Levin <sashal@kernel.org>
Subject: [RFC v2 11/22] mm/mlock: add API specification for mlock2
Date: Tue, 24 Jun 2025 14:07:31 -0400 [thread overview]
Message-ID: <20250624180742.5795-12-sashal@kernel.org> (raw)
In-Reply-To: <20250624180742.5795-1-sashal@kernel.org>
Add kernel API specification for the mlock2() system call.
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
mm/mlock.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 163 insertions(+)
diff --git a/mm/mlock.c b/mm/mlock.c
index b97768b1cfa60..869c6ba0a7ec8 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -805,6 +805,169 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
return do_mlock(start, len, VM_LOCKED);
}
+
+DEFINE_KERNEL_API_SPEC(sys_mlock2) /* declarative contract for the mlock2() syscall defined below */
+	KAPI_DESCRIPTION("Lock pages in memory with flags")
+	KAPI_LONG_DESC("Enhanced version of mlock() that supports flags. "
+		       "MLOCK_ONFAULT flag allows locking pages on fault rather than immediately.")
+	KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE)
+
+	/* Parameters: three entries, indices 0-2, matching KAPI_PARAM_COUNT(3) */
+	KAPI_PARAM(0, "start", "unsigned long", "Starting address of memory range to lock")
+		KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+		KAPI_PARAM_TYPE(KAPI_TYPE_UINT)
+		KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_NONE)
+		KAPI_PARAM_CONSTRAINT("Rounded down to page boundary")
+	KAPI_PARAM_END
+
+	KAPI_PARAM(1, "len", "size_t", "Length of memory range to lock in bytes")
+		KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+		KAPI_PARAM_TYPE(KAPI_TYPE_UINT)
+		KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_RANGE)
+		KAPI_PARAM_RANGE(0, LONG_MAX)
+		KAPI_PARAM_CONSTRAINT("Rounded up to page boundary")
+	KAPI_PARAM_END
+
+	KAPI_PARAM(2, "flags", "int", "Flags controlling lock behavior")
+		KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+		KAPI_PARAM_TYPE(KAPI_TYPE_INT)
+		KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_MASK)
+		KAPI_PARAM_VALID_MASK(MLOCK_ONFAULT)
+		KAPI_PARAM_CONSTRAINT("Only MLOCK_ONFAULT flag is currently supported")
+	KAPI_PARAM_END
+
+	/* Return specification: 0 is the only success value, anything else is an errno */
+	KAPI_RETURN("long", "0 on success, negative error code on failure")
+		KAPI_RETURN_TYPE(KAPI_TYPE_INT)
+		.check_type = KAPI_RETURN_ERROR_CHECK,
+		.success_value = 0,
+	KAPI_RETURN_END
+
+	/* Error codes: five entries, indices 0-4, matching KAPI_ERROR_COUNT(5) */
+	KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid flags or range", "Unknown flags were specified (flags & ~MLOCK_ONFAULT), or the addition start + len overflowed.")
+	KAPI_ERROR(1, -ENOMEM, "ENOMEM", "Address range or limit issue", "Some of the specified range is not mapped, has unmapped gaps, the lock would cause the number of mapped regions to exceed the limit, or the caller lacks CAP_IPC_LOCK and the lock would exceed a nonzero RLIMIT_MEMLOCK.")
+	KAPI_ERROR(2, -EPERM, "EPERM", "Insufficient privileges", "The caller is not privileged (no CAP_IPC_LOCK) and RLIMIT_MEMLOCK is 0.")
+	KAPI_ERROR(3, -EAGAIN, "EAGAIN", "Some or all memory could not be locked", "Some or all of the specified address range could not be locked.")
+	KAPI_ERROR(4, -EINTR, "EINTR", "Interrupted by signal", "The operation was interrupted by a fatal signal before completion.")
+
+	/* Signal specifications: entry 0 is received by the caller, entry 1 is sent to it */
+	KAPI_SIGNAL(0, 0, "FATAL_SIGNALS", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_RETURN)
+		KAPI_SIGNAL_CONDITION("Fatal signal pending during mmap_write_lock_killable")
+		KAPI_SIGNAL_DESC("Fatal signals (SIGKILL, SIGTERM, etc.) can interrupt the operation when acquiring mmap_write_lock_killable(), causing -EINTR return")
+		KAPI_SIGNAL_RESTARTABLE /* NOTE(review): a fatal signal ends the task, so a restart seems unreachable - confirm */
+	KAPI_SIGNAL_END
+
+	KAPI_SIGNAL(1, SIGBUS, "SIGBUS", KAPI_SIGNAL_SEND, KAPI_SIGNAL_ACTION_DEFAULT)
+		KAPI_SIGNAL_TARGET("Current process")
+		KAPI_SIGNAL_CONDITION("Memory access to locked page fails")
+		KAPI_SIGNAL_DESC("Can be generated if accessing a locked page that cannot be brought into memory (e.g., truncated file mapping)")
+	KAPI_SIGNAL_END
+
+	/* Side effects: five entries, indices 0-4, matching KAPI_SIDE_EFFECT_COUNT(5) */
+	KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_ALLOC_MEMORY,
+			 "process memory",
+			 "Locks pages into physical memory, preventing swapping")
+		KAPI_EFFECT_REVERSIBLE
+		KAPI_EFFECT_CONDITION("Pages become resident in RAM")
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE,
+			 "mm->locked_vm",
+			 "Increases process locked memory counter")
+		KAPI_EFFECT_REVERSIBLE
+		KAPI_EFFECT_CONDITION("Counted against RLIMIT_MEMLOCK")
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT(2, KAPI_EFFECT_ALLOC_MEMORY,
+			 "page tables",
+			 "May allocate and populate page table entries")
+		KAPI_EFFECT_CONDITION("Pages not already present")
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE,
+			 "VMA flags",
+			 "Sets VM_LOCKED and optionally VM_LOCKONFAULT on affected VMAs")
+		KAPI_EFFECT_REVERSIBLE
+	KAPI_SIDE_EFFECT_END
+
+	KAPI_SIDE_EFFECT(4, KAPI_EFFECT_MODIFY_STATE, /* per-VMA fault policy is memory-management state, not a filesystem effect */
+			 "page fault behavior",
+			 "With MLOCK_ONFAULT, changes how future page faults are handled")
+		KAPI_EFFECT_CONDITION("MLOCK_ONFAULT flag specified")
+	KAPI_SIDE_EFFECT_END
+
+	/* State transitions: five entries, indices 0-4, matching KAPI_STATE_TRANS_COUNT(5) */
+	KAPI_STATE_TRANS(0, "memory pages",
+			 "swappable", "locked in RAM",
+			 "Pages become non-swappable and pinned in physical memory")
+		KAPI_STATE_TRANS_COND("Without MLOCK_ONFAULT")
+	KAPI_STATE_TRANS_END
+
+	KAPI_STATE_TRANS(1, "VMA flags",
+			 "unlocked", "VM_LOCKED set",
+			 "Virtual memory area marked as locked")
+	KAPI_STATE_TRANS_END
+
+	KAPI_STATE_TRANS(2, "VMA flags",
+			 "normal fault", "VM_LOCKONFAULT set",
+			 "VMA marked to lock pages on future faults")
+		KAPI_STATE_TRANS_COND("MLOCK_ONFAULT flag specified")
+	KAPI_STATE_TRANS_END
+
+	KAPI_STATE_TRANS(3, "page residency",
+			 "may be swapped", "resident in memory",
+			 "Pages brought into RAM and kept there")
+		KAPI_STATE_TRANS_COND("Without MLOCK_ONFAULT")
+	KAPI_STATE_TRANS_END
+
+	KAPI_STATE_TRANS(4, "process statistics",
+			 "normal memory accounting", "locked memory accounting",
+			 "Memory counted against RLIMIT_MEMLOCK")
+	KAPI_STATE_TRANS_END
+
+	/* Locking information: one description per lock so no initializer is overridden */
+	KAPI_LOCK(0, "mmap_lock", KAPI_LOCK_RWLOCK) /* NOTE(review): mmap_lock is an rw_semaphore - confirm RWLOCK is the intended type */
+		KAPI_LOCK_DESC("Process memory map write lock; "
+			       "protects VMA modifications during lock operation")
+		KAPI_LOCK_ACQUIRED
+		KAPI_LOCK_RELEASED
+	KAPI_LOCK_END
+
+	KAPI_LOCK(1, "lru_lock", KAPI_LOCK_SPINLOCK)
+		KAPI_LOCK_DESC("Per-memcg LRU list lock; "
+			       "taken when moving pages to unevictable list when locking pages")
+		KAPI_LOCK_ACQUIRED
+		KAPI_LOCK_RELEASED
+	KAPI_LOCK_END
+
+	KAPI_ERROR_COUNT(5)
+	KAPI_PARAM_COUNT(3)
+	KAPI_SINCE_VERSION("4.4")
+	KAPI_SIGNAL_COUNT(2)
+	KAPI_SIDE_EFFECT_COUNT(5)
+	KAPI_STATE_TRANS_COUNT(5)
+	KAPI_LOCK_COUNT(2)
+
+	/* Capability specifications: one entry, matching KAPI_CAPABILITY_COUNT(1) */
+	KAPI_CAPABILITY(0, CAP_IPC_LOCK, "CAP_IPC_LOCK", KAPI_CAP_BYPASS_CHECK)
+		KAPI_CAP_ALLOWS("Lock unlimited amount of memory (no RLIMIT_MEMLOCK enforcement)")
+		KAPI_CAP_WITHOUT("Must respect RLIMIT_MEMLOCK resource limit")
+		KAPI_CAP_CONDITION("Checked when RLIMIT_MEMLOCK is 0 or locking would exceed limit")
+		KAPI_CAP_PRIORITY(0)
+	KAPI_CAPABILITY_END
+
+	KAPI_CAPABILITY_COUNT(1)
+
+	KAPI_EXAMPLES("mlock2(addr, len, 0);  // Same as mlock()\n"
+		      "mlock2(addr, len, MLOCK_ONFAULT);  // Lock on fault")
+	KAPI_NOTES("MLOCK_ONFAULT flag defers actual page locking until pages are accessed. "
+		   "Memory locks do not stack. Locks are not inherited by child processes. "
+		   "Commonly used by real-time applications to prevent page faults. Also used "
+		   "for security to prevent sensitive data (e.g., cryptographic keys) from being "
+		   "written to swap. Note: locked pages may still be saved to swap during "
+		   "system suspend/hibernate.")
+KAPI_END_SPEC;
+
SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
{
vm_flags_t vm_flags = VM_LOCKED;
--
2.39.5
next prev parent reply other threads:[~2025-06-24 18:07 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-24 18:07 [RFC v2 00/22] Kernel API specification framework Sasha Levin
2025-06-24 18:07 ` [RFC v2 01/22] kernel/api: introduce kernel " Sasha Levin
2025-06-30 19:53 ` Jonathan Corbet
2025-06-30 22:20 ` Mauro Carvalho Chehab
2025-07-01 14:23 ` Sasha Levin
2025-07-01 15:25 ` Mauro Carvalho Chehab
2025-07-01 19:01 ` Jonathan Corbet
2025-07-01 20:50 ` Sasha Levin
2025-07-01 21:43 ` Jonathan Corbet
2025-07-01 22:16 ` Sasha Levin
2025-06-24 18:07 ` [RFC v2 02/22] eventpoll: add API specification for epoll_create1 Sasha Levin
2025-06-24 18:07 ` [RFC v2 03/22] eventpoll: add API specification for epoll_create Sasha Levin
2025-06-24 18:07 ` [RFC v2 04/22] eventpoll: add API specification for epoll_ctl Sasha Levin
2025-06-24 18:07 ` [RFC v2 05/22] eventpoll: add API specification for epoll_wait Sasha Levin
2025-06-24 18:07 ` [RFC v2 06/22] eventpoll: add API specification for epoll_pwait Sasha Levin
2025-06-24 18:07 ` [RFC v2 07/22] eventpoll: add API specification for epoll_pwait2 Sasha Levin
2025-06-24 18:07 ` [RFC v2 08/22] exec: add API specification for execve Sasha Levin
2025-06-24 18:07 ` [RFC v2 09/22] exec: add API specification for execveat Sasha Levin
2025-06-24 18:07 ` [RFC v2 10/22] mm/mlock: add API specification for mlock Sasha Levin
2025-06-24 18:07 ` Sasha Levin [this message]
2025-06-24 18:07 ` [RFC v2 12/22] mm/mlock: add API specification for mlockall Sasha Levin
2025-06-24 18:07 ` [RFC v2 13/22] mm/mlock: add API specification for munlock Sasha Levin
2025-06-24 18:07 ` [RFC v2 14/22] mm/mlock: add API specification for munlockall Sasha Levin
2025-06-24 18:07 ` [RFC v2 15/22] kernel/api: add debugfs interface for kernel API specifications Sasha Levin
2025-06-24 18:07 ` [RFC v2 16/22] kernel/api: add IOCTL specification infrastructure Sasha Levin
2025-06-24 18:07 ` [RFC v2 17/22] fwctl: add detailed IOCTL API specifications Sasha Levin
2025-06-24 18:07 ` [RFC v2 18/22] binder: " Sasha Levin
2025-06-24 18:07 ` [RFC v2 19/22] kernel/api: Add sysfs validation support to kernel API specification framework Sasha Levin
2025-06-24 18:07 ` [RFC v2 20/22] block: sysfs API specifications Sasha Levin
2025-06-24 18:07 ` [RFC v2 21/22] net/socket: add API specification for socket() Sasha Levin
2025-06-24 18:07 ` [RFC v2 22/22] tools/kapi: Add kernel API specification extraction tool Sasha Levin
2025-07-01 2:43 ` [RFC v2 00/22] Kernel API specification framework Jake Edge
2025-07-01 14:54 ` Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250624180742.5795-12-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=linux-api@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tools@kernel.org \
--cc=workflows@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).