Linux Security Modules development
 help / color / mirror / Atom feed
* Re: [PATCH v5 2/9] landlock: Control pathname UNIX domain socket resolution by path
From: Günther Noack @ 2026-03-14 23:15 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Günther Noack, John Johansen, Tingmao Wang, Justin Suess,
	Jann Horn, linux-security-module, Samasth Norway Ananda,
	Matthieu Buffet, Mikhail Ivanov, konstantin.meskhidze,
	Demi Marie Obenour, Alyssa Ross, Tahera Fahimi
In-Reply-To: <20260308.IexeiQuae7ee@digikod.net>

On Sun, Mar 08, 2026 at 12:50:06PM +0100, Mickaël Salaün wrote:
> On Sun, Mar 08, 2026 at 10:09:52AM +0100, Mickaël Salaün wrote:
> > On Thu, Feb 19, 2026 at 02:59:38PM +0100, Günther Noack wrote:
> > > On Thu, Feb 19, 2026 at 10:45:44AM +0100, Mickaël Salaün wrote:
> > > > On Wed, Feb 18, 2026 at 10:37:16AM +0100, Mickaël Salaün wrote:
> > > > > On Sun, Feb 15, 2026 at 11:51:50AM +0100, Günther Noack wrote:
> > > > > > * Add a new access right LANDLOCK_ACCESS_FS_RESOLVE_UNIX, which
> > > > > >   controls the look up operations for named UNIX domain sockets.  The
> > > > > >   resolution happens during connect() and sendmsg() (depending on
> > > > > >   socket type).
> > > > > > * Hook into the path lookup in unix_find_bsd() in af_unix.c, using a
> > > > > >   LSM hook.  Make policy decisions based on the new access rights
> > > > > > * Increment the Landlock ABI version.
> > > > > > * Minor test adaptions to keep the tests working.
> > > > > > 
> > > > > > With this access right, access is granted if either of the following
> > > > > > conditions is met:
> > > > > > 
> > > > > > * The target socket's filesystem path was allow-listed using a
> > > > > >   LANDLOCK_RULE_PATH_BENEATH rule, *or*:
> > > > > > * The target socket was created in the same Landlock domain in which
> > > > > >   LANDLOCK_ACCESS_FS_RESOLVE_UNIX was restricted.
> > > > > > 
> > > > > > In case of a denial, connect() and sendmsg() return EACCES, which is
> > > > > > the same error as it is returned if the user does not have the write
> > > > > > bit in the traditional Unix file system permissions of that file.
> > > > > > 
> > > > > > This feature was created with substantial discussion and input from
> > > > > > Justin Suess, Tingmao Wang and Mickaël Salaün.
> > > > > > 
> > > > > > Cc: Tingmao Wang <m@maowtm.org>
> > > > > > Cc: Justin Suess <utilityemal77@gmail.com>
> > > > > > Cc: Mickaël Salaün <mic@digikod.net>
> > > > > > Suggested-by: Jann Horn <jannh@google.com>
> > > > > > Link: https://github.com/landlock-lsm/linux/issues/36
> > > > > > Signed-off-by: Günther Noack <gnoack3000@gmail.com>
> > > > > > ---
> > > > > >  include/uapi/linux/landlock.h                |  10 ++
> > > > > >  security/landlock/access.h                   |  11 +-
> > > > > >  security/landlock/audit.c                    |   1 +
> > > > > >  security/landlock/fs.c                       | 102 ++++++++++++++++++-
> > > > > >  security/landlock/limits.h                   |   2 +-
> > > > > >  security/landlock/syscalls.c                 |   2 +-
> > > > > >  tools/testing/selftests/landlock/base_test.c |   2 +-
> > > > > >  tools/testing/selftests/landlock/fs_test.c   |   5 +-
> > > > > >  8 files changed, 128 insertions(+), 7 deletions(-)
> > > > 
> > > > > > index 60ff217ab95b..8d0edf94037d 100644
> > > > > > --- a/security/landlock/audit.c
> > > > > > +++ b/security/landlock/audit.c
> > > > > > @@ -37,6 +37,7 @@ static const char *const fs_access_strings[] = {
> > > > > >  	[BIT_INDEX(LANDLOCK_ACCESS_FS_REFER)] = "fs.refer",
> > > > > >  	[BIT_INDEX(LANDLOCK_ACCESS_FS_TRUNCATE)] = "fs.truncate",
> > > > > >  	[BIT_INDEX(LANDLOCK_ACCESS_FS_IOCTL_DEV)] = "fs.ioctl_dev",
> > > > > > +	[BIT_INDEX(LANDLOCK_ACCESS_FS_RESOLVE_UNIX)] = "fs.resolve_unix",
> > > > > >  };
> > > > > >  
> > > > > >  static_assert(ARRAY_SIZE(fs_access_strings) == LANDLOCK_NUM_ACCESS_FS);
> > > > > > diff --git a/security/landlock/fs.c b/security/landlock/fs.c
> > > > > > index e764470f588c..76035c6f2bf1 100644
> > > > > > --- a/security/landlock/fs.c
> > > > > > +++ b/security/landlock/fs.c
> > > > > > @@ -27,6 +27,7 @@
> > > > > >  #include <linux/lsm_hooks.h>
> > > > > >  #include <linux/mount.h>
> > > > > >  #include <linux/namei.h>
> > > > > > +#include <linux/net.h>
> > > > > >  #include <linux/path.h>
> > > > > >  #include <linux/pid.h>
> > > > > >  #include <linux/rcupdate.h>
> > > > > > @@ -314,7 +315,8 @@ static struct landlock_object *get_inode_object(struct inode *const inode)
> > > > > >  	LANDLOCK_ACCESS_FS_WRITE_FILE | \
> > > > > >  	LANDLOCK_ACCESS_FS_READ_FILE | \
> > > > > >  	LANDLOCK_ACCESS_FS_TRUNCATE | \
> > > > > > -	LANDLOCK_ACCESS_FS_IOCTL_DEV)
> > > > > > +	LANDLOCK_ACCESS_FS_IOCTL_DEV | \
> > > > > > +	LANDLOCK_ACCESS_FS_RESOLVE_UNIX)
> > > > > >  /* clang-format on */
> > > > > >  
> > > > > >  /*
> > > > > > @@ -1561,6 +1563,103 @@ static int hook_path_truncate(const struct path *const path)
> > > > > >  	return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE);
> > > > > >  }
> > > > > >  
> > > > > > +/**
> > > > > > + * unmask_scoped_access - Remove access right bits in @masks in all layers
> > > > > > + *                        where @client and @server have the same domain
> > > > > > + *
> > > > > > + * This does the same as domain_is_scoped(), but unmasks bits in @masks.
> > > > > > + * It can not return early as domain_is_scoped() does.
> > > > 
> > > > Why can't we use the same logic as for other scopes?
> > > 
> > > The other scopes, for which this is implemented in domain_is_scoped(),
> > > do not need to do this layer-by-layer.
> > > 
> > > I have to admit, in my initial implementation, I was using
> > > domain_is_scoped() directly, and the logic at the end of the hook was
> > > roughly:
> > > 
> > >    --- BUGGY CODE START ---
> > >        // ...
> > >        
> > >        if (!domain_is_scoped(..., ..., LANDLOCK_ACCESS_FS_RESOLVE_UNIX))
> > >            return 0;  /* permitted */
> > > 
> > >        return current_check_access_path(path, LANDLOCK_ACCESS_FS_RESOLVE_UNIX)
> > >    }
> > >    --- BUGGY CODE END ---
> > > 
> > > Unfortunately, that is a logic error though -- it implements the formula
> > > 
> > >    Access granted if:
> > >    (FOR-ALL l ∈ layers scoped-access-ok(l)) OR (FOR-ALL l ∈ layers path-access-ok(l))     (WRONG!)
> > > 
> > > but the formula we want is:
> > > 
> > >    Access granted if:
> > >    FOR-ALL l ∈ layers (scoped-access-ok(l) OR path-access-ok(l))     (CORRECT!)
> > 
> > It is worth it to add this explanation to the unmask_scoped_access()
> > description, also pointing to the test that checks this case.
> > 
> > > 
> > > This makes a difference in the case where (pseudocode):
> > > 
> > >    1. landlock_restrict_self(RESOLVE_UNIX)  // d1
> > >    2. create_unix_server("./sock")
> > >    3. landlock_restrict_self(RESOLVE_UNIX, rule=Allow(".", RESOLVE_UNIX))  // d2
> > >    4. connect_unix("./sock")
> > > 
> > >    ,------------------------------------------------d1--,
> > >    |                                                    |
> > >    |    ./sock server                                   |
> > >    |       ^                                            |
> > >    |       |                                            |
> > >    |  ,------------------------------------------d2--,  |
> > >    |  |    |                                         |  |
> > >    |  |  client                                      |  |
> > >    |  |                                              |  |
> > >    |  '----------------------------------------------'  |
> > >    |                                                    |
> > >    '----------------------------------------------------'
> > > 
> > > (BTW, this scenario is covered in the selftests, that is why there is
> > > a variant of these selftests where instead of applying "no domain", we
> > > apply a domain with an exception rule like in step 3 in the pseudocode
> > > above.  Applying that domain should behave the same as applying no
> > > domain at all.)
> > > 
> > > Intuitively, it is clear that the access should be granted:
> > > 
> > >   - d1 does not restrict access to the server,
> > >     because the socket was created within d1 itself.
> > >   - d2 does not restrict access to the server,
> > >     because it has a rule to allow it
> > > 
> > > But the "buggy code" logic above comes to a different conclusion:
> > > 
> > >   - the domain_is_scoped() check denies the access, because the server
> > >     is in a more privileged domain relative to the client domain.
> > >   - the current_check_access_path() check denies the access as well,
> > >     because the socket's path is not allow-listed in d1.
> > > 
> > > In the 'intuitive' reasoning above, we are checking d1 and d2
> > > independently of each other.  While Landlock is not implemented like
> > > that internally, we need to stay consistent with it so that domains
> > > compose correctly.  The way to do that is to track access check
> > > results on a per-layer basis again, and that is why
> > > unmask_scoped_access() uses a layer mask for tracking.  The original
> > > domain_is_scoped() does not use a layer mask, but that also means that
> > > it can return early in some scenarios -- if for any of the relevant
> > > layer depths, the client and server domains are not the same, it exits
> > > early with failure because it's overall not fulfillable any more.  In
> > > the RESOLVE_UNIX case though, we need to remember in which layers we
> > > failed (both high and low ones), because these layers can still be
> > > fulfilled with a PATH_BENEATH rule later.
> > > 
> > > Summary:
> > > 
> > > Option 1: We *can* unify this if you want.  It just might come at a
> > > small performance penalty for domain_is_scoped(), which now uses the
> > > larger layer mask data structure and can't do the same early returns
> > > any more as before.
> > > 
> > > Option 2: Alternatively, if we move the two functions into the same
> > > module, we can keep them separate but still test them against each
> > > other to make sure they are in-line:
> > > 
> > > This invocation should return true...
> > > 
> > >   domain_is_scoped(cli, srv, access)
> > > 
> > > ...in exactly the same situations where this invocation leaves any
> > > bits set in layer_masks:
> > > 
> > >   landlock_init_layer_masks(dom, access, &layer_masks, LL_KEY_INODE);
> > >   unmask_scoped_access(cli, srv, &layer_masks, access);
> > > 
> > > What do you prefer?
> > 
> > I was thinking about factoring out domain_is_scoped() with
> > unmask_scoped_access() but, after some tests, it is not worth it.  Your
> > approach is simple and good.
> > 
> > > 
> > > 
> > > > > > + *
> > > > > > + * @client: Client domain
> > > > > > + * @server: Server domain
> > > > > > + * @masks: Layer access masks to unmask
> > > > > > + * @access: Access bit that controls scoping
> > > > > > + */
> > > > > > +static void unmask_scoped_access(const struct landlock_ruleset *const client,
> > > > > > +				 const struct landlock_ruleset *const server,
> > > > > > +				 struct layer_access_masks *const masks,
> > > > > > +				 const access_mask_t access)
> > > > > 
> > > > > This helper should be moved to task.c and factored out with
> > > > > domain_is_scoped().  This should be a dedicated patch.
> > > > 
> > > > Well, if domain_is_scoped() can be refactored and made generic, it would
> > > > make more sense to move it to domain.c
> > > > 
> > > > > 
> > > > > > +{
> > > > > > +	int client_layer, server_layer;
> > > > > > +	const struct landlock_hierarchy *client_walker, *server_walker;
> > > > > > +
> > > > > > +	if (WARN_ON_ONCE(!client))
> > > > > > +		return; /* should not happen */
> > 
> > Please no comment after ";"
> > 
> > > > > > +
> > > > > > +	if (!server)
> > > > > > +		return; /* server has no Landlock domain; nothing to clear */
> > > > > > +
> > > > > > +	client_layer = client->num_layers - 1;
> > > > > > +	client_walker = client->hierarchy;
> > > > > > +	server_layer = server->num_layers - 1;
> > > > > > +	server_walker = server->hierarchy;
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * Clears the access bits at all layers where the client domain is the
> > > > > > +	 * same as the server domain.  We start the walk at min(client_layer,
> > > > > > +	 * server_layer).  The layer bits until there can not be cleared because
> > > > > > +	 * either the client or the server domain is missing.
> > > > > > +	 */
> > > > > > +	for (; client_layer > server_layer; client_layer--)
> > > > > > +		client_walker = client_walker->parent;
> > > > > > +
> > > > > > +	for (; server_layer > client_layer; server_layer--)
> > > > > > +		server_walker = server_walker->parent;
> > > > > > +
> > > > > > +	for (; client_layer >= 0; client_layer--) {
> > > > > > +		if (masks->access[client_layer] & access &&
> > > > > > +		    client_walker == server_walker)
> > 
> > I'd prefer to first check client_walker == server_walker and then the
> > access.  My main concern is that only one bit of access matching
> > masks->access[client_layer] clear all the access request bits.  In
> > practice there is only one, for now, but this code should be more strict
> > by following a defensive approach.

This function works even if multiple access request bits with
"scope-like" semantics were being checked in parallel; if you consider
the logic in:

  if (masks->access[client_layer] & access &&
      client_walker == server_walker)
          masks->access[client_layer] &= ~access;

you'll realize that the check for "masks->access[client_layer] &
access" is technically irrelevant - if that check fails, all the
affected bits are already zero, so clearing them is a no-op.  This
code is equivalent, but might perform slightly more writes (although
it likely does not make a performance difference in practice):

  if (client_walker == server_walker)
          masks->access[client_layer] &= ~access;

With that code it's a bit easier to see that "access" is actually only
used to decide which bits to clear.  This works both with one and with
multiple access rights.

This follows the same logic as outlined in the comment above in the
code, where it says:

    Clears the access bits at all layers where the client domain is the
    same as the server domain.  We start the walk at min(client_layer,
    server_layer).  The layer bits until there can not be cleared because
    either the client or the server domain is missing.

Clearing bits that aren't there is a no-op.



<Optional Math>

I found it helpful to visualize the scoping logic, this is directly
from my notes: (Web version is at https://wiki.gnoack.org/LandlockDomainIsScoped)

The domain_is_scoped() helper implements the following predicate:

  ∀ l ∈ (0,16): (hasbit(self, l) implies-that domain(self, l) == domain(other, l))

That is, we require for each layer l nesting depth that:

  * **If** scoping is active at the layer,
  * **Then** the domains of self and other are the same
             at the given nesting depth.

For example:

       [ ]
        |
       [x]     self and other have the same domain at this depth
        |
       [ ]
      /   \
    [x]   [ ]  self and other have differing domains at this depth
     |     |
    [ ]   [ ]
     |
    [ ]     "other"             "x" marks a domain where "self" has
                                    set the scoping bit
  "self"

</Optional Math>


> > > > > > +			masks->access[client_layer] &= ~access;
> 
> Actually, why not remove the access argument and just reset
> masks->access[client_layer]?  The doc would need some updates.

It would feel brittle to me if this function were to clear out
unrelated access rights. It receives a struct layer_access_masks after
all, where it is normally expected that multiple kinds of access
rights are set.  In my understanding, the bit masking does not cost
much extra performance compared to clearing it out entirely, so I'd
prefer to have clearer semantics and only operate on the access rights
that it's about, even when the other bits are all zero at the moment.

(For full disclosure, I have contemplated for a bit whether
hook_unix_find() should take a layer_mask_t-like type where each bit
indicates whether a given access right
(LANDLOCK_ACCESS_FS_RESOLVE_UNIX, in this case) is set at a given
layer, and then it would only clear out the bits there.  That would be
in some ways simpler, but then the caller would still need to convert
back and forth to a layer mask anyway, because that's what the other
functions there take.  So it didn't seem like a good option in the
bigger scheme (and I would also prefer to not re-introduce
layer_mask_t after we just removed it).)

Maybe I did not understand your remark fully though;
Does my argument sound reasonable?

–Günther

^ permalink raw reply

* Re: [PATCH v5 8/9] landlock: Document FS access right for pathname UNIX sockets
From: Günther Noack @ 2026-03-14 21:16 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: John Johansen, Justin Suess, linux-security-module, Tingmao Wang,
	Samasth Norway Ananda, Matthieu Buffet, Mikhail Ivanov,
	konstantin.meskhidze, Demi Marie Obenour, Alyssa Ross, Jann Horn,
	Tahera Fahimi
In-Reply-To: <20260218.AXoosuwo8aen@digikod.net>

On Wed, Feb 18, 2026 at 10:39:23AM +0100, Mickaël Salaün wrote:
> On Sun, Feb 15, 2026 at 11:51:56AM +0100, Günther Noack wrote:
> > --- a/Documentation/userspace-api/landlock.rst
> > +++ b/Documentation/userspace-api/landlock.rst
> > @@ -77,7 +77,8 @@ to be explicit about the denied-by-default access rights.
> >              LANDLOCK_ACCESS_FS_MAKE_SYM |
> >              LANDLOCK_ACCESS_FS_REFER |
> >              LANDLOCK_ACCESS_FS_TRUNCATE |
> > -            LANDLOCK_ACCESS_FS_IOCTL_DEV,
> > +            LANDLOCK_ACCESS_FS_IOCTL_DEV |
> > +            LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
> >          .handled_access_net =
> >              LANDLOCK_ACCESS_NET_BIND_TCP |
> >              LANDLOCK_ACCESS_NET_CONNECT_TCP,
> > @@ -127,6 +128,12 @@ version, and only use the available subset of access rights:
> >          /* Removes LANDLOCK_SCOPE_* for ABI < 6 */
> >          ruleset_attr.scoped &= ~(LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
> >                                   LANDLOCK_SCOPE_SIGNAL);
> > +        __attribute__((fallthrough));
> > +    case 7:
> > +        __attribute__((fallthrough));
> 
> I don't think the fallthrough attribute is needed here.  Same for the
> sample.

Thanks, done.


> > +    case 8:
> > +        /* Removes LANDLOCK_ACCESS_FS_RESOLVE_UNIX for ABI < 8 */
> 
> ABI < 9

Good catch, done.

–Günther

^ permalink raw reply

* Re: [PATCH v1] selftests/landlock: Test tsync interruption and cancellation paths
From: Günther Noack @ 2026-03-14 21:10 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Günther Noack, linux-security-module, Justin Suess,
	Tingmao Wang, Yihan Ding
In-Reply-To: <20260310190416.1913908-1-mic@digikod.net>

Hello Mickaël!

On Tue, Mar 10, 2026 at 08:04:15PM +0100, Mickaël Salaün wrote:
> Add tsync_interrupt test to exercise the signal interruption path in
> landlock_restrict_sibling_threads().  When a signal interrupts
> wait_for_completion_interruptible() while the calling thread waits for
> sibling threads to finish credential preparation, the kernel:
> 
> 1. Sets ERESTARTNOINTR to request a transparent syscall restart.
> 2. Calls cancel_tsync_works() to opportunistically dequeue task works
>    that have not started running yet.
> 3. Breaks out of the preparation loop, then unblocks remaining
>    task works via complete_all() and waits for them to finish.
> 4. Returns the error, causing abort_creds() in the syscall handler.
> 
> Specifically, cancel_tsync_works() in its entirety, the ERESTARTNOINTR
> error branch in landlock_restrict_sibling_threads(), and the
> abort_creds() error branch in the landlock_restrict_self() syscall
> handler are timing-dependent and not exercised by the existing tsync
> tests, making code coverage measurements non-deterministic.
> 
> The test spawns a signaler thread that rapidly sends SIGUSR1 to the
> calling thread while it performs landlock_restrict_self() with
> LANDLOCK_RESTRICT_SELF_TSYNC.  Since ERESTARTNOINTR causes a
> transparent restart, userspace always sees the syscall succeed.
> 
> This is a best-effort coverage test: the interruption path is exercised
> when the signal lands during the preparation wait, which depends on
> thread scheduling.  The test creates enough idle sibling threads (200)
> to ensure multiple serialized waves of credential preparation even on
> machines with many cores (e.g., 64), widening the window for the
> signaler.  Deterministic coverage would require wrapping the wait call
> with ALLOW_ERROR_INJECTION() and using CONFIG_FAIL_FUNCTION.
> 
> Cc: Günther Noack <gnoack@google.com>
> Cc: Justin Suess <utilityemal77@gmail.com>
> Cc: Tingmao Wang <m@maowtm.org>
> Cc: Yihan Ding <dingyihan@uniontech.com>
> Signed-off-by: Mickaël Salaün <mic@digikod.net>
> ---
>  tools/testing/selftests/landlock/tsync_test.c | 91 ++++++++++++++++++-
>  1 file changed, 90 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c
> index 37ef0d2270db..2b9ad4f154f4 100644
> --- a/tools/testing/selftests/landlock/tsync_test.c
> +++ b/tools/testing/selftests/landlock/tsync_test.c
> @@ -6,9 +6,10 @@
>   */
>  
>  #define _GNU_SOURCE
> +#include <linux/landlock.h>
>  #include <pthread.h>
> +#include <signal.h>
>  #include <sys/prctl.h>
> -#include <linux/landlock.h>
>  
>  #include "common.h"
>  
> @@ -158,4 +159,92 @@ TEST(competing_enablement)
>  	EXPECT_EQ(0, close(ruleset_fd));
>  }
>  
> +static void signal_nop_handler(int sig)
> +{
> +}
> +
> +struct signaler_data {
> +	pthread_t target;
> +	volatile bool stop;
> +};
> +
> +static void *signaler_thread(void *data)
> +{
> +	struct signaler_data *sd = data;
> +
> +	while (!sd->stop)
> +		pthread_kill(sd->target, SIGUSR1);
> +
> +	return NULL;
> +}
> +
> +/*
> + * Number of idle sibling threads.  This must be large enough that even on
> + * machines with many cores, the sibling threads cannot all complete their
> + * credential preparation in a single parallel wave, otherwise the signaler
> + * thread has no window to interrupt wait_for_completion_interruptible().
> + * 200 threads on a 64-core machine yields ~3 serialized waves, giving the
> + * tight signal loop enough time to land an interruption.
> + */
> +#define NUM_IDLE_THREADS 200
> +
> +/*
> + * Exercises the tsync interruption and cancellation paths in tsync.c.
> + *
> + * When a signal interrupts the calling thread while it waits for sibling
> + * threads to finish their credential preparation
> + * (wait_for_completion_interruptible in landlock_restrict_sibling_threads),
> + * the kernel sets ERESTARTNOINTR, cancels queued task works that have not
> + * started yet (cancel_tsync_works), then waits for the remaining works to
> + * finish.  On the error return, syscalls.c aborts the prepared credentials.
> + * The kernel automatically restarts the syscall, so userspace sees success.
> + */
> +TEST(tsync_interrupt)
> +{
> +	size_t i;
> +	pthread_t threads[NUM_IDLE_THREADS];
> +	pthread_t signaler;
> +	struct signaler_data sd;
> +	struct sigaction sa = {};
> +	const int ruleset_fd = create_ruleset(_metadata);
> +
> +	disable_caps(_metadata);
> +
> +	/* Install a no-op SIGUSR1 handler so the signal does not kill us. */
> +	sa.sa_handler = signal_nop_handler;
> +	sigemptyset(&sa.sa_mask);
> +	ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL));
> +
> +	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
> +
> +	for (i = 0; i < NUM_IDLE_THREADS; i++)
> +		ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL));
> +
> +	/*
> +	 * Start a signaler thread that continuously sends SIGUSR1 to the
> +	 * calling thread.  This maximizes the chance of interrupting
> +	 * wait_for_completion_interruptible() in the kernel's tsync path.
> +	 */
> +	sd.target = pthread_self();
> +	sd.stop = false;
> +	ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd));
> +
> +	/*
> +	 * The syscall may be interrupted and transparently restarted by the
> +	 * kernel (ERESTARTNOINTR).  From userspace, it should always succeed.
> +	 */
> +	EXPECT_EQ(0, landlock_restrict_self(ruleset_fd,
> +					    LANDLOCK_RESTRICT_SELF_TSYNC));
> +
> +	sd.stop = true;
> +	ASSERT_EQ(0, pthread_join(signaler, NULL));
> +
> +	for (i = 0; i < NUM_IDLE_THREADS; i++) {
> +		ASSERT_EQ(0, pthread_cancel(threads[i]));
> +		ASSERT_EQ(0, pthread_join(threads[i], NULL));
> +	}
> +
> +	EXPECT_EQ(0, close(ruleset_fd));
> +}
> +
>  TEST_HARNESS_MAIN
> -- 
> 2.53.0

The purpose of a test is to catch errors, so I broke the
ERESTARTNOINTR error handling code path, but I could not get the test
to fail.  Did you manage to reproduce any of these bugs with it, by
any chance, and in what configuration did that work?  I tried with
both QEMU (a bit more) and UML (a bit less), but had no luck.

(Does this need to run in a loop like the Syzkaller-generated deadlock
reproducer, so that we have a chance of catching these bugs at all?)

–Günther

^ permalink raw reply

* Re: [PATCH v2 1/2] nilfs2: fix 64-bit division operations in nilfs_bmap_find_target_in_group()
From: Jeff Layton @ 2026-03-14 12:59 UTC (permalink / raw)
  To: David Laight
  Cc: Ryusuke Konishi, Viacheslav Dubeyko, Christian Brauner,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Paul Moore, James Morris, Serge E. Hallyn, linux-nilfs,
	linux-kernel, linux-integrity, linux-security-module,
	linux-fsdevel, kernel test robot
In-Reply-To: <20260314124748.1ccdf93b@pumpkin>

On Sat, 2026-03-14 at 12:47 +0000, David Laight wrote:
> On Fri, 13 Mar 2026 14:45:20 -0400
> Jeff Layton <jlayton@kernel.org> wrote:
> 
> > With the change to make inode->i_ino a u64, the build started failing on
> > 32-bit ARM with:
> > 
> >     ERROR: modpost: "__aeabi_uldivmod" [fs/nilfs2/nilfs2.ko] undefined!
> > 
> > Fix this by using the 64-bit division interfaces in
> > nilfs_bmap_find_target_in_group().
> > 
> > Fixes: 998a59d371c2 ("treewide: fix missed i_ino format specifier conversions")
> > Reported-by: kernel test robot <lkp@intel.com>
> > Closes: https://lore.kernel.org/oe-kbuild-all/202603100602.KPxiClIO-lkp@intel.com/
> > Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
> > Acked-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/nilfs2/bmap.c | 9 ++++++---
> >  1 file changed, 6 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
> > index 824f2bd91c167965ec3a660202b6e6c5f1fe007e..abcf5252578ad24f694bfccf525893674bfcb4bc 100644
> > --- a/fs/nilfs2/bmap.c
> > +++ b/fs/nilfs2/bmap.c
> > @@ -455,11 +455,14 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
> >  {
> >  	struct inode *dat = nilfs_bmap_get_dat(bmap);
> >  	unsigned long entries_per_group = nilfs_palloc_entries_per_group(dat);
> > -	unsigned long group = bmap->b_inode->i_ino / entries_per_group;
> 
> Are you sure entries_per_group can be more than 32 bits?
> It looks like something that will be the same size on 32 and 64bit.
> 

I'm not sure of anything here. I just want to get this to compile on
all arches. FWIW, I'm not looking to optimize anything in this patch.

> > +	unsigned long group;
> > +	u32 index;
> > +
> > +	group = div_u64(bmap->b_inode->i_ino, entries_per_group);
> 
> You don't need the full 64 by 64 divide.
> IIRC there are both div_u64_u32() and div_u64_ulong().
>
> > +	div_u64_rem(bmap->b_inode->i_ino, NILFS_BMAP_GROUP_DIV, &index);
> 
> NILFS_BMAP_GROUP_DIV is 8 (and probably has to be a power of 2).
> So:
> 	index = bmap->b_inode->i_ino & (NILFS_BMAP_GROUP_DIV - 1);
> is the same and likely much faster to calculate.
> (The compiler will have done that optimisation before.)
> 
> 

That all sounds reasonable to me. At this point though, it would be
better if the NILFS2 folks stepped in with how they'd prefer this be
done.

> 
> >  
> >  	return group * entries_per_group +
> > -		(bmap->b_inode->i_ino % NILFS_BMAP_GROUP_DIV) *
> > -		(entries_per_group / NILFS_BMAP_GROUP_DIV);
> > +	       index * (entries_per_group / NILFS_BMAP_GROUP_DIV);
> >  }
> >  
> >  static struct lock_class_key nilfs_bmap_dat_lock_key;
> > 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply

* Re: [PATCH v2 1/2] nilfs2: fix 64-bit division operations in nilfs_bmap_find_target_in_group()
From: David Laight @ 2026-03-14 12:47 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Ryusuke Konishi, Viacheslav Dubeyko, Christian Brauner,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Paul Moore, James Morris, Serge E. Hallyn, linux-nilfs,
	linux-kernel, linux-integrity, linux-security-module,
	linux-fsdevel, kernel test robot
In-Reply-To: <20260313-iino-u64-v2-1-f9abda2464d5@kernel.org>

On Fri, 13 Mar 2026 14:45:20 -0400
Jeff Layton <jlayton@kernel.org> wrote:

> With the change to make inode->i_ino a u64, the build started failing on
> 32-bit ARM with:
> 
>     ERROR: modpost: "__aeabi_uldivmod" [fs/nilfs2/nilfs2.ko] undefined!
> 
> Fix this by using the 64-bit division interfaces in
> nilfs_bmap_find_target_in_group().
> 
> Fixes: 998a59d371c2 ("treewide: fix missed i_ino format specifier conversions")
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202603100602.KPxiClIO-lkp@intel.com/
> Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
> Acked-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/nilfs2/bmap.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
> index 824f2bd91c167965ec3a660202b6e6c5f1fe007e..abcf5252578ad24f694bfccf525893674bfcb4bc 100644
> --- a/fs/nilfs2/bmap.c
> +++ b/fs/nilfs2/bmap.c
> @@ -455,11 +455,14 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
>  {
>  	struct inode *dat = nilfs_bmap_get_dat(bmap);
>  	unsigned long entries_per_group = nilfs_palloc_entries_per_group(dat);
> -	unsigned long group = bmap->b_inode->i_ino / entries_per_group;

Are you sure entries_per_group can be more than 32 bits?
It looks like something that will be the same size on 32 and 64bit.

> +	unsigned long group;
> +	u32 index;
> +
> +	group = div_u64(bmap->b_inode->i_ino, entries_per_group);

You don't need the full 64 by 64 divide.
IIRC there are both div_u64_u32() and div_u64_ulong().

> +	div_u64_rem(bmap->b_inode->i_ino, NILFS_BMAP_GROUP_DIV, &index);

NILFS_BMAP_GROUP_DIV is 8 (and probably has to be a power of 2).
So:
	index = bmap->b_inode->i_ino & (NILFS_BMAP_GROUP_DIV - 1);
is the same and likely much faster to calculate.
(The compiler will have done that optimisation before.)

	David


>  
>  	return group * entries_per_group +
> -		(bmap->b_inode->i_ino % NILFS_BMAP_GROUP_DIV) *
> -		(entries_per_group / NILFS_BMAP_GROUP_DIV);
> +	       index * (entries_per_group / NILFS_BMAP_GROUP_DIV);
>  }
>  
>  static struct lock_class_key nilfs_bmap_dat_lock_key;
> 


^ permalink raw reply

* Re: [PATCH v2 2/2] EVM: add comment describing why ino field is still unsigned long
From: Mimi Zohar @ 2026-03-13 19:50 UTC (permalink / raw)
  To: Jeff Layton, Ryusuke Konishi, Viacheslav Dubeyko,
	Christian Brauner, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Paul Moore, James Morris, Serge E. Hallyn
  Cc: linux-nilfs, linux-kernel, linux-integrity, linux-security-module,
	linux-fsdevel
In-Reply-To: <20260313-iino-u64-v2-2-f9abda2464d5@kernel.org>

On Fri, 2026-03-13 at 14:45 -0400, Jeff Layton wrote:
> Mimi pointed out that we didn't widen the inode number field in struct
> h_misc alongside the inode->i_ino widening. While we could make an
> equivalent change there, that would require EVM resigning on all 32-bit
> hosts.
> 
> Instead, leave the field as an unsigned long. This should have no effect
> on 64-bit hosts, and allow things to continue working on 32-bit hosts in
> the cases where the i_ino fits in 32-bits.
> 
> Add a comment explaining why it's being left as unsigned long.
> 
> Cc: Mimi Zohar <zohar@linux.ibm.com>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Thanks, Jeff.

Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>


> ---
>  security/integrity/evm/evm_crypto.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
> index c0ca4eedb0fe5d5c30f45f515a4bc90248ec64ea..1c41af2f91a60a714878ff93b554c90e45546503 100644
> --- a/security/integrity/evm/evm_crypto.c
> +++ b/security/integrity/evm/evm_crypto.c
> @@ -144,6 +144,12 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
>  			  char type, char *digest)
>  {
>  	struct h_misc {
> +		/*
> +		 * Although inode->i_ino is now u64, this field remains
> +		 * unsigned long to allow existing HMAC and signatures from
> +		 * 32-bit hosts to continue working when i_ino hasn't changed
> +		 * and fits in a u32.
> +		 */
>  		unsigned long ino;
>  		__u32 generation;
>  		uid_t uid;

^ permalink raw reply

* Re: [PATCH 02/61] btrfs: Prefer IS_ERR_OR_NULL over manual NULL check
From: David Sterba @ 2026-03-13 19:22 UTC (permalink / raw)
  To: Philipp Hahn
  Cc: amd-gfx, apparmor, bpf, ceph-devel, cocci, dm-devel, dri-devel,
	gfs2, intel-gfx, intel-wired-lan, iommu, kvm, linux-arm-kernel,
	linux-block, linux-bluetooth, linux-btrfs, linux-cifs, linux-clk,
	linux-erofs, linux-ext4, linux-fsdevel, linux-gpio, linux-hyperv,
	linux-input, linux-kernel, linux-leds, linux-media, linux-mips,
	linux-mm, linux-modules, linux-mtd, linux-nfs, linux-omap,
	linux-phy, linux-pm, linux-rockchip, linux-s390, linux-scsi,
	linux-sctp, linux-security-module, linux-sh, linux-sound,
	linux-stm32, linux-trace-kernel, linux-usb, linux-wireless,
	netdev, ntfs3, samba-technical, sched-ext, target-devel,
	tipc-discussion, v9fs, Chris Mason, David Sterba
In-Reply-To: <20260310-b4-is_err_or_null-v1-2-bd63b656022d@avm.de>

On Tue, Mar 10, 2026 at 12:48:28PM +0100, Philipp Hahn wrote:
> Prefer using IS_ERR_OR_NULL() over using IS_ERR() and a manual NULL
> check.
> 
> IS_ERR_OR_NULL() already uses unlikely(!ptr) internally. checkpatch does
> not like nesting it:
> > WARNING: nested (un)?likely() calls, IS_ERR_OR_NULL already uses
> > unlikely() internally
> Remove the explicit use of likely().
> 
> Change generated with coccinelle.
> 
> To: Chris Mason <clm@fb.com>
> To: David Sterba <dsterba@suse.com>
> Cc: linux-btrfs@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Philipp Hahn <phahn-oss@avm.de>

Added to for-next, we seem to be using IS_ERR_OR_NULL() already in a
few other places so this makes sense for consistency. Thanks.

^ permalink raw reply

* [PATCH v2 2/2] EVM: add comment describing why ino field is still unsigned long
From: Jeff Layton @ 2026-03-13 18:45 UTC (permalink / raw)
  To: Ryusuke Konishi, Viacheslav Dubeyko, Christian Brauner,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Paul Moore, James Morris, Serge E. Hallyn
  Cc: linux-nilfs, linux-kernel, linux-integrity, linux-security-module,
	linux-fsdevel, Jeff Layton
In-Reply-To: <20260313-iino-u64-v2-0-f9abda2464d5@kernel.org>

Mimi pointed out that we didn't widen the inode number field in struct
h_misc alongside the inode->i_ino widening. While we could make an
equivalent change there, that would require EVM resigning on all 32-bit
hosts.

Instead, leave the field as an unsigned long. This should have no effect
on 64-bit hosts, and allow things to continue working on 32-bit hosts in
the cases where the i_ino fits in 32-bits.

Add a comment explaining why it's being left as unsigned long.

Cc: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 security/integrity/evm/evm_crypto.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index c0ca4eedb0fe5d5c30f45f515a4bc90248ec64ea..1c41af2f91a60a714878ff93b554c90e45546503 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -144,6 +144,12 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
 			  char type, char *digest)
 {
 	struct h_misc {
+		/*
+		 * Although inode->i_ino is now u64, this field remains
+		 * unsigned long to allow existing HMAC and signatures from
+		 * 32-bit hosts to continue working when i_ino hasn't changed
+		 * and fits in a u32.
+		 */
 		unsigned long ino;
 		__u32 generation;
 		uid_t uid;

-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 1/2] nilfs2: fix 64-bit division operations in nilfs_bmap_find_target_in_group()
From: Jeff Layton @ 2026-03-13 18:45 UTC (permalink / raw)
  To: Ryusuke Konishi, Viacheslav Dubeyko, Christian Brauner,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Paul Moore, James Morris, Serge E. Hallyn
  Cc: linux-nilfs, linux-kernel, linux-integrity, linux-security-module,
	linux-fsdevel, Jeff Layton, kernel test robot
In-Reply-To: <20260313-iino-u64-v2-0-f9abda2464d5@kernel.org>

With the change to make inode->i_ino a u64, the build started failing on
32-bit ARM with:

    ERROR: modpost: "__aeabi_uldivmod" [fs/nilfs2/nilfs2.ko] undefined!

Fix this by using the 64-bit division interfaces in
nilfs_bmap_find_target_in_group().

Fixes: 998a59d371c2 ("treewide: fix missed i_ino format specifier conversions")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202603100602.KPxiClIO-lkp@intel.com/
Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
Acked-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nilfs2/bmap.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 824f2bd91c167965ec3a660202b6e6c5f1fe007e..abcf5252578ad24f694bfccf525893674bfcb4bc 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -455,11 +455,14 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
 {
 	struct inode *dat = nilfs_bmap_get_dat(bmap);
 	unsigned long entries_per_group = nilfs_palloc_entries_per_group(dat);
-	unsigned long group = bmap->b_inode->i_ino / entries_per_group;
+	unsigned long group;
+	u32 index;
+
+	group = div_u64(bmap->b_inode->i_ino, entries_per_group);
+	div_u64_rem(bmap->b_inode->i_ino, NILFS_BMAP_GROUP_DIV, &index);
 
 	return group * entries_per_group +
-		(bmap->b_inode->i_ino % NILFS_BMAP_GROUP_DIV) *
-		(entries_per_group / NILFS_BMAP_GROUP_DIV);
+	       index * (entries_per_group / NILFS_BMAP_GROUP_DIV);
 }
 
 static struct lock_class_key nilfs_bmap_dat_lock_key;

-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 0/2] vfs: follow-on fixes for i_ino widening
From: Jeff Layton @ 2026-03-13 18:45 UTC (permalink / raw)
  To: Ryusuke Konishi, Viacheslav Dubeyko, Christian Brauner,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Paul Moore, James Morris, Serge E. Hallyn
  Cc: linux-nilfs, linux-kernel, linux-integrity, linux-security-module,
	linux-fsdevel, Jeff Layton, kernel test robot

Just some patches to fix follow-on issues reported after the
inode->i_ino widening series. Christian, could you toss these
onto the vfs-7.1.kino branch?

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
Changes in v2:
- rename variable in nilfs2 patch from "rem" to "index"
- reword comment and commit log for better accuracy in EVM patch

---
Jeff Layton (2):
      nilfs2: fix 64-bit division operations in nilfs_bmap_find_target_in_group()
      EVM: add comment describing why ino field is still unsigned long

 fs/nilfs2/bmap.c                    | 9 ++++++---
 security/integrity/evm/evm_crypto.c | 6 ++++++
 2 files changed, 12 insertions(+), 3 deletions(-)
---
base-commit: 9840bb66e7e5dffd72b03201318f154a10b06b4a
change-id: 20260310-iino-u64-424fa570d850

Best regards,
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply

* [PATCH v2 0/2] vfs: follow-on fixes for i_ino widening
From: Jeff Layton @ 2026-03-13 18:44 UTC (permalink / raw)
  To: Ryusuke Konishi, Viacheslav Dubeyko, Christian Brauner,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Paul Moore, James Morris, Serge E. Hallyn
  Cc: linux-nilfs, linux-kernel, linux-integrity, linux-security-module,
	Jeff Layton, kernel test robot

Just some patches to fix follow-on issues reported after the
inode->i_ino widening series. Christian, could you toss these
onto the vfs-7.1.kino branch?

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
Changes in v2:
- rename variable in nilfs2 patch from "rem" to "index"
- reword comment and commit log for better accuracy in EVM patch

---
Jeff Layton (2):
      nilfs2: fix 64-bit division operations in nilfs_bmap_find_target_in_group()
      EVM: add comment describing why ino field is still unsigned long

 fs/nilfs2/bmap.c                    | 9 ++++++---
 security/integrity/evm/evm_crypto.c | 6 ++++++
 2 files changed, 12 insertions(+), 3 deletions(-)
---
base-commit: 9840bb66e7e5dffd72b03201318f154a10b06b4a
change-id: 20260310-iino-u64-424fa570d850

Best regards,
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply

* Re: [PATCH] integrity: Eliminate weak definition of arch_get_secureboot()
From: Mimi Zohar @ 2026-03-13 15:35 UTC (permalink / raw)
  To: Nathan Chancellor
  Cc: Arnd Bergmann, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Alexander Egorenkov, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Heiko Carstens, Vasily Gorbik,
	Alexander Gordeev, Christian Borntraeger, Sven Schnelle,
	Paul Moore, James Morris, Serge E. Hallyn, Coiby Xu, linux-kernel,
	linuxppc-dev, linux-s390, linux-integrity, linux-security-module,
	llvm
In-Reply-To: <20260312205533.GC2747807@ax162>

On Thu, 2026-03-12 at 13:55 -0700, Nathan Chancellor wrote:
> On Thu, Mar 12, 2026 at 12:07:41PM -0400, Mimi Zohar wrote:
> > I pushed out the patch to next-integrity, but am a bit concerned about the
> > definition:
> > 
> > +config HAVE_ARCH_GET_SECUREBOOT
> > +       def_bool EFI
> > +
> 
> What is concerning about the definition with regards to s390?
> 
> > Has anyone actually tested this patch on s390, not just compiled it?  If so, I'd
> > appreciate a tested-by tag.
> 
> It would be good to test (if it is possible to test in QEMU, I am happy
> to attempt to do so). As far as I can tell, 31a6a07eefeb placed
> arch_get_secureboot() in such a way that the __weak definition would be
> used when CONFIG_KEXEC_FILE was disabled, even though ipl_secure_flag
> should always be available, which this patch avoids.

Thanks, Nathan.  Fortunately I got access to an s390 and was able to test.  It
seems to be working.

Mimi

^ permalink raw reply

* Re: [PATCH] integrity: Eliminate weak definition of arch_get_secureboot()
From: Nathan Chancellor @ 2026-03-12 20:55 UTC (permalink / raw)
  To: Mimi Zohar
  Cc: Arnd Bergmann, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Alexander Egorenkov, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Heiko Carstens, Vasily Gorbik,
	Alexander Gordeev, Christian Borntraeger, Sven Schnelle,
	Paul Moore, James Morris, Serge E. Hallyn, Coiby Xu, linux-kernel,
	linuxppc-dev, linux-s390, linux-integrity, linux-security-module,
	llvm
In-Reply-To: <a985c90d9df8ba0fc63f65117cc8e884f70e6035.camel@linux.ibm.com>

On Thu, Mar 12, 2026 at 12:07:41PM -0400, Mimi Zohar wrote:
> I pushed out the patch to next-integrity, but am a bit concerned about the
> definition:
> 
> +config HAVE_ARCH_GET_SECUREBOOT
> +       def_bool EFI
> +

What is concerning about the definition with regards to s390?

> Has anyone actually tested this patch on s390, not just compiled it?  If so, I'd
> appreciate a tested-by tag.

It would be good to test (if it is possible to test in QEMU, I am happy
to attempt to do so). As far as I can tell, 31a6a07eefeb placed
arch_get_secureboot() in such a way that the __weak definition would be
used when CONFIG_KEXEC_FILE was disabled, even though ipl_secure_flag
should always be available, which this patch avoids.

Cheers,
Nathan

^ permalink raw reply

* Re: [PATCH 48/61] mtd: Prefer IS_ERR_OR_NULL over manual NULL check
From: Richard Weinberger @ 2026-03-12 19:33 UTC (permalink / raw)
  To: Philipp Hahn
  Cc: amd-gfx, apparmor, bpf, ceph-devel, cocci, dm-devel,
	DRI mailing list, gfs2, intel-gfx, intel-wired-lan, iommu, kvm,
	linux-arm-kernel, linux-block, linux-bluetooth, linux-btrfs,
	linux-cifs, linux-clk, linux-erofs, linux-ext4, linux-fsdevel,
	linux-gpio, linux-hyperv, linux-input, linux-kernel, linux-leds,
	linux-media, linux-mips, linux-mm, linux-modules, linux-mtd,
	linux-nfs, linux-omap, linux-phy, linux-pm, linux-rockchip,
	linux-s390, linux-scsi, linux-sctp, LSM, linux-sh, linux-sound,
	linux-stm32, linux-trace-kernel, linux-usb, linux-wireless,
	netdev, ntfs3, samba-technical, sched-ext, target-devel,
	tipc-discussion, v9fs, Miquel Raynal, Vignesh Raghavendra
In-Reply-To: <20260310-b4-is_err_or_null-v1-48-bd63b656022d@avm.de>

----- Ursprüngliche Mail -----
> Von: "Philipp Hahn" <phahn-oss@avm.de>
> -	if (gpiomtd->nwp && !IS_ERR(gpiomtd->nwp))
> +	if (!IS_ERR_OR_NULL(gpiomtd->nwp))

No, please don't.

This makes reading the code not easier.

Thanks,
//richard

^ permalink raw reply

* Re: [PATCH 00/61] treewide: Use IS_ERR_OR_NULL over manual NULL check - refactor
From: Jason Gunthorpe @ 2026-03-12 16:54 UTC (permalink / raw)
  To: James Bottomley
  Cc: Kuan-Wei Chiu, Philipp Hahn, amd-gfx, apparmor, bpf, ceph-devel,
	cocci, dm-devel, dri-devel, gfs2, intel-gfx, intel-wired-lan,
	iommu, kvm, linux-arm-kernel, linux-block, linux-bluetooth,
	linux-btrfs, linux-cifs, linux-clk, linux-erofs, linux-ext4,
	linux-fsdevel, linux-gpio, linux-hyperv, linux-input,
	linux-kernel, linux-leds, linux-media, linux-mips, linux-mm,
	linux-modules, linux-mtd, linux-nfs, linux-omap, linux-phy,
	linux-pm, linux-rockchip, linux-s390, linux-scsi, linux-sctp,
	linux-security-module, linux-sh, linux-sound, linux-stm32,
	linux-trace-kernel, linux-usb, linux-wireless, netdev, ntfs3,
	samba-technical, sched-ext, target-devel, tipc-discussion, v9fs
In-Reply-To: <f5688b895eaebabae6545a0d9baf8f1404e8454e.camel@HansenPartnership.com>

On Thu, Mar 12, 2026 at 11:32:37AM -0400, James Bottomley wrote:
> On Thu, 2026-03-12 at 09:57 -0300, Jason Gunthorpe wrote:
> > On Wed, Mar 11, 2026 at 02:40:36AM +0800, Kuan-Wei Chiu wrote:
> > 
> > > IMHO, the necessity of IS_ERR_OR_NULL() often highlights a
> > > confusing or flawed API design. It usually implies that the caller
> > > is unsure whether a failure results in an error pointer or a NULL
> > > pointer. 
> > 
> > +1
> > 
> > IS_ERR_OR_NULL() should always be looked on with suspicion. Very
> > little should be returning some tri-state 'ERR' 'NULL' 'SUCCESS'
> > pointer. What does the middle condition even mean? IS_ERR_OR_NULL()
> > > implies ERR and NULL are semantically the same, so fix the things to
> > always use ERR.
> 
> Not in any way supporting the original patch.  However, the pattern
> ERR, NULL, PTR is used extensively in the dentry code of filesystems. 
> See the try_lookup..() set of functions in fs/namei.c
> 
> The meaning is
> 
> PTR - I found it
> NULL - It definitely doesn't exist
> ERR - something went wrong during the lookup.
> 
> So I don't think you can blanket say this pattern is wrong.

Lots of places also would return ENOENT, I'd argue that is easier to
use..

But yes, I did use the word "suspicion" not blanket wrong :)

Jason

^ permalink raw reply

* Re: [PATCH 38/61] net: Prefer IS_ERR_OR_NULL over manual NULL check
From: Przemek Kitszel @ 2026-03-12 16:11 UTC (permalink / raw)
  To: Philipp Hahn
  Cc: amd-gfx, apparmor, bpf, ceph-devel, cocci, dm-devel, dri-devel,
	gfs2, intel-gfx, intel-wired-lan, iommu, kvm, linux-arm-kernel,
	linux-block, linux-bluetooth, linux-btrfs, linux-cifs, linux-clk,
	linux-erofs, linux-ext4, linux-fsdevel, linux-gpio, linux-hyperv,
	linux-input, linux-kernel, linux-leds, linux-media, linux-mips,
	linux-mm, linux-modules, linux-mtd, linux-nfs, linux-omap,
	linux-phy, linux-pm, linux-rockchip, linux-s390, linux-scsi,
	linux-sctp, linux-security-module, linux-sh, linux-sound,
	linux-stm32, linux-trace-kernel, linux-usb, linux-wireless,
	netdev, ntfs3, samba-technical, sched-ext, target-devel,
	tipc-discussion, v9fs, Igor Russkikh, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Pavan Chebbi, Michael Chan, Potnuri Bharat Teja, Tony Nguyen,
	Taras Chornyi, Maxime Coquelin, Alexandre Torgue,
	Iyappan Subramanian, Keyur Chudgar, Quan Nguyen, Heiner Kallweit,
	Russell King
In-Reply-To: <20260310-b4-is_err_or_null-v1-38-bd63b656022d@avm.de>

On 3/10/26 12:49, Philipp Hahn wrote:
> Prefer using IS_ERR_OR_NULL() over using IS_ERR() and a manual NULL
> check.
> 
> Change generated with coccinelle.
> 
> To: Igor Russkikh <irusskikh@marvell.com>
> To: Andrew Lunn <andrew+netdev@lunn.ch>
> To: "David S. Miller" <davem@davemloft.net>
> To: Eric Dumazet <edumazet@google.com>
> To: Jakub Kicinski <kuba@kernel.org>
> To: Paolo Abeni <pabeni@redhat.com>
> To: Pavan Chebbi <pavan.chebbi@broadcom.com>
> To: Michael Chan <mchan@broadcom.com>
> To: Potnuri Bharat Teja <bharat@chelsio.com>
> To: Tony Nguyen <anthony.l.nguyen@intel.com>
> To: Przemek Kitszel <przemyslaw.kitszel@intel.com>
> To: Taras Chornyi <taras.chornyi@plvision.eu>
> To: Maxime Coquelin <mcoquelin.stm32@gmail.com>
> To: Alexandre Torgue <alexandre.torgue@foss.st.com>
> To: Iyappan Subramanian <iyappan@os.amperecomputing.com>
> To: Keyur Chudgar <keyur@os.amperecomputing.com>
> To: Quan Nguyen <quan@os.amperecomputing.com>
> To: Heiner Kallweit <hkallweit1@gmail.com>
> To: Russell King <linux@armlinux.org.uk>
> Cc: netdev@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: intel-wired-lan@lists.osuosl.org
> Cc: linux-stm32@st-md-mailman.stormreply.com
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-usb@vger.kernel.org
> Signed-off-by: Philipp Hahn <phahn-oss@avm.de>

this is too trivial a change, especially when combined like that
https://docs.kernel.org/process/maintainer-netdev.html#clean-up-patches

> ---
>   drivers/net/ethernet/aquantia/atlantic/aq_ring.c        | 2 +-
>   drivers/net/ethernet/broadcom/tg3.c                     | 2 +-
>   drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c    | 3 +--
>   drivers/net/ethernet/intel/ice/devlink/devlink.c        | 2 +-
>   drivers/net/ethernet/marvell/prestera/prestera_router.c | 2 +-
>   drivers/net/ethernet/stmicro/stmmac/stmmac_main.c       | 2 +-
>   drivers/net/mdio/mdio-xgene.c                           | 2 +-
>   drivers/net/usb/r8152.c                                 | 2 +-
>   8 files changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> index e270327e47fd804cc8ee5cfd53ed1b993c955c41..43edef35c4b1ff606b2f1519a07fad4c9a990ad4 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> @@ -810,7 +810,7 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring,
>   		}
>   
>   		skb = aq_xdp_run_prog(aq_nic, &xdp, rx_ring, buff);
> -		if (IS_ERR(skb) || !skb)
> +		if (IS_ERR_OR_NULL(skb))
>   			continue;
>   
>   		if (ptp_hwtstamp_len > 0)
> diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
> index 2328fce336447eb4a796f9300ccc0ab536ff0a35..8ed79f34f03d81184dcc12e6eaff009cb8f7756e 100644
> --- a/drivers/net/ethernet/broadcom/tg3.c
> +++ b/drivers/net/ethernet/broadcom/tg3.c
> @@ -7943,7 +7943,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
>   
>   	segs = skb_gso_segment(skb, tp->dev->features &
>   				    ~(NETIF_F_TSO | NETIF_F_TSO6));
> -	if (IS_ERR(segs) || !segs) {
> +	if (IS_ERR_OR_NULL(segs)) {
>   		tnapi->tx_dropped++;
>   		goto tg3_tso_bug_end;
>   	}
> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
> index 3307e50426819087ad985178c4a5383f16b8e7b4..1c8a6445d4b2e3535d8f1b7908dd02d8dd2f23fa 100644
> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
> @@ -1032,8 +1032,7 @@ static void ch_flower_stats_handler(struct work_struct *work)
>   	do {
>   		rhashtable_walk_start(&iter);
>   
> -		while ((flower_entry = rhashtable_walk_next(&iter)) &&
> -		       !IS_ERR(flower_entry)) {
> +		while (!IS_ERR_OR_NULL((flower_entry = rhashtable_walk_next(&iter)))) {
>   			ret = cxgb4_get_filter_counters(adap->port[0],
>   							flower_entry->filter_id,
>   							&packets, &bytes,
> diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c
> index 6c72bd15db6d75a1d4fa04ef8fefbd26fb6e84bd..3d08b9187fd76ca3198af28111b6f1c1765ea01e 100644
> --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c
> +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c
> @@ -791,7 +791,7 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node
>   						  node->parent->rate_node);
>   	}
>   
> -	if (rate_node && !IS_ERR(rate_node))
> +	if (!IS_ERR_OR_NULL(rate_node))
>   		node->rate_node = rate_node;
>   
>   traverse_children:
> diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c
> index b036b173a308b5f994ad8538eb010fa27196988c..4492938e8a3da91d32efe8d45ccbe2eb437c0e49 100644
> --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c
> +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c
> @@ -1061,7 +1061,7 @@ static void __prestera_k_arb_hw_state_upd(struct prestera_switch *sw,
>   		n = NULL;
>   	}
>   
> -	if (!IS_ERR(n) && n) {
> +	if (!IS_ERR_OR_NULL(n)) {
>   		neigh_event_send(n, NULL);
>   		neigh_release(n);
>   	} else {
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index 6827c99bde8c22db42b363d2d36ad6f26075ed50..356a4e9ce04b1fcf8786d7274d31ace404be2cf6 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -1275,7 +1275,7 @@ static int stmmac_init_phy(struct net_device *dev)
>   	/* Some DT bindings do not set-up the PHY handle. Let's try to
>   	 * manually parse it
>   	 */
> -	if (!phy_fwnode || IS_ERR(phy_fwnode)) {
> +	if (IS_ERR_OR_NULL(phy_fwnode)) {
>   		int addr = priv->plat->phy_addr;
>   		struct phy_device *phydev;
>   
> diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c
> index a8f91a4b7fed0927ee14e408000cd3a2bfb9b09a..09b30b563295c6085dc1358ac361301e5cf6b2a8 100644
> --- a/drivers/net/mdio/mdio-xgene.c
> +++ b/drivers/net/mdio/mdio-xgene.c
> @@ -265,7 +265,7 @@ struct phy_device *xgene_enet_phy_register(struct mii_bus *bus, int phy_addr)
>   	struct phy_device *phy_dev;
>   
>   	phy_dev = get_phy_device(bus, phy_addr, false);
> -	if (!phy_dev || IS_ERR(phy_dev))
> +	if (IS_ERR_OR_NULL(phy_dev))
>   		return NULL;
>   
>   	if (phy_device_register(phy_dev))
> diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
> index 0c83bbbea2e7c322ee6339893e281237663bd3ae..73f17ebd7d40007eec5004f887a46249defd28ab 100644
> --- a/drivers/net/usb/r8152.c
> +++ b/drivers/net/usb/r8152.c
> @@ -2218,7 +2218,7 @@ static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb,
>   
>   		features &= ~(NETIF_F_SG | NETIF_F_IPV6_CSUM | NETIF_F_TSO6);
>   		segs = skb_gso_segment(skb, features);
> -		if (IS_ERR(segs) || !segs)
> +		if (IS_ERR_OR_NULL(segs))
>   			goto drop;
>   
>   		__skb_queue_head_init(&seg_list);
> 


^ permalink raw reply

* Re: [PATCH] integrity: Eliminate weak definition of arch_get_secureboot()
From: Mimi Zohar @ 2026-03-12 16:07 UTC (permalink / raw)
  To: Arnd Bergmann, Nathan Chancellor, Roberto Sassu, Dmitry Kasatkin,
	Eric Snowberg, Alexander Egorenkov
  Cc: Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy, Heiko Carstens, Vasily Gorbik,
	Alexander Gordeev, Christian Borntraeger, Sven Schnelle,
	Paul Moore, James Morris, Serge E. Hallyn, Coiby Xu, linux-kernel,
	linuxppc-dev, linux-s390, linux-integrity, linux-security-module,
	llvm
In-Reply-To: <d2089740-16d8-4ca4-a61c-8c381f8e30a0@app.fastmail.com>

On Thu, 2026-03-12 at 16:03 +0100, Arnd Bergmann wrote:
> On Mon, Mar 9, 2026, at 21:37, Nathan Chancellor wrote:
> > security/integrity/secure_boot.c contains a single __weak function,
> > which breaks recordmcount when building with clang:
> > 
> >   $ make -skj"$(nproc)" ARCH=powerpc LLVM=1 ppc64_defconfig 
> > security/integrity/secure_boot.o
> >   Cannot find symbol for section 2: .text.
> >   security/integrity/secure_boot.o: failed
> > 
> > Introduce a Kconfig symbol, CONFIG_HAVE_ARCH_GET_SECUREBOOT, to indicate
> > that an architecture provides a definition of arch_get_secureboot().
> > Provide a static inline stub when this symbol is not defined to achieve
> > the same effect as the __weak function, allowing secure_boot.c to be
> > removed altogether. Move the s390 definition of arch_get_secureboot()
> > out of the CONFIG_KEXEC_FILE block to ensure it is always available, as
> > it does not actually depend on KEXEC_FILE.
> > 
> > Fixes: 31a6a07eefeb ("integrity: Make arch_ima_get_secureboot integrity-wide")
> > Signed-off-by: Nathan Chancellor <nathan@kernel.org>
> 
> Acked-by: Arnd Bergmann <arnd@arndb.de>

I pushed out the patch to next-integrity, but am a bit concerned about the
definition:

+config HAVE_ARCH_GET_SECUREBOOT
+       def_bool EFI
+

Has anyone actually tested this patch on s390, not just compiled it?  If so, I'd
appreciate a tested-by tag.

thanks,

Mimi

^ permalink raw reply

* Re: [PATCH 00/61] treewide: Use IS_ERR_OR_NULL over manual NULL check - refactor
From: James Bottomley @ 2026-03-12 15:32 UTC (permalink / raw)
  To: Jason Gunthorpe, Kuan-Wei Chiu
  Cc: Philipp Hahn, amd-gfx, apparmor, bpf, ceph-devel, cocci, dm-devel,
	dri-devel, gfs2, intel-gfx, intel-wired-lan, iommu, kvm,
	linux-arm-kernel, linux-block, linux-bluetooth, linux-btrfs,
	linux-cifs, linux-clk, linux-erofs, linux-ext4, linux-fsdevel,
	linux-gpio, linux-hyperv, linux-input, linux-kernel, linux-leds,
	linux-media, linux-mips, linux-mm, linux-modules, linux-mtd,
	linux-nfs, linux-omap, linux-phy, linux-pm, linux-rockchip,
	linux-s390, linux-scsi, linux-sctp, linux-security-module,
	linux-sh, linux-sound, linux-stm32, linux-trace-kernel, linux-usb,
	linux-wireless, netdev, ntfs3, samba-technical, sched-ext,
	target-devel, tipc-discussion, v9fs
In-Reply-To: <20260312125730.GI1469476@ziepe.ca>

On Thu, 2026-03-12 at 09:57 -0300, Jason Gunthorpe wrote:
> On Wed, Mar 11, 2026 at 02:40:36AM +0800, Kuan-Wei Chiu wrote:
> 
> > IMHO, the necessity of IS_ERR_OR_NULL() often highlights a
> > confusing or flawed API design. It usually implies that the caller
> > is unsure whether a failure results in an error pointer or a NULL
> > pointer. 
> 
> +1
> 
> IS_ERR_OR_NULL() should always be looked on with suspicion. Very
> little should be returning some tri-state 'ERR' 'NULL' 'SUCCESS'
> pointer. What does the middle condition even mean? IS_ERR_OR_NULL()
> > implies ERR and NULL are semantically the same, so fix the things to
> always use ERR.

Not in any way supporting the original patch.  However, the pattern
ERR, NULL, PTR is used extensively in the dentry code of filesystems. 
See the try_lookup..() set of functions in fs/namei.c

The meaning is

PTR - I found it
NULL - It definitely doesn't exist
ERR - something went wrong during the lookup.

So I don't think you can blanket say this pattern is wrong.

Regards,

James


^ permalink raw reply

* Re: [PATCH] integrity: Eliminate weak definition of arch_get_secureboot()
From: Arnd Bergmann @ 2026-03-12 15:03 UTC (permalink / raw)
  To: Nathan Chancellor, Mimi Zohar, Roberto Sassu, Dmitry Kasatkin,
	Eric Snowberg
  Cc: Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy, Heiko Carstens, Vasily Gorbik,
	Alexander Gordeev, Christian Borntraeger, Sven Schnelle,
	Paul Moore, James Morris, Serge E. Hallyn, Coiby Xu, linux-kernel,
	linuxppc-dev, linux-s390, linux-integrity, linux-security-module,
	llvm
In-Reply-To: <20260309-integrity-drop-weak-arch-get-secureboot-v1-1-6460d5c4bb89@kernel.org>

On Mon, Mar 9, 2026, at 21:37, Nathan Chancellor wrote:
> security/integrity/secure_boot.c contains a single __weak function,
> which breaks recordmcount when building with clang:
>
>   $ make -skj"$(nproc)" ARCH=powerpc LLVM=1 ppc64_defconfig 
> security/integrity/secure_boot.o
>   Cannot find symbol for section 2: .text.
>   security/integrity/secure_boot.o: failed
>
> Introduce a Kconfig symbol, CONFIG_HAVE_ARCH_GET_SECUREBOOT, to indicate
> that an architecture provides a definition of arch_get_secureboot().
> Provide a static inline stub when this symbol is not defined to achieve
> the same effect as the __weak function, allowing secure_boot.c to be
> removed altogether. Move the s390 definition of arch_get_secureboot()
> out of the CONFIG_KEXEC_FILE block to ensure it is always available, as
> it does not actually depend on KEXEC_FILE.
>
> Fixes: 31a6a07eefeb ("integrity: Make arch_ima_get_secureboot integrity-wide")
> Signed-off-by: Nathan Chancellor <nathan@kernel.org>

Acked-by: Arnd Bergmann <arnd@arndb.de>

^ permalink raw reply

* Re: [RFC PATCH v1 11/11] landlock: Add documentation for capability and namespace restrictions
From: Justin Suess @ 2026-03-12 14:48 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Christian Brauner, Günther Noack, Paul Moore,
	Serge E . Hallyn, Lennart Poettering, Mikhail Ivanov,
	Nicolas Bouchinet, Shervin Oloumi, Tingmao Wang, kernel-team,
	linux-fsdevel, linux-kernel, linux-security-module
In-Reply-To: <20260312100444.2609563-12-mic@digikod.net>

On Thu, Mar 12, 2026 at 11:04:44AM +0100, Mickaël Salaün wrote:
> Document the two new Landlock permission categories in the userspace
> API guide, admin guide, and kernel security documentation.
> 
> The userspace API guide adds sections on capability restriction
> (LANDLOCK_PERM_CAPABILITY_USE with LANDLOCK_RULE_CAPABILITY), namespace
> restriction (LANDLOCK_PERM_NAMESPACE_ENTER with LANDLOCK_RULE_NAMESPACE
> covering creation via unshare/clone and entry via setns), and the
> backward-compatible degradation pattern for ABI < 9.  A table documents
> the per-namespace-type capability requirements for both creation and
> entry.
> 
> The admin guide adds the new perm.namespace_enter and
> perm.capability_use audit blocker names with their object identification
> fields (namespace_type, namespace_inum, capability).
> 
> The kernel security documentation adds a "Ruleset restriction models"
> section defining the three models (handled_access_*, handled_perm,
> scoped), their coverage and compatibility properties, and the criteria
> for choosing between them for future features.  It also documents
> composability with user namespaces and adds kernel-doc references for
> the new capability and namespace headers.
> 
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Günther Noack <gnoack@google.com>
> Cc: Paul Moore <paul@paul-moore.com>
> Cc: Serge E. Hallyn <serge@hallyn.com>
> Signed-off-by: Mickaël Salaün <mic@digikod.net>
> ---
>  Documentation/admin-guide/LSM/landlock.rst |  19 ++-
>  Documentation/security/landlock.rst        |  80 ++++++++++-
>  Documentation/userspace-api/landlock.rst   | 156 ++++++++++++++++++++-
>  3 files changed, 245 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/admin-guide/LSM/landlock.rst b/Documentation/admin-guide/LSM/landlock.rst
> index 9923874e2156..99c6a599ce9e 100644
> --- a/Documentation/admin-guide/LSM/landlock.rst
> +++ b/Documentation/admin-guide/LSM/landlock.rst
> @@ -6,7 +6,7 @@ Landlock: system-wide management
>  ================================
>  
>  :Author: Mickaël Salaün
> -:Date: January 2026
> +:Date: March 2026
>  
>  Landlock can leverage the audit framework to log events.
>  
> @@ -59,14 +59,25 @@ AUDIT_LANDLOCK_ACCESS
>          - scope.abstract_unix_socket - Abstract UNIX socket connection denied
>          - scope.signal - Signal sending denied
>  
> +    **perm.*** - Permission restrictions (ABI 9+):
> +        - perm.namespace_enter - Namespace entry was denied (creation via
> +          :manpage:`unshare(2)` / :manpage:`clone(2)` or joining via
> +          :manpage:`setns(2)`);
> +          ``namespace_type`` indicates the type (hex CLONE_NEW* bitmask),
> +          ``namespace_inum`` identifies the target namespace for
> +          :manpage:`setns(2)` operations
> +        - perm.capability_use - Capability use was denied;
> +          ``capability`` indicates the capability number
> +
>      Multiple blockers can appear in a single event (comma-separated) when
>      multiple access rights are missing. For example, creating a regular file
>      in a directory that lacks both ``make_reg`` and ``refer`` rights would show
>      ``blockers=fs.make_reg,fs.refer``.
>  
> -    The object identification fields (path, dev, ino for filesystem; opid,
> -    ocomm for signals) depend on the type of access being blocked and provide
> -    context about what resource was involved in the denial.
> +    The object identification fields depend on the type of access being blocked:
> +    ``path``, ``dev``, ``ino`` for filesystem; ``opid``, ``ocomm`` for signals;
> +    ``namespace_type`` and ``namespace_inum`` for namespace operations;
> +    ``capability`` for capability use.
>  
>  
>  AUDIT_LANDLOCK_DOMAIN
> diff --git a/Documentation/security/landlock.rst b/Documentation/security/landlock.rst
> index 3e4d4d04cfae..cd3d640ca5c9 100644
> --- a/Documentation/security/landlock.rst
> +++ b/Documentation/security/landlock.rst
> @@ -7,7 +7,7 @@ Landlock LSM: kernel documentation
>  ==================================
>  
>  :Author: Mickaël Salaün
> -:Date: September 2025
> +:Date: March 2026
>  
>  Landlock's goal is to create scoped access-control (i.e. sandboxing).  To
>  harden a whole system, this feature should be available to any process,
> @@ -89,6 +89,72 @@ this is required to keep access controls consistent over the whole system, and
>  this avoids unattended bypasses through file descriptor passing (i.e. confused
>  deputy attack).
>  
> +Composability with user namespaces
> +----------------------------------
> +
> +Landlock domain-based scoping and the kernel's user namespace-based capability
> +scoping enforce isolation over independent hierarchies.  Landlock checks domain
> +ancestry; the kernel's ``ns_capable()`` checks user namespace ancestry.  These
> +hierarchies are orthogonal: Landlock enforcement is deterministic with respect
> +to its own configuration, regardless of namespace or capability state, and vice
> +versa.  This orthogonality is a design invariant that must hold for all new
> +scoped features.
The last sentence on orthogonality may belong better under the restriction
model section for scoped access rights. I assume that future scopes must
also be deterministic with respect to Landlock's configuration,
not just user namespaces.
> +
> +Ruleset restriction models
> +--------------------------
+1

This section is very helpful for aligning new features with a particular
model.

> +
> +Landlock provides three restriction models, each with different coverage
> +and compatibility properties.
Maybe add:

Each restriction model below corresponds to one or more fields of
``struct landlock_ruleset_attr``.

> +
> +Access rights (``handled_access_*``)
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +
> +Access rights control **enumerated operations on kernel objects**
> +identified by a rule key (a file hierarchy or a network port).  Each
> +``handled_access_*`` field declares a set of access rights that the
> +ruleset restricts.  Multiple access rights share a single rule type.
> +Operations for which no access right exists yet remain uncontrolled;
> +new rights are added incrementally across ABI versions.
> +
> +Permissions (``handled_perm``)
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +
> +Permissions control **broad operations enforced at single kernel
> +chokepoints**, achieving complete deny-by-default coverage.  Each
> +``LANDLOCK_PERM_*`` flag maps to its own rule type.  When a ruleset
> +handles a permission, all instances of that operation are denied unless
> +explicitly allowed by a rule.  New kernel values (new ``CAP_*``
> +capabilities, new ``CLONE_NEW*`` namespace types) are automatically
> +denied without any Landlock update.
> +
> +Each permission flag names a single gateway operation whose control
> +transitively covers an open-ended set of downstream operations: for
> +example, exercising a capability enables privileged operations across
> +many subsystems; entering a namespace enables gaining capabilities in a
> +new context.
> +
> +Permission rules identify what to allow using constants defined by other
> +kernel subsystems (``CAP_*``, ``CLONE_NEW*``).  Unknown values are
> +silently ignored because deny-by-default ensures they are denied anyway.
> +In contrast, unknown ``LANDLOCK_PERM_*`` flags in ``handled_perm`` are
> +rejected (``-EINVAL``), since Landlock owns that namespace.
> +
> +Scopes (``scoped``)
> +~~~~~~~~~~~~~~~~~~~~
> +
> +Scopes restrict **cross-domain interactions** categorically, without
> +rules.  Setting a scope flag (e.g. ``LANDLOCK_SCOPE_SIGNAL``) denies the
> +operation to targets outside the Landlock domain or its children.  Like
> +permissions, scopes provide complete coverage of the controlled
> +operation.
> +
> +When adding new Landlock features, new operations on existing rule types
> +extend the corresponding ``handled_access_*`` field (e.g. a new
> +filesystem operation extends ``handled_access_fs``).  A new object
> +category with multiple fine-grained operations would use a new
> +``handled_access_*`` field.  New rule types that control a single
> +chokepoint operation use ``handled_perm``.
> +
>  Tests
>  =====
>  
> @@ -110,6 +176,18 @@ Filesystem
>  .. kernel-doc:: security/landlock/fs.h
>      :identifiers:
>  
> +Namespace
> +---------
> +
> +.. kernel-doc:: security/landlock/ns.h
> +    :identifiers:
> +
> +Capability
> +----------
> +
> +.. kernel-doc:: security/landlock/cap.h
> +    :identifiers:
> +
>  Process credential
>  ------------------
>  
> diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst
> index 13134bccdd39..238d30a18162 100644
> --- a/Documentation/userspace-api/landlock.rst
> +++ b/Documentation/userspace-api/landlock.rst
> @@ -8,7 +8,7 @@ Landlock: unprivileged access control
>  =====================================
>  
>  :Author: Mickaël Salaün
> -:Date: January 2026
> +:Date: March 2026
>  
>  The goal of Landlock is to enable restriction of ambient rights (e.g. global
>  filesystem or network access) for a set of processes.  Because Landlock
> @@ -33,7 +33,7 @@ A Landlock rule describes an action on an object which the process intends to
>  perform.  A set of rules is aggregated in a ruleset, which can then restrict
>  the thread enforcing it, and its future children.
>  
> -The two existing types of rules are:
> +The existing types of rules are:
>  
>  Filesystem rules
>      For these rules, the object is a file hierarchy,
> @@ -44,6 +44,14 @@ Network rules (since ABI v4)
>      For these rules, the object is a TCP port,
>      and the related actions are defined with `network access rights`.
>  
> +Capability rules (since ABI v9)
> +    For these rules, the object is a set of Linux capabilities,
> +    and the related actions are defined with `permission flags`.
> +
> +Namespace rules (since ABI v9)
> +    For these rules, the object is a set of namespace types,
> +    and the related actions are defined with `permission flags`.
> +
>  Defining and enforcing a security policy
>  ----------------------------------------
>  
> @@ -84,6 +92,9 @@ to be explicit about the denied-by-default access rights.
>          .scoped =
>              LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
>              LANDLOCK_SCOPE_SIGNAL,
> +        .handled_perm =
> +            LANDLOCK_PERM_CAPABILITY_USE |
> +            LANDLOCK_PERM_NAMESPACE_ENTER,
>      };
>  
>  Because we may not know which kernel version an application will be executed
> @@ -127,6 +138,12 @@ version, and only use the available subset of access rights:
>          /* Removes LANDLOCK_SCOPE_* for ABI < 6 */
>          ruleset_attr.scoped &= ~(LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
>                                   LANDLOCK_SCOPE_SIGNAL);
> +        __attribute__((fallthrough));
> +    case 6:
> +    case 7:
> +    case 8:
> +        /* Removes permission support for ABI < 9 */
> +        ruleset_attr.handled_perm = 0;
>      }
>  
>  This enables the creation of an inclusive ruleset that will contain our rules.
> @@ -191,6 +208,42 @@ number for a specific action: HTTPS connections.
>      err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
>                              &net_port, 0);
>  
> +For capability access-control, we can add rules that allow specific
> +capabilities.  For instance, to allow ``CAP_SYS_CHROOT`` (so the sandboxed
> +process can call :manpage:`chroot(2)` inside a user namespace):
> +
> +.. code-block:: c
> +
> +    struct landlock_capability_attr cap_attr = {
> +        .allowed_perm = LANDLOCK_PERM_CAPABILITY_USE,
> +        .capabilities = (1ULL << CAP_SYS_CHROOT),
> +    };
> +
> +    err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_CAPABILITY,
> +                            &cap_attr, 0);
> +
> +For namespace access-control, we can add rules that allow entering specific
> +namespace types (creating them via :manpage:`unshare(2)` / :manpage:`clone(2)`
> +or joining them via :manpage:`setns(2)`).  For instance, to allow creating user
> +namespaces (which grants all capabilities inside the new namespace):
> +
> +.. code-block:: c
> +
> +    struct landlock_namespace_attr ns_attr = {
> +        .allowed_perm = LANDLOCK_PERM_NAMESPACE_ENTER,
> +        .namespace_types = CLONE_NEWUSER,
> +    };
> +
> +    err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NAMESPACE,
> +                            &ns_attr, 0);
> +
> +Together, these two rules allow an unprivileged process to create a user
> +namespace and call :manpage:`chroot(2)` inside it, while denying all other
> +capabilities and namespace types.  User namespace creation is the one operation
> +that does not require ``CAP_SYS_ADMIN``, so no capability rule is needed for it.
> +See `Capability and namespace restrictions`_ for details on capability
> +requirements.
> +
>  When passing a non-zero ``flags`` argument to ``landlock_restrict_self()``, a
>  similar backwards compatibility check is needed for the restrict flags
>  (see sys_landlock_restrict_self() documentation for available flags):
> @@ -354,10 +407,87 @@ The operations which can be scoped are:
>      A :manpage:`sendto(2)` on a socket which was previously connected will not
>      be restricted.  This works for both datagram and stream sockets.
>  
> -IPC scoping does not support exceptions via :manpage:`landlock_add_rule(2)`.
> +Scoping does not support exceptions via :manpage:`landlock_add_rule(2)`.
>  If an operation is scoped within a domain, no rules can be added to allow access
>  to resources or processes outside of the scope.
>  
> +Capability and namespace restrictions
> +-------------------------------------
> +
> +See Documentation/security/landlock.rst for the design rationale behind
> +the permission model (``handled_perm``) and how it differs from access
> +rights (``handled_access_*``) and scopes (``scoped``).
> +When a process creates a user namespace, the kernel grants all capabilities
> +within that namespace.  While these capabilities cannot directly bypass Landlock
> +restrictions (Landlock enforces access controls independently of capability
> +checks), they open kernel code paths that are normally unreachable to
> +unprivileged users and may contain exploitable bugs.
> +
> +Landlock provides two complementary permissions to address this.
> +``LANDLOCK_PERM_CAPABILITY_USE`` restricts which capabilities a process can use,
> +even when it holds them.  ``LANDLOCK_PERM_NAMESPACE_ENTER`` restricts which
> +namespace types a process can create (via :manpage:`unshare(2)` or
> +:manpage:`clone(2)`) or join (via :manpage:`setns(2)`).  After creating a user
> +namespace, the granted capabilities are scoped to namespaces owned by that user
> +namespace or its descendants; to exercise a capability such as
> +``CAP_NET_ADMIN``, the process must create a namespace of the corresponding type
> +(e.g., a network namespace).  Configuring both permissions together provides
> +full coverage: ``LANDLOCK_PERM_CAPABILITY_USE`` restricts which capabilities are
> +available, while ``LANDLOCK_PERM_NAMESPACE_ENTER`` restricts the namespaces in
> +which they can be used.
Maybe add a section on what this does versus PR_SET_NO_NEW_PRIVS.

The difference might be obvious to people familiar with namespaces and
capabilities, but not to many users less familiar with the subject.

I could see users of the LANDLOCK_PERM_* flags erroneously assuming that
LANDLOCK_PERM_CAPABILITY_USE is required to restrict gaining new
capabilities through execve() (i.e. through setuid), when in fact this is
already restricted if no_new_privs (nnp) is set.

Some clarification on this would be helpful here or where
PR_SET_NO_NEW_PRIVS is discussed in the Landlock docs.
> +
> +When a Landlock domain handles ``LANDLOCK_PERM_CAPABILITY_USE``, all Linux
> +:manpage:`capabilities(7)` are denied by default unless a rule explicitly allows
Nit:

all Linux :manpage:`capabilities(7)`

might be better as

the exercise of all Linux :manpage:`capabilities(7)`

Since, as pointed out before, we do not restrict their presence, but their
exercise.
> +them.  This is purely restrictive: Landlock can only deny capabilities that the
> +traditional capability mechanism would have allowed, never grant additional ones.
> +Rules are added with ``LANDLOCK_RULE_CAPABILITY`` using a
> +&struct landlock_capability_attr.  Each rule specifies a set of ``CAP_*`` values
> +(as a bitmask) to allow.  Capabilities above ``CAP_LAST_CAP`` are silently
> +accepted but have no effect since the kernel never checks them; this means new
> +capabilities introduced by future kernels are automatically denied.
> +
> +When a Landlock domain handles ``LANDLOCK_PERM_NAMESPACE_ENTER``, namespace
> +creation and entry are denied by default unless a rule explicitly allows them.
> +Rules are added with ``LANDLOCK_RULE_NAMESPACE`` using a
> +&struct landlock_namespace_attr.  Each rule specifies a set of ``CLONE_NEW*``
> +flags to allow.
> +
> +In practice, unprivileged processes first create a user namespace (which requires
> +no capability and grants all capabilities within it), then use those capabilities
> +to create other namespace types.  All non-user namespace types require
> +``CAP_SYS_ADMIN`` for both creation and :manpage:`setns(2)` entry; mount
> +namespace entry additionally requires ``CAP_SYS_CHROOT``.  For
> +:manpage:`setns(2)`, capabilities are checked relative to the target namespace,
> +so a process in an ancestor user namespace naturally satisfies them; this
> +includes joining user namespaces, which requires ``CAP_SYS_ADMIN``.  When
> +``LANDLOCK_PERM_CAPABILITY_USE`` is also handled, each of these capabilities
> +must be explicitly allowed by a rule.
> +
> +When combining ``CLONE_NEWUSER`` with other ``CLONE_NEW*`` flags in a single
> +:manpage:`unshare(2)` call, the ``CAP_SYS_ADMIN`` check targets the newly
> +created user namespace, which is handled by ``LANDLOCK_PERM_NAMESPACE_ENTER``
> +independently from ``LANDLOCK_PERM_CAPABILITY_USE``.  Performing the user
> +namespace creation and the additional namespace creation in two separate
> +:manpage:`unshare(2)` calls requires a rule allowing ``CAP_SYS_ADMIN`` if the
> +domain also handles ``LANDLOCK_PERM_CAPABILITY_USE``.
> +
> +More generally, Landlock domains and user namespaces form independent
> +hierarchies: Landlock domains restrict what actions are allowed (each stacked
> +layer narrows the permitted set), while user namespaces restrict where
> +capabilities take effect (only within the process's own namespace and its
> +descendants).  Landlock access controls are fully determined by the domain
> +configuration, regardless of the process's position in the user namespace
> +hierarchy.  When creating child user namespaces, it is recommended to also
> +create a dedicated Landlock domain with restrictions relevant to each namespace
> +context.
> +
> +Note that ``LANDLOCK_PERM_CAPABILITY_USE`` restricts the *use* of capabilities,
> +not their presence in the process's credential.  Capability sets can change
> +after a domain is enforced through user namespace entry, :manpage:`execve(2)` of
> +binaries with file capabilities, or :manpage:`capset(2)`.  In all cases,
> +:manpage:`capget(2)` will report the credential's capability sets, but any
> +denied capability will fail with ``EPERM`` when exercised.
> +
>  Truncating files
>  ----------------
>  
> @@ -515,7 +645,7 @@ Access rights
>  -------------
>  
>  .. kernel-doc:: include/uapi/linux/landlock.h
> -    :identifiers: fs_access net_access scope
> +    :identifiers: fs_access net_access scope perm
>  
>  Creating a new ruleset
>  ----------------------
> @@ -534,7 +664,8 @@ Extending a ruleset
>  
>  .. kernel-doc:: include/uapi/linux/landlock.h
>      :identifiers: landlock_rule_type landlock_path_beneath_attr
> -                  landlock_net_port_attr
> +                  landlock_net_port_attr landlock_capability_attr
> +                  landlock_namespace_attr
>  
>  Enforcing a ruleset
>  -------------------
> @@ -685,6 +816,21 @@ enforce Landlock rulesets across all threads of the calling process
>  using the ``LANDLOCK_RESTRICT_SELF_TSYNC`` flag passed to
>  sys_landlock_restrict_self().
>  
> +Capability restriction (ABI < 9)
> +--------------------------------
> +
> +Starting with the Landlock ABI version 9, it is possible to restrict
> +:manpage:`capabilities(7)` with the new ``LANDLOCK_PERM_CAPABILITY_USE``
> +permission flag and ``LANDLOCK_RULE_CAPABILITY`` rule type.
> +
> +Namespace restriction (ABI < 9)
> +-------------------------------
> +
> +Starting with the Landlock ABI version 9, it is possible to restrict
> +namespace creation (:manpage:`unshare(2)`, :manpage:`clone(2)`) and entry
> +(:manpage:`setns(2)`) with the new ``LANDLOCK_PERM_NAMESPACE_ENTER`` permission
> +flag and ``LANDLOCK_RULE_NAMESPACE`` rule type.
> +
>  .. _kernel_support:
>  
>  Kernel support
> -- 
> 2.53.0
> 

^ permalink raw reply

* Re: [PATCH 00/61] treewide: Use IS_ERR_OR_NULL over manual NULL check - refactor
From: Jason Gunthorpe @ 2026-03-12 12:57 UTC (permalink / raw)
  To: Kuan-Wei Chiu
  Cc: Philipp Hahn, amd-gfx, apparmor, bpf, ceph-devel, cocci, dm-devel,
	dri-devel, gfs2, intel-gfx, intel-wired-lan, iommu, kvm,
	linux-arm-kernel, linux-block, linux-bluetooth, linux-btrfs,
	linux-cifs, linux-clk, linux-erofs, linux-ext4, linux-fsdevel,
	linux-gpio, linux-hyperv, linux-input, linux-kernel, linux-leds,
	linux-media, linux-mips, linux-mm, linux-modules, linux-mtd,
	linux-nfs, linux-omap, linux-phy, linux-pm, linux-rockchip,
	linux-s390, linux-scsi, linux-sctp, linux-security-module,
	linux-sh, linux-sound, linux-stm32, linux-trace-kernel, linux-usb,
	linux-wireless, netdev, ntfs3, samba-technical, sched-ext,
	target-devel, tipc-discussion, v9fs
In-Reply-To: <abBlpGKO842B3yl9@google.com>

On Wed, Mar 11, 2026 at 02:40:36AM +0800, Kuan-Wei Chiu wrote:

> IMHO, the necessity of IS_ERR_OR_NULL() often highlights a confusing or
> flawed API design. It usually implies that the caller is unsure whether
> a failure results in an error pointer or a NULL pointer. 

+1

IS_ERR_OR_NULL() should always be looked on with suspicion. Very
little should be returning some tri-state 'ERR' 'NULL' 'SUCCESS'
pointer. What does the middle condition even mean? IS_ERR_OR_NULL()
implies ERR and NULL are semantically the same, so fix the things to
always use ERR.

If you want to improve things, work to get rid of the NULL checks this
script identifies. Drop either the ERR or the NULL check where only one
can ever happen, or fix the source to consistently return ERR.

Jason

^ permalink raw reply

* Re: [PATCH v6] lsm: Add LSM hook security_unix_find
From: Günther Noack @ 2026-03-12 11:57 UTC (permalink / raw)
  To: Paul Moore
  Cc: Justin Suess, Günther Noack, brauner, demiobenour,
	fahimitahera, hi, horms, ivanov.mikhail1, jannh, jmorris,
	john.johansen, konstantin.meskhidze, linux-security-module, m,
	matthieu, mic, netdev, samasth.norway.ananda, serge, viro
In-Reply-To: <CAHC9VhSA=jaKTXg-Tmzzpaj9STGMXH3ZMgQm_XvicimRqdW0+w@mail.gmail.com>

On Wed, Mar 11, 2026 at 12:08:43PM -0400, Paul Moore wrote:
> On Wed, Mar 11, 2026 at 8:34 AM Justin Suess <utilityemal77@gmail.com> wrote:
> >
> > On Tue, Mar 10, 2026 at 06:39:12PM -0400, Paul Moore wrote:
> > > On Thu, Feb 19, 2026 at 3:26 PM Günther Noack <gnoack3000@gmail.com> wrote:
> > > > On Thu, Feb 19, 2026 at 03:04:59PM -0500, Justin Suess wrote:
> > > > > Add a LSM hook security_unix_find.
> > > > >
> > > > > This hook is called to check the path of a named unix socket before a
> > > > > connection is initiated. The peer socket may be inspected as well.
> > > > >
> > > > > Why existing hooks are unsuitable:
> > > > >
> > > > > Existing socket hooks, security_unix_stream_connect(),
> > > > > security_unix_may_send(), and security_socket_connect() don't provide
> > > > > TOCTOU-free / namespace independent access to the paths of sockets.
> > > > >
> > > > > (1) We cannot resolve the path from the struct sockaddr in existing hooks.
> > > > > This requires another path lookup. A change in the path between the
> > > > > two lookups will cause a TOCTOU bug.
> > > > >
> > > > > (2) We cannot use the struct path from the listening socket, because it
> > > > > may be bound to a path in a different namespace than the caller,
> > > > > resulting in a path that cannot be referenced at policy creation time.
> > > > >
> > > > > Cc: Günther Noack <gnoack3000@gmail.com>
> > > > > Cc: Tingmao Wang <m@maowtm.org>
> > > > > Signed-off-by: Justin Suess <utilityemal77@gmail.com>
> > > > > ---
> > > > >  include/linux/lsm_hook_defs.h |  5 +++++
> > > > >  include/linux/security.h      | 11 +++++++++++
> > > > >  net/unix/af_unix.c            | 13 ++++++++++---
> > > > >  security/security.c           | 20 ++++++++++++++++++++
> > > > >  4 files changed, 46 insertions(+), 3 deletions(-)
> > >
> > > ...
> > >
> > > > Reviewed-by: Günther Noack <gnoack3000@gmail.com>
> > > >
> > > > Thank you, this looks good. I'll include it in the next version of the
> > > > Unix connect patch set again.
> > >
> > > I'm looking for this patchset to review/ACK the new hook in context,
> > > but I'm not seeing it in my inbox or lore.  Did I simply miss the
> > > patchset or is it still a work in progress?  No worries if it hasn't
> > > been posted yet, I just wanted to make sure I wasn't holding this up
> > > any more than I already may have :)
> >
> > Good Morning Paul,
> >
> > Can't speak to the rest of the patch, but I sent this LSM hook for
> > review purposes before inclusion with the rest of the V6 of this patch.
> >
> > Günther added his review tag, but I was asked to make some minor comment / commit
> > message updates. I sent the same patch, with updated comments/commit to him
> > in a follow up, off-list email to avoid spamming the list. No code changes were
> > made, just comments.
> >
> > I don't think this particular patch will change substantially, unless we find
> > something unexpected. But the way we use the hook may change (esp wrt to
> > locking and the SOCK_DEAD state), which is important for your review.
> >
> > So you may want to hold off your review until the full V6 series gets sent so
> > you can review the hook in context. There were some questions about
> > locking that needed proper digging into. [1]
> 
> Great, thanks for the update, that was helpful.  As you recommend,
> I'll hold off on reviewing this further until we have the full context
> of the other patchset; we've already talked about this hook addition a
> few times anyway, and based on a quick look yesterday, nothing
> particularly evil jumped out at me.

Yes, thanks - I have been busy with the TSYNC fixes recently, which
were more urgent because it's in the RC for 7.0, but will get back to
the UNIX restrictions soon.

—Günther

^ permalink raw reply

* [RFC PATCH v1 03/11] nsproxy: Add FOR_EACH_NS_TYPE() X-macro and CLONE_NS_ALL
From: Mickaël Salaün @ 2026-03-12 10:04 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Paul Moore,
	Serge E . Hallyn
  Cc: Mickaël Salaün, Justin Suess, Lennart Poettering,
	Mikhail Ivanov, Nicolas Bouchinet, Shervin Oloumi, Tingmao Wang,
	kernel-team, linux-fsdevel, linux-kernel, linux-security-module
In-Reply-To: <20260312100444.2609563-1-mic@digikod.net>

Introduce the FOR_EACH_NS_TYPE(X) macro as the single source of truth
for the set of (struct type, CLONE_NEW* flag) pairs that define Linux
namespace types.

Currently, the list of CLONE_NEW* flags is duplicated inline in
multiple call sites and would need another copy in each new consumer.
This makes it easy to miss one when a new namespace type is added.

Derive two things from the X-macro:

- CLONE_NS_ALL: Bitmask of all known CLONE_NEW* flags, usable as a
  validity mask or iteration bound.

- ns_common_type(): Rewritten to use the X-macro via a leading-comma
  _Generic pattern, so the struct-to-flag mapping stays in sync with the
  flag set automatically.

Replace the inline flag enumerations in copy_namespaces(),
unshare_nsproxy_namespaces(), check_setns_flags(), and
check_unshare_flags() with CLONE_NS_ALL (masking out CLONE_NEWUSER
where the original list did not include it).

When a new namespace type is added, only FOR_EACH_NS_TYPE needs to
be updated; CLONE_NS_ALL, ns_common_type(), and all the call sites
pick up the change automatically.

Cc: Christian Brauner <brauner@kernel.org>
Cc: Günther Noack <gnoack@google.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 include/linux/ns/ns_common_types.h | 44 +++++++++++++++++++++++-------
 kernel/fork.c                      |  7 ++---
 kernel/nsproxy.c                   | 13 +++------
 3 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h
index 170288e2e895..5cfe0ce3c881 100644
--- a/include/linux/ns/ns_common_types.h
+++ b/include/linux/ns/ns_common_types.h
@@ -7,6 +7,7 @@
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
 #include <linux/types.h>
+#include <uapi/linux/sched.h>
 
 struct cgroup_namespace;
 struct dentry;
@@ -187,15 +188,38 @@ struct ns_common {
 		struct user_namespace *:   (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations   : NULL), \
 		struct uts_namespace *:    (IS_ENABLED(CONFIG_UTS_NS)  ? &utsns_operations    : NULL))
 
-#define ns_common_type(__ns)                                \
-	_Generic((__ns),                                    \
-		struct cgroup_namespace *: CLONE_NEWCGROUP, \
-		struct ipc_namespace *:    CLONE_NEWIPC,    \
-		struct mnt_namespace *:    CLONE_NEWNS,     \
-		struct net *:              CLONE_NEWNET,    \
-		struct pid_namespace *:    CLONE_NEWPID,    \
-		struct time_namespace *:   CLONE_NEWTIME,   \
-		struct user_namespace *:   CLONE_NEWUSER,   \
-		struct uts_namespace *:    CLONE_NEWUTS)
+/*
+ * FOR_EACH_NS_TYPE - Canonical list of namespace types
+ *
+ * Enumerates all (struct type, CLONE_NEW* flag) pairs.  This is the
+ * single source of truth used to derive ns_common_type() and
+ * CLONE_NS_ALL.  When adding a new namespace type, add a single entry
+ * here; all consumers update automatically.
+ *
+ * @X: Callback macro taking (struct_name, clone_flag) as arguments.
+ */
+#define FOR_EACH_NS_TYPE(X)                  \
+	X(cgroup_namespace, CLONE_NEWCGROUP) \
+	X(ipc_namespace, CLONE_NEWIPC)       \
+	X(mnt_namespace, CLONE_NEWNS)        \
+	X(net, CLONE_NEWNET)                 \
+	X(pid_namespace, CLONE_NEWPID)       \
+	X(time_namespace, CLONE_NEWTIME)     \
+	X(user_namespace, CLONE_NEWUSER)     \
+	X(uts_namespace, CLONE_NEWUTS)
+
+/* Bitmask of all known CLONE_NEW* flags. */
+#define _NS_TYPE_FLAG_OR(struct_name, flag) | (flag)
+#define CLONE_NS_ALL                        (0 FOR_EACH_NS_TYPE(_NS_TYPE_FLAG_OR))
+
+/*
+ * ns_common_type - Map a namespace struct pointer to its CLONE_NEW* flag
+ *
+ * Uses a leading-comma pattern so the FOR_EACH_NS_TYPE expansion
+ * produces ", struct foo *: FLAG" entries without a trailing comma.
+ */
+#define _NS_TYPE_ASSOC(struct_name, flag) , struct struct_name *: (flag)
+
+#define ns_common_type(__ns) _Generic((__ns)FOR_EACH_NS_TYPE(_NS_TYPE_ASSOC))
 
 #endif /* _LINUX_NS_COMMON_TYPES_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 65113a304518..767559acd060 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -46,6 +46,7 @@
 #include <linux/mm_inline.h>
 #include <linux/memblock.h>
 #include <linux/nsproxy.h>
+#include <linux/ns/ns_common_types.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
 #include <linux/cgroup.h>
@@ -3046,11 +3047,9 @@ void __init proc_caches_init(void)
  */
 static int check_unshare_flags(unsigned long unshare_flags)
 {
-	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
-				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP|
-				CLONE_NEWTIME))
+				CLONE_NS_ALL))
 		return -EINVAL;
 	/*
 	 * Not implemented, but pretend it works if there is nothing
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f0b30d1907e7..7181886331c8 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/nsproxy.h>
+#include <linux/ns/ns_common_types.h>
 #include <linux/init_task.h>
 #include <linux/mnt_namespace.h>
 #include <linux/utsname.h>
@@ -170,9 +171,7 @@ int copy_namespaces(u64 flags, struct task_struct *tsk)
 	struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
 	struct nsproxy *new_ns;
 
-	if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			      CLONE_NEWPID | CLONE_NEWNET |
-			      CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
+	if (likely(!(flags & (CLONE_NS_ALL & ~CLONE_NEWUSER)))) {
 		if ((flags & CLONE_VM) ||
 		    likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
 			get_nsproxy(old_ns);
@@ -214,9 +213,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 	struct user_namespace *user_ns;
 	int err = 0;
 
-	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
-			       CLONE_NEWTIME)))
+	if (!(unshare_flags & (CLONE_NS_ALL & ~CLONE_NEWUSER)))
 		return 0;
 
 	user_ns = new_cred ? new_cred->user_ns : current_user_ns();
@@ -292,9 +289,7 @@ int exec_task_namespaces(void)
 
 static int check_setns_flags(unsigned long flags)
 {
-	if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-				 CLONE_NEWNET | CLONE_NEWTIME | CLONE_NEWUSER |
-				 CLONE_NEWPID | CLONE_NEWCGROUP)))
+	if (!flags || (flags & ~CLONE_NS_ALL))
 		return -EINVAL;
 
 #ifndef CONFIG_USER_NS
-- 
2.53.0


^ permalink raw reply related

* [RFC PATCH v1 02/11] security: Add LSM_AUDIT_DATA_NS for namespace audit records
From: Mickaël Salaün @ 2026-03-12 10:04 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Paul Moore,
	Serge E . Hallyn
  Cc: Mickaël Salaün, Justin Suess, Lennart Poettering,
	Mikhail Ivanov, Nicolas Bouchinet, Shervin Oloumi, Tingmao Wang,
	kernel-team, linux-fsdevel, linux-kernel, linux-security-module
In-Reply-To: <20260312100444.2609563-1-mic@digikod.net>

Add a new LSM audit data type LSM_AUDIT_DATA_NS that logs namespace
information in audit records.  Two fields are provided, matching the
field names of struct ns_common:

- ns_type: the CLONE_NEW* flag identifying the namespace type, logged in
  hexadecimal.

- inum: the proc inode number identifying a specific namespace instance.
  Namespace inode numbers are allocated by proc_alloc_inum() via
  ida_alloc_max() bounded to UINT_MAX, so the value always fits in 32
  bits.

A new audit data type is needed because no existing LSM_AUDIT_DATA_*
type carries namespace information.  The closest alternatives (e.g.
LSM_AUDIT_DATA_TASK or LSM_AUDIT_DATA_NONE with custom strings) would
either lose the namespace type or require ad-hoc formatting that
bypasses the structured audit data union.

Cc: Christian Brauner <brauner@kernel.org>
Cc: Günther Noack <gnoack@google.com>
Cc: Paul Moore <paul@paul-moore.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 include/linux/lsm_audit.h | 5 +++++
 security/lsm_audit.c      | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 382c56a97bba..6e20a56b8c22 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -78,6 +78,7 @@ struct common_audit_data {
 #define LSM_AUDIT_DATA_NOTIFICATION 16
 #define LSM_AUDIT_DATA_ANONINODE	17
 #define LSM_AUDIT_DATA_NLMSGTYPE	18
+#define LSM_AUDIT_DATA_NS		19
 	union 	{
 		struct path path;
 		struct dentry *dentry;
@@ -100,6 +101,10 @@ struct common_audit_data {
 		int reason;
 		const char *anonclass;
 		u16 nlmsg_type;
+		struct {
+			u32 ns_type;
+			unsigned int inum;
+		} ns;
 	} u;
 	/* this union contains LSM specific data */
 	union {
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 7d623b00495c..7f71a77c1c12 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -403,6 +403,10 @@ void audit_log_lsm_data(struct audit_buffer *ab,
 	case LSM_AUDIT_DATA_NLMSGTYPE:
 		audit_log_format(ab, " nl-msgtype=%hu", a->u.nlmsg_type);
 		break;
+	case LSM_AUDIT_DATA_NS:
+		audit_log_format(ab, " namespace_type=0x%x namespace_inum=%u",
+				 a->u.ns.ns_type, a->u.ns.inum);
+		break;
 	} /* switch (a->type) */
 }
 
-- 
2.53.0


^ permalink raw reply related

* [RFC PATCH v1 05/11] landlock: Enforce namespace entry restrictions
From: Mickaël Salaün @ 2026-03-12 10:04 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Paul Moore,
	Serge E . Hallyn
  Cc: Mickaël Salaün, Justin Suess, Lennart Poettering,
	Mikhail Ivanov, Nicolas Bouchinet, Shervin Oloumi, Tingmao Wang,
	kernel-team, linux-fsdevel, linux-kernel, linux-security-module
In-Reply-To: <20260312100444.2609563-1-mic@digikod.net>

Add Landlock enforcement for namespace entry via the LSM namespace_alloc
and namespace_install hooks.  This lets a sandboxed process restrict
which namespace types it can acquire, using
LANDLOCK_PERM_NAMESPACE_ENTER and per-type rules.

Introduce the handled_perm field in struct landlock_ruleset_attr for
permission categories that control broad operations enforced at single
kernel chokepoints, achieving complete deny-by-default coverage.  Each
LANDLOCK_PERM_* flag names a gateway operation (use, enter) whose
control transitively covers downstream operations.  Rule values
reference constants from other kernel subsystems (CLONE_NEW* for
namespaces); unknown values are silently accepted because the allow-list
denies them by default.  See the "Ruleset restriction models" section in
the kernel documentation for the full design rationale.

Add two namespace hooks:

- hook_namespace_alloc() fires during unshare(CLONE_NEW*) and
  clone(CLONE_NEW*) via __ns_common_init(), and checks the namespace
  type against the domain's allowed set.

- hook_namespace_install() fires during setns() via validate_ns(),
  performing the same type-based check.  Both hooks set ns_type in the
  audit data (logged as namespace_type); hook_namespace_install() also
  sets inum (logged as namespace_inum) for the target namespace.

Both hooks perform a pure bitmask check: if the namespace's CLONE_NEW*
type is not in the layer's allowed set, the operation is denied.  No
domain ancestry bypass, no namespace creator tracking, just a flat
per-layer allowed-types bitmask.

Add struct perm_rules, embedded as the "allowed" field of struct
layer_rights (introduced by a preceding commit), to store per-layer
namespace type bitmasks.  Its "ns" bitfield is LANDLOCK_NUM_PERM_NS bits
wide: one bit for each of the 8 known namespace types, assigned via
landlock_ns_type_to_bit(), keeping the storage compact.

LANDLOCK_RULE_NAMESPACE uses struct landlock_namespace_attr with an
allowed_perm field (matching the pattern of allowed_access in existing
rule types) and a namespace_types bitmask of CLONE_NEW* flags.  Unknown
namespace type bits are silently accepted for forward compatibility;
they have no effect since the allow-list denies by default.

User namespace creation does not require capabilities, so Landlock can
restrict it directly.  Entering any other namespace type requires
CAP_SYS_ADMIN, which is checked before the Landlock hook is reached;
when both LANDLOCK_PERM_NAMESPACE_ENTER and LANDLOCK_PERM_CAPABILITY_USE
are handled, both must allow the operation.

Five KUnit tests verify the landlock_ns_type_to_bit() and
landlock_ns_types_to_bits() conversion helpers.

Cc: Christian Brauner <brauner@kernel.org>
Cc: Günther Noack <gnoack@google.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Serge E. Hallyn <serge@hallyn.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 include/uapi/linux/landlock.h                |  58 +++++-
 security/landlock/Makefile                   |   1 +
 security/landlock/access.h                   |  42 ++++-
 security/landlock/audit.c                    |   4 +
 security/landlock/audit.h                    |   1 +
 security/landlock/cred.h                     |  42 +++++
 security/landlock/limits.h                   |   7 +
 security/landlock/ns.c                       | 188 +++++++++++++++++++
 security/landlock/ns.h                       |  74 ++++++++
 security/landlock/ruleset.c                  |  11 +-
 security/landlock/ruleset.h                  |  25 ++-
 security/landlock/setup.c                    |   2 +
 security/landlock/syscalls.c                 |  70 ++++++-
 tools/testing/selftests/landlock/base_test.c |   2 +-
 14 files changed, 509 insertions(+), 18 deletions(-)
 create mode 100644 security/landlock/ns.c
 create mode 100644 security/landlock/ns.h

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index f88fa1f68b77..b76e656241df 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -51,6 +51,14 @@ struct landlock_ruleset_attr {
 	 * resources (e.g. IPCs).
 	 */
 	__u64 scoped;
+	/**
+	 * @handled_perm: Bitmask of permissions (cf. `Permission flags`_)
+	 * that this ruleset handles.  Each permission controls a broad
+	 * operation enforced at a kernel chokepoint: all instances of
+	 * that operation are denied unless explicitly allowed by a rule.
+	 * See Documentation/security/landlock.rst for the rationale.
+	 */
+	__u64 handled_perm;
 };
 
 /**
@@ -153,6 +161,11 @@ enum landlock_rule_type {
 	 * landlock_net_port_attr .
 	 */
 	LANDLOCK_RULE_NET_PORT,
+	/**
+	 * @LANDLOCK_RULE_NAMESPACE: Type of a &struct
+	 * landlock_namespace_attr .
+	 */
+	LANDLOCK_RULE_NAMESPACE,
 };
 
 /**
@@ -206,6 +219,24 @@ struct landlock_net_port_attr {
 	__u64 port;
 };
 
+/**
+ * struct landlock_namespace_attr - Namespace type definition
+ *
+ * Argument of sys_landlock_add_rule() with %LANDLOCK_RULE_NAMESPACE.
+ */
+struct landlock_namespace_attr {
+	/**
+	 * @allowed_perm: Must be set to %LANDLOCK_PERM_NAMESPACE_ENTER.
+	 */
+	__u64 allowed_perm;
+	/**
+	 * @namespace_types: Bitmask of namespace types (``CLONE_NEW*`` flags)
+	 * that should be allowed to be entered under this rule.  Unknown bits
+	 * are silently ignored for forward compatibility.
+	 */
+	__u64 namespace_types;
+};
+
 /**
  * DOC: fs_access
  *
@@ -379,6 +410,31 @@ struct landlock_net_port_attr {
 /* clang-format off */
 #define LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET		(1ULL << 0)
 #define LANDLOCK_SCOPE_SIGNAL		                (1ULL << 1)
-/* clang-format on*/
+/* clang-format on */
+
+/**
+ * DOC: perm
+ *
+ * Permission flags
+ * ~~~~~~~~~~~~~~~~
+ *
+ * These flags restrict broad operations enforced at kernel chokepoints.
+ * Each flag names a gateway operation whose control transitively covers
+ * an open-ended set of downstream operations.  Handled permissions that
+ * are not explicitly allowed by a rule are denied by default.  Rule
+ * values reference constants from other kernel subsystems; unknown values
+ * are silently accepted for forward compatibility since the allow-list
+ * denies them by default.
+ * See Documentation/security/landlock.rst for design details.
+ *
+ * - %LANDLOCK_PERM_NAMESPACE_ENTER: Restrict entering (creating or joining
+ *   via :manpage:`setns(2)`) specific namespace types.  A process in a
+ *   Landlock domain that handles this permission is denied from entering
+ *   namespace types that are not explicitly allowed by a
+ *   %LANDLOCK_RULE_NAMESPACE rule.
+ */
+/* clang-format off */
+#define LANDLOCK_PERM_NAMESPACE_ENTER			(1ULL << 0)
+/* clang-format on */
 
 #endif /* _UAPI_LINUX_LANDLOCK_H */
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index ffa7646d99f3..734aed4ac1bf 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -8,6 +8,7 @@ landlock-y := \
 	cred.o \
 	task.o \
 	fs.o \
+	ns.o \
 	tsync.o
 
 landlock-$(CONFIG_INET) += net.o
diff --git a/security/landlock/access.h b/security/landlock/access.h
index b3e147771a0e..9c67987a77ae 100644
--- a/security/landlock/access.h
+++ b/security/landlock/access.h
@@ -42,6 +42,8 @@ static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_ACCESS_FS);
 static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_ACCESS_NET);
 /* Makes sure all scoped rights can be stored. */
 static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_SCOPE);
+/* Makes sure all permission types can be stored. */
+static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_PERM);
 /* Makes sure for_each_set_bit() and for_each_clear_bit() calls are OK. */
 static_assert(sizeof(unsigned long) >= sizeof(access_mask_t));
 
@@ -50,6 +52,7 @@ struct access_masks {
 	access_mask_t fs : LANDLOCK_NUM_ACCESS_FS;
 	access_mask_t net : LANDLOCK_NUM_ACCESS_NET;
 	access_mask_t scope : LANDLOCK_NUM_SCOPE;
+	access_mask_t perm : LANDLOCK_NUM_PERM;
 };
 
 union access_masks_all {
@@ -61,14 +64,47 @@ union access_masks_all {
 static_assert(sizeof(typeof_member(union access_masks_all, masks)) ==
 	      sizeof(typeof_member(union access_masks_all, all)));
 
+/**
+ * struct perm_rules - Per-layer allowed bitmasks for permission types
+ *
+ * Compact bitfield struct holding the allowed bitmasks for permission
+ * types that use flat (non-tree) per-layer storage.  All fields share
+ * a single 64-bit storage unit.
+ */
+struct perm_rules {
+	/**
+	 * @ns: Allowed namespace types.  Each bit corresponds to a
+	 * sequential index assigned by the ``_LANDLOCK_NS_*`` enum
+	 * (derived from ``FOR_EACH_NS_TYPE``).  Bits are converted from
+	 * ``CLONE_NEW*`` flags at rule-add time via
+	 * ``landlock_ns_types_to_bits()`` and at enforcement time via
+	 * ``landlock_ns_type_to_bit()``.
+	 */
+	u64 ns : LANDLOCK_NUM_PERM_NS;
+};
+
+static_assert(sizeof(struct perm_rules) == sizeof(u64));
+
 /**
  * struct layer_rights - Per-layer access configuration
  *
- * Wraps the handled-access bitfields together with any additional per-layer
- * data (e.g. allowed bitmasks added by future patches).  This is the element
- * type of the &struct landlock_ruleset.layers FAM.
+ * Wraps the handled-access bitfields together with per-layer allowed
+ * bitmasks.  This is the element type of the &struct
+ * landlock_ruleset.layers FAM.
+ *
+ * Unlike filesystem and network access rights, which are tracked per-object
+ * in red-black trees, namespace types use a flat bitmask because their
+ * keyspace is small and bounded (~8 namespace types).  A single rule adds
+ * to the allowed set via bitwise OR; at enforcement time each layer is
+ * checked directly (no tree lookup needed).
  */
 struct layer_rights {
+	/**
+	 * @allowed: Per-layer allowed bitmasks for permission types.
+	 * Placed before @handled to avoid an internal padding hole
+	 * (8-byte perm_rules followed by 4-byte access_masks).
+	 */
+	struct perm_rules allowed;
 	/**
 	 * @handled: Bitmask of access rights handled (i.e. restricted) by
 	 * this layer.
diff --git a/security/landlock/audit.c b/security/landlock/audit.c
index 60ff217ab95b..46a635893914 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/audit.c
@@ -78,6 +78,10 @@ get_blocker(const enum landlock_request_type type,
 	case LANDLOCK_REQUEST_SCOPE_SIGNAL:
 		WARN_ON_ONCE(access_bit != -1);
 		return "scope.signal";
+
+	case LANDLOCK_REQUEST_NAMESPACE:
+		WARN_ON_ONCE(access_bit != -1);
+		return "perm.namespace_enter";
 	}
 
 	WARN_ON_ONCE(1);
diff --git a/security/landlock/audit.h b/security/landlock/audit.h
index 56778331b58c..e9e52fb628f5 100644
--- a/security/landlock/audit.h
+++ b/security/landlock/audit.h
@@ -21,6 +21,7 @@ enum landlock_request_type {
 	LANDLOCK_REQUEST_NET_ACCESS,
 	LANDLOCK_REQUEST_SCOPE_ABSTRACT_UNIX_SOCKET,
 	LANDLOCK_REQUEST_SCOPE_SIGNAL,
+	LANDLOCK_REQUEST_NAMESPACE,
 };
 
 /*
diff --git a/security/landlock/cred.h b/security/landlock/cred.h
index 3e2a7e88710e..68067ff53ead 100644
--- a/security/landlock/cred.h
+++ b/security/landlock/cred.h
@@ -153,6 +153,48 @@ landlock_get_applicable_subject(const struct cred *const cred,
 	return NULL;
 }
 
+/**
+ * landlock_perm_is_denied - Check if a permission bitmask request is denied
+ *
+ * @domain: The enforced domain.
+ * @perm_bit: The LANDLOCK_PERM_* flag to check.
+ * @request_value: Compact bitmask to look for (e.g. result of
+ *                 ``landlock_ns_type_to_bit(CLONE_NEWNET)``).
+ *
+ * Iterate from the youngest layer to the oldest.  For each layer that
+ * handles @perm_bit, check whether @request_value is present in the
+ * layer's allowed bitmask.  Return on the first (youngest) denying
+ * layer.
+ *
+ * Return: The youngest denying layer + 1, or 0 if allowed.
+ */
+static inline size_t
+landlock_perm_is_denied(const struct landlock_ruleset *const domain,
+			const access_mask_t perm_bit, const u64 request_value)
+{
+	ssize_t layer;
+
+	for (layer = domain->num_layers - 1; layer >= 0; layer--) {
+		u64 allowed;
+
+		if (!(domain->layers[layer].handled.perm & perm_bit))
+			continue;
+
+		switch (perm_bit) {
+		case LANDLOCK_PERM_NAMESPACE_ENTER:
+			allowed = domain->layers[layer].allowed.ns;
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			return layer + 1;
+		}
+
+		if (!(allowed & request_value))
+			return layer + 1;
+	}
+	return 0;
+}
+
 __init void landlock_add_cred_hooks(void);
 
 #endif /* _SECURITY_LANDLOCK_CRED_H */
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index eb584f47288d..e361b653fcf5 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -12,6 +12,7 @@
 
 #include <linux/bitops.h>
 #include <linux/limits.h>
+#include <linux/ns/ns_common_types.h>
 #include <uapi/linux/landlock.h>
 
 /* clang-format off */
@@ -31,6 +32,12 @@
 #define LANDLOCK_MASK_SCOPE		((LANDLOCK_LAST_SCOPE << 1) - 1)
 #define LANDLOCK_NUM_SCOPE		__const_hweight64(LANDLOCK_MASK_SCOPE)
 
+#define LANDLOCK_LAST_PERM		LANDLOCK_PERM_NAMESPACE_ENTER
+#define LANDLOCK_MASK_PERM		((LANDLOCK_LAST_PERM << 1) - 1)
+#define LANDLOCK_NUM_PERM		__const_hweight64(LANDLOCK_MASK_PERM)
+
+#define LANDLOCK_NUM_PERM_NS		__const_hweight64((u64)(CLONE_NS_ALL))
+
 #define LANDLOCK_LAST_RESTRICT_SELF	LANDLOCK_RESTRICT_SELF_TSYNC
 #define LANDLOCK_MASK_RESTRICT_SELF	((LANDLOCK_LAST_RESTRICT_SELF << 1) - 1)
 
diff --git a/security/landlock/ns.c b/security/landlock/ns.c
new file mode 100644
index 000000000000..fd9e00a295d2
--- /dev/null
+++ b/security/landlock/ns.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock - Namespace hooks
+ *
+ * Copyright © 2026 Cloudflare
+ */
+
+#include <linux/lsm_audit.h>
+#include <linux/lsm_hooks.h>
+#include <linux/ns/ns_common_types.h>
+#include <linux/ns_common.h>
+#include <linux/nsproxy.h>
+#include <uapi/linux/landlock.h>
+
+#include "audit.h"
+#include "cred.h"
+#include "limits.h"
+#include "ns.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/* Ensures the audit inum field can hold ns_common.inum without truncation. */
+static_assert(sizeof(((struct common_audit_data *)NULL)->u.ns.inum) >=
+	      sizeof(((struct ns_common *)NULL)->inum));
+
+static const struct access_masks ns_perm = {
+	.perm = LANDLOCK_PERM_NAMESPACE_ENTER,
+};
+
+/**
+ * hook_namespace_alloc - Check namespace entry permission for creation
+ *
+ * @ns: The namespace being initialized.
+ *
+ * Checks if the current domain allows entering (creating) this namespace
+ * type.  Fires during unshare(2) and clone(2) via __ns_common_init() in
+ * kernel/nscommon.c.
+ *
+ * Return: 0 if allowed, -EPERM if namespace creation is denied.
+ */
+static int hook_namespace_alloc(struct ns_common *const ns)
+{
+	const struct landlock_cred_security *subject;
+	size_t denied_layer;
+
+	WARN_ON_ONCE(!(CLONE_NS_ALL & ns->ns_type));
+
+	subject =
+		landlock_get_applicable_subject(current_cred(), ns_perm, NULL);
+	if (!subject)
+		return 0;
+
+	denied_layer = landlock_perm_is_denied(
+		subject->domain, LANDLOCK_PERM_NAMESPACE_ENTER,
+		landlock_ns_type_to_bit(ns->ns_type));
+	if (!denied_layer)
+		return 0;
+
+	landlock_log_denial(subject, &(struct landlock_request){
+					     .type = LANDLOCK_REQUEST_NAMESPACE,
+					     .audit.type = LSM_AUDIT_DATA_NS,
+					     .audit.u.ns.ns_type = ns->ns_type,
+					     .layer_plus_one = denied_layer,
+				     });
+	return -EPERM;
+}
+
+/**
+ * hook_namespace_install - Check namespace entry permission
+ *
+ * @nsset: The namespace set being modified.
+ * @ns: The namespace being entered.
+ *
+ * Checks if the current domain restricts entering this namespace type.
+ * Fires during setns(2) via validate_ns() in kernel/nsproxy.c.
+ * Uses the same type-based check as hook_namespace_alloc(): the
+ * restriction is on which namespace types the process can enter,
+ * regardless of who created the namespace.
+ *
+ * Return: 0 if entry is allowed, -EPERM if denied.
+ */
+static int hook_namespace_install(const struct nsset *nsset,
+				  struct ns_common *ns)
+{
+	const struct landlock_cred_security *subject;
+	size_t denied_layer;
+
+	WARN_ON_ONCE(!(CLONE_NS_ALL & ns->ns_type));
+
+	subject =
+		landlock_get_applicable_subject(current_cred(), ns_perm, NULL);
+	if (!subject)
+		return 0;
+
+	denied_layer = landlock_perm_is_denied(
+		subject->domain, LANDLOCK_PERM_NAMESPACE_ENTER,
+		landlock_ns_type_to_bit(ns->ns_type));
+	if (!denied_layer)
+		return 0;
+
+	landlock_log_denial(subject, &(struct landlock_request){
+					     .type = LANDLOCK_REQUEST_NAMESPACE,
+					     .audit.type = LSM_AUDIT_DATA_NS,
+					     .audit.u.ns.ns_type = ns->ns_type,
+					     .audit.u.ns.inum = ns->inum,
+					     .layer_plus_one = denied_layer,
+				     });
+	return -EPERM;
+}
+
+static struct security_hook_list landlock_hooks[] __ro_after_init = {
+	LSM_HOOK_INIT(namespace_alloc, hook_namespace_alloc),
+	LSM_HOOK_INIT(namespace_install, hook_namespace_install),
+};
+
+__init void landlock_add_ns_hooks(void)
+{
+	security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+			   &landlock_lsmid);
+}
+
+#ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST
+
+#include <kunit/test.h>
+
+/* clang-format off */
+#define _TEST_NS_BIT(struct_name, flag) \
+	do { \
+		const u64 bit = landlock_ns_type_to_bit(flag); \
+		KUNIT_EXPECT_NE(test, 0ULL, bit); \
+		KUNIT_EXPECT_EQ(test, 0ULL, seen &bit); \
+		seen |= bit; \
+	} while (0);
+/* clang-format on */
+
+static void test_ns_type_to_bit(struct kunit *const test)
+{
+	u64 seen = 0;
+
+	FOR_EACH_NS_TYPE(_TEST_NS_BIT)
+
+	KUNIT_EXPECT_EQ(test, GENMASK_ULL(LANDLOCK_NUM_PERM_NS - 1, 0), seen);
+}
+
+static void test_ns_type_to_bit_unknown(struct kunit *const test)
+{
+	KUNIT_EXPECT_EQ(test, 0ULL, landlock_ns_type_to_bit(CLONE_THREAD));
+}
+
+static void test_ns_types_to_bits_all(struct kunit *const test)
+{
+	KUNIT_EXPECT_EQ(test, GENMASK_ULL(LANDLOCK_NUM_PERM_NS - 1, 0),
+			landlock_ns_types_to_bits(CLONE_NS_ALL));
+}
+
+/* clang-format off */
+#define _TEST_NS_SINGLE(struct_name, flag) \
+	KUNIT_EXPECT_EQ(test, landlock_ns_type_to_bit(flag), \
+			landlock_ns_types_to_bits(flag));
+/* clang-format on */
+
+static void test_ns_types_to_bits_single(struct kunit *const test)
+{
+	FOR_EACH_NS_TYPE(_TEST_NS_SINGLE)
+}
+
+static void test_ns_types_to_bits_zero(struct kunit *const test)
+{
+	KUNIT_EXPECT_EQ(test, 0ULL, landlock_ns_types_to_bits(0));
+}
+
+static struct kunit_case test_cases[] = {
+	KUNIT_CASE(test_ns_type_to_bit),
+	KUNIT_CASE(test_ns_type_to_bit_unknown),
+	KUNIT_CASE(test_ns_types_to_bits_all),
+	KUNIT_CASE(test_ns_types_to_bits_single),
+	KUNIT_CASE(test_ns_types_to_bits_zero),
+	{}
+};
+
+static struct kunit_suite test_suite = {
+	.name = "landlock_ns",
+	.test_cases = test_cases,
+};
+
+kunit_test_suite(test_suite);
+
+#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
diff --git a/security/landlock/ns.h b/security/landlock/ns.h
new file mode 100644
index 000000000000..c731ecc08f8c
--- /dev/null
+++ b/security/landlock/ns.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock - Namespace hooks
+ *
+ * Copyright © 2026 Cloudflare
+ */
+
+#ifndef _SECURITY_LANDLOCK_NS_H
+#define _SECURITY_LANDLOCK_NS_H
+
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/compiler_attributes.h>
+#include <linux/ns/ns_common_types.h>
+#include <linux/types.h>
+
+#include "limits.h"
+
+/* _LANDLOCK_NS_CLONE_NEWCGROUP, */
+#define _LANDLOCK_NS_ENUM(struct_name, flag) _LANDLOCK_NS_##flag,
+
+/* _LANDLOCK_NS_CLONE_NEWCGROUP = 0, */
+enum {
+	FOR_EACH_NS_TYPE(_LANDLOCK_NS_ENUM) _LANDLOCK_NUM_NS_TYPES,
+};
+
+static_assert(_LANDLOCK_NUM_NS_TYPES == LANDLOCK_NUM_PERM_NS);
+
+/*
+ * case CLONE_NEWCGROUP:
+ *         return BIT_ULL(_LANDLOCK_NS_CLONE_NEWCGROUP);
+ */
+/* clang-format off */
+#define _LANDLOCK_NS_CASE(struct_name, flag) \
+	case flag: \
+		return BIT_ULL(_LANDLOCK_NS_##flag);
+/* clang-format on */
+
+static inline __attribute_const__ u64
+landlock_ns_type_to_bit(const unsigned long ns_type)
+{
+	switch (ns_type) {
+		FOR_EACH_NS_TYPE(_LANDLOCK_NS_CASE)
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+}
+
+/*
+ * if (ns_types & CLONE_NEWCGROUP)
+ *         bits |= BIT_ULL(_LANDLOCK_NS_CLONE_NEWCGROUP);
+ */
+/* clang-format off */
+#define _LANDLOCK_NS_CONVERT(struct_name, flag) \
+	do { \
+		if (ns_types & (flag)) \
+			bits |= BIT_ULL(_LANDLOCK_NS_##flag); \
+	} while (0);
+/* clang-format on */
+
+static inline __attribute_const__ u64
+landlock_ns_types_to_bits(const u64 ns_types)
+{
+	u64 bits = 0;
+
+	WARN_ON_ONCE(ns_types & ~CLONE_NS_ALL);
+	FOR_EACH_NS_TYPE(_LANDLOCK_NS_CONVERT)
+	return bits;
+}
+
+__init void landlock_add_ns_hooks(void);
+
+#endif /* _SECURITY_LANDLOCK_NS_H */
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index a7f8be37ec31..7321e2f19b03 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -53,15 +53,14 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
 	return new_ruleset;
 }
 
-struct landlock_ruleset *
-landlock_create_ruleset(const access_mask_t fs_access_mask,
-			const access_mask_t net_access_mask,
-			const access_mask_t scope_mask)
+struct landlock_ruleset *landlock_create_ruleset(
+	const access_mask_t fs_access_mask, const access_mask_t net_access_mask,
+	const access_mask_t scope_mask, const access_mask_t perm_mask)
 {
 	struct landlock_ruleset *new_ruleset;
 
 	/* Informs about useless ruleset. */
-	if (!fs_access_mask && !net_access_mask && !scope_mask)
+	if (!fs_access_mask && !net_access_mask && !scope_mask && !perm_mask)
 		return ERR_PTR(-ENOMSG);
 	new_ruleset = create_ruleset(1);
 	if (IS_ERR(new_ruleset))
@@ -72,6 +71,8 @@ landlock_create_ruleset(const access_mask_t fs_access_mask,
 		landlock_add_net_access_mask(new_ruleset, net_access_mask, 0);
 	if (scope_mask)
 		landlock_add_scope_mask(new_ruleset, scope_mask, 0);
+	if (perm_mask)
+		landlock_add_perm_mask(new_ruleset, perm_mask, 0);
 	return new_ruleset;
 }
 
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 900c47eb0216..747261391c00 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -190,10 +190,9 @@ struct landlock_ruleset {
 	};
 };
 
-struct landlock_ruleset *
-landlock_create_ruleset(const access_mask_t access_mask_fs,
-			const access_mask_t access_mask_net,
-			const access_mask_t scope_mask);
+struct landlock_ruleset *landlock_create_ruleset(
+	const access_mask_t access_mask_fs, const access_mask_t access_mask_net,
+	const access_mask_t scope_mask, const access_mask_t perm_mask);
 
 void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
 void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
@@ -303,6 +302,24 @@ landlock_get_scope_mask(const struct landlock_ruleset *const ruleset,
 	return ruleset->layers[layer_level].handled.scope;
 }
 
+static inline void
+landlock_add_perm_mask(struct landlock_ruleset *const ruleset,
+		       const access_mask_t perm_mask, const u16 layer_level)
+{
+	access_mask_t mask = perm_mask & LANDLOCK_MASK_PERM;
+
+	/* Should already be checked in sys_landlock_create_ruleset(). */
+	WARN_ON_ONCE(perm_mask != mask);
+	ruleset->layers[layer_level].handled.perm |= mask;
+}
+
+static inline access_mask_t
+landlock_get_perm_mask(const struct landlock_ruleset *const ruleset,
+		       const u16 layer_level)
+{
+	return ruleset->layers[layer_level].handled.perm;
+}
+
 bool landlock_unmask_layers(const struct landlock_rule *const rule,
 			    struct layer_access_masks *masks);
 
diff --git a/security/landlock/setup.c b/security/landlock/setup.c
index 47dac1736f10..a7ed776b41b4 100644
--- a/security/landlock/setup.c
+++ b/security/landlock/setup.c
@@ -17,6 +17,7 @@
 #include "fs.h"
 #include "id.h"
 #include "net.h"
+#include "ns.h"
 #include "setup.h"
 #include "task.h"
 
@@ -68,6 +69,7 @@ static int __init landlock_init(void)
 	landlock_add_task_hooks();
 	landlock_add_fs_hooks();
 	landlock_add_net_hooks();
+	landlock_add_ns_hooks();
 	landlock_init_id();
 	landlock_initialized = true;
 	pr_info("Up and running.\n");
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index 2aa7b50d875f..152d952e98f6 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -20,6 +20,7 @@
 #include <linux/fs.h>
 #include <linux/limits.h>
 #include <linux/mount.h>
+#include <linux/ns/ns_common_types.h>
 #include <linux/path.h>
 #include <linux/sched.h>
 #include <linux/security.h>
@@ -34,6 +35,7 @@
 #include "fs.h"
 #include "limits.h"
 #include "net.h"
+#include "ns.h"
 #include "ruleset.h"
 #include "setup.h"
 #include "tsync.h"
@@ -95,7 +97,9 @@ static void build_check_abi(void)
 	struct landlock_ruleset_attr ruleset_attr;
 	struct landlock_path_beneath_attr path_beneath_attr;
 	struct landlock_net_port_attr net_port_attr;
+	struct landlock_namespace_attr namespace_attr;
 	size_t ruleset_size, path_beneath_size, net_port_size;
+	size_t namespace_size;
 
 	/*
 	 * For each user space ABI structures, first checks that there is no
@@ -105,8 +109,9 @@ static void build_check_abi(void)
 	ruleset_size = sizeof(ruleset_attr.handled_access_fs);
 	ruleset_size += sizeof(ruleset_attr.handled_access_net);
 	ruleset_size += sizeof(ruleset_attr.scoped);
+	ruleset_size += sizeof(ruleset_attr.handled_perm);
 	BUILD_BUG_ON(sizeof(ruleset_attr) != ruleset_size);
-	BUILD_BUG_ON(sizeof(ruleset_attr) != 24);
+	BUILD_BUG_ON(sizeof(ruleset_attr) != 32);
 
 	path_beneath_size = sizeof(path_beneath_attr.allowed_access);
 	path_beneath_size += sizeof(path_beneath_attr.parent_fd);
@@ -117,6 +122,11 @@ static void build_check_abi(void)
 	net_port_size += sizeof(net_port_attr.port);
 	BUILD_BUG_ON(sizeof(net_port_attr) != net_port_size);
 	BUILD_BUG_ON(sizeof(net_port_attr) != 16);
+
+	namespace_size = sizeof(namespace_attr.allowed_perm);
+	namespace_size += sizeof(namespace_attr.namespace_types);
+	BUILD_BUG_ON(sizeof(namespace_attr) != namespace_size);
+	BUILD_BUG_ON(sizeof(namespace_attr) != 16);
 }
 
 /* Ruleset handling */
@@ -166,7 +176,7 @@ static const struct file_operations ruleset_fops = {
  * If the change involves a fix that requires userspace awareness, also update
  * the errata documentation in Documentation/userspace-api/landlock.rst .
  */
-const int landlock_abi_version = 8;
+const int landlock_abi_version = 9;
 
 /**
  * sys_landlock_create_ruleset - Create a new ruleset
@@ -249,10 +259,16 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
 	if ((ruleset_attr.scoped | LANDLOCK_MASK_SCOPE) != LANDLOCK_MASK_SCOPE)
 		return -EINVAL;
 
+	/* Checks permission content (and 32-bits cast). */
+	if ((ruleset_attr.handled_perm | LANDLOCK_MASK_PERM) !=
+	    LANDLOCK_MASK_PERM)
+		return -EINVAL;
+
 	/* Checks arguments and transforms to kernel struct. */
 	ruleset = landlock_create_ruleset(ruleset_attr.handled_access_fs,
 					  ruleset_attr.handled_access_net,
-					  ruleset_attr.scoped);
+					  ruleset_attr.scoped,
+					  ruleset_attr.handled_perm);
 	if (IS_ERR(ruleset))
 		return PTR_ERR(ruleset);
 
@@ -390,13 +406,57 @@ static int add_rule_net_port(struct landlock_ruleset *ruleset,
 					net_port_attr.allowed_access);
 }
 
+static int add_rule_namespace(struct landlock_ruleset *const ruleset,
+			      const void __user *const rule_attr)
+{
+	struct landlock_namespace_attr ns_attr;
+	int res;
+	access_mask_t mask;
+
+	/* Copies raw user space buffer. */
+	res = copy_from_user(&ns_attr, rule_attr, sizeof(ns_attr));
+	if (res)
+		return -EFAULT;
+
+	/* Informs about useless rule: empty allowed_perm. */
+	if (!ns_attr.allowed_perm)
+		return -ENOMSG;
+
+	/* The allowed_perm must match LANDLOCK_PERM_NAMESPACE_ENTER. */
+	if (ns_attr.allowed_perm != LANDLOCK_PERM_NAMESPACE_ENTER)
+		return -EINVAL;
+
+	/* Checks that allowed_perm matches the @ruleset constraints. */
+	mask = landlock_get_perm_mask(ruleset, 0);
+	if (!(mask & LANDLOCK_PERM_NAMESPACE_ENTER))
+		return -EINVAL;
+
+	/* Informs about useless rule: empty namespace_types. */
+	if (!ns_attr.namespace_types)
+		return -ENOMSG;
+
+	/*
+	 * Stores only the namespace types this kernel knows about.
+	 * Unknown bits are silently accepted for forward compatibility:
+	 * user space compiled against newer headers can pass new
+	 * CLONE_NEW* flags without getting EINVAL on older kernels.
+	 * Unknown bits have no effect because no hook checks them.
+	 */
+	mutex_lock(&ruleset->lock);
+	ruleset->layers[0].allowed.ns |= landlock_ns_types_to_bits(
+		ns_attr.namespace_types & CLONE_NS_ALL);
+	mutex_unlock(&ruleset->lock);
+	return 0;
+}
+
 /**
  * sys_landlock_add_rule - Add a new rule to a ruleset
  *
  * @ruleset_fd: File descriptor tied to the ruleset that should be extended
  *		with the new rule.
  * @rule_type: Identify the structure type pointed to by @rule_attr:
- *             %LANDLOCK_RULE_PATH_BENEATH or %LANDLOCK_RULE_NET_PORT.
+ *             %LANDLOCK_RULE_PATH_BENEATH, %LANDLOCK_RULE_NET_PORT, or
+ *             %LANDLOCK_RULE_NAMESPACE.
  * @rule_attr: Pointer to a rule (matching the @rule_type).
  * @flags: Must be 0.
  *
@@ -446,6 +506,8 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
 		return add_rule_path_beneath(ruleset, rule_attr);
 	case LANDLOCK_RULE_NET_PORT:
 		return add_rule_net_port(ruleset, rule_attr);
+	case LANDLOCK_RULE_NAMESPACE:
+		return add_rule_namespace(ruleset, rule_attr);
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 0fea236ef4bd..30d37234086c 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -76,7 +76,7 @@ TEST(abi_version)
 	const struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
 	};
-	ASSERT_EQ(8, landlock_create_ruleset(NULL, 0,
+	ASSERT_EQ(9, landlock_create_ruleset(NULL, 0,
 					     LANDLOCK_CREATE_RULESET_VERSION));
 
 	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
-- 
2.53.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox